diff --git a/ci/build_cpp.sh b/ci/build_cpp.sh index a787fb7510..5502b1db54 100755 --- a/ci/build_cpp.sh +++ b/ci/build_cpp.sh @@ -25,42 +25,10 @@ export RAPIDS_ARTIFACTS_DIR # populates `RATTLER_CHANNELS` array and `RATTLER_ARGS` array source rapids-rattler-channel-string -# Construct the extra variants according to the architecture -if [[ "$(arch)" == "x86_64" ]]; then - cat > variants.yaml << EOF - c_compiler_version: - - 14 - - cxx_compiler_version: - - 14 - - cuda_version: - - ${RAPIDS_CUDA_VERSION%.*} -EOF -else - cat > variants.yaml << EOF - zip_keys: - - [c_compiler_version, cxx_compiler_version, cuda_version] - - c_compiler_version: - - 12 - - 14 - - cxx_compiler_version: - - 12 - - 14 - - cuda_version: - - 12.1 # The last version to not support cufile - - ${RAPIDS_CUDA_VERSION%.*} -EOF -fi - # --no-build-id allows for caching with `sccache` # more info is available at # https://rattler.build/latest/tips_and_tricks/#using-sccache-or-ccache-with-rattler-build rattler-build build --recipe conda/recipes/libkvikio \ - --variant-config variants.yaml \ "${RATTLER_ARGS[@]}" \ "${RATTLER_CHANNELS[@]}" diff --git a/conda/recipes/kvikio/recipe.yaml b/conda/recipes/kvikio/recipe.yaml index cc31c8ea80..2415362abb 100644 --- a/conda/recipes/kvikio/recipe.yaml +++ b/conda/recipes/kvikio/recipe.yaml @@ -4,7 +4,6 @@ schema_version: 1 context: version: ${{ env.get("RAPIDS_PACKAGE_VERSION") }} - minor_version: ${{ (version | split("."))[:2] | join(".") }} cuda_version: ${{ (env.get("RAPIDS_CUDA_VERSION") | split("."))[:2] | join(".") }} cuda_major: '${{ (env.get("RAPIDS_CUDA_VERSION") | split("."))[0] }}' date_string: '${{ env.get("RAPIDS_DATE_STRING") }}' diff --git a/conda/recipes/libkvikio/recipe.yaml b/conda/recipes/libkvikio/recipe.yaml index 30942b7215..3fde1675e4 100644 --- a/conda/recipes/libkvikio/recipe.yaml +++ b/conda/recipes/libkvikio/recipe.yaml @@ -4,17 +4,8 @@ schema_version: 1 context: version: ${{ env.get("RAPIDS_PACKAGE_VERSION") }} - minor_version: ${{ (version | split("."))[:2] | join(".") }} - # We need to support three cases: - # 1. Linux x86_64, which always uses libcufile - # 2. Linux aarch64 with CUDA >= 12.2, which uses libcufile - # 3. Linux aarch64 with CUDA < 12.2, which does not use libcufile - # Each case has different cuda-version constraints as expressed below - should_use_cufile: ${{ x86_64 or (aarch64 and cuda_version >= "12.2") }} - # When reverting, instances of cuda_key_string can be replaced with cuda_major - cuda_key_string: ${{ cuda_version | replace(".", "_") }} - #cuda_version: ${{ (env.get("RAPIDS_CUDA_VERSION") | split("."))[:2] | join(".") }} - #cuda_major: '${{ (env.get("RAPIDS_CUDA_VERSION") | split("."))[0] }}' + cuda_version: ${{ (env.get("RAPIDS_CUDA_VERSION") | split("."))[:2] | join(".") }} + cuda_major: '${{ (env.get("RAPIDS_CUDA_VERSION") | split("."))[0] }}' date_string: '${{ env.get("RAPIDS_DATE_STRING") }}' head_rev: '${{ git.head_rev(".")[:8] }}' @@ -62,7 +53,7 @@ cache: SCCACHE_NO_CACHE: ${{ env.get("SCCACHE_NO_CACHE", default="") }} SCCACHE_RECACHE: ${{ env.get("SCCACHE_RECACHE", default="") }} SCCACHE_REGION: ${{ env.get("SCCACHE_REGION", default="") }} - SCCACHE_S3_KEY_PREFIX: libkvikio/${{ env.get("RAPIDS_CONDA_ARCH") }}/cuda${{ cuda_key_string }} + SCCACHE_S3_KEY_PREFIX: libkvikio/${{ env.get("RAPIDS_CONDA_ARCH") }}/cuda${{ cuda_major }} SCCACHE_S3_NO_CREDENTIALS: ${{ env.get("SCCACHE_S3_NO_CREDENTIALS", default="false") }} SCCACHE_S3_PREPROCESSOR_CACHE_KEY_PREFIX: libkvikio/${{ env.get("RAPIDS_CONDA_ARCH") }}/cuda${{ cuda_major }}/conda/preprocessor-cache SCCACHE_S3_USE_PREPROCESSOR_CACHE_MODE: ${{ env.get("SCCACHE_S3_USE_PREPROCESSOR_CACHE_MODE", default="true") }} @@ -80,9 +71,7 @@ cache: host: - cuda-version =${{ cuda_version }} - libcurl ==${{ libcurl_version }} - - if: should_use_cufile - then: - - libcufile-dev + - libcufile-dev - libnuma outputs: @@ -93,7 +82,7 @@ outputs: script: content: | cmake --install cpp/build - string: cuda${{ cuda_key_string }}_${{ date_string }}_${{ head_rev }} + string: cuda${{ cuda_major }}_${{ date_string }}_${{ head_rev }} dynamic_linking: overlinking_behavior: "error" prefix_detection: @@ -107,24 +96,16 @@ outputs: - cuda-version =${{ cuda_version }} - libcurl ==${{ libcurl_version }} run: - - if: x86_64 or (aarch64 and cuda_version >= "13.0") + - if: cuda_version >= "13.0" then: - ${{ pin_compatible("cuda-version", upper_bound="x", lower_bound="x") }} else: - - if: aarch64 and cuda_version >= "12.2" - then: - - ${{ pin_compatible("cuda-version", upper_bound="x", lower_bound="12.2.0a0") }} - else: - - ${{ pin_compatible("cuda-version", upper_bound="12.2.0a0", lower_bound="12.0") }} - - if: should_use_cufile - then: - - libcufile-dev + - ${{ pin_compatible("cuda-version", upper_bound="x", lower_bound="12.2.0a0") }} + - libcufile-dev ignore_run_exports: by_name: - cuda-version - - if: should_use_cufile - then: - - libcufile + - libcufile tests: - script: - test -f $PREFIX/include/kvikio/file_handle.hpp @@ -137,7 +118,7 @@ outputs: name: libkvikio-tests version: ${{ version }} build: - string: cuda${{ cuda_key_string }}_${{ date_string }}_${{ head_rev }} + string: cuda${{ cuda_major }}_${{ date_string }}_${{ head_rev }} dynamic_linking: overlinking_behavior: "error" script: @@ -152,28 +133,20 @@ outputs: - cuda-version =${{ cuda_version }} - cuda-cudart-dev - libcurl ==${{ libcurl_version }} - - if: should_use_cufile - then: - - libcufile-dev + - libcufile-dev run: - - if: x86_64 + - if: cuda_version >= "13.0" then: - ${{ pin_compatible("cuda-version", upper_bound="x", lower_bound="x") }} else: - - if: aarch64 and cuda_version >= "12.2" - then: - - ${{ pin_compatible("cuda-version", upper_bound="x", lower_bound="12.2.0a0") }} - else: - - ${{ pin_compatible("cuda-version", upper_bound="12.2.0a0", lower_bound="12.0") }} + - ${{ pin_compatible("cuda-version", upper_bound="x", lower_bound="12.2.0a0") }} - cuda-cudart ignore_run_exports: by_name: - cuda-cudart - cuda-version - libnuma - - if: should_use_cufile - then: - - libcufile + - libcufile about: homepage: ${{ load_from_file("python/libkvikio/pyproject.toml").project.urls.Homepage }} license: ${{ load_from_file("python/libkvikio/pyproject.toml").project.license.text }} diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 71cbc258de..a8897a27e2 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -94,6 +94,10 @@ else() OUTPUT_VARIABLE batch_output ) message(STATUS "Found cuFile Batch API: ${cuFile_BATCH_API_FOUND}") + if(NOT cuFile_BATCH_API_FOUND) + message(FATAL_ERROR "Missing cuFile Batch API") + endif() + try_compile( cuFile_STREAM_API_FOUND SOURCE_FROM_CONTENT stream.cpp @@ -109,6 +113,10 @@ else() OUTPUT_VARIABLE stream_output ) message(STATUS "Found cuFile Stream API: ${cuFile_STREAM_API_FOUND}") + if(NOT cuFile_STREAM_API_FOUND) + message(FATAL_ERROR "Missing cuFile Stream API") + endif() + try_compile( cuFile_VERSION_API_FOUND SOURCE_FROM_CONTENT version.cpp @@ -188,8 +196,6 @@ target_compile_definitions( kvikio PUBLIC $<$:KVIKIO_LIBCURL_FOUND> $<$:KVIKIO_CUFILE_FOUND> - $<$:KVIKIO_CUFILE_BATCH_API_FOUND> - $<$:KVIKIO_CUFILE_STREAM_API_FOUND> $<$:KVIKIO_CUFILE_VERSION_API_FOUND> ) diff --git a/cpp/examples/basic_io.cpp b/cpp/examples/basic_io.cpp index 4cc050ffd9..c9c0c64272 100644 --- a/cpp/examples/basic_io.cpp +++ b/cpp/examples/basic_io.cpp @@ -170,7 +170,7 @@ int main() cout << "Parallel POSIX read (" << kvikio::defaults::thread_pool_nthreads() << " threads): " << read << endl; } - if (kvikio::is_batch_api_available() && !kvikio::defaults::is_compat_mode_preferred()) { + if (!kvikio::defaults::is_compat_mode_preferred()) { std::cout << std::endl; Timer timer; // Here we use the batch API to read "/tmp/test-file" into `b_dev` by diff --git a/cpp/include/kvikio/batch.hpp b/cpp/include/kvikio/batch.hpp index f85c87657c..78b3a7924f 100644 --- a/cpp/include/kvikio/batch.hpp +++ b/cpp/include/kvikio/batch.hpp @@ -33,8 +33,6 @@ struct BatchOp { CUfileOpcode_t opcode; }; -#ifdef KVIKIO_CUFILE_BATCH_API_FOUND - /** * @brief Handle of an cuFile batch using semantic. * @@ -104,27 +102,4 @@ class BatchHandle { void cancel(); }; -#else - -class BatchHandle { - public: - BatchHandle() noexcept = default; - - BatchHandle(int max_num_events); - - [[nodiscard]] bool closed() const noexcept; - - void close() noexcept; - - void submit(std::vector const& operations); - - std::vector status(unsigned min_nr, - unsigned max_nr, - struct timespec* timeout = nullptr); - - void cancel(); -}; - -#endif - } // namespace kvikio diff --git a/cpp/include/kvikio/file_handle.hpp b/cpp/include/kvikio/file_handle.hpp index e74b8e3e20..fd24e7623b 100644 --- a/cpp/include/kvikio/file_handle.hpp +++ b/cpp/include/kvikio/file_handle.hpp @@ -283,9 +283,6 @@ class FileHandle { * This is an asynchronous version of `.read()`, which will be executed in sequence * for the specified stream. * - * When running CUDA v12.1 or older, this function falls back to use `.read()` after - * `stream` has been synchronized. - * * The arguments have the same meaning as in `.read()` but some of them are deferred. * That is, the values pointed to by `size_p`, `file_offset_p` and `devPtr_offset_p` * will not be evaluated until execution time. Notice, this behavior can be changed @@ -324,9 +321,6 @@ class FileHandle { * This is an asynchronous version of `.read()`, which will be executed in sequence * for the specified stream. * - * When running CUDA v12.1 or older, this function falls back to use `.read()` after - * `stream` has been synchronized. - * * The arguments have the same meaning as in `.read()` but returns a `StreamFuture` object * that the caller must keep alive until all data has been read from disk. One way to do this, * is by calling `StreamFuture.check_bytes_done()`, which will synchronize the associated stream @@ -355,9 +349,6 @@ class FileHandle { * This is an asynchronous version of `.write()`, which will be executed in sequence * for the specified stream. * - * When running CUDA v12.1 or older, this function falls back to use `.read()` after - * `stream` has been synchronized. - * * The arguments have the same meaning as in `.write()` but some of them are deferred. * That is, the values pointed to by `size_p`, `file_offset_p` and `devPtr_offset_p` * will not be evaluated until execution time. Notice, this behavior can be changed @@ -397,9 +388,6 @@ class FileHandle { * This is an asynchronous version of `.write()`, which will be executed in sequence * for the specified stream. * - * When running CUDA v12.1 or older, this function falls back to use `.read()` after - * `stream` has been synchronized. - * * The arguments have the same meaning as in `.write()` but returns a `StreamFuture` object * that the caller must keep alive until all data has been written to disk. One way to do this, * is by calling `StreamFuture.check_bytes_done()`, which will synchronize the associated stream diff --git a/cpp/include/kvikio/shim/cufile.hpp b/cpp/include/kvikio/shim/cufile.hpp index 721b2f4e17..5bbf6ff427 100644 --- a/cpp/include/kvikio/shim/cufile.hpp +++ b/cpp/include/kvikio/shim/cufile.hpp @@ -110,26 +110,4 @@ bool is_cufile_available() noexcept; */ int cufile_version() noexcept; -/** - * @brief Check if cuFile's batch API is available. - * - * Since `cuFileGetVersion()` first became available in cufile v1.8 (CTK v12.3), - * this function returns false for versions older than v1.8 even though the batch - * API became available in v1.6. - * - * @return The boolean answer - */ -bool is_batch_api_available() noexcept; - -/** - * @brief Check if cuFile's stream (async) API is available. - * - * Since `cuFileGetVersion()` first became available in cufile v1.8 (CTK v12.3), - * this function returns false for versions older than v1.8 even though the stream - * API became available in v1.7. - * - * @return The boolean answer - */ -bool is_stream_api_available() noexcept; - } // namespace kvikio diff --git a/cpp/include/kvikio/shim/cufile_h_wrapper.hpp b/cpp/include/kvikio/shim/cufile_h_wrapper.hpp index 9aff7248c4..ed97a1f935 100644 --- a/cpp/include/kvikio/shim/cufile_h_wrapper.hpp +++ b/cpp/include/kvikio/shim/cufile_h_wrapper.hpp @@ -68,38 +68,6 @@ CUfileError_t cuFileDriverSetMaxPinnedMemSize(...); // Notice, this doesn't need to be ABI compatible with the cufile definitions and // the lack of definitions is not a problem because the linker will never look for // these symbols because the "real" function calls are made through the shim instance. -#ifndef KVIKIO_CUFILE_BATCH_API_FOUND -typedef enum CUfileOpcode { CUFILE_READ = 0, CUFILE_WRITE } CUfileOpcode_t; - -typedef enum CUFILEStatus_enum { - CUFILE_WAITING = 0x000001, /* required value prior to submission */ - CUFILE_PENDING = 0x000002, /* once enqueued */ - CUFILE_INVALID = 0x000004, /* request was ill-formed or could not be enqueued */ - CUFILE_CANCELED = 0x000008, /* request successfully canceled */ - CUFILE_COMPLETE = 0x0000010, /* request successfully completed */ - CUFILE_TIMEOUT = 0x0000020, /* request timed out */ - CUFILE_FAILED = 0x0000040 /* unable to complete */ -} CUfileStatus_t; - -typedef struct CUfileIOEvents { - void* cookie; - CUfileStatus_t status; /* status of the operation */ - size_t ret; /* -ve error or amount of I/O done. */ -} CUfileIOEvents_t; - -CUfileError_t cuFileBatchIOSetUp(...); -CUfileError_t cuFileBatchIOSubmit(...); -CUfileError_t cuFileBatchIOGetStatus(...); -CUfileError_t cuFileBatchIOCancel(...); -CUfileError_t cuFileBatchIODestroy(...); -#endif - -#ifndef KVIKIO_CUFILE_STREAM_API_FOUND -CUfileError_t cuFileReadAsync(...); -CUfileError_t cuFileWriteAsync(...); -CUfileError_t cuFileStreamRegister(...); -CUfileError_t cuFileStreamDeregister(...); -#endif #ifndef KVIKIO_CUFILE_VERSION_API_FOUND CUfileError_t cuFileGetVersion(...); diff --git a/cpp/src/batch.cpp b/cpp/src/batch.cpp index e1a0f6214d..a8a8978f26 100644 --- a/cpp/src/batch.cpp +++ b/cpp/src/batch.cpp @@ -15,8 +15,6 @@ namespace kvikio { -#ifdef KVIKIO_CUFILE_BATCH_API_FOUND - BatchHandle::BatchHandle(int max_num_events) : _initialized{true}, _max_num_events{max_num_events} { CUFILE_TRY(cuFileAPI::instance().BatchIOSetUp(&_handle, max_num_events)); @@ -78,28 +76,4 @@ std::vector BatchHandle::status(unsigned min_nr, void BatchHandle::cancel() { CUFILE_TRY(cuFileAPI::instance().BatchIOCancel(_handle)); } -#else - -BatchHandle::BatchHandle(int max_num_events) -{ - KVIKIO_FAIL("BatchHandle requires cuFile's batch API, please build with CUDA v12.1+"); -} - -bool BatchHandle::closed() const noexcept { return true; } - -void BatchHandle::close() noexcept {} - -void BatchHandle::submit(std::vector const& operations) {} - -std::vector BatchHandle::status(unsigned min_nr, - unsigned max_nr, - struct timespec* timeout) -{ - return std::vector{}; -} - -void BatchHandle::cancel() {} - -#endif - } // namespace kvikio diff --git a/cpp/src/compat_mode_manager.cpp b/cpp/src/compat_mode_manager.cpp index eb601af375..bb09313f68 100644 --- a/cpp/src/compat_mode_manager.cpp +++ b/cpp/src/compat_mode_manager.cpp @@ -81,24 +81,18 @@ CompatModeManager::CompatModeManager(std::string const& file_path, } // Check cuFile async API - static bool const is_extra_symbol_available = is_stream_api_available(); - static bool const is_config_path_empty = config_path().empty(); - _is_compat_mode_preferred_for_async = - _is_compat_mode_preferred || !is_extra_symbol_available || is_config_path_empty; + static bool const is_config_path_empty = config_path().empty(); + _is_compat_mode_preferred_for_async = _is_compat_mode_preferred || is_config_path_empty; } void CompatModeManager::validate_compat_mode_for_async() const { KVIKIO_NVTX_FUNC_RANGE(); - if (_is_compat_mode_preferred_for_async && _compat_mode_requested == CompatMode::OFF) { - std::string err_msg; - if (!is_stream_api_available()) { err_msg += "Missing the cuFile stream api."; } - - // When checking for availability, we also check if cuFile's config file exists. This is - // because even when the stream API is available, it doesn't work if no config file exists. - if (config_path().empty()) { err_msg += " Missing cuFile configuration file."; } - - KVIKIO_FAIL(err_msg, std::runtime_error); + // When checking for availability, we check if cuFile's config file exists. This is + // because even when the stream API is available, it doesn't work if no config file exists. + if (_is_compat_mode_preferred_for_async && _compat_mode_requested == CompatMode::OFF && + config_path().empty()) { + KVIKIO_FAIL("Missing cuFile configuration file.", std::runtime_error); } } diff --git a/cpp/src/cufile/driver.cpp b/cpp/src/cufile/driver.cpp index 98070afc84..a090f930da 100644 --- a/cpp/src/cufile/driver.cpp +++ b/cpp/src/cufile/driver.cpp @@ -150,12 +150,8 @@ void DriverProperties::set_max_pinned_memory_size(std::size_t size_in_kb) std::size_t DriverProperties::get_max_batch_io_size() { -#ifdef KVIKIO_CUFILE_BATCH_API_FOUND lazy_init(); return _props.max_batch_io_size; -#else - return 0; -#endif } #else diff --git a/cpp/src/shim/cufile.cpp b/cpp/src/shim/cufile.cpp index 0098a10deb..05b6e7f4e4 100644 --- a/cpp/src/shim/cufile.cpp +++ b/cpp/src/shim/cufile.cpp @@ -37,27 +37,25 @@ cuFileAPI::cuFileAPI() CUfileError_t const error = GetVersion(&ver); if (error.err == CU_FILE_SUCCESS) { version = ver; } } catch (std::runtime_error const&) { + version = 1070; } +#else + version = 1070; #endif - // Some symbols were introduced in later versions, so version guards are required. - // Note: `version` is 0 for cuFile versions prior to v1.8 because `cuFileGetVersion` - // did not exist. As a result, the batch and stream APIs are not loaded in versions - // 1.6 and 1.7, respectively, even though they are available. This trade-off is made - // for improved robustness. - if (version >= 1060) { - get_symbol(BatchIOSetUp, lib, KVIKIO_STRINGIFY(cuFileBatchIOSetUp)); - get_symbol(BatchIOSubmit, lib, KVIKIO_STRINGIFY(cuFileBatchIOSubmit)); - get_symbol(BatchIOGetStatus, lib, KVIKIO_STRINGIFY(cuFileBatchIOGetStatus)); - get_symbol(BatchIOCancel, lib, KVIKIO_STRINGIFY(cuFileBatchIOCancel)); - get_symbol(BatchIODestroy, lib, KVIKIO_STRINGIFY(cuFileBatchIODestroy)); - } - if (version >= 1070) { - get_symbol(ReadAsync, lib, KVIKIO_STRINGIFY(cuFileReadAsync)); - get_symbol(WriteAsync, lib, KVIKIO_STRINGIFY(cuFileWriteAsync)); - get_symbol(StreamRegister, lib, KVIKIO_STRINGIFY(cuFileStreamRegister)); - get_symbol(StreamDeregister, lib, KVIKIO_STRINGIFY(cuFileStreamDeregister)); - } + // Note: CUDA 12.2.0 included cuFile 1.7.0.49, which added all of these symbols. + // Refs: + // * https://docs.nvidia.com/cuda/archive/12.2.0/cuda-toolkit-release-notes/index.html#cuda-toolkit-major-component-versions + // * https://docs.nvidia.com/gpudirect-storage/release-notes/index.html#new-features-and-changes + get_symbol(BatchIOSetUp, lib, KVIKIO_STRINGIFY(cuFileBatchIOSetUp)); + get_symbol(BatchIOSubmit, lib, KVIKIO_STRINGIFY(cuFileBatchIOSubmit)); + get_symbol(BatchIOGetStatus, lib, KVIKIO_STRINGIFY(cuFileBatchIOGetStatus)); + get_symbol(BatchIOCancel, lib, KVIKIO_STRINGIFY(cuFileBatchIOCancel)); + get_symbol(BatchIODestroy, lib, KVIKIO_STRINGIFY(cuFileBatchIODestroy)); + get_symbol(ReadAsync, lib, KVIKIO_STRINGIFY(cuFileReadAsync)); + get_symbol(WriteAsync, lib, KVIKIO_STRINGIFY(cuFileWriteAsync)); + get_symbol(StreamRegister, lib, KVIKIO_STRINGIFY(cuFileStreamRegister)); + get_symbol(StreamDeregister, lib, KVIKIO_STRINGIFY(cuFileStreamDeregister)); } #else cuFileAPI::cuFileAPI() { KVIKIO_FAIL("KvikIO not compiled with cuFile.h", std::runtime_error); } @@ -115,8 +113,4 @@ int cufile_version() noexcept int cufile_version() noexcept { return 0; } #endif -bool is_batch_api_available() noexcept { return cufile_version() >= 1060; } - -bool is_stream_api_available() noexcept { return cufile_version() >= 1070; } - } // namespace kvikio diff --git a/dependencies.yaml b/dependencies.yaml index bed9980785..f75810c24c 100644 --- a/dependencies.yaml +++ b/dependencies.yaml @@ -180,10 +180,6 @@ dependencies: specific: - output_types: conda matrices: - - matrix: - cuda: "12.0" - packages: - - cuda-version=12.0 - matrix: cuda: "12.2" packages: