Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
32 changes: 0 additions & 32 deletions ci/build_cpp.sh
Original file line number Diff line number Diff line change
Expand Up @@ -26,42 +26,10 @@ export RAPIDS_ARTIFACTS_DIR
# populates `RATTLER_CHANNELS` array and `RATTLER_ARGS` array
source rapids-rattler-channel-string

# Construct the extra variants according to the architecture
# Each branch (re)writes variants.yaml in the current directory through an
# unquoted heredoc, so ${RAPIDS_CUDA_VERSION%.*} is expanded by the shell when
# the file is written. The `%.*` expansion drops the last dot-separated
# component of RAPIDS_CUDA_VERSION.
# The resulting file is passed to rattler-build via --variant-config.
if [[ "$(arch)" == "x86_64" ]]; then
# x86_64: one variant, compiler version 14 with the CUDA version from the environment.
cat > variants.yaml << EOF
c_compiler_version:
- 14

cxx_compiler_version:
- 14

cuda_version:
- ${RAPIDS_CUDA_VERSION%.*}
EOF
else
# Any other architecture: two entries per key.
# NOTE(review): zip_keys presumably pairs the lists positionally (12 with CUDA
# 12.1, 14 with the CUDA version from the environment) -- confirm against the
# rattler-build variant configuration documentation.
cat > variants.yaml << EOF
zip_keys:
- [c_compiler_version, cxx_compiler_version, cuda_version]

c_compiler_version:
- 12
- 14

cxx_compiler_version:
- 12
- 14

cuda_version:
- 12.1 # The last version to not support cufile
- ${RAPIDS_CUDA_VERSION%.*}
EOF
fi

# --no-build-id allows for caching with `sccache`
# more info is available at
# https://rattler.build/latest/tips_and_tricks/#using-sccache-or-ccache-with-rattler-build
rattler-build build --recipe conda/recipes/libkvikio \
--variant-config variants.yaml \
"${RATTLER_ARGS[@]}" \
"${RATTLER_CHANNELS[@]}"

Expand Down
1 change: 0 additions & 1 deletion conda/recipes/kvikio/recipe.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@ schema_version: 1

context:
version: ${{ env.get("RAPIDS_PACKAGE_VERSION") }}
minor_version: ${{ (version | split("."))[:2] | join(".") }}
cuda_version: ${{ (env.get("RAPIDS_CUDA_VERSION") | split("."))[:2] | join(".") }}
cuda_major: '${{ (env.get("RAPIDS_CUDA_VERSION") | split("."))[0] }}'
date_string: '${{ env.get("RAPIDS_DATE_STRING") }}'
Expand Down
55 changes: 14 additions & 41 deletions conda/recipes/libkvikio/recipe.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -4,17 +4,8 @@ schema_version: 1

context:
version: ${{ env.get("RAPIDS_PACKAGE_VERSION") }}
minor_version: ${{ (version | split("."))[:2] | join(".") }}
# We need to support three cases:
# 1. Linux x86_64, which always uses libcufile
# 2. Linux aarch64 with CUDA >= 12.2, which uses libcufile
# 3. Linux aarch64 with CUDA < 12.2, which does not use libcufile
# Each case has different cuda-version constraints as expressed below
should_use_cufile: ${{ x86_64 or (aarch64 and cuda_version >= "12.2") }}
# When reverting, instances of cuda_key_string can be replaced with cuda_major
cuda_key_string: ${{ cuda_version | replace(".", "_") }}
#cuda_version: ${{ (env.get("RAPIDS_CUDA_VERSION") | split("."))[:2] | join(".") }}
#cuda_major: '${{ (env.get("RAPIDS_CUDA_VERSION") | split("."))[0] }}'
cuda_version: ${{ (env.get("RAPIDS_CUDA_VERSION") | split("."))[:2] | join(".") }}
cuda_major: '${{ (env.get("RAPIDS_CUDA_VERSION") | split("."))[0] }}'
date_string: '${{ env.get("RAPIDS_DATE_STRING") }}'
head_rev: '${{ git.head_rev(".")[:8] }}'

Expand Down Expand Up @@ -54,7 +45,7 @@ cache:
SCCACHE_REGION: ${{ env.get("SCCACHE_REGION") }}
SCCACHE_S3_USE_SSL: ${{ env.get("SCCACHE_S3_USE_SSL") }}
SCCACHE_S3_NO_CREDENTIALS: ${{ env.get("SCCACHE_S3_NO_CREDENTIALS") }}
SCCACHE_S3_KEY_PREFIX: libkvikio/${{ env.get("RAPIDS_CONDA_ARCH") }}/cuda${{ cuda_key_string }}
SCCACHE_S3_KEY_PREFIX: libkvikio/${{ env.get("RAPIDS_CONDA_ARCH") }}/cuda${{ cuda_major }}
requirements:
build:
- ${{ compiler("c") }}
Expand All @@ -67,9 +58,7 @@ cache:
host:
- cuda-version =${{ cuda_version }}
- libcurl ==${{ libcurl_version }}
- if: should_use_cufile
then:
- libcufile-dev
- libcufile-dev
- libnuma

outputs:
Expand All @@ -80,7 +69,7 @@ outputs:
script:
content: |
cmake --install cpp/build
string: cuda${{ cuda_key_string }}_${{ date_string }}_${{ head_rev }}
string: cuda${{ cuda_major }}_${{ date_string }}_${{ head_rev }}
dynamic_linking:
overlinking_behavior: "error"
prefix_detection:
Expand All @@ -94,24 +83,16 @@ outputs:
- cuda-version =${{ cuda_version }}
- libcurl ==${{ libcurl_version }}
run:
- if: x86_64 or (aarch64 and cuda_version >= "13.0")
- if: cuda_version >= "13.0"
then:
- ${{ pin_compatible("cuda-version", upper_bound="x", lower_bound="x") }}
else:
- if: aarch64 and cuda_version >= "12.2"
then:
- ${{ pin_compatible("cuda-version", upper_bound="x", lower_bound="12.2.0a0") }}
else:
- ${{ pin_compatible("cuda-version", upper_bound="12.2.0a0", lower_bound="12.0") }}
- if: should_use_cufile
then:
- libcufile-dev
- ${{ pin_compatible("cuda-version", upper_bound="x", lower_bound="12.2.0a0") }}
- libcufile-dev
ignore_run_exports:
by_name:
- cuda-version
- if: should_use_cufile
then:
- libcufile
- libcufile
tests:
- script:
- test -f $PREFIX/include/kvikio/file_handle.hpp
Expand All @@ -124,7 +105,7 @@ outputs:
name: libkvikio-tests
version: ${{ version }}
build:
string: cuda${{ cuda_key_string }}_${{ date_string }}_${{ head_rev }}
string: cuda${{ cuda_major }}_${{ date_string }}_${{ head_rev }}
dynamic_linking:
overlinking_behavior: "error"
script:
Expand All @@ -139,28 +120,20 @@ outputs:
- cuda-version =${{ cuda_version }}
- cuda-cudart-dev
- libcurl ==${{ libcurl_version }}
- if: should_use_cufile
then:
- libcufile-dev
- libcufile-dev
run:
- if: x86_64
- if: cuda_version >= "13.0"
then:
- ${{ pin_compatible("cuda-version", upper_bound="x", lower_bound="x") }}
else:
- if: aarch64 and cuda_version >= "12.2"
then:
- ${{ pin_compatible("cuda-version", upper_bound="x", lower_bound="12.2.0a0") }}
else:
- ${{ pin_compatible("cuda-version", upper_bound="12.2.0a0", lower_bound="12.0") }}
- ${{ pin_compatible("cuda-version", upper_bound="x", lower_bound="12.2.0a0") }}
- cuda-cudart
ignore_run_exports:
by_name:
- cuda-cudart
- cuda-version
- libnuma
- if: should_use_cufile
then:
- libcufile
- libcufile
about:
homepage: ${{ load_from_file("python/libkvikio/pyproject.toml").project.urls.Homepage }}
license: ${{ load_from_file("python/libkvikio/pyproject.toml").project.license.text }}
Expand Down
2 changes: 0 additions & 2 deletions cpp/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -188,8 +188,6 @@ target_compile_definitions(
kvikio
PUBLIC $<$<BOOL:${KvikIO_REMOTE_SUPPORT}>:KVIKIO_LIBCURL_FOUND>
$<$<BOOL:${cuFile_FOUND}>:KVIKIO_CUFILE_FOUND>
$<$<BOOL:${cuFile_BATCH_API_FOUND}>:KVIKIO_CUFILE_BATCH_API_FOUND>
$<$<BOOL:${cuFile_STREAM_API_FOUND}>:KVIKIO_CUFILE_STREAM_API_FOUND>
$<$<BOOL:${cuFile_VERSION_API_FOUND}>:KVIKIO_CUFILE_VERSION_API_FOUND>
)

Expand Down
2 changes: 1 addition & 1 deletion cpp/examples/basic_io.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -170,7 +170,7 @@ int main()
cout << "Parallel POSIX read (" << kvikio::defaults::thread_pool_nthreads()
<< " threads): " << read << endl;
}
if (kvikio::is_batch_api_available() && !kvikio::defaults::is_compat_mode_preferred()) {
if (!kvikio::defaults::is_compat_mode_preferred()) {
std::cout << std::endl;
Timer timer;
// Here we use the batch API to read "/tmp/test-file" into `b_dev` by
Expand Down
25 changes: 0 additions & 25 deletions cpp/include/kvikio/batch.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -33,8 +33,6 @@ struct BatchOp {
CUfileOpcode_t opcode;
};

#ifdef KVIKIO_CUFILE_BATCH_API_FOUND

/**
* @brief Handle of an cuFile batch using semantic.
*
Expand Down Expand Up @@ -104,27 +102,4 @@ class BatchHandle {
void cancel();
};

#else

/**
 * @brief Fallback declaration of `BatchHandle`.
 *
 * This declaration sits in the `#else` branch of the
 * `KVIKIO_CUFILE_BATCH_API_FOUND` check, i.e. it is the one compiled when that
 * macro is not defined. It declares no data members.
 *
 * NOTE(review): presumably mirrors the public interface of the cuFile-backed
 * `BatchHandle` so that callers compile in both configurations -- confirm.
 */
class BatchHandle {
public:
// Default-constructed handle; trivially non-throwing.
BatchHandle() noexcept = default;

// Construct a handle for up to `max_num_events` events.
BatchHandle(int max_num_events);

// Whether the handle is closed.
[[nodiscard]] bool closed() const noexcept;

// Close the handle; never throws.
void close() noexcept;

// Submit a list of batch operations.
void submit(std::vector<BatchOp> const& operations);

// Query the status of submitted operations; `timeout` is optional and
// defaults to `nullptr`.
std::vector<CUfileIOEvents_t> status(unsigned min_nr,
unsigned max_nr,
struct timespec* timeout = nullptr);

// Cancel the batch.
void cancel();
};

#endif

} // namespace kvikio
12 changes: 0 additions & 12 deletions cpp/include/kvikio/file_handle.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -283,9 +283,6 @@ class FileHandle {
* This is an asynchronous version of `.read()`, which will be executed in sequence
* for the specified stream.
*
* When running CUDA v12.1 or older, this function falls back to use `.read()` after
* `stream` has been synchronized.
*
* The arguments have the same meaning as in `.read()` but some of them are deferred.
* That is, the values pointed to by `size_p`, `file_offset_p` and `devPtr_offset_p`
* will not be evaluated until execution time. Notice, this behavior can be changed
Expand Down Expand Up @@ -324,9 +321,6 @@ class FileHandle {
* This is an asynchronous version of `.read()`, which will be executed in sequence
* for the specified stream.
*
* When running CUDA v12.1 or older, this function falls back to use `.read()` after
* `stream` has been synchronized.
*
* The arguments have the same meaning as in `.read()` but returns a `StreamFuture` object
* that the caller must keep alive until all data has been read from disk. One way to do this,
* is by calling `StreamFuture.check_bytes_done()`, which will synchronize the associated stream
Expand Down Expand Up @@ -355,9 +349,6 @@ class FileHandle {
* This is an asynchronous version of `.write()`, which will be executed in sequence
* for the specified stream.
*
* When running CUDA v12.1 or older, this function falls back to use `.write()` after
* `stream` has been synchronized.
*
* The arguments have the same meaning as in `.write()` but some of them are deferred.
* That is, the values pointed to by `size_p`, `file_offset_p` and `devPtr_offset_p`
* will not be evaluated until execution time. Notice, this behavior can be changed
Expand Down Expand Up @@ -397,9 +388,6 @@ class FileHandle {
* This is an asynchronous version of `.write()`, which will be executed in sequence
* for the specified stream.
*
* When running CUDA v12.1 or older, this function falls back to use `.write()` after
* `stream` has been synchronized.
*
* The arguments have the same meaning as in `.write()` but returns a `StreamFuture` object
* that the caller must keep alive until all data has been written to disk. One way to do this,
* is by calling `StreamFuture.check_bytes_done()`, which will synchronize the associated stream
Expand Down
22 changes: 0 additions & 22 deletions cpp/include/kvikio/shim/cufile.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -110,26 +110,4 @@ bool is_cufile_available() noexcept;
*/
int cufile_version() noexcept;

/**
* @brief Check if cuFile's batch API is available.
*
* Since `cuFileGetVersion()` first became available in cufile v1.8 (CTK v12.3),
* this function returns false for versions older than v1.8 even though the batch
* API became available in v1.6.
*
* @return The boolean answer
*/
bool is_batch_api_available() noexcept;

/**
* @brief Check if cuFile's stream (async) API is available.
*
* Since `cuFileGetVersion()` first became available in cufile v1.8 (CTK v12.3),
* this function returns false for versions older than v1.8 even though the stream
* API became available in v1.7.
*
* @return The boolean answer
*/
bool is_stream_api_available() noexcept;

} // namespace kvikio
32 changes: 0 additions & 32 deletions cpp/include/kvikio/shim/cufile_h_wrapper.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -68,38 +68,6 @@ CUfileError_t cuFileDriverSetMaxPinnedMemSize(...);
// Notice, this doesn't need to be ABI compatible with the cufile definitions and
// the lack of definitions is not a problem because the linker will never look for
// these symbols because the "real" function calls are made through the shim instance.
#ifndef KVIKIO_CUFILE_BATCH_API_FOUND
// Stand-in batch API types and entry points, declared only when
// KVIKIO_CUFILE_BATCH_API_FOUND is not defined. They exist so that code naming
// these types and symbols still compiles; they are not required to be ABI
// compatible with the cufile definitions.

// Kind of operation carried by a batch entry.
typedef enum CUfileOpcode { CUFILE_READ = 0, CUFILE_WRITE } CUfileOpcode_t;

// Per-operation status; every enumerator is a distinct single bit.
typedef enum CUFILEStatus_enum {
CUFILE_WAITING = 0x000001, /* required value prior to submission */
CUFILE_PENDING = 0x000002, /* once enqueued */
CUFILE_INVALID = 0x000004, /* request was ill-formed or could not be enqueued */
CUFILE_CANCELED = 0x000008, /* request successfully canceled */
CUFILE_COMPLETE = 0x0000010, /* request successfully completed */
CUFILE_TIMEOUT = 0x0000020, /* request timed out */
CUFILE_FAILED = 0x0000040 /* unable to complete */
} CUfileStatus_t;

// Completion record for one batch operation.
typedef struct CUfileIOEvents {
void* cookie;
CUfileStatus_t status; /* status of the operation */
size_t ret; /* -ve error or amount of I/O done. */
} CUfileIOEvents_t;

// Variadic placeholder declarations: they only introduce the symbol names and
// are never defined here.
CUfileError_t cuFileBatchIOSetUp(...);
CUfileError_t cuFileBatchIOSubmit(...);
CUfileError_t cuFileBatchIOGetStatus(...);
CUfileError_t cuFileBatchIOCancel(...);
CUfileError_t cuFileBatchIODestroy(...);
#endif

#ifndef KVIKIO_CUFILE_STREAM_API_FOUND
// Variadic placeholder declarations for the stream (async) entry points,
// declared only when KVIKIO_CUFILE_STREAM_API_FOUND is not defined. They only
// introduce the symbol names and are never defined here.
CUfileError_t cuFileReadAsync(...);
CUfileError_t cuFileWriteAsync(...);
CUfileError_t cuFileStreamRegister(...);
CUfileError_t cuFileStreamDeregister(...);
#endif

#ifndef KVIKIO_CUFILE_VERSION_API_FOUND
CUfileError_t cuFileGetVersion(...);
Expand Down
26 changes: 0 additions & 26 deletions cpp/src/batch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,6 @@

namespace kvikio {

#ifdef KVIKIO_CUFILE_BATCH_API_FOUND

BatchHandle::BatchHandle(int max_num_events) : _initialized{true}, _max_num_events{max_num_events}
{
CUFILE_TRY(cuFileAPI::instance().BatchIOSetUp(&_handle, max_num_events));
Expand Down Expand Up @@ -78,28 +76,4 @@ std::vector<CUfileIOEvents_t> BatchHandle::status(unsigned min_nr,

void BatchHandle::cancel() { CUFILE_TRY(cuFileAPI::instance().BatchIOCancel(_handle)); }

#else

// Fallback constructor: reports through KVIKIO_FAIL that the batch API is
// required. `max_num_events` is not used.
BatchHandle::BatchHandle(int max_num_events)
{
  KVIKIO_FAIL("BatchHandle requires cuFile's batch API, please build with CUDA v12.1+");
}

// The fallback handle always reports itself as closed.
bool BatchHandle::closed() const noexcept
{
  return true;
}

// Nothing to release in the fallback build.
void BatchHandle::close() noexcept
{
}

// No-op: the operations are ignored.
void BatchHandle::submit(std::vector<BatchOp> const& operations)
{
}

// Always yields an empty event list; all arguments are ignored.
std::vector<CUfileIOEvents_t> BatchHandle::status(unsigned min_nr,
                                                  unsigned max_nr,
                                                  struct timespec* timeout)
{
  return {};
}

// No-op.
void BatchHandle::cancel()
{
}

#endif

} // namespace kvikio
12 changes: 4 additions & 8 deletions cpp/src/compat_mode_manager.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -74,22 +74,18 @@ CompatModeManager::CompatModeManager(std::string const& file_path,
}

// Check cuFile async API
static bool const is_extra_symbol_available = is_stream_api_available();
static bool const is_config_path_empty = config_path().empty();
_is_compat_mode_preferred_for_async =
_is_compat_mode_preferred || !is_extra_symbol_available || is_config_path_empty;
static bool const is_config_path_empty = config_path().empty();
_is_compat_mode_preferred_for_async = _is_compat_mode_preferred || is_config_path_empty;
}

void CompatModeManager::validate_compat_mode_for_async() const
{
KVIKIO_NVTX_FUNC_RANGE();
if (_is_compat_mode_preferred_for_async && _compat_mode_requested == CompatMode::OFF) {
std::string err_msg;
if (!is_stream_api_available()) { err_msg += "Missing the cuFile stream api."; }

// When checking for availability, we also check if cuFile's config file exists. This is
// When checking for availability, we check if cuFile's config file exists. This is
// because even when the stream API is available, it doesn't work if no config file exists.
if (config_path().empty()) { err_msg += " Missing cuFile configuration file."; }
if (config_path().empty()) { err_msg += "Missing cuFile configuration file."; }

KVIKIO_FAIL(err_msg, std::runtime_error);
}
Comment on lines 81 to 91
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
void CompatModeManager::validate_compat_mode_for_async() const
{
KVIKIO_NVTX_FUNC_RANGE();
if (_is_compat_mode_preferred_for_async && _compat_mode_requested == CompatMode::OFF) {
std::string err_msg;
if (!is_stream_api_available()) { err_msg += "Missing the cuFile stream api."; }
// When checking for availability, we also check if cuFile's config file exists. This is
// When checking for availability, we check if cuFile's config file exists. This is
// because even when the stream API is available, it doesn't work if no config file exists.
if (config_path().empty()) { err_msg += " Missing cuFile configuration file."; }
if (config_path().empty()) { err_msg += "Missing cuFile configuration file."; }
KVIKIO_FAIL(err_msg, std::runtime_error);
}
/**
 * Fails with `std::runtime_error` when compatibility mode is preferred for the
 * async path, the requested mode is `CompatMode::OFF`, and no cuFile
 * configuration file path is available.
 */
void CompatModeManager::validate_compat_mode_for_async() const
{
  KVIKIO_NVTX_FUNC_RANGE();

  // Nothing to validate unless the async path would fall back to compat mode.
  if (!_is_compat_mode_preferred_for_async) { return; }

  // Falling back is only an error when the caller requested compat mode OFF.
  if (_compat_mode_requested != CompatMode::OFF) { return; }

  // cuFile stream API will not work if the config file does not exist.
  if (!config_path().empty()) { return; }

  KVIKIO_FAIL("Missing cuFile configuration file.", std::runtime_error);
}

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

On a side note, I didn't know that the presence of a config file is required for the stream API to function properly. This is quite interesting. I'll run some tests and see if this behavior has changed in recent versions of the cuFile API.

Expand Down
Loading