Skip to content

Commit 02fe264

Browse files
committed
[None][feat] Add NIXL-LIBFABRIC
Allows users to use LIBFABRIC as the NIXL backend for KV cache transmission. Usage: TRTLLM_NIXL_KVCACHE_BACKEND=LIBFABRIC # (or UCX) Currently, only UCX and LIBFABRIC are supported as NIXL KV cache backends. Unsupported backend types will fall back to the default (UCX). Signed-off-by: Yoray Zack <[email protected]>
1 parent 83a47d9 commit 02fe264

File tree

3 files changed

+55
-2
lines changed

3 files changed

+55
-2
lines changed

cpp/tensorrt_llm/executor/cache_transmission/nixl_utils/transferAgent.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -348,7 +348,7 @@ NixlTransferAgent::NixlTransferAgent(BaseAgentConfig const& config)
348348

349349
std::string nixlBackend = common::getEnvNixlBackend();
350350
// List of supported backends - extend this list as new backends are added
351-
static const std::set<std::string> kSUPPORTED_BACKENDS = {"UCX"};
351+
static const std::set<std::string> kSUPPORTED_BACKENDS = {"UCX", "LIBFABRIC"};
352352

353353
if (kSUPPORTED_BACKENDS.find(nixlBackend) == kSUPPORTED_BACKENDS.end())
354354
{

cpp/tests/unit_tests/executor/transferAgentTest.cpp

Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -133,6 +133,57 @@ TEST_F(TransferAgentTest, Basic2)
133133
nixlAgent0->invalidateRemoteAgent(agent1);
134134
}
135135

136+
TEST_F(TransferAgentTest, LibfabricBackendBasic)
137+
{
138+
// Save original environment variable
139+
char const* originalBackend = std::getenv("TRTLLM_NIXL_KVCACHE_BACKEND");
140+
std::string savedBackend = originalBackend ? originalBackend : "";
141+
142+
// Set LIBFABRIC backend
143+
setenv("TRTLLM_NIXL_KVCACHE_BACKEND", "LIBFABRIC", 1);
144+
145+
std::string const agent0{"libfabric_agent0"}, agent1{"libfabric_agent1"};
146+
BaseAgentConfig config0{agent0, true}, config1{agent1, true};
147+
auto nixlAgent0 = makeTransferAgent(config0);
148+
auto nixlAgent1 = makeTransferAgent(config1);
149+
150+
TLLM_CHECK(nixlAgent0);
151+
TLLM_CHECK(nixlAgent1);
152+
153+
std::vector<char> memory0(100, 10);
154+
std::vector<char> memory1(100, 1);
155+
156+
RegisteredHostMemory regMem0(MemoryDescs{MemoryType::kDRAM, {MemoryDesc{memory0}}}, nixlAgent0.get());
157+
RegisteredHostMemory regMem1(MemoryDescs{MemoryType::kDRAM, {MemoryDesc{memory1}}}, nixlAgent1.get());
158+
159+
// nixlAgent0->loadRemoteAgent(agent1);
160+
auto connectionInfo = nixlAgent1->getLocalConnectionInfo();
161+
nixlAgent0->loadRemoteAgent(agent1, connectionInfo);
162+
bool checked = false;
163+
do
164+
{
165+
checked = nixlAgent0->checkRemoteDescs(agent1, regMem1.getDescs());
166+
// wait for regMem is unpacked by nixlAgent0
167+
} while (!checked);
168+
169+
TransferRequest writeReq{TransferOp::kWRITE, regMem0.getDescs(), regMem1.getDescs(), agent1};
170+
auto status = nixlAgent0->submitTransferRequests(writeReq);
171+
status->wait();
172+
173+
TLLM_CHECK(memory0 == memory1);
174+
175+
nixlAgent0->invalidateRemoteAgent(agent1);
176+
// Restore original environment variable
177+
if (savedBackend.empty())
178+
{
179+
unsetenv("TRTLLM_NIXL_KVCACHE_BACKEND");
180+
}
181+
else
182+
{
183+
setenv("TRTLLM_NIXL_KVCACHE_BACKEND", savedBackend.c_str(), 1);
184+
}
185+
}
186+
136187
TEST_F(TransferAgentTest, DeviceMemory)
137188
{
138189

docker/common/install_nixl.sh

100644100755
Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,8 @@ set -ex
44
GITHUB_URL="https://github.com"
55
UCX_INSTALL_PATH="/usr/local/ucx/"
66
CUDA_PATH="/usr/local/cuda"
7-
NIXL_VERSION="0.5.0"
7+
LIBFABRIC_INSTALL_PATH="/opt/libfabric"
8+
NIXL_VERSION="0.7.1"
89
NIXL_REPO="https://github.com/ai-dynamo/nixl.git"
910
OLD_LD_LIBRARY_PATH=$LD_LIBRARY_PATH
1011

@@ -38,6 +39,7 @@ meson setup builddir \
3839
-Dcudapath_lib="$CUDA_PATH/lib64" \
3940
-Dcudapath_inc="$CUDA_PATH/include" \
4041
-Dgds_path="$GDS_PATH" \
42+
-Dlibfabric_path="$LIBFABRIC_INSTALL_PATH" \
4143
-Dinstall_headers=true
4244

4345
cd builddir && ninja install

0 commit comments

Comments
 (0)