diff --git a/CHANGELOG.md b/CHANGELOG.md index 27c1bd513..cb3a6e9f3 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,12 @@ Full documentation for RCCL is available at [https://rccl.readthedocs.io](https://rccl.readthedocs.io) +## RCCL 2.27.7 for ROCm 7.1.1 + +### Resolved Issues + +* Fixed an issue where using the ``librccl-profiler`` plugin with the All-to-all collective would cause a crash after the 2.27 update. + ## RCCL 2.27.7 for ROCm 7.1.0 ### Added diff --git a/ext-profiler/README.md b/ext-profiler/README.md index 27bd4e25c..6b69f0d7e 100644 --- a/ext-profiler/README.md +++ b/ext-profiler/README.md @@ -9,7 +9,7 @@ interface. Any NCCL user can write profiler plugins to extract performance data use it for debugging and analysis. Similarly to other plugins (e.g., network plugin), the profiler plugins come as a shared library -called `libnccl-profiler.so`. That shared library contains one or more implementations of the +called `librccl-profiler.so`. That shared library contains one or more implementations of the NCCL PROFILER API, in the form of versioned structs, filled with pointers to all required functions. @@ -17,15 +17,15 @@ functions. ## Plugin name and supporting multiple profiler plugins -When NCCL is initialized, it will look for a `libnccl-profiler.so` library and dynamically load +When NCCL is initialized, it will look for a `librccl-profiler.so` library and dynamically load it, then look for symbols inside the library. The `NCCL_PROFILER_PLUGIN` environment variable allows multiple plugins to coexist. If set, NCCL -will look for a library with a name of `libnccl-profiler-${NCCL_PROFILER_PLUGIN}.so`. 
It is therefore +advised to name the library following that pattern, with a symlink pointing `librccl-profiler.so` +to `librccl-profiler-${NCCL_PROFILER_PLUGIN}.so`. That way, if there are multiple plugins in the path, setting `NCCL_PROFILER_PLUGIN` will allow users to select the right plugin. Alternatively, -the user can also set `NCCL_PROFILER_PLUGIN` to the pathname of the `libnccl-profiler.so` library. +the user can also set `NCCL_PROFILER_PLUGIN` to the pathname of the `librccl-profiler.so` library. ## Struct versioning diff --git a/ext-profiler/example/Makefile b/ext-profiler/example/Makefile index 777ff5bad..daf65c32d 100644 --- a/ext-profiler/example/Makefile +++ b/ext-profiler/example/Makefile @@ -11,12 +11,12 @@ NCCLDIR := $(BUILDDIR) SRC_FILES := $(wildcard *.c) -build: ${BUILDDIR}/libnccl-profiler-example.so +build: ${BUILDDIR}/librccl-profiler.so -${BUILDDIR}/libnccl-profiler-example.so: ${SRC_FILES} +${BUILDDIR}/librccl-profiler.so: ${SRC_FILES} @printf "Compiling %-35s > %s\n" $< $@ @mkdir -p ${BUILDDIR} $(CC) -Inccl -fPIC -shared -o $@ $^ clean: - rm -f ${BUILDDIR}/libnccl-profiler-example.so + rm -f ${BUILDDIR}/librccl-profiler.so diff --git a/src/enqueue.cc b/src/enqueue.cc index 4f720cfb9..6702ebbdd 100644 --- a/src/enqueue.cc +++ b/src/enqueue.cc @@ -2557,6 +2557,7 @@ static ncclResult_t taskAppend(struct ncclComm* comm, struct ncclInfo* info) { // Must be in thread local group before tasks can be alloc'd in `comm->memScoped`. 
ncclGroupCommJoin(info->comm, ncclGroupTaskTypeCollective); struct ncclTaskP2p* p2p = ncclMemoryPoolAlloc(&comm->memPool_ncclTaskP2p, &comm->memPermanent); + p2p->func = info->coll; p2p->buff = (void*)info->recvbuff; p2p->count = info->count; p2p->datatype = info->datatype; diff --git a/src/plugin/plugin_open.cc b/src/plugin/plugin_open.cc index b0c4882d1..ee079c645 100644 --- a/src/plugin/plugin_open.cc +++ b/src/plugin/plugin_open.cc @@ -18,7 +18,7 @@ static char* libNames[NUM_LIBS]; static void *libHandles[NUM_LIBS]; static const char *pluginNames[NUM_LIBS] = { "NET", "TUNER", "PROFILER" }; -static const char *pluginPrefix[NUM_LIBS] = { "librccl-net", "libnccl-tuner", "libnccl-profiler" }; +static const char *pluginPrefix[NUM_LIBS] = { "librccl-net", "libnccl-tuner", "librccl-profiler" }; static const char *pluginFallback[NUM_LIBS] = { "", "Using internal tuner plugin.", "" }; static unsigned long subsys[NUM_LIBS] = { NCCL_INIT|NCCL_NET, NCCL_INIT|NCCL_TUNING, NCCL_INIT };