Skip to content

Commit 08a7be2

Browse files
Merge remote-tracking branch 'nccl/master' into develop
2 parents a0ec15b + 72d2432 commit 08a7be2

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

108 files changed

+7749
-2124
lines changed

CHANGELOG.md

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,12 +2,14 @@
22

33
Full documentation for RCCL is available at [https://rccl.readthedocs.io](https://rccl.readthedocs.io)
44

5-
## Unreleased - RCCL 2.26.6 for ROCm 7.1.0
5+
## Unreleased - RCCL 2.27.3 for ROCm 7.1.0
66

77
### Added
88

99
### Changed
10+
1011
* The MSCCL++ feature is now disabled by default. The `--disable-mscclpp` build flag is replaced with `--enable-mscclpp` in the `rccl/install.sh` script.
12+
* Compatibility with NCCL 2.27.3
1113

1214
### Resolved issues
1315

CMakeLists.txt

Lines changed: 42 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -140,8 +140,8 @@ endif()
140140
# Set CMAKE flags
141141
#==================================================================================================
142142
set(CMAKE_INSTALL_PREFIX "${ROCM_PATH}" CACHE PATH "")
143-
set(CMAKE_CXX_STANDARD 14) # We use C++14 features, this will add compile option: -std=c++14
144-
set(CMAKE_CXX_EXTENSIONS OFF) # Without this line, it will add -std=gnu++14 instead, which has some issues.
143+
set(CMAKE_CXX_STANDARD 17) # We use C++17 features, this will add compile option: -std=c++17
144+
set(CMAKE_CXX_EXTENSIONS OFF) # Without this line, it will add -std=gnu++17 instead, which has some issues.
145145
if(ROCM_PATH)
146146
list(APPEND CMAKE_PREFIX_PATH # Add ROCM_PATH to CMake search paths (for finding HIP / HSA
147147
${ROCM_PATH}
@@ -425,6 +425,7 @@ configure_file(src/nccl.h.in ${PROJECT_BINARY_DIR}/include/nccl.h) # Used b
425425
#==================================================================================================
426426
# E.g: find src -type f \( -name "*.cc" -o -name "*.h" -o -name "*.hpp" \) | sort
427427
set(SRC_FILES
428+
src/allocator.cc
428429
src/bootstrap.cc
429430
src/channel.cc
430431
src/collectives.cc
@@ -437,6 +438,7 @@ set(SRC_FILES
437438
src/msccl.cc
438439
src/proxy.cc
439440
src/rccl_wrap.cc
441+
src/symmetric.cc
440442
src/transport.cc
441443
src/device/all_gather.h
442444
src/device/all_reduce.h
@@ -458,6 +460,11 @@ set(SRC_FILES
458460
src/device/onerank.cu
459461
src/device/network/unpack/unpack_defs.h
460462
src/device/network/unpack/unpack.h
463+
src/device/symmetric/all_gather.cuh
464+
src/device/symmetric/all_reduce.cuh
465+
src/device/symmetric/kernel.cuh
466+
src/device/symmetric/primitives.cuh
467+
src/device/symmetric/reduce_scatter.cuh
461468
src/graph/connect.cc
462469
src/graph/paths.cc
463470
src/graph/rings.cc
@@ -472,6 +479,7 @@ set(SRC_FILES
472479
src/graph/xml.cc
473480
src/graph/xml.h
474481
src/include/alloc.h
482+
src/include/allocator.h
475483
src/include/alt_rsmi.h
476484
src/include/archinfo.h
477485
src/include/api_trace.h
@@ -516,6 +524,7 @@ set(SRC_FILES
516524
src/include/rccl_common.h
517525
src/include/rccl_vars.h
518526
src/include/register.h
527+
src/include/register_inline.h
519528
src/include/rccl_float8.h
520529
src/include/rocm_smi_wrap.h
521530
src/include/rocmwrap.h
@@ -526,11 +535,15 @@ set(SRC_FILES
526535
src/include/signals.h
527536
src/include/socket.h
528537
src/include/strongstream.h
538+
src/include/symmetric.h
529539
src/include/timer.h
530540
src/include/transport.h
531541
src/include/trees.h
532542
src/include/tuner.h
533543
src/include/utils.h
544+
src/include/mlx5/mlx5dvcore.h
545+
src/include/mlx5/mlx5dvsymbols.h
546+
src/include/mlx5/mlx5dvwrap.h
534547
src/include/msccl/msccl_lifecycle.h
535548
src/include/msccl/msccl_parser.h
536549
src/include/msccl/msccl_scheduler.h
@@ -591,6 +604,7 @@ set(SRC_FILES
591604
src/include/plugin/profiler/profiler_v1.h
592605
src/include/plugin/profiler/profiler_v2.h
593606
src/include/plugin/profiler/profiler_v3.h
607+
src/include/plugin/profiler/profiler_v4.h
594608
src/include/plugin/tuner/tuner_v2.h
595609
src/include/plugin/tuner/tuner_v3.h
596610
src/include/plugin/tuner/tuner_v4.h
@@ -604,6 +618,8 @@ set(SRC_FILES
604618
src/misc/ibvsymbols.cc
605619
src/misc/ibvwrap.cc
606620
src/misc/ipcsocket.cc
621+
src/misc/mlx5dvsymbols.cc
622+
src/misc/mlx5dvwrap.cc
607623
src/misc/npkit.cc
608624
# src/misc/nvmlwrap.cc
609625
src/misc/nvmlwrap_stub.cc
@@ -634,6 +650,7 @@ set(SRC_FILES
634650
src/plugin/profiler/profiler_v1.cc
635651
src/plugin/profiler/profiler_v2.cc
636652
src/plugin/profiler/profiler_v3.cc
653+
src/plugin/profiler/profiler_v4.cc
637654
src/plugin/tuner/tuner_v2.cc
638655
src/plugin/tuner/tuner_v3.cc
639656
src/plugin/tuner/tuner_v4.cc
@@ -706,6 +723,7 @@ foreach(SRC_FILE ${SRC_FILES})
706723
add_file_unique(HIP_SOURCES ${HIP_FILE})
707724

708725
# Convert .cuh files to .h and .cu files to .cu.cpp so that they get processed properly
726+
string(REPLACE "\.cuh" "\.h" HIP_FILE ${HIP_FILE})
709727
string(REPLACE "\.cu" "\.cu.cpp" HIP_FILE ${HIP_FILE})
710728
list(APPEND HIP_SOURCES ${HIP_FILE})
711729

@@ -826,8 +844,13 @@ if (NOT Python3_FOUND)
826844
endif()
827845

828846
set(GEN_DIR "${HIPIFY_DIR}/gensrc")
847+
set(GEN_SYM_DIR "${GEN_DIR}/symmetric")
829848

830-
# Execute the python script to generate required files
849+
if(ONLY_FUNCS)
850+
message(WARNING "Using ONLY_FUNCS = ${ONLY_FUNCS}. Not meant for release builds.")
851+
endif()
852+
853+
# Execute the python script to generate required collective functions
831854
execute_process(
832855
COMMAND ${Python3_EXECUTABLE} ${CMAKE_SOURCE_DIR}/src/device/generate.py ${GEN_DIR} ${IFC_ENABLED} ${COLLTRACE} ${ENABLE_MSCCL_KERNEL} ${BUILD_LOCAL_GPU_TARGET_ONLY} ${ONLY_FUNCS}
833856
WORKING_DIRECTORY ${CMAKE_SOURCE_DIR}
@@ -839,8 +862,20 @@ if (gen_py_result)
839862
message(FATAL_ERROR "${CMAKE_SOURCE_DIR}/src/device/generate.py failed")
840863
endif()
841864

865+
# Execute the python script to generate required symmetric memory kernels
866+
execute_process(
867+
COMMAND ${Python3_EXECUTABLE} ${CMAKE_SOURCE_DIR}/src/device/symmetric/generate.py ${GEN_SYM_DIR}
868+
WORKING_DIRECTORY ${CMAKE_SOURCE_DIR}
869+
RESULT_VARIABLE gen_sym_py_result
870+
ERROR_VARIABLE gen_sym_py_error
871+
)
872+
if (gen_sym_py_result)
873+
message(SEND_ERROR "Error: ${gen_sym_py_error}")
874+
message(FATAL_ERROR "${CMAKE_SOURCE_DIR}/src/device/symmetric/generate.py failed")
875+
endif()
876+
842877
# Find the generated files in the output directory
843-
file(GLOB GENERATED_FILES "${GEN_DIR}/*")
878+
file(GLOB_RECURSE GENERATED_FILES "${GEN_DIR}/*")
844879

845880
# Append all found generated files to the list
846881
foreach(file ${GENERATED_FILES})
@@ -876,10 +911,11 @@ endif()
876911
## Set RCCL include directories
877912
target_include_directories(rccl PRIVATE ${PROJECT_BINARY_DIR}/include) # for generated rccl.h header
878913
target_include_directories(rccl PRIVATE ${HIPIFY_DIR}/src) # for hipified headers
879-
target_include_directories(rccl PRIVATE ${HIPIFY_DIR}/src/include)
880-
target_include_directories(rccl PRIVATE ${HIPIFY_DIR}/src/include/plugin)
881914
target_include_directories(rccl PRIVATE ${HIPIFY_DIR}/src/device)
882915
target_include_directories(rccl PRIVATE ${HIPIFY_DIR}/src/device/network/unpack)
916+
target_include_directories(rccl PRIVATE ${HIPIFY_DIR}/src/include)
917+
target_include_directories(rccl PRIVATE ${HIPIFY_DIR}/src/include/mlx5)
918+
target_include_directories(rccl PRIVATE ${HIPIFY_DIR}/src/include/plugin)
883919
target_include_directories(rccl PRIVATE ${HIPIFY_DIR}/gensrc)
884920
target_include_directories(rccl PRIVATE ${HSA_INCLUDE_PATH})
885921
target_include_directories(rccl PRIVATE ${ROCM_SMI_INCLUDE_DIR})

ext-net/example/nccl/common.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,9 +7,15 @@
77
#ifndef COMMON_H_
88
#define COMMON_H_
99

10+
#include <stdint.h>
11+
1012
typedef enum {NCCL_LOG_NONE=0, NCCL_LOG_VERSION=1, NCCL_LOG_WARN=2, NCCL_LOG_INFO=3, NCCL_LOG_ABORT=4, NCCL_LOG_TRACE=5} ncclDebugLogLevel;
1113
typedef enum {NCCL_INIT=1, NCCL_COLL=2, NCCL_P2P=4, NCCL_SHM=8, NCCL_NET=16, NCCL_GRAPH=32, NCCL_TUNING=64, NCCL_ENV=128, NCCL_ALLOC=256, NCCL_CALL=512, NCCL_PROXY=1024, NCCL_NVLS=2048, NCCL_BOOTSTRAP=4096, NCCL_REG=8192, NCCL_ALL=~0} ncclDebugLogSubSys;
1214

1315
typedef void (*ncclDebugLogger_t)(ncclDebugLogLevel level, unsigned long flags, const char *file, int line, const char *fmt, ...);
1416

17+
enum { ncclProfilerNetEventStart = 0, ncclProfilerNetEventStop, ncclProfilerNetEventUpdate, ncclProfilerNetEventUpdateAndStop };
18+
19+
typedef ncclResult_t (*ncclProfilerCallback_t)(void** eHandle, int type, void* phandle, int64_t pluginId, void* extData);
20+
1521
#endif

ext-net/example/nccl/net.h

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -8,9 +8,9 @@
88
#include <stdint.h>
99
#include <stdlib.h>
1010

11-
#include "common.h"
1211
#include "err.h"
1312
#include "net_device.h"
13+
#include "common.h"
1414

1515
#define NCCL_NET_HANDLE_MAXSIZE 128
1616
#define NCCL_MAX_NET_SIZE_BYTES (1*1024*1024*1024*1024L) //1TB
@@ -23,8 +23,6 @@
2323
// Maximum number of requests per comm object
2424
#define NCCL_NET_MAX_REQUESTS 32
2525

26-
typedef ncclResult_t (*ncclProfilerCallback_t)(void** eHandle, int type, void* phandle, int64_t pluginId, void* extData);
27-
2826
#include "net_v10.h"
2927
#include "net_v9.h"
3028
#include "net_v8.h"

0 commit comments

Comments
 (0)