Skip to content

Commit c74b864

Browse files
Merge remote-tracking branch 'nccl/master' into develop
2 parents 0e7d7da + 72d2432 commit c74b864

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

106 files changed

+7733
-2112
lines changed

CMakeLists.txt

Lines changed: 41 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -143,7 +143,7 @@ endif()
143143
# Set CMAKE flags
144144
#==================================================================================================
145145
set(CMAKE_INSTALL_PREFIX "${ROCM_PATH}" CACHE PATH "")
146-
set(CMAKE_CXX_STANDARD 14) # We use C++14 features, this will add compile option: -std=c++14
146+
set(CMAKE_CXX_STANDARD 17) # Build as C++17; this will add compile option: -std=c++17
147147
set(CMAKE_CXX_EXTENSIONS OFF) # Without this line, it will add -std=gnu++17 instead, which has some issues.
148148
if(ROCM_PATH)
149149
list(APPEND CMAKE_PREFIX_PATH # Add ROCM_PATH to CMake search paths (for finding HIP / HSA
@@ -424,6 +424,7 @@ configure_file(src/nccl.h.in ${PROJECT_BINARY_DIR}/include/nccl.h) # Used b
424424
#==================================================================================================
425425
# E.g: find src -type f \( -name "*.cc" -o -name "*.h" -o -name "*.hpp" \) | sort
426426
set(SRC_FILES
427+
src/allocator.cc
427428
src/bootstrap.cc
428429
src/channel.cc
429430
src/collectives.cc
@@ -436,6 +437,7 @@ set(SRC_FILES
436437
src/msccl.cc
437438
src/proxy.cc
438439
src/rccl_wrap.cc
440+
src/symmetric.cc
439441
src/transport.cc
440442
src/device/all_gather.h
441443
src/device/all_reduce.h
@@ -458,6 +460,11 @@ set(SRC_FILES
458460
src/device/onerank.cu
459461
src/device/network/unpack/unpack_defs.h
460462
src/device/network/unpack/unpack.h
463+
src/device/symmetric/all_gather.cuh
464+
src/device/symmetric/all_reduce.cuh
465+
src/device/symmetric/kernel.cuh
466+
src/device/symmetric/primitives.cuh
467+
src/device/symmetric/reduce_scatter.cuh
461468
src/graph/connect.cc
462469
src/graph/paths.cc
463470
src/graph/rings.cc
@@ -472,6 +479,7 @@ set(SRC_FILES
472479
src/graph/xml.cc
473480
src/graph/xml.h
474481
src/include/alloc.h
482+
src/include/allocator.h
475483
src/include/alt_rsmi.h
476484
src/include/archinfo.h
477485
src/include/api_trace.h
@@ -516,6 +524,7 @@ set(SRC_FILES
516524
src/include/rccl_common.h
517525
src/include/rccl_vars.h
518526
src/include/register.h
527+
src/include/register_inline.h
519528
src/include/rccl_float8.h
520529
src/include/rocm_smi_wrap.h
521530
src/include/rocmwrap.h
@@ -526,11 +535,15 @@ set(SRC_FILES
526535
src/include/signals.h
527536
src/include/socket.h
528537
src/include/strongstream.h
538+
src/include/symmetric.h
529539
src/include/timer.h
530540
src/include/transport.h
531541
src/include/trees.h
532542
src/include/tuner.h
533543
src/include/utils.h
544+
src/include/mlx5/mlx5dvcore.h
545+
src/include/mlx5/mlx5dvsymbols.h
546+
src/include/mlx5/mlx5dvwrap.h
534547
src/include/msccl/msccl_lifecycle.h
535548
src/include/msccl/msccl_parser.h
536549
src/include/msccl/msccl_scheduler.h
@@ -591,6 +604,7 @@ set(SRC_FILES
591604
src/include/plugin/profiler/profiler_v1.h
592605
src/include/plugin/profiler/profiler_v2.h
593606
src/include/plugin/profiler/profiler_v3.h
607+
src/include/plugin/profiler/profiler_v4.h
594608
src/include/plugin/tuner/tuner_v2.h
595609
src/include/plugin/tuner/tuner_v3.h
596610
src/include/plugin/tuner/tuner_v4.h
@@ -604,6 +618,8 @@ set(SRC_FILES
604618
src/misc/ibvsymbols.cc
605619
src/misc/ibvwrap.cc
606620
src/misc/ipcsocket.cc
621+
src/misc/mlx5dvsymbols.cc
622+
src/misc/mlx5dvwrap.cc
607623
src/misc/npkit.cc
608624
# src/misc/nvmlwrap.cc
609625
src/misc/nvmlwrap_stub.cc
@@ -634,6 +650,7 @@ set(SRC_FILES
634650
src/plugin/profiler/profiler_v1.cc
635651
src/plugin/profiler/profiler_v2.cc
636652
src/plugin/profiler/profiler_v3.cc
653+
src/plugin/profiler/profiler_v4.cc
637654
src/plugin/tuner/tuner_v2.cc
638655
src/plugin/tuner/tuner_v3.cc
639656
src/plugin/tuner/tuner_v4.cc
@@ -706,6 +723,7 @@ foreach(SRC_FILE ${SRC_FILES})
706723
add_file_unique(HIP_SOURCES ${HIP_FILE})
707724

708725
# Rename .cuh files to .h and .cu files to .cu.cpp so that they get processed properly
726+
string(REPLACE "\.cuh" "\.h" HIP_FILE ${HIP_FILE})
709727
string(REPLACE "\.cu" "\.cu.cpp" HIP_FILE ${HIP_FILE})
710728
list(APPEND HIP_SOURCES ${HIP_FILE})
711729

@@ -823,8 +841,13 @@ if (NOT Python3_FOUND)
823841
endif()
824842

825843
set(GEN_DIR "${HIPIFY_DIR}/gensrc")
844+
set(GEN_SYM_DIR "${GEN_DIR}/symmetric")
826845

827-
# Execute the python script to generate required files
846+
if(ONLY_FUNCS)
847+
message(WARNING "Using ONLY_FUNCS = ${ONLY_FUNCS}. Not meant for release builds.")
848+
endif()
849+
850+
# Execute the python script to generate required collective functions
828851
execute_process(
829852
COMMAND ${Python3_EXECUTABLE} ${CMAKE_SOURCE_DIR}/src/device/generate.py ${GEN_DIR} ${IFC_ENABLED} ${COLLTRACE} ${ENABLE_MSCCL_KERNEL} ${BUILD_LOCAL_GPU_TARGET_ONLY} ${ONLY_FUNCS}
830853
WORKING_DIRECTORY ${CMAKE_SOURCE_DIR}
@@ -836,8 +859,20 @@ if (gen_py_result)
836859
message(FATAL_ERROR "${CMAKE_SOURCE_DIR}/src/device/generate.py failed")
837860
endif()
838861

862+
# Execute the python script to generate required symmetric memory kernels
863+
execute_process(
864+
COMMAND ${Python3_EXECUTABLE} ${CMAKE_SOURCE_DIR}/src/device/symmetric/generate.py ${GEN_SYM_DIR}
865+
WORKING_DIRECTORY ${CMAKE_SOURCE_DIR}
866+
RESULT_VARIABLE gen_sym_py_result
867+
ERROR_VARIABLE gen_sym_py_error
868+
)
869+
if (gen_sym_py_result)
870+
message(SEND_ERROR "Error: ${gen_sym_py_error}")
871+
message(FATAL_ERROR "${CMAKE_SOURCE_DIR}/src/device/symmetric/generate.py failed")
872+
endif()
873+
839874
# Recursively find the generated files in the output directory (including subdirectories such as symmetric/)
840-
file(GLOB GENERATED_FILES "${GEN_DIR}/*")
875+
file(GLOB_RECURSE GENERATED_FILES "${GEN_DIR}/*")
841876

842877
# Append all found generated files to the list
843878
foreach(file ${GENERATED_FILES})
@@ -885,10 +920,11 @@ endif()
885920
## Set RCCL include directories
886921
target_include_directories(rccl PRIVATE ${PROJECT_BINARY_DIR}/include) # for generated rccl.h header
887922
target_include_directories(rccl PRIVATE ${HIPIFY_DIR}/src) # for hipfied headers
888-
target_include_directories(rccl PRIVATE ${HIPIFY_DIR}/src/include)
889-
target_include_directories(rccl PRIVATE ${HIPIFY_DIR}/src/include/plugin)
890923
target_include_directories(rccl PRIVATE ${HIPIFY_DIR}/src/device)
891924
target_include_directories(rccl PRIVATE ${HIPIFY_DIR}/src/device/network/unpack)
925+
target_include_directories(rccl PRIVATE ${HIPIFY_DIR}/src/include)
926+
target_include_directories(rccl PRIVATE ${HIPIFY_DIR}/src/include/mlx5)
927+
target_include_directories(rccl PRIVATE ${HIPIFY_DIR}/src/include/plugin)
892928
target_include_directories(rccl PRIVATE ${HIPIFY_DIR}/gensrc)
893929
target_include_directories(rccl PRIVATE ${HSA_INCLUDE_PATH})
894930
target_include_directories(rccl PRIVATE ${ROCM_SMI_INCLUDE_DIR})

ext-net/example/nccl/common.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,9 +7,15 @@
77
#ifndef COMMON_H_
88
#define COMMON_H_
99

10+
#include <stdint.h>
11+
1012
typedef enum {NCCL_LOG_NONE=0, NCCL_LOG_VERSION=1, NCCL_LOG_WARN=2, NCCL_LOG_INFO=3, NCCL_LOG_ABORT=4, NCCL_LOG_TRACE=5} ncclDebugLogLevel;
1113
typedef enum {NCCL_INIT=1, NCCL_COLL=2, NCCL_P2P=4, NCCL_SHM=8, NCCL_NET=16, NCCL_GRAPH=32, NCCL_TUNING=64, NCCL_ENV=128, NCCL_ALLOC=256, NCCL_CALL=512, NCCL_PROXY=1024, NCCL_NVLS=2048, NCCL_BOOTSTRAP=4096, NCCL_REG=8192, NCCL_ALL=~0} ncclDebugLogSubSys;
1214

1315
typedef void (*ncclDebugLogger_t)(ncclDebugLogLevel level, unsigned long flags, const char *file, int line, const char *fmt, ...);
1416

17+
enum { ncclProfilerNetEventStart = 0, ncclProfilerNetEventStop, ncclProfilerNetEventUpdate, ncclProfilerNetEventUpdateAndStop };
18+
19+
typedef ncclResult_t (*ncclProfilerCallback_t)(void** eHandle, int type, void* phandle, int64_t pluginId, void* extData);
20+
1521
#endif

ext-net/example/nccl/net.h

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -8,9 +8,9 @@
88
#include <stdint.h>
99
#include <stdlib.h>
1010

11-
#include "common.h"
1211
#include "err.h"
1312
#include "net_device.h"
13+
#include "common.h"
1414

1515
#define NCCL_NET_HANDLE_MAXSIZE 128
1616
#define NCCL_MAX_NET_SIZE_BYTES (1*1024*1024*1024*1024L) //1TB
@@ -23,8 +23,6 @@
2323
// Maximum number of requests per comm object
2424
#define NCCL_NET_MAX_REQUESTS 32
2525

26-
typedef ncclResult_t (*ncclProfilerCallback_t)(void** eHandle, int type, void* phandle, int64_t pluginId, void* extData);
27-
2826
#include "net_v10.h"
2927
#include "net_v9.h"
3028
#include "net_v8.h"

0 commit comments

Comments
 (0)