@@ -140,8 +140,8 @@ endif()
140140# Set CMAKE flags
141141#==================================================================================================
142142set (CMAKE_INSTALL_PREFIX "${ROCM_PATH} " CACHE PATH "" )
143- set (CMAKE_CXX_STANDARD 14 ) # We use C++14 features, this will add compile option: -std=c++14
144- set (CMAKE_CXX_EXTENSIONS OFF ) # Without this line, it will add -std=gnu++14 instead, which has some issues.
143+ set (CMAKE_CXX_STANDARD 17 ) # We use C++17 features, this will add compile option: -std=c++17
144+ set (CMAKE_CXX_EXTENSIONS OFF ) # Without this line, it will add -std=gnu++17 instead, which has some issues.
145145if (ROCM_PATH)
146146 list (APPEND CMAKE_PREFIX_PATH # Add ROCM_PATH to CMake search paths (for finding HIP / HSA
147147 ${ROCM_PATH}
@@ -425,6 +425,7 @@ configure_file(src/nccl.h.in ${PROJECT_BINARY_DIR}/include/nccl.h) # Used b
425425#==================================================================================================
426426# E.g: find src -type f \( -name "*.cc" -o -name "*.h" -o -name "*.hpp" \) | sort
427427set (SRC_FILES
428+ src/allocator.cc
428429 src/bootstrap.cc
429430 src/channel.cc
430431 src/collectives.cc
@@ -437,6 +438,7 @@ set(SRC_FILES
437438 src/msccl.cc
438439 src/proxy.cc
439440 src/rccl_wrap.cc
441+ src/symmetric.cc
440442 src/transport.cc
441443 src/device/all_gather.h
442444 src/device/all_reduce.h
@@ -458,6 +460,11 @@ set(SRC_FILES
458460 src/device/onerank.cu
459461 src/device/network/unpack/unpack_defs.h
460462 src/device/network/unpack/unpack.h
463+ src/device/symmetric/all_gather.cuh
464+ src/device/symmetric/all_reduce.cuh
465+ src/device/symmetric/kernel.cuh
466+ src/device/symmetric/primitives.cuh
467+ src/device/symmetric/reduce_scatter.cuh
461468 src/graph/connect.cc
462469 src/graph/paths .cc
463470 src/graph/rings.cc
@@ -472,6 +479,7 @@ set(SRC_FILES
472479 src/graph/xml.cc
473480 src/graph/xml.h
474481 src/include /alloc.h
482+ src/include /allocator.h
475483 src/include /alt_rsmi.h
476484 src/include /archinfo.h
477485 src/include /api_trace.h
@@ -516,6 +524,7 @@ set(SRC_FILES
516524 src/include /rccl_common.h
517525 src/include /rccl_vars.h
518526 src/include /register.h
527+ src/include /register_inline.h
519528 src/include /rccl_float8.h
520529 src/include /rocm_smi_wrap.h
521530 src/include /rocmwrap.h
@@ -526,11 +535,15 @@ set(SRC_FILES
526535 src/include /signals.h
527536 src/include /socket.h
528537 src/include /strongstream.h
538+ src/include /symmetric.h
529539 src/include /timer.h
530540 src/include /transport.h
531541 src/include /trees.h
532542 src/include /tuner.h
533543 src/include /utils.h
544+ src/include /mlx5/mlx5dvcore.h
545+ src/include /mlx5/mlx5dvsymbols.h
546+ src/include /mlx5/mlx5dvwrap.h
534547 src/include /msccl/msccl_lifecycle.h
535548 src/include /msccl/msccl_parser.h
536549 src/include /msccl/msccl_scheduler.h
@@ -591,6 +604,7 @@ set(SRC_FILES
591604 src/include /plugin/profiler/profiler_v1.h
592605 src/include /plugin/profiler/profiler_v2.h
593606 src/include /plugin/profiler/profiler_v3.h
607+ src/include /plugin/profiler/profiler_v4.h
594608 src/include /plugin/tuner/tuner_v2.h
595609 src/include /plugin/tuner/tuner_v3.h
596610 src/include /plugin/tuner/tuner_v4.h
@@ -604,6 +618,8 @@ set(SRC_FILES
604618 src/misc/ibvsymbols.cc
605619 src/misc/ibvwrap.cc
606620 src/misc/ipcsocket.cc
621+ src/misc/mlx5dvsymbols.cc
622+ src/misc/mlx5dvwrap.cc
607623 src/misc/npkit.cc
608624# src/misc/nvmlwrap.cc
609625 src/misc/nvmlwrap_stub.cc
@@ -634,6 +650,7 @@ set(SRC_FILES
634650 src/plugin/profiler/profiler_v1.cc
635651 src/plugin/profiler/profiler_v2.cc
636652 src/plugin/profiler/profiler_v3.cc
653+ src/plugin/profiler/profiler_v4.cc
637654 src/plugin/tuner/tuner_v2.cc
638655 src/plugin/tuner/tuner_v3.cc
639656 src/plugin/tuner/tuner_v4.cc
@@ -706,6 +723,7 @@ foreach(SRC_FILE ${SRC_FILES})
706723 add_file_unique(HIP_SOURCES ${HIP_FILE} )
707724
708725 # Convert .cu files to .cpp so that they get processed properly
726+ string (REPLACE "\. cuh" "\. h" HIP_FILE ${HIP_FILE} )
709727 string (REPLACE "\. cu" "\. cu.cpp" HIP_FILE ${HIP_FILE} )
710728 list (APPEND HIP_SOURCES ${HIP_FILE} )
711729
@@ -826,8 +844,13 @@ if (NOT Python3_FOUND)
826844endif ()
827845
828846set (GEN_DIR "${HIPIFY_DIR} /gensrc" )
847+ set (GEN_SYM_DIR "${GEN_DIR} /symmetric" )
829848
830- # Execute the python script to generate required files
849+ if (ONLY_FUNCS)
850+ message (WARNING "Using ONLY_FUNCS = ${ONLY_FUNCS} . Not meant for release builds." )
851+ endif ()
852+
853+ # Execute the python script to generate required collective functions
831854execute_process (
832855 COMMAND ${Python3_EXECUTABLE} ${CMAKE_SOURCE_DIR} /src/device/generate.py ${GEN_DIR} ${IFC_ENABLED} ${COLLTRACE} ${ENABLE_MSCCL_KERNEL} ${BUILD_LOCAL_GPU_TARGET_ONLY} ${ONLY_FUNCS}
833856 WORKING_DIRECTORY ${CMAKE_SOURCE_DIR}
@@ -839,8 +862,20 @@ if (gen_py_result)
839862 message (FATAL_ERROR "${CMAKE_SOURCE_DIR} /src/device/generate.py failed" )
840863endif ()
841864
865+ # Execute the python script to generate required symmetric memory kernels
866+ execute_process (
867+ COMMAND ${Python3_EXECUTABLE} ${CMAKE_SOURCE_DIR} /src/device/symmetric/generate.py ${GEN_SYM_DIR}
868+ WORKING_DIRECTORY ${CMAKE_SOURCE_DIR}
869+ RESULT_VARIABLE gen_sym_py_result
870+ ERROR_VARIABLE gen_sym_py_error
871+ )
872+ if (gen_sym_py_result)
873+ message (SEND_ERROR "Error: ${gen_sym_py_error} " )
874+ message (FATAL_ERROR "${CMAKE_SOURCE_DIR} /src/device/symmetric/generate.py failed" )
875+ endif ()
876+
842877# Find the generated files in the output directory
843- file (GLOB GENERATED_FILES "${GEN_DIR} /*" )
878+ file (GLOB_RECURSE GENERATED_FILES "${GEN_DIR} /*" )
844879
845880# Append all found generated files to the list
846881foreach (file ${GENERATED_FILES} )
@@ -876,10 +911,11 @@ endif()
876911## Set RCCL include directories
877912target_include_directories (rccl PRIVATE ${PROJECT_BINARY_DIR} /include ) # for generated rccl.h header
878913target_include_directories (rccl PRIVATE ${HIPIFY_DIR} /src) # for hipfied headers
879- target_include_directories (rccl PRIVATE ${HIPIFY_DIR} /src/include )
880- target_include_directories (rccl PRIVATE ${HIPIFY_DIR} /src/include /plugin)
881914target_include_directories (rccl PRIVATE ${HIPIFY_DIR} /src/device)
882915target_include_directories (rccl PRIVATE ${HIPIFY_DIR} /src/device/network/unpack)
916+ target_include_directories (rccl PRIVATE ${HIPIFY_DIR} /src/include )
917+ target_include_directories (rccl PRIVATE ${HIPIFY_DIR} /src/include /mlx5)
918+ target_include_directories (rccl PRIVATE ${HIPIFY_DIR} /src/include /plugin)
883919target_include_directories (rccl PRIVATE ${HIPIFY_DIR} /gensrc)
884920target_include_directories (rccl PRIVATE ${HSA_INCLUDE_PATH} )
885921target_include_directories (rccl PRIVATE ${ROCM_SMI_INCLUDE_DIR} )
0 commit comments