@@ -143,7 +143,7 @@ endif()
143143# Set CMAKE flags
144144#==================================================================================================
145145set (CMAKE_INSTALL_PREFIX "${ROCM_PATH} " CACHE PATH "" )
146- set (CMAKE_CXX_STANDARD 14 ) # We use C++14 features, this will add compile option: -std=c++14
146+ set (CMAKE_CXX_STANDARD 17 ) # We use C++17 features, this will add compile option: -std=c++17
147147set (CMAKE_CXX_EXTENSIONS OFF ) # Without this line, it will add -std=gnu++17 instead, which has some issues.
148148if (ROCM_PATH)
149149 list (APPEND CMAKE_PREFIX_PATH # Add ROCM_PATH to CMake search paths (for finding HIP / HSA
@@ -424,6 +424,7 @@ configure_file(src/nccl.h.in ${PROJECT_BINARY_DIR}/include/nccl.h) # Used b
424424#==================================================================================================
425425# E.g: find src -type f \( -name "*.cc" -o -name "*.h" -o -name "*.hpp" \) | sort
426426set (SRC_FILES
427+ src/allocator.cc
427428 src/bootstrap.cc
428429 src/channel.cc
429430 src/collectives.cc
@@ -436,6 +437,7 @@ set(SRC_FILES
436437 src/msccl.cc
437438 src/proxy.cc
438439 src/rccl_wrap.cc
440+ src/symmetric.cc
439441 src/transport.cc
440442 src/device/all_gather.h
441443 src/device/all_reduce.h
@@ -458,6 +460,11 @@ set(SRC_FILES
458460 src/device/onerank.cu
459461 src/device/network/unpack/unpack_defs.h
460462 src/device/network/unpack/unpack.h
463+ src/device/symmetric/all_gather.cuh
464+ src/device/symmetric/all_reduce.cuh
465+ src/device/symmetric/kernel.cuh
466+ src/device/symmetric/primitives.cuh
467+ src/device/symmetric/reduce_scatter.cuh
461468 src/graph/connect.cc
462469 src/graph/paths .cc
463470 src/graph/rings.cc
@@ -472,6 +479,7 @@ set(SRC_FILES
472479 src/graph/xml.cc
473480 src/graph/xml.h
474481 src/include /alloc.h
482+ src/include /allocator.h
475483 src/include /alt_rsmi.h
476484 src/include /archinfo.h
477485 src/include /api_trace.h
@@ -516,6 +524,7 @@ set(SRC_FILES
516524 src/include /rccl_common.h
517525 src/include /rccl_vars.h
518526 src/include /register.h
527+ src/include /register_inline.h
519528 src/include /rccl_float8.h
520529 src/include /rocm_smi_wrap.h
521530 src/include /rocmwrap.h
@@ -526,11 +535,15 @@ set(SRC_FILES
526535 src/include /signals.h
527536 src/include /socket.h
528537 src/include /strongstream.h
538+ src/include /symmetric.h
529539 src/include /timer.h
530540 src/include /transport.h
531541 src/include /trees.h
532542 src/include /tuner.h
533543 src/include /utils.h
544+ src/include /mlx5/mlx5dvcore.h
545+ src/include /mlx5/mlx5dvsymbols.h
546+ src/include /mlx5/mlx5dvwrap.h
534547 src/include /msccl/msccl_lifecycle.h
535548 src/include /msccl/msccl_parser.h
536549 src/include /msccl/msccl_scheduler.h
@@ -591,6 +604,7 @@ set(SRC_FILES
591604 src/include /plugin/profiler/profiler_v1.h
592605 src/include /plugin/profiler/profiler_v2.h
593606 src/include /plugin/profiler/profiler_v3.h
607+ src/include /plugin/profiler/profiler_v4.h
594608 src/include /plugin/tuner/tuner_v2.h
595609 src/include /plugin/tuner/tuner_v3.h
596610 src/include /plugin/tuner/tuner_v4.h
@@ -604,6 +618,8 @@ set(SRC_FILES
604618 src/misc/ibvsymbols.cc
605619 src/misc/ibvwrap.cc
606620 src/misc/ipcsocket.cc
621+ src/misc/mlx5dvsymbols.cc
622+ src/misc/mlx5dvwrap.cc
607623 src/misc/npkit.cc
608624# src/misc/nvmlwrap.cc
609625 src/misc/nvmlwrap_stub.cc
@@ -634,6 +650,7 @@ set(SRC_FILES
634650 src/plugin/profiler/profiler_v1.cc
635651 src/plugin/profiler/profiler_v2.cc
636652 src/plugin/profiler/profiler_v3.cc
653+ src/plugin/profiler/profiler_v4.cc
637654 src/plugin/tuner/tuner_v2.cc
638655 src/plugin/tuner/tuner_v3.cc
639656 src/plugin/tuner/tuner_v4.cc
@@ -706,6 +723,7 @@ foreach(SRC_FILE ${SRC_FILES})
706723 add_file_unique(HIP_SOURCES ${HIP_FILE} )
707724
708725 # Convert .cuh files to .h and .cu files to .cu.cpp so that they get processed properly (.cuh is handled first because ".cu" is a prefix of ".cuh")
726+ string (REPLACE "\. cuh" "\. h" HIP_FILE ${HIP_FILE} )
709727 string (REPLACE "\. cu" "\. cu.cpp" HIP_FILE ${HIP_FILE} )
710728 list (APPEND HIP_SOURCES ${HIP_FILE} )
711729
@@ -823,8 +841,13 @@ if (NOT Python3_FOUND)
823841endif ()
824842
825843set (GEN_DIR "${HIPIFY_DIR} /gensrc" )
844+ set (GEN_SYM_DIR "${GEN_DIR} /symmetric" )
826845
827- # Execute the python script to generate required files
846+ if (ONLY_FUNCS)
847+ message (WARNING "Using ONLY_FUNCS = ${ONLY_FUNCS} . Not meant for release builds." )
848+ endif ()
849+
850+ # Execute the python script to generate required collective functions
828851execute_process (
829852 COMMAND ${Python3_EXECUTABLE} ${CMAKE_SOURCE_DIR} /src/device/generate.py ${GEN_DIR} ${IFC_ENABLED} ${COLLTRACE} ${ENABLE_MSCCL_KERNEL} ${BUILD_LOCAL_GPU_TARGET_ONLY} ${ONLY_FUNCS}
830853 WORKING_DIRECTORY ${CMAKE_SOURCE_DIR}
@@ -836,8 +859,20 @@ if (gen_py_result)
836859 message (FATAL_ERROR "${CMAKE_SOURCE_DIR} /src/device/generate.py failed" )
837860endif ()
838861
862+ # Execute the python script to generate required symmetric memory kernels
863+ execute_process (
864+ COMMAND ${Python3_EXECUTABLE} ${CMAKE_SOURCE_DIR} /src/device/symmetric/generate.py ${GEN_SYM_DIR}
865+ WORKING_DIRECTORY ${CMAKE_SOURCE_DIR}
866+ RESULT_VARIABLE gen_sym_py_result
867+ ERROR_VARIABLE gen_sym_py_error
868+ )
869+ if (gen_sym_py_result)
870+ message (SEND_ERROR "Error: ${gen_sym_py_error} " )
871+ message (FATAL_ERROR "${CMAKE_SOURCE_DIR} /src/device/symmetric/generate.py failed" )
872+ endif ()
873+
839874# Find the generated files in the output directory
840- file (GLOB GENERATED_FILES "${GEN_DIR} /*" )
875+ file (GLOB_RECURSE GENERATED_FILES "${GEN_DIR} /*" )
841876
842877# Append all found generated files to the list
843878foreach (file ${GENERATED_FILES} )
@@ -885,10 +920,11 @@ endif()
885920## Set RCCL include directories
886921target_include_directories (rccl PRIVATE ${PROJECT_BINARY_DIR} /include ) # for generated rccl.h header
887922target_include_directories (rccl PRIVATE ${HIPIFY_DIR} /src) # for hipfied headers
888- target_include_directories (rccl PRIVATE ${HIPIFY_DIR} /src/include )
889- target_include_directories (rccl PRIVATE ${HIPIFY_DIR} /src/include /plugin)
890923target_include_directories (rccl PRIVATE ${HIPIFY_DIR} /src/device)
891924target_include_directories (rccl PRIVATE ${HIPIFY_DIR} /src/device/network/unpack)
925+ target_include_directories (rccl PRIVATE ${HIPIFY_DIR} /src/include )
926+ target_include_directories (rccl PRIVATE ${HIPIFY_DIR} /src/include /mlx5)
927+ target_include_directories (rccl PRIVATE ${HIPIFY_DIR} /src/include /plugin)
892928target_include_directories (rccl PRIVATE ${HIPIFY_DIR} /gensrc)
893929target_include_directories (rccl PRIVATE ${HSA_INCLUDE_PATH} )
894930target_include_directories (rccl PRIVATE ${ROCM_SMI_INCLUDE_DIR} )
0 commit comments