Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
46 changes: 41 additions & 5 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -143,7 +143,7 @@ endif()
# Set CMAKE flags
#==================================================================================================
set(CMAKE_INSTALL_PREFIX "${ROCM_PATH}" CACHE PATH "")
set(CMAKE_CXX_STANDARD 14) # We use C++14 features, this will add compile option: -std=c++14
set(CMAKE_CXX_STANDARD 17) # We build as C++17, this will add compile option: -std=c++17
set(CMAKE_CXX_EXTENSIONS OFF) # Without this line, it will add -std=gnu++17 instead, which has some issues.
if(ROCM_PATH)
list(APPEND CMAKE_PREFIX_PATH # Add ROCM_PATH to CMake search paths (for finding HIP / HSA
Expand Down Expand Up @@ -424,6 +424,7 @@ configure_file(src/nccl.h.in ${PROJECT_BINARY_DIR}/include/nccl.h) # Used b
#==================================================================================================
# E.g: find src -type f \( -name "*.cc" -o -name "*.h" -o -name "*.hpp" \) | sort
set(SRC_FILES
src/allocator.cc
src/bootstrap.cc
src/channel.cc
src/collectives.cc
Expand All @@ -436,6 +437,7 @@ set(SRC_FILES
src/msccl.cc
src/proxy.cc
src/rccl_wrap.cc
src/symmetric.cc
src/transport.cc
src/device/all_gather.h
src/device/all_reduce.h
Expand All @@ -458,6 +460,11 @@ set(SRC_FILES
src/device/onerank.cu
src/device/network/unpack/unpack_defs.h
src/device/network/unpack/unpack.h
src/device/symmetric/all_gather.cuh
src/device/symmetric/all_reduce.cuh
src/device/symmetric/kernel.cuh
src/device/symmetric/primitives.cuh
src/device/symmetric/reduce_scatter.cuh
src/graph/connect.cc
src/graph/paths.cc
src/graph/rings.cc
Expand All @@ -472,6 +479,7 @@ set(SRC_FILES
src/graph/xml.cc
src/graph/xml.h
src/include/alloc.h
src/include/allocator.h
src/include/alt_rsmi.h
src/include/archinfo.h
src/include/api_trace.h
Expand Down Expand Up @@ -516,6 +524,7 @@ set(SRC_FILES
src/include/rccl_common.h
src/include/rccl_vars.h
src/include/register.h
src/include/register_inline.h
src/include/rccl_float8.h
src/include/rocm_smi_wrap.h
src/include/rocmwrap.h
Expand All @@ -526,11 +535,15 @@ set(SRC_FILES
src/include/signals.h
src/include/socket.h
src/include/strongstream.h
src/include/symmetric.h
src/include/timer.h
src/include/transport.h
src/include/trees.h
src/include/tuner.h
src/include/utils.h
src/include/mlx5/mlx5dvcore.h
src/include/mlx5/mlx5dvsymbols.h
src/include/mlx5/mlx5dvwrap.h
src/include/msccl/msccl_lifecycle.h
src/include/msccl/msccl_parser.h
src/include/msccl/msccl_scheduler.h
Expand Down Expand Up @@ -591,6 +604,7 @@ set(SRC_FILES
src/include/plugin/profiler/profiler_v1.h
src/include/plugin/profiler/profiler_v2.h
src/include/plugin/profiler/profiler_v3.h
src/include/plugin/profiler/profiler_v4.h
src/include/plugin/tuner/tuner_v2.h
src/include/plugin/tuner/tuner_v3.h
src/include/plugin/tuner/tuner_v4.h
Expand All @@ -604,6 +618,8 @@ set(SRC_FILES
src/misc/ibvsymbols.cc
src/misc/ibvwrap.cc
src/misc/ipcsocket.cc
src/misc/mlx5dvsymbols.cc
src/misc/mlx5dvwrap.cc
src/misc/npkit.cc
# src/misc/nvmlwrap.cc
src/misc/nvmlwrap_stub.cc
Expand Down Expand Up @@ -634,6 +650,7 @@ set(SRC_FILES
src/plugin/profiler/profiler_v1.cc
src/plugin/profiler/profiler_v2.cc
src/plugin/profiler/profiler_v3.cc
src/plugin/profiler/profiler_v4.cc
src/plugin/tuner/tuner_v2.cc
src/plugin/tuner/tuner_v3.cc
src/plugin/tuner/tuner_v4.cc
Expand Down Expand Up @@ -706,6 +723,7 @@ foreach(SRC_FILE ${SRC_FILES})
add_file_unique(HIP_SOURCES ${HIP_FILE})

# Rename .cuh headers to .h and .cu files to .cu.cpp so that they get processed properly
string(REPLACE "\.cuh" "\.h" HIP_FILE ${HIP_FILE})
string(REPLACE "\.cu" "\.cu.cpp" HIP_FILE ${HIP_FILE})
list(APPEND HIP_SOURCES ${HIP_FILE})

Expand Down Expand Up @@ -823,8 +841,13 @@ if (NOT Python3_FOUND)
endif()

set(GEN_DIR "${HIPIFY_DIR}/gensrc")
set(GEN_SYM_DIR "${GEN_DIR}/symmetric")

# Execute the python script to generate required files
# Emit a configure-time warning whenever ONLY_FUNCS is set to a true value, echoing
# the value so it is visible in the configure log. ONLY_FUNCS is forwarded as the
# last argument to src/device/generate.py.
if(ONLY_FUNCS)
message(WARNING "Using ONLY_FUNCS = ${ONLY_FUNCS}. Not meant for release builds.")
endif()

# Execute the python script to generate required collective functions
execute_process(
COMMAND ${Python3_EXECUTABLE} ${CMAKE_SOURCE_DIR}/src/device/generate.py ${GEN_DIR} ${IFC_ENABLED} ${COLLTRACE} ${ENABLE_MSCCL_KERNEL} ${BUILD_LOCAL_GPU_TARGET_ONLY} ${ONLY_FUNCS}
WORKING_DIRECTORY ${CMAKE_SOURCE_DIR}
Expand All @@ -836,8 +859,20 @@ if (gen_py_result)
message(FATAL_ERROR "${CMAKE_SOURCE_DIR}/src/device/generate.py failed")
endif()

# Execute the python script to generate required symmetric memory kernels.
# This runs during the CMake configure step (execute_process), not at build time.
# The script receives ${GEN_SYM_DIR} as its only argument (presumably the output
# directory for the generated sources - confirm against generate.py) and is run
# with the top-level source directory as its working directory.
execute_process(
COMMAND ${Python3_EXECUTABLE} ${CMAKE_SOURCE_DIR}/src/device/symmetric/generate.py ${GEN_SYM_DIR}
WORKING_DIRECTORY ${CMAKE_SOURCE_DIR}
RESULT_VARIABLE gen_sym_py_result
ERROR_VARIABLE gen_sym_py_error
)
# RESULT_VARIABLE holds the script's exit code (or an error string if it could not
# be launched); any true value here means the generation step failed.
if (gen_sym_py_result)
# Print the captured stderr first so the cause is visible, then abort configuration.
message(SEND_ERROR "Error: ${gen_sym_py_error}")
message(FATAL_ERROR "${CMAKE_SOURCE_DIR}/src/device/symmetric/generate.py failed")
endif()

# Find the generated files in the output directory
file(GLOB GENERATED_FILES "${GEN_DIR}/*")
file(GLOB_RECURSE GENERATED_FILES "${GEN_DIR}/*")

# Append all found generated files to the list
foreach(file ${GENERATED_FILES})
Expand Down Expand Up @@ -885,10 +920,11 @@ endif()
## Set RCCL include directories
target_include_directories(rccl PRIVATE ${PROJECT_BINARY_DIR}/include) # for generated rccl.h header
target_include_directories(rccl PRIVATE ${HIPIFY_DIR}/src) # for hipfied headers
target_include_directories(rccl PRIVATE ${HIPIFY_DIR}/src/include)
target_include_directories(rccl PRIVATE ${HIPIFY_DIR}/src/include/plugin)
target_include_directories(rccl PRIVATE ${HIPIFY_DIR}/src/device)
target_include_directories(rccl PRIVATE ${HIPIFY_DIR}/src/device/network/unpack)
target_include_directories(rccl PRIVATE ${HIPIFY_DIR}/src/include)
target_include_directories(rccl PRIVATE ${HIPIFY_DIR}/src/include/mlx5)
target_include_directories(rccl PRIVATE ${HIPIFY_DIR}/src/include/plugin)
target_include_directories(rccl PRIVATE ${HIPIFY_DIR}/gensrc)
target_include_directories(rccl PRIVATE ${HSA_INCLUDE_PATH})
target_include_directories(rccl PRIVATE ${ROCM_SMI_INCLUDE_DIR})
Expand Down
6 changes: 6 additions & 0 deletions ext-net/example/nccl/common.h
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,15 @@
#ifndef COMMON_H_
#define COMMON_H_

#include <stdint.h>

typedef enum {NCCL_LOG_NONE=0, NCCL_LOG_VERSION=1, NCCL_LOG_WARN=2, NCCL_LOG_INFO=3, NCCL_LOG_ABORT=4, NCCL_LOG_TRACE=5} ncclDebugLogLevel;
typedef enum {NCCL_INIT=1, NCCL_COLL=2, NCCL_P2P=4, NCCL_SHM=8, NCCL_NET=16, NCCL_GRAPH=32, NCCL_TUNING=64, NCCL_ENV=128, NCCL_ALLOC=256, NCCL_CALL=512, NCCL_PROXY=1024, NCCL_NVLS=2048, NCCL_BOOTSTRAP=4096, NCCL_REG=8192, NCCL_ALL=~0} ncclDebugLogSubSys;

typedef void (*ncclDebugLogger_t)(ncclDebugLogLevel level, unsigned long flags, const char *file, int line, const char *fmt, ...);

enum { ncclProfilerNetEventStart = 0, ncclProfilerNetEventStop, ncclProfilerNetEventUpdate, ncclProfilerNetEventUpdateAndStop };

typedef ncclResult_t (*ncclProfilerCallback_t)(void** eHandle, int type, void* phandle, int64_t pluginId, void* extData);

#endif
4 changes: 1 addition & 3 deletions ext-net/example/nccl/net.h
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,9 @@
#include <stdint.h>
#include <stdlib.h>

#include "common.h"
#include "err.h"
#include "net_device.h"
#include "common.h"

#define NCCL_NET_HANDLE_MAXSIZE 128
#define NCCL_MAX_NET_SIZE_BYTES (1*1024*1024*1024*1024L) //1TB
Expand All @@ -23,8 +23,6 @@
// Maximum number of requests per comm object
#define NCCL_NET_MAX_REQUESTS 32

typedef ncclResult_t (*ncclProfilerCallback_t)(void** eHandle, int type, void* phandle, int64_t pluginId, void* extData);

#include "net_v10.h"
#include "net_v9.h"
#include "net_v8.h"
Expand Down
Loading