88#include " collectives.h"
99#include " enqueue.h"
1010#include " nccl.h"
11+ #include " nvtx_payload_schemas.h"
1112
1213const char * ncclFuncToString (ncclFunc_t fn) {
1314 switch (fn) {
@@ -78,11 +79,8 @@ NCCL_API(ncclResult_t, ncclAllGather, const void* sendbuff, void* recvbuff, size
7879ncclResult_t ncclAllGather (const void * sendbuff, void * recvbuff, size_t sendcount,
7980 ncclDataType_t datatype, ncclComm_t comm, cudaStream_t stream) {
8081 // Just pass the size of one message and not the total bytes sent/received.
81- constexpr nvtxPayloadSchemaEntry_t AllGatherSchema[] = {
82- {0 , NVTX_PAYLOAD_ENTRY_TYPE_SIZE, " Message size [bytes]" }
83- };
84- size_t msgsize = sendcount * ncclTypeSize (datatype);
85- NVTX3_FUNC_WITH_PARAMS (AllGather, AllGatherSchema, msgsize)
82+ NVTX3_FUNC_WITH_PARAMS (AllGather, NcclNvtxParamsAllGather,
83+ NVTX3_PAYLOAD (comm ? comm->commHash : 0 , sendcount * ncclTypeSize (datatype)));
8684
8785 struct ncclInfo info = { ncclFuncAllGather, " AllGather" ,
8886 sendbuff, recvbuff, sendcount, datatype, ncclSum, 0 , comm, stream, /* Args */
@@ -94,18 +92,8 @@ NCCL_API(ncclResult_t, ncclAllReduce, const void* sendbuff, void* recvbuff, size
9492 ncclDataType_t datatype, ncclRedOp_t op, ncclComm* comm, cudaStream_t stream);
9593ncclResult_t ncclAllReduce (const void * sendbuff, void * recvbuff, size_t count,
9694 ncclDataType_t datatype, ncclRedOp_t op, ncclComm* comm, cudaStream_t stream) {
97- struct NvtxParamsAllReduce {
98- size_t bytes;
99- ncclRedOp_t op;
100- };
101- // Just pass the size of one message and not the total bytes sent/received.
102- static constexpr nvtxPayloadSchemaEntry_t AllReduceSchema[] = {
103- {0 , NVTX_PAYLOAD_ENTRY_TYPE_SIZE, " Message size [bytes]" },
104- {0 , NVTX_PAYLOAD_ENTRY_NCCL_REDOP, " Reduction operation" , nullptr , 0 ,
105- offsetof (NvtxParamsAllReduce, op)}
106- };
107- NvtxParamsAllReduce payload{count * ncclTypeSize (datatype), op};
108- NVTX3_FUNC_WITH_PARAMS (AllReduce, AllReduceSchema, payload)
95+ NVTX3_FUNC_WITH_PARAMS (AllReduce, NcclNvtxParamsAllReduce,
96+ NVTX3_PAYLOAD (comm ? comm->commHash : 0 , count * ncclTypeSize (datatype), op));
10997
11098 struct ncclInfo info = { ncclFuncAllReduce, " AllReduce" ,
11199 sendbuff, recvbuff, count, datatype, op, 0 , comm, stream, /* Args */
@@ -117,16 +105,8 @@ NCCL_API(ncclResult_t, ncclBroadcast, const void* sendbuff, void* recvbuff, size
117105 ncclComm_t comm, cudaStream_t stream);
118106ncclResult_t ncclBroadcast (const void * sendbuff, void * recvbuff, size_t count, ncclDataType_t datatype, int root,
119107 ncclComm_t comm, cudaStream_t stream) {
120- struct NvtxParamsBroadcast {
121- size_t bytes;
122- int root;
123- };
124- constexpr nvtxPayloadSchemaEntry_t BroadcastSchema[] = {
125- {0 , NVTX_PAYLOAD_ENTRY_TYPE_SIZE, " Bytes" },
126- {0 , NVTX_PAYLOAD_ENTRY_TYPE_INT, " Root" , nullptr , 0 , offsetof (NvtxParamsBroadcast, root)}
127- };
128- NvtxParamsBroadcast payload{count * ncclTypeSize (datatype), root};
129- NVTX3_FUNC_WITH_PARAMS (Broadcast, BroadcastSchema, payload)
108+ NVTX3_FUNC_WITH_PARAMS (Broadcast, NcclNvtxParamsBroadcast,
109+ NVTX3_PAYLOAD (comm ? comm->commHash : 0 , count * ncclTypeSize (datatype), root));
130110
131111 struct ncclInfo info = { ncclFuncBroadcast, " Broadcast" ,
132112 sendbuff, recvbuff, count, datatype, ncclSum, root, comm, stream, /* Args */
@@ -145,19 +125,8 @@ NCCL_API(ncclResult_t, ncclReduce, const void* sendbuff, void* recvbuff, size_t
145125 ncclDataType_t datatype, ncclRedOp_t op, int root, ncclComm_t comm, cudaStream_t stream);
146126ncclResult_t ncclReduce (const void * sendbuff, void * recvbuff, size_t count,
147127 ncclDataType_t datatype, ncclRedOp_t op, int root, ncclComm_t comm, cudaStream_t stream) {
148- struct NvtxParamsReduce {
149- size_t bytes;
150- int root;
151- ncclRedOp_t op;
152- };
153- constexpr nvtxPayloadSchemaEntry_t ReduceSchema[] = {
154- {0 , NVTX_PAYLOAD_ENTRY_TYPE_SIZE, " Message size [bytes]" },
155- {0 , NVTX_PAYLOAD_ENTRY_TYPE_INT, " Root" , nullptr , 0 , offsetof (NvtxParamsReduce, root)},
156- {0 , NVTX_PAYLOAD_ENTRY_NCCL_REDOP, " Reduction operation" , nullptr , 0 ,
157- offsetof (NvtxParamsReduce, op)}
158- };
159- NvtxParamsReduce payload{count * ncclTypeSize (datatype), root, op};
160- NVTX3_FUNC_WITH_PARAMS (Reduce, ReduceSchema, payload)
128+ NVTX3_FUNC_WITH_PARAMS (Reduce, NcclNvtxParamsReduce,
129+ NVTX3_PAYLOAD (comm ? comm->commHash : 0 , count * ncclTypeSize (datatype), root, op));
161130
162131 struct ncclInfo info = { ncclFuncReduce, " Reduce" ,
163132 sendbuff, recvbuff, count, datatype, op, root, comm, stream, /* Args */
@@ -169,39 +138,21 @@ NCCL_API(ncclResult_t, ncclReduceScatter, const void* sendbuff, void* recvbuff,
169138 ncclDataType_t datatype, ncclRedOp_t op, ncclComm* comm, cudaStream_t stream);
170139ncclResult_t ncclReduceScatter (const void * sendbuff, void * recvbuff, size_t recvcount,
171140 ncclDataType_t datatype, ncclRedOp_t op, ncclComm* comm, cudaStream_t stream) {
172- struct NvtxParamsReduceScatter {
173- size_t bytes;
174- ncclRedOp_t op;
175- };
176- constexpr nvtxPayloadSchemaEntry_t ReduceScatterSchema[] = {
177- {0 , NVTX_PAYLOAD_ENTRY_TYPE_SIZE, " Message size [bytes]" },
178- {0 , NVTX_PAYLOAD_ENTRY_NCCL_REDOP, " Reduction operation" , nullptr , 0 ,
179- offsetof (NvtxParamsReduceScatter, op)}
180- };
181- NvtxParamsReduceScatter payload{recvcount * ncclTypeSize (datatype), op};
182- NVTX3_FUNC_WITH_PARAMS (ReduceScatter, ReduceScatterSchema, payload)
141+ NVTX3_FUNC_WITH_PARAMS (ReduceScatter, NcclNvtxParamsReduceScatter,
142+ NVTX3_PAYLOAD (comm ? comm->commHash : 0 , recvcount * ncclTypeSize (datatype), op));
183143
184144 struct ncclInfo info = { ncclFuncReduceScatter, " ReduceScatter" ,
185145 sendbuff, recvbuff, recvcount, datatype, op, 0 , comm, stream, /* Args */
186146 REDUCESCATTER_CHUNKSTEPS, REDUCESCATTER_SLICESTEPS };
187147 return ncclEnqueueCheck (&info);
188148}
189149
190- struct NvtxParamsSendRecv {
191- size_t bytes;
192- int peer;
193- };
194- constexpr const nvtxPayloadSchemaEntry_t SendRecvSchema[] = {
195- {0 , NVTX_PAYLOAD_ENTRY_TYPE_SIZE, " Bytes" },
196- {0 , NVTX_PAYLOAD_ENTRY_TYPE_INT, " Peer rank" , nullptr , 0 , offsetof (NvtxParamsSendRecv, peer)}
197- };
198-
199150NCCL_API (ncclResult_t, ncclSend, const void * sendbuff, size_t count, ncclDataType_t datatype, int peer,
200151 ncclComm_t comm, cudaStream_t stream);
201152ncclResult_t ncclSend (const void * sendbuff, size_t count, ncclDataType_t datatype, int peer,
202153 ncclComm_t comm, cudaStream_t stream) {
203- NvtxParamsSendRecv payload{count * ncclTypeSize (datatype), peer};
204- NVTX3_FUNC_WITH_PARAMS (Send, SendRecvSchema, payload)
154+ NVTX3_FUNC_WITH_PARAMS (Send, NcclNvtxParamsSendRecv,
155+ NVTX3_PAYLOAD (comm ? comm-> commHash : 0 , count * ncclTypeSize (datatype), peer));
205156
206157 struct ncclInfo info = { ncclFuncSend, " Send" ,
207158 NULL , (void *)sendbuff, count, datatype, ncclSum, peer, comm, stream, /* Args */
@@ -213,8 +164,8 @@ NCCL_API(ncclResult_t, ncclRecv, void* recvbuff, size_t count, ncclDataType_t da
213164 ncclComm_t comm, cudaStream_t stream);
214165ncclResult_t ncclRecv (void * recvbuff, size_t count, ncclDataType_t datatype, int peer,
215166 ncclComm_t comm, cudaStream_t stream) {
216- NvtxParamsSendRecv payload{count * ncclTypeSize (datatype), peer};
217- NVTX3_FUNC_WITH_PARAMS (Recv, SendRecvSchema, payload)
167+ NVTX3_FUNC_WITH_PARAMS (Recv, NcclNvtxParamsSendRecv,
168+ NVTX3_PAYLOAD (comm ? comm-> commHash : 0 , count * ncclTypeSize (datatype), peer));
218169
219170 struct ncclInfo info = { ncclFuncRecv, " Recv" ,
220171 NULL , recvbuff, count, datatype, ncclSum, peer, comm, stream, /* Args */
0 commit comments