|
| 1 | +/************************************************************************* |
| 2 | + * Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. |
| 3 | + * Copyright (c) 2023, Meta Platforms, Inc. and affiliates. |
| 4 | + * |
| 5 | + * See LICENSE.txt for license information |
| 6 | + ************************************************************************/ |
| 7 | + |
| 8 | +#ifndef NCCL_TUNER_H_ |
| 9 | +#define NCCL_TUNER_H_ |
| 10 | + |
| 11 | +#include <stdint.h> |
| 12 | +#include <stdlib.h> |
| 13 | + |
| 14 | +#include "common.h" |
| 15 | +#include "err.h" |
| 16 | + |
| 17 | +#define NCCL_NUM_FUNCTIONS 5 // Covers enum values 0-4 below (Broadcast..AllReduce); SendRecv/Send/Recv not included for now |
| 18 | +typedef enum { |
| 19 | + ncclFuncBroadcast = 0, |
| 20 | + ncclFuncReduce = 1, |
| 21 | + ncclFuncAllGather = 2, |
| 22 | + ncclFuncReduceScatter = 3, |
| 23 | + ncclFuncAllReduce = 4, |
| 24 | + ncclFuncSendRecv = 5, |
| 25 | + ncclFuncSend = 6, |
| 26 | + ncclFuncRecv = 7, |
| 27 | + ncclNumFuncs = 8 /* count of all entries above, including SendRecv/Send/Recv */ |
| 28 | +} ncclFunc_t; |
| 29 | + |
| 30 | +#define NCCL_NUM_ALGORITHMS 7 // Tree/Ring/CollNet*/NVLS*/PAT |
| 31 | +#define NCCL_ALGO_UNDEF -1 |
| 32 | +#define NCCL_ALGO_TREE 0 |
| 33 | +#define NCCL_ALGO_RING 1 |
| 34 | +#define NCCL_ALGO_COLLNET_DIRECT 2 |
| 35 | +#define NCCL_ALGO_COLLNET_CHAIN 3 |
| 36 | +#define NCCL_ALGO_NVLS 4 |
| 37 | +#define NCCL_ALGO_NVLS_TREE 5 |
| 38 | +#define NCCL_ALGO_PAT 6 |
| 39 | + |
| 40 | +#define NCCL_NUM_PROTOCOLS 3 // Simple/LL/LL128 |
| 41 | +#define NCCL_PROTO_UNDEF -1 |
| 42 | +#define NCCL_PROTO_LL 0 |
| 43 | +#define NCCL_PROTO_LL128 1 |
| 44 | +#define NCCL_PROTO_SIMPLE 2 |
| 45 | + |
| 46 | +#define NCCL_ALGO_PROTO_IGNORE -1.0 /* cost table value NCCL core uses for ignored algo/proto entries */ |
| 47 | + |
| 48 | +// API to be implemented by an external tuner plugin |
| 49 | +typedef struct { |
| 50 | + // Name of the tuner |
| 51 | + const char* name; |
| 52 | + |
| 53 | + // Initializes tuner states. |
| 54 | + // Inputs: |
| 55 | + // - nRanks: number of ranks in the current communicator. Each communicator initializes its own tuner. |
| 56 | + // - nNodes: number of nodes in the current communicator. |
| 57 | + // - logFunction: logging callback; can be used to integrate the plugin's logging with NCCL core. |
| 58 | + // Outputs: |
| 59 | + // - context: tuner context object |
| 60 | + ncclResult_t (*init)(size_t nRanks, size_t nNodes, ncclDebugLogger_t logFunction, void **context); |
| 61 | + |
| 62 | + // Gets tuning info (algo/proto costs and number of channels) for a given collective. |
| 63 | + // Inputs: |
| 64 | + // - context: tuner context object |
| 65 | + // - collType: collective type, e.g., allreduce, allgather, ... |
| 66 | + // - nBytes: collective size in bytes |
| 67 | + // - numPipeOps: number of operations in the group |
| 68 | + // - numAlgo: number of algorithms in collCostTable |
| 69 | + // - numProto: number of protocols in collCostTable |
| 70 | + // - regBuff: indicates whether the user buffer can be registered |
| 71 | + // |
| 72 | + // Outputs: |
| 73 | + // - nChannels: number of channels (hence SMs) to be used. |
| 74 | + // |
| 75 | + // InOut: |
| 76 | + // - collCostTable: collective cost table, generated by NCCL core, containing algo|proto|time entries for collType. |
| 77 | + // NCCL core sets ignored algo/proto cost table entries to -1.0 (NCCL_ALGO_PROTO_IGNORE). |
| 78 | + // |
| 79 | + // If getCollInfo() does not return ncclSuccess, NCCL will fall back to the |
| 80 | + // default tuning for the given collective. |
| 81 | + // Also, the plugin is allowed to not set any output, or set only the |
| 82 | + // algorithm and protocol, but not only the algorithm or only the protocol. |
| 83 | + // Unset fields will be set automatically by NCCL. NOTE(review): this signature has no separate algorithm/protocol outputs; presumably they are expressed via collCostTable - confirm. |
| 84 | + ncclResult_t (*getCollInfo)(void* context, ncclFunc_t collType, size_t nBytes, |
| 85 | + int numPipeOps, float** collCostTable, int numAlgo, int numProto, |
| 86 | + int regBuff, int* nChannels); |
| 87 | + |
| 88 | + // Terminates the plugin and cleans up any resources that the plugin allocated. |
| 89 | + // - context: tuner context object (the one output by init) |
| 90 | + ncclResult_t (*destroy)(void* context); |
| 91 | +} ncclTuner_v4_t; |
| 92 | + |
| 93 | +typedef ncclTuner_v4_t ncclTuner_t; /* unversioned alias of the v4 tuner interface */ |
| 94 | + |
| 95 | +#define NCCL_TUNER_PLUGIN_SYMBOL "ncclTunerPlugin_v4" /* presumably the symbol name looked up in the plugin library - confirm */ |
| 96 | + |
| 97 | +#endif |
0 commit comments