@@ -41,7 +41,8 @@ void cu_mem_free(void* ptr) {
4141
4242size_t get_size_align_to_granularity (size_t size_raw, size_t granularity) {
4343 size_t size = (size_raw + granularity - 1 ) & ~(granularity - 1 );
44- if (size == 0 ) size = granularity;
44+ if (size == 0 )
45+ size = granularity;
4546 return size;
4647}
4748
@@ -66,7 +67,7 @@ void SharedMemoryAllocator::malloc(void** ptr, size_t size_raw) {
6667 CUmemGenericAllocationHandle handle;
6768 CU_CHECK (cuMemCreate (&handle, size, &prop, 0 ));
6869
69- CU_CHECK (cuMemAddressReserve ((CUdeviceptr *)ptr, size, granularity, 0 , 0 ));
70+ CU_CHECK (cuMemAddressReserve ((CUdeviceptr*)ptr, size, granularity, 0 , 0 ));
7071 CU_CHECK (cuMemMap ((CUdeviceptr)*ptr, size, 0 , handle, 0 ));
7172 cu_mem_set_access_all (*ptr, size);
7273 } else {
@@ -105,7 +106,7 @@ void SharedMemoryAllocator::open_mem_handle(void** ptr, MemHandle* mem_handle) {
105106 CUmemGenericAllocationHandle handle;
106107 CU_CHECK (cuMemImportFromShareableHandle (&handle, &mem_handle->inner .cu_mem_fabric_handle , CU_MEM_HANDLE_TYPE_FABRIC));
107108
108- CU_CHECK (cuMemAddressReserve ((CUdeviceptr *)ptr, size, 0 , 0 , 0 ));
109+ CU_CHECK (cuMemAddressReserve ((CUdeviceptr*)ptr, size, 0 , 0 , 0 ));
109110 CU_CHECK (cuMemMap ((CUdeviceptr)*ptr, size, 0 , handle, 0 ));
110111 cu_mem_set_access_all (*ptr, size);
111112 } else {
@@ -120,7 +121,7 @@ void SharedMemoryAllocator::close_mem_handle(void* ptr) {
120121 CUDA_CHECK (cudaIpcCloseMemHandle (ptr));
121122 }
122123}
123- }
124+ } // namespace shared_memory
124125
125126namespace deep_ep {
126127
@@ -178,7 +179,8 @@ Buffer::Buffer(int rank,
178179
179180 if (num_nvl_bytes > 0 ) {
180181 // Local IPC: alloc local memory and set local IPC handles
181- shared_memory_allocator.malloc (&buffer_ptrs[nvl_rank], num_nvl_bytes + barrier_signal_bytes + buffer_ptr_bytes + barrier_signal_ptr_bytes);
182+ shared_memory_allocator.malloc (&buffer_ptrs[nvl_rank],
183+ num_nvl_bytes + barrier_signal_bytes + buffer_ptr_bytes + barrier_signal_ptr_bytes);
182184 shared_memory_allocator.get_mem_handle (&ipc_handles[nvl_rank], buffer_ptrs[nvl_rank]);
183185 buffer_ptrs_gpu = reinterpret_cast <void **>(static_cast <uint8_t *>(buffer_ptrs[nvl_rank]) + num_nvl_bytes + barrier_signal_bytes);
184186
0 commit comments