Skip to content

Commit b7babdc

Browse files
mdavis36adayton1
andauthored
ManagedSharedPtr (#298)
* Add ManagedSharedPtr as an alternative to managed_ptr. It adds basic reference counting on the host and experimental automatic migration * Improves object lifetime management and extends it to work with additional types * Some reorganization and clean up --------- Co-authored-by: Alan Dayton <[email protected]>
1 parent 92262a8 commit b7babdc

18 files changed

+2311
-117
lines changed

cmake/SetupChaiOptions.cmake

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
#
55
# SPDX-License-Identifier: BSD-3-Clause
66
############################################################################
7+
option(CHAI_ENABLE_EXPERIMENTAL "Enable experimental chai features." On)
78
option(CHAI_ENABLE_GPU_SIMULATION_MODE "Enable GPU Simulation Mode" Off)
89
option(CHAI_ENABLE_OPENMP "Enable OpenMP" Off)
910
option(CHAI_ENABLE_MPI "Enable MPI (for umpire replay only)" Off)

src/chai/ArrayManager.hpp

Lines changed: 4 additions & 99 deletions
Original file line numberDiff line numberDiff line change
@@ -7,12 +7,12 @@
77
#ifndef CHAI_ArrayManager_HPP
88
#define CHAI_ArrayManager_HPP
99

10-
#include "chai/config.hpp"
1110
#include "chai/ChaiMacros.hpp"
1211
#include "chai/ExecutionSpaces.hpp"
13-
#include "chai/PointerRecord.hpp"
1412
#include "chai/Types.hpp"
1513

14+
#include "chai/PointerRecord.hpp"
15+
1616
#if defined(CHAI_ENABLE_RAJA_PLUGIN)
1717
#include "chai/pluginLinker.hpp"
1818
#endif
@@ -22,107 +22,12 @@
2222
#include "umpire/Allocator.hpp"
2323
#include "umpire/util/MemoryMap.hpp"
2424

25-
#if defined(CHAI_ENABLE_CUDA)
26-
#include <cuda_runtime_api.h>
27-
#endif
28-
#if defined(CHAI_ENABLE_HIP)
29-
#include "hip/hip_runtime_api.h"
30-
#endif
3125

32-
namespace chai
33-
{
34-
// CHAI_GPU_ERROR_CHECK macro
35-
#if defined(CHAI_ENABLE_CUDA) || defined(CHAI_ENABLE_HIP)
26+
#include "chai/DeviceHelpers.hpp"
3627

37-
#ifdef CHAI_ENABLE_GPU_ERROR_CHECKING
3828

39-
#ifdef CHAI_ENABLE_CUDA
40-
inline void gpuErrorCheck(cudaError_t code, const char *file, int line, bool abort=true)
41-
{
42-
if (code != cudaSuccess) {
43-
fprintf(stderr, "[CHAI] GPU Error: %s %s %d\n", cudaGetErrorString(code), file, line);
44-
if (abort) {
45-
exit(code);
46-
}
47-
}
48-
}
49-
#elif defined(CHAI_ENABLE_HIP)
50-
inline void gpuErrorCheck(hipError_t code, const char *file, int line, bool abort=true)
29+
namespace chai
5130
{
52-
if (code != hipSuccess) {
53-
fprintf(stderr, "[CHAI] GPU Error: %s %s %d\n", hipGetErrorString(code), file, line);
54-
if (abort) {
55-
exit(code);
56-
}
57-
}
58-
}
59-
#endif
60-
61-
62-
#define CHAI_GPU_ERROR_CHECK(code) { gpuErrorCheck((code), __FILE__, __LINE__); }
63-
#else // CHAI_ENABLE_GPU_ERROR_CHECKING
64-
#define CHAI_GPU_ERROR_CHECK(code) code
65-
#endif // CHAI_ENABLE_GPU_ERROR_CHECKING
66-
67-
#endif
68-
69-
// wrapper for hip/cuda synchronize
70-
inline void synchronize() {
71-
#if defined (CHAI_ENABLE_HIP) &&!defined(__HIP_DEVICE_COMPILE__)
72-
CHAI_GPU_ERROR_CHECK(hipDeviceSynchronize());
73-
#elif defined (CHAI_ENABLE_CUDA) &&!defined(__CUDA_ARCH__)
74-
CHAI_GPU_ERROR_CHECK(cudaDeviceSynchronize());
75-
#endif
76-
}
77-
78-
#if defined(CHAI_GPUCC) || defined(CHAI_ENABLE_GPU_SIMULATION_MODE)
79-
80-
// wrapper for hip/cuda free
81-
CHAI_HOST inline void gpuFree(void* buffer) {
82-
#if defined(CHAI_ENABLE_GPU_SIMULATION_MODE)
83-
free(buffer);
84-
#elif defined (CHAI_ENABLE_HIP)
85-
CHAI_GPU_ERROR_CHECK(hipFree(buffer));
86-
#elif defined (CHAI_ENABLE_CUDA)
87-
CHAI_GPU_ERROR_CHECK(cudaFree(buffer));
88-
#endif
89-
}
90-
91-
// wrapper for hip/cuda malloc
92-
CHAI_HOST inline void gpuMalloc(void** devPtr, size_t size) {
93-
#if defined(CHAI_ENABLE_GPU_SIMULATION_MODE)
94-
*devPtr = (void*)malloc(size);
95-
#elif defined (CHAI_ENABLE_HIP)
96-
CHAI_GPU_ERROR_CHECK(hipMalloc(devPtr, size));
97-
#elif defined (CHAI_ENABLE_CUDA)
98-
CHAI_GPU_ERROR_CHECK(cudaMalloc(devPtr, size));
99-
#endif
100-
}
101-
102-
// wrapper for hip/cuda managed malloc
103-
CHAI_HOST inline void gpuMallocManaged(void** devPtr, size_t size) {
104-
#if defined(CHAI_ENABLE_GPU_SIMULATION_MODE)
105-
*devPtr = (void*)malloc(size);
106-
#elif defined (CHAI_ENABLE_HIP)
107-
CHAI_GPU_ERROR_CHECK(hipMallocManaged(devPtr, size));
108-
#elif defined (CHAI_ENABLE_CUDA)
109-
CHAI_GPU_ERROR_CHECK(cudaMallocManaged(devPtr, size));
110-
#endif
111-
}
112-
113-
// wrapper for hip/cuda mem copy
114-
CHAI_HOST inline void gpuMemcpy(void* dst, const void* src, size_t count, gpuMemcpyKind kind) {
115-
#if defined(CHAI_ENABLE_GPU_SIMULATION_MODE)
116-
memcpy(dst, src, count);
117-
#elif defined (CHAI_ENABLE_HIP)
118-
CHAI_GPU_ERROR_CHECK(hipMemcpy(dst, src, count, kind));
119-
#elif defined (CHAI_ENABLE_CUDA)
120-
CHAI_GPU_ERROR_CHECK(cudaMemcpy(dst, src, count, kind));
121-
#endif
122-
}
123-
124-
#endif //#if defined(CHAI_GPUCC)
125-
12631
/*!
12732
* \brief Singleton that manages caching and movement of ManagedArray objects.
12833
*

src/chai/CMakeLists.txt

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,13 +15,24 @@ set (chai_headers
1515
ArrayManager.hpp
1616
ArrayManager.inl
1717
ChaiMacros.hpp
18+
DeviceHelpers.hpp
1819
ExecutionSpaces.hpp
1920
ManagedArray.hpp
2021
ManagedArray.inl
2122
managed_ptr.hpp
2223
PointerRecord.hpp
2324
Types.hpp)
2425

26+
if(CHAI_ENABLE_EXPERIMENTAL)
27+
set(chai_headers
28+
${chai_headers}
29+
ManagedSharedPtr.hpp
30+
SharedPtrCounter.hpp
31+
SharedPtrManager.hpp
32+
SharedPtrManager.inl
33+
SharedPointerRecord.hpp)
34+
endif()
35+
2536
if(CHAI_DISABLE_RM)
2637
set(chai_headers
2738
${chai_headers}
@@ -31,6 +42,12 @@ endif ()
3142
set (chai_sources
3243
ArrayManager.cpp)
3344

45+
if(CHAI_ENABLE_EXPERIMENTAL)
46+
set (chai_sources
47+
${chai_sources}
48+
SharedPtrManager.cpp)
49+
endif ()
50+
3451
set (chai_depends
3552
umpire)
3653

src/chai/ChaiMacros.hpp

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,13 @@
3131
#define gpuMemcpyDeviceToDevice cudaMemcpyDeviceToDevice
3232
#define gpuMemcpyDefault cudaMemcpyDefault
3333

34+
#define gpuSuccess cudaSuccess
35+
#define gpuError_t cudaError_t
36+
#define gpuGetErrorString cudaGetErrorString
37+
#define gpuPeekAtLastError cudaPeekAtLastError
38+
#define gpuDeviceSynchronize cudaDeviceSynchronize
39+
40+
3441
// NOTE: Cannot have if defined(__HIPCC__) in the condition below, since __HIPCC__ comes from the included header hip_runtime below.
3542
#elif defined(CHAI_ENABLE_HIP)
3643

@@ -48,6 +55,12 @@
4855
#define gpuMemcpyDeviceToDevice hipMemcpyDeviceToDevice
4956
#define gpuMemcpyDefault hipMemcpyDefault
5057

58+
#define gpuSuccess hipSuccess
59+
#define gpuError_t hipError_t
60+
#define gpuGetErrorString hipGetErrorString
61+
#define gpuPeekAtLastError hipPeekAtLastError
62+
#define gpuDeviceSynchronize hipDeviceSynchronize
63+
5164
#else
5265

5366
#define CHAI_HOST

src/chai/DeviceHelpers.hpp

Lines changed: 88 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,88 @@
1+
#ifndef CHAI_DEVICE_HELPERS_HPP
2+
#define CHAI_DEVICE_HELPERS_HPP
3+
4+
#include "chai/config.hpp"
5+
#include "chai/ChaiMacros.hpp"
6+
7+
namespace chai
8+
{
9+
// CHAI_GPU_ERROR_CHECK macro
10+
#ifdef CHAI_ENABLE_DEVICE
11+
12+
#ifdef CHAI_ENABLE_GPU_ERROR_CHECKING
13+
14+
inline void gpuErrorCheck(gpuError_t code, const char *file, int line, bool abort=true)
15+
{
16+
if (code != gpuSuccess) {
17+
fprintf(stderr, "[CHAI] GPU Error: %s %s %d\n", gpuGetErrorString(code), file, line);
18+
if (abort) {
19+
exit(code);
20+
}
21+
}
22+
}
23+
24+
#define CHAI_GPU_ERROR_CHECK(code) { ::chai::gpuErrorCheck((code), __FILE__, __LINE__); }
25+
#else // CHAI_ENABLE_GPU_ERROR_CHECKING
26+
#define CHAI_GPU_ERROR_CHECK(code) code
27+
#endif // CHAI_ENABLE_GPU_ERROR_CHECKING
28+
29+
#endif
30+
31+
// wrapper for hip/cuda synchronize
32+
inline void synchronize() {
33+
#if defined(CHAI_ENABLE_DEVICE) && !defined(CHAI_DEVICE_COMPILE)
34+
CHAI_GPU_ERROR_CHECK(gpuDeviceSynchronize());
35+
#endif
36+
}
37+
38+
#if defined(CHAI_GPUCC) || defined(CHAI_ENABLE_GPU_SIMULATION_MODE)
39+
40+
// wrapper for hip/cuda free
41+
CHAI_HOST inline void gpuFree(void* buffer) {
42+
#if defined(CHAI_ENABLE_GPU_SIMULATION_MODE)
43+
free(buffer);
44+
#elif defined (CHAI_ENABLE_HIP)
45+
CHAI_GPU_ERROR_CHECK(hipFree(buffer));
46+
#elif defined (CHAI_ENABLE_CUDA)
47+
CHAI_GPU_ERROR_CHECK(cudaFree(buffer));
48+
#endif
49+
}
50+
51+
// wrapper for hip/cuda malloc
52+
CHAI_HOST inline void gpuMalloc(void** devPtr, size_t size) {
53+
#if defined(CHAI_ENABLE_GPU_SIMULATION_MODE)
54+
*devPtr = (void*)malloc(size);
55+
#elif defined (CHAI_ENABLE_HIP)
56+
CHAI_GPU_ERROR_CHECK(hipMalloc(devPtr, size));
57+
#elif defined (CHAI_ENABLE_CUDA)
58+
CHAI_GPU_ERROR_CHECK(cudaMalloc(devPtr, size));
59+
#endif
60+
}
61+
62+
// wrapper for hip/cuda managed malloc
63+
CHAI_HOST inline void gpuMallocManaged(void** devPtr, size_t size) {
64+
#if defined(CHAI_ENABLE_GPU_SIMULATION_MODE)
65+
*devPtr = (void*)malloc(size);
66+
#elif defined (CHAI_ENABLE_HIP)
67+
CHAI_GPU_ERROR_CHECK(hipMallocManaged(devPtr, size));
68+
#elif defined (CHAI_ENABLE_CUDA)
69+
CHAI_GPU_ERROR_CHECK(cudaMallocManaged(devPtr, size));
70+
#endif
71+
}
72+
73+
// wrapper for hip/cuda mem copy
74+
CHAI_HOST inline void gpuMemcpy(void* dst, const void* src, size_t count, gpuMemcpyKind kind) {
75+
#if defined(CHAI_ENABLE_GPU_SIMULATION_MODE)
76+
memcpy(dst, src, count);
77+
#elif defined (CHAI_ENABLE_HIP)
78+
CHAI_GPU_ERROR_CHECK(hipMemcpy(dst, src, count, kind));
79+
#elif defined (CHAI_ENABLE_CUDA)
80+
CHAI_GPU_ERROR_CHECK(cudaMemcpy(dst, src, count, kind));
81+
#endif
82+
}
83+
84+
#endif //#if defined(CHAI_GPUCC)
85+
86+
} // namespace chai
87+
88+
#endif // CHAI_DEVICE_HELPERS_HPP

src/chai/ManagedArray.hpp

Lines changed: 22 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -373,14 +373,14 @@ class ManagedArray : public CHAICopyable
373373
// shenanigan reasons need to be defined here.
374374
#if !defined(CHAI_DISABLE_RM)
375375
// if T is a CHAICopyable, then it is important to initialize all the
376-
// ManagedArrays to nullptr at allocation, since it is extremely easy to
376+
// elements with default constructors, since it is extremely easy to
377377
// trigger a moveInnerImpl, which expects inner values to be initialized.
378378
template <bool B = std::is_base_of<CHAICopyable, T>::value,
379379
typename std::enable_if<B, int>::type = 0>
380380
CHAI_HOST bool initInner(size_t start = 0)
381381
{
382382
for (size_t i = start; i < m_size/sizeof(T); ++i) {
383-
m_active_base_pointer[i] = nullptr;
383+
new (&m_active_base_pointer[i]) T();
384384
}
385385
return true;
386386
}
@@ -392,6 +392,26 @@ class ManagedArray : public CHAICopyable
392392
{
393393
return false;
394394
}
395+
396+
// if T is a CHAICopyable, then it is important to free all the
397+
// CHAICopyable containers, which expect inner values to be initialized.
398+
template <bool B = std::is_base_of<CHAICopyable, T>::value,
399+
typename std::enable_if<B, int>::type = 0>
400+
CHAI_HOST bool freeInner(size_t start = 0)
401+
{
402+
for (size_t i = start; i < m_size/sizeof(T); ++i) {
403+
m_active_base_pointer[i].~T();
404+
}
405+
return true;
406+
}
407+
408+
// Do not deep initialize if T is not a CHAICopyable.
409+
template <bool B = std::is_base_of<CHAICopyable, T>::value,
410+
typename std::enable_if<!B, int>::type = 0>
411+
CHAI_HOST bool freeInner(size_t = 0)
412+
{
413+
return false;
414+
}
395415
#endif
396416
protected:
397417
/*!

src/chai/ManagedArray.inl

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -249,12 +249,14 @@ CHAI_HOST void ManagedArray<T>::reallocate(size_t elems)
249249
// trigger a moveInnerImpl, which expects inner values to be initialized.
250250
if (initInner(old_size/sizeof(T))) {
251251
// if we are active on the GPU, we need to send any newly initialized inner members to the device
252+
#if defined(CHAI_ENABLE_CUDA) || defined(CHAI_ENABLE_HIP) || defined(CHAI_ENABLE_GPU_SIMULATION_MODE)
252253
if (m_pointer_record->m_last_space == GPU && old_size < m_size) {
253254
umpire::ResourceManager & umpire_rm = umpire::ResourceManager::getInstance();
254255
void *src = (void *)(((char *)(m_pointer_record->m_pointers[CPU])) + old_size);
255256
void *dst = (void *)(((char *)(m_pointer_record->m_pointers[GPU])) + old_size);
256257
umpire_rm.copy(dst,src,m_size-old_size);
257258
}
259+
#endif
258260
}
259261

260262
CHAI_LOG(Debug, "m_active_ptr reallocated at address: " << m_active_pointer);
@@ -276,6 +278,8 @@ CHAI_HOST void ManagedArray<T>::free(ExecutionSpace space)
276278
if (m_pointer_record == &ArrayManager::s_null_record) {
277279
m_pointer_record = m_resource_manager->makeManaged((void *)m_active_base_pointer,m_size,space,true);
278280
}
281+
freeInner();
282+
279283
m_resource_manager->free(m_pointer_record, space);
280284
m_active_pointer = nullptr;
281285
m_active_base_pointer = nullptr;

0 commit comments

Comments
 (0)