Skip to content

Commit b3786f6

Browse files
committed
Merge C++ Standard Parallelism and SYCL2020 implementations
1 parent 9ff46ec commit b3786f6

22 files changed

+359
-1164
lines changed

CMakeLists.txt

Lines changed: 2 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -154,15 +154,12 @@ include(cmake/register_models.cmake)
154154
register_model(serial SERIAL SerialStream.cpp)
155155
register_model(omp OMP OMPStream.cpp)
156156
register_model(ocl OCL OCLStream.cpp)
157-
register_model(std-data STD_DATA STDDataStream.cpp)
158-
register_model(std-indices STD_INDICES STDIndicesStream.cpp)
159-
register_model(std-ranges STD_RANGES STDRangesStream.cpp)
157+
register_model(std STD STDStream.cpp)
160158
register_model(hip HIP HIPStream.cpp)
161159
register_model(cuda CUDA CUDAStream.cu)
162160
register_model(kokkos KOKKOS KokkosStream.cpp)
163161
register_model(sycl SYCL SYCLStream.cpp)
164-
register_model(sycl2020-acc SYCL2020 SYCLStream2020.cpp)
165-
register_model(sycl2020-usm SYCL2020 SYCLStream2020.cpp)
162+
register_model(sycl2020 SYCL2020 SYCLStream2020.cpp)
166163
register_model(acc ACC ACCStream.cpp)
167164
# defining RAJA collides with the RAJA namespace so USE_RAJA
168165
register_model(raja USE_RAJA RAJAStream.cpp)

src/Stream.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,11 +7,14 @@
77

88
#pragma once
99

10+
#include <cstdint>
1011
#include <array>
1112
#include <vector>
1213
#include <string>
1314
#include "benchmark.h"
1415

16+
using std::intptr_t;
17+
1518
template <class T>
1619
class Stream
1720
{

src/StreamModels.h

Lines changed: 4 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -3,12 +3,8 @@
33

44
#if defined(CUDA)
55
#include "CUDAStream.h"
6-
#elif defined(STD_DATA)
7-
#include "STDDataStream.h"
8-
#elif defined(STD_INDICES)
9-
#include "STDIndicesStream.h"
10-
#elif defined(STD_RANGES)
11-
#include "STDRangesStream.hpp"
6+
#elif defined(STD)
7+
#include "STDStream.h"
128
#elif defined(TBB)
139
#include "TBBStream.hpp"
1410
#elif defined(THRUST)
@@ -63,17 +59,9 @@ std::unique_ptr<Stream<T>> make_stream(Args... args) {
6359
// Use the Kokkos implementation
6460
return std::make_unique<KokkosStream<T>>(args...);
6561

66-
#elif defined(STD_DATA)
62+
#elif defined(STD)
6763
// Use the C++ standard parallelism (STD) implementation
68-
return std::make_unique<STDDataStream<T>>(args...);
69-
70-
#elif defined(STD_INDICES)
71-
// Use the C++ STD index-oriented implementation
72-
return std::make_unique<STDIndicesStream<T>>(args...);
73-
74-
#elif defined(STD_RANGES)
75-
// Use the C++ STD ranges implementation
76-
return std::make_unique<STDRangesStream<T>>(args...);
64+
return std::make_unique<STDStream<T>>(args...);
7765

7866
#elif defined(TBB)
7967
// Use the TBB implementation

src/ci-test-compile.sh

Lines changed: 13 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -152,9 +152,10 @@ build_gcc() {
152152
*) dpl_conditional_flags="-DFETCH_ONEDPL=ON -DFETCH_TBB=ON -DUSE_TBB=ON -DCXX_EXTRA_FLAGS=-D_GLIBCXX_USE_TBB_PAR_BACKEND=0" ;;
153153
esac
154154
# Some distributions, like Ubuntu bionic, implement std par with TBB, so conditionally link it here
155-
run_build $name "${GCC_CXX:?}" std-data "$cxx $dpl_conditional_flags -DUSE_ONEDPL=$use_onedpl"
156-
run_build $name "${GCC_CXX:?}" std-indices "$cxx $dpl_conditional_flags -DUSE_ONEDPL=$use_onedpl"
157-
run_build $name "${GCC_CXX:?}" std-ranges "$cxx $dpl_conditional_flags -DUSE_ONEDPL=$use_onedpl"
155+
run_build $name "${GCC_CXX:?}" std "$cxx $dpl_conditional_flags -DUSE_ONEDPL=$use_onedpl -DSTDIMPL=DATA17"
156+
# Requires GCC 14 and newer CMake for C++23 support
157+
#run_build $name "${GCC_CXX:?}" std "$cxx $dpl_conditional_flags -DUSE_ONEDPL=$use_onedpl -DSTDIMPL=DATA23"
158+
run_build $name "${GCC_CXX:?}" std "$cxx $dpl_conditional_flags -DUSE_ONEDPL=$use_onedpl -DSTDIMPL=INDICES"
158159
done
159160

160161
run_build $name "${GCC_CXX:?}" tbb "$cxx -DONE_TBB_DIR=$TBB_LIB"
@@ -251,9 +252,11 @@ build_clang() {
251252
OFF) dpl_conditional_flags="-DCXX_EXTRA_LIBRARIES=${CLANG_STD_PAR_LIB:-}" ;;
252253
*) dpl_conditional_flags="-DFETCH_ONEDPL=ON -DFETCH_TBB=ON -DUSE_TBB=ON -DCXX_EXTRA_FLAGS=-D_GLIBCXX_USE_TBB_PAR_BACKEND=0" ;;
253254
esac
254-
run_build $name "${CLANG_CXX:?}" std-data "$cxx $dpl_conditional_flags -DUSE_ONEDPL=$use_onedpl"
255-
run_build $name "${CLANG_CXX:?}" std-indices "$cxx $dpl_conditional_flags -DUSE_ONEDPL=$use_onedpl"
256-
# run_build $name "${CLANG_CXX:?}" std-ranges "$cxx $dpl_conditional_flags -DUSE_ONEDPL=$use_onedpl" # not yet supported
255+
run_build $name "${CLANG_CXX:?}" std "$cxx $dpl_conditional_flags -DUSE_ONEDPL=$use_onedpl -DSTDIMPL=DATA17"
256+
# Requires GCC 14 and newer CMake for C++23 support
257+
# run_build $name "${CLANG_CXX:?}" std "$cxx $dpl_conditional_flags -DUSE_ONEDPL=$use_onedpl -DSTDIMPL=DATA23"
258+
# TODO: the clang used here is too old to build the INDICES variant
259+
#run_build $name "${CLANG_CXX:?}" std "$cxx $dpl_conditional_flags -DUSE_ONEDPL=$use_onedpl -DSTDIMPL=INDICES"
257260
done
258261

259262
run_build $name "${CLANG_CXX:?}" tbb "$cxx -DONE_TBB_DIR=$TBB_LIB"
@@ -270,8 +273,10 @@ build_clang() {
270273
build_nvhpc() {
271274
local name="nvhpc_build"
272275
local cxx="-DCMAKE_CXX_COMPILER=${NVHPC_NVCXX:?}"
273-
run_build $name "${NVHPC_NVCXX:?}" std-data "$cxx -DNVHPC_OFFLOAD=$NV_ARCH_CCXY"
274-
run_build $name "${NVHPC_NVCXX:?}" std-indices "$cxx -DNVHPC_OFFLOAD=$NV_ARCH_CCXY"
276+
run_build $name "${NVHPC_NVCXX:?}" std "$cxx -DNVHPC_OFFLOAD=$NV_ARCH_CCXY -DSTDIMPL=DATA17"
277+
# Requires GCC 14 and newer CMake for C++23 support
278+
# run_build $name "${NVHPC_NVCXX:?}" std "$cxx -DNVHPC_OFFLOAD=$NV_ARCH_CCXY -DSTDIMPL=DATA23"
279+
run_build $name "${NVHPC_NVCXX:?}" std "$cxx -DNVHPC_OFFLOAD=$NV_ARCH_CCXY -DSTDIMPL=INDICES"
275280

276281
run_build $name "${NVHPC_NVCXX:?}" acc "$cxx -DTARGET_DEVICE=gpu -DTARGET_PROCESSOR=px -DCUDA_ARCH=$NV_ARCH_CCXY"
277282
run_build $name "${NVHPC_NVCXX:?}" acc "$cxx -DTARGET_DEVICE=multicore -DTARGET_PROCESSOR=zen"

src/dpl_shim.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,12 +29,15 @@ T *alloc_raw(size_t size) { return sycl::malloc_shared<T>(size, exe_policy.queue
2929
template<typename T>
3030
void dealloc_raw(T *ptr) { sycl::free(ptr, exe_policy.queue()); }
3131

32+
#define WORKAROUND
33+
3234
#else
3335

3436
// auto exe_policy = dpl::execution::seq;
3537
// auto exe_policy = dpl::execution::par;
3638
static constexpr auto exe_policy = dpl::execution::par_unseq;
3739
#define USE_STD_PTR_ALLOC_DEALLOC
40+
#define WORKAROUND
3841

3942
#endif
4043

src/main.cpp

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -332,21 +332,22 @@ void check_solution(const size_t num_times, T const* a, T const* b, T const* c,
332332
size_t failed = 0;
333333
T max_rel = std::numeric_limits<T>::epsilon() * T(100.0);
334334
T max_rel_dot = std::numeric_limits<T>::epsilon() * T(10000000.0);
335-
auto check = [&](const char* name, T is, T should, T max_rel, size_t i = size_t(-1)) {
335+
auto check = [&](const char* name, T is, T should, T mrel, size_t i = size_t(-1)) {
336336
// Relative difference:
337337
T diff = std::abs(is - should);
338338
T abs_is = std::abs(is);
339339
T abs_sh = std::abs(should);
340340
T largest = std::max(abs_is, abs_sh);
341-
T same = diff <= largest * max_rel;
341+
T same = diff <= largest * mrel;
342342
if (!same || std::isnan(is)) {
343343
++failed;
344344
if (failed > 10) return;
345345
std::cerr << "FAILED validation of " << name;
346346
if (i != size_t(-1)) std::cerr << "[" << i << "]";
347347
std::cerr << ": " << is << " (is) != " << should
348348
<< " (should)" << ", diff=" << diff << " > "
349-
<< largest * max_rel << std::endl;
349+
<< largest * mrel << " (largest=" << largest
350+
<< ", max_rel=" << mrel << ")" << std::endl;
350351
}
351352
};
352353

@@ -360,9 +361,9 @@ void check_solution(const size_t num_times, T const* a, T const* b, T const* c,
360361

361362
// Calculate the L^infty-norm relative error
362363
for (size_t i = 0; i < array_size; ++i) {
363-
check("a", a[i], goldA, i, max_rel);
364-
check("b", b[i], goldB, i, max_rel);
365-
check("c", c[i], goldC, i, max_rel);
364+
check("a", a[i], goldA, max_rel, i);
365+
check("b", b[i], goldB, max_rel, i);
366+
check("c", c[i], goldC, max_rel, i);
366367
}
367368

368369
if (failed > 0 && !silence_errors)

src/std-data/STDDataStream.cpp

Lines changed: 0 additions & 117 deletions
This file was deleted.

0 commit comments

Comments
 (0)