From 9dc7b153778d0d91a0ae1f5ca9aa907cacd63dae Mon Sep 17 00:00:00 2001 From: Aras Pranckevicius Date: Sun, 22 Jan 2023 22:07:49 +0200 Subject: [PATCH] Build fixes for Windows / Visual Studio - popcount() implementations - work around the lack of std::aligned_alloc - fix cpu_codec.inl wrongly using #ifdef instead of #if for OpenMP flags (the flag is defined, just to zero). With OpenMP this does not yet compile on MSVC out of the box. - cmake: when using MSVC, indicate C++ 20 (for to_string), do not pass gcc/clang warning flags, and build as static library since public symbols do not have "please export me" attributes set on them. --- CMakeLists.txt | 22 ++++++++++++++++++---- src/ndzip/common.hh | 6 ++++++ src/ndzip/cpu_codec.inl | 25 +++++++++++++++++++++---- 3 files changed, 45 insertions(+), 8 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 58236f5..e53d4ad 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -75,11 +75,18 @@ if (NDZIP_BUILD_BENCHMARK AND NDZIP_WITH_3RDPARTY_BENCHMARKS) endif () -set(CMAKE_CXX_STANDARD 17) +if (NOT MSVC) + set(CMAKE_CXX_STANDARD 17) +else () + set(CMAKE_CXX_STANDARD 20) # to_string under MSVC needs C++20 +endif () + set(CMAKE_CUDA_STANDARD 17) -set(NDZIP_COMPILE_FLAGS -Wall -Wextra -Wno-attributes -Wimplicit-fallthrough) -set(NDZIP_CXX_FLAGS ${NDZIP_COMPILE_FLAGS} -Werror=return-type -Werror=init-self -Werror=undef) +if (NOT MSVC) + set(NDZIP_COMPILE_FLAGS -Wall -Wextra -Wno-attributes -Wimplicit-fallthrough) + set(NDZIP_CXX_FLAGS ${NDZIP_COMPILE_FLAGS} -Werror=return-type -Werror=init-self -Werror=undef) +endif() if (NDZIP_USE_HIPSYCL) # Aggressive inlining avoids GPU call stack allocation == global memory access bottleneck @@ -107,7 +114,7 @@ set(NDZIP_PROFILE_CONFIGURATIONS VARIABLE DIMENSIONS VALUES 1 2 3 ) -add_library(ndzip SHARED +set(NDZIP_LIB_SOURCES include/ndzip/ndzip.hh include/ndzip/offload.hh src/ndzip/common.hh @@ -115,6 +122,13 @@ add_library(ndzip SHARED src/ndzip/cpu_codec.inl 
src/ndzip/cpu_factory.cc ) + +if (MSVC) + # on windows, build as static library since symbol exports are not setup there + add_library(ndzip STATIC ${NDZIP_LIB_SOURCES}) +else () + add_library(ndzip SHARED ${NDZIP_LIB_SOURCES}) +endif () target_split_configured_sources(ndzip PRIVATE GENERATE cpu_encoder.cc FROM src/ndzip/cpu_codec.inl ${NDZIP_PROFILE_CONFIGURATIONS} diff --git a/src/ndzip/common.hh b/src/ndzip/common.hh index 0b74c4e..cda0262 100644 --- a/src/ndzip/common.hh +++ b/src/ndzip/common.hh @@ -590,6 +590,8 @@ NDZIP_UNIVERSAL inline unsigned popcount(unsigned int x) { #ifdef __CUDA_ARCH__ // NVCC regards __builtin_popcount as a __host__ function return __popc(static_cast(x)); +#elif defined(_MSC_VER) + return __popcnt(x); #else return __builtin_popcount(x); #endif @@ -604,6 +606,8 @@ NDZIP_UNIVERSAL inline unsigned popcount(unsigned long x) { static_assert(sizeof(unsigned long) == sizeof(unsigned long long)); return __popcll(static_cast(x)); } +#elif defined(_MSC_VER) + return __popcnt64(x); #else return __builtin_popcountl(x); #endif @@ -613,6 +617,8 @@ NDZIP_UNIVERSAL inline unsigned popcount(unsigned long long x) { #ifdef __CUDA_ARCH__ // NVCC regards __builtin_popcountll as a __host__ function return __popcll(static_cast(x)); +#elif defined(_MSC_VER) + return __popcnt64(x); #else return __builtin_popcountll(x); #endif diff --git a/src/ndzip/cpu_codec.inl b/src/ndzip/cpu_codec.inl index 36552ca..cf2e847 100644 --- a/src/ndzip/cpu_codec.inl +++ b/src/ndzip/cpu_codec.inl @@ -12,7 +12,7 @@ #include #endif -#ifdef NDZIP_OPENMP_SUPPORT +#if NDZIP_OPENMP_SUPPORT #include #include #include @@ -42,20 +42,20 @@ class simd_aligned_buffer { explicit simd_aligned_buffer(size_t size) { assert(size % simd_width_bytes == 0); - _memory = std::aligned_alloc(simd_width_bytes, size * sizeof(T)); + _memory = aligned_alloc(size); if (!_memory) { throw std::bad_alloc(); } } simd_aligned_buffer(simd_aligned_buffer &&other) noexcept { *this = std::move(other); } 
simd_aligned_buffer &operator=(simd_aligned_buffer &&other) noexcept { - std::free(_memory); + aligned_free(_memory); _memory = other._memory; other._memory = nullptr; return *this; } - ~simd_aligned_buffer() { std::free(_memory); } + ~simd_aligned_buffer() { aligned_free(_memory); } explicit operator bool() const { return _memory != nullptr; } @@ -67,6 +67,23 @@ class simd_aligned_buffer { const T &operator[](size_t i) const { return data()[i]; }; + private: + static void *aligned_alloc(size_t size) + { + #ifdef _MSC_VER + return _aligned_malloc(size * sizeof(T), simd_width_bytes); + #else + return std::aligned_alloc(simd_width_bytes, size * sizeof(T)); + #endif + } + static void aligned_free(void *mem) + { + #ifdef _MSC_VER + return _aligned_free(mem); + #else + return std::free(mem); + #endif + } private: void *_memory = nullptr; };