Skip to content

Commit fa039de

Browse files
committed
Add Windows build jobs to CI
1 parent e686d70 commit fa039de

File tree

3 files changed

+24
-21
lines changed

3 files changed

+24
-21
lines changed

.github/workflows/build.yml

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -18,7 +18,7 @@ jobs:
1818
fail-fast: false
1919
matrix:
2020
os:
21-
["ubuntu-22.04"] # "windows-2025" # Disabled until solution/workaround for NVTX is present
21+
["ubuntu-22.04", "windows-2025"]
2222
# "ubuntu-24.04" # Postponed as long as testing against CUDA 12.1, needs 12.6+
2323
python: ["3.9", "3.10", "3.11", "3.12", "3.13"]
2424

@@ -55,9 +55,9 @@ jobs:
5555

5656
- name: Set up CUDA toolkit (Windows)
5757
if: runner.os == 'Windows'
58-
uses: Jimver/cuda-toolkit@master
58+
uses: Jimver/cuda-toolkit@v0.2.24 # https://github.com/Jimver/cuda-toolkit/issues/395
5959
with:
60-
cuda: "12.4.0"
60+
cuda: "12.8.1"
6161
method: "network"
6262

6363
- name: Install torch with CUDA support (Ubuntu)
@@ -66,7 +66,7 @@ jobs:
6666

6767
- name: Install torch with CUDA support (Windows)
6868
if: runner.os == 'Windows'
69-
run: python -m pip install torch --index-url https://download.pytorch.org/whl/cu124
69+
run: python -m pip install torch --index-url https://download.pytorch.org/whl/cu128
7070

7171
- name: Install torchhull
7272
run: python -m pip install --editable ".[dev]"

src/torchhull/_C/CMakeLists.txt

Lines changed: 6 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -42,8 +42,8 @@ if(NOT TARGET stdgpu::stdgpu)
4242
FetchContent_Declare(
4343
stdgpu
4444
PREFIX stdgpu
45-
URL https://github.com/stotko/stdgpu/archive/3a0b20e77a5eac672162fa5f6173ce9a34303d7f.tar.gz
46-
URL_HASH SHA256=4723bba67ccb67f3a0218515f555c4ed385ae2f638cf668b81d6d490c1f47fbc
45+
URL https://github.com/stotko/stdgpu/archive/abc7d0523c9921227c90bdadbb24d4a17e35de61.tar.gz
46+
URL_HASH SHA256=35aaf97a9d63817464c83020735e8761ad2ac64bef5c45e5e6b90601619b4fb5
4747
DOWNLOAD_DIR "${CMAKE_BINARY_DIR}/external/stdgpu"
4848
SYSTEM
4949
)
@@ -57,6 +57,9 @@ if(NOT TARGET stdgpu::stdgpu)
5757
set(STDGPU_BUILD_TESTS OFF CACHE INTERNAL "")
5858

5959
FetchContent_MakeAvailable(stdgpu)
60+
61+
find_package(CUDAToolkit REQUIRED)
62+
target_link_libraries(stdgpu PUBLIC CUDA::cudart_static)
6063
endif()
6164

6265

@@ -78,7 +81,7 @@ if(charonload_FOUND)
7881
target_compile_definitions(torchhull_cpp PRIVATE "__CUDA_NO_HALF_OPERATORS__")
7982
target_compile_features(torchhull_cpp PUBLIC cxx_std_17)
8083
target_compile_options(torchhull_cpp PRIVATE ${HOST_DEVICE_FLAGS})
81-
target_link_libraries(torchhull_cpp PRIVATE glm::glm stdgpu::stdgpu)
84+
target_link_libraries(torchhull_cpp PRIVATE glm::glm-header-only stdgpu::stdgpu)
8285

8386

8487
if(TORCHHULL_BUILD_BINDINGS)

src/torchhull/_C/src/gaussian_blur_cuda.cu

Lines changed: 14 additions & 14 deletions
Original file line number | Diff line number | Diff line change
@@ -447,6 +447,20 @@ gaussian_blur_cuda_sparse(const torch::Tensor& images,
447447
dim3 grid_convolution;
448448
at::cuda::getApplyGrid(M, grid_convolution, images.device().index(), threads_per_block);
449449

450+
#define CASE_TILE_CONVOLUTION_KERNEL_SPECIALIZED(KERNEL_SIZE) \
451+
case KERNEL_SIZE: \
452+
{ \
453+
tile_convolution_kernel_specialized<KERNEL_SIZE><<<grid_convolution, threads, 0, stream>>>(tile_indices_, \
454+
M, \
455+
tile_size, \
456+
sigma, \
457+
images_, \
458+
blurred_images_); \
459+
DEFER(AT_CUDA_CHECK(cudaGetLastError());) \
460+
DEFER(AT_CUDA_CHECK(cudaStreamSynchronize(stream));) \
461+
} \
462+
break;
463+
450464
AT_DISPATCH_ALL_TYPES_AND(
451465
torch::ScalarType::Half,
452466
images.scalar_type(),
@@ -464,20 +478,6 @@ gaussian_blur_cuda_sparse(const torch::Tensor& images,
464478
auto blurred_images_ =
465479
blurred_images.packed_accessor64<scalar_t, 4, torch::RestrictPtrTraits>();
466480

467-
#define CASE_TILE_CONVOLUTION_KERNEL_SPECIALIZED(KERNEL_SIZE) \
468-
case KERNEL_SIZE: \
469-
{ \
470-
tile_convolution_kernel_specialized<KERNEL_SIZE><<<grid_convolution, threads, 0, stream>>>(tile_indices_, \
471-
M, \
472-
tile_size, \
473-
sigma, \
474-
images_, \
475-
blurred_images_); \
476-
DEFER(AT_CUDA_CHECK(cudaGetLastError());) \
477-
DEFER(AT_CUDA_CHECK(cudaStreamSynchronize(stream));) \
478-
} \
479-
break;
480-
481481
switch (kernel_size)
482482
{
483483
// Tested all possible values up to 21 for specialization.

0 commit comments

Comments
 (0)