Skip to content
This repository was archived by the owner on Mar 21, 2024. It is now read-only.

Commit 2b6380c

Browse files
author
dumerrill
committed
Fix for illegal memory accesses when using custom value types in
reduce-by-value operations Former-commit-id: bc0077f
1 parent ae485f6 commit 2b6380c

File tree

2 files changed

+29
-10
lines changed

2 files changed

+29
-10
lines changed

cub/block_range/block_scan_prefix_operators.cuh

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -319,9 +319,9 @@ struct ScanTileState<T, false>
319319
void* allocations[3];
320320
size_t allocation_sizes[3];
321321

322-
allocation_sizes[0] = (num_tiles + TILE_STATUS_PADDING) * sizeof(StatusWord); // bytes needed for tile status descriptors
323-
allocation_sizes[1] = num_tiles * sizeof(Uninitialized<T>); // bytes needed for partials
324-
allocation_sizes[2] = num_tiles * sizeof(Uninitialized<T>); // bytes needed for inclusives
322+
allocation_sizes[0] = (num_tiles + TILE_STATUS_PADDING) * sizeof(StatusWord); // bytes needed for tile status descriptors
323+
allocation_sizes[1] = (num_tiles + TILE_STATUS_PADDING) * sizeof(Uninitialized<T>); // bytes needed for partials
324+
allocation_sizes[2] = (num_tiles + TILE_STATUS_PADDING) * sizeof(Uninitialized<T>); // bytes needed for inclusives
325325

326326
// Compute allocation pointers into the single storage blob
327327
if (CubDebug(error = AliasTemporaries(d_temp_storage, temp_storage_bytes, allocations, allocation_sizes))) break;
@@ -348,8 +348,8 @@ struct ScanTileState<T, false>
348348
// Specify storage allocation requirements
349349
size_t allocation_sizes[3];
350350
allocation_sizes[0] = (num_tiles + TILE_STATUS_PADDING) * sizeof(StatusWord); // bytes needed for tile status descriptors
351-
allocation_sizes[1] = num_tiles * sizeof(Uninitialized<T>); // bytes needed for partials
352-
allocation_sizes[2] = num_tiles * sizeof(Uninitialized<T>); // bytes needed for inclusives
351+
allocation_sizes[1] = (num_tiles + TILE_STATUS_PADDING) * sizeof(Uninitialized<T>); // bytes needed for partials
352+
allocation_sizes[2] = (num_tiles + TILE_STATUS_PADDING) * sizeof(Uninitialized<T>); // bytes needed for inclusives
353353

354354
// Set the necessary size of the blob
355355
void* allocations[3];
@@ -383,7 +383,7 @@ struct ScanTileState<T, false>
383383
__device__ __forceinline__ void SetInclusive(int tile_idx, T tile_inclusive)
384384
{
385385
// Update tile inclusive value
386-
ThreadStore<STORE_CG>(d_tile_inclusive + tile_idx, tile_inclusive);
386+
ThreadStore<STORE_CG>(d_tile_inclusive + TILE_STATUS_PADDING + tile_idx, tile_inclusive);
387387

388388
// Fence
389389
__threadfence();
@@ -399,7 +399,7 @@ struct ScanTileState<T, false>
399399
__device__ __forceinline__ void SetPartial(int tile_idx, T tile_partial)
400400
{
401401
// Update tile partial value
402-
ThreadStore<STORE_CG>(d_tile_partial + tile_idx, tile_partial);
402+
ThreadStore<STORE_CG>(d_tile_partial + TILE_STATUS_PADDING + tile_idx, tile_partial);
403403

404404
// Fence
405405
__threadfence();
@@ -422,8 +422,8 @@ struct ScanTileState<T, false>
422422
status = ThreadLoad<LOAD_CG>(d_tile_status + TILE_STATUS_PADDING + tile_idx);
423423
}
424424

425-
T partial = ThreadLoad<LOAD_CG>(d_tile_partial + tile_idx);
426-
T inclusive = ThreadLoad<LOAD_CG>(d_tile_inclusive + tile_idx);
425+
T partial = ThreadLoad<LOAD_CG>(d_tile_partial + TILE_STATUS_PADDING + tile_idx);
426+
T inclusive = ThreadLoad<LOAD_CG>(d_tile_inclusive + TILE_STATUS_PADDING + tile_idx);
427427

428428
value = (status == StatusWord(SCAN_TILE_PARTIAL)) ?
429429
partial :
@@ -475,7 +475,7 @@ struct BlockScanLookbackPrefixOp
475475
};
476476

477477
// Fields
478-
ScanTileState &tile_status; ///< Interface to tile status
478+
ScanTileState &tile_status; ///< Interface to tile status
479479
_TempStorage &temp_storage; ///< Reference to a warp-reduction instance
480480
ScanOp scan_op; ///< Binary scan operator
481481
int tile_idx; ///< The current tile index

test/Makefile

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -78,6 +78,10 @@ ifeq (300, $(findstring 300, $(SM_ARCH)))
7878
SM_TARGETS += -gencode=arch=compute_30,code=\"sm_30,compute_30\"
7979
SM_DEF += -DSM300
8080
endif
81+
ifeq (210, $(findstring 210, $(SM_ARCH)))
82+
SM_TARGETS += -gencode=arch=compute_20,code=\"sm_21,compute_20\"
83+
SM_DEF += -DSM210
84+
endif
8185
ifeq (200, $(findstring 200, $(SM_ARCH)))
8286
SM_TARGETS += -gencode=arch=compute_20,code=\"sm_20,compute_20\"
8387
SM_DEF += -DSM200
@@ -475,6 +479,21 @@ bin/test_device_reduce_by_key_$(SUFFIX) : test_device_reduce_by_key.cu $(DEPS)
475479
$(NVCC) $(DEFINES) $(SM_TARGETS) -o bin/test_device_reduce_by_key_$(SUFFIX) test_device_reduce_by_key.cu $(NVCCFLAGS) $(CPU_ARCH) $(INC) $(LIBS) -O3
476480

477481

482+
#-------------------------------------------------------------------------------
483+
# make fail
484+
#-------------------------------------------------------------------------------
485+
486+
fail: bin/fail_$(SUFFIX)
487+
488+
bin/fail_$(SUFFIX) : fail.cu $(DEPS)
489+
mkdir -p bin
490+
$(NVCC) $(DEFINES) $(SM_TARGETS) -o bin/fail_$(SUFFIX) fail.cu $(NVCCFLAGS) $(CPU_ARCH) $(INC) $(LIBS) -O3
491+
492+
493+
494+
495+
496+
478497

479498
#-------------------------------------------------------------------------------
480499
# make test_device_seg_reduce

0 commit comments

Comments
 (0)