Skip to content
This repository was archived by the owner on Mar 21, 2024. It is now read-only.

Commit d84db8a

Browse files
author
dumerrill
committed
Fixes for rle test failures
Former-commit-id: fa35717193b7f11bcc918b4aa5b6b87d462ed4bc
1 parent ff028b7 commit d84db8a

File tree

4 files changed

+77
-7
lines changed

4 files changed

+77
-7
lines changed

README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
<hr>
22
<h3>About CUB</h3>
33

4-
Current release: v1.5.2 (03/21/2016)
4+
Current release: v1.5.3 (10/11/2016)
55

66
We recommend the [CUB Project Website](http://nvlabs.github.com/cub) and the [cub-users discussion forum](http://groups.google.com/group/cub-users) for further information and examples.
77

cub/agent/agent_rle.cuh

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -367,7 +367,7 @@ struct AgentRle
367367
LengthOffsetPair (&lengths_and_num_runs)[ITEMS_PER_THREAD])
368368
{
369369
// Perform warpscans
370-
int warp_id = ((WARPS == 1) ? 0 : threadIdx.x / WARP_THREADS);
370+
unsigned int warp_id = ((WARPS == 1) ? 0 : threadIdx.x / WARP_THREADS);
371371
int lane_id = LaneId();
372372

373373
LengthOffsetPair identity;
@@ -422,7 +422,7 @@ struct AgentRle
422422
Int2Type<true> is_warp_time_slice)
423423
{
424424
unsigned int warp_id = ((WARPS == 1) ? 0 : threadIdx.x / WARP_THREADS);
425-
unsigned int lane_id = LaneId();
425+
int lane_id = LaneId();
426426

427427
// Locally compact items within the warp (first warp)
428428
if (warp_id == 0)
@@ -479,7 +479,7 @@ struct AgentRle
479479
Int2Type<false> is_warp_time_slice)
480480
{
481481
unsigned int warp_id = ((WARPS == 1) ? 0 : threadIdx.x / WARP_THREADS);
482-
unsigned int lane_id = LaneId();
482+
int lane_id = LaneId();
483483

484484
// Unzip
485485
OffsetT run_offsets[ITEMS_PER_THREAD];
@@ -733,7 +733,7 @@ struct AgentRle
733733

734734
// First warp computes tile prefix in lane 0
735735
TilePrefixCallbackOpT prefix_op(tile_status, temp_storage.prefix, Sum(), tile_idx);
736-
int warp_id = ((WARPS == 1) ? 0 : threadIdx.x / WARP_THREADS);
736+
unsigned int warp_id = ((WARPS == 1) ? 0 : threadIdx.x / WARP_THREADS);
737737
if (warp_id == 0)
738738
{
739739
prefix_op(tile_aggregate);

cub/block/block_scan.cuh

Lines changed: 70 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1115,6 +1115,7 @@ public:
11151115

11161116
//@} end member group
11171117
#ifndef DOXYGEN_SHOULD_SKIP_THIS // Do not document no-initial-value scans
1118+
11181119
/******************************************************************//**
11191120
* \name Exclusive prefix scan operations (no initial value, single datum per thread)
11201121
*********************************************************************/
@@ -1161,10 +1162,79 @@ public:
11611162
InternalBlockScan(temp_storage).ExclusiveScan(input, output, scan_op, block_aggregate);
11621163
}
11631164

1165+
//@} end member group
1166+
/******************************************************************//**
1167+
* \name Exclusive prefix scan operations (no initial value, multiple data per thread)
1168+
*********************************************************************/
1169+
//@{
1170+
1171+
1172+
/**
1173+
* \brief Computes an exclusive block-wide prefix scan using the specified binary \p scan_op functor. Each thread contributes an array of consecutive input elements. With no initial value, the output computed for <em>thread</em><sub>0</sub> is undefined.
1174+
*
1175+
* \par
1176+
* - Supports non-commutative scan operators.
1177+
* - \blocked
1178+
* - \granularity
1179+
* - \smemreuse
* - Implemented in three steps: a per-thread reduction of \p input, an exclusive block-wide scan of the resulting per-thread partials, and a per-thread exclusive scan of \p input seeded with the scanned partial.
1180+
*
1181+
* \tparam ITEMS_PER_THREAD <b>[inferred]</b> The number of consecutive items partitioned onto each thread.
1182+
* \tparam ScanOp <b>[inferred]</b> Binary scan functor type having member <tt>T operator()(const T &a, const T &b)</tt>
1183+
*/
1184+
template <
1185+
int ITEMS_PER_THREAD,
1186+
typename ScanOp>
1187+
__device__ __forceinline__ void ExclusiveScan(
1188+
T (&input)[ITEMS_PER_THREAD], ///< [in] Calling thread's input items
1189+
T (&output)[ITEMS_PER_THREAD], ///< [out] Calling thread's output items (may be aliased to \p input)
1190+
ScanOp scan_op) ///< [in] Binary scan functor
1191+
{
1192+
// Reduce this thread's ITEMS_PER_THREAD consecutive items into a single partial using scan_op
1193+
T thread_partial = ThreadReduce(input, scan_op);
1194+
1195+
// Exclusive block-wide scan of the per-thread partials, performed in place on thread_partial (same variable is input and output)
1196+
ExclusiveScan(thread_partial, thread_partial, scan_op);
1197+
1198+
// Exclusive scan of this thread's items using thread_partial as the prefix. The last argument (linear_tid != 0) is false only for thread 0 -- presumably so that thread 0's undefined prefix is not combined into its items; confirm against ThreadScanExclusive
1199+
ThreadScanExclusive(input, output, scan_op, thread_partial, (linear_tid != 0));
1200+
}
1201+
1202+
1203+
/**
1204+
* \brief Computes an exclusive block-wide prefix scan using the specified binary \p scan_op functor. Each thread contributes an array of consecutive input elements. Also provides every thread with the block-wide \p block_aggregate of all inputs. With no initial value, the output computed for <em>thread</em><sub>0</sub> is undefined.
1205+
*
1206+
* \par
1207+
* - Supports non-commutative scan operators.
1208+
* - \blocked
1209+
* - \granularity
1210+
* - \smemreuse
* - Implemented in three steps: a per-thread reduction of \p input, an exclusive block-wide scan of the resulting per-thread partials (which also receives \p block_aggregate), and a per-thread exclusive scan of \p input seeded with the scanned partial.
1211+
*
1212+
* \tparam ITEMS_PER_THREAD <b>[inferred]</b> The number of consecutive items partitioned onto each thread.
1213+
* \tparam ScanOp <b>[inferred]</b> Binary scan functor type having member <tt>T operator()(const T &a, const T &b)</tt>
1214+
*/
1215+
template <
1216+
int ITEMS_PER_THREAD,
1217+
typename ScanOp>
1218+
__device__ __forceinline__ void ExclusiveScan(
1219+
T (&input)[ITEMS_PER_THREAD], ///< [in] Calling thread's input items
1220+
T (&output)[ITEMS_PER_THREAD], ///< [out] Calling thread's output items (may be aliased to \p input)
1221+
ScanOp scan_op, ///< [in] Binary scan functor
1222+
T &block_aggregate) ///< [out] block-wide aggregate reduction of input items
1223+
{
1224+
// Reduce this thread's ITEMS_PER_THREAD consecutive items into a single partial using scan_op
1225+
T thread_partial = ThreadReduce(input, scan_op);
1226+
1227+
// Exclusive block-wide scan of the per-thread partials, performed in place on thread_partial; block_aggregate is forwarded by reference to this scan
1228+
ExclusiveScan(thread_partial, thread_partial, scan_op, block_aggregate);
1229+
1230+
// Exclusive scan of this thread's items using thread_partial as the prefix. The last argument (linear_tid != 0) is false only for thread 0 -- presumably so that thread 0's undefined prefix is not combined into its items; confirm against ThreadScanExclusive
1231+
ThreadScanExclusive(input, output, scan_op, thread_partial, (linear_tid != 0));
1232+
}
11641233

11651234

11661235
//@} end member group
11671236
#endif // DOXYGEN_SHOULD_SKIP_THIS // Do not document no-initial-value scans
1237+
11681238
/******************************************************************//**
11691239
* \name Inclusive prefix sum operations
11701240
*********************************************************************/

test/test_util.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1430,9 +1430,9 @@ void DisplayResults(
14301430
/**
14311431
* Print the contents of a host array
14321432
*/
1433-
template <typename T>
1433+
template <typename InputIteratorT>
14341434
void DisplayResults(
1435-
T *h_data,
1435+
InputIteratorT h_data,
14361436
size_t num_items)
14371437
{
14381438
// Display data

0 commit comments

Comments
 (0)