Skip to content
35 changes: 16 additions & 19 deletions include/oneapi/dpl/pstl/algorithm_fwd.h
Original file line number Diff line number Diff line change
Expand Up @@ -442,14 +442,15 @@ __brick_bounded_copy_if(_RandomAccessIterator1, typename std::iterator_traits<_R
_RandomAccessIterator2, typename std::iterator_traits<_RandomAccessIterator2>::difference_type,
_UnaryPredicate, /*vector=*/std::true_type) noexcept;

// Forward declaration of the __brick_calc_mask_1 overload selected by the
// /*vector=*/ ::std::false_type tag. It accepts an iterator pair, a
// `bool* __restrict` mask pointer and a unary predicate, and returns a pair of
// _DiffType values. _DiffType cannot be deduced from the arguments and has to
// be supplied explicitly by the caller.
template <class _DiffType, class _Iterator, class _Predicate>
::std::pair<_DiffType, _DiffType>
__brick_calc_mask_1(_Iterator, _Iterator, bool* __restrict, _Predicate, /*vector=*/::std::false_type) noexcept;
// Forward declaration of the __brick_calc_mask_1 overload selected by the
// /*vector=*/ ::std::true_type tag. Same argument list as the false_type
// overload: iterator pair, `bool* __restrict` mask pointer, unary predicate.
// Returns a pair of _DiffType values; _DiffType must be given explicitly.
template <class _DiffType, class _RandomIt, class _Predicate>
::std::pair<_DiffType, _DiffType>
__brick_calc_mask_1(_RandomIt, _RandomIt, bool* __restrict, _Predicate, /*vector=*/::std::true_type) noexcept;
// Forward declaration of the __brick_compute_mask overload selected by the
// /*vector=*/ std::false_type tag. Arguments, in order: the start iterator, the
// number of elements (_Size, deduced from this argument), a predicate object
// and the `bool*` mask pointer. Returns a pair of _Size values.
template <class _Iterator, class _Size, class _IndexedPredicate>
std::pair<_Size, _Size>
__brick_compute_mask(_Iterator, _Size, _IndexedPredicate, bool*, /*vector=*/std::false_type) noexcept;

// Forward declaration of the __brick_compute_mask overload selected by the
// /*vector=*/ std::true_type tag. Arguments, in order: the start iterator, the
// number of elements (_Size, deduced from this argument), a predicate object
// and the `bool*` mask pointer. Returns a pair of _Size values.
template <class _Iterator, class _Size, class _IndexedPredicate>
std::pair<_Size, _Size>
__brick_compute_mask(_Iterator, _Size, _IndexedPredicate, bool*, /*vector=*/std::true_type) noexcept;

template <class _ForwardIterator, class _OutputIterator>
void
Expand All @@ -458,7 +459,7 @@ __brick_copy_by_mask(_ForwardIterator, _ForwardIterator, _OutputIterator, bool*,

// Forward declaration of the __brick_copy_by_mask overload selected by the
// /*vector=*/ ::std::true_type tag.
// NOTE(review): the two parameter-list lines below differ only in the
// `__restrict` qualifier on the mask pointer; this looks like a diff rendering
// that shows both the old and the new line without +/- markers. Only one of
// them can be part of a valid declaration - confirm against the real header.
template <class _RandomAccessIterator, class _OutputIterator>
void
// Variant with a `bool* __restrict` mask parameter.
__brick_copy_by_mask(_RandomAccessIterator, _RandomAccessIterator, _OutputIterator, bool* __restrict,
// Variant with a plain `bool*` mask parameter.
__brick_copy_by_mask(_RandomAccessIterator, _RandomAccessIterator, _OutputIterator, bool*,
/*vector=*/::std::true_type) noexcept;

template <class _RandomAccessIterator1, class _RandomAccessIterator2, class _Bound, class _Assigner>
Expand All @@ -481,6 +482,12 @@ void
__brick_partition_by_mask(_RandomAccessIterator, _RandomAccessIterator, _OutputIterator1, _OutputIterator2, bool*,
/*vector=*/::std::true_type) noexcept;

// Forward declaration of __parallel_selective_copy. Arguments, in order: a
// __parallel_tag<_IsVector> dispatch tag, the execution policy (forwarding
// reference), the input start iterator, the element count, the output start
// iterator and a predicate object. The return type is the output iterator type.
template <class _IsVector, class _ExecutionPolicy, class _InputIt, class _Size, class _OutputIt,
          class _IndexedPredicate>
_OutputIt
__parallel_selective_copy(__parallel_tag<_IsVector>, _ExecutionPolicy&&, _InputIt, _Size, _OutputIt,
                          _IndexedPredicate);

template <class _Tag, class _ExecutionPolicy, class _ForwardIterator, class _OutputIterator, class _UnaryPredicate>
_OutputIterator
__pattern_copy_if(_Tag, _ExecutionPolicy&&, _ForwardIterator, _ForwardIterator, _OutputIterator,
Expand Down Expand Up @@ -561,16 +568,6 @@ _OutputIterator
__pattern_unique_copy(_Tag, _ExecutionPolicy&&, _ForwardIterator, _ForwardIterator, _OutputIterator,
_BinaryPredicate) noexcept;

// Forward declaration of the __brick_calc_mask_2 overload selected by the
// /*vector=*/ ::std::false_type tag. It accepts an iterator pair, a
// `bool* __restrict` mask pointer and a binary predicate, and returns a
// _DifferenceType value.
//
// The template parameter list deliberately starts with _DifferenceType: that
// type cannot be deduced from the arguments, so callers name it explicitly as
// __brick_calc_mask_2<_DifferenceType>(...). A leading _ExecutionPolicy
// parameter (as previously declared here) would swallow that explicit argument
// and make this declaration describe a different template than the definition.
template <class _DifferenceType, class _RandomAccessIterator, class _BinaryPredicate>
_DifferenceType
__brick_calc_mask_2(_RandomAccessIterator, _RandomAccessIterator, bool* __restrict, _BinaryPredicate,
                    /*vector=*/::std::false_type) noexcept;

// Forward declaration of the __brick_calc_mask_2 overload selected by the
// /*vector=*/ ::std::true_type tag. It accepts an iterator pair, a
// `bool* __restrict` mask pointer and a binary predicate, and returns a
// _DiffType value. _DiffType is not deducible and must be given explicitly.
template <class _DiffType, class _RandomIt, class _BinaryPred>
_DiffType
__brick_calc_mask_2(_RandomIt, _RandomIt, bool* __restrict, _BinaryPred, /*vector=*/::std::true_type) noexcept;

template <class _IsVector, class _ExecutionPolicy, class _RandomAccessIterator1, class _RandomAccessIterator2,
class _BinaryPredicate>
_RandomAccessIterator2
Expand Down
187 changes: 68 additions & 119 deletions include/oneapi/dpl/pstl/algorithm_impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -1244,36 +1244,27 @@ __brick_bounded_copy_if(_RandomAccessIterator1 __first,
return {__first, __result};
}

// TODO: Try to use transform_reduce for combining __brick_copy_if_phase1 on IsVector.
// NOTE(review): this span reads like a diff rendering whose +/- markers were lost. Two function
// headers follow one another and the body mixes statements written against both parameter lists,
// so it is not valid C++ as shown. The comments below describe the two variants separately;
// which lines belong to the current file must be confirmed against the real header.
//
// Variant 1 header: __brick_calc_mask_1, iterator-pair interface, /*vector=*/ false_type tag.
template <class _DifferenceType, class _ForwardIterator, class _UnaryPredicate>
::std::pair<_DifferenceType, _DifferenceType>
__brick_calc_mask_1(_ForwardIterator __first, _ForwardIterator __last, bool* __restrict __mask, _UnaryPredicate __pred,
/*vector=*/::std::false_type) noexcept
// Variant 2 header: __brick_compute_mask, (iterator, length) interface, /*vector=*/ false_type tag.
// The predicate precedes the mask pointer and the mask pointer carries no __restrict qualifier.
template <class _RandomAccessIterator, class _DifferenceType, class _IterPredicate>
std::pair<_DifferenceType, _DifferenceType>
__brick_compute_mask(_RandomAccessIterator __first, _DifferenceType __len, _IterPredicate __pred, bool* __mask,
/*vector=*/std::false_type) noexcept
{
// Variant 1: counter of true flags and the range size derived from the iterator pair.
auto __count_true = _DifferenceType(0);
auto __size = __last - __first;

// Variant 1: compile-time check that the iterator type is random access.
static_assert(__is_random_access_iterator_v<_ForwardIterator>,
"Pattern-brick error. Should be a random access iterator.");

// Variant 1 loop header: advances the input iterator and the mask pointer together.
for (; __first != __last; ++__first, (void)++__mask)
// Variant 2: counter of true flags and an index-based loop over [0, __len).
_DifferenceType __count_true = 0;
for (_DifferenceType __i = 0; __i < __len; ++__i)
{
// Variant 1 body: store the predicate result for the current element and count it when true.
*__mask = __pred(*__first);
if (*__mask)
{
++__count_true;
}
// Variant 2 body: the predicate is called with the start iterator and the index; the stored
// bool flag is added to the counter directly.
__mask[__i] = __pred(__first, __i);
__count_true += __mask[__i];
}
// Variant 1 result: (number of true flags, __size minus that number).
return ::std::make_pair(__count_true, __size - __count_true);
// Variant 2 result: (number of true flags, __len minus that number).
return std::make_pair(__count_true, __len - __count_true);
}

// NOTE(review): this span reads like a diff rendering whose +/- markers were lost: two function
// headers and two alternative bodies are shown back to back, so it is not valid C++ as written.
// Which lines belong to the current file must be confirmed against the real header.
//
// Variant 1 header: __brick_calc_mask_1, iterator-pair interface, /*vector=*/ true_type tag.
template <class _DifferenceType, class _RandomAccessIterator, class _UnaryPredicate>
::std::pair<_DifferenceType, _DifferenceType>
__brick_calc_mask_1(_RandomAccessIterator __first, _RandomAccessIterator __last, bool* __mask, _UnaryPredicate __pred,
/*vector=*/::std::true_type) noexcept
// Variant 2 header: __brick_compute_mask, (iterator, length) interface, /*vector=*/ true_type tag.
template <class _RandomAccessIterator, class _DifferenceType, class _IterPredicate>
std::pair<_DifferenceType, _DifferenceType>
__brick_compute_mask(_RandomAccessIterator __first, _DifferenceType __len, _IterPredicate __pred, bool* __mask,
/*vector=*/std::true_type) noexcept
{
// Variant 1 body: delegates to __unseq_backend::__simd_calc_mask_1 with the range length and
// returns (its result, range length minus its result).
auto __result = __unseq_backend::__simd_calc_mask_1(__first, __last - __first, __mask, __pred);
return ::std::make_pair(__result, (__last - __first) - __result);
// Variant 2 body: delegates to __unseq_backend::__simd_compute_mask (predicate passed before the
// mask pointer) and returns (its result, __len minus its result).
auto __count_true = __unseq_backend::__simd_compute_mask(__first, __len, __pred, __mask);
return std::make_pair(__count_true, __len - __count_true);
}

template <class _ForwardIterator, class _OutputIterator, class _Assigner>
Expand All @@ -1294,13 +1285,9 @@ __brick_copy_by_mask(_ForwardIterator __first, _ForwardIterator __last, _OutputI
// __brick_copy_by_mask overload selected by the /*vector=*/ ::std::true_type tag.
// NOTE(review): this span reads like a diff rendering whose +/- markers were lost. The tail of
// the parameter list appears twice (with and without `__restrict` on the mask pointer), so the
// text is not valid C++ as shown; it is also unclear from here whether the preprocessor branch
// below is still part of the current file. Confirm against the real header.
template <class _RandomAccessIterator1, class _RandomAccessIterator2, class _Assigner>
void
__brick_copy_by_mask(_RandomAccessIterator1 __first, _RandomAccessIterator1 __last, _RandomAccessIterator2 __result,
// Parameter-list tail, variant with a `bool* __restrict` mask.
bool* __restrict __mask, _Assigner __assigner, /*vector=*/::std::true_type) noexcept
// Parameter-list tail, variant with a plain `bool*` mask.
bool* __mask, _Assigner __assigner, /*vector=*/::std::true_type) noexcept
{
// When either *_MONOTONIC_PRESENT macro is non-zero, forward to the __unseq_backend
// implementation, passing the range length instead of the end iterator.
#if (_PSTL_MONOTONIC_PRESENT || _ONEDPL_MONOTONIC_PRESENT)
__unseq_backend::__simd_copy_by_mask(__first, __last - __first, __result, __mask, __assigner);
#else
// Otherwise fall back to the overload selected by the ::std::false_type tag.
__internal::__brick_copy_by_mask(__first, __last, __result, __mask, __assigner, ::std::false_type());
#endif
}

template <class _RandomAccessIterator1, class _RandomAccessIterator2, class _Bound, class _Assigner>
Expand Down Expand Up @@ -1384,6 +1371,34 @@ __brick_partition_by_mask(_RandomAccessIterator1 __first, _RandomAccessIterator1
#endif
}

// Copies selected elements of the __n-element input starting at __first to __result.
//
// A bool flag buffer with one slot per input element is allocated first. The work is then
// handed to __par_backend::__parallel_strict_scan with four callbacks:
//   * reduce  - fills the flags of one chunk through __brick_compute_mask (forwarding __pred)
//               and reports the `.first` member of its result for that chunk;
//   * combine - std::plus on the per-chunk values;
//   * scan    - copies one chunk through __brick_copy_by_mask, writing at __result plus the
//               offset value supplied by the backend for that chunk;
//   * apex    - records the total supplied by the backend.
// Returns __result advanced by the recorded total. The scan runs inside
// __internal::__except_handler.
template <class _IsVector, class _ExecutionPolicy, class _RandomAccessIterator1, class _DifferenceType,
          class _RandomAccessIterator2, class _IterPredicate>
_RandomAccessIterator2
__parallel_selective_copy(__parallel_tag<_IsVector>, _ExecutionPolicy&& __exec, _RandomAccessIterator1 __first,
                          _DifferenceType __n, _RandomAccessIterator2 __result, _IterPredicate __pred)
{
    using __backend_tag = typename __parallel_tag<_IsVector>::__backend_tag;

    // The flag storage is created before entering the exception handler; only the raw pointer
    // is captured by the lambdas below.
    __par_backend::__buffer<bool> __flag_storage(__n);
    bool* __flags = __flag_storage.get();

    return __internal::__except_handler([&__exec, __n, __first, __result, __pred, __flags]() {
        _DifferenceType __selected{};
        __par_backend::__parallel_strict_scan(
            __backend_tag{}, std::forward<_ExecutionPolicy>(__exec), __n, _DifferenceType(0),
            // Reduce: compute the flags of the chunk [__offset, __offset + __chunk).
            [=](_DifferenceType __offset, _DifferenceType __chunk) {
                return __internal::__brick_compute_mask(__first + __offset, __chunk, __pred, __flags + __offset,
                                                        _IsVector{})
                    .first;
            },
            // Combine: add up per-chunk values.
            std::plus<_DifferenceType>(),
            // Scan: copy the chunk's flagged elements to their position in the output.
            [=](_DifferenceType __offset, _DifferenceType __chunk, _DifferenceType __out_offset) {
                __internal::__brick_copy_by_mask(
                    __first + __offset, __first + (__offset + __chunk), __result + __out_offset,
                    __flags + __offset,
                    [](_RandomAccessIterator1 __src, _RandomAccessIterator2 __dst) { *__dst = *__src; },
                    _IsVector{});
            },
            // Apex: remember the overall total.
            [&__selected](_DifferenceType __total) { __selected = __total; });
        return __result + __selected;
    });
}

template <class _Tag, class _ExecutionPolicy, class _ForwardIterator, class _OutputIterator, class _UnaryPredicate>
_OutputIterator
__pattern_copy_if(_Tag, _ExecutionPolicy&&, _ForwardIterator __first, _ForwardIterator __last, _OutputIterator __result,
Expand All @@ -1397,35 +1412,16 @@ __pattern_copy_if(_Tag, _ExecutionPolicy&&, _ForwardIterator __first, _ForwardIt
template <class _IsVector, class _ExecutionPolicy, class _RandomAccessIterator1, class _RandomAccessIterator2,
class _UnaryPredicate>
_RandomAccessIterator2
__pattern_copy_if(__parallel_tag<_IsVector>, _ExecutionPolicy&& __exec, _RandomAccessIterator1 __first,
__pattern_copy_if(__parallel_tag<_IsVector> __tag, _ExecutionPolicy&& __exec, _RandomAccessIterator1 __first,
_RandomAccessIterator1 __last, _RandomAccessIterator2 __result, _UnaryPredicate __pred)
{
using __backend_tag = typename __parallel_tag<_IsVector>::__backend_tag;

using _DifferenceType = typename std::iterator_traits<_RandomAccessIterator1>::difference_type;
const _DifferenceType __n = __last - __first;
if (_DifferenceType(1) < __n)
{
__par_backend::__buffer<bool> __mask_buf(__n);
return __internal::__except_handler([&__exec, __n, __first, __result, __pred, &__mask_buf]() {
bool* __mask = __mask_buf.get();
_DifferenceType __m{};
__par_backend::__parallel_strict_scan(
__backend_tag{}, std::forward<_ExecutionPolicy>(__exec), __n, _DifferenceType(0),
[=](_DifferenceType __i, _DifferenceType __len) { // Reduce
return __internal::__brick_calc_mask_1<_DifferenceType>(__first + __i, __first + (__i + __len),
__mask + __i, __pred, _IsVector{})
.first;
},
::std::plus<_DifferenceType>(), // Combine
[=](_DifferenceType __i, _DifferenceType __len, _DifferenceType __initial) { // Scan
__internal::__brick_copy_by_mask(
__first + __i, __first + (__i + __len), __result + __initial, __mask + __i,
[](_RandomAccessIterator1 __x, _RandomAccessIterator2 __z) { *__z = *__x; }, _IsVector{});
},
[&__m](_DifferenceType __total) { __m = __total; });
return __result + __m;
});
return __parallel_selective_copy(
__tag, std::forward<_ExecutionPolicy>(__exec), __first, __n, __result,
[&__pred](_RandomAccessIterator1 __it, _DifferenceType __idx) { return __pred(__it[__idx]); });
}
// trivial sequence - use serial algorithm
return __internal::__brick_copy_if(__first, __last, __result, __pred, _IsVector{});
Expand All @@ -1445,13 +1441,16 @@ __pattern_bounded_copy_if(__parallel_tag<_IsVector>, _ExecutionPolicy&& __exec,

__par_backend::__buffer<bool> __mask_buf(__n);
bool* __mask = __mask_buf.get();
return __internal::__except_handler([&__exec, __n, __first, __result, __pred, __mask, __n_out]() {
auto __it_pred = [=](_RandomAccessIterator1 __it, _DifferenceType __idx) {
return std::invoke(__pred, __it[__idx]);
};
return __internal::__except_handler([&__exec, __n, __first, __result, __it_pred, __mask, __n_out]() {
_DifferenceType __res_in{__n}, __res_out{__n_out};
__par_backend::__parallel_strict_scan(
__backend_tag{}, std::forward<_ExecutionPolicy>(__exec), __n, _DifferenceType(0),
[=](_DifferenceType __i, _DifferenceType __len) { // Reduce
return __internal::__brick_calc_mask_1<_DifferenceType>(
__first + __i, __first + (__i + __len), __mask + __i, __pred, _IsVector{}).first;
return __internal::__brick_compute_mask(__first + __i, __len, __it_pred, __mask + __i, _IsVector{})
.first;
},
std::plus<_DifferenceType>(), // Combine
[=, &__res_in](_DifferenceType __i, _DifferenceType __len, _DifferenceType __initial) { // Scan
Expand Down Expand Up @@ -1701,74 +1700,22 @@ __pattern_unique_copy(_Tag, _ExecutionPolicy&&, _ForwardIterator __first, _Forwa
return __internal::__brick_unique_copy(__first, __last, __result, __pred, typename _Tag::__is_vector{});
}

// Serial mask computation, overload selected by the /*vector=*/ ::std::false_type tag.
//
// For every position in [__first, __last) the matching slot of __mask is set to
// !__pred(current element, element immediately before it). The predecessor is read through
// *(__first - 1), so the element just before __first must be readable.
// Returns the number of slots that were set to true.
template <class _DifferenceType, class _RandomAccessIterator, class _BinaryPredicate>
_DifferenceType
__brick_calc_mask_2(_RandomAccessIterator __first, _RandomAccessIterator __last, bool* __restrict __mask,
                    _BinaryPredicate __pred, /*vector=*/::std::false_type) noexcept
{
    _DifferenceType __kept = 0;
    while (__first != __last)
    {
        const bool __differs = !__pred(*__first, *(__first - 1));
        *__mask = __differs;
        if (__differs)
            ++__kept;
        ++__first;
        ++__mask;
    }
    return __kept;
}

// Mask computation overload selected by the /*vector=*/ ::std::true_type tag.
// Delegates to __unseq_backend::__simd_calc_mask_2, passing the length of [__first, __last)
// instead of the end iterator, and returns that function's result.
template <class _DifferenceType, class _RandomAccessIterator, class _BinaryPredicate>
_DifferenceType
__brick_calc_mask_2(_RandomAccessIterator __first, _RandomAccessIterator __last, bool* __restrict __mask,
                    _BinaryPredicate __pred, /*vector=*/::std::true_type) noexcept
{
    const auto __n = __last - __first;
    return __unseq_backend::__simd_calc_mask_2(__first, __n, __mask, __pred);
}

template <class _IsVector, class _ExecutionPolicy, class _RandomAccessIterator1, class _RandomAccessIterator2,
class _BinaryPredicate>
_RandomAccessIterator2
__pattern_unique_copy(__parallel_tag<_IsVector>, _ExecutionPolicy&& __exec, _RandomAccessIterator1 __first,
__pattern_unique_copy(__parallel_tag<_IsVector> __tag, _ExecutionPolicy&& __exec, _RandomAccessIterator1 __first,
_RandomAccessIterator1 __last, _RandomAccessIterator2 __result, _BinaryPredicate __pred)
{
using __backend_tag = typename __parallel_tag<_IsVector>::__backend_tag;

using _DifferenceType = typename std::iterator_traits<_RandomAccessIterator1>::difference_type;
const _DifferenceType __n = __last - __first;
_DifferenceType __n = __last - __first;
if (_DifferenceType(2) < __n)
{
__par_backend::__buffer<bool> __mask_buf(__n);
if (_DifferenceType(2) < __n)
{
return __internal::__except_handler([&__exec, __n, __first, __result, __pred, &__mask_buf]() {
bool* __mask = __mask_buf.get();
_DifferenceType __m{};
__par_backend::__parallel_strict_scan(
__backend_tag{}, std::forward<_ExecutionPolicy>(__exec), __n, _DifferenceType(0),
[=](_DifferenceType __i, _DifferenceType __len) -> _DifferenceType { // Reduce
_DifferenceType __extra = 0;
if (__i == 0)
{
// Special boundary case
__mask[__i] = true;
if (--__len == 0)
return 1;
++__i;
++__extra;
}
return __internal::__brick_calc_mask_2<_DifferenceType>(__first + __i, __first + (__i + __len),
__mask + __i, __pred, _IsVector{}) +
__extra;
},
::std::plus<_DifferenceType>(), // Combine
[=](_DifferenceType __i, _DifferenceType __len, _DifferenceType __initial) { // Scan
// Phase 2 is same as for __pattern_copy_if
__internal::__brick_copy_by_mask(
__first + __i, __first + (__i + __len), __result + __initial, __mask + __i,
[](_RandomAccessIterator1 __x, _RandomAccessIterator2 __z) { *__z = *__x; }, _IsVector{});
},
[&__m](_DifferenceType __total) { __m = __total; });
return __result + __m;
});
}
*__result++ = *__first++; // Always copy the first element
--__n;
return __parallel_selective_copy(__tag, std::forward<_ExecutionPolicy>(__exec), __first, __n, __result,
[&__pred](_RandomAccessIterator1 __it, _DifferenceType __idx) {
return !__pred(__it[__idx], __it[__idx - 1]);
});
}
// trivial sequence - use serial algorithm
return __internal::__brick_unique_copy(__first, __last, __result, __pred, _IsVector{});
Expand Down Expand Up @@ -2497,15 +2444,17 @@ __pattern_partition_copy(__parallel_tag<_IsVector>, _ExecutionPolicy&& __exec, _
if (_DifferenceType(1) < __n)
{
__par_backend::__buffer<bool> __mask_buf(__n);
return __internal::__except_handler([&__exec, __n, __first, __out_true, __out_false, __pred, &__mask_buf]() {
return __internal::__except_handler([&__exec, __n, __first, __out_true, __out_false, &__pred, &__mask_buf]() {
bool* __mask = __mask_buf.get();
_ReturnType __m{};
__par_backend::__parallel_strict_scan(
__backend_tag{}, std::forward<_ExecutionPolicy>(__exec), __n,
std::make_pair(_DifferenceType(0), _DifferenceType(0)),
[=](_DifferenceType __i, _DifferenceType __len) { // Reduce
return __internal::__brick_calc_mask_1<_DifferenceType>(__first + __i, __first + (__i + __len),
__mask + __i, __pred, _IsVector{});
[=, &__pred](_DifferenceType __i, _DifferenceType __len) { // Reduce
return __internal::__brick_compute_mask(
__first + __i, __len,
[&__pred](_RandomAccessIterator1 __it, _DifferenceType __idx) { return __pred(__it[__idx]); },
__mask + __i, _IsVector{});
},
[](const _ReturnType& __x, const _ReturnType& __y) -> _ReturnType {
return ::std::make_pair(__x.first + __y.first, __x.second + __y.second);
Expand Down
Loading
Loading