Skip to content

Commit 3a1bbda

Browse files
committed
Snapshot of kokkos-kernels.git from commit ddfde88b830317ab9806b004a99eccf6e61ce98c
From repository at [email protected]:kokkos/kokkos-kernels.git At commit: commit ddfde88b830317ab9806b004a99eccf6e61ce98c Author: Luc Berger-Vergiat <[email protected]> Date: Mon Nov 17 14:32:45 2025 -0700 Update release action in release candidate branch This is required to get the github action to be triggered correctly with the new versioning scheme. Signed-off-by: Luc Berger-Vergiat <[email protected]> Signed-off-by: Nathan Ellingwood <[email protected]>
1 parent 6c62392 commit 3a1bbda

23 files changed

+1238
-310
lines changed

packages/kokkos-kernels/CHANGELOG.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,12 +4,14 @@
44
[Full Changelog](https://github.com/kokkos/kokkos-kernels/compare/4.7.01...5.0.0)
55

66
### Enhancements and updates:
7+
- Improve performance of par_ilut and improve benchmark [\#2846](https://github.com/kokkos/kokkos-kernels/pull/2846)
78
- Prefer bit manipulation functions from Kokkos Core [\#2786](https://github.com/kokkos/kokkos-kernels/pull/2786)
89
- Common - ArithTraits: moving from Kokkos to KokkosKernels [\#2771](https://github.com/kokkos/kokkos-kernels/pull/2771)
910
- Update headers in preparation for the 5.0 release [\#2795](https://github.com/kokkos/kokkos-kernels/pull/2795)
1011
- Add a member function to BsrMatrix to convert to Crs [\#2809](https://github.com/kokkos/kokkos-kernels/pull/2809)
1112

1213
### Bug Fixes:
14+
- Batched - QR: fixes for complex scalars [\#2590](https://github.com/kokkos/kokkos-kernels/pull/2590)
1315
- Kokkos Batched InnerTrsm: Allocate in inner scope to avoid warnings with Sacado types [\#2810](https://github.com/kokkos/kokkos-kernels/pull/2810)
1416
- Define Fortran interface macros for LAPACK [\#2802](https://github.com/kokkos/kokkos-kernels/pull/2802)
1517
- Fixed blas includes [\#2776](https://github.com/kokkos/kokkos-kernels/pull/2776)

packages/kokkos-kernels/batched/dense/impl/KokkosBatched_ApplyHouseholder_Serial_Impl.hpp

Lines changed: 28 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
#define KOKKOSBATCHED_APPLY_HOUSEHOLDER_SERIAL_IMPL_HPP
55

66
/// \author Kyungjoo Kim ([email protected])
7+
/// \author Luc Berger-Vergiat ([email protected])
78

89
#include "KokkosBatched_Util.hpp"
910
#include "KokkosBatched_Householder_Serial_Internal.hpp"
@@ -14,23 +15,33 @@ namespace KokkosBatched {
1415
/// Serial Impl
1516
/// ===========
1617

17-
template <>
18-
template <typename uViewType, typename tauViewType, typename AViewType, typename wViewType>
19-
KOKKOS_INLINE_FUNCTION int SerialApplyHouseholder<Side::Left>::invoke(const uViewType &u2, const tauViewType &tau,
20-
const AViewType &A, const wViewType &w) {
21-
return SerialApplyLeftHouseholderInternal::invoke(A.extent(0) - 1, A.extent(1), tau.data(), u2.data(), u2.stride(0),
22-
A.data(), A.stride(1), A.data() + A.stride(0), A.stride(0),
23-
A.stride(1), w.data());
24-
}
25-
26-
template <>
27-
template <typename uViewType, typename tauViewType, typename AViewType, typename wViewType>
28-
KOKKOS_INLINE_FUNCTION int SerialApplyHouseholder<Side::Right>::invoke(const uViewType &u2, const tauViewType &tau,
29-
const AViewType &A, const wViewType &w) {
30-
return SerialApplyRightHouseholderInternal::invoke(A.extent(0), A.extent(1) - 1, tau.data(), u2.data(), u2.stride(0),
31-
A.data(), A.stride(0), A.data() + A.stride(1), A.stride(0),
32-
A.stride(1), w.data());
33-
}
18+
template <typename ArgTrans>
19+
struct SerialApplyHouseholder<Side::Left, ArgTrans> {
20+
template <typename uViewType, typename tauViewType, typename AViewType, typename wViewType>
21+
KOKKOS_INLINE_FUNCTION static int invoke(const uViewType &u2, const tauViewType &tau, const AViewType &A,
22+
const wViewType &w) {
23+
if constexpr (AViewType::rank() == 1) {
24+
return SerialApplyLeftHouseholderInternal<ArgTrans>::invoke(A.extent(0) - 1, 1, tau.data(), u2.data(),
25+
u2.stride(0), A.data(), 1, A.data() + A.stride(0),
26+
A.stride(0), 1, w.data());
27+
} else {
28+
return SerialApplyLeftHouseholderInternal<ArgTrans>::invoke(
29+
A.extent(0) - 1, A.extent(1), tau.data(), u2.data(), u2.stride(0), A.data(), A.stride(1),
30+
A.data() + A.stride(0), A.stride(0), A.stride(1), w.data());
31+
}
32+
}
33+
};
34+
35+
template <typename ArgTrans>
36+
struct SerialApplyHouseholder<Side::Right, ArgTrans> {
37+
template <typename uViewType, typename tauViewType, typename AViewType, typename wViewType>
38+
KOKKOS_INLINE_FUNCTION static int invoke(const uViewType &u2, const tauViewType &tau, const AViewType &A,
39+
const wViewType &w) {
40+
return SerialApplyRightHouseholderInternal::invoke(A.extent(0), A.extent(1) - 1, tau.data(), u2.data(),
41+
u2.stride(0), A.data(), A.stride(0), A.data() + A.stride(1),
42+
A.stride(0), A.stride(1), w.data());
43+
}
44+
};
3445

3546
} // namespace KokkosBatched
3647

packages/kokkos-kernels/batched/dense/impl/KokkosBatched_ApplyHouseholder_Serial_Internal.hpp

Lines changed: 10 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
#define KOKKOSBATCHED_APPLY_HOUSEHOLDER_SERIAL_INTERNAL_HPP
55

66
/// \author Kyungjoo Kim ([email protected])
7+
/// \author Luc Berger-Vergiat ([email protected])
78

89
#include "KokkosBatched_Util.hpp"
910

@@ -15,6 +16,7 @@ namespace KokkosBatched {
1516
///
1617
/// this impl follows the flame interface of householder transformation
1718
///
19+
template <typename ArgTrans>
1820
struct SerialApplyLeftHouseholderInternal {
1921
template <typename ValueType>
2022
KOKKOS_INLINE_FUNCTION static int invoke(const int m, const int n, const ValueType* tau,
@@ -23,14 +25,16 @@ struct SerialApplyLeftHouseholderInternal {
2325
/* */ ValueType* A2, const int as0, const int as1,
2426
/* */ ValueType* w1t) {
2527
using value_type = ValueType;
28+
using KAT = KokkosKernels::ArithTraits<value_type>;
2629

2730
/// u2 m x 1
2831
/// a1t 1 x n
2932
/// A2 m x n
3033

3134
// apply a single householder transform H from the left to a row vector a1t
3235
// and a matrix A2
33-
const value_type inv_tau = value_type(1) / (*tau);
36+
const value_type inv_tau =
37+
std::is_same_v<Trans::Transpose, ArgTrans> ? KAT::one() / KAT::conj(*tau) : KAT::one() / *tau;
3438

3539
// compute the following:
3640
// a1t -= inv(tau)(a1t + u2'A2)
@@ -40,8 +44,7 @@ struct SerialApplyLeftHouseholderInternal {
4044
// w1t /= tau
4145
for (int j = 0; j < n; ++j) {
4246
value_type tmp = a1t[j * a1ts];
43-
for (int i = 0; i < m; ++i)
44-
tmp += KokkosKernels::ArithTraits<value_type>::conj(u2[i * u2s]) * A2[i * as0 + j * as1];
47+
for (int i = 0; i < m; ++i) tmp += KAT::conj(u2[i * u2s]) * A2[i * as0 + j * as1];
4548
w1t[j] = tmp * inv_tau; // /= (*tau);
4649
}
4750

@@ -64,13 +67,14 @@ struct SerialApplyRightHouseholderInternal {
6467
/* */ ValueType* A2, const int as0, const int as1,
6568
/* */ ValueType* w1) {
6669
using value_type = ValueType;
70+
using KAT = KokkosKernels::ArithTraits<value_type>;
6771
/// u2 n x 1
6872
/// a1 m x 1
6973
/// A2 m x n
7074

71-
// apply a single householder transform H from the left to a row vector a1t
75+
// apply a single householder transform H from the right to a column vector a1
7276
// and a matrix A2
73-
const value_type inv_tau = value_type(1) / (*tau);
77+
const value_type inv_tau = KAT::one() / *tau;
7478

7579
// compute the following:
7680
// a1 -= inv(tau)(a1 + A2 u2)
@@ -89,8 +93,7 @@ struct SerialApplyRightHouseholderInternal {
8993

9094
// A2 -= w1 * u2' (ger with conjugate)
9195
for (int j = 0; j < n; ++j)
92-
for (int i = 0; i < m; ++i)
93-
A2[i * as0 + j * as1] -= w1[i] * KokkosKernels::ArithTraits<ValueType>::conj(u2[j * u2s]);
96+
for (int i = 0; i < m; ++i) A2[i * as0 + j * as1] -= w1[i] * KAT::conj(u2[j * u2s]);
9497

9598
return 0;
9699
}

packages/kokkos-kernels/batched/dense/impl/KokkosBatched_ApplyQ_Serial_Internal.hpp

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -59,8 +59,8 @@ struct SerialApplyQ_LeftForwardInternal {
5959
const int m_A2 = m - m_A0 - 1;
6060
/// -----------------------------------------------------
6161
// left apply householder to partitioned B1 and B2
62-
SerialApplyLeftHouseholderInternal::invoke(m_A2, n, tau, A_part3x3.A21, as0, B_part3x1.A1, bs1, B_part3x1.A2, bs0,
63-
bs1, w);
62+
SerialApplyLeftHouseholderInternal<Trans::NoTranspose>::invoke(m_A2, n, tau, A_part3x3.A21, as0, B_part3x1.A1,
63+
bs1, B_part3x1.A2, bs0, bs1, w);
6464

6565
/// -----------------------------------------------------
6666
A_part2x2.mergeToABR(A_part3x3);
@@ -113,8 +113,8 @@ struct SerialApplyQ_LeftBackwardInternal {
113113
const int m_A2 = m - m_A0 - 1;
114114
/// -----------------------------------------------------
115115
// left apply householder to partitioned B1 and B2
116-
SerialApplyLeftHouseholderInternal::invoke(m_A2, n, tau, A_part3x3.A21, as0, B_part3x1.A1, bs1, B_part3x1.A2, bs0,
117-
bs1, w);
116+
SerialApplyLeftHouseholderInternal<Trans::Transpose>::invoke(m_A2, n, tau, A_part3x3.A21, as0, B_part3x1.A1, bs1,
117+
B_part3x1.A2, bs0, bs1, w);
118118
/// -----------------------------------------------------
119119
A_part2x2.mergeToATL(A_part3x3);
120120
t_part2x1.mergeToAT(t_part3x1);

packages/kokkos-kernels/batched/dense/impl/KokkosBatched_Householder_Serial_Internal.hpp

Lines changed: 23 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -21,37 +21,39 @@ struct SerialLeftHouseholderInternal {
2121
/* */ ValueType* chi1,
2222
/* */ ValueType* x2, const int x2s,
2323
/* */ ValueType* tau) {
24-
typedef ValueType value_type;
25-
typedef typename KokkosKernels::ArithTraits<ValueType>::mag_type mag_type;
24+
using value_type = ValueType;
25+
using KAT = KokkosKernels::ArithTraits<value_type>;
26+
using mag_type = typename KAT::mag_type;
27+
using KAT_mag = KokkosKernels::ArithTraits<mag_type>;
2628

27-
const mag_type zero(0);
28-
const mag_type half(0.5);
29-
const mag_type one(1);
30-
const mag_type minus_one(-1);
29+
const mag_type zero = KAT_mag::zero();
30+
const mag_type one = KAT_mag::one();
31+
const mag_type half = one / (one + one);
32+
const mag_type minus_one = -one;
3133

3234
/// compute the 2norm of x2
33-
mag_type norm_x2_square(0);
35+
mag_type norm_x2_square = zero;
3436
for (int i = 0; i < m_x2; ++i) {
3537
const auto x2_at_i = x2[i * x2s];
36-
norm_x2_square += Kokkos::abs(x2_at_i) * Kokkos::abs(x2_at_i);
38+
norm_x2_square += Kokkos::real(Kokkos::conj(x2_at_i) * x2_at_i);
3739
}
3840

3941
/// if norm_x2 is zero, return with trivial values
4042
if (norm_x2_square == zero) {
4143
*chi1 = -(*chi1);
42-
*tau = half;
44+
*tau = half * KAT::one();
4345

4446
return 0;
4547
}
4648

4749
/// compute magnitude of chi1, equal to norm2 of chi1
48-
const mag_type norm_chi1 = KokkosKernels::ArithTraits<value_type>::abs(*chi1);
50+
const mag_type norm_chi1 = KAT::abs(*chi1);
4951

5052
/// compute 2 norm of x using norm_chi1 and norm_x2
51-
const mag_type norm_x = KokkosKernels::ArithTraits<mag_type>::sqrt(norm_x2_square + norm_chi1 * norm_chi1);
53+
const mag_type norm_x = KAT_mag::sqrt(norm_x2_square + norm_chi1 * norm_chi1);
5254

5355
/// compute alpha
54-
const mag_type alpha = (*chi1 < KokkosKernels::ArithTraits<value_type>::zero() ? one : minus_one) * norm_x;
56+
const mag_type alpha = (Kokkos::real(*chi1) < zero ? one : minus_one) * norm_x;
5557

5658
/// overwrite x2 with u2
5759
const value_type chi1_minus_alpha = *chi1 - alpha;
@@ -62,8 +64,16 @@ struct SerialLeftHouseholderInternal {
6264
// SerialScaleInternal::invoke(m_x2, inv_chi1_minus_alpha, x2, x2s);
6365

6466
/// compute tau
67+
// Note that in the complex case we have
68+
// multiple possible expressions for tau
69+
// we chose the same as LAPACK which
70+
// guarantees that R is real valued.
6571
const mag_type chi1_minus_alpha_square = Kokkos::abs(chi1_minus_alpha) * Kokkos::abs(chi1_minus_alpha);
66-
*tau = half + half * (norm_x2_square / chi1_minus_alpha_square);
72+
if constexpr (KAT::is_complex) {
73+
*tau = alpha / (alpha - *chi1);
74+
} else {
75+
*tau = half + half * (norm_x2_square / chi1_minus_alpha_square);
76+
}
6777

6878
/// overwrite chi1 with alpha
6979
*chi1 = alpha;

packages/kokkos-kernels/batched/dense/impl/KokkosBatched_Householder_TeamVector_Impl.hpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -14,8 +14,8 @@ namespace KokkosBatched {
1414
/// TeamVector Impl
1515
/// ===============
1616

17-
template <typename MemberType>
18-
template <typename aViewType, typename tauViewType>
17+
template <>
18+
template <typename MemberType, typename aViewType, typename tauViewType>
1919
KOKKOS_INLINE_FUNCTION int TeamVectorHouseholder<Side::Left>::invoke(const MemberType &member, const aViewType &a,
2020
const tauViewType &tau) {
2121
return TeamVectorLeftHouseholderInternal::invoke(member, a.extent(0) - 1, a.data(), a.data() + a.stride(0),

packages/kokkos-kernels/batched/dense/impl/KokkosBatched_QR_Serial_Internal.hpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -56,8 +56,8 @@ struct SerialQR_Internal {
5656
SerialLeftHouseholderInternal::invoke(m_A22, A_part3x3.A11, A_part3x3.A21, as0, tau);
5757

5858
// left apply householder to A22
59-
SerialApplyLeftHouseholderInternal::invoke(m_A22, n_A22, tau, A_part3x3.A21, as0, A_part3x3.A12, as1,
60-
A_part3x3.A22, as0, as1, w);
59+
SerialApplyLeftHouseholderInternal<Trans::Transpose>::invoke(m_A22, n_A22, tau, A_part3x3.A21, as0, A_part3x3.A12,
60+
as1, A_part3x3.A22, as0, as1, w);
6161
/// -----------------------------------------------------
6262
A_part2x2.mergeToATL(A_part3x3);
6363
t_part2x1.mergeToAT(t_part3x1);

packages/kokkos-kernels/batched/dense/impl/KokkosBatched_SVD_Serial_Internal.hpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -161,7 +161,7 @@ struct SerialSVDInternal {
161161
KokkosBatched::SerialLeftHouseholderInternal::invoke<value_type>(m - i - 1, &SVDIND(A, i, i),
162162
&SVDIND(A, i + 1, i), As0, &tau);
163163
if (n - i > 1) {
164-
KokkosBatched::SerialApplyLeftHouseholderInternal::invoke<value_type>(
164+
KokkosBatched::SerialApplyLeftHouseholderInternal<Trans::NoTranspose>::invoke<value_type>(
165165
m - i - 1, n - i - 1, &tau, &SVDIND(A, i + 1, i), As0, &SVDIND(A, i, i + 1), As1, &SVDIND(A, i + 1, i + 1),
166166
As0, As1, work);
167167
}
@@ -183,7 +183,7 @@ struct SerialSVDInternal {
183183
&SVDIND(A, i + 1, i + 2), As0, As1, work);
184184
}
185185
if (Vt) {
186-
KokkosBatched::SerialApplyLeftHouseholderInternal::invoke<value_type>(
186+
KokkosBatched::SerialApplyLeftHouseholderInternal<Trans::NoTranspose>::invoke<value_type>(
187187
n - i - 2, n, &tau, &SVDIND(A, i, i + 2), As1, &SVDIND(Vt, i + 1, 0), Vts1, &SVDIND(Vt, i + 2, 0), Vts0,
188188
Vts1, work);
189189
}

0 commit comments

Comments
 (0)