Skip to content

Commit 4c78ee6

Browse files
committed
MueLu: Guard against potential NaN/infs in strength-of-connection algorithms
Signed-off-by: malphil <[email protected]>
1 parent bfa0c9f commit 4c78ee6

File tree

3 files changed

+52
-4
lines changed

3 files changed

+52
-4
lines changed

packages/muelu/src/Graph/MatrixTransformation/MueLu_CutDrop.hpp

Lines changed: 13 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -679,6 +679,8 @@ class CutDropFunctor {
679679
size_t keepStart = 0;
680680
size_t dropStart = nnz;
681681
// find index where dropping starts
682+
683+
bool nonFiniteValueEncountered = false;
682684
for (size_t i = 1; i < nnz; ++i) {
683685
auto const& x = row_permutation(i - 1);
684686
auto const& y = row_permutation(i);
@@ -693,6 +695,17 @@ class CutDropFunctor {
693695
dropStart = i;
694696
}
695697
}
698+
699+
nonFiniteValueEncountered |= !is_finite_type_safe(x_aij);
700+
nonFiniteValueEncountered |= !is_finite_type_safe(y_aij);
701+
}
702+
703+
if (nonFiniteValueEncountered) {
704+
const char* message =
705+
"Error encountered in MueLu::CutDrop::CutDropFunctor::operator():\n"
706+
"Non-finite values encountered in strength-of-connection measure.\n"
707+
"A potential fix is to enable rebalancing and/or perform an initial rebalance.\n";
708+
Kokkos::abort(message);
696709
}
697710

698711
// drop everything to the right of where values stop passing threshold

packages/muelu/src/Graph/MatrixTransformation/MueLu_DistanceLaplacianDropping.hpp

Lines changed: 30 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -697,6 +697,7 @@ class DropFunctor {
697697
}
698698
#endif
699699

700+
bool nonFiniteValueEncountered = false;
700701
for (local_ordinal_type k = 0; k < row.length; ++k) {
701702
auto clid = row.colidx(k);
702703

@@ -711,11 +712,14 @@ class DropFunctor {
711712
auto aiiajj = ATS::magnitude(diag(rlid)) * ATS::magnitude(diag(clid)); // |a_ii|*|a_jj|
712713
auto aij2 = ATS::magnitude(val) * ATS::magnitude(val); // |a_ij|^2
713714

715+
nonFiniteValueEncountered |= !is_finite_type_safe(aiiajj) || !is_finite_type_safe(aij2);
716+
714717
results(offset + k) = Kokkos::max((aij2 <= eps * eps * aiiajj) ? DROP : KEEP,
715718
results(offset + k));
716719
} else if constexpr (measure == Misc::SignedRugeStuebenMeasure) {
717-
auto neg_aij = -ATS::real(val);
718-
auto max_neg_aik = eps * ATS::real(diag(rlid));
720+
auto neg_aij = -ATS::real(val);
721+
auto max_neg_aik = eps * ATS::real(diag(rlid));
722+
nonFiniteValueEncountered |= !is_finite_type_safe(neg_aij) || !is_finite_type_safe(max_neg_aik);
719723
results(offset + k) = Kokkos::max((neg_aij < max_neg_aik) ? DROP : KEEP,
720724
results(offset + k));
721725
} else if constexpr (measure == Misc::SignedSmoothedAggregationMeasure) {
@@ -725,10 +729,19 @@ class DropFunctor {
725729
// + |a_ij|^2, if a_ij < 0, - |a_ij|^2 if a_ij >=0
726730
if (!is_nonpositive)
727731
aij2 = -aij2;
732+
nonFiniteValueEncountered |= !is_finite_type_safe(aiiajj) || !is_finite_type_safe(aij2);
728733
results(offset + k) = Kokkos::max((aij2 <= eps * eps * aiiajj) ? DROP : KEEP,
729734
results(offset + k));
730735
}
731736
}
737+
738+
if (nonFiniteValueEncountered) {
739+
const char* message =
740+
"Error encountered in MueLu::DistanceLaplacian::DropFunctor::operator():\n"
741+
"Non-finite values encountered in strength-of-connection measure.\n"
742+
"A potential fix is to enable rebalancing and/or perform an initial rebalance.\n";
743+
Kokkos::abort(message);
744+
}
732745
}
733746
};
734747

@@ -832,6 +845,7 @@ class VectorDropFunctor {
832845
}
833846
#endif
834847

848+
bool nonFiniteValueEncountered = false;
835849
for (local_ordinal_type k = 0; k < row.length; ++k) {
836850
auto clid = row.colidx(k);
837851
auto bclid = ghosted_point_to_block(clid);
@@ -847,11 +861,14 @@ class VectorDropFunctor {
847861
auto aiiajj = ATS::magnitude(diag(brlid)) * ATS::magnitude(diag(bclid)); // |a_ii|*|a_jj|
848862
auto aij2 = ATS::magnitude(val) * ATS::magnitude(val); // |a_ij|^2
849863

864+
nonFiniteValueEncountered |= !is_finite_type_safe(aiiajj) || !is_finite_type_safe(aij2);
865+
850866
results(offset + k) = Kokkos::max((aij2 <= eps * eps * aiiajj) ? DROP : KEEP,
851867
results(offset + k));
852868
} else if constexpr (measure == Misc::SignedRugeStuebenMeasure) {
853-
auto neg_aij = -ATS::real(val);
854-
auto max_neg_aik = eps * ATS::real(diag(brlid));
869+
auto neg_aij = -ATS::real(val);
870+
auto max_neg_aik = eps * ATS::real(diag(brlid));
871+
nonFiniteValueEncountered |= !is_finite_type_safe(neg_aij) || !is_finite_type_safe(max_neg_aik);
855872
results(offset + k) = Kokkos::max((neg_aij < max_neg_aik) ? DROP : KEEP,
856873
results(offset + k));
857874
} else if constexpr (measure == Misc::SignedSmoothedAggregationMeasure) {
@@ -861,10 +878,19 @@ class VectorDropFunctor {
861878
// + |a_ij|^2, if a_ij < 0, - |a_ij|^2 if a_ij >=0
862879
if (!is_nonpositive)
863880
aij2 = -aij2;
881+
nonFiniteValueEncountered |= !is_finite_type_safe(aiiajj) || !is_finite_type_safe(aij2);
864882
results(offset + k) = Kokkos::max((aij2 <= eps * eps * aiiajj) ? DROP : KEEP,
865883
results(offset + k));
866884
}
867885
}
886+
887+
if (nonFiniteValueEncountered) {
888+
const char* message =
889+
"Error encountered in MueLu::DistanceLaplacian::VectorDropFunctor::operator():\n"
890+
"Non-finite values encountered in strength-of-connection measure.\n"
891+
"A potential fix is to enable rebalancing and/or perform an initial rebalance.\n";
892+
Kokkos::abort(message);
893+
}
868894
}
869895
};
870896

packages/muelu/src/Graph/MatrixTransformation/MueLu_DroppingCommon.hpp

Lines changed: 9 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -36,6 +36,15 @@ enum DecisionType : char {
3636
BOUNDARY = 3 // entry is a boundary
3737
};
3838

// Finiteness check that can be instantiated for any value type T.
//
// When std::is_floating_point_v<T> holds, the result is Kokkos::isfinite(value).
// For every other T the value is not inspected and the function returns true;
// because the branch is selected with `if constexpr`, Kokkos::isfinite is not
// instantiated for those types.
// NOTE(review): this means complex or user-defined scalar types would never be
// reported as non-finite by this helper -- confirm that is the intended behavior.
39+
template <typename T>
40+
KOKKOS_INLINE_FUNCTION bool is_finite_type_safe(T value) {
41+
if constexpr (std::is_floating_point_v<T>) {
42+
return Kokkos::isfinite(value);
43+
} else {
44+
return true;  // non-floating-point T: value is not examined
45+
}
46+
}
47+
3948
namespace Misc {
4049

4150
template <class local_ordinal_type>

0 commit comments

Comments
 (0)