Update rocPrim usage for ROCm7 (#2979) (#2987)

i-chaochen · mmakevic-amd · jayfurmanek · web-flow · commit d4d77b62cc6d · 2025-06-06T15:15:46.000+01:00
Co-authored-by: mmakevic-amd <Milica.Makevic@amd.com>
Co-authored-by: Jason Furmanek <Jason.Furmanek@amd.com>
diff --git a/tensorflow/core/kernels/gpu_prim.h b/tensorflow/core/kernels/gpu_prim.h
@@ -85,8 +85,8 @@ namespace gpuprim = ::hipcub;
 
 // Required for sorting Eigen::half and bfloat16.
 namespace rocprim {
+#if (TF_ROCM_VERSION >= 50200 && TF_ROCM_VERSION < 70000)
 namespace detail {
-#if (TF_ROCM_VERSION >= 50200)
 template <>
 struct float_bit_mask<Eigen::half> {
   static constexpr uint16_t sign_bit = 0x8000;
@@ -102,14 +102,35 @@ struct float_bit_mask<Eigen::bfloat16> {
   static constexpr uint16_t mantissa = 0x007F;
   using bit_type = uint16_t;
 };
+}; // namespace detail
+
+#elif (TF_ROCM_VERSION >= 70000)  // ROCm 7+ only; a bare #else would also catch TF_ROCM_VERSION < 50200.
+namespace traits {
+template<>  // Eigen::half: 16-bit storage; masks mirror the pre-ROCm 7 float_bit_mask (sign, exponent, mantissa).
+struct rocprim::traits::define<Eigen::half> {
+  using float_bit_mask = rocprim::traits::float_bit_mask::values<uint16_t, 0x8000, 0x7C00, 0x03FF>;
+  using is_arithmetic = rocprim::traits::is_arithmetic::values<true>;
+  using number_format = rocprim::traits::number_format::values<traits::number_format::kind::floating_point_type>;
+};
+
+template<>  // bfloat16: 16-bit storage; masks mirror the pre-ROCm 7 float_bit_mask (sign, exponent, mantissa).
+struct rocprim::traits::define<tsl::bfloat16> {
+  using float_bit_mask = rocprim::traits::float_bit_mask::values<uint16_t, 0x8000, 0x7F80, 0x007F>;
+  using is_arithmetic = rocprim::traits::is_arithmetic::values<true>;
+  using number_format = rocprim::traits::number_format::values<traits::number_format::kind::floating_point_type>;
+};
+}; // namespace traits
 #endif
+#if (TF_ROCM_VERSION < 70000)  // Pre-ROCm 7 only; presumably covered by traits::define on ROCm 7+ -- TODO confirm.
+namespace detail {
 template <>
 struct radix_key_codec_base<Eigen::half>
     : radix_key_codec_floating<Eigen::half, uint16_t> {};
 template <>
 struct radix_key_codec_base<tensorflow::bfloat16>
     : radix_key_codec_floating<tensorflow::bfloat16, uint16_t> {};
 };  // namespace detail
+#endif  // TF_ROCM_VERSION < 70000
 };  // namespace rocprim
 
 #endif  // TENSORFLOW_USE_ROCM
diff --git a/third_party/xla/xla/service/gpu/gpu_prim.h b/third_party/xla/xla/service/gpu/gpu_prim.h
@@ -38,11 +38,12 @@ namespace gpuprim = ::cub;
 #include "rocm/rocm_config.h"
 namespace gpuprim = ::hipcub;
 
-// Required for sorting Eigen::half and bfloat16.
 namespace rocprim {
+
+#if (TF_ROCM_VERSION >= 50200 && TF_ROCM_VERSION < 70000)
+// Required for sorting Eigen::half and bfloat16.
 namespace detail {
 
-#if (TF_ROCM_VERSION >= 50200)
 template <>
 struct float_bit_mask<Eigen::half> {
   static constexpr uint16_t sign_bit = 0x8000;
@@ -58,14 +59,41 @@ struct float_bit_mask<tsl::bfloat16> {
   static constexpr uint16_t mantissa = 0x007F;
   using bit_type = uint16_t;
 };
-#endif  // TF_ROCM_VERSION >= 50200
+
+}; // namespace detail
+#elif (TF_ROCM_VERSION >= 70000)  // ROCm 7+ only; a bare #else would also catch TF_ROCM_VERSION < 50200.
+namespace traits {
+
+template<>  // Eigen::half: 16-bit storage; masks mirror the pre-ROCm 7 float_bit_mask (sign, exponent, mantissa).
+struct rocprim::traits::define<Eigen::half> {
+  using float_bit_mask = rocprim::traits::float_bit_mask::values<uint16_t, 0x8000, 0x7C00, 0x03FF>;
+  using is_arithmetic = rocprim::traits::is_arithmetic::values<true>;
+  using number_format = rocprim::traits::number_format::values<traits::number_format::kind::floating_point_type>;
+};
+
+template<>  // bfloat16: 16-bit storage; masks mirror the pre-ROCm 7 float_bit_mask (sign, exponent, mantissa).
+struct rocprim::traits::define<tsl::bfloat16> {
+  using float_bit_mask = rocprim::traits::float_bit_mask::values<uint16_t, 0x8000, 0x7F80, 0x007F>;
+  using is_arithmetic = rocprim::traits::is_arithmetic::values<true>;
+  using number_format = rocprim::traits::number_format::values<traits::number_format::kind::floating_point_type>;
+};
+
+}; // namespace traits
+#endif  // TF_ROCM_VERSION >= 50200 (float_bit_mask) / >= 70000 (traits)
+
+#if (TF_ROCM_VERSION < 70000)  // Pre-ROCm 7 only; presumably covered by traits::define on ROCm 7+ -- TODO confirm.
+namespace detail {
+
 template <>
 struct radix_key_codec_base<Eigen::half>
     : radix_key_codec_floating<Eigen::half, uint16_t> {};
 template <>
 struct radix_key_codec_base<tsl::bfloat16>
     : radix_key_codec_floating<tsl::bfloat16, uint16_t> {};
+
 };  // namespace detail
+#endif // TF_ROCM_VERSION < 70000
+
 };  // namespace rocprim
 
 #endif  // TENSORFLOW_USE_ROCM