@@ -4768,14 +4768,20 @@ HWY_API VFromD<DH> UpperHalf(DH dh, VFromD<Twice<DH>> v) {
47684768
47694769// ------------------------------ Broadcast/splat any lane
47704770
4771+ template <int kLane , typename T>
4772+ HWY_API Vec128<T, 1 > Broadcast (Vec128<T, 1 > v) {
4773+ return v;
4774+ }
4775+
47714776#if HWY_ARCH_ARM_A64
47724777// Unsigned
47734778template <int kLane >
47744779HWY_API Vec128<uint8_t > Broadcast (Vec128<uint8_t > v) {
47754780 static_assert (0 <= kLane && kLane < 16 , " Invalid lane" );
47764781 return Vec128<uint8_t >(vdupq_laneq_u8 (v.raw , kLane ));
47774782}
4778- template <int kLane , size_t N, HWY_IF_V_SIZE_LE(uint8_t , N, 8 )>
4783+ template <int kLane , size_t N, HWY_IF_V_SIZE_LE(uint8_t , N, 8 ),
4784+ HWY_IF_LANES_GT (N, 1 )>
47794785HWY_API Vec128<uint8_t, N> Broadcast(Vec128<uint8_t , N> v) {
47804786 static_assert (0 <= kLane && kLane < N, " Invalid lane" );
47814787 return Vec128<uint8_t , N>(vdup_lane_u8 (v.raw , kLane ));
@@ -4785,7 +4791,8 @@ HWY_API Vec128<uint16_t> Broadcast(Vec128<uint16_t> v) {
47854791 static_assert (0 <= kLane && kLane < 8 , " Invalid lane" );
47864792 return Vec128<uint16_t >(vdupq_laneq_u16 (v.raw , kLane ));
47874793}
4788- template <int kLane , size_t N, HWY_IF_V_SIZE_LE(uint16_t , N, 8 )>
4794+ template <int kLane , size_t N, HWY_IF_V_SIZE_LE(uint16_t , N, 8 ),
4795+ HWY_IF_LANES_GT (N, 1 )>
47894796HWY_API Vec128<uint16_t, N> Broadcast(Vec128<uint16_t , N> v) {
47904797 static_assert (0 <= kLane && kLane < N, " Invalid lane" );
47914798 return Vec128<uint16_t , N>(vdup_lane_u16 (v.raw , kLane ));
@@ -4795,7 +4802,8 @@ HWY_API Vec128<uint32_t> Broadcast(Vec128<uint32_t> v) {
47954802 static_assert (0 <= kLane && kLane < 4 , " Invalid lane" );
47964803 return Vec128<uint32_t >(vdupq_laneq_u32 (v.raw , kLane ));
47974804}
4798- template <int kLane , size_t N, HWY_IF_V_SIZE_LE(uint32_t , N, 8 )>
4805+ template <int kLane , size_t N, HWY_IF_V_SIZE_LE(uint32_t , N, 8 ),
4806+ HWY_IF_LANES_GT (N, 1 )>
47994807HWY_API Vec128<uint32_t, N> Broadcast(Vec128<uint32_t , N> v) {
48004808 static_assert (0 <= kLane && kLane < N, " Invalid lane" );
48014809 return Vec128<uint32_t , N>(vdup_lane_u32 (v.raw , kLane ));
@@ -4805,15 +4813,15 @@ HWY_API Vec128<uint64_t> Broadcast(Vec128<uint64_t> v) {
48054813 static_assert (0 <= kLane && kLane < 2 , " Invalid lane" );
48064814 return Vec128<uint64_t >(vdupq_laneq_u64 (v.raw , kLane ));
48074815}
4808- // Vec64<uint64_t> is defined below.
48094816
48104817// Signed
48114818template <int kLane >
48124819HWY_API Vec128<int8_t > Broadcast (Vec128<int8_t > v) {
48134820 static_assert (0 <= kLane && kLane < 16 , " Invalid lane" );
48144821 return Vec128<int8_t >(vdupq_laneq_s8 (v.raw , kLane ));
48154822}
4816- template <int kLane , size_t N, HWY_IF_V_SIZE_LE(int8_t , N, 8 )>
4823+ template <int kLane , size_t N, HWY_IF_V_SIZE_LE(int8_t , N, 8 ),
4824+ HWY_IF_LANES_GT (N, 1 )>
48174825HWY_API Vec128<int8_t, N> Broadcast(Vec128<int8_t , N> v) {
48184826 static_assert (0 <= kLane && kLane < N, " Invalid lane" );
48194827 return Vec128<int8_t , N>(vdup_lane_s8 (v.raw , kLane ));
@@ -4823,7 +4831,8 @@ HWY_API Vec128<int16_t> Broadcast(Vec128<int16_t> v) {
48234831 static_assert (0 <= kLane && kLane < 8 , " Invalid lane" );
48244832 return Vec128<int16_t >(vdupq_laneq_s16 (v.raw , kLane ));
48254833}
4826- template <int kLane , size_t N, HWY_IF_V_SIZE_LE(int16_t , N, 8 )>
4834+ template <int kLane , size_t N, HWY_IF_V_SIZE_LE(int16_t , N, 8 ),
4835+ HWY_IF_LANES_GT (N, 1 )>
48274836HWY_API Vec128<int16_t, N> Broadcast(Vec128<int16_t , N> v) {
48284837 static_assert (0 <= kLane && kLane < N, " Invalid lane" );
48294838 return Vec128<int16_t , N>(vdup_lane_s16 (v.raw , kLane ));
@@ -4833,7 +4842,8 @@ HWY_API Vec128<int32_t> Broadcast(Vec128<int32_t> v) {
48334842 static_assert (0 <= kLane && kLane < 4 , " Invalid lane" );
48344843 return Vec128<int32_t >(vdupq_laneq_s32 (v.raw , kLane ));
48354844}
4836- template <int kLane , size_t N, HWY_IF_V_SIZE_LE(int32_t , N, 8 )>
4845+ template <int kLane , size_t N, HWY_IF_V_SIZE_LE(int32_t , N, 8 ),
4846+ HWY_IF_LANES_GT (N, 1 )>
48374847HWY_API Vec128<int32_t, N> Broadcast(Vec128<int32_t , N> v) {
48384848 static_assert (0 <= kLane && kLane < N, " Invalid lane" );
48394849 return Vec128<int32_t , N>(vdup_lane_s32 (v.raw , kLane ));
@@ -4843,7 +4853,6 @@ HWY_API Vec128<int64_t> Broadcast(Vec128<int64_t> v) {
48434853 static_assert (0 <= kLane && kLane < 2 , " Invalid lane" );
48444854 return Vec128<int64_t >(vdupq_laneq_s64 (v.raw , kLane ));
48454855}
4846- // Vec64<int64_t> is defined below.
48474856
48484857// Float
48494858#if HWY_HAVE_FLOAT16
@@ -4852,7 +4861,8 @@ HWY_API Vec128<float16_t> Broadcast(Vec128<float16_t> v) {
48524861 static_assert (0 <= kLane && kLane < 8 , " Invalid lane" );
48534862 return Vec128<float16_t >(vdupq_laneq_f16 (v.raw , kLane ));
48544863}
4855- template <int kLane , size_t N, HWY_IF_V_SIZE_LE(float16_t , N, 8 )>
4864+ template <int kLane , size_t N, HWY_IF_V_SIZE_LE(float16_t , N, 8 ),
4865+ HWY_IF_LANES_GT (N, 1 )>
48564866HWY_API Vec128<float16_t, N> Broadcast(Vec128<float16_t , N> v) {
48574867 static_assert (0 <= kLane && kLane < N, " Invalid lane" );
48584868 return Vec128<float16_t , N>(vdup_lane_f16 (v.raw , kLane ));
@@ -4864,7 +4874,8 @@ HWY_API Vec128<float> Broadcast(Vec128<float> v) {
48644874 static_assert (0 <= kLane && kLane < 4 , " Invalid lane" );
48654875 return Vec128<float >(vdupq_laneq_f32 (v.raw , kLane ));
48664876}
4867- template <int kLane , size_t N, HWY_IF_V_SIZE_LE(float , N, 8 )>
4877+ template <int kLane , size_t N, HWY_IF_V_SIZE_LE(float , N, 8 ),
4878+ HWY_IF_LANES_GT (N, 1 )>
48684879HWY_API Vec128<float, N> Broadcast(Vec128<float , N> v) {
48694880 static_assert (0 <= kLane && kLane < N, " Invalid lane" );
48704881 return Vec128<float , N>(vdup_lane_f32 (v.raw , kLane ));
@@ -4874,11 +4885,6 @@ HWY_API Vec128<double> Broadcast(Vec128<double> v) {
48744885 static_assert (0 <= kLane && kLane < 2 , " Invalid lane" );
48754886 return Vec128<double >(vdupq_laneq_f64 (v.raw , kLane ));
48764887}
4877- template <int kLane >
4878- HWY_API Vec64<double > Broadcast (Vec64<double > v) {
4879- static_assert (0 <= kLane && kLane < 1 , " Invalid lane" );
4880- return v;
4881- }
48824888
48834889#else // !HWY_ARCH_ARM_A64
48844890// No vdupq_laneq_* on armv7: use vgetq_lane_* + vdupq_n_*.
@@ -4889,7 +4895,8 @@ HWY_API Vec128<uint8_t> Broadcast(Vec128<uint8_t> v) {
48894895 static_assert (0 <= kLane && kLane < 16 , " Invalid lane" );
48904896 return Vec128<uint8_t >(vdupq_n_u8 (vgetq_lane_u8 (v.raw , kLane )));
48914897}
4892- template <int kLane , size_t N, HWY_IF_V_SIZE_LE(uint8_t , N, 8 )>
4898+ template <int kLane , size_t N, HWY_IF_V_SIZE_LE(uint8_t , N, 8 ),
4899+ HWY_IF_LANES_GT (N, 1 )>
48934900HWY_API Vec128<uint8_t, N> Broadcast(Vec128<uint8_t , N> v) {
48944901 static_assert (0 <= kLane && kLane < N, " Invalid lane" );
48954902 return Vec128<uint8_t , N>(vdup_lane_u8 (v.raw , kLane ));
@@ -4899,7 +4906,8 @@ HWY_API Vec128<uint16_t> Broadcast(Vec128<uint16_t> v) {
48994906 static_assert (0 <= kLane && kLane < 8 , " Invalid lane" );
49004907 return Vec128<uint16_t >(vdupq_n_u16 (vgetq_lane_u16 (v.raw , kLane )));
49014908}
4902- template <int kLane , size_t N, HWY_IF_V_SIZE_LE(uint16_t , N, 8 )>
4909+ template <int kLane , size_t N, HWY_IF_V_SIZE_LE(uint16_t , N, 8 ),
4910+ HWY_IF_LANES_GT (N, 1 )>
49034911HWY_API Vec128<uint16_t, N> Broadcast(Vec128<uint16_t , N> v) {
49044912 static_assert (0 <= kLane && kLane < N, " Invalid lane" );
49054913 return Vec128<uint16_t , N>(vdup_lane_u16 (v.raw , kLane ));
@@ -4909,7 +4917,8 @@ HWY_API Vec128<uint32_t> Broadcast(Vec128<uint32_t> v) {
49094917 static_assert (0 <= kLane && kLane < 4 , " Invalid lane" );
49104918 return Vec128<uint32_t >(vdupq_n_u32 (vgetq_lane_u32 (v.raw , kLane )));
49114919}
4912- template <int kLane , size_t N, HWY_IF_V_SIZE_LE(uint32_t , N, 8 )>
4920+ template <int kLane , size_t N, HWY_IF_V_SIZE_LE(uint32_t , N, 8 ),
4921+ HWY_IF_LANES_GT (N, 1 )>
49134922HWY_API Vec128<uint32_t, N> Broadcast(Vec128<uint32_t , N> v) {
49144923 static_assert (0 <= kLane && kLane < N, " Invalid lane" );
49154924 return Vec128<uint32_t , N>(vdup_lane_u32 (v.raw , kLane ));
@@ -4919,15 +4928,15 @@ HWY_API Vec128<uint64_t> Broadcast(Vec128<uint64_t> v) {
49194928 static_assert (0 <= kLane && kLane < 2 , " Invalid lane" );
49204929 return Vec128<uint64_t >(vdupq_n_u64 (vgetq_lane_u64 (v.raw , kLane )));
49214930}
4922- // Vec64<uint64_t> is defined below.
49234931
49244932// Signed
49254933template <int kLane >
49264934HWY_API Vec128<int8_t > Broadcast (Vec128<int8_t > v) {
49274935 static_assert (0 <= kLane && kLane < 16 , " Invalid lane" );
49284936 return Vec128<int8_t >(vdupq_n_s8 (vgetq_lane_s8 (v.raw , kLane )));
49294937}
4930- template <int kLane , size_t N, HWY_IF_V_SIZE_LE(int8_t , N, 8 )>
4938+ template <int kLane , size_t N, HWY_IF_V_SIZE_LE(int8_t , N, 8 ),
4939+ HWY_IF_LANES_GT (N, 1 )>
49314940HWY_API Vec128<int8_t, N> Broadcast(Vec128<int8_t , N> v) {
49324941 static_assert (0 <= kLane && kLane < N, " Invalid lane" );
49334942 return Vec128<int8_t , N>(vdup_lane_s8 (v.raw , kLane ));
@@ -4937,7 +4946,8 @@ HWY_API Vec128<int16_t> Broadcast(Vec128<int16_t> v) {
49374946 static_assert (0 <= kLane && kLane < 8 , " Invalid lane" );
49384947 return Vec128<int16_t >(vdupq_n_s16 (vgetq_lane_s16 (v.raw , kLane )));
49394948}
4940- template <int kLane , size_t N, HWY_IF_V_SIZE_LE(int16_t , N, 8 )>
4949+ template <int kLane , size_t N, HWY_IF_V_SIZE_LE(int16_t , N, 8 ),
4950+ HWY_IF_LANES_GT (N, 1 )>
49414951HWY_API Vec128<int16_t, N> Broadcast(Vec128<int16_t , N> v) {
49424952 static_assert (0 <= kLane && kLane < N, " Invalid lane" );
49434953 return Vec128<int16_t , N>(vdup_lane_s16 (v.raw , kLane ));
@@ -4947,7 +4957,8 @@ HWY_API Vec128<int32_t> Broadcast(Vec128<int32_t> v) {
49474957 static_assert (0 <= kLane && kLane < 4 , " Invalid lane" );
49484958 return Vec128<int32_t >(vdupq_n_s32 (vgetq_lane_s32 (v.raw , kLane )));
49494959}
4950- template <int kLane , size_t N, HWY_IF_V_SIZE_LE(int32_t , N, 8 )>
4960+ template <int kLane , size_t N, HWY_IF_V_SIZE_LE(int32_t , N, 8 ),
4961+ HWY_IF_LANES_GT (N, 1 )>
49514962HWY_API Vec128<int32_t, N> Broadcast(Vec128<int32_t , N> v) {
49524963 static_assert (0 <= kLane && kLane < N, " Invalid lane" );
49534964 return Vec128<int32_t , N>(vdup_lane_s32 (v.raw , kLane ));
@@ -4957,7 +4968,6 @@ HWY_API Vec128<int64_t> Broadcast(Vec128<int64_t> v) {
49574968 static_assert (0 <= kLane && kLane < 2 , " Invalid lane" );
49584969 return Vec128<int64_t >(vdupq_n_s64 (vgetq_lane_s64 (v.raw , kLane )));
49594970}
4960- // Vec64<int64_t> is defined below.
49614971
49624972// Float
49634973#if HWY_HAVE_FLOAT16
@@ -4972,25 +4982,15 @@ HWY_API Vec128<float> Broadcast(Vec128<float> v) {
49724982 static_assert (0 <= kLane && kLane < 4 , " Invalid lane" );
49734983 return Vec128<float >(vdupq_n_f32 (vgetq_lane_f32 (v.raw , kLane )));
49744984}
// armv7 Broadcast for partial float vectors: replicates lane kLane using
// vdup_lane_f32, after a compile-time check that kLane is in [0, N).
// NOTE(review): HWY_IF_V_SIZE_LE(float, N, 8) presumably limits this overload
// to vectors of at most 8 bytes, and the added HWY_IF_LANES_GT(N, 1)
// presumably excludes single-lane vectors so they bind to the generic
// Vec128<T, 1> overload instead - confirm against the macro definitions.
4975- template <int kLane , size_t N, HWY_IF_V_SIZE_LE(float , N, 8 )>
4985+ template <int kLane , size_t N, HWY_IF_V_SIZE_LE(float , N, 8 ),
4986+ HWY_IF_LANES_GT (N, 1 )>
49764987HWY_API Vec128<float, N> Broadcast(Vec128<float , N> v) {
49774988 static_assert (0 <= kLane && kLane < N, " Invalid lane" );
49784989 return Vec128<float , N>(vdup_lane_f32 (v.raw , kLane ));
49794990}
49804991
49814992#endif // HWY_ARCH_ARM_A64
49824993
4983- template <int kLane >
4984- HWY_API Vec64<uint64_t > Broadcast (Vec64<uint64_t > v) {
4985- static_assert (0 <= kLane && kLane < 1 , " Invalid lane" );
4986- return v;
4987- }
4988- template <int kLane >
4989- HWY_API Vec64<int64_t > Broadcast (Vec64<int64_t > v) {
4990- static_assert (0 <= kLane && kLane < 1 , " Invalid lane" );
4991- return v;
4992- }
4993-
49944994// ------------------------------ TableLookupLanes
49954995
49964996// Returned by SetTableIndices for use by TableLookupLanes.
0 commit comments