Merged
Changes from all commits
Commits (24)
82bc797  Port ColorSpaceTransformUtils (JimBobSquarePants, May 30, 2025)
c490bc6  Port TTransformSse41 (JimBobSquarePants, May 30, 2025)
dd9bd0a  Use explicit type (JimBobSquarePants, May 30, 2025)
3be2b6a  Port TransformTwo (JimBobSquarePants, May 30, 2025)
0a9c407  Add explicit AdvSimd to MultiplyAddAdjacent (JimBobSquarePants, May 30, 2025)
cfad39b  Add XPlat V128 SubtractSaturate (JimBobSquarePants, May 30, 2025)
7223e90  Port Vp8_Sse16x16 (JimBobSquarePants, Jun 2, 2025)
e616844  Remove all v128 util restrictions (JimBobSquarePants, Jun 2, 2025)
f0c6f4c  Port load/store (JimBobSquarePants, Jun 2, 2025)
f2e4257  Port filters (JimBobSquarePants, Jun 2, 2025)
217450e  Complete LossyUtils port (JimBobSquarePants, Jun 3, 2025)
85d6a2b  Port Vp8Encoding (JimBobSquarePants, Jun 3, 2025)
b5fe86c  Port YuvConversion (JimBobSquarePants, Jun 3, 2025)
1a91ec9  Port common utils and alpha decoder (JimBobSquarePants, Jun 3, 2025)
6b5392b  Remove restrictions from vector utilities (JimBobSquarePants, Jun 3, 2025)
1a63729  Add Arm64 movemask (JimBobSquarePants, Jun 3, 2025)
e553807  Port LosslessUtils V128 (JimBobSquarePants, Jun 4, 2025)
0c0748e  Update LosslessUtils.cs (JimBobSquarePants, Jun 4, 2025)
b29c25c  Merge branch 'main' into js/webp-arm (JimBobSquarePants, Jun 4, 2025)
db28d22  Merge branch 'main' into js/webp-arm (JimBobSquarePants, Jun 5, 2025)
62a0666  Merge branch 'main' into js/webp-arm (JimBobSquarePants, Jun 6, 2025)
3627073  Update based on feedback (JimBobSquarePants, Jun 6, 2025)
8355353  Respond to additional feedback (JimBobSquarePants, Jun 10, 2025)
221aa80  Merge branch 'main' into js/webp-arm (JimBobSquarePants, Jun 10, 2025)
17 changes: 0 additions & 17 deletions src/ImageSharp/Common/Helpers/Numerics.cs
@@ -884,23 +884,6 @@ public static void Accumulate(ref Vector<uint> accumulator, Vector<byte> values)
accumulator += intHigh;
}

-/// <summary>
-/// Reduces elements of the vector into one sum.
-/// </summary>
-/// <param name="accumulator">The accumulator to reduce.</param>
-/// <returns>The sum of all elements.</returns>
-[MethodImpl(MethodImplOptions.AggressiveInlining)]
-public static int ReduceSum(Vector128<int> accumulator)
-{
-    // Add odd to even.
-    Vector128<int> vsum = Sse2.Add(accumulator, Sse2.Shuffle(accumulator, 0b_11_11_01_01));
-
-    // Add high to low.
-    vsum = Sse2.Add(vsum, Sse2.Shuffle(vsum, 0b_11_10_11_10));
-
-    return Sse2.ConvertToInt32(vsum);
-}

/// <summary>
/// Reduces elements of the vector into one sum.
/// </summary>
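The overload removed above did its horizontal sum with SSE2-only shuffles, so it could never run on Arm. For comparison, a minimal cross-platform sketch (assuming .NET 7+ and the hypothetical class name NumericsSketch; the PR itself may simply route callers to the remaining overload whose summary is visible above) can lean on Vector128.Sum, which the JIT accelerates on both x64 and Arm64:

```csharp
using System.Runtime.CompilerServices;
using System.Runtime.Intrinsics;

internal static class NumericsSketch
{
    /// <summary>
    /// Reduces elements of the vector into one sum, with no SSE2 requirement.
    /// </summary>
    /// <param name="accumulator">The accumulator to reduce.</param>
    /// <returns>The sum of all elements.</returns>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public static int ReduceSum(Vector128<int> accumulator)
        => Vector128.Sum(accumulator); // The JIT emits the appropriate horizontal add for the target ISA.
}
```

Call sites would read exactly as before, e.g. `int sum = NumericsSketch.ReduceSum(accumulator);`.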
57 changes: 27 additions & 30 deletions src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs
@@ -66,9 +66,9 @@ public static void Shuffle4Reduce(
ref Span<float> destination,
[ConstantExpected] byte control)
{
-if ((Vector512.IsHardwareAccelerated && Vector512_.SupportsShuffleNativeFloat) ||
-    (Vector256.IsHardwareAccelerated && Vector256_.SupportsShuffleNativeFloat) ||
-    Vector128.IsHardwareAccelerated)
+if (Vector512.IsHardwareAccelerated ||
+    Vector256.IsHardwareAccelerated ||
+    Vector128.IsHardwareAccelerated)
{
int remainder = 0;
if (Vector512.IsHardwareAccelerated)
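The Vector512_/Vector256_ SupportsShuffleNativeFloat gates disappear here, leaving only the IsHardwareAccelerated checks. The helper itself is outside this diff, so the following is only a sketch of how a float shuffle keyed by an _MM_SHUFFLE-style control byte can avoid an SSE-only guard (hypothetical ShuffleNativeFloatSketch, not ImageSharp's actual Vector128_ implementation):

```csharp
using System.Diagnostics.CodeAnalysis;
using System.Runtime.CompilerServices;
using System.Runtime.Intrinsics;
using System.Runtime.Intrinsics.X86;

internal static class ShuffleNativeFloatSketch
{
    // Shuffles four floats by an _MM_SHUFFLE-style control byte:
    // result[i] = vector[(control >> (2 * i)) & 3].
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public static Vector128<float> Shuffle(Vector128<float> vector, [ConstantExpected] byte control)
    {
        if (Sse.IsSupported)
        {
            return Sse.Shuffle(vector, vector, control);
        }

        // Cross-platform fallback: Vector128.Shuffle is accelerated on Arm64 when the indices
        // fold to constants, and has a managed fallback everywhere else.
        return Vector128.Shuffle(
            vector,
            Vector128.Create(control & 3, (control >> 2) & 3, (control >> 4) & 3, (control >> 6) & 3));
    }
}
```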
@@ -112,9 +112,9 @@ public static void Shuffle4Reduce(
ref Span<byte> destination,
[ConstantExpected] byte control)
{
-if ((Vector512.IsHardwareAccelerated && Vector512_.SupportsShuffleNativeByte) ||
-    (Vector256.IsHardwareAccelerated && Vector256_.SupportsShuffleNativeByte) ||
-    (Vector128.IsHardwareAccelerated && Vector128_.SupportsShuffleNativeByte))
+if (Vector512.IsHardwareAccelerated ||
+    Vector256.IsHardwareAccelerated ||
+    Vector128.IsHardwareAccelerated)
{
int remainder = 0;
if (Vector512.IsHardwareAccelerated)
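The byte paths likewise drop SupportsShuffleNativeByte. Again the helper is not part of this diff, so this is just a sketch of a portable byte shuffle under the masks this file produces (indices in [0, 15], bit 7 never set); hypothetical name, not the real Vector128_.ShuffleNative:

```csharp
using System.Runtime.CompilerServices;
using System.Runtime.Intrinsics;
using System.Runtime.Intrinsics.X86;

internal static class ShuffleNativeByteSketch
{
    // Selects destination bytes by per-element indices. For masks whose values are all in
    // [0, 15], which is what Shuffle.MMShuffleSpan emits for 128-bit vectors, this matches
    // pshufb; out-of-range indices simply produce zero on the cross-platform path.
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public static Vector128<byte> Shuffle(Vector128<byte> value, Vector128<byte> mask)
    {
        if (Ssse3.IsSupported)
        {
            return Ssse3.Shuffle(value, mask);
        }

        // Accelerated on Arm64 via TBL; managed fallback elsewhere.
        return Vector128.Shuffle(value, mask);
    }
}
```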
@@ -158,7 +158,7 @@ public static void Shuffle3Reduce(
ref Span<byte> destination,
[ConstantExpected] byte control)
{
-if (Vector128.IsHardwareAccelerated && Vector128_.SupportsShuffleNativeByte && Vector128_.SupportsAlignRight)
+if (Vector128.IsHardwareAccelerated)
{
int remainder = source.Length % (Vector128<byte>.Count * 3);

@@ -190,7 +190,7 @@ public static void Pad3Shuffle4Reduce(
ref Span<byte> destination,
[ConstantExpected] byte control)
{
-if (Vector128.IsHardwareAccelerated && Vector128_.SupportsShuffleNativeByte && Vector128_.SupportsShiftByte)
+if (Vector128.IsHardwareAccelerated)
{
int remainder = source.Length % (Vector128<byte>.Count * 3);

@@ -223,7 +223,7 @@ public static void Shuffle4Slice3Reduce(
ref Span<byte> destination,
[ConstantExpected] byte control)
{
-if (Vector128.IsHardwareAccelerated && Vector128_.SupportsShuffleNativeByte && Vector128_.SupportsShiftByte)
+if (Vector128.IsHardwareAccelerated)
{
int remainder = source.Length & ((Vector128<byte>.Count * 4) - 1); // bit-hack for modulo

@@ -249,7 +249,7 @@ private static void Shuffle4(
Span<float> destination,
[ConstantExpected] byte control)
{
-if (Vector512.IsHardwareAccelerated && Vector512_.SupportsShuffleNativeFloat)
+if (Vector512.IsHardwareAccelerated)
{
ref Vector512<float> sourceBase = ref Unsafe.As<float, Vector512<float>>(ref MemoryMarshal.GetReference(source));
ref Vector512<float> destinationBase = ref Unsafe.As<float, Vector512<float>>(ref MemoryMarshal.GetReference(destination));
@@ -277,7 +277,7 @@ private static void Shuffle4(
}
}
}
-else if (Vector256.IsHardwareAccelerated && Vector256_.SupportsShuffleNativeFloat)
+else if (Vector256.IsHardwareAccelerated)
{
ref Vector256<float> sourceBase = ref Unsafe.As<float, Vector256<float>>(ref MemoryMarshal.GetReference(source));
ref Vector256<float> destinationBase = ref Unsafe.As<float, Vector256<float>>(ref MemoryMarshal.GetReference(destination));
@@ -341,7 +341,7 @@ private static void Shuffle4(
Span<byte> destination,
[ConstantExpected] byte control)
{
-if (Vector512.IsHardwareAccelerated && Vector512_.SupportsShuffleNativeByte)
+if (Vector512.IsHardwareAccelerated)
{
Span<byte> temp = stackalloc byte[Vector512<byte>.Count];
Shuffle.MMShuffleSpan(ref temp, control);
@@ -373,8 +373,13 @@ private static void Shuffle4(
}
}
}
-else if (Vector256.IsHardwareAccelerated && Vector256_.SupportsShuffleNativeByte)
+else if (Vector256.IsHardwareAccelerated)
{
+    // ShufflePerLane performs per-128-bit-lane shuffling using Avx2.Shuffle (vpshufb).
+    // MMShuffleSpan generates indices in the range [0, 31] and never sets bit 7 in any byte,
+    // so the shuffle will not zero elements. Because vpshufb uses only the low 4 bits (b[i] & 0x0F)
+    // for indexing within each lane, and ignores the upper bits unless bit 7 is set,
+    // this usage is guaranteed to remain within-lane and non-zeroing.
Span<byte> temp = stackalloc byte[Vector256<byte>.Count];
Shuffle.MMShuffleSpan(ref temp, control);
Vector256<byte> mask = Unsafe.As<byte, Vector256<byte>>(ref MemoryMarshal.GetReference(temp));
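Given the comment added above, a per-lane shuffle helper only needs Avx2.Shuffle where it exists and can otherwise shuffle each 128-bit half independently. A minimal sketch under those assumptions (hypothetical ShufflePerLaneSketch; the real Vector256_.ShufflePerLane is not shown in this diff):

```csharp
using System.Runtime.CompilerServices;
using System.Runtime.Intrinsics;
using System.Runtime.Intrinsics.X86;

internal static class ShufflePerLaneSketch
{
    // Shuffles bytes within each 128-bit lane. For masks that never set bit 7 this mirrors
    // vpshufb: only the low 4 bits of each mask byte act as the in-lane index.
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public static Vector256<byte> ShufflePerLane(Vector256<byte> value, Vector256<byte> mask)
    {
        if (Avx2.IsSupported)
        {
            return Avx2.Shuffle(value, mask);
        }

        // Portable fallback: shuffle each half independently, masking the indices to [0, 15]
        // so lane-relative indexing matches the vpshufb behaviour described above.
        Vector128<byte> lowIndices = mask.GetLower() & Vector128.Create((byte)0x0F);
        Vector128<byte> highIndices = mask.GetUpper() & Vector128.Create((byte)0x0F);
        return Vector256.Create(
            Vector128.Shuffle(value.GetLower(), lowIndices),
            Vector128.Shuffle(value.GetUpper(), highIndices));
    }
}
```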
@@ -391,21 +396,21 @@ private static void Shuffle4(
ref Vector256<byte> vs0 = ref Unsafe.Add(ref sourceBase, i);
ref Vector256<byte> vd0 = ref Unsafe.Add(ref destinationBase, i);

-vd0 = Vector256_.ShuffleNative(vs0, mask);
-Unsafe.Add(ref vd0, (nuint)1) = Vector256_.ShuffleNative(Unsafe.Add(ref vs0, (nuint)1), mask);
-Unsafe.Add(ref vd0, (nuint)2) = Vector256_.ShuffleNative(Unsafe.Add(ref vs0, (nuint)2), mask);
-Unsafe.Add(ref vd0, (nuint)3) = Vector256_.ShuffleNative(Unsafe.Add(ref vs0, (nuint)3), mask);
+vd0 = Vector256_.ShufflePerLane(vs0, mask);
+Unsafe.Add(ref vd0, (nuint)1) = Vector256_.ShufflePerLane(Unsafe.Add(ref vs0, (nuint)1), mask);
+Unsafe.Add(ref vd0, (nuint)2) = Vector256_.ShufflePerLane(Unsafe.Add(ref vs0, (nuint)2), mask);
+Unsafe.Add(ref vd0, (nuint)3) = Vector256_.ShufflePerLane(Unsafe.Add(ref vs0, (nuint)3), mask);
}

if (m > 0)
{
for (nuint i = u; i < n; i++)
{
-Unsafe.Add(ref destinationBase, i) = Vector256_.ShuffleNative(Unsafe.Add(ref sourceBase, i), mask);
+Unsafe.Add(ref destinationBase, i) = Vector256_.ShufflePerLane(Unsafe.Add(ref sourceBase, i), mask);
}
}
}
-else if (Vector128.IsHardwareAccelerated && Vector128_.SupportsShuffleNativeByte)
+else if (Vector128.IsHardwareAccelerated)
{
Span<byte> temp = stackalloc byte[Vector128<byte>.Count];
Shuffle.MMShuffleSpan(ref temp, control);
@@ -445,9 +450,7 @@ private static void Shuffle3(
Span<byte> destination,
[ConstantExpected] byte control)
{
-if (Vector128.IsHardwareAccelerated &&
-    Vector128_.SupportsShuffleNativeByte &&
-    Vector128_.SupportsAlignRight)
+if (Vector128.IsHardwareAccelerated)
{
Vector128<byte> maskPad4Nx16 = ShuffleMaskPad4Nx16();
Vector128<byte> maskSlice4Nx16 = ShuffleMaskSlice4Nx16();
@@ -507,10 +510,7 @@ private static void Pad3Shuffle4(
Span<byte> destination,
[ConstantExpected] byte control)
{
-if (Vector128.IsHardwareAccelerated &&
-    Vector128_.SupportsShuffleNativeByte &&
-    Vector128_.SupportsShiftByte &&
-    Vector128_.SupportsAlignRight)
+if (Vector128.IsHardwareAccelerated)
{
Vector128<byte> maskPad4Nx16 = ShuffleMaskPad4Nx16();
Vector128<byte> fill = Vector128.Create(0xff000000ff000000ul).AsByte();
@@ -553,10 +553,7 @@ private static void Shuffle4Slice3(
Span<byte> destination,
[ConstantExpected] byte control)
{
-if (Vector128.IsHardwareAccelerated &&
-    Vector128_.SupportsShuffleNativeByte &&
-    Vector128_.SupportsShiftByte &&
-    Vector128_.SupportsAlignRight)
+if (Vector128.IsHardwareAccelerated)
{
Vector128<byte> maskSlice4Nx16 = ShuffleMaskSlice4Nx16();
Vector128<byte> maskE = Vector128_.AlignRight(maskSlice4Nx16, maskSlice4Nx16, 12);
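Several of the simplified guards above also dropped SupportsAlignRight, so Vector128_.AlignRight presumably carries its own fallback now. That helper is not shown here; the block below is only a sketch of how palignr-style alignment can be written portably (hypothetical AlignRightSketch, not ImageSharp's actual implementation):

```csharp
using System.Diagnostics.CodeAnalysis;
using System.Runtime.CompilerServices;
using System.Runtime.Intrinsics;
using System.Runtime.Intrinsics.X86;

internal static class AlignRightSketch
{
    // Concatenates 'left' (high 16 bytes) and 'right' (low 16 bytes) and extracts 16 bytes
    // starting at byte offset 'count', which is the palignr contract.
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public static Vector128<byte> AlignRight(
        Vector128<byte> left,
        Vector128<byte> right,
        [ConstantExpected(Max = (byte)15)] byte count)
    {
        if (Ssse3.IsSupported)
        {
            return Ssse3.AlignRight(left, right, count);
        }

        // Portable fallback: Vector128.Shuffle zeroes elements whose index is out of range,
        // so each source contributes only the bytes it owns and the halves can be OR-ed.
        Vector128<byte> offsets = Vector128.Create(
            (byte)0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) + Vector128.Create(count);

        Vector128<byte> fromRight = Vector128.Shuffle(right, offsets);
        Vector128<byte> fromLeft = Vector128.Shuffle(left, offsets - Vector128.Create((byte)16));
        return fromRight | fromLeft;
    }
}
```

With count = 12 and the same vector passed as both operands, this reproduces the rotate used by `Vector128_.AlignRight(maskSlice4Nx16, maskSlice4Nx16, 12)` in the hunk above.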