Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 12 additions & 0 deletions src/coreclr/jit/hwintrinsiccodegenarm64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -783,6 +783,18 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node)
emitInsHelper(targetReg, maskReg, embMaskOp2Reg);
break;

case NI_Sve2_MaxNumberPairwise:
case NI_Sve2_MinNumberPairwise:
// These instructions have unpredictable behaviour when using predicated movprfx,
// so the unpredicated variant must be used here.
assert(!intrin.op3->isContained() && falseReg != REG_NA);
GetEmitter()->emitIns_R_R(INS_sve_movprfx, EA_SCALABLE, targetReg, embMaskOp1Reg);
GetEmitter()->emitIns_R_R_R(insEmbMask, emitSize, targetReg, maskReg, embMaskOp2Reg,
embOpt, sopt);
GetEmitter()->emitIns_R_R_R_R(INS_sve_sel, emitSize, targetReg, maskReg, targetReg,
falseReg, opt);
break;

default:
assert(targetReg != embMaskOp2Reg);

Expand Down
16 changes: 14 additions & 2 deletions src/coreclr/jit/lowerarmarch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4061,8 +4061,20 @@ void Lowering::ContainCheckHWIntrinsic(GenTreeHWIntrinsic* node)
// When we are merging with zero, we can specialize
// and avoid instantiating the vector constant.
// Do this only if op1 was AllTrueMask
MakeSrcContained(node, op3);
LABELEDDISPTREERANGE("Contained false mask op3 in ConditionalSelect", BlockRange(), op3);
switch (op2->AsHWIntrinsic()->GetHWIntrinsicId())
{
case NI_Sve2_MinNumberPairwise:
case NI_Sve2_MaxNumberPairwise:
// This is an edge case where these instructions have unpredictable behaviour when
// using predicated movprfx, so the unpredicated variant must be used here. This
// prevents us from performing this optimization as we will need the constant vector
// for masking the result.
break;

default:
MakeSrcContained(node, op3);
LABELEDDISPTREERANGE("Contained false mask op3 in ConditionalSelect", BlockRange(), op3);
}
}

break;
Expand Down
5 changes: 5 additions & 0 deletions src/tests/Common/CoreCLRTestLibrary/Generator.cs
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,11 @@ public static int? Seed
}
}

// Returns a pseudo-random boolean drawn from the shared generator.
// Next(0, 2) yields either 0 or 1, so testing for non-zero is the same as testing for 1.
public static bool GetBool() => m_rand.Next(0, 2) != 0;

// returns a byte array of random data
public static void GetBytes(int new_seed, byte[] buffer)
{
Expand Down
29 changes: 28 additions & 1 deletion src/tests/Common/CoreCLRTestLibrary/Vectors.cs
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,25 @@ public static Vector<T> GetRandomVector<T>()
{
data[i] = TestLibrary.Generator.GetByte();
}

// TODO-ARM64-SVE: Some test functions do not support propagation of NaN/Inf values.
if (typeof(T) == typeof(float))
{
for (int i = 0; i < vsize / sizeof(float); i++)
{
// Clear bit 23 to suppress generation of NaN/Inf values.
data[i * sizeof(float) + 2] &= byte.CreateTruncating(~(1 << 7));
}
}
else if (typeof(T) == typeof(double))
{
for (int i = 0; i < vsize / sizeof(double); i++)
{
// Clear bit 52 to suppress generation of NaN/Inf values.
data[i * sizeof(double) + 6] &= byte.CreateTruncating(~(1 << 4));
}
}

return new Vector<T>(data.AsSpan());
}

Expand All @@ -37,7 +56,15 @@ public static Vector<T> GetRandomMask<T>()
long count = vsize / tsize;
for (int i = 0; i < count; i++)
{
data[i * tsize] |= (byte)(TestLibrary.Generator.GetByte() & 1);
// Bias the generator to produce zero values at least 50% of the time.
// Elements that pass through this choice will be filled with random data.
if (TestLibrary.Generator.GetBool())
{
for (int j = 0; j < tsize; j++)
{
data[i * tsize + j] = TestLibrary.Generator.GetByte();
}
}
}

return new Vector<T>(data.AsSpan());
Expand Down
110 changes: 94 additions & 16 deletions src/tests/JIT/HardwareIntrinsics/Arm/Shared/Helpers.cs
Original file line number Diff line number Diff line change
Expand Up @@ -29,12 +29,21 @@ public static Vector<T> InitVector<T>(Func<int, T> f)
return new Vector<T>(arr);
}

// Normalizes an integer vector into mask form: every non-zero element becomes
// T.One and every zero element stays T.Zero. The input array is left untouched;
// a new array of the same length is returned.
public static T[] ConvertVectorToMask<T>(T[] vector) where T : IBinaryInteger<T>
{
    var mask = new T[vector.Length];
    int index = 0;

    foreach (T element in vector)
    {
        if (element == T.Zero)
        {
            mask[index] = T.Zero;
        }
        else
        {
            mask[index] = T.One;
        }

        index++;
    }

    return mask;
}

public static T[] CreateMaskForFirstActiveElement<T>(T[] mask, T[] srcMask)
where T : unmanaged, IBinaryInteger<T>
{
int count = srcMask.Length;
T[] result = new T[count];
Array.Copy(srcMask, result, count);
T[] result = ConvertVectorToMask(srcMask);

for (int i = 0; i < count; i++)
{
Expand Down Expand Up @@ -1580,19 +1589,51 @@ public static ulong FusedAddHalving(ulong op1, ulong op2)
// Signed halving add: returns floor((op1 + op2) / 2), evaluated as if the sum
// had unlimited precision, so the result is correct even when op1 + op2 does
// not fit in 64 bits.
//
// Each operand is halved first (arithmetic shift, rounding toward negative
// infinity), which keeps every intermediate value inside the 64-bit range.
// The two discarded low bits add one extra unit only when both are set, hence
// the (op1 & op2 & 1) carry term. Unlike sign-based overflow detection on the
// wrapped sum, this also handles long.MinValue + long.MinValue, whose wrapped
// sum is exactly zero.
public static long FusedAddHalving(long op1, long op2)
{
    long carry = op1 & op2 & 1L;
    return (op1 >> 1) + (op2 >> 1) + carry;
}

// Signed halving subtract: returns floor((op1 - op2) / 2), evaluated as if the
// difference had unlimited precision, so the result is correct even when
// op1 - op2 does not fit in 64 bits.
//
// Each operand is halved first (arithmetic shift, rounding toward negative
// infinity), which keeps every intermediate value inside the 64-bit range.
// A borrow of one is needed only when op1 is even and op2 is odd, i.e. when
// the discarded low bits would contribute -1 to the exact difference. Unlike
// sign-based overflow detection, this also handles op1 == 0 with
// op2 == long.MinValue.
public static long FusedSubtractHalving(long op1, long op2)
{
    long borrow = ~op1 & op2 & 1L;
    return (op1 >> 1) - (op2 >> 1) - borrow;
}

public static ulong FusedSubtractHalving(ulong op1, ulong op2)
Expand All @@ -1602,7 +1643,6 @@ public static ulong FusedSubtractHalving(ulong op1, ulong op2)
return (diff >> 1) + (overflow ? 1UL << 63 : 0);
}


// Unsigned rounding halving add: (op1 + op2 + 1) >> 1, evaluated in 64 bits so
// the 32-bit sum cannot overflow before it is halved.
public static uint FusedAddRoundedHalving(uint op1, uint op2)
{
    ulong widenedSum = (ulong)op1 + op2 + 1UL;
    return (uint)(widenedSum >> 1);
}

// Unsigned halving subtract: (op1 - op2) >> 1. The subtraction is done in 64
// bits (wrapping when op1 < op2) and the result is truncated back to 32 bits.
public static uint FusedSubtractHalving(uint op1, uint op2)
{
    ulong widenedDiff = (ulong)op1 - op2;
    return (uint)(widenedDiff >> 1);
}
Expand Down Expand Up @@ -2942,7 +2982,7 @@ private static sbyte SignedShift(sbyte op1, sbyte op2, bool rounding = false, bo
{
if (shiftOvf)
{
result = op2 < 0 ? sbyte.MinValue : sbyte.MaxValue;
return op1 > 0 ? sbyte.MaxValue : sbyte.MinValue;
}
}
}
Expand Down Expand Up @@ -3140,8 +3180,19 @@ private static (byte val, bool ovf) SubtractOvf(byte op1, byte op2)

// Signed saturating add for 8-bit values: returns op1 + op2 clamped to the
// sbyte range. The operands are added as 32-bit integers, where the sum can
// never overflow, and the exact result is then clamped to
// [sbyte.MinValue, sbyte.MaxValue].
public static sbyte AddSaturate(sbyte op1, sbyte op2)
{
    int sum = op1 + op2;
    return (sbyte)Math.Clamp(sum, (int)sbyte.MinValue, (int)sbyte.MaxValue);
}

public static sbyte AddSaturate(sbyte op1, byte op2)
Expand Down Expand Up @@ -7517,7 +7568,7 @@ public static T[] CreateBreakPropagateMask<T>(T[] op1, T[] op2) where T : IBinar

if (LastActive(mask, op1) != T.Zero)
{
Array.Copy(op2, result, count);
result = ConvertVectorToMask(op2);
}

return result;
Expand Down Expand Up @@ -8185,7 +8236,34 @@ public static N SubtractRoundedHighNarrowingOdd<W, N>(N even, W op1, W op2, int
return Odd<N>(even, SubtractRoundedHighNarrowing<W, N>(op1, op2), i);
}

// Signed rounding halving add: returns floor((op1 + op2 + 1) / 2), evaluated
// as if the sum had unlimited precision, so the result is correct even when
// op1 + op2 + 1 does not fit in 64 bits.
//
// Each operand is halved first (arithmetic shift, rounding toward negative
// infinity), which keeps every intermediate value inside the 64-bit range.
// With the +1 rounding bias, the discarded low bits add one extra unit
// whenever at least one of them is set, hence the ((op1 | op2) & 1) term.
// No overflow detection is required, so cases such as (long.MaxValue, 0),
// where only the rounding increment overflows, are handled as well.
public static long FusedAddRoundedHalving(long op1, long op2)
{
    long roundUp = (op1 | op2) & 1L;
    return (op1 >> 1) + (op2 >> 1) + roundUp;
}

public static ulong FusedAddRoundedHalving(ulong op1, ulong op2)
{
Expand Down
Loading
Loading