dotnet · snickolls-arm · Jul 31, 2025 · Aug 20, 2025 · Aug 21, 2025 · Sep 1, 2025
diff --git a/src/coreclr/jit/hwintrinsiccodegenarm64.cpp b/src/coreclr/jit/hwintrinsiccodegenarm64.cpp
@@ -783,6 +783,18 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node)
                                 emitInsHelper(targetReg, maskReg, embMaskOp2Reg);
                                 break;
 
+                            case NI_Sve2_MaxNumberPairwise:
+                            case NI_Sve2_MinNumberPairwise:
+                                // These instructions have unpredictable behaviour when using predicated movprfx,
+                                // so the unpredicated variant must be used here.
+                                assert(!intrin.op3->isContained() && falseReg != REG_NA);
+                                GetEmitter()->emitIns_R_R(INS_sve_movprfx, EA_SCALABLE, targetReg, embMaskOp1Reg);
+                                GetEmitter()->emitIns_R_R_R(insEmbMask, emitSize, targetReg, maskReg, embMaskOp2Reg,
+                                                            embOpt, sopt);
+                                GetEmitter()->emitIns_R_R_R_R(INS_sve_sel, emitSize, targetReg, maskReg, targetReg,
+                                                              falseReg, opt);
+                                break;
+
                             default:
                                 assert(targetReg != embMaskOp2Reg);
 

diff --git a/src/coreclr/jit/lowerarmarch.cpp b/src/coreclr/jit/lowerarmarch.cpp
@@ -4061,8 +4061,20 @@ void Lowering::ContainCheckHWIntrinsic(GenTreeHWIntrinsic* node)
                     // When we are merging with zero, we can specialize
                     // and avoid instantiating the vector constant.
                     // Do this only if op1 was AllTrueMask
-                    MakeSrcContained(node, op3);
-                    LABELEDDISPTREERANGE("Contained false mask op3 in ConditionalSelect", BlockRange(), op3);
+                    switch (op2->AsHWIntrinsic()->GetHWIntrinsicId())
+                    {
+                        case NI_Sve2_MinNumberPairwise:
+                        case NI_Sve2_MaxNumberPairwise:
+                            // This is an edge case where these instructions have unpredictable behaviour when
+                            // using predicated movprfx, so the unpredicated variant must be used here. This
+                            // prevents us from performing this optimization as we will need the constant vector
+                            // for masking the result.
+                            break;
+
+                        default:
+                            MakeSrcContained(node, op3);
+                            LABELEDDISPTREERANGE("Contained false mask op3 in ConditionalSelect", BlockRange(), op3);
+                    }
                 }
 
                 break;

diff --git a/src/tests/Common/CoreCLRTestLibrary/Generator.cs b/src/tests/Common/CoreCLRTestLibrary/Generator.cs
@@ -47,6 +47,11 @@ public static int? Seed
             }
         }
 
+        // Returns a boolean derived from the shared m_rand generator: true when the
+        // value obtained from Next(0, 2) is 1, false otherwise.
+        public static bool GetBool() => m_rand.Next(0, 2) == 1;
+
         // returns a byte array of random data
         public static void GetBytes(int new_seed, byte[] buffer)
         {

diff --git a/src/tests/Common/CoreCLRTestLibrary/Vectors.cs b/src/tests/Common/CoreCLRTestLibrary/Vectors.cs
@@ -24,6 +24,25 @@ public static Vector<T> GetRandomVector<T>()
             {
                 data[i] = TestLibrary.Generator.GetByte();
             }
+
+            // TODO-ARM64-SVE: Some test functions do not support propagation of NaN/Inf values.
+            if (typeof(T) == typeof(float))
+            {
+                for (int i = 0; i < vsize / sizeof(float); i++)
+                {
+                    // Clear bit 23 to suppress generation of NaN/Inf values.
+                    data[i * sizeof(float) + 2] &= byte.CreateTruncating(~(1 << 7));
+                }
+            }
+            else if (typeof(T) == typeof(double))
+            {
+                for (int i = 0; i < vsize / sizeof(double); i++)
+                {
+                    // Clear bit 52 to suppress generation of NaN/Inf values.
+                    data[i * sizeof(double) + 6] &= byte.CreateTruncating(~(1 << 4));
+                }
+            }
+
             return new Vector<T>(data.AsSpan());
         }
 
@@ -37,7 +56,15 @@ public static Vector<T> GetRandomMask<T>()
             long count = vsize / tsize;
             for (int i = 0; i < count; i++)
             {
-                data[i * tsize] |= (byte)(TestLibrary.Generator.GetByte() & 1);
+                // Bias the generator to produce zero values at least 50% of the time.
+                // Elements that pass through this choice will be filled with random data.
+                if (TestLibrary.Generator.GetBool())
+                {
+                    for (int j = 0; j < tsize; j++)
+                    {
+                        data[i * tsize + j] = TestLibrary.Generator.GetByte();
+                    }
+                }
             }
 
             return new Vector<T>(data.AsSpan());

diff --git a/src/tests/JIT/HardwareIntrinsics/Arm/Shared/Helpers.cs b/src/tests/JIT/HardwareIntrinsics/Arm/Shared/Helpers.cs
@@ -29,12 +29,21 @@ public static Vector<T> InitVector<T>(Func<int, T> f)
             return new Vector<T>(arr);
         }
 
+        // Builds a new array of the same length as `vector` in which every non-zero
+        // element is replaced by T.One and every zero element by T.Zero.
+        public static T[] ConvertVectorToMask<T>(T[] vector) where T : IBinaryInteger<T>
+        {
+            T[] mask = new T[vector.Length];
+            int index = 0;
+
+            foreach (T element in vector)
+            {
+                mask[index++] = (element != T.Zero) ? T.One : T.Zero;
+            }
+
+            return mask;
+        }
+
         public static T[] CreateMaskForFirstActiveElement<T>(T[] mask, T[] srcMask)
             where T : unmanaged, IBinaryInteger<T>
         {
             int count = srcMask.Length;
-            T[] result = new T[count];
-            Array.Copy(srcMask, result, count);
+            T[] result = ConvertVectorToMask(srcMask);
 
             for (int i = 0; i < count; i++)
             {
@@ -1580,19 +1589,51 @@ public static ulong FusedAddHalving(ulong op1, ulong op2)
         public static long FusedAddHalving(long op1, long op2)
         {
             long sum = op1 + op2;
-            bool carry = sum < op1;
-            return (sum >> 1) + (carry ? 1L << 63 : 0);
+
+            if (op1 > 0 && op2 > 0 && sum < 0)
+            {
+                // Addition overflows into the sign bit, which simulates an
+                // unsigned 64-bit addition. We need to perform a logical shift
+                // to make sure the sign-bit is clear on the half value.
+                return (long)((ulong)sum >>> 1);
+            }
+            else if (op1 < 0 && op2 < 0 && sum > 0)
+            {
+                // Addition of negative values overflows beyond the sign-bit into
+                // the positive range. The halved value will be OK but we need to
+                // reinstate the sign bit which was lost.
+                return (long)((ulong)(sum >> 1) | (1UL << 63));
+            }
+            else
+            {
+                // No overflow, simply halve preserving sign-bit.
+                return sum >> 1;
+            }
         }
 
         public static long FusedSubtractHalving(long op1, long op2)
         {
-            ulong uop1 = (ulong)op1;
-            ulong uop2 = (ulong)op2;
+            long diff = op1 - op2;
 
-            ulong udiff = uop1 - uop2;
-            long sdiff = unchecked((long)udiff);
-
-            return sdiff >> 1;
+            if (op1 > 0 && op2 < 0 && diff < 0)
+            {
+                // Subtract of negative value overflows into the sign bit We need
+                // to perform a logical shift to make sure the sign-bit is clear
+                // on the half value.
+                return (long)((ulong)diff >>> 1);
+            }
+            else if (op1 < 0 && op2 > 0 && diff > 0)
+            {
+                // Subtraction of positive value overflows beyond the sign-bit into
+                // the positive range. The halved value will be OK but we need to
+                // reinstate the sign bit which was lost.
+                return (long)((ulong)(diff >> 1) | (1UL << 63));
+            }
+            else
+            {
+                // No overflow, simply halve preserving sign-bit.
+                return diff >> 1;
+            }
         }
 
         public static ulong FusedSubtractHalving(ulong op1, ulong op2)
@@ -1602,7 +1643,6 @@ public static ulong FusedSubtractHalving(ulong op1, ulong op2)
             return (diff >> 1) + (overflow ? 1UL << 63 : 0);
         }
 
-
         public static uint FusedAddRoundedHalving(uint op1, uint op2) => (uint)((ulong)((ulong)op1 + (ulong)op2 + 1) >> 1);
 
         public static uint FusedSubtractHalving(uint op1, uint op2) => (uint)((ulong)((ulong)op1 - (ulong)op2) >> 1);
@@ -2942,7 +2982,7 @@ private static sbyte SignedShift(sbyte op1, sbyte op2, bool rounding = false, bo
                 {
                     if (shiftOvf)
                     {
-                        result = op2 < 0 ? sbyte.MinValue : sbyte.MaxValue;
+                        return op1 > 0 ? sbyte.MaxValue : sbyte.MinValue;
                     }
                 }
             }
@@ -3140,8 +3180,19 @@ private static (byte val, bool ovf) SubtractOvf(byte op1, byte op2)
 
         public static sbyte AddSaturate(sbyte op1, sbyte op2)
         {
-            var (result, ovf) = AddOvf(op1, op2);
-            return ovf ? (result > 0 ? sbyte.MinValue : sbyte.MaxValue) : result;
+            // The sbyte operands are promoted to int, so the exact sum is available
+            // without wrapping; clamping it to the sbyte range yields the saturated value.
+            int widened = op1 + op2;
+            return (sbyte)Math.Clamp(widened, sbyte.MinValue, sbyte.MaxValue);
         }
 
         public static sbyte AddSaturate(sbyte op1, byte op2)
@@ -7517,7 +7568,7 @@ public static T[] CreateBreakPropagateMask<T>(T[] op1, T[] op2) where T : IBinar
 
             if (LastActive(mask, op1) != T.Zero)
             {
-                Array.Copy(op2, result, count);
+                result = ConvertVectorToMask(op2);
             }
 
             return result;
@@ -8185,7 +8236,34 @@ public static N SubtractRoundedHighNarrowingOdd<W, N>(N even, W op1, W op2, int
             return Odd<N>(even, SubtractRoundedHighNarrowing<W, N>(op1, op2), i);
         }
 
-        public static long FusedAddRoundedHalving(long op1, long op2) => (long)((ulong)(op1 + op2 + 1) >> 1);
+        // Signed rounding halving add: returns floor((op1 + op2 + 1) / 2) as if the sum
+        // had been computed with unlimited precision.
+        public static long FusedAddRoundedHalving(long op1, long op2)
+        {
+            // Halve each operand first. With an arithmetic shift the identity
+            // op == 2 * (op >> 1) + (op & 1) also holds for negative values, so the
+            // discarded low bits plus the rounding constant fold into a carry of 0 or 1.
+            // None of the intermediate values can leave the long range, so no overflow
+            // detection (and no exception handling) is needed, including for inputs
+            // such as (long.MaxValue, 0) where only the "+ 1" would overflow.
+            long carry = ((op1 & 1) + (op2 & 1) + 1) >> 1;
+            return (op1 >> 1) + (op2 >> 1) + carry;
+        }
 
         public static ulong FusedAddRoundedHalving(ulong op1, ulong op2)
         {