Skip to content

Commit 19e697d

Browse files
committed
Have certain mask simplification operations happen earlier than morph
1 parent 655b0ea commit 19e697d

File tree

5 files changed

+265
-336
lines changed

5 files changed

+265
-336
lines changed

src/coreclr/jit/compiler.h

Lines changed: 0 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -6697,15 +6697,6 @@ class Compiler
66976697
GenTree* fgMorphHWIntrinsicOptional(GenTreeHWIntrinsic* tree);
66986698
GenTree* fgOptimizeHWIntrinsic(GenTreeHWIntrinsic* node);
66996699
GenTree* fgOptimizeHWIntrinsicAssociative(GenTreeHWIntrinsic* node);
6700-
#if defined(FEATURE_MASKED_HW_INTRINSICS)
6701-
GenTreeHWIntrinsic* fgOptimizeForMaskedIntrinsic(GenTreeHWIntrinsic* node);
6702-
#endif // FEATURE_MASKED_HW_INTRINSICS
6703-
#ifdef TARGET_ARM64
6704-
bool canMorphVectorOperandToMask(GenTree* node);
6705-
bool canMorphAllVectorOperandsToMasks(GenTreeHWIntrinsic* node);
6706-
GenTree* doMorphVectorOperandToMask(GenTree* node, GenTreeHWIntrinsic* parent);
6707-
GenTreeHWIntrinsic* fgMorphTryUseAllMaskVariant(GenTreeHWIntrinsic* node);
6708-
#endif // TARGET_ARM64
67096700
#endif // FEATURE_HW_INTRINSICS
67106701
GenTree* fgOptimizeCommutativeArithmetic(GenTreeOp* tree);
67116702
GenTree* fgOptimizeRelationalComparisonWithCasts(GenTreeOp* cmp);

src/coreclr/jit/gentree.cpp

Lines changed: 215 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -32083,10 +32083,8 @@ bool GenTree::CanDivOrModPossiblyOverflow(Compiler* comp) const
3208332083
#if defined(FEATURE_HW_INTRINSICS)
3208432084
GenTree* Compiler::gtFoldExprHWIntrinsic(GenTreeHWIntrinsic* tree)
3208532085
{
32086-
if (!opts.Tier0OptimizationEnabled())
32087-
{
32088-
return tree;
32089-
}
32086+
assert(!optValnumCSE_phase);
32087+
assert(opts.Tier0OptimizationEnabled());
3209032088

3209132089
NamedIntrinsic ni = tree->GetHWIntrinsicId();
3209232090
var_types retType = tree->TypeGet();
@@ -32225,6 +32223,133 @@ GenTree* Compiler::gtFoldExprHWIntrinsic(GenTreeHWIntrinsic* tree)
3222532223
// We shouldn't find AND_NOT nodes since it should only be produced in lowering
3222632224
assert(oper != GT_AND_NOT);
3222732225

32226+
#if defined(FEATURE_MASKED_HW_INTRINSICS) && defined(TARGET_XARCH)
32227+
if (GenTreeHWIntrinsic::OperIsBitwiseHWIntrinsic(oper))
32228+
{
32229+
// Comparisons that produce masks lead to more verbose trees than
32230+
// necessary in many scenarios due to requiring a CvtMaskToVector
32231+
// node to be inserted over them and this can block various opts
32232+
// that are dependent on tree height and similar. So we want to
32233+
// fold the unnecessary back and forth conversions away where possible.
32234+
32235+
genTreeOps effectiveOper = oper;
32236+
GenTree* actualOp2 = op2;
32237+
32238+
if (oper == GT_NOT)
32239+
{
32240+
assert(op2 == nullptr);
32241+
op2 = op1;
32242+
}
32243+
32244+
// We need both operands to be ConvertMaskToVector in
32245+
// order to optimize this to a direct mask operation
32246+
32247+
if (!op1->OperIsConvertMaskToVector())
32248+
{
32249+
return tree;
32250+
}
32251+
32252+
if (!op2->OperIsHWIntrinsic())
32253+
{
32254+
if ((oper != GT_XOR) || !op2->IsVectorAllBitsSet())
32255+
{
32256+
return tree;
32257+
}
32258+
32259+
// We want to explicitly recognize op1 ^ AllBitsSet as
32260+
// some platforms don't have direct support for ~op1
32261+
32262+
effectiveOper = GT_NOT;
32263+
op2 = op1;
32264+
}
32265+
32266+
GenTreeHWIntrinsic* cvtOp1 = op1->AsHWIntrinsic();
32267+
GenTreeHWIntrinsic* cvtOp2 = op2->AsHWIntrinsic();
32268+
32269+
if (!cvtOp2->OperIsConvertMaskToVector())
32270+
{
32271+
return tree;
32272+
}
32273+
32274+
unsigned simdBaseTypeSize = genTypeSize(simdBaseType);
32275+
32276+
if ((genTypeSize(cvtOp1->GetSimdBaseType()) != simdBaseTypeSize) ||
32277+
(genTypeSize(cvtOp2->GetSimdBaseType()) != simdBaseTypeSize))
32278+
{
32279+
// We need both operands to be the same kind of mask; otherwise
32280+
// the bitwise operation can differ in how it performs
32281+
return tree;
32282+
}
32283+
32284+
NamedIntrinsic maskIntrinsicId = NI_Illegal;
32285+
32286+
switch (effectiveOper)
32287+
{
32288+
case GT_AND:
32289+
{
32290+
maskIntrinsicId = NI_AVX512_AndMask;
32291+
break;
32292+
}
32293+
32294+
case GT_NOT:
32295+
{
32296+
maskIntrinsicId = NI_AVX512_NotMask;
32297+
break;
32298+
}
32299+
32300+
case GT_OR:
32301+
{
32302+
maskIntrinsicId = NI_AVX512_OrMask;
32303+
break;
32304+
}
32305+
32306+
case GT_XOR:
32307+
{
32308+
maskIntrinsicId = NI_AVX512_XorMask;
32309+
break;
32310+
}
32311+
32312+
default:
32313+
{
32314+
unreached();
32315+
}
32316+
}
32317+
32318+
assert(maskIntrinsicId != NI_Illegal);
32319+
32320+
if (effectiveOper == oper)
32321+
{
32322+
tree->ChangeHWIntrinsicId(maskIntrinsicId);
32323+
tree->Op(1) = cvtOp1->Op(1);
32324+
}
32325+
else
32326+
{
32327+
assert(effectiveOper == GT_NOT);
32328+
tree->ResetHWIntrinsicId(maskIntrinsicId, this, cvtOp1->Op(1));
32329+
tree->gtFlags &= ~GTF_REVERSE_OPS;
32330+
}
32331+
32332+
tree->gtType = TYP_MASK;
32333+
DEBUG_DESTROY_NODE(op1);
32334+
32335+
if (effectiveOper != GT_NOT)
32336+
{
32337+
tree->Op(2) = cvtOp2->Op(1);
32338+
}
32339+
32340+
if (actualOp2 != nullptr)
32341+
{
32342+
DEBUG_DESTROY_NODE(actualOp2);
32343+
}
32344+
tree->SetMorphed(this);
32345+
32346+
tree = gtNewSimdCvtMaskToVectorNode(retType, tree, simdBaseJitType, simdSize)->AsHWIntrinsic();
32347+
tree->SetMorphed(this);
32348+
32349+
return tree;
32350+
}
32351+
#endif // FEATURE_MASKED_HW_INTRINSICS && TARGET_XARCH
32352+
3222832353
GenTree* cnsNode = nullptr;
3222932354
GenTree* otherNode = nullptr;
3223032355

@@ -32762,10 +32887,28 @@ GenTree* Compiler::gtFoldExprHWIntrinsic(GenTreeHWIntrinsic* tree)
3276232887
oper = GT_NONE;
3276332888
}
3276432889

32890+
// For mask nodes in particular, the foldings below are done under the presumption
32891+
// that we only produce something like `AddMask(op1, op2)` if op1 and op2 are compatible
32892+
// masks. On xarch, for example, this means that it'd be adding 8, 16, 32, or 64-bits
32893+
// together with the same size. We wouldn't ever encounter something like an 8 and 16 bit
32894+
// masks being added. This ensures that we don't end up with a case where folding would
32895+
// cause a different result to be produced, such as because the remaining upper bits are
32896+
// no longer zeroed.
32897+
3276532898
switch (oper)
3276632899
{
3276732900
case GT_ADD:
3276832901
{
32902+
if (varTypeIsMask(retType))
32903+
{
32904+
// Handle `x + 0 == x` and `0 + x == x`
32905+
if (cnsNode->IsMaskZero())
32906+
{
32907+
resultNode = otherNode;
32908+
}
32909+
break;
32910+
}
32911+
3276932912
if (varTypeIsFloating(simdBaseType))
3277032913
{
3277132914
// Handle `x + NaN == NaN` and `NaN + x == NaN`
@@ -32799,6 +32942,23 @@ GenTree* Compiler::gtFoldExprHWIntrinsic(GenTreeHWIntrinsic* tree)
3279932942

3280032943
case GT_AND:
3280132944
{
32945+
if (varTypeIsMask(retType))
32946+
{
32947+
// Handle `x & 0 == 0` and `0 & x == 0`
32948+
if (cnsNode->IsMaskZero())
32949+
{
32950+
resultNode = otherNode;
32951+
break;
32952+
}
32953+
32954+
// Handle `x & AllBitsSet == x` and `AllBitsSet & x == x`
32955+
if (cnsNode->IsMaskAllBitsSet())
32956+
{
32957+
resultNode = otherNode;
32958+
}
32959+
break;
32960+
}
32961+
3280232962
// Handle `x & 0 == 0` and `0 & x == 0`
3280332963
if (cnsNode->IsVectorZero())
3280432964
{
@@ -33032,6 +33192,23 @@ GenTree* Compiler::gtFoldExprHWIntrinsic(GenTreeHWIntrinsic* tree)
3303233192

3303333193
case GT_OR:
3303433194
{
33195+
if (varTypeIsMask(retType))
33196+
{
33197+
// Handle `x | 0 == x` and `0 | x == x`
33198+
if (cnsNode->IsMaskZero())
33199+
{
33200+
resultNode = otherNode;
33201+
break;
33202+
}
33203+
33204+
// Handle `x | AllBitsSet == AllBitsSet` and `AllBitsSet | x == AllBitsSet`
33205+
if (cnsNode->IsMaskAllBitsSet())
33206+
{
33207+
resultNode = gtWrapWithSideEffects(cnsNode, otherNode, GTF_ALL_EFFECT);
33208+
}
33209+
break;
33210+
}
33211+
3303533212
// Handle `x | 0 == x` and `0 | x == x`
3303633213
if (cnsNode->IsVectorZero())
3303733214
{
@@ -33059,6 +33236,27 @@ GenTree* Compiler::gtFoldExprHWIntrinsic(GenTreeHWIntrinsic* tree)
3305933236
// Handle `x >> 0 == x` and `0 >> x == 0`
3306033237
// Handle `x >>> 0 == x` and `0 >>> x == 0`
3306133238

33239+
if (varTypeIsMask(retType))
33240+
{
33241+
if (cnsNode->IsMaskZero())
33242+
{
33243+
if (cnsNode == op2)
33244+
{
33245+
resultNode = otherNode;
33246+
}
33247+
else
33248+
{
33249+
resultNode = gtWrapWithSideEffects(cnsNode, otherNode, GTF_ALL_EFFECT);
33250+
}
33251+
}
33252+
else if (cnsNode->IsIntegralConst(0))
33253+
{
33254+
assert(cnsNode == op2);
33255+
resultNode = otherNode;
33256+
}
33257+
break;
33258+
}
33259+
3306233260
if (cnsNode->IsVectorZero())
3306333261
{
3306433262
if (cnsNode == op2)
@@ -33104,7 +33302,17 @@ GenTree* Compiler::gtFoldExprHWIntrinsic(GenTreeHWIntrinsic* tree)
3310433302

3310533303
case GT_XOR:
3310633304
{
33107-
// Handle `x | 0 == x` and `0 | x == x`
33305+
if (varTypeIsMask(retType))
33306+
{
33307+
// Handle `x ^ 0 == x` and `0 ^ x == x`
33308+
if (cnsNode->IsMaskZero())
33309+
{
33310+
resultNode = otherNode;
33311+
}
33312+
break;
33313+
}
33314+
33315+
// Handle `x ^ 0 == x` and `0 ^ x == x`
3310833316
if (cnsNode->IsVectorZero())
3310933317
{
3311033318
resultNode = otherNode;
@@ -33273,7 +33481,7 @@ GenTree* Compiler::gtFoldExprHWIntrinsic(GenTreeHWIntrinsic* tree)
3327333481
}
3327433482
else
3327533483
{
33276-
assert(!op1->IsTrueMask(simdBaseType) && !op1->IsFalseMask());
33484+
assert(!op1->IsTrueMask(simdBaseType) && !op1->IsMaskZero());
3327733485
}
3327833486
#endif
3327933487

@@ -33291,7 +33499,7 @@ GenTree* Compiler::gtFoldExprHWIntrinsic(GenTreeHWIntrinsic* tree)
3329133499
return op2;
3329233500
}
3329333501

33294-
if (op1->IsVectorZero() || op1->IsFalseMask())
33502+
if (op1->IsVectorZero() || op1->IsMaskZero())
3329533503
{
3329633504
return gtWrapWithSideEffects(op3, op2, GTF_ALL_EFFECT);
3329733505
}

src/coreclr/jit/gentree.h

Lines changed: 38 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -1803,8 +1803,9 @@ struct GenTree
18031803
inline bool IsVectorCreate() const;
18041804
inline bool IsVectorAllBitsSet() const;
18051805
inline bool IsVectorBroadcast(var_types simdBaseType) const;
1806+
inline bool IsMaskZero() const;
1807+
inline bool IsMaskAllBitsSet() const;
18061808
inline bool IsTrueMask(var_types simdBaseType) const;
1807-
inline bool IsFalseMask() const;
18081809

18091810
inline uint64_t GetIntegralVectorConstElement(size_t index, var_types simdBaseType);
18101811

@@ -9598,6 +9599,42 @@ inline bool GenTree::IsVectorBroadcast(var_types simdBaseType) const
95989599
return false;
95999600
}
96009601

9602+
//-------------------------------------------------------------------
9603+
// IsMaskZero: returns true if this node is a mask constant with all bits zero.
9604+
//
9605+
// Returns:
9606+
// True if this node is a mask constant with all bits zero
9607+
//
9608+
inline bool GenTree::IsMaskZero() const
9609+
{
9610+
#if defined(FEATURE_MASKED_HW_INTRINSICS)
9611+
if (IsCnsMsk())
9612+
{
9613+
return AsMskCon()->IsZero();
9614+
}
9615+
#endif // FEATURE_MASKED_HW_INTRINSICS
9616+
9617+
return false;
9618+
}
9619+
9620+
//-------------------------------------------------------------------
9621+
// IsMaskAllBitsSet: returns true if this node is a mask constant with all bits set.
9622+
//
9623+
// Returns:
9624+
// True if this node is a mask constant with all bits set
9625+
//
9626+
inline bool GenTree::IsMaskAllBitsSet() const
9627+
{
9628+
#if defined(FEATURE_MASKED_HW_INTRINSICS)
9629+
if (IsCnsMsk())
9630+
{
9631+
return AsMskCon()->IsAllBitsSet();
9632+
}
9633+
#endif // FEATURE_MASKED_HW_INTRINSICS
9634+
9635+
return false;
9636+
}
9637+
96019638
//------------------------------------------------------------------------
96029639
// IsTrueMask: Is the given node a true mask
96039640
//
@@ -9624,23 +9661,6 @@ inline bool GenTree::IsTrueMask(var_types simdBaseType) const
96249661
return false;
96259662
}
96269663

9627-
//------------------------------------------------------------------------
9628-
// IsFalseMask: Is the given node a false mask
9629-
//
9630-
// Returns true if the node is a false mask, ie all zeros
9631-
//
9632-
inline bool GenTree::IsFalseMask() const
9633-
{
9634-
#ifdef TARGET_ARM64
9635-
if (IsCnsMsk())
9636-
{
9637-
return AsMskCon()->IsZero();
9638-
}
9639-
#endif
9640-
9641-
return false;
9642-
}
9643-
96449664
//-------------------------------------------------------------------
96459665
// GetIntegralVectorConstElement: Gets the value of a given element in an integral vector constant
96469666
//

src/coreclr/jit/lowerarmarch.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3942,7 +3942,7 @@ void Lowering::ContainCheckHWIntrinsic(GenTreeHWIntrinsic* node)
39423942
GenTree* op3 = intrin.op3;
39433943

39443944
// Handle op1
3945-
if (op1->IsFalseMask())
3945+
if (op1->IsMaskZero())
39463946
{
39473947
// When we are merging with zero, we can specialize
39483948
// and avoid instantiating the vector constant.

0 commit comments

Comments
 (0)