Skip to content

Commit 6b19ccd

Browse files
authored
[AArch64] Simplify some masked integer comparisons. (#153783)
Specifically, `X & M ?= C --> (C << clz(M)) ?= (X << clz(M))`, where M is a non-empty sequence of ones starting at the least significant bit with the remainder zero, and C is a constant subset of M (i.e. `(C & ~M) == 0`) that cannot be materialised into a SUBS (immediate).

Proof: https://alive2.llvm.org/ce/z/haqdJ4.

This improves the comparison in isinf, for example:

```cpp
int isinf(float x) {
  return __builtin_isinf(x);
}
```

Before:

```
isinf:
  fmov w9, s0
  mov w8, #2139095040
  and w9, w9, #0x7fffffff
  cmp w9, w8
  cset w0, eq
  ret
```

After:

```
isinf:
  fmov w9, s0
  mov w8, #-16777216
  cmp w8, w9, lsl #1
  cset w0, eq
  ret
```
1 parent 88658db commit 6b19ccd

File tree

3 files changed

+208
-6
lines changed

3 files changed

+208
-6
lines changed

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25512,6 +25512,32 @@ SDValue performCONDCombine(SDNode *N,
2551225512
CmpIndex, CC))
2551325513
return Val;
2551425514

25515+
// X & M ?= C --> (C << clz(M)) ?= (X << clz(M)) where M is a non-empty
25516+
// sequence of ones starting at the least significant bit with the remainder
25517+
// zero and C is a constant s.t. (C & ~M) == 0 that cannot be materialised
25518+
// into a SUBS (immediate). The transformed form can be matched into a SUBS
25519+
// (shifted register).
25520+
if ((CC == AArch64CC::EQ || CC == AArch64CC::NE) && AndNode->hasOneUse() &&
25521+
isa<ConstantSDNode>(AndNode->getOperand(1)) &&
25522+
isa<ConstantSDNode>(SubsNode->getOperand(1))) {
25523+
SDValue X = AndNode->getOperand(0);
25524+
APInt M = AndNode->getConstantOperandAPInt(1);
25525+
APInt C = SubsNode->getConstantOperandAPInt(1);
25526+
25527+
if (M.isMask() && C.isSubsetOf(M) && !isLegalArithImmed(C.getZExtValue())) {
25528+
SDLoc DL(SubsNode);
25529+
EVT VT = SubsNode->getValueType(0);
25530+
unsigned ShiftAmt = M.countl_zero();
25531+
SDValue ShiftedX = DAG.getNode(
25532+
ISD::SHL, DL, VT, X, DAG.getShiftAmountConstant(ShiftAmt, VT, DL));
25533+
SDValue ShiftedC = DAG.getConstant(C << ShiftAmt, DL, VT);
25534+
SDValue NewSubs = DAG.getNode(AArch64ISD::SUBS, DL, SubsNode->getVTList(),
25535+
ShiftedC, ShiftedX);
25536+
DCI.CombineTo(SubsNode, NewSubs, NewSubs.getValue(1));
25537+
return SDValue(N, 0);
25538+
}
25539+
}
25540+
2551525541
if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(AndNode->getOperand(1))) {
2551625542
uint32_t CNV = CN->getZExtValue();
2551725543
if (CNV == 255)

llvm/test/CodeGen/AArch64/isinf.ll

Lines changed: 4 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -27,9 +27,8 @@ define i32 @replace_isinf_call_f32(float %x) {
2727
; CHECK-LABEL: replace_isinf_call_f32:
2828
; CHECK: // %bb.0:
2929
; CHECK-NEXT: fmov w9, s0
30-
; CHECK-NEXT: mov w8, #2139095040 // =0x7f800000
31-
; CHECK-NEXT: and w9, w9, #0x7fffffff
32-
; CHECK-NEXT: cmp w9, w8
30+
; CHECK-NEXT: mov w8, #-16777216 // =0xff000000
31+
; CHECK-NEXT: cmp w8, w9, lsl #1
3332
; CHECK-NEXT: cset w0, eq
3433
; CHECK-NEXT: ret
3534
%abs = tail call float @llvm.fabs.f32(float %x)
@@ -43,9 +42,8 @@ define i32 @replace_isinf_call_f64(double %x) {
4342
; CHECK-LABEL: replace_isinf_call_f64:
4443
; CHECK: // %bb.0:
4544
; CHECK-NEXT: fmov x9, d0
46-
; CHECK-NEXT: mov x8, #9218868437227405312 // =0x7ff0000000000000
47-
; CHECK-NEXT: and x9, x9, #0x7fffffffffffffff
48-
; CHECK-NEXT: cmp x9, x8
45+
; CHECK-NEXT: mov x8, #-9007199254740992 // =0xffe0000000000000
46+
; CHECK-NEXT: cmp x8, x9, lsl #1
4947
; CHECK-NEXT: cset w0, eq
5048
; CHECK-NEXT: ret
5149
%abs = tail call double @llvm.fabs.f64(double %x)
Lines changed: 178 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,178 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2+
; RUN: llc -mtriple=aarch64-none-linux-gnu < %s -o -| FileCheck %s
3+
4+
; Test code generation support for SUBS (shifted register) from masked integer
5+
; compare sequences. These sequences appear in isinf tests, for example.
6+
7+
define i1 @combine_masked_i32(i32 %x) {
8+
; CHECK-LABEL: combine_masked_i32:
9+
; CHECK: // %bb.0:
10+
; CHECK-NEXT: mov w8, #-16777216 // =0xff000000
11+
; CHECK-NEXT: cmp w8, w0, lsl #1
12+
; CHECK-NEXT: cset w0, eq
13+
; CHECK-NEXT: ret
14+
%and = and i32 %x, u0x7fffffff
15+
%sub = sub i32 %and, u0x7f800000
16+
%cmp = icmp eq i32 %sub, 0
17+
ret i1 %cmp
18+
}
19+
20+
define i1 @combine_masked_i64(i64 %x) {
21+
; CHECK-LABEL: combine_masked_i64:
22+
; CHECK: // %bb.0:
23+
; CHECK-NEXT: mov x8, #-9007199254740992 // =0xffe0000000000000
24+
; CHECK-NEXT: cmp x8, x0, lsl #1
25+
; CHECK-NEXT: cset w0, eq
26+
; CHECK-NEXT: ret
27+
%and = and i64 %x, u0x7fffffffffffffff
28+
%sub = sub i64 %and, u0x7ff0000000000000
29+
%cmp = icmp eq i64 %sub, 0
30+
ret i1 %cmp
31+
}
32+
33+
define i1 @combine_masked_ne(i32 %x) {
34+
; CHECK-LABEL: combine_masked_ne:
35+
; CHECK: // %bb.0:
36+
; CHECK-NEXT: mov w8, #-16777216 // =0xff000000
37+
; CHECK-NEXT: cmp w8, w0, lsl #1
38+
; CHECK-NEXT: cset w0, ne
39+
; CHECK-NEXT: ret
40+
%and = and i32 %x, u0x7fffffff
41+
%cmp = icmp ne i32 %and, u0x7f800000
42+
ret i1 %cmp
43+
}
44+
45+
define i1 @combine_masked_lsl4(i32 %x) {
46+
; CHECK-LABEL: combine_masked_lsl4:
47+
; CHECK: // %bb.0:
48+
; CHECK-NEXT: mov w8, #-134217728 // =0xf8000000
49+
; CHECK-NEXT: cmp w8, w0, lsl #4
50+
; CHECK-NEXT: cset w0, eq
51+
; CHECK-NEXT: ret
52+
%and = and i32 %x, u0x0fffffff
53+
%cmp = icmp eq i32 %and, u0x0f800000
54+
ret i1 %cmp
55+
}
56+
57+
define i1 @dont_combine_not_mask(i32 %x) {
58+
; CHECK-LABEL: dont_combine_not_mask:
59+
; CHECK: // %bb.0:
60+
; CHECK-NEXT: mov w8, #2139095040 // =0x7f800000
61+
; CHECK-NEXT: and w9, w0, #0x7ffffffe
62+
; CHECK-NEXT: cmp w9, w8
63+
; CHECK-NEXT: cset w0, eq
64+
; CHECK-NEXT: ret
65+
%and = and i32 %x, u0x7ffffffe
66+
%cmp = icmp eq i32 %and, u0x7f800000
67+
ret i1 %cmp
68+
}
69+
70+
define i1 @dont_combine_cmp_not_masked(i32 %x) {
71+
; CHECK-LABEL: dont_combine_cmp_not_masked:
72+
; CHECK: // %bb.0:
73+
; CHECK-NEXT: mov w8, #2139095040 // =0x7f800000
74+
; CHECK-NEXT: and w9, w0, #0x3fffffff
75+
; CHECK-NEXT: cmp w9, w8
76+
; CHECK-NEXT: cset w0, eq
77+
; CHECK-NEXT: ret
78+
%and = and i32 %x, u0x3fffffff
79+
%cmp = icmp eq i32 %and, u0x7f800000
80+
ret i1 %cmp
81+
}
82+
83+
define i1 @dont_combine_not_constant_mask(i32 %x, i32 %m) {
84+
; CHECK-LABEL: dont_combine_not_constant_mask:
85+
; CHECK: // %bb.0:
86+
; CHECK-NEXT: mov w8, #2139095040 // =0x7f800000
87+
; CHECK-NEXT: and w9, w0, w1
88+
; CHECK-NEXT: cmp w9, w8
89+
; CHECK-NEXT: cset w0, eq
90+
; CHECK-NEXT: ret
91+
%and = and i32 %x, %m
92+
%cmp = icmp eq i32 %and, u0x7f800000
93+
ret i1 %cmp
94+
}
95+
96+
define i1 @dont_combine_not_constant_cmp(i32 %x, i32 %c) {
97+
; CHECK-LABEL: dont_combine_not_constant_cmp:
98+
; CHECK: // %bb.0:
99+
; CHECK-NEXT: and w8, w0, #0xfffffff
100+
; CHECK-NEXT: cmp w8, w1
101+
; CHECK-NEXT: cset w0, eq
102+
; CHECK-NEXT: ret
103+
%and = and i32 %x, u0x0fffffff
104+
%cmp = icmp eq i32 %and, %c
105+
ret i1 %cmp
106+
}
107+
108+
define i1 @dont_combine_subs_imm(i32 %x) {
109+
; CHECK-LABEL: dont_combine_subs_imm:
110+
; CHECK: // %bb.0:
111+
; CHECK-NEXT: and w8, w0, #0x7fffffff
112+
; CHECK-NEXT: cmp w8, #291
113+
; CHECK-NEXT: cset w0, eq
114+
; CHECK-NEXT: ret
115+
%and = and i32 %x, u0x7fffffff
116+
%cmp = icmp eq i32 %and, u0x123
117+
ret i1 %cmp
118+
}
119+
120+
define i1 @dont_combine_subs_imm_lsl12(i32 %x) {
121+
; CHECK-LABEL: dont_combine_subs_imm_lsl12:
122+
; CHECK: // %bb.0:
123+
; CHECK-NEXT: and w8, w0, #0x7fffffff
124+
; CHECK-NEXT: cmp w8, #291, lsl #12 // =1191936
125+
; CHECK-NEXT: cset w0, eq
126+
; CHECK-NEXT: ret
127+
%and = and i32 %x, u0x7fffffff
128+
%cmp = icmp eq i32 %and, u0x123000
129+
ret i1 %cmp
130+
}
131+
132+
define { i1, i1 } @dont_combine_multi_use_cmp(i32 %x) {
133+
; CHECK-LABEL: dont_combine_multi_use_cmp:
134+
; CHECK: // %bb.0:
135+
; CHECK-NEXT: mov w8, #2139095040 // =0x7f800000
136+
; CHECK-NEXT: and w9, w0, #0x7fffffff
137+
; CHECK-NEXT: cmp w9, w8
138+
; CHECK-NEXT: cset w0, eq
139+
; CHECK-NEXT: cset w1, lt
140+
; CHECK-NEXT: ret
141+
%and = and i32 %x, u0x7fffffff
142+
%eq = icmp eq i32 %and, u0x7f800000
143+
%lt = icmp slt i32 %and, u0x7f800000
144+
%r1 = insertvalue { i1, i1 } poison, i1 %eq, 0
145+
%r2 = insertvalue { i1, i1 } %r1, i1 %lt, 1
146+
ret { i1, i1 } %r2
147+
}
148+
149+
define { i32, i1 } @dont_combine_multi_use_sub(i32 %x) {
150+
; CHECK-LABEL: dont_combine_multi_use_sub:
151+
; CHECK: // %bb.0:
152+
; CHECK-NEXT: mov w8, #-2139095040 // =0x80800000
153+
; CHECK-NEXT: and w9, w0, #0x7fffffff
154+
; CHECK-NEXT: adds w0, w9, w8
155+
; CHECK-NEXT: cset w1, eq
156+
; CHECK-NEXT: ret
157+
%and = and i32 %x, u0x7fffffff
158+
%sub = sub i32 %and, u0x7f800000
159+
%cmp = icmp eq i32 %sub, 0
160+
%r1 = insertvalue { i32, i1 } poison, i32 %sub, 0
161+
%r2 = insertvalue { i32, i1 } %r1, i1 %cmp, 1
162+
ret { i32, i1 } %r2
163+
}
164+
165+
define { i32, i1 } @dont_combine_multi_use_and(i32 %x) {
166+
; CHECK-LABEL: dont_combine_multi_use_and:
167+
; CHECK: // %bb.0:
168+
; CHECK-NEXT: mov w8, #2139095040 // =0x7f800000
169+
; CHECK-NEXT: and w0, w0, #0x7fffffff
170+
; CHECK-NEXT: cmp w0, w8
171+
; CHECK-NEXT: cset w1, eq
172+
; CHECK-NEXT: ret
173+
%and = and i32 %x, u0x7fffffff
174+
%cmp = icmp eq i32 %and, u0x7f800000
175+
%r1 = insertvalue { i32, i1 } poison, i32 %and, 0
176+
%r2 = insertvalue { i32, i1 } %r1, i1 %cmp, 1
177+
ret { i32, i1 } %r2
178+
}

0 commit comments

Comments
 (0)