@@ -20,22 +20,22 @@ define i32 @red_zext_mul_by_63(ptr %start, ptr %end) {
20
20
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
21
21
; CHECK: [[VECTOR_BODY]]:
22
22
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
23
- ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <16 x i32> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP5:%.*]], %[[VECTOR_BODY]] ]
23
+ ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[PARTIAL_REDUCE:%.*]], %[[VECTOR_BODY]] ]
24
24
; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[START]], i64 [[INDEX]]
25
25
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[NEXT_GEP]], align 1
26
26
; CHECK-NEXT: [[TMP3:%.*]] = zext <16 x i8> [[WIDE_LOAD]] to <16 x i32>
27
27
; CHECK-NEXT: [[TMP4:%.*]] = mul <16 x i32> [[TMP3]], splat (i32 63)
28
- ; CHECK-NEXT: [[TMP5]] = add <16 x i32> [[VEC_PHI]], [[TMP4]]
28
+ ; CHECK-NEXT: [[PARTIAL_REDUCE]] = call <4 x i32> @llvm.vector.partial.reduce.add.v4i32.v16i32(<4 x i32> [[VEC_PHI]], <16 x i32> [[TMP4]])
29
29
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
30
- ; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
31
- ; CHECK-NEXT: br i1 [[TMP6]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
30
+ ; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
31
+ ; CHECK-NEXT: br i1 [[TMP5]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
32
32
; CHECK: [[MIDDLE_BLOCK]]:
33
- ; CHECK-NEXT: [[TMP7:%.*]] = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> [[TMP5]])
33
+ ; CHECK-NEXT: [[TMP6:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[PARTIAL_REDUCE]])
34
34
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP1]], [[N_VEC]]
35
35
; CHECK-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]]
36
36
; CHECK: [[SCALAR_PH]]:
37
37
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi ptr [ [[TMP2]], %[[MIDDLE_BLOCK]] ], [ [[START]], %[[ENTRY]] ]
38
- ; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP7]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
38
+ ; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP6]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
39
39
; CHECK-NEXT: br label %[[LOOP:.*]]
40
40
; CHECK: [[LOOP]]:
41
41
; CHECK-NEXT: [[PTR_IV:%.*]] = phi ptr [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[GEP_IV_NEXT:%.*]], %[[LOOP]] ]
@@ -48,7 +48,7 @@ define i32 @red_zext_mul_by_63(ptr %start, ptr %end) {
48
48
; CHECK-NEXT: [[EC:%.*]] = icmp eq ptr [[PTR_IV]], [[END]]
49
49
; CHECK-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP3:![0-9]+]]
50
50
; CHECK: [[EXIT]]:
51
- ; CHECK-NEXT: [[RED_NEXT_LCSSA:%.*]] = phi i32 [ [[RED_NEXT]], %[[LOOP]] ], [ [[TMP7]], %[[MIDDLE_BLOCK]] ]
51
+ ; CHECK-NEXT: [[RED_NEXT_LCSSA:%.*]] = phi i32 [ [[RED_NEXT]], %[[LOOP]] ], [ [[TMP6]], %[[MIDDLE_BLOCK]] ]
52
52
; CHECK-NEXT: ret i32 [[RED_NEXT_LCSSA]]
53
53
;
54
54
entry:
@@ -86,17 +86,17 @@ define i32 @red_zext_mul_by_255(ptr %start, ptr %end) {
86
86
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
87
87
; CHECK: [[VECTOR_BODY]]:
88
88
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
89
- ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <16 x i32> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP5:%.*]], %[[VECTOR_BODY]] ]
89
+ ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[PARTIAL_REDUCE:%.*]], %[[VECTOR_BODY]] ]
90
90
; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[START]], i64 [[INDEX]]
91
91
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[NEXT_GEP]], align 1
92
92
; CHECK-NEXT: [[TMP3:%.*]] = zext <16 x i8> [[WIDE_LOAD]] to <16 x i32>
93
93
; CHECK-NEXT: [[TMP4:%.*]] = mul <16 x i32> [[TMP3]], splat (i32 255)
94
- ; CHECK-NEXT: [[TMP5]] = add <16 x i32> [[VEC_PHI]], [[TMP4]]
94
+ ; CHECK-NEXT: [[PARTIAL_REDUCE]] = call <4 x i32> @llvm.vector.partial.reduce.add.v4i32.v16i32(<4 x i32> [[VEC_PHI]], <16 x i32> [[TMP4]])
95
95
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
96
96
; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
97
97
; CHECK-NEXT: br i1 [[TMP6]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
98
98
; CHECK: [[MIDDLE_BLOCK]]:
99
- ; CHECK-NEXT: [[TMP7:%.*]] = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> [[TMP5]])
99
+ ; CHECK-NEXT: [[TMP7:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[PARTIAL_REDUCE]])
100
100
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP1]], [[N_VEC]]
101
101
; CHECK-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]]
102
102
; CHECK: [[SCALAR_PH]]:
@@ -218,22 +218,22 @@ define i32 @red_sext_mul_by_63(ptr %start, ptr %end) {
218
218
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
219
219
; CHECK: [[VECTOR_BODY]]:
220
220
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
221
- ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <16 x i32> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP5:%.*]], %[[VECTOR_BODY]] ]
221
+ ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[PARTIAL_REDUCE:%.*]], %[[VECTOR_BODY]] ]
222
222
; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[START]], i64 [[INDEX]]
223
223
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[NEXT_GEP]], align 1
224
224
; CHECK-NEXT: [[TMP3:%.*]] = sext <16 x i8> [[WIDE_LOAD]] to <16 x i32>
225
225
; CHECK-NEXT: [[TMP4:%.*]] = mul <16 x i32> [[TMP3]], splat (i32 63)
226
- ; CHECK-NEXT: [[TMP5]] = add <16 x i32> [[VEC_PHI]], [[TMP4]]
226
+ ; CHECK-NEXT: [[PARTIAL_REDUCE]] = call <4 x i32> @llvm.vector.partial.reduce.add.v4i32.v16i32(<4 x i32> [[VEC_PHI]], <16 x i32> [[TMP4]])
227
227
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
228
- ; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
229
- ; CHECK-NEXT: br i1 [[TMP6]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
228
+ ; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
229
+ ; CHECK-NEXT: br i1 [[TMP5]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
230
230
; CHECK: [[MIDDLE_BLOCK]]:
231
- ; CHECK-NEXT: [[TMP7:%.*]] = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> [[TMP5]])
231
+ ; CHECK-NEXT: [[TMP6:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[PARTIAL_REDUCE]])
232
232
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP1]], [[N_VEC]]
233
233
; CHECK-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]]
234
234
; CHECK: [[SCALAR_PH]]:
235
235
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi ptr [ [[TMP2]], %[[MIDDLE_BLOCK]] ], [ [[START]], %[[ENTRY]] ]
236
- ; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP7]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
236
+ ; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP6]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
237
237
; CHECK-NEXT: br label %[[LOOP:.*]]
238
238
; CHECK: [[LOOP]]:
239
239
; CHECK-NEXT: [[PTR_IV:%.*]] = phi ptr [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[GEP_IV_NEXT:%.*]], %[[LOOP]] ]
@@ -246,7 +246,7 @@ define i32 @red_sext_mul_by_63(ptr %start, ptr %end) {
246
246
; CHECK-NEXT: [[EC:%.*]] = icmp eq ptr [[PTR_IV]], [[END]]
247
247
; CHECK-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP9:![0-9]+]]
248
248
; CHECK: [[EXIT]]:
249
- ; CHECK-NEXT: [[RED_NEXT_LCSSA:%.*]] = phi i32 [ [[RED_NEXT]], %[[LOOP]] ], [ [[TMP7]], %[[MIDDLE_BLOCK]] ]
249
+ ; CHECK-NEXT: [[RED_NEXT_LCSSA:%.*]] = phi i32 [ [[RED_NEXT]], %[[LOOP]] ], [ [[TMP6]], %[[MIDDLE_BLOCK]] ]
250
250
; CHECK-NEXT: ret i32 [[RED_NEXT_LCSSA]]
251
251
;
252
252
entry:
0 commit comments