diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index f77d587bf5889..45599798005c3 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -10684,6 +10684,9 @@ class InstructionsCompatibilityAnalysis { } unsigned BestOpcodeNum = 0; MainOp = nullptr; + // All opcodes are the same - exit. + if (Candidates.size() == 1 && Candidates.back().second.size() == VL.size()) + return; for (const auto &P : Candidates) { if (P.second.size() < BestOpcodeNum) continue; @@ -10905,6 +10908,13 @@ class InstructionsCompatibilityAnalysis { findAndSetMainInstruction(VL, R); if (!MainOp) return InstructionsState::invalid(); + InstructionsState AltS = InstructionsState::invalid(); + if (SkipSameCodeCheck && allSameBlock(VL)) { + AltS = getSameOpcode(VL, TLI); + if (AltS.valid() && !AltS.isAltShuffle()) + return AltS; + } + S = InstructionsState(MainOp, MainOp, /*HasCopyables=*/true); if (!WithProfitabilityCheck) return S; @@ -11383,27 +11393,27 @@ void BoUpSLP::buildTreeRec(ArrayRef VLRef, unsigned Depth, } ScalarsVectorizationLegality Legality = getScalarsVectorizationLegality( - VL, Depth, UserTreeIdx, /*TryCopyableElementsVectorization=*/false); + VL, Depth, UserTreeIdx, /*TryCopyableElementsVectorization=*/true); InstructionsState S = Legality.getInstructionsState(); if (!Legality.isLegal()) { - if (Legality.trySplitVectorize()) { - auto [MainOp, AltOp] = getMainAltOpsNoStateVL(VL); - // Last chance to try to vectorize alternate node. - if (MainOp && AltOp && TrySplitNode(InstructionsState(MainOp, AltOp))) - return; - } - if (!S) + if (!S) { Legality = getScalarsVectorizationLegality( - VL, Depth, UserTreeIdx, /*TryCopyableElementsVectorization=*/true); + VL, Depth, UserTreeIdx, /*TryCopyableElementsVectorization=*/false); + S = Legality.getInstructionsState(); + } if (!Legality.isLegal()) { + if (Legality.trySplitVectorize()) { + auto [MainOp, AltOp] = getMainAltOpsNoStateVL(VL); + // Last chance to try to vectorize alternate node. + if (MainOp && AltOp && TrySplitNode(InstructionsState(MainOp, AltOp))) + return; + } if (Legality.tryToFindDuplicates()) tryToFindDuplicates(VL, ReuseShuffleIndices, *TTI, *TLI, S, UserTreeIdx); - newGatherTreeEntry(VL, S, UserTreeIdx, ReuseShuffleIndices); return; } - S = Legality.getInstructionsState(); } // FIXME: investigate if there are profitable cases for VL.size() <= 4. @@ -11447,16 +11457,41 @@ void BoUpSLP::buildTreeRec(ArrayRef VLRef, unsigned Depth, BS.verify(); #endif if (!BundlePtr || (*BundlePtr && !*BundlePtr.value())) { - LLVM_DEBUG(dbgs() << "SLP: We are not able to schedule this bundle!\n"); - // Last chance to try to vectorize alternate node. - if (S.isAltShuffle() && ReuseShuffleIndices.empty() && TrySplitNode(S)) + // Last chance - check if it does not require scheduling and check if it can + // be vectorized without copyables. + bool TryNonCopyableButAlternateOpcode = false; + if (S.areInstructionsWithCopyableElements() && doesNotNeedToSchedule(VL)) { + Legality = getScalarsVectorizationLegality( + VL, Depth, UserTreeIdx, /*TryCopyableElementsVectorization=*/false); + if (!Legality.isLegal()) { + if (Legality.trySplitVectorize()) { + auto [MainOp, AltOp] = getMainAltOpsNoStateVL(VL); + // Last chance to try to vectorize alternate node. + if (MainOp && AltOp && TrySplitNode(InstructionsState(MainOp, AltOp))) + return; + } + } else { + S = Legality.getInstructionsState(); + if (S.isAltShuffle() && TrySplitNode(S)) + return; + TryNonCopyableButAlternateOpcode = true; + } + } + if (!TryNonCopyableButAlternateOpcode) { + LLVM_DEBUG(dbgs() << "SLP: We are not able to schedule this bundle!\n"); + // Last chance to try to vectorize alternate node. + if (S.isAltShuffle() && ReuseShuffleIndices.empty() && TrySplitNode(S)) + return; + newGatherTreeEntry(VL, S, UserTreeIdx, ReuseShuffleIndices); + NonScheduledFirst.insert(VL.front()); + if (S.getOpcode() == Instruction::Load && + BS.ScheduleRegionSize < BS.ScheduleRegionSizeLimit) + registerNonVectorizableLoads(ArrayRef(VL)); return; - newGatherTreeEntry(VL, S, UserTreeIdx, ReuseShuffleIndices); - NonScheduledFirst.insert(VL.front()); - if (S.getOpcode() == Instruction::Load && - BS.ScheduleRegionSize < BS.ScheduleRegionSizeLimit) - registerNonVectorizableLoads(ArrayRef(VL)); - return; + } + LLVM_DEBUG(dbgs() << "SLP: We are not able to schedule this bundle, " + "trying alternate without scheduling instead.\n"); + BundlePtr = nullptr; } InstructionsCompatibilityAnalysis Analysis(*DT, *DL, *TTI, *TLI); SmallVector Operands = Analysis.buildOperands(S, VL); diff --git a/llvm/test/Transforms/SLPVectorizer/X86/matched-shuffled-entries.ll b/llvm/test/Transforms/SLPVectorizer/X86/matched-shuffled-entries.ll index 1abc8102dc332..0bc2e3fc5d9aa 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/matched-shuffled-entries.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/matched-shuffled-entries.ll @@ -5,17 +5,17 @@ define i32 @bar() local_unnamed_addr { ; CHECK-LABEL: @bar( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[ADD78_1:%.*]] = add nsw i32 undef, undef -; CHECK-NEXT: [[SUB86_1:%.*]] = sub nsw i32 undef, undef ; CHECK-NEXT: [[ADD94_1:%.*]] = add nsw i32 undef, undef -; CHECK-NEXT: [[SUB102_1:%.*]] = sub nsw i32 undef, undef ; CHECK-NEXT: [[ADD78_2:%.*]] = add nsw i32 undef, undef ; CHECK-NEXT: [[SUB102_3:%.*]] = sub nsw i32 undef, undef +; CHECK-NEXT: [[SUB102_1:%.*]] = sub nsw i32 undef, undef +; CHECK-NEXT: [[SUB102_4:%.*]] = sub nsw i32 undef, undef ; CHECK-NEXT: [[TMP0:%.*]] = insertelement <8 x i32> , i32 [[SUB102_1]], i32 2 ; CHECK-NEXT: [[TMP1:%.*]] = insertelement <8 x i32> [[TMP0]], i32 [[ADD94_1]], i32 3 -; CHECK-NEXT: [[TMP2:%.*]] = insertelement <8 x i32> , i32 [[SUB86_1]], i32 2 +; CHECK-NEXT: [[TMP2:%.*]] = insertelement <8 x i32> , i32 [[SUB102_3]], i32 2 ; CHECK-NEXT: [[TMP3:%.*]] = insertelement <8 x i32> [[TMP2]], i32 [[ADD78_1]], i32 3 ; CHECK-NEXT: [[TMP4:%.*]] = insertelement <8 x i32> [[TMP3]], i32 [[ADD78_2]], i32 5 -; CHECK-NEXT: [[TMP5:%.*]] = insertelement <8 x i32> [[TMP4]], i32 [[SUB102_3]], i32 6 +; CHECK-NEXT: [[TMP5:%.*]] = insertelement <8 x i32> [[TMP4]], i32 [[SUB102_4]], i32 6 ; CHECK-NEXT: [[TMP6:%.*]] = add nsw <8 x i32> [[TMP1]], [[TMP5]] ; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <8 x i32> [[TMP5]], <8 x i32> , <8 x i32> ; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <8 x i32> [[TMP1]], <8 x i32> [[TMP5]], <8 x i32> diff --git a/llvm/test/Transforms/SLPVectorizer/X86/non-power-of-2-subvectors-insert.ll b/llvm/test/Transforms/SLPVectorizer/X86/non-power-of-2-subvectors-insert.ll index 1fedde4cc9fd7..cee333341271b 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/non-power-of-2-subvectors-insert.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/non-power-of-2-subvectors-insert.ll @@ -4,17 +4,15 @@ define void @test() { ; CHECK-LABEL: define void @test() { ; CHECK-NEXT: [[XOR108_I_I_I:%.*]] = xor i64 0, 1 -; CHECK-NEXT: [[TMP1:%.*]] = insertelement <12 x i64> , i64 [[XOR108_I_I_I]], i32 10 -; CHECK-NEXT: [[TMP2:%.*]] = lshr <12 x i64> [[TMP1]], zeroinitializer -; CHECK-NEXT: [[TMP3:%.*]] = insertelement <16 x i64> poison, i64 [[XOR108_I_I_I]], i32 3 -; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <12 x i64> [[TMP2]], <12 x i64> poison, <16 x i32> -; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <16 x i64> [[TMP5]], <16 x i64> [[TMP3]], <16 x i32> -; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <16 x i64> [[TMP6]], <16 x i64> poison, <16 x i32> -; CHECK-NEXT: [[TMP8:%.*]] = trunc <16 x i64> [[TMP7]] to <16 x i1> -; CHECK-NEXT: [[TMP9:%.*]] = or <16 x i1> [[TMP8]], zeroinitializer -; CHECK-NEXT: [[TMP10:%.*]] = freeze <16 x i1> [[TMP9]] +; CHECK-NEXT: [[TMP1:%.*]] = trunc i64 [[XOR108_I_I_I]] to i1 +; CHECK-NEXT: [[TMP2:%.*]] = insertelement <14 x i1> , i1 [[TMP1]], i32 3 +; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <14 x i1> [[TMP2]], <14 x i1> poison, <14 x i32> +; CHECK-NEXT: [[TMP4:%.*]] = lshr <14 x i1> [[TMP3]], +; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <14 x i1> [[TMP4]], <14 x i1> poison, <16 x i32> ; CHECK-NEXT: [[TMP11:%.*]] = zext <16 x i1> [[TMP10]] to <16 x i16> -; CHECK-NEXT: [[TMP12:%.*]] = icmp eq <16 x i16> [[TMP11]], zeroinitializer +; CHECK-NEXT: [[TMP7:%.*]] = or <16 x i16> [[TMP11]], zeroinitializer +; CHECK-NEXT: [[TMP8:%.*]] = freeze <16 x i16> [[TMP7]] +; CHECK-NEXT: [[TMP9:%.*]] = icmp eq <16 x i16> [[TMP8]], zeroinitializer ; CHECK-NEXT: ret void ; %xor108.i.i.i = xor i64 0, 1 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/resched.ll b/llvm/test/Transforms/SLPVectorizer/X86/resched.ll index 20a42777cf8e4..8f59a2ad09a0c 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/resched.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/resched.ll @@ -11,27 +11,11 @@ define fastcc void @_ZN12_GLOBAL__N_127PolynomialMultiplyRecognize9recognizeEv(i ; CHECK: if.then22.i: ; CHECK-NEXT: [[SUB_I:%.*]] = add nsw i32 undef, -1 ; CHECK-NEXT: [[CONV31_I:%.*]] = and i32 undef, [[SUB_I]] -; CHECK-NEXT: [[SHR_I_I:%.*]] = lshr i32 [[CONV31_I]], 1 -; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x i32> poison, i32 [[CONV31_I]], i32 0 -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x i32> [[TMP0]], <2 x i32> poison, <2 x i32> zeroinitializer -; CHECK-NEXT: [[TMP2:%.*]] = lshr <2 x i32> [[TMP1]], -; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> poison, <4 x i32> zeroinitializer -; CHECK-NEXT: [[TMP4:%.*]] = lshr <4 x i32> [[TMP3]], -; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <4 x i32> [[TMP3]], <4 x i32> poison, <8 x i32> zeroinitializer -; CHECK-NEXT: [[TMP6:%.*]] = lshr <8 x i32> [[TMP5]], -; CHECK-NEXT: [[TMP7:%.*]] = trunc i32 [[SUB_I]] to i8 -; CHECK-NEXT: [[TMP8:%.*]] = insertelement <16 x i8> poison, i8 [[TMP7]], i32 0 -; CHECK-NEXT: [[TMP9:%.*]] = trunc i32 [[SHR_I_I]] to i8 -; CHECK-NEXT: [[TMP10:%.*]] = insertelement <16 x i8> [[TMP8]], i8 [[TMP9]], i32 1 -; CHECK-NEXT: [[TMP11:%.*]] = trunc <8 x i32> [[TMP6]] to <8 x i8> -; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <8 x i8> [[TMP11]], <8 x i8> poison, <16 x i32> -; CHECK-NEXT: [[TMP14:%.*]] = shufflevector <16 x i8> [[TMP10]], <16 x i8> [[TMP12]], <16 x i32> -; CHECK-NEXT: [[TMP13:%.*]] = trunc <4 x i32> [[TMP4]] to <4 x i8> -; CHECK-NEXT: [[TMP19:%.*]] = shufflevector <4 x i8> [[TMP13]], <4 x i8> poison, <16 x i32> -; CHECK-NEXT: [[TMP20:%.*]] = shufflevector <16 x i8> [[TMP14]], <16 x i8> [[TMP19]], <16 x i32> -; CHECK-NEXT: [[TMP15:%.*]] = trunc <2 x i32> [[TMP2]] to <2 x i8> -; CHECK-NEXT: [[TMP18:%.*]] = shufflevector <2 x i8> [[TMP15]], <2 x i8> poison, <16 x i32> -; CHECK-NEXT: [[TMP16:%.*]] = shufflevector <16 x i8> [[TMP20]], <16 x i8> [[TMP18]], <16 x i32> +; CHECK-NEXT: [[TMP0:%.*]] = insertelement <16 x i32> poison, i32 [[SUB_I]], i32 0 +; CHECK-NEXT: [[TMP1:%.*]] = insertelement <16 x i32> [[TMP0]], i32 [[CONV31_I]], i32 1 +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <16 x i32> [[TMP1]], <16 x i32> poison, <16 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = lshr <16 x i32> [[TMP2]], +; CHECK-NEXT: [[TMP16:%.*]] = trunc <16 x i32> [[TMP3]] to <16 x i8> ; CHECK-NEXT: [[TMP17:%.*]] = and <16 x i8> [[TMP16]], splat (i8 1) ; CHECK-NEXT: store <16 x i8> [[TMP17]], ptr undef, align 1 ; CHECK-NEXT: ret void diff --git a/llvm/test/Transforms/SLPVectorizer/X86/vec_list_bias-inseltpoison.ll b/llvm/test/Transforms/SLPVectorizer/X86/vec_list_bias-inseltpoison.ll index 2cc2f28ccf6d5..42e9c24954441 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/vec_list_bias-inseltpoison.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/vec_list_bias-inseltpoison.ll @@ -39,19 +39,18 @@ define void @test(ptr nocapture %t2) { ; CHECK-NEXT: [[T29:%.*]] = sub nsw i32 [[T9]], [[T15]] ; CHECK-NEXT: [[T30:%.*]] = add nsw i32 [[T27]], [[T29]] ; CHECK-NEXT: [[T31:%.*]] = mul nsw i32 [[T30]], 4433 +; CHECK-NEXT: [[T32:%.*]] = mul nsw i32 [[T27]], 6270 ; CHECK-NEXT: [[T34:%.*]] = mul nsw i32 [[T29]], -15137 ; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> poison, <4 x i32> ; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x i32> [[TMP2]], i32 [[T40]], i32 1 -; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x i32> [[TMP3]], i32 [[T27]], i32 2 +; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x i32> [[TMP3]], i32 [[T32]], i32 2 ; CHECK-NEXT: [[TMP5:%.*]] = insertelement <4 x i32> [[TMP4]], i32 [[T47]], i32 3 ; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> poison, <4 x i32> -; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <4 x i32> , <4 x i32> [[TMP6]], <4 x i32> +; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <4 x i32> , <4 x i32> [[TMP6]], <4 x i32> ; CHECK-NEXT: [[TMP8:%.*]] = insertelement <4 x i32> [[TMP7]], i32 [[T48]], i32 1 ; CHECK-NEXT: [[TMP9:%.*]] = insertelement <4 x i32> [[TMP8]], i32 [[T40]], i32 3 ; CHECK-NEXT: [[TMP10:%.*]] = add nsw <4 x i32> [[TMP5]], [[TMP9]] -; CHECK-NEXT: [[TMP11:%.*]] = mul nsw <4 x i32> [[TMP5]], [[TMP9]] -; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <4 x i32> [[TMP10]], <4 x i32> [[TMP11]], <4 x i32> -; CHECK-NEXT: [[T701:%.*]] = shufflevector <4 x i32> [[TMP12]], <4 x i32> poison, <8 x i32> +; CHECK-NEXT: [[T701:%.*]] = shufflevector <4 x i32> [[TMP10]], <4 x i32> poison, <8 x i32> ; CHECK-NEXT: [[T71:%.*]] = insertelement <8 x i32> [[T701]], i32 [[T34]], i32 6 ; CHECK-NEXT: [[T76:%.*]] = shl <8 x i32> [[T71]], splat (i32 3) ; CHECK-NEXT: store <8 x i32> [[T76]], ptr [[T2]], align 4 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/vec_list_bias.ll b/llvm/test/Transforms/SLPVectorizer/X86/vec_list_bias.ll index cea98bf55b6ff..1c80530593ab3 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/vec_list_bias.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/vec_list_bias.ll @@ -25,7 +25,6 @@ define void @test(ptr nocapture %t2) { ; CHECK-NEXT: [[T24:%.*]] = add nsw i32 [[T23]], [[T21]] ; CHECK-NEXT: [[T25:%.*]] = sub nsw i32 [[T21]], [[T23]] ; CHECK-NEXT: [[T27:%.*]] = sub nsw i32 [[T3]], [[T24]] -; CHECK-NEXT: [[T32:%.*]] = mul nsw i32 [[T27]], 6270 ; CHECK-NEXT: [[T37:%.*]] = add nsw i32 [[T25]], [[T11]] ; CHECK-NEXT: [[T38:%.*]] = add nsw i32 [[T17]], [[T5]] ; CHECK-NEXT: [[T39:%.*]] = add nsw i32 [[T37]], [[T38]] @@ -34,25 +33,26 @@ define void @test(ptr nocapture %t2) { ; CHECK-NEXT: [[T42:%.*]] = mul nsw i32 [[T17]], 16819 ; CHECK-NEXT: [[T47:%.*]] = mul nsw i32 [[T37]], -16069 ; CHECK-NEXT: [[T48:%.*]] = mul nsw i32 [[T38]], -3196 -; CHECK-NEXT: [[T49:%.*]] = add nsw i32 [[T40]], [[T47]] ; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr [[T8]], align 4 ; CHECK-NEXT: [[T15:%.*]] = load i32, ptr [[T14]], align 4 ; CHECK-NEXT: [[T9:%.*]] = load i32, ptr [[T8]], align 4 ; CHECK-NEXT: [[T29:%.*]] = sub nsw i32 [[T9]], [[T15]] ; CHECK-NEXT: [[T30:%.*]] = add nsw i32 [[T27]], [[T29]] ; CHECK-NEXT: [[T31:%.*]] = mul nsw i32 [[T30]], 4433 +; CHECK-NEXT: [[T32:%.*]] = mul nsw i32 [[T27]], 6270 ; CHECK-NEXT: [[T34:%.*]] = mul nsw i32 [[T29]], -15137 -; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> poison, <2 x i32> -; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x i32> [[TMP2]], i32 [[T40]], i32 1 -; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x i32> [[TMP1]], i32 [[T48]], i32 1 -; CHECK-NEXT: [[TMP5:%.*]] = add nsw <2 x i32> [[TMP3]], [[TMP4]] -; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <2 x i32> [[TMP5]], <2 x i32> poison, <8 x i32> -; CHECK-NEXT: [[T67:%.*]] = insertelement <8 x i32> [[TMP6]], i32 [[T32]], i32 2 -; CHECK-NEXT: [[T68:%.*]] = insertelement <8 x i32> [[T67]], i32 [[T49]], i32 3 -; CHECK-NEXT: [[T701:%.*]] = shufflevector <8 x i32> [[T68]], <8 x i32> [[TMP6]], <8 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> poison, <4 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x i32> [[TMP2]], i32 [[T40]], i32 1 +; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x i32> [[TMP3]], i32 [[T32]], i32 2 +; CHECK-NEXT: [[TMP5:%.*]] = insertelement <4 x i32> [[TMP4]], i32 [[T47]], i32 3 +; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> poison, <4 x i32> +; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <4 x i32> , <4 x i32> [[TMP6]], <4 x i32> +; CHECK-NEXT: [[TMP8:%.*]] = insertelement <4 x i32> [[TMP7]], i32 [[T48]], i32 1 +; CHECK-NEXT: [[TMP9:%.*]] = insertelement <4 x i32> [[TMP8]], i32 [[T40]], i32 3 +; CHECK-NEXT: [[TMP10:%.*]] = add nsw <4 x i32> [[TMP5]], [[TMP9]] +; CHECK-NEXT: [[T701:%.*]] = shufflevector <4 x i32> [[TMP10]], <4 x i32> poison, <8 x i32> ; CHECK-NEXT: [[T71:%.*]] = insertelement <8 x i32> [[T701]], i32 [[T34]], i32 6 -; CHECK-NEXT: [[T72:%.*]] = insertelement <8 x i32> [[T71]], i32 [[T49]], i32 7 -; CHECK-NEXT: [[T76:%.*]] = shl <8 x i32> [[T72]], splat (i32 3) +; CHECK-NEXT: [[T76:%.*]] = shl <8 x i32> [[T71]], splat (i32 3) ; CHECK-NEXT: store <8 x i32> [[T76]], ptr [[T2]], align 4 ; CHECK-NEXT: ret void ; diff --git a/llvm/test/Transforms/SLPVectorizer/X86/vec_list_bias_external_insert_shuffled.ll b/llvm/test/Transforms/SLPVectorizer/X86/vec_list_bias_external_insert_shuffled.ll index 7060288d739bd..cd0ed9cd7884c 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/vec_list_bias_external_insert_shuffled.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/vec_list_bias_external_insert_shuffled.ll @@ -19,7 +19,6 @@ define void @test(ptr nocapture %t2) { ; CHECK-NEXT: [[T24:%.*]] = add nsw i32 [[T23]], [[T21]] ; CHECK-NEXT: [[T25:%.*]] = sub nsw i32 [[T21]], [[T23]] ; CHECK-NEXT: [[T27:%.*]] = sub nsw i32 [[T3]], [[T24]] -; CHECK-NEXT: [[T32:%.*]] = mul nsw i32 [[T27]], 6270 ; CHECK-NEXT: [[T37:%.*]] = add nsw i32 [[T25]], [[T11]] ; CHECK-NEXT: [[T38:%.*]] = add nsw i32 [[T17]], [[T5]] ; CHECK-NEXT: [[T39:%.*]] = add nsw i32 [[T37]], [[T38]] @@ -28,26 +27,25 @@ define void @test(ptr nocapture %t2) { ; CHECK-NEXT: [[T42:%.*]] = mul nsw i32 [[T17]], 16819 ; CHECK-NEXT: [[T47:%.*]] = mul nsw i32 [[T37]], -16069 ; CHECK-NEXT: [[T48:%.*]] = mul nsw i32 [[T38]], -3196 -; CHECK-NEXT: [[T49:%.*]] = add nsw i32 [[T40]], [[T47]] ; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr [[T8]], align 4 ; CHECK-NEXT: [[T15:%.*]] = load i32, ptr [[T14]], align 4 ; CHECK-NEXT: [[T9:%.*]] = load i32, ptr [[T8]], align 4 ; CHECK-NEXT: [[T29:%.*]] = sub nsw i32 [[T9]], [[T15]] ; CHECK-NEXT: [[T30:%.*]] = add nsw i32 [[T27]], [[T29]] ; CHECK-NEXT: [[T31:%.*]] = mul nsw i32 [[T30]], 4433 +; CHECK-NEXT: [[T32:%.*]] = mul nsw i32 [[T27]], 6270 ; CHECK-NEXT: [[T34:%.*]] = mul nsw i32 [[T29]], -15137 -; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> poison, <2 x i32> -; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x i32> [[TMP2]], i32 [[T40]], i32 1 -; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x i32> [[TMP1]], i32 [[T48]], i32 1 -; CHECK-NEXT: [[TMP5:%.*]] = add nsw <2 x i32> [[TMP3]], [[TMP4]] -; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <2 x i32> [[TMP5]], <2 x i32> poison, <8 x i32> -; CHECK-NEXT: [[T67:%.*]] = insertelement <8 x i32> [[TMP6]], i32 [[T32]], i32 2 -; CHECK-NEXT: [[T68:%.*]] = insertelement <8 x i32> [[T67]], i32 [[T49]], i32 3 -; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <2 x i32> [[TMP5]], <2 x i32> poison, <8 x i32> -; CHECK-NEXT: [[T701:%.*]] = shufflevector <8 x i32> [[T68]], <8 x i32> [[TMP7]], <8 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> poison, <4 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x i32> [[TMP2]], i32 [[T32]], i32 2 +; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x i32> [[TMP3]], i32 [[T47]], i32 3 +; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <4 x i32> [[TMP4]], <4 x i32> , <4 x i32> +; CHECK-NEXT: [[TMP6:%.*]] = insertelement <4 x i32> [[TMP5]], i32 [[T40]], i32 3 +; CHECK-NEXT: [[TMP7:%.*]] = add nsw <4 x i32> [[TMP4]], [[TMP6]] +; CHECK-NEXT: [[T50:%.*]] = add nsw i32 [[T40]], [[T48]] +; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <4 x i32> [[TMP7]], <4 x i32> poison, <8 x i32> +; CHECK-NEXT: [[T701:%.*]] = insertelement <8 x i32> [[TMP8]], i32 [[T50]], i32 5 ; CHECK-NEXT: [[T71:%.*]] = insertelement <8 x i32> [[T701]], i32 [[T34]], i32 6 -; CHECK-NEXT: [[T72:%.*]] = insertelement <8 x i32> [[T71]], i32 [[T49]], i32 7 -; CHECK-NEXT: [[T76:%.*]] = shl <8 x i32> [[T72]], splat (i32 3) +; CHECK-NEXT: [[T76:%.*]] = shl <8 x i32> [[T71]], splat (i32 3) ; CHECK-NEXT: store <8 x i32> [[T76]], ptr [[T2]], align 4 ; CHECK-NEXT: ret void ; diff --git a/llvm/test/Transforms/SLPVectorizer/alternate-non-profitable.ll b/llvm/test/Transforms/SLPVectorizer/alternate-non-profitable.ll index 125c2dce32663..9f95f79ecc7fc 100644 --- a/llvm/test/Transforms/SLPVectorizer/alternate-non-profitable.ll +++ b/llvm/test/Transforms/SLPVectorizer/alternate-non-profitable.ll @@ -170,11 +170,10 @@ define <2 x i8> @replace_through_binop_fail_cant_speculate(i8 %inp, <2 x i8> %d, define <2 x i8> @replace_through_binop_preserve_flags(i8 %inp, <2 x i8> %d, <2 x i8> %any) { ; CHECK-LABEL: define <2 x i8> @replace_through_binop_preserve_flags( ; CHECK-SAME: i8 [[INP:%.*]], <2 x i8> [[D:%.*]], <2 x i8> [[ANY:%.*]]) { -; CHECK-NEXT: [[ADD:%.*]] = xor i8 [[INP]], 5 -; CHECK-NEXT: [[TMP1:%.*]] = xor i8 [[INP]], 123 -; CHECK-NEXT: [[TMP2:%.*]] = add nsw i8 [[ADD]], 1 -; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x i8> poison, i8 [[TMP1]], i64 0 -; CHECK-NEXT: [[R:%.*]] = insertelement <2 x i8> [[TMP3]], i8 [[TMP2]], i64 1 +; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x i8> poison, i8 [[INP]], i32 0 +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <2 x i8> [[TMP1]], <2 x i8> poison, <2 x i32> zeroinitializer +; CHECK-NEXT: [[TMP3:%.*]] = xor <2 x i8> [[TMP2]], +; CHECK-NEXT: [[R:%.*]] = add <2 x i8> [[TMP3]], ; CHECK-NEXT: ret <2 x i8> [[R]] ; %add = xor i8 %inp, 5