diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index b17724cd07209..b68510df181e5 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -22240,9 +22240,8 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) { } // extract_vector_elt (build_vector x, y), 1 -> y - if (((IndexC && VecOp.getOpcode() == ISD::BUILD_VECTOR) || - VecOp.getOpcode() == ISD::SPLAT_VECTOR) && - TLI.isTypeLegal(VecVT)) { + if ((IndexC && VecOp.getOpcode() == ISD::BUILD_VECTOR) || + VecOp.getOpcode() == ISD::SPLAT_VECTOR) { assert((VecOp.getOpcode() != ISD::BUILD_VECTOR || VecVT.isFixedLengthVector()) && "BUILD_VECTOR used for scalable vectors"); @@ -22252,7 +22251,7 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) { EVT InEltVT = Elt.getValueType(); if (VecOp.hasOneUse() || TLI.aggressivelyPreferBuildVectorSources(VecVT) || - isNullConstant(Elt)) { + isIntOrFPConstant(Elt)) { // Sometimes build_vector's scalar input types do not match result type. if (ScalarVT == InEltVT) return Elt; diff --git a/llvm/test/CodeGen/AMDGPU/fcanonicalize.f16.ll b/llvm/test/CodeGen/AMDGPU/fcanonicalize.f16.ll index 274621307f540..74d377bb2bb2b 100644 --- a/llvm/test/CodeGen/AMDGPU/fcanonicalize.f16.ll +++ b/llvm/test/CodeGen/AMDGPU/fcanonicalize.f16.ll @@ -190,10 +190,6 @@ define <2 x half> @v_test_canonicalize_build_vector_v2f16(half %lo, half %hi) #1 ; CI-LABEL: v_test_canonicalize_build_vector_v2f16: ; CI: ; %bb.0: ; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; CI-NEXT: v_cvt_f16_f32_e32 v0, v0 -; CI-NEXT: v_cvt_f16_f32_e32 v1, v1 -; CI-NEXT: v_cvt_f32_f16_e32 v0, v0 -; CI-NEXT: v_cvt_f32_f16_e32 v1, v1 ; CI-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: v_test_canonicalize_build_vector_v2f16: @@ -2301,12 +2297,6 @@ define <3 x half> @v_test_canonicalize_var_v3f16(<3 x half> %val) #1 { ; CI-LABEL: v_test_canonicalize_var_v3f16: ; CI: ; %bb.0: ; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; CI-NEXT: v_cvt_f16_f32_e32 v0, v0 -; CI-NEXT: v_cvt_f16_f32_e32 v1, v1 -; CI-NEXT: v_cvt_f16_f32_e32 v2, v2 -; CI-NEXT: v_cvt_f32_f16_e32 v0, v0 -; CI-NEXT: v_cvt_f32_f16_e32 v1, v1 -; CI-NEXT: v_cvt_f32_f16_e32 v2, v2 ; CI-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: v_test_canonicalize_var_v3f16: @@ -2341,14 +2331,6 @@ define <4 x half> @v_test_canonicalize_var_v4f16(<4 x half> %val) #1 { ; CI-LABEL: v_test_canonicalize_var_v4f16: ; CI: ; %bb.0: ; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; CI-NEXT: v_cvt_f16_f32_e32 v0, v0 -; CI-NEXT: v_cvt_f16_f32_e32 v1, v1 -; CI-NEXT: v_cvt_f16_f32_e32 v2, v2 -; CI-NEXT: v_cvt_f16_f32_e32 v3, v3 -; CI-NEXT: v_cvt_f32_f16_e32 v0, v0 -; CI-NEXT: v_cvt_f32_f16_e32 v1, v1 -; CI-NEXT: v_cvt_f32_f16_e32 v2, v2 -; CI-NEXT: v_cvt_f32_f16_e32 v3, v3 ; CI-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: v_test_canonicalize_var_v4f16: @@ -2611,9 +2593,7 @@ define <2 x half> @v_test_canonicalize_reg_k_v2f16(half %val) #1 { ; CI-LABEL: v_test_canonicalize_reg_k_v2f16: ; CI: ; %bb.0: ; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; CI-NEXT: v_cvt_f16_f32_e32 v0, v0 ; CI-NEXT: v_mov_b32_e32 v1, 2.0 -; CI-NEXT: v_cvt_f32_f16_e32 v0, v0 ; CI-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: v_test_canonicalize_reg_k_v2f16: @@ -2647,8 +2627,7 @@ define <2 x half> @v_test_canonicalize_k_reg_v2f16(half %val) #1 { ; CI-LABEL: v_test_canonicalize_k_reg_v2f16: ; CI: ; %bb.0: ; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; CI-NEXT: v_cvt_f16_f32_e32 v0, v0 -; CI-NEXT: v_cvt_f32_f16_e32 v1, v0 +; CI-NEXT: v_mov_b32_e32 v1, v0 ; CI-NEXT: v_mov_b32_e32 v0, 2.0 ; CI-NEXT: s_setpc_b64 s[30:31] ; @@ -2878,18 +2857,6 @@ define <6 x half> @v_test_canonicalize_var_v6f16(<6 x half> %val) #1 { ; CI-LABEL: v_test_canonicalize_var_v6f16: ; CI: ; %bb.0: ; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; CI-NEXT: v_cvt_f16_f32_e32 v0, v0 -; CI-NEXT: v_cvt_f16_f32_e32 v1, v1 -; CI-NEXT: v_cvt_f16_f32_e32 v2, v2 -; CI-NEXT: v_cvt_f16_f32_e32 v3, v3 -; CI-NEXT: v_cvt_f16_f32_e32 v4, v4 -; CI-NEXT: v_cvt_f16_f32_e32 v5, v5 -; CI-NEXT: v_cvt_f32_f16_e32 v0, v0 -; CI-NEXT: v_cvt_f32_f16_e32 v1, v1 -; CI-NEXT: v_cvt_f32_f16_e32 v2, v2 -; CI-NEXT: v_cvt_f32_f16_e32 v3, v3 -; CI-NEXT: v_cvt_f32_f16_e32 v4, v4 -; CI-NEXT: v_cvt_f32_f16_e32 v5, v5 ; CI-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: v_test_canonicalize_var_v6f16: @@ -2933,22 +2900,6 @@ define <8 x half> @v_test_canonicalize_var_v8f16(<8 x half> %val) #1 { ; CI-LABEL: v_test_canonicalize_var_v8f16: ; CI: ; %bb.0: ; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; CI-NEXT: v_cvt_f16_f32_e32 v0, v0 -; CI-NEXT: v_cvt_f16_f32_e32 v1, v1 -; CI-NEXT: v_cvt_f16_f32_e32 v2, v2 -; CI-NEXT: v_cvt_f16_f32_e32 v3, v3 -; CI-NEXT: v_cvt_f16_f32_e32 v4, v4 -; CI-NEXT: v_cvt_f16_f32_e32 v5, v5 -; CI-NEXT: v_cvt_f16_f32_e32 v6, v6 -; CI-NEXT: v_cvt_f16_f32_e32 v7, v7 -; CI-NEXT: v_cvt_f32_f16_e32 v0, v0 -; CI-NEXT: v_cvt_f32_f16_e32 v1, v1 -; CI-NEXT: v_cvt_f32_f16_e32 v2, v2 -; CI-NEXT: v_cvt_f32_f16_e32 v3, v3 -; CI-NEXT: v_cvt_f32_f16_e32 v4, v4 -; CI-NEXT: v_cvt_f32_f16_e32 v5, v5 -; CI-NEXT: v_cvt_f32_f16_e32 v6, v6 -; CI-NEXT: v_cvt_f32_f16_e32 v7, v7 ; CI-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: v_test_canonicalize_var_v8f16: @@ -3001,30 +2952,6 @@ define <12 x half> @v_test_canonicalize_var_v12f16(<12 x half> %val) #1 { ; CI-LABEL: v_test_canonicalize_var_v12f16: ; CI: ; %bb.0: ; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; CI-NEXT: v_cvt_f16_f32_e32 v0, v0 -; CI-NEXT: v_cvt_f16_f32_e32 v1, v1 -; CI-NEXT: v_cvt_f16_f32_e32 v2, v2 -; CI-NEXT: v_cvt_f16_f32_e32 v3, v3 -; CI-NEXT: v_cvt_f16_f32_e32 v4, v4 -; CI-NEXT: v_cvt_f16_f32_e32 v5, v5 -; CI-NEXT: v_cvt_f16_f32_e32 v6, v6 -; CI-NEXT: v_cvt_f16_f32_e32 v7, v7 -; CI-NEXT: v_cvt_f16_f32_e32 v8, v8 -; CI-NEXT: v_cvt_f16_f32_e32 v9, v9 -; CI-NEXT: v_cvt_f16_f32_e32 v10, v10 -; CI-NEXT: v_cvt_f16_f32_e32 v11, v11 -; CI-NEXT: v_cvt_f32_f16_e32 v0, v0 -; CI-NEXT: v_cvt_f32_f16_e32 v1, v1 -; CI-NEXT: v_cvt_f32_f16_e32 v2, v2 -; CI-NEXT: v_cvt_f32_f16_e32 v3, v3 -; CI-NEXT: v_cvt_f32_f16_e32 v4, v4 -; CI-NEXT: v_cvt_f32_f16_e32 v5, v5 -; CI-NEXT: v_cvt_f32_f16_e32 v6, v6 -; CI-NEXT: v_cvt_f32_f16_e32 v7, v7 -; CI-NEXT: v_cvt_f32_f16_e32 v8, v8 -; CI-NEXT: v_cvt_f32_f16_e32 v9, v9 -; CI-NEXT: v_cvt_f32_f16_e32 v10, v10 -; CI-NEXT: v_cvt_f32_f16_e32 v11, v11 ; CI-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: v_test_canonicalize_var_v12f16: @@ -3087,38 +3014,6 @@ define <16 x half> @v_test_canonicalize_var_v16f16(<16 x half> %val) #1 { ; CI-LABEL: v_test_canonicalize_var_v16f16: ; CI: ; %bb.0: ; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; CI-NEXT: v_cvt_f16_f32_e32 v0, v0 -; CI-NEXT: v_cvt_f16_f32_e32 v1, v1 -; CI-NEXT: v_cvt_f16_f32_e32 v2, v2 -; CI-NEXT: v_cvt_f16_f32_e32 v3, v3 -; CI-NEXT: v_cvt_f16_f32_e32 v4, v4 -; CI-NEXT: v_cvt_f16_f32_e32 v5, v5 -; CI-NEXT: v_cvt_f16_f32_e32 v6, v6 -; CI-NEXT: v_cvt_f16_f32_e32 v7, v7 -; CI-NEXT: v_cvt_f16_f32_e32 v8, v8 -; CI-NEXT: v_cvt_f16_f32_e32 v9, v9 -; CI-NEXT: v_cvt_f16_f32_e32 v10, v10 -; CI-NEXT: v_cvt_f16_f32_e32 v11, v11 -; CI-NEXT: v_cvt_f16_f32_e32 v12, v12 -; CI-NEXT: v_cvt_f16_f32_e32 v13, v13 -; CI-NEXT: v_cvt_f16_f32_e32 v14, v14 -; CI-NEXT: v_cvt_f16_f32_e32 v15, v15 -; CI-NEXT: v_cvt_f32_f16_e32 v0, v0 -; CI-NEXT: v_cvt_f32_f16_e32 v1, v1 -; CI-NEXT: v_cvt_f32_f16_e32 v2, v2 -; CI-NEXT: v_cvt_f32_f16_e32 v3, v3 -; CI-NEXT: v_cvt_f32_f16_e32 v4, v4 -; CI-NEXT: v_cvt_f32_f16_e32 v5, v5 -; CI-NEXT: v_cvt_f32_f16_e32 v6, v6 -; CI-NEXT: v_cvt_f32_f16_e32 v7, v7 -; CI-NEXT: v_cvt_f32_f16_e32 v8, v8 -; CI-NEXT: v_cvt_f32_f16_e32 v9, v9 -; CI-NEXT: v_cvt_f32_f16_e32 v10, v10 -; CI-NEXT: v_cvt_f32_f16_e32 v11, v11 -; CI-NEXT: v_cvt_f32_f16_e32 v12, v12 -; CI-NEXT: v_cvt_f32_f16_e32 v13, v13 -; CI-NEXT: v_cvt_f32_f16_e32 v14, v14 -; CI-NEXT: v_cvt_f32_f16_e32 v15, v15 ; CI-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: v_test_canonicalize_var_v16f16: @@ -3216,71 +3111,7 @@ define <32 x half> @v_test_canonicalize_var_v32f16(<32 x half> %val) #1 { ; CI: ; %bb.0: ; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; CI-NEXT: buffer_load_dword v31, off, s[0:3], s32 -; CI-NEXT: v_cvt_f16_f32_e32 v0, v0 -; CI-NEXT: v_cvt_f16_f32_e32 v1, v1 -; CI-NEXT: v_cvt_f16_f32_e32 v2, v2 -; CI-NEXT: v_cvt_f16_f32_e32 v3, v3 -; CI-NEXT: v_cvt_f16_f32_e32 v4, v4 -; CI-NEXT: v_cvt_f16_f32_e32 v5, v5 -; CI-NEXT: v_cvt_f16_f32_e32 v6, v6 -; CI-NEXT: v_cvt_f16_f32_e32 v7, v7 -; CI-NEXT: v_cvt_f16_f32_e32 v8, v8 -; CI-NEXT: v_cvt_f16_f32_e32 v9, v9 -; CI-NEXT: v_cvt_f16_f32_e32 v10, v10 -; CI-NEXT: v_cvt_f16_f32_e32 v11, v11 -; CI-NEXT: v_cvt_f16_f32_e32 v12, v12 -; CI-NEXT: v_cvt_f16_f32_e32 v13, v13 -; CI-NEXT: v_cvt_f16_f32_e32 v14, v14 -; CI-NEXT: v_cvt_f16_f32_e32 v15, v15 -; CI-NEXT: v_cvt_f16_f32_e32 v16, v16 -; CI-NEXT: v_cvt_f16_f32_e32 v17, v17 -; CI-NEXT: v_cvt_f16_f32_e32 v18, v18 -; CI-NEXT: v_cvt_f16_f32_e32 v19, v19 -; CI-NEXT: v_cvt_f16_f32_e32 v20, v20 -; CI-NEXT: v_cvt_f16_f32_e32 v21, v21 -; CI-NEXT: v_cvt_f16_f32_e32 v22, v22 -; CI-NEXT: v_cvt_f16_f32_e32 v23, v23 -; CI-NEXT: v_cvt_f16_f32_e32 v24, v24 -; CI-NEXT: v_cvt_f16_f32_e32 v25, v25 -; CI-NEXT: v_cvt_f16_f32_e32 v26, v26 -; CI-NEXT: v_cvt_f16_f32_e32 v27, v27 -; CI-NEXT: v_cvt_f16_f32_e32 v28, v28 -; CI-NEXT: v_cvt_f16_f32_e32 v29, v29 -; CI-NEXT: v_cvt_f16_f32_e32 v30, v30 -; CI-NEXT: v_cvt_f32_f16_e32 v0, v0 -; CI-NEXT: v_cvt_f32_f16_e32 v1, v1 -; CI-NEXT: v_cvt_f32_f16_e32 v2, v2 -; CI-NEXT: v_cvt_f32_f16_e32 v3, v3 -; CI-NEXT: v_cvt_f32_f16_e32 v4, v4 -; CI-NEXT: v_cvt_f32_f16_e32 v5, v5 -; CI-NEXT: v_cvt_f32_f16_e32 v6, v6 -; CI-NEXT: v_cvt_f32_f16_e32 v7, v7 -; CI-NEXT: v_cvt_f32_f16_e32 v8, v8 -; CI-NEXT: v_cvt_f32_f16_e32 v9, v9 -; CI-NEXT: v_cvt_f32_f16_e32 v10, v10 -; CI-NEXT: v_cvt_f32_f16_e32 v11, v11 -; CI-NEXT: v_cvt_f32_f16_e32 v12, v12 -; CI-NEXT: v_cvt_f32_f16_e32 v13, v13 -; CI-NEXT: v_cvt_f32_f16_e32 v14, v14 -; CI-NEXT: v_cvt_f32_f16_e32 v15, v15 -; CI-NEXT: v_cvt_f32_f16_e32 v16, v16 -; CI-NEXT: v_cvt_f32_f16_e32 v17, v17 -; CI-NEXT: v_cvt_f32_f16_e32 v18, v18 -; CI-NEXT: v_cvt_f32_f16_e32 v19, v19 -; CI-NEXT: v_cvt_f32_f16_e32 v20, v20 -; CI-NEXT: v_cvt_f32_f16_e32 v21, v21 -; CI-NEXT: v_cvt_f32_f16_e32 v22, v22 -; CI-NEXT: v_cvt_f32_f16_e32 v23, v23 -; CI-NEXT: v_cvt_f32_f16_e32 v24, v24 -; CI-NEXT: v_cvt_f32_f16_e32 v25, v25 -; CI-NEXT: v_cvt_f32_f16_e32 v26, v26 -; CI-NEXT: v_cvt_f32_f16_e32 v27, v27 -; CI-NEXT: v_cvt_f32_f16_e32 v28, v28 -; CI-NEXT: v_cvt_f32_f16_e32 v29, v29 -; CI-NEXT: v_cvt_f32_f16_e32 v30, v30 ; CI-NEXT: s_waitcnt vmcnt(0) -; CI-NEXT: v_cvt_f16_f32_e32 v31, v31 -; CI-NEXT: v_cvt_f32_f16_e32 v31, v31 ; CI-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: v_test_canonicalize_var_v32f16: diff --git a/llvm/test/CodeGen/ARM/arm-half-promote.ll b/llvm/test/CodeGen/ARM/arm-half-promote.ll index e1ab75b2ac7f1..d6a8a9b9538f1 100644 --- a/llvm/test/CodeGen/ARM/arm-half-promote.ll +++ b/llvm/test/CodeGen/ARM/arm-half-promote.ll @@ -2,113 +2,78 @@ define arm_aapcs_vfpcc { <8 x half>, <8 x half> } @f1() { ; CHECK-LABEL: _f1 -; CHECK: vpush {d8, d9, d10, d11} -; CHECK-NEXT: vmov.i32 q8, #0x0 -; CHECK-NEXT: vmov.u16 r0, d16[0] -; CHECK-NEXT: vmov d4, r0, r0 -; CHECK-NEXT: vmov.u16 r0, d16[1] -; CHECK-NEXT: vmov d8, r0, r0 -; CHECK-NEXT: vmov.u16 r0, d16[2] -; CHECK-NEXT: vmov d5, r0, r0 -; CHECK-NEXT: vmov.u16 r0, d16[3] -; CHECK-NEXT: vmov d9, r0, r0 -; CHECK-NEXT: vmov.u16 r0, d17[0] -; CHECK-NEXT: vmov d6, r0, r0 -; CHECK-NEXT: vmov.u16 r0, d17[1] -; CHECK-NEXT: vmov d10, r0, r0 -; CHECK-NEXT: vmov.u16 r0, d17[2] -; CHECK-NEXT: vmov d7, r0, r0 -; CHECK-NEXT: vmov.u16 r0, d17[3] -; CHECK-NEXT: vmov d11, r0, r0 -; CHECK: vmov.f32 s0, s8 -; CHECK: vmov.f32 s1, s16 -; CHECK: vmov.f32 s2, s10 -; CHECK: vmov.f32 s3, s18 -; CHECK: vmov.f32 s4, s12 -; CHECK: vmov.f32 s5, s20 -; CHECK: vmov.f32 s6, s14 -; CHECK: vmov.f32 s7, s22 -; CHECK: vmov.f32 s9, s16 -; CHECK: vmov.f32 s11, s18 -; CHECK: vmov.f32 s13, s20 -; CHECK: vmov.f32 s15, s22 -; CHECK: vpop {d8, d9, d10, d11} +; CHECK: vpush {d8} +; CHECK-NEXT: vmov.f64 d8, #5.000000e-01 +; CHECK-NEXT: vmov.i32 d8, #0x0 +; CHECK-NEXT: vmov.i32 d0, #0x0 +; CHECK-NEXT: vmov.i32 d1, #0x0 +; CHECK-NEXT: vmov.i32 d2, #0x0 +; CHECK-NEXT: vmov.i32 d3, #0x0 +; CHECK-NEXT: vmov.i32 d4, #0x0 +; CHECK-NEXT: vmov.i32 d5, #0x0 +; CHECK-NEXT: vmov.i32 d6, #0x0 +; CHECK-NEXT: vmov.i32 d7, #0x0 +; CHECK-NEXT: vmov.f32 s1, s16 +; CHECK-NEXT: vmov.f32 s3, s16 +; CHECK-NEXT: vmov.f32 s5, s16 +; CHECK-NEXT: vmov.f32 s7, s16 +; CHECK-NEXT: vmov.f32 s9, s16 +; CHECK-NEXT: vmov.f32 s11, s16 +; CHECK-NEXT: vmov.f32 s13, s16 +; CHECK-NEXT: vmov.f32 s15, s16 +; CHECK-NEXT: vpop {d8} ; CHECK-NEXT: bx lr - ret { <8 x half>, <8 x half> } zeroinitializer } define swiftcc { <8 x half>, <8 x half> } @f2() { ; CHECK-LABEL: _f2 -; CHECK: vpush {d8, d9, d10, d11} -; CHECK-NEXT: vmov.i32 q8, #0x0 -; CHECK-NEXT: vmov.u16 r0, d16[0] -; CHECK-NEXT: vmov d4, r0, r0 -; CHECK-NEXT: vmov.u16 r0, d16[1] -; CHECK-NEXT: vmov d8, r0, r0 -; CHECK-NEXT: vmov.u16 r0, d16[2] -; CHECK-NEXT: vmov d5, r0, r0 -; CHECK-NEXT: vmov.u16 r0, d16[3] -; CHECK-NEXT: vmov d9, r0, r0 -; CHECK-NEXT: vmov.u16 r0, d17[0] -; CHECK-NEXT: vmov d6, r0, r0 -; CHECK-NEXT: vmov.u16 r0, d17[1] -; CHECK-NEXT: vmov d10, r0, r0 -; CHECK-NEXT: vmov.u16 r0, d17[2] -; CHECK-NEXT: vmov d7, r0, r0 -; CHECK-NEXT: vmov.u16 r0, d17[3] -; CHECK-NEXT: vmov d11, r0, r0 -; CHECK: vmov.f32 s0, s8 -; CHECK: vmov.f32 s1, s16 -; CHECK: vmov.f32 s2, s10 -; CHECK: vmov.f32 s3, s18 -; CHECK: vmov.f32 s4, s12 -; CHECK: vmov.f32 s5, s20 -; CHECK: vmov.f32 s6, s14 -; CHECK: vmov.f32 s7, s22 -; CHECK: vmov.f32 s9, s16 -; CHECK: vmov.f32 s11, s18 -; CHECK: vmov.f32 s13, s20 -; CHECK: vmov.f32 s15, s22 -; CHECK-NEXT: vpop {d8, d9, d10, d11} +; CHECK: vpush {d8} +; CHECK-NEXT: vmov.f64 d8, #5.000000e-01 +; CHECK-NEXT: vmov.i32 d8, #0x0 +; CHECK-NEXT: vmov.i32 d0, #0x0 +; CHECK-NEXT: vmov.i32 d1, #0x0 +; CHECK-NEXT: vmov.i32 d2, #0x0 +; CHECK-NEXT: vmov.i32 d3, #0x0 +; CHECK-NEXT: vmov.i32 d4, #0x0 +; CHECK-NEXT: vmov.i32 d5, #0x0 +; CHECK-NEXT: vmov.i32 d6, #0x0 +; CHECK-NEXT: vmov.i32 d7, #0x0 +; CHECK-NEXT: vmov.f32 s1, s16 +; CHECK-NEXT: vmov.f32 s3, s16 +; CHECK-NEXT: vmov.f32 s5, s16 +; CHECK-NEXT: vmov.f32 s7, s16 +; CHECK-NEXT: vmov.f32 s9, s16 +; CHECK-NEXT: vmov.f32 s11, s16 +; CHECK-NEXT: vmov.f32 s13, s16 +; CHECK-NEXT: vmov.f32 s15, s16 +; CHECK-NEXT: vpop {d8} ; CHECK-NEXT: bx lr - ret { <8 x half>, <8 x half> } zeroinitializer } define fastcc { <8 x half>, <8 x half> } @f3() { ; CHECK-LABEL: _f3 -; CHECK: vpush {d8, d9, d10, d11} -; CHECK-NEXT: vmov.i32 q8, #0x0 -; CHECK-NEXT: vmov.u16 r0, d16[0] -; CHECK-NEXT: vmov d4, r0, r0 -; CHECK-NEXT: vmov.u16 r0, d16[1] -; CHECK-NEXT: vmov d8, r0, r0 -; CHECK-NEXT: vmov.u16 r0, d16[2] -; CHECK-NEXT: vmov d5, r0, r0 -; CHECK-NEXT: vmov.u16 r0, d16[3] -; CHECK-NEXT: vmov d9, r0, r0 -; CHECK-NEXT: vmov.u16 r0, d17[0] -; CHECK-NEXT: vmov d6, r0, r0 -; CHECK-NEXT: vmov.u16 r0, d17[1] -; CHECK-NEXT: vmov d10, r0, r0 -; CHECK-NEXT: vmov.u16 r0, d17[2] -; CHECK-NEXT: vmov d7, r0, r0 -; CHECK-NEXT: vmov.u16 r0, d17[3] -; CHECK-NEXT: vmov d11, r0, r0 -; CHECK: vmov.f32 s0, s8 -; CHECK: vmov.f32 s1, s16 -; CHECK: vmov.f32 s2, s10 -; CHECK: vmov.f32 s3, s18 -; CHECK: vmov.f32 s4, s12 -; CHECK: vmov.f32 s5, s20 -; CHECK: vmov.f32 s6, s14 -; CHECK: vmov.f32 s7, s22 -; CHECK: vmov.f32 s9, s16 -; CHECK: vmov.f32 s11, s18 -; CHECK: vmov.f32 s13, s20 -; CHECK: vmov.f32 s15, s22 -; CHECK-NEXT: vpop {d8, d9, d10, d11} +; CHECK: vpush {d8} +; CHECK-NEXT: vmov.f64 d8, #5.000000e-01 +; CHECK-NEXT: vmov.i32 d8, #0x0 +; CHECK-NEXT: vmov.i32 d0, #0x0 +; CHECK-NEXT: vmov.i32 d1, #0x0 +; CHECK-NEXT: vmov.i32 d2, #0x0 +; CHECK-NEXT: vmov.i32 d3, #0x0 +; CHECK-NEXT: vmov.i32 d4, #0x0 +; CHECK-NEXT: vmov.i32 d5, #0x0 +; CHECK-NEXT: vmov.i32 d6, #0x0 +; CHECK-NEXT: vmov.i32 d7, #0x0 +; CHECK-NEXT: vmov.f32 s1, s16 +; CHECK-NEXT: vmov.f32 s3, s16 +; CHECK-NEXT: vmov.f32 s5, s16 +; CHECK-NEXT: vmov.f32 s7, s16 +; CHECK-NEXT: vmov.f32 s9, s16 +; CHECK-NEXT: vmov.f32 s11, s16 +; CHECK-NEXT: vmov.f32 s13, s16 +; CHECK-NEXT: vmov.f32 s15, s16 +; CHECK-NEXT: vpop {d8} ; CHECK-NEXT: bx lr ret { <8 x half>, <8 x half> } zeroinitializer diff --git a/llvm/test/CodeGen/Hexagon/autohvx/hfnosplat_cp.ll b/llvm/test/CodeGen/Hexagon/autohvx/hfnosplat_cp.ll deleted file mode 100644 index 4c5c96e61b78c..0000000000000 --- a/llvm/test/CodeGen/Hexagon/autohvx/hfnosplat_cp.ll +++ /dev/null @@ -1,17 +0,0 @@ -; RUN: llc -mtriple=hexagon < %s | FileCheck %s - -; Check that the vsplat instruction is generated -; CHECK: .word 1097875824 -; CHECK: .word 1048133241 -; CHECK: .word 0 - -target datalayout = "e-m:e-p:32:32:32-a:0-n16:32-i64:64:64-i32:32:32-i16:16:16-i1:8:8-f32:32:32-f64:64:64-v32:32:32-v64:64:64-v512:512:512-v1024:1024:1024-v2048:2048:2048" -target triple = "hexagon" -; Function Attrs: nofree norecurse nounwind writeonly -define dso_local i32 @foo(ptr nocapture %a) local_unnamed_addr #0 { -vector.body: - store <40 x half> , ptr %a, align 2 - ret i32 0 -} - -attributes #0 = { nofree norecurse nounwind writeonly "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="hexagonv69" "target-features"="+hvx-length128b,+hvxv69,+v69,-long-calls" "unsafe-fp-math"="false" "use-soft-float"="false" } diff --git a/llvm/test/CodeGen/Mips/cconv/vector.ll b/llvm/test/CodeGen/Mips/cconv/vector.ll index 28a7dc046139b..a56005ead73ef 100644 --- a/llvm/test/CodeGen/Mips/cconv/vector.ll +++ b/llvm/test/CodeGen/Mips/cconv/vector.ll @@ -3855,77 +3855,81 @@ define void @calli8_16() { ; MIPS64EB-NEXT: jr $ra ; MIPS64EB-NEXT: nop ; -; MIPS32R5-LABEL: calli8_16: -; MIPS32R5: # %bb.0: # %entry -; MIPS32R5-NEXT: addiu $sp, $sp, -40 -; MIPS32R5-NEXT: .cfi_def_cfa_offset 40 -; MIPS32R5-NEXT: sw $ra, 36($sp) # 4-byte Folded Spill -; MIPS32R5-NEXT: .cfi_offset 31, -4 -; MIPS32R5-NEXT: lui $1, %hi($CPI30_0) -; MIPS32R5-NEXT: addiu $1, $1, %lo($CPI30_0) -; MIPS32R5-NEXT: ld.w $w0, 0($1) -; MIPS32R5-NEXT: copy_s.w $4, $w0[0] -; MIPS32R5-NEXT: copy_s.w $5, $w0[1] -; MIPS32R5-NEXT: copy_s.w $6, $w0[2] -; MIPS32R5-NEXT: copy_s.w $7, $w0[3] -; MIPS32R5-NEXT: lui $1, %hi($CPI30_1) -; MIPS32R5-NEXT: addiu $1, $1, %lo($CPI30_1) -; MIPS32R5-NEXT: ld.w $w0, 0($1) -; MIPS32R5-NEXT: copy_s.w $1, $w0[0] -; MIPS32R5-NEXT: copy_s.w $2, $w0[1] -; MIPS32R5-NEXT: copy_s.w $3, $w0[2] -; MIPS32R5-NEXT: copy_s.w $8, $w0[3] -; MIPS32R5-NEXT: sw $8, 28($sp) -; MIPS32R5-NEXT: sw $3, 24($sp) -; MIPS32R5-NEXT: sw $2, 20($sp) -; MIPS32R5-NEXT: sw $1, 16($sp) -; MIPS32R5-NEXT: jal i8_16 -; MIPS32R5-NEXT: nop -; MIPS32R5-NEXT: lui $1, %hi(gv16i8) -; MIPS32R5-NEXT: insert.w $w0[0], $2 -; MIPS32R5-NEXT: insert.w $w0[1], $3 -; MIPS32R5-NEXT: addiu $1, $1, %lo(gv16i8) -; MIPS32R5-NEXT: insert.w $w0[2], $4 -; MIPS32R5-NEXT: insert.w $w0[3], $5 -; MIPS32R5-NEXT: st.w $w0, 0($1) -; MIPS32R5-NEXT: lw $ra, 36($sp) # 4-byte Folded Reload -; MIPS32R5-NEXT: addiu $sp, $sp, 40 -; MIPS32R5-NEXT: jr $ra -; MIPS32R5-NEXT: nop +; MIPS32R5EB-LABEL: calli8_16: +; MIPS32R5EB: # %bb.0: # %entry +; MIPS32R5EB-NEXT: addiu $sp, $sp, -40 +; MIPS32R5EB-NEXT: .cfi_def_cfa_offset 40 +; MIPS32R5EB-NEXT: sw $ra, 36($sp) # 4-byte Folded Spill +; MIPS32R5EB-NEXT: .cfi_offset 31, -4 +; MIPS32R5EB-NEXT: lui $1, 3080 +; MIPS32R5EB-NEXT: ori $1, $1, 2314 +; MIPS32R5EB-NEXT: lui $2, 1801 +; MIPS32R5EB-NEXT: sw $1, 28($sp) +; MIPS32R5EB-NEXT: ori $1, $2, 1801 +; MIPS32R5EB-NEXT: sw $1, 24($sp) +; MIPS32R5EB-NEXT: sw $1, 20($sp) +; MIPS32R5EB-NEXT: sw $1, 16($sp) +; MIPS32R5EB-NEXT: lui $1, 1543 +; MIPS32R5EB-NEXT: ori $4, $1, 1543 +; MIPS32R5EB-NEXT: ori $7, $1, 2314 +; MIPS32R5EB-NEXT: move $5, $4 +; MIPS32R5EB-NEXT: move $6, $4 +; MIPS32R5EB-NEXT: jal i8_16 +; MIPS32R5EB-NEXT: nop +; MIPS32R5EB-NEXT: insert.w $w0[0], $2 +; MIPS32R5EB-NEXT: insert.w $w0[1], $3 +; MIPS32R5EB-NEXT: insert.w $w0[2], $4 +; MIPS32R5EB-NEXT: lui $1, %hi(gv16i8) +; MIPS32R5EB-NEXT: insert.w $w0[3], $5 +; MIPS32R5EB-NEXT: addiu $1, $1, %lo(gv16i8) +; MIPS32R5EB-NEXT: st.w $w0, 0($1) +; MIPS32R5EB-NEXT: lw $ra, 36($sp) # 4-byte Folded Reload +; MIPS32R5EB-NEXT: addiu $sp, $sp, 40 +; MIPS32R5EB-NEXT: jr $ra +; MIPS32R5EB-NEXT: nop ; -; MIPS64R5-LABEL: calli8_16: -; MIPS64R5: # %bb.0: # %entry -; MIPS64R5-NEXT: daddiu $sp, $sp, -16 -; MIPS64R5-NEXT: .cfi_def_cfa_offset 16 -; MIPS64R5-NEXT: sd $ra, 8($sp) # 8-byte Folded Spill -; MIPS64R5-NEXT: sd $gp, 0($sp) # 8-byte Folded Spill -; MIPS64R5-NEXT: .cfi_offset 31, -8 -; MIPS64R5-NEXT: .cfi_offset 28, -16 -; MIPS64R5-NEXT: lui $1, %hi(%neg(%gp_rel(calli8_16))) -; MIPS64R5-NEXT: daddu $1, $1, $25 -; MIPS64R5-NEXT: daddiu $gp, $1, %lo(%neg(%gp_rel(calli8_16))) -; MIPS64R5-NEXT: ld $1, %got_page(.LCPI30_0)($gp) -; MIPS64R5-NEXT: daddiu $1, $1, %got_ofst(.LCPI30_0) -; MIPS64R5-NEXT: ld.d $w0, 0($1) -; MIPS64R5-NEXT: copy_s.d $4, $w0[0] -; MIPS64R5-NEXT: copy_s.d $5, $w0[1] -; MIPS64R5-NEXT: ld $1, %got_page(.LCPI30_1)($gp) -; MIPS64R5-NEXT: daddiu $1, $1, %got_ofst(.LCPI30_1) -; MIPS64R5-NEXT: ld.d $w0, 0($1) -; MIPS64R5-NEXT: copy_s.d $6, $w0[0] -; MIPS64R5-NEXT: copy_s.d $7, $w0[1] -; MIPS64R5-NEXT: ld $25, %call16(i8_16)($gp) -; MIPS64R5-NEXT: jalr $25 -; MIPS64R5-NEXT: nop -; MIPS64R5-NEXT: insert.d $w0[0], $2 -; MIPS64R5-NEXT: insert.d $w0[1], $3 -; MIPS64R5-NEXT: ld $1, %got_disp(gv16i8)($gp) -; MIPS64R5-NEXT: st.d $w0, 0($1) -; MIPS64R5-NEXT: ld $gp, 0($sp) # 8-byte Folded Reload -; MIPS64R5-NEXT: ld $ra, 8($sp) # 8-byte Folded Reload -; MIPS64R5-NEXT: daddiu $sp, $sp, 16 -; MIPS64R5-NEXT: jr $ra -; MIPS64R5-NEXT: nop +; MIPS64R5EB-LABEL: calli8_16: +; MIPS64R5EB: # %bb.0: # %entry +; MIPS64R5EB-NEXT: daddiu $sp, $sp, -16 +; MIPS64R5EB-NEXT: .cfi_def_cfa_offset 16 +; MIPS64R5EB-NEXT: sd $ra, 8($sp) # 8-byte Folded Spill +; MIPS64R5EB-NEXT: sd $gp, 0($sp) # 8-byte Folded Spill +; MIPS64R5EB-NEXT: .cfi_offset 31, -8 +; MIPS64R5EB-NEXT: .cfi_offset 28, -16 +; MIPS64R5EB-NEXT: lui $1, %hi(%neg(%gp_rel(calli8_16))) +; MIPS64R5EB-NEXT: daddu $1, $1, $25 +; MIPS64R5EB-NEXT: daddiu $gp, $1, %lo(%neg(%gp_rel(calli8_16))) +; MIPS64R5EB-NEXT: lui $1, 1801 +; MIPS64R5EB-NEXT: daddiu $1, $1, 1801 +; MIPS64R5EB-NEXT: dsll $1, $1, 16 +; MIPS64R5EB-NEXT: daddiu $1, $1, 1801 +; MIPS64R5EB-NEXT: lui $2, 1543 +; MIPS64R5EB-NEXT: dsll $1, $1, 16 +; MIPS64R5EB-NEXT: daddiu $2, $2, 1543 +; MIPS64R5EB-NEXT: dsll $2, $2, 16 +; MIPS64R5EB-NEXT: daddiu $2, $2, 1543 +; MIPS64R5EB-NEXT: dsll $2, $2, 16 +; MIPS64R5EB-NEXT: daddiu $4, $2, 1543 +; MIPS64R5EB-NEXT: daddiu $5, $2, 2314 +; MIPS64R5EB-NEXT: daddiu $6, $1, 1801 +; MIPS64R5EB-NEXT: lui $1, 225 +; MIPS64R5EB-NEXT: daddiu $1, $1, 8417 +; MIPS64R5EB-NEXT: dsll $1, $1, 16 +; MIPS64R5EB-NEXT: daddiu $1, $1, 8577 +; MIPS64R5EB-NEXT: dsll $1, $1, 19 +; MIPS64R5EB-NEXT: daddiu $7, $1, 2314 +; MIPS64R5EB-NEXT: ld $25, %call16(i8_16)($gp) +; MIPS64R5EB-NEXT: jalr $25 +; MIPS64R5EB-NEXT: nop +; MIPS64R5EB-NEXT: ld $1, %got_disp(gv16i8)($gp) +; MIPS64R5EB-NEXT: insert.d $w0[0], $2 +; MIPS64R5EB-NEXT: insert.d $w0[1], $3 +; MIPS64R5EB-NEXT: st.d $w0, 0($1) +; MIPS64R5EB-NEXT: ld $gp, 0($sp) # 8-byte Folded Reload +; MIPS64R5EB-NEXT: ld $ra, 8($sp) # 8-byte Folded Reload +; MIPS64R5EB-NEXT: daddiu $sp, $sp, 16 +; MIPS64R5EB-NEXT: jr $ra +; MIPS64R5EB-NEXT: nop ; ; MIPS32EL-LABEL: calli8_16: ; MIPS32EL: # %bb.0: # %entry @@ -4005,6 +4009,87 @@ define void @calli8_16() { ; MIPS64EL-NEXT: daddiu $sp, $sp, 16 ; MIPS64EL-NEXT: jr $ra ; MIPS64EL-NEXT: nop +; +; MIPS32R5EL-LABEL: calli8_16: +; MIPS32R5EL: # %bb.0: # %entry +; MIPS32R5EL-NEXT: addiu $sp, $sp, -40 +; MIPS32R5EL-NEXT: .cfi_def_cfa_offset 40 +; MIPS32R5EL-NEXT: sw $ra, 36($sp) # 4-byte Folded Spill +; MIPS32R5EL-NEXT: .cfi_offset 31, -4 +; MIPS32R5EL-NEXT: lui $1, 2569 +; MIPS32R5EL-NEXT: ori $2, $1, 2060 +; MIPS32R5EL-NEXT: lui $3, 2311 +; MIPS32R5EL-NEXT: sw $2, 28($sp) +; MIPS32R5EL-NEXT: ori $2, $3, 2311 +; MIPS32R5EL-NEXT: sw $2, 24($sp) +; MIPS32R5EL-NEXT: sw $2, 20($sp) +; MIPS32R5EL-NEXT: sw $2, 16($sp) +; MIPS32R5EL-NEXT: lui $2, 1798 +; MIPS32R5EL-NEXT: ori $4, $2, 1798 +; MIPS32R5EL-NEXT: ori $7, $1, 1798 +; MIPS32R5EL-NEXT: move $5, $4 +; MIPS32R5EL-NEXT: move $6, $4 +; MIPS32R5EL-NEXT: jal i8_16 +; MIPS32R5EL-NEXT: nop +; MIPS32R5EL-NEXT: insert.w $w0[0], $2 +; MIPS32R5EL-NEXT: insert.w $w0[1], $3 +; MIPS32R5EL-NEXT: insert.w $w0[2], $4 +; MIPS32R5EL-NEXT: lui $1, %hi(gv16i8) +; MIPS32R5EL-NEXT: insert.w $w0[3], $5 +; MIPS32R5EL-NEXT: addiu $1, $1, %lo(gv16i8) +; MIPS32R5EL-NEXT: st.w $w0, 0($1) +; MIPS32R5EL-NEXT: lw $ra, 36($sp) # 4-byte Folded Reload +; MIPS32R5EL-NEXT: addiu $sp, $sp, 40 +; MIPS32R5EL-NEXT: jr $ra +; MIPS32R5EL-NEXT: nop +; +; MIPS64R5EL-LABEL: calli8_16: +; MIPS64R5EL: # %bb.0: # %entry +; MIPS64R5EL-NEXT: daddiu $sp, $sp, -16 +; MIPS64R5EL-NEXT: .cfi_def_cfa_offset 16 +; MIPS64R5EL-NEXT: sd $ra, 8($sp) # 8-byte Folded Spill +; MIPS64R5EL-NEXT: sd $gp, 0($sp) # 8-byte Folded Spill +; MIPS64R5EL-NEXT: .cfi_offset 31, -8 +; MIPS64R5EL-NEXT: .cfi_offset 28, -16 +; MIPS64R5EL-NEXT: lui $1, %hi(%neg(%gp_rel(calli8_16))) +; MIPS64R5EL-NEXT: daddu $1, $1, $25 +; MIPS64R5EL-NEXT: daddiu $gp, $1, %lo(%neg(%gp_rel(calli8_16))) +; MIPS64R5EL-NEXT: lui $1, 1285 +; MIPS64R5EL-NEXT: daddiu $1, $1, -31869 +; MIPS64R5EL-NEXT: dsll $1, $1, 16 +; MIPS64R5EL-NEXT: daddiu $1, $1, 899 +; MIPS64R5EL-NEXT: lui $2, 2311 +; MIPS64R5EL-NEXT: daddiu $2, $2, 2311 +; MIPS64R5EL-NEXT: dsll $2, $2, 16 +; MIPS64R5EL-NEXT: daddiu $2, $2, 2311 +; MIPS64R5EL-NEXT: dsll $2, $2, 16 +; MIPS64R5EL-NEXT: dsll $1, $1, 17 +; MIPS64R5EL-NEXT: lui $3, 899 +; MIPS64R5EL-NEXT: daddiu $3, $3, 899 +; MIPS64R5EL-NEXT: dsll $3, $3, 16 +; MIPS64R5EL-NEXT: daddiu $3, $3, 899 +; MIPS64R5EL-NEXT: dsll $3, $3, 17 +; MIPS64R5EL-NEXT: daddiu $4, $3, 1798 +; MIPS64R5EL-NEXT: daddiu $5, $1, 1798 +; MIPS64R5EL-NEXT: daddiu $6, $2, 2311 +; MIPS64R5EL-NEXT: lui $1, 642 +; MIPS64R5EL-NEXT: daddiu $1, $1, 16899 +; MIPS64R5EL-NEXT: dsll $1, $1, 18 +; MIPS64R5EL-NEXT: daddiu $1, $1, 2311 +; MIPS64R5EL-NEXT: dsll $1, $1, 16 +; MIPS64R5EL-NEXT: daddiu $7, $1, 2311 +; MIPS64R5EL-NEXT: ld $25, %call16(i8_16)($gp) +; MIPS64R5EL-NEXT: jalr $25 +; MIPS64R5EL-NEXT: nop +; MIPS64R5EL-NEXT: ld $1, %got_disp(gv16i8)($gp) +; MIPS64R5EL-NEXT: insert.d $w0[0], $2 +; MIPS64R5EL-NEXT: insert.d $w0[1], $3 +; MIPS64R5EL-NEXT: st.d $w0, 0($1) +; MIPS64R5EL-NEXT: ld $gp, 0($sp) # 8-byte Folded Reload +; MIPS64R5EL-NEXT: ld $ra, 8($sp) # 8-byte Folded Reload +; MIPS64R5EL-NEXT: daddiu $sp, $sp, 16 +; MIPS64R5EL-NEXT: jr $ra +; MIPS64R5EL-NEXT: nop entry: %0 = call <16 x i8> @i8_16(<16 x i8> , <16 x i8> ) store <16 x i8> %0, ptr @gv16i8 @@ -4510,36 +4595,26 @@ define void @calli16_8() { ; MIPS32R5EB-NEXT: .cfi_def_cfa_offset 40 ; MIPS32R5EB-NEXT: sw $ra, 36($sp) # 4-byte Folded Spill ; MIPS32R5EB-NEXT: .cfi_offset 31, -4 +; MIPS32R5EB-NEXT: lui $1, 9 +; MIPS32R5EB-NEXT: ori $5, $1, 10 +; MIPS32R5EB-NEXT: sw $5, 28($sp) +; MIPS32R5EB-NEXT: lui $1, 12 +; MIPS32R5EB-NEXT: ori $1, $1, 8 +; MIPS32R5EB-NEXT: sw $1, 24($sp) +; MIPS32R5EB-NEXT: sw $5, 20($sp) ; MIPS32R5EB-NEXT: lui $1, 6 -; MIPS32R5EB-NEXT: ori $1, $1, 7 -; MIPS32R5EB-NEXT: lui $2, 9 -; MIPS32R5EB-NEXT: ori $2, $2, 10 -; MIPS32R5EB-NEXT: fill.w $w0, $2 -; MIPS32R5EB-NEXT: insert.w $w0[1], $1 -; MIPS32R5EB-NEXT: splati.d $w0, $w0[0] -; MIPS32R5EB-NEXT: copy_s.w $4, $w0[0] -; MIPS32R5EB-NEXT: copy_s.w $5, $w0[1] -; MIPS32R5EB-NEXT: copy_s.w $6, $w0[2] -; MIPS32R5EB-NEXT: copy_s.w $7, $w0[3] -; MIPS32R5EB-NEXT: lui $1, %hi($CPI33_0) -; MIPS32R5EB-NEXT: addiu $1, $1, %lo($CPI33_0) -; MIPS32R5EB-NEXT: ld.w $w0, 0($1) -; MIPS32R5EB-NEXT: copy_s.w $1, $w0[0] -; MIPS32R5EB-NEXT: copy_s.w $2, $w0[1] -; MIPS32R5EB-NEXT: copy_s.w $3, $w0[2] -; MIPS32R5EB-NEXT: copy_s.w $8, $w0[3] -; MIPS32R5EB-NEXT: sw $8, 28($sp) -; MIPS32R5EB-NEXT: sw $3, 24($sp) -; MIPS32R5EB-NEXT: sw $2, 20($sp) -; MIPS32R5EB-NEXT: sw $1, 16($sp) +; MIPS32R5EB-NEXT: ori $4, $1, 7 +; MIPS32R5EB-NEXT: sw $4, 16($sp) +; MIPS32R5EB-NEXT: move $6, $4 +; MIPS32R5EB-NEXT: move $7, $5 ; MIPS32R5EB-NEXT: jal i16_8 ; MIPS32R5EB-NEXT: nop -; MIPS32R5EB-NEXT: lui $1, %hi(gv8i16) -; MIPS32R5EB-NEXT: addiu $1, $1, %lo(gv8i16) ; MIPS32R5EB-NEXT: insert.w $w0[0], $2 ; MIPS32R5EB-NEXT: insert.w $w0[1], $3 ; MIPS32R5EB-NEXT: insert.w $w0[2], $4 +; MIPS32R5EB-NEXT: lui $1, %hi(gv8i16) ; MIPS32R5EB-NEXT: insert.w $w0[3], $5 +; MIPS32R5EB-NEXT: addiu $1, $1, %lo(gv8i16) ; MIPS32R5EB-NEXT: st.w $w0, 0($1) ; MIPS32R5EB-NEXT: lw $ra, 36($sp) # 4-byte Folded Reload ; MIPS32R5EB-NEXT: addiu $sp, $sp, 40 @@ -4557,20 +4632,21 @@ define void @calli16_8() { ; MIPS64R5EB-NEXT: lui $1, %hi(%neg(%gp_rel(calli16_8))) ; MIPS64R5EB-NEXT: daddu $1, $1, $25 ; MIPS64R5EB-NEXT: daddiu $gp, $1, %lo(%neg(%gp_rel(calli16_8))) -; MIPS64R5EB-NEXT: lui $1, 9 -; MIPS64R5EB-NEXT: ori $1, $1, 10 -; MIPS64R5EB-NEXT: lui $2, 6 -; MIPS64R5EB-NEXT: ori $2, $2, 7 -; MIPS64R5EB-NEXT: dinsu $1, $2, 32, 32 -; MIPS64R5EB-NEXT: fill.d $w0, $1 -; MIPS64R5EB-NEXT: copy_s.d $4, $w0[0] -; MIPS64R5EB-NEXT: copy_s.d $5, $w0[1] -; MIPS64R5EB-NEXT: ld $1, %got_page(.LCPI33_0)($gp) -; MIPS64R5EB-NEXT: daddiu $1, $1, %got_ofst(.LCPI33_0) -; MIPS64R5EB-NEXT: ld.d $w0, 0($1) -; MIPS64R5EB-NEXT: copy_s.d $6, $w0[0] -; MIPS64R5EB-NEXT: copy_s.d $7, $w0[1] +; MIPS64R5EB-NEXT: lui $1, 6 +; MIPS64R5EB-NEXT: daddiu $1, $1, 7 +; MIPS64R5EB-NEXT: dsll $1, $1, 16 +; MIPS64R5EB-NEXT: daddiu $1, $1, 9 +; MIPS64R5EB-NEXT: dsll $1, $1, 16 +; MIPS64R5EB-NEXT: daddiu $4, $1, 10 +; MIPS64R5EB-NEXT: lui $1, 2 +; MIPS64R5EB-NEXT: daddiu $1, $1, -32767 +; MIPS64R5EB-NEXT: dsll $1, $1, 19 +; MIPS64R5EB-NEXT: daddiu $1, $1, 9 +; MIPS64R5EB-NEXT: dsll $1, $1, 16 +; MIPS64R5EB-NEXT: daddiu $7, $1, 10 ; MIPS64R5EB-NEXT: ld $25, %call16(i16_8)($gp) +; MIPS64R5EB-NEXT: move $5, $4 +; MIPS64R5EB-NEXT: move $6, $4 ; MIPS64R5EB-NEXT: jalr $25 ; MIPS64R5EB-NEXT: nop ; MIPS64R5EB-NEXT: ld $1, %got_disp(gv8i16)($gp) @@ -4658,35 +4734,25 @@ define void @calli16_8() { ; MIPS32R5EL-NEXT: sw $ra, 36($sp) # 4-byte Folded Spill ; MIPS32R5EL-NEXT: .cfi_offset 31, -4 ; MIPS32R5EL-NEXT: lui $1, 10 -; MIPS32R5EL-NEXT: ori $1, $1, 9 -; MIPS32R5EL-NEXT: lui $2, 7 -; MIPS32R5EL-NEXT: ori $2, $2, 6 -; MIPS32R5EL-NEXT: fill.w $w0, $2 -; MIPS32R5EL-NEXT: insert.w $w0[1], $1 -; MIPS32R5EL-NEXT: splati.d $w0, $w0[0] -; MIPS32R5EL-NEXT: copy_s.w $4, $w0[0] -; MIPS32R5EL-NEXT: copy_s.w $5, $w0[1] -; MIPS32R5EL-NEXT: copy_s.w $6, $w0[2] -; MIPS32R5EL-NEXT: copy_s.w $7, $w0[3] -; MIPS32R5EL-NEXT: lui $1, %hi($CPI33_0) -; MIPS32R5EL-NEXT: addiu $1, $1, %lo($CPI33_0) -; MIPS32R5EL-NEXT: ld.w $w0, 0($1) -; MIPS32R5EL-NEXT: copy_s.w $1, $w0[0] -; MIPS32R5EL-NEXT: copy_s.w $2, $w0[1] -; MIPS32R5EL-NEXT: copy_s.w $3, $w0[2] -; MIPS32R5EL-NEXT: copy_s.w $8, $w0[3] -; MIPS32R5EL-NEXT: sw $8, 28($sp) -; MIPS32R5EL-NEXT: sw $3, 24($sp) -; MIPS32R5EL-NEXT: sw $2, 20($sp) -; MIPS32R5EL-NEXT: sw $1, 16($sp) +; MIPS32R5EL-NEXT: ori $5, $1, 9 +; MIPS32R5EL-NEXT: sw $5, 28($sp) +; MIPS32R5EL-NEXT: lui $1, 8 +; MIPS32R5EL-NEXT: ori $1, $1, 12 +; MIPS32R5EL-NEXT: sw $1, 24($sp) +; MIPS32R5EL-NEXT: sw $5, 20($sp) +; MIPS32R5EL-NEXT: lui $1, 7 +; MIPS32R5EL-NEXT: ori $4, $1, 6 +; MIPS32R5EL-NEXT: sw $4, 16($sp) +; MIPS32R5EL-NEXT: move $6, $4 +; MIPS32R5EL-NEXT: move $7, $5 ; MIPS32R5EL-NEXT: jal i16_8 ; MIPS32R5EL-NEXT: nop -; MIPS32R5EL-NEXT: lui $1, %hi(gv8i16) -; MIPS32R5EL-NEXT: addiu $1, $1, %lo(gv8i16) ; MIPS32R5EL-NEXT: insert.w $w0[0], $2 ; MIPS32R5EL-NEXT: insert.w $w0[1], $3 ; MIPS32R5EL-NEXT: insert.w $w0[2], $4 +; MIPS32R5EL-NEXT: lui $1, %hi(gv8i16) ; MIPS32R5EL-NEXT: insert.w $w0[3], $5 +; MIPS32R5EL-NEXT: addiu $1, $1, %lo(gv8i16) ; MIPS32R5EL-NEXT: st.w $w0, 0($1) ; MIPS32R5EL-NEXT: lw $ra, 36($sp) # 4-byte Folded Reload ; MIPS32R5EL-NEXT: addiu $sp, $sp, 40 @@ -4704,20 +4770,21 @@ define void @calli16_8() { ; MIPS64R5EL-NEXT: lui $1, %hi(%neg(%gp_rel(calli16_8))) ; MIPS64R5EL-NEXT: daddu $1, $1, $25 ; MIPS64R5EL-NEXT: daddiu $gp, $1, %lo(%neg(%gp_rel(calli16_8))) -; MIPS64R5EL-NEXT: lui $1, 7 -; MIPS64R5EL-NEXT: ori $1, $1, 6 -; MIPS64R5EL-NEXT: lui $2, 10 -; MIPS64R5EL-NEXT: ori $2, $2, 9 -; MIPS64R5EL-NEXT: dinsu $1, $2, 32, 32 -; MIPS64R5EL-NEXT: fill.d $w0, $1 -; MIPS64R5EL-NEXT: copy_s.d $4, $w0[0] -; MIPS64R5EL-NEXT: copy_s.d $5, $w0[1] -; MIPS64R5EL-NEXT: ld $1, %got_page(.LCPI33_0)($gp) -; MIPS64R5EL-NEXT: daddiu $1, $1, %got_ofst(.LCPI33_0) -; MIPS64R5EL-NEXT: ld.d $w0, 0($1) -; MIPS64R5EL-NEXT: copy_s.d $6, $w0[0] -; MIPS64R5EL-NEXT: copy_s.d $7, $w0[1] +; MIPS64R5EL-NEXT: lui $1, 10 +; MIPS64R5EL-NEXT: daddiu $1, $1, 9 +; MIPS64R5EL-NEXT: dsll $1, $1, 16 +; MIPS64R5EL-NEXT: daddiu $1, $1, 7 +; MIPS64R5EL-NEXT: dsll $1, $1, 16 +; MIPS64R5EL-NEXT: daddiu $4, $1, 6 +; MIPS64R5EL-NEXT: lui $1, 1 +; MIPS64R5EL-NEXT: daddiu $1, $1, 16385 +; MIPS64R5EL-NEXT: dsll $1, $1, 16 +; MIPS64R5EL-NEXT: daddiu $1, $1, 8193 +; MIPS64R5EL-NEXT: dsll $1, $1, 19 +; MIPS64R5EL-NEXT: daddiu $7, $1, 12 ; MIPS64R5EL-NEXT: ld $25, %call16(i16_8)($gp) +; MIPS64R5EL-NEXT: move $5, $4 +; MIPS64R5EL-NEXT: move $6, $4 ; MIPS64R5EL-NEXT: jalr $25 ; MIPS64R5EL-NEXT: nop ; MIPS64R5EL-NEXT: ld $1, %got_disp(gv8i16)($gp) @@ -4989,39 +5056,38 @@ define void @calli32_4() { ; MIPS32R5-NEXT: jr $ra ; MIPS32R5-NEXT: nop ; -; MIPS64R5-LABEL: calli32_4: -; MIPS64R5: # %bb.0: # %entry -; MIPS64R5-NEXT: daddiu $sp, $sp, -16 -; MIPS64R5-NEXT: .cfi_def_cfa_offset 16 -; MIPS64R5-NEXT: sd $ra, 8($sp) # 8-byte Folded Spill -; MIPS64R5-NEXT: sd $gp, 0($sp) # 8-byte Folded Spill -; MIPS64R5-NEXT: .cfi_offset 31, -8 -; MIPS64R5-NEXT: .cfi_offset 28, -16 -; MIPS64R5-NEXT: lui $1, %hi(%neg(%gp_rel(calli32_4))) -; MIPS64R5-NEXT: daddu $1, $1, $25 -; MIPS64R5-NEXT: daddiu $gp, $1, %lo(%neg(%gp_rel(calli32_4))) -; MIPS64R5-NEXT: ld $1, %got_page(.LCPI35_0)($gp) -; MIPS64R5-NEXT: daddiu $1, $1, %got_ofst(.LCPI35_0) -; MIPS64R5-NEXT: ld.d $w0, 0($1) -; MIPS64R5-NEXT: copy_s.d $4, $w0[0] -; MIPS64R5-NEXT: copy_s.d $5, $w0[1] -; MIPS64R5-NEXT: ld $1, %got_page(.LCPI35_1)($gp) -; MIPS64R5-NEXT: daddiu $1, $1, %got_ofst(.LCPI35_1) -; MIPS64R5-NEXT: ld.d $w0, 0($1) -; MIPS64R5-NEXT: copy_s.d $6, $w0[0] -; MIPS64R5-NEXT: copy_s.d $7, $w0[1] -; MIPS64R5-NEXT: ld $25, %call16(i32_4)($gp) -; MIPS64R5-NEXT: jalr $25 -; MIPS64R5-NEXT: nop -; MIPS64R5-NEXT: insert.d $w0[0], $2 -; MIPS64R5-NEXT: insert.d $w0[1], $3 -; MIPS64R5-NEXT: ld $1, %got_disp(gv4i32)($gp) -; MIPS64R5-NEXT: st.d $w0, 0($1) -; MIPS64R5-NEXT: ld $gp, 0($sp) # 8-byte Folded Reload -; MIPS64R5-NEXT: ld $ra, 8($sp) # 8-byte Folded Reload -; MIPS64R5-NEXT: daddiu $sp, $sp, 16 -; MIPS64R5-NEXT: jr $ra -; MIPS64R5-NEXT: nop +; MIPS64R5EB-LABEL: calli32_4: +; MIPS64R5EB: # %bb.0: # %entry +; MIPS64R5EB-NEXT: daddiu $sp, $sp, -16 +; MIPS64R5EB-NEXT: .cfi_def_cfa_offset 16 +; MIPS64R5EB-NEXT: sd $ra, 8($sp) # 8-byte Folded Spill +; MIPS64R5EB-NEXT: sd $gp, 0($sp) # 8-byte Folded Spill +; MIPS64R5EB-NEXT: .cfi_offset 31, -8 +; MIPS64R5EB-NEXT: .cfi_offset 28, -16 +; MIPS64R5EB-NEXT: lui $1, %hi(%neg(%gp_rel(calli32_4))) +; MIPS64R5EB-NEXT: daddu $1, $1, $25 +; MIPS64R5EB-NEXT: daddiu $gp, $1, %lo(%neg(%gp_rel(calli32_4))) +; MIPS64R5EB-NEXT: daddiu $1, $zero, 3 +; MIPS64R5EB-NEXT: dsll $2, $1, 33 +; MIPS64R5EB-NEXT: daddiu $4, $2, 7 +; MIPS64R5EB-NEXT: dsll $1, $1, 34 +; MIPS64R5EB-NEXT: daddiu $6, $1, 8 +; MIPS64R5EB-NEXT: daddiu $1, $zero, 9 +; MIPS64R5EB-NEXT: dsll $1, $1, 32 +; MIPS64R5EB-NEXT: daddiu $5, $1, 10 +; MIPS64R5EB-NEXT: ld $25, %call16(i32_4)($gp) +; MIPS64R5EB-NEXT: move $7, $5 +; MIPS64R5EB-NEXT: jalr $25 +; MIPS64R5EB-NEXT: nop +; MIPS64R5EB-NEXT: insert.d $w0[0], $2 +; MIPS64R5EB-NEXT: insert.d $w0[1], $3 +; MIPS64R5EB-NEXT: ld $1, %got_disp(gv4i32)($gp) +; MIPS64R5EB-NEXT: st.d $w0, 0($1) +; MIPS64R5EB-NEXT: ld $gp, 0($sp) # 8-byte Folded Reload +; MIPS64R5EB-NEXT: ld $ra, 8($sp) # 8-byte Folded Reload +; MIPS64R5EB-NEXT: daddiu $sp, $sp, 16 +; MIPS64R5EB-NEXT: jr $ra +; MIPS64R5EB-NEXT: nop ; ; MIPS64EL-LABEL: calli32_4: ; MIPS64EL: # %bb.0: # %entry @@ -5055,6 +5121,40 @@ define void @calli32_4() { ; MIPS64EL-NEXT: daddiu $sp, $sp, 16 ; MIPS64EL-NEXT: jr $ra ; MIPS64EL-NEXT: nop +; +; MIPS64R5EL-LABEL: calli32_4: +; MIPS64R5EL: # %bb.0: # %entry +; MIPS64R5EL-NEXT: daddiu $sp, $sp, -16 +; MIPS64R5EL-NEXT: .cfi_def_cfa_offset 16 +; MIPS64R5EL-NEXT: sd $ra, 8($sp) # 8-byte Folded Spill +; MIPS64R5EL-NEXT: sd $gp, 0($sp) # 8-byte Folded Spill +; MIPS64R5EL-NEXT: .cfi_offset 31, -8 +; MIPS64R5EL-NEXT: .cfi_offset 28, -16 +; MIPS64R5EL-NEXT: lui $1, %hi(%neg(%gp_rel(calli32_4))) +; MIPS64R5EL-NEXT: daddu $1, $1, $25 +; MIPS64R5EL-NEXT: daddiu $gp, $1, %lo(%neg(%gp_rel(calli32_4))) +; MIPS64R5EL-NEXT: daddiu $1, $zero, 7 +; MIPS64R5EL-NEXT: dsll $1, $1, 32 +; MIPS64R5EL-NEXT: daddiu $4, $1, 6 +; MIPS64R5EL-NEXT: daddiu $1, $zero, 1 +; MIPS64R5EL-NEXT: dsll $1, $1, 35 +; MIPS64R5EL-NEXT: daddiu $6, $1, 12 +; MIPS64R5EL-NEXT: daddiu $1, $zero, 5 +; MIPS64R5EL-NEXT: dsll $1, $1, 33 +; MIPS64R5EL-NEXT: daddiu $5, $1, 9 +; MIPS64R5EL-NEXT: ld $25, %call16(i32_4)($gp) +; MIPS64R5EL-NEXT: move $7, $5 +; MIPS64R5EL-NEXT: jalr $25 +; MIPS64R5EL-NEXT: nop +; MIPS64R5EL-NEXT: insert.d $w0[0], $2 +; MIPS64R5EL-NEXT: insert.d $w0[1], $3 +; MIPS64R5EL-NEXT: ld $1, %got_disp(gv4i32)($gp) +; MIPS64R5EL-NEXT: st.d $w0, 0($1) +; MIPS64R5EL-NEXT: ld $gp, 0($sp) # 8-byte Folded Reload +; MIPS64R5EL-NEXT: ld $ra, 8($sp) # 8-byte Folded Reload +; MIPS64R5EL-NEXT: daddiu $sp, $sp, 16 +; MIPS64R5EL-NEXT: jr $ra +; MIPS64R5EL-NEXT: nop entry: %0 = call <4 x i32> @i32_4(<4 x i32> , <4 x i32> ) store <4 x i32> %0, ptr @gv4i32 @@ -5124,30 +5224,24 @@ define void @calli64_2() { ; MIPS32R5EB-NEXT: .cfi_def_cfa_offset 40 ; MIPS32R5EB-NEXT: sw $ra, 36($sp) # 4-byte Folded Spill ; MIPS32R5EB-NEXT: .cfi_offset 31, -4 -; MIPS32R5EB-NEXT: lui $1, %hi($CPI36_0) -; MIPS32R5EB-NEXT: addiu $1, $1, %lo($CPI36_0) -; MIPS32R5EB-NEXT: ld.w $w0, 0($1) -; MIPS32R5EB-NEXT: copy_s.w $5, $w0[1] -; MIPS32R5EB-NEXT: copy_s.w $7, $w0[3] -; MIPS32R5EB-NEXT: lui $1, %hi($CPI36_1) -; MIPS32R5EB-NEXT: addiu $1, $1, %lo($CPI36_1) -; MIPS32R5EB-NEXT: ld.w $w0, 0($1) -; MIPS32R5EB-NEXT: copy_s.w $1, $w0[1] -; MIPS32R5EB-NEXT: copy_s.w $2, $w0[3] -; MIPS32R5EB-NEXT: sw $2, 28($sp) +; MIPS32R5EB-NEXT: addiu $1, $zero, 8 +; MIPS32R5EB-NEXT: sw $1, 28($sp) +; MIPS32R5EB-NEXT: addiu $1, $zero, 12 ; MIPS32R5EB-NEXT: sw $1, 20($sp) ; MIPS32R5EB-NEXT: sw $zero, 24($sp) ; MIPS32R5EB-NEXT: sw $zero, 16($sp) ; MIPS32R5EB-NEXT: addiu $4, $zero, 0 +; MIPS32R5EB-NEXT: addiu $5, $zero, 6 ; MIPS32R5EB-NEXT: addiu $6, $zero, 0 +; MIPS32R5EB-NEXT: addiu $7, $zero, 7 ; MIPS32R5EB-NEXT: jal i64_2 ; MIPS32R5EB-NEXT: nop -; MIPS32R5EB-NEXT: lui $1, %hi(gv2i64) ; MIPS32R5EB-NEXT: insert.w $w0[0], $2 ; MIPS32R5EB-NEXT: insert.w $w0[1], $3 -; MIPS32R5EB-NEXT: addiu $1, $1, %lo(gv2i64) ; MIPS32R5EB-NEXT: insert.w $w0[2], $4 +; MIPS32R5EB-NEXT: lui $1, %hi(gv2i64) ; MIPS32R5EB-NEXT: insert.w $w0[3], $5 +; MIPS32R5EB-NEXT: addiu $1, $1, %lo(gv2i64) ; MIPS32R5EB-NEXT: st.w $w0, 0($1) ; MIPS32R5EB-NEXT: lw $ra, 36($sp) # 4-byte Folded Reload ; MIPS32R5EB-NEXT: addiu $sp, $sp, 40 @@ -5217,30 +5311,24 @@ define void @calli64_2() { ; MIPS32R5EL-NEXT: .cfi_def_cfa_offset 40 ; MIPS32R5EL-NEXT: sw $ra, 36($sp) # 4-byte Folded Spill ; MIPS32R5EL-NEXT: .cfi_offset 31, -4 -; MIPS32R5EL-NEXT: lui $1, %hi($CPI36_0) -; MIPS32R5EL-NEXT: addiu $1, $1, %lo($CPI36_0) -; MIPS32R5EL-NEXT: ld.w $w0, 0($1) -; MIPS32R5EL-NEXT: copy_s.w $4, $w0[0] -; MIPS32R5EL-NEXT: copy_s.w $6, $w0[2] -; MIPS32R5EL-NEXT: lui $1, %hi($CPI36_1) -; MIPS32R5EL-NEXT: addiu $1, $1, %lo($CPI36_1) -; MIPS32R5EL-NEXT: ld.w $w0, 0($1) -; MIPS32R5EL-NEXT: copy_s.w $1, $w0[0] -; MIPS32R5EL-NEXT: copy_s.w $2, $w0[2] -; MIPS32R5EL-NEXT: sw $2, 24($sp) +; MIPS32R5EL-NEXT: addiu $1, $zero, 8 +; MIPS32R5EL-NEXT: sw $1, 24($sp) +; MIPS32R5EL-NEXT: addiu $1, $zero, 12 ; MIPS32R5EL-NEXT: sw $1, 16($sp) ; MIPS32R5EL-NEXT: sw $zero, 28($sp) ; MIPS32R5EL-NEXT: sw $zero, 20($sp) +; MIPS32R5EL-NEXT: addiu $4, $zero, 6 ; MIPS32R5EL-NEXT: addiu $5, $zero, 0 +; MIPS32R5EL-NEXT: addiu $6, $zero, 7 ; MIPS32R5EL-NEXT: addiu $7, $zero, 0 ; MIPS32R5EL-NEXT: jal i64_2 ; MIPS32R5EL-NEXT: nop -; MIPS32R5EL-NEXT: lui $1, %hi(gv2i64) ; MIPS32R5EL-NEXT: insert.w $w0[0], $2 ; MIPS32R5EL-NEXT: insert.w $w0[1], $3 -; MIPS32R5EL-NEXT: addiu $1, $1, %lo(gv2i64) ; MIPS32R5EL-NEXT: insert.w $w0[2], $4 +; MIPS32R5EL-NEXT: lui $1, %hi(gv2i64) ; MIPS32R5EL-NEXT: insert.w $w0[3], $5 +; MIPS32R5EL-NEXT: addiu $1, $1, %lo(gv2i64) ; MIPS32R5EL-NEXT: st.w $w0, 0($1) ; MIPS32R5EL-NEXT: lw $ra, 36($sp) # 4-byte Folded Reload ; MIPS32R5EL-NEXT: addiu $sp, $sp, 40 @@ -5496,27 +5584,21 @@ define void @callfloat_4() { ; MIPS32R5-NEXT: .cfi_def_cfa_register 30 ; MIPS32R5-NEXT: addiu $1, $zero, -16 ; MIPS32R5-NEXT: and $sp, $sp, $1 -; MIPS32R5-NEXT: lui $1, %hi($CPI38_0) -; MIPS32R5-NEXT: addiu $1, $1, %lo($CPI38_0) -; MIPS32R5-NEXT: ld.w $w0, 0($1) -; MIPS32R5-NEXT: copy_s.w $7, $w0[1] -; MIPS32R5-NEXT: copy_s.w $1, $w0[2] -; MIPS32R5-NEXT: copy_s.w $2, $w0[3] -; MIPS32R5-NEXT: lui $3, %hi($CPI38_1) -; MIPS32R5-NEXT: addiu $3, $3, %lo($CPI38_1) -; MIPS32R5-NEXT: ld.w $w0, 0($3) -; MIPS32R5-NEXT: copy_s.w $3, $w0[0] -; MIPS32R5-NEXT: copy_s.w $4, $w0[1] -; MIPS32R5-NEXT: copy_s.w $5, $w0[2] -; MIPS32R5-NEXT: copy_s.w $6, $w0[3] -; MIPS32R5-NEXT: sw $6, 36($sp) -; MIPS32R5-NEXT: sw $5, 32($sp) -; MIPS32R5-NEXT: sw $4, 28($sp) -; MIPS32R5-NEXT: sw $3, 24($sp) -; MIPS32R5-NEXT: sw $2, 20($sp) +; MIPS32R5-NEXT: lui $1, 16704 +; MIPS32R5-NEXT: lui $2, 16736 +; MIPS32R5-NEXT: lui $3, 16752 +; MIPS32R5-NEXT: lui $4, 16768 +; MIPS32R5-NEXT: sw $4, 36($sp) +; MIPS32R5-NEXT: sw $3, 32($sp) +; MIPS32R5-NEXT: sw $2, 28($sp) +; MIPS32R5-NEXT: sw $1, 24($sp) +; MIPS32R5-NEXT: lui $1, 16512 +; MIPS32R5-NEXT: sw $1, 20($sp) +; MIPS32R5-NEXT: lui $1, 16384 ; MIPS32R5-NEXT: sw $1, 16($sp) ; MIPS32R5-NEXT: addiu $4, $sp, 48 ; MIPS32R5-NEXT: addiu $6, $zero, 0 +; MIPS32R5-NEXT: lui $7, 49024 ; MIPS32R5-NEXT: jal float4_extern ; MIPS32R5-NEXT: nop ; MIPS32R5-NEXT: lui $1, %hi(gv4f32) @@ -5530,39 +5612,43 @@ define void @callfloat_4() { ; MIPS32R5-NEXT: jr $ra ; MIPS32R5-NEXT: nop ; -; MIPS64R5-LABEL: callfloat_4: -; MIPS64R5: # %bb.0: # %entry -; MIPS64R5-NEXT: daddiu $sp, $sp, -16 -; MIPS64R5-NEXT: .cfi_def_cfa_offset 16 -; MIPS64R5-NEXT: sd $ra, 8($sp) # 8-byte Folded Spill -; MIPS64R5-NEXT: sd $gp, 0($sp) # 8-byte Folded Spill -; MIPS64R5-NEXT: .cfi_offset 31, -8 -; MIPS64R5-NEXT: .cfi_offset 28, -16 -; MIPS64R5-NEXT: lui $1, %hi(%neg(%gp_rel(callfloat_4))) -; MIPS64R5-NEXT: daddu $1, $1, $25 -; MIPS64R5-NEXT: daddiu $gp, $1, %lo(%neg(%gp_rel(callfloat_4))) -; MIPS64R5-NEXT: ld $1, %got_page(.LCPI38_0)($gp) -; MIPS64R5-NEXT: daddiu $1, $1, %got_ofst(.LCPI38_0) -; MIPS64R5-NEXT: ld.d $w0, 0($1) -; MIPS64R5-NEXT: copy_s.d $4, $w0[0] -; MIPS64R5-NEXT: copy_s.d $5, $w0[1] -; MIPS64R5-NEXT: ld $1, %got_page(.LCPI38_1)($gp) -; MIPS64R5-NEXT: daddiu $1, $1, %got_ofst(.LCPI38_1) -; MIPS64R5-NEXT: ld.d $w0, 0($1) -; MIPS64R5-NEXT: copy_s.d $6, $w0[0] -; MIPS64R5-NEXT: copy_s.d $7, $w0[1] -; MIPS64R5-NEXT: ld $25, %call16(float4_extern)($gp) -; MIPS64R5-NEXT: jalr $25 -; MIPS64R5-NEXT: nop -; MIPS64R5-NEXT: insert.d $w0[0], $2 -; MIPS64R5-NEXT: insert.d $w0[1], $3 -; MIPS64R5-NEXT: ld $1, %got_disp(gv4f32)($gp) -; MIPS64R5-NEXT: st.d $w0, 0($1) -; MIPS64R5-NEXT: ld $gp, 0($sp) # 8-byte Folded Reload -; MIPS64R5-NEXT: ld $ra, 8($sp) # 8-byte Folded Reload -; MIPS64R5-NEXT: daddiu $sp, $sp, 16 -; MIPS64R5-NEXT: jr $ra -; MIPS64R5-NEXT: nop +; MIPS64R5EB-LABEL: callfloat_4: +; MIPS64R5EB: # %bb.0: # %entry +; MIPS64R5EB-NEXT: daddiu $sp, $sp, -16 +; MIPS64R5EB-NEXT: .cfi_def_cfa_offset 16 +; MIPS64R5EB-NEXT: sd $ra, 8($sp) # 8-byte Folded Spill +; MIPS64R5EB-NEXT: sd $gp, 0($sp) # 8-byte Folded Spill +; MIPS64R5EB-NEXT: .cfi_offset 31, -8 +; MIPS64R5EB-NEXT: .cfi_offset 28, -16 +; MIPS64R5EB-NEXT: lui $1, %hi(%neg(%gp_rel(callfloat_4))) +; MIPS64R5EB-NEXT: daddu $1, $1, $25 +; MIPS64R5EB-NEXT: daddiu $gp, $1, %lo(%neg(%gp_rel(callfloat_4))) +; MIPS64R5EB-NEXT: daddiu $1, $zero, 1 +; MIPS64R5EB-NEXT: dsll $1, $1, 39 +; MIPS64R5EB-NEXT: daddiu $1, $1, 129 +; MIPS64R5EB-NEXT: daddiu $2, $zero, 261 +; MIPS64R5EB-NEXT: dsll $2, $2, 33 +; MIPS64R5EB-NEXT: daddiu $3, $zero, 383 +; MIPS64R5EB-NEXT: dsll $4, $3, 23 +; MIPS64R5EB-NEXT: dsll $5, $1, 23 +; MIPS64R5EB-NEXT: daddiu $1, $2, 523 +; MIPS64R5EB-NEXT: dsll $6, $1, 21 +; MIPS64R5EB-NEXT: daddiu $1, $zero, 1047 +; MIPS64R5EB-NEXT: dsll $1, $1, 29 +; MIPS64R5EB-NEXT: daddiu $1, $1, 131 +; MIPS64R5EB-NEXT: dsll $7, $1, 23 +; MIPS64R5EB-NEXT: ld $25, %call16(float4_extern)($gp) +; MIPS64R5EB-NEXT: jalr $25 +; MIPS64R5EB-NEXT: nop +; MIPS64R5EB-NEXT: insert.d $w0[0], $2 +; MIPS64R5EB-NEXT: insert.d $w0[1], $3 +; MIPS64R5EB-NEXT: ld $1, %got_disp(gv4f32)($gp) +; MIPS64R5EB-NEXT: st.d $w0, 0($1) +; MIPS64R5EB-NEXT: ld $gp, 0($sp) # 8-byte Folded Reload +; MIPS64R5EB-NEXT: ld $ra, 8($sp) # 8-byte Folded Reload +; MIPS64R5EB-NEXT: daddiu $sp, $sp, 16 +; MIPS64R5EB-NEXT: jr $ra +; MIPS64R5EB-NEXT: nop ; ; MIPS64EL-LABEL: callfloat_4: ; MIPS64EL: # %bb.0: # %entry @@ -5600,6 +5686,44 @@ define void @callfloat_4() { ; MIPS64EL-NEXT: daddiu $sp, $sp, 16 ; MIPS64EL-NEXT: jr $ra ; MIPS64EL-NEXT: nop +; +; MIPS64R5EL-LABEL: callfloat_4: +; MIPS64R5EL: # %bb.0: # %entry +; MIPS64R5EL-NEXT: daddiu $sp, $sp, -16 +; MIPS64R5EL-NEXT: .cfi_def_cfa_offset 16 +; MIPS64R5EL-NEXT: sd $ra, 8($sp) # 8-byte Folded Spill +; MIPS64R5EL-NEXT: sd $gp, 0($sp) # 8-byte Folded Spill +; MIPS64R5EL-NEXT: .cfi_offset 31, -8 +; MIPS64R5EL-NEXT: .cfi_offset 28, -16 +; MIPS64R5EL-NEXT: lui $1, %hi(%neg(%gp_rel(callfloat_4))) +; MIPS64R5EL-NEXT: daddu $1, $1, $25 +; MIPS64R5EL-NEXT: daddiu $gp, $1, %lo(%neg(%gp_rel(callfloat_4))) +; MIPS64R5EL-NEXT: daddiu $1, $zero, 129 +; MIPS64R5EL-NEXT: dsll $1, $1, 25 +; MIPS64R5EL-NEXT: daddiu $1, $1, 1 +; MIPS64R5EL-NEXT: daddiu $2, $zero, 523 +; MIPS64R5EL-NEXT: dsll $2, $2, 31 +; MIPS64R5EL-NEXT: daddiu $3, $zero, 383 +; MIPS64R5EL-NEXT: dsll $4, $3, 55 +; MIPS64R5EL-NEXT: dsll $5, $1, 30 +; MIPS64R5EL-NEXT: daddiu $1, $2, 261 +; MIPS64R5EL-NEXT: dsll $6, $1, 22 +; MIPS64R5EL-NEXT: daddiu $1, $zero, 131 +; MIPS64R5EL-NEXT: dsll $1, $1, 35 +; MIPS64R5EL-NEXT: daddiu $1, $1, 1047 +; MIPS64R5EL-NEXT: dsll $7, $1, 20 +; MIPS64R5EL-NEXT: ld $25, %call16(float4_extern)($gp) +; MIPS64R5EL-NEXT: jalr $25 +; MIPS64R5EL-NEXT: nop +; MIPS64R5EL-NEXT: insert.d $w0[0], $2 +; MIPS64R5EL-NEXT: insert.d $w0[1], $3 +; MIPS64R5EL-NEXT: ld $1, %got_disp(gv4f32)($gp) +; MIPS64R5EL-NEXT: st.d $w0, 0($1) +; MIPS64R5EL-NEXT: ld $gp, 0($sp) # 8-byte Folded Reload +; MIPS64R5EL-NEXT: ld $ra, 8($sp) # 8-byte Folded Reload +; MIPS64R5EL-NEXT: daddiu $sp, $sp, 16 +; MIPS64R5EL-NEXT: jr $ra +; MIPS64R5EL-NEXT: nop entry: %0 = call <4 x float> @float4_extern(<4 x float> , <4 x float> ) store <4 x float> %0, ptr @gv4f32 @@ -5688,17 +5812,11 @@ define void @calldouble_2() { ; MIPS32R5EB-NEXT: .cfi_def_cfa_register 30 ; MIPS32R5EB-NEXT: addiu $1, $zero, -16 ; MIPS32R5EB-NEXT: and $sp, $sp, $1 -; MIPS32R5EB-NEXT: lui $1, %hi($CPI39_0) -; MIPS32R5EB-NEXT: addiu $1, $1, %lo($CPI39_0) -; MIPS32R5EB-NEXT: ld.w $w0, 0($1) -; MIPS32R5EB-NEXT: copy_s.w $1, $w0[2] -; MIPS32R5EB-NEXT: lui $2, %hi($CPI39_1) -; MIPS32R5EB-NEXT: addiu $2, $2, %lo($CPI39_1) -; MIPS32R5EB-NEXT: ld.w $w0, 0($2) -; MIPS32R5EB-NEXT: copy_s.w $2, $w0[0] -; MIPS32R5EB-NEXT: copy_s.w $3, $w0[2] -; MIPS32R5EB-NEXT: sw $3, 32($sp) -; MIPS32R5EB-NEXT: sw $2, 24($sp) +; MIPS32R5EB-NEXT: lui $1, 16424 +; MIPS32R5EB-NEXT: lui $2, 16428 +; MIPS32R5EB-NEXT: sw $2, 32($sp) +; MIPS32R5EB-NEXT: sw $1, 24($sp) +; MIPS32R5EB-NEXT: lui $1, 49136 ; MIPS32R5EB-NEXT: sw $1, 16($sp) ; MIPS32R5EB-NEXT: sw $zero, 36($sp) ; MIPS32R5EB-NEXT: sw $zero, 28($sp) @@ -5730,15 +5848,12 @@ define void @calldouble_2() { ; MIPS64R5-NEXT: lui $1, %hi(%neg(%gp_rel(calldouble_2))) ; MIPS64R5-NEXT: daddu $1, $1, $25 ; MIPS64R5-NEXT: daddiu $gp, $1, %lo(%neg(%gp_rel(calldouble_2))) -; MIPS64R5-NEXT: ld $1, %got_page(.LCPI39_0)($gp) -; MIPS64R5-NEXT: daddiu $1, $1, %got_ofst(.LCPI39_0) -; MIPS64R5-NEXT: ld.d $w0, 0($1) -; MIPS64R5-NEXT: copy_s.d $5, $w0[1] -; MIPS64R5-NEXT: ld $1, %got_page(.LCPI39_1)($gp) -; MIPS64R5-NEXT: daddiu $1, $1, %got_ofst(.LCPI39_1) -; MIPS64R5-NEXT: ld.d $w0, 0($1) -; MIPS64R5-NEXT: copy_s.d $6, $w0[0] -; MIPS64R5-NEXT: copy_s.d $7, $w0[1] +; MIPS64R5-NEXT: daddiu $1, $zero, 3071 +; MIPS64R5-NEXT: dsll $5, $1, 52 +; MIPS64R5-NEXT: daddiu $1, $zero, 2053 +; MIPS64R5-NEXT: dsll $6, $1, 51 +; MIPS64R5-NEXT: daddiu $1, $zero, 4107 +; MIPS64R5-NEXT: dsll $7, $1, 50 ; MIPS64R5-NEXT: ld $25, %call16(double2_extern)($gp) ; MIPS64R5-NEXT: daddiu $4, $zero, 0 ; MIPS64R5-NEXT: jalr $25 @@ -5804,17 +5919,11 @@ define void @calldouble_2() { ; MIPS32R5EL-NEXT: .cfi_def_cfa_register 30 ; MIPS32R5EL-NEXT: addiu $1, $zero, -16 ; MIPS32R5EL-NEXT: and $sp, $sp, $1 -; MIPS32R5EL-NEXT: lui $1, %hi($CPI39_0) -; MIPS32R5EL-NEXT: addiu $1, $1, %lo($CPI39_0) -; MIPS32R5EL-NEXT: ld.w $w0, 0($1) -; MIPS32R5EL-NEXT: copy_s.w $1, $w0[3] -; MIPS32R5EL-NEXT: lui $2, %hi($CPI39_1) -; MIPS32R5EL-NEXT: addiu $2, $2, %lo($CPI39_1) -; MIPS32R5EL-NEXT: ld.w $w0, 0($2) -; MIPS32R5EL-NEXT: copy_s.w $2, $w0[1] -; MIPS32R5EL-NEXT: copy_s.w $3, $w0[3] -; MIPS32R5EL-NEXT: sw $3, 36($sp) -; MIPS32R5EL-NEXT: sw $2, 28($sp) +; MIPS32R5EL-NEXT: lui $1, 16424 +; MIPS32R5EL-NEXT: lui $2, 16428 +; MIPS32R5EL-NEXT: sw $2, 36($sp) +; MIPS32R5EL-NEXT: sw $1, 28($sp) +; MIPS32R5EL-NEXT: lui $1, 49136 ; MIPS32R5EL-NEXT: sw $1, 20($sp) ; MIPS32R5EL-NEXT: sw $zero, 32($sp) ; MIPS32R5EL-NEXT: sw $zero, 24($sp) diff --git a/llvm/test/CodeGen/X86/nontemporal-4.ll b/llvm/test/CodeGen/X86/nontemporal-4.ll index c1eff891a9487..743d4cf0927da 100644 --- a/llvm/test/CodeGen/X86/nontemporal-4.ll +++ b/llvm/test/CodeGen/X86/nontemporal-4.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s --check-prefixes=CHECK,SSE,SSE2 -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4a | FileCheck %s --check-prefixes=CHECK,SSE,SSE4A -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefixes=CHECK,SSE,SSE41 +; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s --check-prefixes=CHECK,SSE +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4a | FileCheck %s --check-prefixes=CHECK,SSE +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefixes=CHECK,SSE ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=CHECK,AVX ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=CHECK,AVX ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512dq,+avx512vl | FileCheck %s --check-prefixes=CHECK,AVX512 @@ -24,221 +24,61 @@ define void @test_constant_v2f64_align1(ptr %dst) nounwind { } define void @test_constant_v4f32_align1(ptr %dst) nounwind { -; SSE2-LABEL: test_constant_v4f32_align1: -; SSE2: # %bb.0: -; SSE2-NEXT: movabsq $4647714816524288000, %rax # imm = 0x4080000040400000 -; SSE2-NEXT: movntiq %rax, 8(%rdi) -; SSE2-NEXT: movabsq $4611686019492741120, %rax # imm = 0x400000003F800000 -; SSE2-NEXT: movntiq %rax, (%rdi) -; SSE2-NEXT: retq -; -; SSE4A-LABEL: test_constant_v4f32_align1: -; SSE4A: # %bb.0: -; SSE4A-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero -; SSE4A-NEXT: movntsd %xmm0, 8(%rdi) -; SSE4A-NEXT: movsd {{.*#+}} xmm0 = [2.0000004731118679E+0,0.0E+0] -; SSE4A-NEXT: movntsd %xmm0, (%rdi) -; SSE4A-NEXT: retq -; -; SSE41-LABEL: test_constant_v4f32_align1: -; SSE41: # %bb.0: -; SSE41-NEXT: movabsq $4647714816524288000, %rax # imm = 0x4080000040400000 -; SSE41-NEXT: movntiq %rax, 8(%rdi) -; SSE41-NEXT: movabsq $4611686019492741120, %rax # imm = 0x400000003F800000 -; SSE41-NEXT: movntiq %rax, (%rdi) -; SSE41-NEXT: retq -; -; AVX-LABEL: test_constant_v4f32_align1: -; AVX: # %bb.0: -; AVX-NEXT: movabsq $4647714816524288000, %rax # imm = 0x4080000040400000 -; AVX-NEXT: movntiq %rax, 8(%rdi) -; AVX-NEXT: movabsq $4611686019492741120, %rax # imm = 0x400000003F800000 -; AVX-NEXT: movntiq %rax, (%rdi) -; AVX-NEXT: retq -; -; AVX512-LABEL: test_constant_v4f32_align1: -; AVX512: # %bb.0: -; AVX512-NEXT: movabsq $4647714816524288000, %rax # imm = 0x4080000040400000 -; AVX512-NEXT: movntiq %rax, 8(%rdi) -; AVX512-NEXT: movabsq $4611686019492741120, %rax # imm = 0x400000003F800000 -; AVX512-NEXT: movntiq %rax, (%rdi) -; AVX512-NEXT: retq +; CHECK-LABEL: test_constant_v4f32_align1: +; CHECK: # %bb.0: +; CHECK-NEXT: movabsq $4647714816524288000, %rax # imm = 0x4080000040400000 +; CHECK-NEXT: movntiq %rax, 8(%rdi) +; CHECK-NEXT: movabsq $4611686019492741120, %rax # imm = 0x400000003F800000 +; CHECK-NEXT: movntiq %rax, (%rdi) +; CHECK-NEXT: retq store <4 x float> , ptr %dst, align 1, !nontemporal !1 ret void } define void @test_constant_v2i64_align1(ptr %dst) nounwind { -; SSE2-LABEL: test_constant_v2i64_align1: -; SSE2: # %bb.0: -; SSE2-NEXT: movl $1, %eax -; SSE2-NEXT: movntiq %rax, 8(%rdi) -; SSE2-NEXT: xorl %eax, %eax -; SSE2-NEXT: movntiq %rax, (%rdi) -; SSE2-NEXT: retq -; -; SSE4A-LABEL: test_constant_v2i64_align1: -; SSE4A: # %bb.0: -; SSE4A-NEXT: movsd {{.*#+}} xmm0 = [4.9406564584124654E-324,0.0E+0] -; SSE4A-NEXT: movntsd %xmm0, 8(%rdi) -; SSE4A-NEXT: xorl %eax, %eax -; SSE4A-NEXT: movntiq %rax, (%rdi) -; SSE4A-NEXT: retq -; -; SSE41-LABEL: test_constant_v2i64_align1: -; SSE41: # %bb.0: -; SSE41-NEXT: movl $1, %eax -; SSE41-NEXT: movntiq %rax, 8(%rdi) -; SSE41-NEXT: xorl %eax, %eax -; SSE41-NEXT: movntiq %rax, (%rdi) -; SSE41-NEXT: retq -; -; AVX-LABEL: test_constant_v2i64_align1: -; AVX: # %bb.0: -; AVX-NEXT: movl $1, %eax -; AVX-NEXT: movntiq %rax, 8(%rdi) -; AVX-NEXT: xorl %eax, %eax -; AVX-NEXT: movntiq %rax, (%rdi) -; AVX-NEXT: retq -; -; AVX512-LABEL: test_constant_v2i64_align1: -; AVX512: # %bb.0: -; AVX512-NEXT: movl $1, %eax -; AVX512-NEXT: movntiq %rax, 8(%rdi) -; AVX512-NEXT: xorl %eax, %eax -; AVX512-NEXT: movntiq %rax, (%rdi) -; AVX512-NEXT: retq +; CHECK-LABEL: test_constant_v2i64_align1: +; CHECK: # %bb.0: +; CHECK-NEXT: movl $1, %eax +; CHECK-NEXT: movntiq %rax, 8(%rdi) +; CHECK-NEXT: xorl %eax, %eax +; CHECK-NEXT: movntiq %rax, (%rdi) +; CHECK-NEXT: retq store <2 x i64> , ptr %dst, align 1, !nontemporal !1 ret void } define void @test_constant_v4i32_align1(ptr %dst) nounwind { -; SSE2-LABEL: test_constant_v4i32_align1: -; SSE2: # %bb.0: -; SSE2-NEXT: movabsq $12884901890, %rax # imm = 0x300000002 -; SSE2-NEXT: movntiq %rax, 8(%rdi) -; SSE2-NEXT: movabsq $4294967296, %rax # imm = 0x100000000 -; SSE2-NEXT: movntiq %rax, (%rdi) -; SSE2-NEXT: retq -; -; SSE4A-LABEL: test_constant_v4i32_align1: -; SSE4A: # %bb.0: -; SSE4A-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero -; SSE4A-NEXT: movntsd %xmm0, 8(%rdi) -; SSE4A-NEXT: movsd {{.*#+}} xmm0 = [2.1219957909652723E-314,0.0E+0] -; SSE4A-NEXT: movntsd %xmm0, (%rdi) -; SSE4A-NEXT: retq -; -; SSE41-LABEL: test_constant_v4i32_align1: -; SSE41: # %bb.0: -; SSE41-NEXT: movabsq $12884901890, %rax # imm = 0x300000002 -; SSE41-NEXT: movntiq %rax, 8(%rdi) -; SSE41-NEXT: movabsq $4294967296, %rax # imm = 0x100000000 -; SSE41-NEXT: movntiq %rax, (%rdi) -; SSE41-NEXT: retq -; -; AVX-LABEL: test_constant_v4i32_align1: -; AVX: # %bb.0: -; AVX-NEXT: movabsq $12884901890, %rax # imm = 0x300000002 -; AVX-NEXT: movntiq %rax, 8(%rdi) -; AVX-NEXT: movabsq $4294967296, %rax # imm = 0x100000000 -; AVX-NEXT: movntiq %rax, (%rdi) -; AVX-NEXT: retq -; -; AVX512-LABEL: test_constant_v4i32_align1: -; AVX512: # %bb.0: -; AVX512-NEXT: movabsq $12884901890, %rax # imm = 0x300000002 -; AVX512-NEXT: movntiq %rax, 8(%rdi) -; AVX512-NEXT: movabsq $4294967296, %rax # imm = 0x100000000 -; AVX512-NEXT: movntiq %rax, (%rdi) -; AVX512-NEXT: retq +; CHECK-LABEL: test_constant_v4i32_align1: +; CHECK: # %bb.0: +; CHECK-NEXT: movabsq $12884901890, %rax # imm = 0x300000002 +; CHECK-NEXT: movntiq %rax, 8(%rdi) +; CHECK-NEXT: movabsq $4294967296, %rax # imm = 0x100000000 +; CHECK-NEXT: movntiq %rax, (%rdi) +; CHECK-NEXT: retq store <4 x i32> , ptr %dst, align 1, !nontemporal !1 ret void } define void @test_constant_v8i16_align1(ptr %dst) nounwind { -; SSE2-LABEL: test_constant_v8i16_align1: -; SSE2: # %bb.0: -; SSE2-NEXT: movabsq $1970350607106052, %rax # imm = 0x7000600050004 -; SSE2-NEXT: movntiq %rax, 8(%rdi) -; SSE2-NEXT: movabsq $844433520132096, %rax # imm = 0x3000200010000 -; SSE2-NEXT: movntiq %rax, (%rdi) -; SSE2-NEXT: retq -; -; SSE4A-LABEL: test_constant_v8i16_align1: -; SSE4A: # %bb.0: -; SSE4A-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero -; SSE4A-NEXT: movntsd %xmm0, 8(%rdi) -; SSE4A-NEXT: movsd {{.*#+}} xmm0 = [4.1720559249406128E-309,0.0E+0] -; SSE4A-NEXT: movntsd %xmm0, (%rdi) -; SSE4A-NEXT: retq -; -; SSE41-LABEL: test_constant_v8i16_align1: -; SSE41: # %bb.0: -; SSE41-NEXT: movabsq $1970350607106052, %rax # imm = 0x7000600050004 -; SSE41-NEXT: movntiq %rax, 8(%rdi) -; SSE41-NEXT: movabsq $844433520132096, %rax # imm = 0x3000200010000 -; SSE41-NEXT: movntiq %rax, (%rdi) -; SSE41-NEXT: retq -; -; AVX-LABEL: test_constant_v8i16_align1: -; AVX: # %bb.0: -; AVX-NEXT: movabsq $1970350607106052, %rax # imm = 0x7000600050004 -; AVX-NEXT: movntiq %rax, 8(%rdi) -; AVX-NEXT: movabsq $844433520132096, %rax # imm = 0x3000200010000 -; AVX-NEXT: movntiq %rax, (%rdi) -; AVX-NEXT: retq -; -; AVX512-LABEL: test_constant_v8i16_align1: -; AVX512: # %bb.0: -; AVX512-NEXT: movabsq $1970350607106052, %rax # imm = 0x7000600050004 -; AVX512-NEXT: movntiq %rax, 8(%rdi) -; AVX512-NEXT: movabsq $844433520132096, %rax # imm = 0x3000200010000 -; AVX512-NEXT: movntiq %rax, (%rdi) -; AVX512-NEXT: retq +; CHECK-LABEL: test_constant_v8i16_align1: +; CHECK: # %bb.0: +; CHECK-NEXT: movabsq $1970350607106052, %rax # imm = 0x7000600050004 +; CHECK-NEXT: movntiq %rax, 8(%rdi) +; CHECK-NEXT: movabsq $844433520132096, %rax # imm = 0x3000200010000 +; CHECK-NEXT: movntiq %rax, (%rdi) +; CHECK-NEXT: retq store <8 x i16> , ptr %dst, align 1, !nontemporal !1 ret void } define void @test_constant_v16i8_align1(ptr %dst) nounwind { -; SSE2-LABEL: test_constant_v16i8_align1: -; SSE2: # %bb.0: -; SSE2-NEXT: movabsq $1084818905618843912, %rax # imm = 0xF0E0D0C0B0A0908 -; SSE2-NEXT: movntiq %rax, 8(%rdi) -; SSE2-NEXT: movabsq $506097522914230528, %rax # imm = 0x706050403020100 -; SSE2-NEXT: movntiq %rax, (%rdi) -; SSE2-NEXT: retq -; -; SSE4A-LABEL: test_constant_v16i8_align1: -; SSE4A: # %bb.0: -; SSE4A-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero -; SSE4A-NEXT: movntsd %xmm0, 8(%rdi) -; SSE4A-NEXT: movsd {{.*#+}} xmm0 = [7.9499288951273625E-275,0.0E+0] -; SSE4A-NEXT: movntsd %xmm0, (%rdi) -; SSE4A-NEXT: retq -; -; SSE41-LABEL: test_constant_v16i8_align1: -; SSE41: # %bb.0: -; SSE41-NEXT: movabsq $1084818905618843912, %rax # imm = 0xF0E0D0C0B0A0908 -; SSE41-NEXT: movntiq %rax, 8(%rdi) -; SSE41-NEXT: movabsq $506097522914230528, %rax # imm = 0x706050403020100 -; SSE41-NEXT: movntiq %rax, (%rdi) -; SSE41-NEXT: retq -; -; AVX-LABEL: test_constant_v16i8_align1: -; AVX: # %bb.0: -; AVX-NEXT: movabsq $1084818905618843912, %rax # imm = 0xF0E0D0C0B0A0908 -; AVX-NEXT: movntiq %rax, 8(%rdi) -; AVX-NEXT: movabsq $506097522914230528, %rax # imm = 0x706050403020100 -; AVX-NEXT: movntiq %rax, (%rdi) -; AVX-NEXT: retq -; -; AVX512-LABEL: test_constant_v16i8_align1: -; AVX512: # %bb.0: -; AVX512-NEXT: movabsq $1084818905618843912, %rax # imm = 0xF0E0D0C0B0A0908 -; AVX512-NEXT: movntiq %rax, 8(%rdi) -; AVX512-NEXT: movabsq $506097522914230528, %rax # imm = 0x706050403020100 -; AVX512-NEXT: movntiq %rax, (%rdi) -; AVX512-NEXT: retq +; CHECK-LABEL: test_constant_v16i8_align1: +; CHECK: # %bb.0: +; CHECK-NEXT: movabsq $1084818905618843912, %rax # imm = 0xF0E0D0C0B0A0908 +; CHECK-NEXT: movntiq %rax, 8(%rdi) +; CHECK-NEXT: movabsq $506097522914230528, %rax # imm = 0x706050403020100 +; CHECK-NEXT: movntiq %rax, (%rdi) +; CHECK-NEXT: retq store <16 x i8> , ptr %dst, align 1, !nontemporal !1 ret void } @@ -262,321 +102,81 @@ define void @test_constant_v4f64_align1(ptr %dst) nounwind { } define void @test_constant_v8f32_align1(ptr %dst) nounwind { -; SSE2-LABEL: test_constant_v8f32_align1: -; SSE2: # %bb.0: -; SSE2-NEXT: movabsq $-4611686015214551040, %rax # imm = 0xC0000000BF800000 -; SSE2-NEXT: movntiq %rax, 8(%rdi) -; SSE2-NEXT: movabsq $-9223372036854775808, %rax # imm = 0x8000000000000000 -; SSE2-NEXT: movntiq %rax, (%rdi) -; SSE2-NEXT: movabsq $-4557642819667230720, %rax # imm = 0xC0C00000C0A00000 -; SSE2-NEXT: movntiq %rax, 24(%rdi) -; SSE2-NEXT: movabsq $-4575657218183004160, %rax # imm = 0xC0800000C0400000 -; SSE2-NEXT: movntiq %rax, 16(%rdi) -; SSE2-NEXT: retq -; -; SSE4A-LABEL: test_constant_v8f32_align1: -; SSE4A: # %bb.0: -; SSE4A-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero -; SSE4A-NEXT: movntsd %xmm0, 8(%rdi) -; SSE4A-NEXT: movsd {{.*#+}} xmm0 = [-0.0E+0,0.0E+0] -; SSE4A-NEXT: movntsd %xmm0, (%rdi) -; SSE4A-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero -; SSE4A-NEXT: movntsd %xmm0, 24(%rdi) -; SSE4A-NEXT: movsd {{.*#+}} xmm0 = [-5.1200036668777466E+2,0.0E+0] -; SSE4A-NEXT: movntsd %xmm0, 16(%rdi) -; SSE4A-NEXT: retq -; -; SSE41-LABEL: test_constant_v8f32_align1: -; SSE41: # %bb.0: -; SSE41-NEXT: movabsq $-4611686015214551040, %rax # imm = 0xC0000000BF800000 -; SSE41-NEXT: movntiq %rax, 8(%rdi) -; SSE41-NEXT: movabsq $-9223372036854775808, %rax # imm = 0x8000000000000000 -; SSE41-NEXT: movntiq %rax, (%rdi) -; SSE41-NEXT: movabsq $-4557642819667230720, %rax # imm = 0xC0C00000C0A00000 -; SSE41-NEXT: movntiq %rax, 24(%rdi) -; SSE41-NEXT: movabsq $-4575657218183004160, %rax # imm = 0xC0800000C0400000 -; SSE41-NEXT: movntiq %rax, 16(%rdi) -; SSE41-NEXT: retq -; -; AVX-LABEL: test_constant_v8f32_align1: -; AVX: # %bb.0: -; AVX-NEXT: movabsq $-4611686015214551040, %rax # imm = 0xC0000000BF800000 -; AVX-NEXT: movntiq %rax, 8(%rdi) -; AVX-NEXT: movabsq $-9223372036854775808, %rax # imm = 0x8000000000000000 -; AVX-NEXT: movntiq %rax, (%rdi) -; AVX-NEXT: movabsq $-4557642819667230720, %rax # imm = 0xC0C00000C0A00000 -; AVX-NEXT: movntiq %rax, 24(%rdi) -; AVX-NEXT: movabsq $-4575657218183004160, %rax # imm = 0xC0800000C0400000 -; AVX-NEXT: movntiq %rax, 16(%rdi) -; AVX-NEXT: retq -; -; AVX512-LABEL: test_constant_v8f32_align1: -; AVX512: # %bb.0: -; AVX512-NEXT: movabsq $-4611686015214551040, %rax # imm = 0xC0000000BF800000 -; AVX512-NEXT: movntiq %rax, 8(%rdi) -; AVX512-NEXT: movabsq $-9223372036854775808, %rax # imm = 0x8000000000000000 -; AVX512-NEXT: movntiq %rax, (%rdi) -; AVX512-NEXT: movabsq $-4557642819667230720, %rax # imm = 0xC0C00000C0A00000 -; AVX512-NEXT: movntiq %rax, 24(%rdi) -; AVX512-NEXT: movabsq $-4575657218183004160, %rax # imm = 0xC0800000C0400000 -; AVX512-NEXT: movntiq %rax, 16(%rdi) -; AVX512-NEXT: retq +; CHECK-LABEL: test_constant_v8f32_align1: +; CHECK: # %bb.0: +; CHECK-NEXT: movabsq $-4611686015214551040, %rax # imm = 0xC0000000BF800000 +; CHECK-NEXT: movntiq %rax, 8(%rdi) +; CHECK-NEXT: movabsq $-9223372036854775808, %rax # imm = 0x8000000000000000 +; CHECK-NEXT: movntiq %rax, (%rdi) +; CHECK-NEXT: movabsq $-4557642819667230720, %rax # imm = 0xC0C00000C0A00000 +; CHECK-NEXT: movntiq %rax, 24(%rdi) +; CHECK-NEXT: movabsq $-4575657218183004160, %rax # imm = 0xC0800000C0400000 +; CHECK-NEXT: movntiq %rax, 16(%rdi) +; CHECK-NEXT: retq store <8 x float> , ptr %dst, align 1, !nontemporal !1 ret void } define void @test_constant_v4i64_align1(ptr %dst) nounwind { -; SSE2-LABEL: test_constant_v4i64_align1: -; SSE2: # %bb.0: -; SSE2-NEXT: movq $-1, %rax -; SSE2-NEXT: movntiq %rax, 8(%rdi) -; SSE2-NEXT: movq $-3, %rax -; SSE2-NEXT: movntiq %rax, 24(%rdi) -; SSE2-NEXT: movq $-2, %rax -; SSE2-NEXT: movntiq %rax, 16(%rdi) -; SSE2-NEXT: xorl %eax, %eax -; SSE2-NEXT: movntiq %rax, (%rdi) -; SSE2-NEXT: retq -; -; SSE4A-LABEL: test_constant_v4i64_align1: -; SSE4A: # %bb.0: -; SSE4A-NEXT: movsd {{.*#+}} xmm0 = [NaN,0.0E+0] -; SSE4A-NEXT: movntsd %xmm0, 8(%rdi) -; SSE4A-NEXT: xorl %eax, %eax -; SSE4A-NEXT: movntiq %rax, (%rdi) -; SSE4A-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero -; SSE4A-NEXT: movntsd %xmm0, 24(%rdi) -; SSE4A-NEXT: movsd {{.*#+}} xmm0 = [NaN,0.0E+0] -; SSE4A-NEXT: movntsd %xmm0, 16(%rdi) -; SSE4A-NEXT: retq -; -; SSE41-LABEL: test_constant_v4i64_align1: -; SSE41: # %bb.0: -; SSE41-NEXT: movq $-1, %rax -; SSE41-NEXT: movntiq %rax, 8(%rdi) -; SSE41-NEXT: movq $-3, %rax -; SSE41-NEXT: movntiq %rax, 24(%rdi) -; SSE41-NEXT: movq $-2, %rax -; SSE41-NEXT: movntiq %rax, 16(%rdi) -; SSE41-NEXT: xorl %eax, %eax -; SSE41-NEXT: movntiq %rax, (%rdi) -; SSE41-NEXT: retq -; -; AVX-LABEL: test_constant_v4i64_align1: -; AVX: # %bb.0: -; AVX-NEXT: movq $-1, %rax -; AVX-NEXT: movntiq %rax, 8(%rdi) -; AVX-NEXT: movq $-3, %rax -; AVX-NEXT: movntiq %rax, 24(%rdi) -; AVX-NEXT: movq $-2, %rax -; AVX-NEXT: movntiq %rax, 16(%rdi) -; AVX-NEXT: xorl %eax, %eax -; AVX-NEXT: movntiq %rax, (%rdi) -; AVX-NEXT: retq -; -; AVX512-LABEL: test_constant_v4i64_align1: -; AVX512: # %bb.0: -; AVX512-NEXT: movq $-1, %rax -; AVX512-NEXT: movntiq %rax, 8(%rdi) -; AVX512-NEXT: movq $-3, %rax -; AVX512-NEXT: movntiq %rax, 24(%rdi) -; AVX512-NEXT: movq $-2, %rax -; AVX512-NEXT: movntiq %rax, 16(%rdi) -; AVX512-NEXT: xorl %eax, %eax -; AVX512-NEXT: movntiq %rax, (%rdi) -; AVX512-NEXT: retq +; CHECK-LABEL: test_constant_v4i64_align1: +; CHECK: # %bb.0: +; CHECK-NEXT: movq $-1, %rax +; CHECK-NEXT: movntiq %rax, 8(%rdi) +; CHECK-NEXT: movq $-3, %rax +; CHECK-NEXT: movntiq %rax, 24(%rdi) +; CHECK-NEXT: movq $-2, %rax +; CHECK-NEXT: movntiq %rax, 16(%rdi) +; CHECK-NEXT: xorl %eax, %eax +; CHECK-NEXT: movntiq %rax, (%rdi) +; CHECK-NEXT: retq store <4 x i64> , ptr %dst, align 1, !nontemporal !1 ret void } define void @test_constant_v8i32_align1(ptr %dst) nounwind { -; SSE2-LABEL: test_constant_v8i32_align1: -; SSE2: # %bb.0: -; SSE2-NEXT: movabsq $-8589934594, %rax # imm = 0xFFFFFFFDFFFFFFFE -; SSE2-NEXT: movntiq %rax, 8(%rdi) -; SSE2-NEXT: movabsq $-4294967296, %rax # imm = 0xFFFFFFFF00000000 -; SSE2-NEXT: movntiq %rax, (%rdi) -; SSE2-NEXT: movabsq $-25769803782, %rax # imm = 0xFFFFFFF9FFFFFFFA -; SSE2-NEXT: movntiq %rax, 24(%rdi) -; SSE2-NEXT: movabsq $-17179869188, %rax # imm = 0xFFFFFFFBFFFFFFFC -; SSE2-NEXT: movntiq %rax, 16(%rdi) -; SSE2-NEXT: retq -; -; SSE4A-LABEL: test_constant_v8i32_align1: -; SSE4A: # %bb.0: -; SSE4A-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero -; SSE4A-NEXT: movntsd %xmm0, 8(%rdi) -; SSE4A-NEXT: movsd {{.*#+}} xmm0 = [NaN,0.0E+0] -; SSE4A-NEXT: movntsd %xmm0, (%rdi) -; SSE4A-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero -; SSE4A-NEXT: movntsd %xmm0, 24(%rdi) -; SSE4A-NEXT: movsd {{.*#+}} xmm0 = [NaN,0.0E+0] -; SSE4A-NEXT: movntsd %xmm0, 16(%rdi) -; SSE4A-NEXT: retq -; -; SSE41-LABEL: test_constant_v8i32_align1: -; SSE41: # %bb.0: -; SSE41-NEXT: movabsq $-8589934594, %rax # imm = 0xFFFFFFFDFFFFFFFE -; SSE41-NEXT: movntiq %rax, 8(%rdi) -; SSE41-NEXT: movabsq $-4294967296, %rax # imm = 0xFFFFFFFF00000000 -; SSE41-NEXT: movntiq %rax, (%rdi) -; SSE41-NEXT: movabsq $-25769803782, %rax # imm = 0xFFFFFFF9FFFFFFFA -; SSE41-NEXT: movntiq %rax, 24(%rdi) -; SSE41-NEXT: movabsq $-17179869188, %rax # imm = 0xFFFFFFFBFFFFFFFC -; SSE41-NEXT: movntiq %rax, 16(%rdi) -; SSE41-NEXT: retq -; -; AVX-LABEL: test_constant_v8i32_align1: -; AVX: # %bb.0: -; AVX-NEXT: movabsq $-8589934594, %rax # imm = 0xFFFFFFFDFFFFFFFE -; AVX-NEXT: movntiq %rax, 8(%rdi) -; AVX-NEXT: movabsq $-4294967296, %rax # imm = 0xFFFFFFFF00000000 -; AVX-NEXT: movntiq %rax, (%rdi) -; AVX-NEXT: movabsq $-25769803782, %rax # imm = 0xFFFFFFF9FFFFFFFA -; AVX-NEXT: movntiq %rax, 24(%rdi) -; AVX-NEXT: movabsq $-17179869188, %rax # imm = 0xFFFFFFFBFFFFFFFC -; AVX-NEXT: movntiq %rax, 16(%rdi) -; AVX-NEXT: retq -; -; AVX512-LABEL: test_constant_v8i32_align1: -; AVX512: # %bb.0: -; AVX512-NEXT: movabsq $-8589934594, %rax # imm = 0xFFFFFFFDFFFFFFFE -; AVX512-NEXT: movntiq %rax, 8(%rdi) -; AVX512-NEXT: movabsq $-4294967296, %rax # imm = 0xFFFFFFFF00000000 -; AVX512-NEXT: movntiq %rax, (%rdi) -; AVX512-NEXT: movabsq $-25769803782, %rax # imm = 0xFFFFFFF9FFFFFFFA -; AVX512-NEXT: movntiq %rax, 24(%rdi) -; AVX512-NEXT: movabsq $-17179869188, %rax # imm = 0xFFFFFFFBFFFFFFFC -; AVX512-NEXT: movntiq %rax, 16(%rdi) -; AVX512-NEXT: retq +; CHECK-LABEL: test_constant_v8i32_align1: +; CHECK: # %bb.0: +; CHECK-NEXT: movabsq $-8589934594, %rax # imm = 0xFFFFFFFDFFFFFFFE +; CHECK-NEXT: movntiq %rax, 8(%rdi) +; CHECK-NEXT: movabsq $-4294967296, %rax # imm = 0xFFFFFFFF00000000 +; CHECK-NEXT: movntiq %rax, (%rdi) +; CHECK-NEXT: movabsq $-25769803782, %rax # imm = 0xFFFFFFF9FFFFFFFA +; CHECK-NEXT: movntiq %rax, 24(%rdi) +; CHECK-NEXT: movabsq $-17179869188, %rax # imm = 0xFFFFFFFBFFFFFFFC +; CHECK-NEXT: movntiq %rax, 16(%rdi) +; CHECK-NEXT: retq store <8 x i32> , ptr %dst, align 1, !nontemporal !1 ret void } define void @test_constant_v16i16_align1(ptr %dst) nounwind { -; SSE2-LABEL: test_constant_v16i16_align1: -; SSE2: # %bb.0: -; SSE2-NEXT: movabsq $-1688871335362564, %rax # imm = 0xFFF9FFFAFFFBFFFC -; SSE2-NEXT: movntiq %rax, 8(%rdi) -; SSE2-NEXT: movabsq $-562954248454144, %rax # imm = 0xFFFDFFFEFFFF0000 -; SSE2-NEXT: movntiq %rax, (%rdi) -; SSE2-NEXT: movabsq $-3940705509310476, %rax # imm = 0xFFF1FFF2FFF3FFF4 -; SSE2-NEXT: movntiq %rax, 24(%rdi) -; SSE2-NEXT: movabsq $-2814788422336520, %rax # imm = 0xFFF5FFF6FFF7FFF8 -; SSE2-NEXT: movntiq %rax, 16(%rdi) -; SSE2-NEXT: retq -; -; SSE4A-LABEL: test_constant_v16i16_align1: -; SSE4A: # %bb.0: -; SSE4A-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero -; SSE4A-NEXT: movntsd %xmm0, 8(%rdi) -; SSE4A-NEXT: movsd {{.*#+}} xmm0 = [NaN,0.0E+0] -; SSE4A-NEXT: movntsd %xmm0, (%rdi) -; SSE4A-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero -; SSE4A-NEXT: movntsd %xmm0, 24(%rdi) -; SSE4A-NEXT: movsd {{.*#+}} xmm0 = [NaN,0.0E+0] -; SSE4A-NEXT: movntsd %xmm0, 16(%rdi) -; SSE4A-NEXT: retq -; -; SSE41-LABEL: test_constant_v16i16_align1: -; SSE41: # %bb.0: -; SSE41-NEXT: movabsq $-1688871335362564, %rax # imm = 0xFFF9FFFAFFFBFFFC -; SSE41-NEXT: movntiq %rax, 8(%rdi) -; SSE41-NEXT: movabsq $-562954248454144, %rax # imm = 0xFFFDFFFEFFFF0000 -; SSE41-NEXT: movntiq %rax, (%rdi) -; SSE41-NEXT: movabsq $-3940705509310476, %rax # imm = 0xFFF1FFF2FFF3FFF4 -; SSE41-NEXT: movntiq %rax, 24(%rdi) -; SSE41-NEXT: movabsq $-2814788422336520, %rax # imm = 0xFFF5FFF6FFF7FFF8 -; SSE41-NEXT: movntiq %rax, 16(%rdi) -; SSE41-NEXT: retq -; -; AVX-LABEL: test_constant_v16i16_align1: -; AVX: # %bb.0: -; AVX-NEXT: movabsq $-1688871335362564, %rax # imm = 0xFFF9FFFAFFFBFFFC -; AVX-NEXT: movntiq %rax, 8(%rdi) -; AVX-NEXT: movabsq $-562954248454144, %rax # imm = 0xFFFDFFFEFFFF0000 -; AVX-NEXT: movntiq %rax, (%rdi) -; AVX-NEXT: movabsq $-3940705509310476, %rax # imm = 0xFFF1FFF2FFF3FFF4 -; AVX-NEXT: movntiq %rax, 24(%rdi) -; AVX-NEXT: movabsq $-2814788422336520, %rax # imm = 0xFFF5FFF6FFF7FFF8 -; AVX-NEXT: movntiq %rax, 16(%rdi) -; AVX-NEXT: retq -; -; AVX512-LABEL: test_constant_v16i16_align1: -; AVX512: # %bb.0: -; AVX512-NEXT: movabsq $-1688871335362564, %rax # imm = 0xFFF9FFFAFFFBFFFC -; AVX512-NEXT: movntiq %rax, 8(%rdi) -; AVX512-NEXT: movabsq $-562954248454144, %rax # imm = 0xFFFDFFFEFFFF0000 -; AVX512-NEXT: movntiq %rax, (%rdi) -; AVX512-NEXT: movabsq $-3940705509310476, %rax # imm = 0xFFF1FFF2FFF3FFF4 -; AVX512-NEXT: movntiq %rax, 24(%rdi) -; AVX512-NEXT: movabsq $-2814788422336520, %rax # imm = 0xFFF5FFF6FFF7FFF8 -; AVX512-NEXT: movntiq %rax, 16(%rdi) -; AVX512-NEXT: retq +; CHECK-LABEL: test_constant_v16i16_align1: +; CHECK: # %bb.0: +; CHECK-NEXT: movabsq $-1688871335362564, %rax # imm = 0xFFF9FFFAFFFBFFFC +; CHECK-NEXT: movntiq %rax, 8(%rdi) +; CHECK-NEXT: movabsq $-562954248454144, %rax # imm = 0xFFFDFFFEFFFF0000 +; CHECK-NEXT: movntiq %rax, (%rdi) +; CHECK-NEXT: movabsq $-3940705509310476, %rax # imm = 0xFFF1FFF2FFF3FFF4 +; CHECK-NEXT: movntiq %rax, 24(%rdi) +; CHECK-NEXT: movabsq $-2814788422336520, %rax # imm = 0xFFF5FFF6FFF7FFF8 +; CHECK-NEXT: movntiq %rax, 16(%rdi) +; CHECK-NEXT: retq store <16 x i16> , ptr %dst, align 1, !nontemporal !1 ret void } define void @test_constant_v32i8_align1(ptr %dst) nounwind { -; SSE2-LABEL: test_constant_v32i8_align1: -; SSE2: # %bb.0: -; SSE2-NEXT: movabsq $-1012478732780767240, %rax # imm = 0xF1F2F3F4F5F6F7F8 -; SSE2-NEXT: movntiq %rax, 8(%rdi) -; SSE2-NEXT: movabsq $-433757350076154112, %rax # imm = 0xF9FAFBFCFDFEFF00 -; SSE2-NEXT: movntiq %rax, (%rdi) -; SSE2-NEXT: movabsq $-2169921498189994008, %rax # imm = 0xE1E2E3E4E5E6E7E8 -; SSE2-NEXT: movntiq %rax, 24(%rdi) -; SSE2-NEXT: movabsq $-1591200115485380624, %rax # imm = 0xE9EAEBECEDEEEFF0 -; SSE2-NEXT: movntiq %rax, 16(%rdi) -; SSE2-NEXT: retq -; -; SSE4A-LABEL: test_constant_v32i8_align1: -; SSE4A: # %bb.0: -; SSE4A-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero -; SSE4A-NEXT: movntsd %xmm0, 8(%rdi) -; SSE4A-NEXT: movsd {{.*#+}} xmm0 = [-3.826728214441238E+279,0.0E+0] -; SSE4A-NEXT: movntsd %xmm0, (%rdi) -; SSE4A-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero -; SSE4A-NEXT: movntsd %xmm0, 24(%rdi) -; SSE4A-NEXT: movsd {{.*#+}} xmm0 = [-1.6485712323024388E+202,0.0E+0] -; SSE4A-NEXT: movntsd %xmm0, 16(%rdi) -; SSE4A-NEXT: retq -; -; SSE41-LABEL: test_constant_v32i8_align1: -; SSE41: # %bb.0: -; SSE41-NEXT: movabsq $-1012478732780767240, %rax # imm = 0xF1F2F3F4F5F6F7F8 -; SSE41-NEXT: movntiq %rax, 8(%rdi) -; SSE41-NEXT: movabsq $-433757350076154112, %rax # imm = 0xF9FAFBFCFDFEFF00 -; SSE41-NEXT: movntiq %rax, (%rdi) -; SSE41-NEXT: movabsq $-2169921498189994008, %rax # imm = 0xE1E2E3E4E5E6E7E8 -; SSE41-NEXT: movntiq %rax, 24(%rdi) -; SSE41-NEXT: movabsq $-1591200115485380624, %rax # imm = 0xE9EAEBECEDEEEFF0 -; SSE41-NEXT: movntiq %rax, 16(%rdi) -; SSE41-NEXT: retq -; -; AVX-LABEL: test_constant_v32i8_align1: -; AVX: # %bb.0: -; AVX-NEXT: movabsq $-1012478732780767240, %rax # imm = 0xF1F2F3F4F5F6F7F8 -; AVX-NEXT: movntiq %rax, 8(%rdi) -; AVX-NEXT: movabsq $-433757350076154112, %rax # imm = 0xF9FAFBFCFDFEFF00 -; AVX-NEXT: movntiq %rax, (%rdi) -; AVX-NEXT: movabsq $-2169921498189994008, %rax # imm = 0xE1E2E3E4E5E6E7E8 -; AVX-NEXT: movntiq %rax, 24(%rdi) -; AVX-NEXT: movabsq $-1591200115485380624, %rax # imm = 0xE9EAEBECEDEEEFF0 -; AVX-NEXT: movntiq %rax, 16(%rdi) -; AVX-NEXT: retq -; -; AVX512-LABEL: test_constant_v32i8_align1: -; AVX512: # %bb.0: -; AVX512-NEXT: movabsq $-1012478732780767240, %rax # imm = 0xF1F2F3F4F5F6F7F8 -; AVX512-NEXT: movntiq %rax, 8(%rdi) -; AVX512-NEXT: movabsq $-433757350076154112, %rax # imm = 0xF9FAFBFCFDFEFF00 -; AVX512-NEXT: movntiq %rax, (%rdi) -; AVX512-NEXT: movabsq $-2169921498189994008, %rax # imm = 0xE1E2E3E4E5E6E7E8 -; AVX512-NEXT: movntiq %rax, 24(%rdi) -; AVX512-NEXT: movabsq $-1591200115485380624, %rax # imm = 0xE9EAEBECEDEEEFF0 -; AVX512-NEXT: movntiq %rax, 16(%rdi) -; AVX512-NEXT: retq +; CHECK-LABEL: test_constant_v32i8_align1: +; CHECK: # %bb.0: +; CHECK-NEXT: movabsq $-1012478732780767240, %rax # imm = 0xF1F2F3F4F5F6F7F8 +; CHECK-NEXT: movntiq %rax, 8(%rdi) +; CHECK-NEXT: movabsq $-433757350076154112, %rax # imm = 0xF9FAFBFCFDFEFF00 +; CHECK-NEXT: movntiq %rax, (%rdi) +; CHECK-NEXT: movabsq $-2169921498189994008, %rax # imm = 0xE1E2E3E4E5E6E7E8 +; CHECK-NEXT: movntiq %rax, 24(%rdi) +; CHECK-NEXT: movabsq $-1591200115485380624, %rax # imm = 0xE9EAEBECEDEEEFF0 +; CHECK-NEXT: movntiq %rax, 16(%rdi) +; CHECK-NEXT: retq store <32 x i8> , ptr %dst, align 1, !nontemporal !1 ret void } @@ -779,521 +379,121 @@ define void @test_constant_v8f64_align1(ptr %dst) nounwind { } define void @test_constant_v16f32_align1(ptr %dst) nounwind { -; SSE2-LABEL: test_constant_v16f32_align1: -; SSE2: # %bb.0: -; SSE2-NEXT: movabsq $-4611686015214551040, %rax # imm = 0xC0000000BF800000 -; SSE2-NEXT: movntiq %rax, 8(%rdi) -; SSE2-NEXT: movabsq $-9223372036854775808, %rax # imm = 0x8000000000000000 -; SSE2-NEXT: movntiq %rax, (%rdi) -; SSE2-NEXT: movabsq $-4557642819667230720, %rax # imm = 0xC0C00000C0A00000 -; SSE2-NEXT: movntiq %rax, 24(%rdi) -; SSE2-NEXT: movabsq $-4575657218183004160, %rax # imm = 0xC0800000C0400000 -; SSE2-NEXT: movntiq %rax, 16(%rdi) -; SSE2-NEXT: movabsq $-4530621221895667712, %rax # imm = 0xC1200000C1100000 -; SSE2-NEXT: movntiq %rax, 40(%rdi) -; SSE2-NEXT: movabsq $-4539628421153554432, %rax # imm = 0xC1000000C0E00000 -; SSE2-NEXT: movntiq %rax, 32(%rdi) -; SSE2-NEXT: movabsq $-4512606823381991424, %rax # imm = 0xC1600000C1500000 -; SSE2-NEXT: movntiq %rax, 56(%rdi) -; SSE2-NEXT: movabsq $-4521614022638829568, %rax # imm = 0xC1400000C1300000 -; SSE2-NEXT: movntiq %rax, 48(%rdi) -; SSE2-NEXT: retq -; -; SSE4A-LABEL: test_constant_v16f32_align1: -; SSE4A: # %bb.0: -; SSE4A-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero -; SSE4A-NEXT: movntsd %xmm0, 8(%rdi) -; SSE4A-NEXT: movsd {{.*#+}} xmm0 = [-0.0E+0,0.0E+0] -; SSE4A-NEXT: movntsd %xmm0, (%rdi) -; SSE4A-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero -; SSE4A-NEXT: movntsd %xmm0, 24(%rdi) -; SSE4A-NEXT: movsd {{.*#+}} xmm0 = [-5.1200036668777466E+2,0.0E+0] -; SSE4A-NEXT: movntsd %xmm0, 16(%rdi) -; SSE4A-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero -; SSE4A-NEXT: movntsd %xmm0, 40(%rdi) -; SSE4A-NEXT: movsd {{.*#+}} xmm0 = [-1.3107209417724609E+5,0.0E+0] -; SSE4A-NEXT: movntsd %xmm0, 32(%rdi) -; SSE4A-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero -; SSE4A-NEXT: movntsd %xmm0, 56(%rdi) -; SSE4A-NEXT: movsd {{.*#+}} xmm0 = [-2.0971535092773438E+6,0.0E+0] -; SSE4A-NEXT: movntsd %xmm0, 48(%rdi) -; SSE4A-NEXT: retq -; -; SSE41-LABEL: test_constant_v16f32_align1: -; SSE41: # %bb.0: -; SSE41-NEXT: movabsq $-4611686015214551040, %rax # imm = 0xC0000000BF800000 -; SSE41-NEXT: movntiq %rax, 8(%rdi) -; SSE41-NEXT: movabsq $-9223372036854775808, %rax # imm = 0x8000000000000000 -; SSE41-NEXT: movntiq %rax, (%rdi) -; SSE41-NEXT: movabsq $-4557642819667230720, %rax # imm = 0xC0C00000C0A00000 -; SSE41-NEXT: movntiq %rax, 24(%rdi) -; SSE41-NEXT: movabsq $-4575657218183004160, %rax # imm = 0xC0800000C0400000 -; SSE41-NEXT: movntiq %rax, 16(%rdi) -; SSE41-NEXT: movabsq $-4530621221895667712, %rax # imm = 0xC1200000C1100000 -; SSE41-NEXT: movntiq %rax, 40(%rdi) -; SSE41-NEXT: movabsq $-4539628421153554432, %rax # imm = 0xC1000000C0E00000 -; SSE41-NEXT: movntiq %rax, 32(%rdi) -; SSE41-NEXT: movabsq $-4512606823381991424, %rax # imm = 0xC1600000C1500000 -; SSE41-NEXT: movntiq %rax, 56(%rdi) -; SSE41-NEXT: movabsq $-4521614022638829568, %rax # imm = 0xC1400000C1300000 -; SSE41-NEXT: movntiq %rax, 48(%rdi) -; SSE41-NEXT: retq -; -; AVX-LABEL: test_constant_v16f32_align1: -; AVX: # %bb.0: -; AVX-NEXT: movabsq $-4611686015214551040, %rax # imm = 0xC0000000BF800000 -; AVX-NEXT: movntiq %rax, 8(%rdi) -; AVX-NEXT: movabsq $-9223372036854775808, %rax # imm = 0x8000000000000000 -; AVX-NEXT: movntiq %rax, (%rdi) -; AVX-NEXT: movabsq $-4557642819667230720, %rax # imm = 0xC0C00000C0A00000 -; AVX-NEXT: movntiq %rax, 24(%rdi) -; AVX-NEXT: movabsq $-4575657218183004160, %rax # imm = 0xC0800000C0400000 -; AVX-NEXT: movntiq %rax, 16(%rdi) -; AVX-NEXT: movabsq $-4530621221895667712, %rax # imm = 0xC1200000C1100000 -; AVX-NEXT: movntiq %rax, 40(%rdi) -; AVX-NEXT: movabsq $-4539628421153554432, %rax # imm = 0xC1000000C0E00000 -; AVX-NEXT: movntiq %rax, 32(%rdi) -; AVX-NEXT: movabsq $-4512606823381991424, %rax # imm = 0xC1600000C1500000 -; AVX-NEXT: movntiq %rax, 56(%rdi) -; AVX-NEXT: movabsq $-4521614022638829568, %rax # imm = 0xC1400000C1300000 -; AVX-NEXT: movntiq %rax, 48(%rdi) -; AVX-NEXT: retq -; -; AVX512-LABEL: test_constant_v16f32_align1: -; AVX512: # %bb.0: -; AVX512-NEXT: movabsq $-4611686015214551040, %rax # imm = 0xC0000000BF800000 -; AVX512-NEXT: movntiq %rax, 8(%rdi) -; AVX512-NEXT: movabsq $-9223372036854775808, %rax # imm = 0x8000000000000000 -; AVX512-NEXT: movntiq %rax, (%rdi) -; AVX512-NEXT: movabsq $-4557642819667230720, %rax # imm = 0xC0C00000C0A00000 -; AVX512-NEXT: movntiq %rax, 24(%rdi) -; AVX512-NEXT: movabsq $-4575657218183004160, %rax # imm = 0xC0800000C0400000 -; AVX512-NEXT: movntiq %rax, 16(%rdi) -; AVX512-NEXT: movabsq $-4530621221895667712, %rax # imm = 0xC1200000C1100000 -; AVX512-NEXT: movntiq %rax, 40(%rdi) -; AVX512-NEXT: movabsq $-4539628421153554432, %rax # imm = 0xC1000000C0E00000 -; AVX512-NEXT: movntiq %rax, 32(%rdi) -; AVX512-NEXT: movabsq $-4512606823381991424, %rax # imm = 0xC1600000C1500000 -; AVX512-NEXT: movntiq %rax, 56(%rdi) -; AVX512-NEXT: movabsq $-4521614022638829568, %rax # imm = 0xC1400000C1300000 -; AVX512-NEXT: movntiq %rax, 48(%rdi) -; AVX512-NEXT: retq +; CHECK-LABEL: test_constant_v16f32_align1: +; CHECK: # %bb.0: +; CHECK-NEXT: movabsq $-4611686015214551040, %rax # imm = 0xC0000000BF800000 +; CHECK-NEXT: movntiq %rax, 8(%rdi) +; CHECK-NEXT: movabsq $-9223372036854775808, %rax # imm = 0x8000000000000000 +; CHECK-NEXT: movntiq %rax, (%rdi) +; CHECK-NEXT: movabsq $-4557642819667230720, %rax # imm = 0xC0C00000C0A00000 +; CHECK-NEXT: movntiq %rax, 24(%rdi) +; CHECK-NEXT: movabsq $-4575657218183004160, %rax # imm = 0xC0800000C0400000 +; CHECK-NEXT: movntiq %rax, 16(%rdi) +; CHECK-NEXT: movabsq $-4530621221895667712, %rax # imm = 0xC1200000C1100000 +; CHECK-NEXT: movntiq %rax, 40(%rdi) +; CHECK-NEXT: movabsq $-4539628421153554432, %rax # imm = 0xC1000000C0E00000 +; CHECK-NEXT: movntiq %rax, 32(%rdi) +; CHECK-NEXT: movabsq $-4512606823381991424, %rax # imm = 0xC1600000C1500000 +; CHECK-NEXT: movntiq %rax, 56(%rdi) +; CHECK-NEXT: movabsq $-4521614022638829568, %rax # imm = 0xC1400000C1300000 +; CHECK-NEXT: movntiq %rax, 48(%rdi) +; CHECK-NEXT: retq store <16 x float> , ptr %dst, align 1, !nontemporal !1 ret void } define void @test_constant_v8i64_align1(ptr %dst) nounwind { -; SSE2-LABEL: test_constant_v8i64_align1: -; SSE2: # %bb.0: -; SSE2-NEXT: movq $-1, %rax -; SSE2-NEXT: movntiq %rax, 8(%rdi) -; SSE2-NEXT: movq $-3, %rax -; SSE2-NEXT: movntiq %rax, 24(%rdi) -; SSE2-NEXT: movq $-2, %rax -; SSE2-NEXT: movntiq %rax, 16(%rdi) -; SSE2-NEXT: movq $-5, %rax -; SSE2-NEXT: movntiq %rax, 40(%rdi) -; SSE2-NEXT: movq $-4, %rax -; SSE2-NEXT: movntiq %rax, 32(%rdi) -; SSE2-NEXT: movq $-7, %rax -; SSE2-NEXT: movntiq %rax, 56(%rdi) -; SSE2-NEXT: movq $-6, %rax -; SSE2-NEXT: movntiq %rax, 48(%rdi) -; SSE2-NEXT: xorl %eax, %eax -; SSE2-NEXT: movntiq %rax, (%rdi) -; SSE2-NEXT: retq -; -; SSE4A-LABEL: test_constant_v8i64_align1: -; SSE4A: # %bb.0: -; SSE4A-NEXT: movsd {{.*#+}} xmm0 = [NaN,0.0E+0] -; SSE4A-NEXT: movntsd %xmm0, 8(%rdi) -; SSE4A-NEXT: xorl %eax, %eax -; SSE4A-NEXT: movntiq %rax, (%rdi) -; SSE4A-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero -; SSE4A-NEXT: movntsd %xmm0, 24(%rdi) -; SSE4A-NEXT: movsd {{.*#+}} xmm0 = [NaN,0.0E+0] -; SSE4A-NEXT: movntsd %xmm0, 16(%rdi) -; SSE4A-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero -; SSE4A-NEXT: movntsd %xmm0, 40(%rdi) -; SSE4A-NEXT: movsd {{.*#+}} xmm0 = [NaN,0.0E+0] -; SSE4A-NEXT: movntsd %xmm0, 32(%rdi) -; SSE4A-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero -; SSE4A-NEXT: movntsd %xmm0, 56(%rdi) -; SSE4A-NEXT: movsd {{.*#+}} xmm0 = [NaN,0.0E+0] -; SSE4A-NEXT: movntsd %xmm0, 48(%rdi) -; SSE4A-NEXT: retq -; -; SSE41-LABEL: test_constant_v8i64_align1: -; SSE41: # %bb.0: -; SSE41-NEXT: movq $-1, %rax -; SSE41-NEXT: movntiq %rax, 8(%rdi) -; SSE41-NEXT: movq $-3, %rax -; SSE41-NEXT: movntiq %rax, 24(%rdi) -; SSE41-NEXT: movq $-2, %rax -; SSE41-NEXT: movntiq %rax, 16(%rdi) -; SSE41-NEXT: movq $-5, %rax -; SSE41-NEXT: movntiq %rax, 40(%rdi) -; SSE41-NEXT: movq $-4, %rax -; SSE41-NEXT: movntiq %rax, 32(%rdi) -; SSE41-NEXT: movq $-7, %rax -; SSE41-NEXT: movntiq %rax, 56(%rdi) -; SSE41-NEXT: movq $-6, %rax -; SSE41-NEXT: movntiq %rax, 48(%rdi) -; SSE41-NEXT: xorl %eax, %eax -; SSE41-NEXT: movntiq %rax, (%rdi) -; SSE41-NEXT: retq -; -; AVX-LABEL: test_constant_v8i64_align1: -; AVX: # %bb.0: -; AVX-NEXT: movq $-1, %rax -; AVX-NEXT: movntiq %rax, 8(%rdi) -; AVX-NEXT: movq $-3, %rax -; AVX-NEXT: movntiq %rax, 24(%rdi) -; AVX-NEXT: movq $-2, %rax -; AVX-NEXT: movntiq %rax, 16(%rdi) -; AVX-NEXT: movq $-5, %rax -; AVX-NEXT: movntiq %rax, 40(%rdi) -; AVX-NEXT: movq $-4, %rax -; AVX-NEXT: movntiq %rax, 32(%rdi) -; AVX-NEXT: movq $-7, %rax -; AVX-NEXT: movntiq %rax, 56(%rdi) -; AVX-NEXT: movq $-6, %rax -; AVX-NEXT: movntiq %rax, 48(%rdi) -; AVX-NEXT: xorl %eax, %eax -; AVX-NEXT: movntiq %rax, (%rdi) -; AVX-NEXT: retq -; -; AVX512-LABEL: test_constant_v8i64_align1: -; AVX512: # %bb.0: -; AVX512-NEXT: movq $-1, %rax -; AVX512-NEXT: movntiq %rax, 8(%rdi) -; AVX512-NEXT: movq $-3, %rax -; AVX512-NEXT: movntiq %rax, 24(%rdi) -; AVX512-NEXT: movq $-2, %rax -; AVX512-NEXT: movntiq %rax, 16(%rdi) -; AVX512-NEXT: movq $-5, %rax -; AVX512-NEXT: movntiq %rax, 40(%rdi) -; AVX512-NEXT: movq $-4, %rax -; AVX512-NEXT: movntiq %rax, 32(%rdi) -; AVX512-NEXT: movq $-7, %rax -; AVX512-NEXT: movntiq %rax, 56(%rdi) -; AVX512-NEXT: movq $-6, %rax -; AVX512-NEXT: movntiq %rax, 48(%rdi) -; AVX512-NEXT: xorl %eax, %eax -; AVX512-NEXT: movntiq %rax, (%rdi) -; AVX512-NEXT: retq +; CHECK-LABEL: test_constant_v8i64_align1: +; CHECK: # %bb.0: +; CHECK-NEXT: movq $-1, %rax +; CHECK-NEXT: movntiq %rax, 8(%rdi) +; CHECK-NEXT: movq $-3, %rax +; CHECK-NEXT: movntiq %rax, 24(%rdi) +; CHECK-NEXT: movq $-2, %rax +; CHECK-NEXT: movntiq %rax, 16(%rdi) +; CHECK-NEXT: movq $-5, %rax +; CHECK-NEXT: movntiq %rax, 40(%rdi) +; CHECK-NEXT: movq $-4, %rax +; CHECK-NEXT: movntiq %rax, 32(%rdi) +; CHECK-NEXT: movq $-7, %rax +; CHECK-NEXT: movntiq %rax, 56(%rdi) +; CHECK-NEXT: movq $-6, %rax +; CHECK-NEXT: movntiq %rax, 48(%rdi) +; CHECK-NEXT: xorl %eax, %eax +; CHECK-NEXT: movntiq %rax, (%rdi) +; CHECK-NEXT: retq store <8 x i64> , ptr %dst, align 1, !nontemporal !1 ret void } define void @test_constant_v16i32_align1(ptr %dst) nounwind { -; SSE2-LABEL: test_constant_v16i32_align1: -; SSE2: # %bb.0: -; SSE2-NEXT: movabsq $-8589934594, %rax # imm = 0xFFFFFFFDFFFFFFFE -; SSE2-NEXT: movntiq %rax, 8(%rdi) -; SSE2-NEXT: movabsq $-4294967296, %rax # imm = 0xFFFFFFFF00000000 -; SSE2-NEXT: movntiq %rax, (%rdi) -; SSE2-NEXT: movabsq $-25769803782, %rax # imm = 0xFFFFFFF9FFFFFFFA -; SSE2-NEXT: movntiq %rax, 24(%rdi) -; SSE2-NEXT: movabsq $-17179869188, %rax # imm = 0xFFFFFFFBFFFFFFFC -; SSE2-NEXT: movntiq %rax, 16(%rdi) -; SSE2-NEXT: movabsq $-42949672970, %rax # imm = 0xFFFFFFF5FFFFFFF6 -; SSE2-NEXT: movntiq %rax, 40(%rdi) -; SSE2-NEXT: movabsq $-34359738376, %rax # imm = 0xFFFFFFF7FFFFFFF8 -; SSE2-NEXT: movntiq %rax, 32(%rdi) -; SSE2-NEXT: movabsq $-60129542158, %rax # imm = 0xFFFFFFF1FFFFFFF2 -; SSE2-NEXT: movntiq %rax, 56(%rdi) -; SSE2-NEXT: movabsq $-51539607564, %rax # imm = 0xFFFFFFF3FFFFFFF4 -; SSE2-NEXT: movntiq %rax, 48(%rdi) -; SSE2-NEXT: retq -; -; SSE4A-LABEL: test_constant_v16i32_align1: -; SSE4A: # %bb.0: -; SSE4A-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero -; SSE4A-NEXT: movntsd %xmm0, 8(%rdi) -; SSE4A-NEXT: movsd {{.*#+}} xmm0 = [NaN,0.0E+0] -; SSE4A-NEXT: movntsd %xmm0, (%rdi) -; SSE4A-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero -; SSE4A-NEXT: movntsd %xmm0, 24(%rdi) -; SSE4A-NEXT: movsd {{.*#+}} xmm0 = [NaN,0.0E+0] -; SSE4A-NEXT: movntsd %xmm0, 16(%rdi) -; SSE4A-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero -; SSE4A-NEXT: movntsd %xmm0, 40(%rdi) -; SSE4A-NEXT: movsd {{.*#+}} xmm0 = [NaN,0.0E+0] -; SSE4A-NEXT: movntsd %xmm0, 32(%rdi) -; SSE4A-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero -; SSE4A-NEXT: movntsd %xmm0, 56(%rdi) -; SSE4A-NEXT: movsd {{.*#+}} xmm0 = [NaN,0.0E+0] -; SSE4A-NEXT: movntsd %xmm0, 48(%rdi) -; SSE4A-NEXT: retq -; -; SSE41-LABEL: test_constant_v16i32_align1: -; SSE41: # %bb.0: -; SSE41-NEXT: movabsq $-8589934594, %rax # imm = 0xFFFFFFFDFFFFFFFE -; SSE41-NEXT: movntiq %rax, 8(%rdi) -; SSE41-NEXT: movabsq $-4294967296, %rax # imm = 0xFFFFFFFF00000000 -; SSE41-NEXT: movntiq %rax, (%rdi) -; SSE41-NEXT: movabsq $-25769803782, %rax # imm = 0xFFFFFFF9FFFFFFFA -; SSE41-NEXT: movntiq %rax, 24(%rdi) -; SSE41-NEXT: movabsq $-17179869188, %rax # imm = 0xFFFFFFFBFFFFFFFC -; SSE41-NEXT: movntiq %rax, 16(%rdi) -; SSE41-NEXT: movabsq $-42949672970, %rax # imm = 0xFFFFFFF5FFFFFFF6 -; SSE41-NEXT: movntiq %rax, 40(%rdi) -; SSE41-NEXT: movabsq $-34359738376, %rax # imm = 0xFFFFFFF7FFFFFFF8 -; SSE41-NEXT: movntiq %rax, 32(%rdi) -; SSE41-NEXT: movabsq $-60129542158, %rax # imm = 0xFFFFFFF1FFFFFFF2 -; SSE41-NEXT: movntiq %rax, 56(%rdi) -; SSE41-NEXT: movabsq $-51539607564, %rax # imm = 0xFFFFFFF3FFFFFFF4 -; SSE41-NEXT: movntiq %rax, 48(%rdi) -; SSE41-NEXT: retq -; -; AVX-LABEL: test_constant_v16i32_align1: -; AVX: # %bb.0: -; AVX-NEXT: movabsq $-8589934594, %rax # imm = 0xFFFFFFFDFFFFFFFE -; AVX-NEXT: movntiq %rax, 8(%rdi) -; AVX-NEXT: movabsq $-4294967296, %rax # imm = 0xFFFFFFFF00000000 -; AVX-NEXT: movntiq %rax, (%rdi) -; AVX-NEXT: movabsq $-25769803782, %rax # imm = 0xFFFFFFF9FFFFFFFA -; AVX-NEXT: movntiq %rax, 24(%rdi) -; AVX-NEXT: movabsq $-17179869188, %rax # imm = 0xFFFFFFFBFFFFFFFC -; AVX-NEXT: movntiq %rax, 16(%rdi) -; AVX-NEXT: movabsq $-42949672970, %rax # imm = 0xFFFFFFF5FFFFFFF6 -; AVX-NEXT: movntiq %rax, 40(%rdi) -; AVX-NEXT: movabsq $-34359738376, %rax # imm = 0xFFFFFFF7FFFFFFF8 -; AVX-NEXT: movntiq %rax, 32(%rdi) -; AVX-NEXT: movabsq $-60129542158, %rax # imm = 0xFFFFFFF1FFFFFFF2 -; AVX-NEXT: movntiq %rax, 56(%rdi) -; AVX-NEXT: movabsq $-51539607564, %rax # imm = 0xFFFFFFF3FFFFFFF4 -; AVX-NEXT: movntiq %rax, 48(%rdi) -; AVX-NEXT: retq -; -; AVX512-LABEL: test_constant_v16i32_align1: -; AVX512: # %bb.0: -; AVX512-NEXT: movabsq $-8589934594, %rax # imm = 0xFFFFFFFDFFFFFFFE -; AVX512-NEXT: movntiq %rax, 8(%rdi) -; AVX512-NEXT: movabsq $-4294967296, %rax # imm = 0xFFFFFFFF00000000 -; AVX512-NEXT: movntiq %rax, (%rdi) -; AVX512-NEXT: movabsq $-25769803782, %rax # imm = 0xFFFFFFF9FFFFFFFA -; AVX512-NEXT: movntiq %rax, 24(%rdi) -; AVX512-NEXT: movabsq $-17179869188, %rax # imm = 0xFFFFFFFBFFFFFFFC -; AVX512-NEXT: movntiq %rax, 16(%rdi) -; AVX512-NEXT: movabsq $-42949672970, %rax # imm = 0xFFFFFFF5FFFFFFF6 -; AVX512-NEXT: movntiq %rax, 40(%rdi) -; AVX512-NEXT: movabsq $-34359738376, %rax # imm = 0xFFFFFFF7FFFFFFF8 -; AVX512-NEXT: movntiq %rax, 32(%rdi) -; AVX512-NEXT: movabsq $-60129542158, %rax # imm = 0xFFFFFFF1FFFFFFF2 -; AVX512-NEXT: movntiq %rax, 56(%rdi) -; AVX512-NEXT: movabsq $-51539607564, %rax # imm = 0xFFFFFFF3FFFFFFF4 -; AVX512-NEXT: movntiq %rax, 48(%rdi) -; AVX512-NEXT: retq +; CHECK-LABEL: test_constant_v16i32_align1: +; CHECK: # %bb.0: +; CHECK-NEXT: movabsq $-8589934594, %rax # imm = 0xFFFFFFFDFFFFFFFE +; CHECK-NEXT: movntiq %rax, 8(%rdi) +; CHECK-NEXT: movabsq $-4294967296, %rax # imm = 0xFFFFFFFF00000000 +; CHECK-NEXT: movntiq %rax, (%rdi) +; CHECK-NEXT: movabsq $-25769803782, %rax # imm = 0xFFFFFFF9FFFFFFFA +; CHECK-NEXT: movntiq %rax, 24(%rdi) +; CHECK-NEXT: movabsq $-17179869188, %rax # imm = 0xFFFFFFFBFFFFFFFC +; CHECK-NEXT: movntiq %rax, 16(%rdi) +; CHECK-NEXT: movabsq $-42949672970, %rax # imm = 0xFFFFFFF5FFFFFFF6 +; CHECK-NEXT: movntiq %rax, 40(%rdi) +; CHECK-NEXT: movabsq $-34359738376, %rax # imm = 0xFFFFFFF7FFFFFFF8 +; CHECK-NEXT: movntiq %rax, 32(%rdi) +; CHECK-NEXT: movabsq $-60129542158, %rax # imm = 0xFFFFFFF1FFFFFFF2 +; CHECK-NEXT: movntiq %rax, 56(%rdi) +; CHECK-NEXT: movabsq $-51539607564, %rax # imm = 0xFFFFFFF3FFFFFFF4 +; CHECK-NEXT: movntiq %rax, 48(%rdi) +; CHECK-NEXT: retq store <16 x i32> , ptr %dst, align 1, !nontemporal !1 ret void } define void @test_constant_v32i16_align1(ptr %dst) nounwind { -; SSE2-LABEL: test_constant_v32i16_align1: -; SSE2: # %bb.0: -; SSE2-NEXT: movabsq $-1688871335362564, %rax # imm = 0xFFF9FFFAFFFBFFFC -; SSE2-NEXT: movntiq %rax, 8(%rdi) -; SSE2-NEXT: movabsq $-562954248454144, %rax # imm = 0xFFFDFFFEFFFF0000 -; SSE2-NEXT: movntiq %rax, (%rdi) -; SSE2-NEXT: movabsq $-3940705509310476, %rax # imm = 0xFFF1FFF2FFF3FFF4 -; SSE2-NEXT: movntiq %rax, 24(%rdi) -; SSE2-NEXT: movabsq $-2814788422336520, %rax # imm = 0xFFF5FFF6FFF7FFF8 -; SSE2-NEXT: movntiq %rax, 16(%rdi) -; SSE2-NEXT: movabsq $-6192539683258388, %rax # imm = 0xFFE9FFEAFFEBFFEC -; SSE2-NEXT: movntiq %rax, 40(%rdi) -; SSE2-NEXT: movabsq $-5066622596284432, %rax # imm = 0xFFEDFFEEFFEFFFF0 -; SSE2-NEXT: movntiq %rax, 32(%rdi) -; SSE2-NEXT: movabsq $-8444373857206300, %rax # imm = 0xFFE1FFE2FFE3FFE4 -; SSE2-NEXT: movntiq %rax, 56(%rdi) -; SSE2-NEXT: movabsq $-7318456770232344, %rax # imm = 0xFFE5FFE6FFE7FFE8 -; SSE2-NEXT: movntiq %rax, 48(%rdi) -; SSE2-NEXT: retq -; -; SSE4A-LABEL: test_constant_v32i16_align1: -; SSE4A: # %bb.0: -; SSE4A-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero -; SSE4A-NEXT: movntsd %xmm0, 8(%rdi) -; SSE4A-NEXT: movsd {{.*#+}} xmm0 = [NaN,0.0E+0] -; SSE4A-NEXT: movntsd %xmm0, (%rdi) -; SSE4A-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero -; SSE4A-NEXT: movntsd %xmm0, 24(%rdi) -; SSE4A-NEXT: movsd {{.*#+}} xmm0 = [NaN,0.0E+0] -; SSE4A-NEXT: movntsd %xmm0, 16(%rdi) -; SSE4A-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero -; SSE4A-NEXT: movntsd %xmm0, 40(%rdi) -; SSE4A-NEXT: movsd {{.*#+}} xmm0 = [-1.6853227412070812E+308,0.0E+0] -; SSE4A-NEXT: movntsd %xmm0, 32(%rdi) -; SSE4A-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero -; SSE4A-NEXT: movntsd %xmm0, 56(%rdi) -; SSE4A-NEXT: movsd {{.*#+}} xmm0 = [-1.2358925997317751E+308,0.0E+0] -; SSE4A-NEXT: movntsd %xmm0, 48(%rdi) -; SSE4A-NEXT: retq -; -; SSE41-LABEL: test_constant_v32i16_align1: -; SSE41: # %bb.0: -; SSE41-NEXT: movabsq $-1688871335362564, %rax # imm = 0xFFF9FFFAFFFBFFFC -; SSE41-NEXT: movntiq %rax, 8(%rdi) -; SSE41-NEXT: movabsq $-562954248454144, %rax # imm = 0xFFFDFFFEFFFF0000 -; SSE41-NEXT: movntiq %rax, (%rdi) -; SSE41-NEXT: movabsq $-3940705509310476, %rax # imm = 0xFFF1FFF2FFF3FFF4 -; SSE41-NEXT: movntiq %rax, 24(%rdi) -; SSE41-NEXT: movabsq $-2814788422336520, %rax # imm = 0xFFF5FFF6FFF7FFF8 -; SSE41-NEXT: movntiq %rax, 16(%rdi) -; SSE41-NEXT: movabsq $-6192539683258388, %rax # imm = 0xFFE9FFEAFFEBFFEC -; SSE41-NEXT: movntiq %rax, 40(%rdi) -; SSE41-NEXT: movabsq $-5066622596284432, %rax # imm = 0xFFEDFFEEFFEFFFF0 -; SSE41-NEXT: movntiq %rax, 32(%rdi) -; SSE41-NEXT: movabsq $-8444373857206300, %rax # imm = 0xFFE1FFE2FFE3FFE4 -; SSE41-NEXT: movntiq %rax, 56(%rdi) -; SSE41-NEXT: movabsq $-7318456770232344, %rax # imm = 0xFFE5FFE6FFE7FFE8 -; SSE41-NEXT: movntiq %rax, 48(%rdi) -; SSE41-NEXT: retq -; -; AVX-LABEL: test_constant_v32i16_align1: -; AVX: # %bb.0: -; AVX-NEXT: movabsq $-1688871335362564, %rax # imm = 0xFFF9FFFAFFFBFFFC -; AVX-NEXT: movntiq %rax, 8(%rdi) -; AVX-NEXT: movabsq $-562954248454144, %rax # imm = 0xFFFDFFFEFFFF0000 -; AVX-NEXT: movntiq %rax, (%rdi) -; AVX-NEXT: movabsq $-3940705509310476, %rax # imm = 0xFFF1FFF2FFF3FFF4 -; AVX-NEXT: movntiq %rax, 24(%rdi) -; AVX-NEXT: movabsq $-2814788422336520, %rax # imm = 0xFFF5FFF6FFF7FFF8 -; AVX-NEXT: movntiq %rax, 16(%rdi) -; AVX-NEXT: movabsq $-6192539683258388, %rax # imm = 0xFFE9FFEAFFEBFFEC -; AVX-NEXT: movntiq %rax, 40(%rdi) -; AVX-NEXT: movabsq $-5066622596284432, %rax # imm = 0xFFEDFFEEFFEFFFF0 -; AVX-NEXT: movntiq %rax, 32(%rdi) -; AVX-NEXT: movabsq $-8444373857206300, %rax # imm = 0xFFE1FFE2FFE3FFE4 -; AVX-NEXT: movntiq %rax, 56(%rdi) -; AVX-NEXT: movabsq $-7318456770232344, %rax # imm = 0xFFE5FFE6FFE7FFE8 -; AVX-NEXT: movntiq %rax, 48(%rdi) -; AVX-NEXT: retq -; -; AVX512-LABEL: test_constant_v32i16_align1: -; AVX512: # %bb.0: -; AVX512-NEXT: movabsq $-1688871335362564, %rax # imm = 0xFFF9FFFAFFFBFFFC -; AVX512-NEXT: movntiq %rax, 8(%rdi) -; AVX512-NEXT: movabsq $-562954248454144, %rax # imm = 0xFFFDFFFEFFFF0000 -; AVX512-NEXT: movntiq %rax, (%rdi) -; AVX512-NEXT: movabsq $-3940705509310476, %rax # imm = 0xFFF1FFF2FFF3FFF4 -; AVX512-NEXT: movntiq %rax, 24(%rdi) -; AVX512-NEXT: movabsq $-2814788422336520, %rax # imm = 0xFFF5FFF6FFF7FFF8 -; AVX512-NEXT: movntiq %rax, 16(%rdi) -; AVX512-NEXT: movabsq $-6192539683258388, %rax # imm = 0xFFE9FFEAFFEBFFEC -; AVX512-NEXT: movntiq %rax, 40(%rdi) -; AVX512-NEXT: movabsq $-5066622596284432, %rax # imm = 0xFFEDFFEEFFEFFFF0 -; AVX512-NEXT: movntiq %rax, 32(%rdi) -; AVX512-NEXT: movabsq $-8444373857206300, %rax # imm = 0xFFE1FFE2FFE3FFE4 -; AVX512-NEXT: movntiq %rax, 56(%rdi) -; AVX512-NEXT: movabsq $-7318456770232344, %rax # imm = 0xFFE5FFE6FFE7FFE8 -; AVX512-NEXT: movntiq %rax, 48(%rdi) -; AVX512-NEXT: retq +; CHECK-LABEL: test_constant_v32i16_align1: +; CHECK: # %bb.0: +; CHECK-NEXT: movabsq $-1688871335362564, %rax # imm = 0xFFF9FFFAFFFBFFFC +; CHECK-NEXT: movntiq %rax, 8(%rdi) +; CHECK-NEXT: movabsq $-562954248454144, %rax # imm = 0xFFFDFFFEFFFF0000 +; CHECK-NEXT: movntiq %rax, (%rdi) +; CHECK-NEXT: movabsq $-3940705509310476, %rax # imm = 0xFFF1FFF2FFF3FFF4 +; CHECK-NEXT: movntiq %rax, 24(%rdi) +; CHECK-NEXT: movabsq $-2814788422336520, %rax # imm = 0xFFF5FFF6FFF7FFF8 +; CHECK-NEXT: movntiq %rax, 16(%rdi) +; CHECK-NEXT: movabsq $-6192539683258388, %rax # imm = 0xFFE9FFEAFFEBFFEC +; CHECK-NEXT: movntiq %rax, 40(%rdi) +; CHECK-NEXT: movabsq $-5066622596284432, %rax # imm = 0xFFEDFFEEFFEFFFF0 +; CHECK-NEXT: movntiq %rax, 32(%rdi) +; CHECK-NEXT: movabsq $-8444373857206300, %rax # imm = 0xFFE1FFE2FFE3FFE4 +; CHECK-NEXT: movntiq %rax, 56(%rdi) +; CHECK-NEXT: movabsq $-7318456770232344, %rax # imm = 0xFFE5FFE6FFE7FFE8 +; CHECK-NEXT: movntiq %rax, 48(%rdi) +; CHECK-NEXT: retq store <32 x i16> , ptr %dst, align 1, !nontemporal !1 ret void } define void @test_constant_v64i8_align1(ptr %dst) nounwind { -; SSE2-LABEL: test_constant_v64i8_align1: -; SSE2: # %bb.0: -; SSE2-NEXT: movabsq $-1012478732780767240, %rax # imm = 0xF1F2F3F4F5F6F7F8 -; SSE2-NEXT: movntiq %rax, 8(%rdi) -; SSE2-NEXT: movabsq $-433757350076154112, %rax # imm = 0xF9FAFBFCFDFEFF00 -; SSE2-NEXT: movntiq %rax, (%rdi) -; SSE2-NEXT: movabsq $-2169921498189994008, %rax # imm = 0xE1E2E3E4E5E6E7E8 -; SSE2-NEXT: movntiq %rax, 24(%rdi) -; SSE2-NEXT: movabsq $-1591200115485380624, %rax # imm = 0xE9EAEBECEDEEEFF0 -; SSE2-NEXT: movntiq %rax, 16(%rdi) -; SSE2-NEXT: movabsq $-3327364263599220776, %rax # imm = 0xD1D2D3D4D5D6D7D8 -; SSE2-NEXT: movntiq %rax, 40(%rdi) -; SSE2-NEXT: movabsq $-2748642880894607392, %rax # imm = 0xD9DADBDCDDDEDFE0 -; SSE2-NEXT: movntiq %rax, 32(%rdi) -; SSE2-NEXT: movabsq $-4484807029008447544, %rax # imm = 0xC1C2C3C4C5C6C7C8 -; SSE2-NEXT: movntiq %rax, 56(%rdi) -; SSE2-NEXT: movabsq $-3906085646303834160, %rax # imm = 0xC9CACBCCCDCECFD0 -; SSE2-NEXT: movntiq %rax, 48(%rdi) -; SSE2-NEXT: retq -; -; SSE4A-LABEL: test_constant_v64i8_align1: -; SSE4A: # %bb.0: -; SSE4A-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero -; SSE4A-NEXT: movntsd %xmm0, 8(%rdi) -; SSE4A-NEXT: movsd {{.*#+}} xmm0 = [-3.826728214441238E+279,0.0E+0] -; SSE4A-NEXT: movntsd %xmm0, (%rdi) -; SSE4A-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero -; SSE4A-NEXT: movntsd %xmm0, 24(%rdi) -; SSE4A-NEXT: movsd {{.*#+}} xmm0 = [-1.6485712323024388E+202,0.0E+0] -; SSE4A-NEXT: movntsd %xmm0, 16(%rdi) -; SSE4A-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero -; SSE4A-NEXT: movntsd %xmm0, 40(%rdi) -; SSE4A-NEXT: movsd {{.*#+}} xmm0 = [-7.1020783099933495E+124,0.0E+0] -; SSE4A-NEXT: movntsd %xmm0, 32(%rdi) -; SSE4A-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero -; SSE4A-NEXT: movntsd %xmm0, 56(%rdi) -; SSE4A-NEXT: movsd {{.*#+}} xmm0 = [-3.0595730451167367E+47,0.0E+0] -; SSE4A-NEXT: movntsd %xmm0, 48(%rdi) -; SSE4A-NEXT: retq -; -; SSE41-LABEL: test_constant_v64i8_align1: -; SSE41: # %bb.0: -; SSE41-NEXT: movabsq $-1012478732780767240, %rax # imm = 0xF1F2F3F4F5F6F7F8 -; SSE41-NEXT: movntiq %rax, 8(%rdi) -; SSE41-NEXT: movabsq $-433757350076154112, %rax # imm = 0xF9FAFBFCFDFEFF00 -; SSE41-NEXT: movntiq %rax, (%rdi) -; SSE41-NEXT: movabsq $-2169921498189994008, %rax # imm = 0xE1E2E3E4E5E6E7E8 -; SSE41-NEXT: movntiq %rax, 24(%rdi) -; SSE41-NEXT: movabsq $-1591200115485380624, %rax # imm = 0xE9EAEBECEDEEEFF0 -; SSE41-NEXT: movntiq %rax, 16(%rdi) -; SSE41-NEXT: movabsq $-3327364263599220776, %rax # imm = 0xD1D2D3D4D5D6D7D8 -; SSE41-NEXT: movntiq %rax, 40(%rdi) -; SSE41-NEXT: movabsq $-2748642880894607392, %rax # imm = 0xD9DADBDCDDDEDFE0 -; SSE41-NEXT: movntiq %rax, 32(%rdi) -; SSE41-NEXT: movabsq $-4484807029008447544, %rax # imm = 0xC1C2C3C4C5C6C7C8 -; SSE41-NEXT: movntiq %rax, 56(%rdi) -; SSE41-NEXT: movabsq $-3906085646303834160, %rax # imm = 0xC9CACBCCCDCECFD0 -; SSE41-NEXT: movntiq %rax, 48(%rdi) -; SSE41-NEXT: retq -; -; AVX-LABEL: test_constant_v64i8_align1: -; AVX: # %bb.0: -; AVX-NEXT: movabsq $-1012478732780767240, %rax # imm = 0xF1F2F3F4F5F6F7F8 -; AVX-NEXT: movntiq %rax, 8(%rdi) -; AVX-NEXT: movabsq $-433757350076154112, %rax # imm = 0xF9FAFBFCFDFEFF00 -; AVX-NEXT: movntiq %rax, (%rdi) -; AVX-NEXT: movabsq $-2169921498189994008, %rax # imm = 0xE1E2E3E4E5E6E7E8 -; AVX-NEXT: movntiq %rax, 24(%rdi) -; AVX-NEXT: movabsq $-1591200115485380624, %rax # imm = 0xE9EAEBECEDEEEFF0 -; AVX-NEXT: movntiq %rax, 16(%rdi) -; AVX-NEXT: movabsq $-3327364263599220776, %rax # imm = 0xD1D2D3D4D5D6D7D8 -; AVX-NEXT: movntiq %rax, 40(%rdi) -; AVX-NEXT: movabsq $-2748642880894607392, %rax # imm = 0xD9DADBDCDDDEDFE0 -; AVX-NEXT: movntiq %rax, 32(%rdi) -; AVX-NEXT: movabsq $-4484807029008447544, %rax # imm = 0xC1C2C3C4C5C6C7C8 -; AVX-NEXT: movntiq %rax, 56(%rdi) -; AVX-NEXT: movabsq $-3906085646303834160, %rax # imm = 0xC9CACBCCCDCECFD0 -; AVX-NEXT: movntiq %rax, 48(%rdi) -; AVX-NEXT: retq -; -; AVX512-LABEL: test_constant_v64i8_align1: -; AVX512: # %bb.0: -; AVX512-NEXT: movabsq $-1012478732780767240, %rax # imm = 0xF1F2F3F4F5F6F7F8 -; AVX512-NEXT: movntiq %rax, 8(%rdi) -; AVX512-NEXT: movabsq $-433757350076154112, %rax # imm = 0xF9FAFBFCFDFEFF00 -; AVX512-NEXT: movntiq %rax, (%rdi) -; AVX512-NEXT: movabsq $-2169921498189994008, %rax # imm = 0xE1E2E3E4E5E6E7E8 -; AVX512-NEXT: movntiq %rax, 24(%rdi) -; AVX512-NEXT: movabsq $-1591200115485380624, %rax # imm = 0xE9EAEBECEDEEEFF0 -; AVX512-NEXT: movntiq %rax, 16(%rdi) -; AVX512-NEXT: movabsq $-3327364263599220776, %rax # imm = 0xD1D2D3D4D5D6D7D8 -; AVX512-NEXT: movntiq %rax, 40(%rdi) -; AVX512-NEXT: movabsq $-2748642880894607392, %rax # imm = 0xD9DADBDCDDDEDFE0 -; AVX512-NEXT: movntiq %rax, 32(%rdi) -; AVX512-NEXT: movabsq $-4484807029008447544, %rax # imm = 0xC1C2C3C4C5C6C7C8 -; AVX512-NEXT: movntiq %rax, 56(%rdi) -; AVX512-NEXT: movabsq $-3906085646303834160, %rax # imm = 0xC9CACBCCCDCECFD0 -; AVX512-NEXT: movntiq %rax, 48(%rdi) -; AVX512-NEXT: retq +; CHECK-LABEL: test_constant_v64i8_align1: +; CHECK: # %bb.0: +; CHECK-NEXT: movabsq $-1012478732780767240, %rax # imm = 0xF1F2F3F4F5F6F7F8 +; CHECK-NEXT: movntiq %rax, 8(%rdi) +; CHECK-NEXT: movabsq $-433757350076154112, %rax # imm = 0xF9FAFBFCFDFEFF00 +; CHECK-NEXT: movntiq %rax, (%rdi) +; CHECK-NEXT: movabsq $-2169921498189994008, %rax # imm = 0xE1E2E3E4E5E6E7E8 +; CHECK-NEXT: movntiq %rax, 24(%rdi) +; CHECK-NEXT: movabsq $-1591200115485380624, %rax # imm = 0xE9EAEBECEDEEEFF0 +; CHECK-NEXT: movntiq %rax, 16(%rdi) +; CHECK-NEXT: movabsq $-3327364263599220776, %rax # imm = 0xD1D2D3D4D5D6D7D8 +; CHECK-NEXT: movntiq %rax, 40(%rdi) +; CHECK-NEXT: movabsq $-2748642880894607392, %rax # imm = 0xD9DADBDCDDDEDFE0 +; CHECK-NEXT: movntiq %rax, 32(%rdi) +; CHECK-NEXT: movabsq $-4484807029008447544, %rax # imm = 0xC1C2C3C4C5C6C7C8 +; CHECK-NEXT: movntiq %rax, 56(%rdi) +; CHECK-NEXT: movabsq $-3906085646303834160, %rax # imm = 0xC9CACBCCCDCECFD0 +; CHECK-NEXT: movntiq %rax, 48(%rdi) +; CHECK-NEXT: retq store <64 x i8> , ptr %dst, align 1, !nontemporal !1 ret void } diff --git a/llvm/test/CodeGen/X86/vector-shuffle-combining.ll b/llvm/test/CodeGen/X86/vector-shuffle-combining.ll index b5adfb3733357..0cfe3f60595a1 100644 --- a/llvm/test/CodeGen/X86/vector-shuffle-combining.ll +++ b/llvm/test/CodeGen/X86/vector-shuffle-combining.ll @@ -3169,48 +3169,18 @@ entry: declare <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16>, i32) define void @PR43024() { -; SSE2-LABEL: PR43024: -; SSE2: # %bb.0: -; SSE2-NEXT: movsd {{.*#+}} xmm0 = [NaN,NaN,0.0E+0,0.0E+0] -; SSE2-NEXT: movaps %xmm0, (%rax) -; SSE2-NEXT: addss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 -; SSE2-NEXT: xorps %xmm1, %xmm1 -; SSE2-NEXT: addss %xmm1, %xmm0 -; SSE2-NEXT: addss %xmm1, %xmm0 -; SSE2-NEXT: movss %xmm0, (%rax) -; SSE2-NEXT: retq -; -; SSSE3-LABEL: PR43024: -; SSSE3: # %bb.0: -; SSSE3-NEXT: movsd {{.*#+}} xmm0 = [NaN,NaN,0.0E+0,0.0E+0] -; SSSE3-NEXT: movaps %xmm0, (%rax) -; SSSE3-NEXT: addss %xmm0, %xmm0 -; SSSE3-NEXT: xorps %xmm1, %xmm1 -; SSSE3-NEXT: addss %xmm1, %xmm0 -; SSSE3-NEXT: addss %xmm1, %xmm0 -; SSSE3-NEXT: movss %xmm0, (%rax) -; SSSE3-NEXT: retq -; -; SSE41-LABEL: PR43024: -; SSE41: # %bb.0: -; SSE41-NEXT: movsd {{.*#+}} xmm0 = [NaN,NaN,0.0E+0,0.0E+0] -; SSE41-NEXT: movaps %xmm0, (%rax) -; SSE41-NEXT: addss %xmm0, %xmm0 -; SSE41-NEXT: xorps %xmm1, %xmm1 -; SSE41-NEXT: addss %xmm1, %xmm0 -; SSE41-NEXT: addss %xmm1, %xmm0 -; SSE41-NEXT: movss %xmm0, (%rax) -; SSE41-NEXT: retq +; SSE-LABEL: PR43024: +; SSE: # %bb.0: +; SSE-NEXT: movsd {{.*#+}} xmm0 = [NaN,NaN,0.0E+0,0.0E+0] +; SSE-NEXT: movaps %xmm0, (%rax) +; SSE-NEXT: movl $2143289344, (%rax) # imm = 0x7FC00000 +; SSE-NEXT: retq ; ; AVX-LABEL: PR43024: ; AVX: # %bb.0: ; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [NaN,NaN,0.0E+0,0.0E+0] ; AVX-NEXT: vmovaps %xmm0, (%rax) -; AVX-NEXT: vaddss {{\.?LCPI[0-9]+_[0-9]+}}+4(%rip), %xmm0, %xmm0 -; AVX-NEXT: vxorps %xmm1, %xmm1, %xmm1 -; AVX-NEXT: vaddss %xmm1, %xmm0, %xmm0 -; AVX-NEXT: vaddss %xmm1, %xmm0, %xmm0 -; AVX-NEXT: vmovss %xmm0, (%rax) +; AVX-NEXT: movl $2143289344, (%rax) # imm = 0x7FC00000 ; AVX-NEXT: retq store <4 x float> , ptr undef, align 16 %1 = load <4 x float>, ptr undef, align 16