Skip to content

Commit b898a54

Browse files
authored
Fix genPutArgStkFieldList for SIMD12 (#88920)
1 parent 2a7ba8a commit b898a54

File tree

11 files changed

+169
-52
lines changed

11 files changed

+169
-52
lines changed

src/coreclr/jit/codegenarm64.cpp

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -5313,14 +5313,11 @@ void CodeGen::genStoreLclTypeSimd12(GenTreeLclVarCommon* treeNode)
53135313
{
53145314
// Simply use mov if we move a SIMD12 reg to another SIMD12 reg
53155315
assert(GetEmitter()->isVectorRegister(tgtReg));
5316-
53175316
inst_Mov(treeNode->TypeGet(), tgtReg, dataReg, /* canSkip */ true);
53185317
}
53195318
else
53205319
{
5321-
// Need an additional integer register to extract upper 4 bytes from data.
5322-
regNumber tmpReg = treeNode->GetSingleTempReg();
5323-
GetEmitter()->emitStoreSimd12ToLclOffset(varNum, offs, dataReg, tmpReg);
5320+
GetEmitter()->emitStoreSimd12ToLclOffset(varNum, offs, dataReg, treeNode);
53245321
}
53255322
genUpdateLifeStore(treeNode, tgtReg, varDsc);
53265323
}

src/coreclr/jit/codegenarmarch.cpp

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -815,10 +815,9 @@ void CodeGen::genPutArgStk(GenTreePutArgStk* treeNode)
815815
assert(compMacOsArm64Abi() || treeNode->GetStackByteSize() % TARGET_POINTER_SIZE == 0);
816816

817817
#ifdef TARGET_ARM64
818-
if (compMacOsArm64Abi() && (treeNode->GetStackByteSize() == 12))
818+
if (treeNode->GetStackByteSize() == 12)
819819
{
820-
regNumber tmpReg = treeNode->GetSingleTempReg();
821-
GetEmitter()->emitStoreSimd12ToLclOffset(varNumOut, argOffsetOut, srcReg, tmpReg);
820+
GetEmitter()->emitStoreSimd12ToLclOffset(varNumOut, argOffsetOut, srcReg, treeNode);
822821
argOffsetOut += 12;
823822
}
824823
else

src/coreclr/jit/codegenlinear.cpp

Lines changed: 3 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1868,13 +1868,10 @@ void CodeGen::genPutArgStkFieldList(GenTreePutArgStk* putArgStk, unsigned outArg
18681868
// Emit store instructions to store the registers produced by the GT_FIELD_LIST into the outgoing
18691869
// argument area.
18701870

1871-
#if defined(FEATURE_SIMD) && defined(TARGET_ARM64)
1872-
// storing of TYP_SIMD12 (i.e. Vector3) argument.
1873-
if (compMacOsArm64Abi() && (type == TYP_SIMD12))
1871+
#if defined(FEATURE_SIMD)
1872+
if (type == TYP_SIMD12)
18741873
{
1875-
// Need an additional integer register to extract upper 4 bytes from data.
1876-
regNumber tmpReg = nextArgNode->GetSingleTempReg();
1877-
GetEmitter()->emitStoreSimd12ToLclOffset(outArgVarNum, thisFieldOffset, reg, tmpReg);
1874+
GetEmitter()->emitStoreSimd12ToLclOffset(outArgVarNum, thisFieldOffset, reg, nextArgNode);
18781875
}
18791876
else
18801877
#endif // FEATURE_SIMD

src/coreclr/jit/emit.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2714,6 +2714,10 @@ class emitter
27142714
void emitMarkStackLvl(unsigned stackLevel);
27152715
#endif
27162716

2717+
#if defined(FEATURE_SIMD)
2718+
void emitStoreSimd12ToLclOffset(unsigned varNum, unsigned offset, regNumber dataReg, GenTree* tmpRegProvider);
2719+
#endif // FEATURE_SIMD
2720+
27172721
int emitNextRandomNop();
27182722

27192723
//

src/coreclr/jit/emitarm64.cpp

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16767,4 +16767,32 @@ bool emitter::IsOptimizableLdrToMov(
1676716767

1676816768
return true;
1676916769
}
16770+
16771+
#if defined(FEATURE_SIMD)
16772+
//-----------------------------------------------------------------------------------
16773+
// emitStoreSimd12ToLclOffset: store SIMD12 value from dataReg to varNum+offset.
16774+
//
16775+
// Arguments:
16776+
// varNum - the variable on the stack to use as a base;
16777+
// offset - the offset from the varNum;
16778+
// dataReg - the src reg with SIMD12 value;
16779+
// tmpRegProvider - a tree to grab a tmp reg from if needed.
16780+
//
16781+
void emitter::emitStoreSimd12ToLclOffset(unsigned varNum, unsigned offset, regNumber dataReg, GenTree* tmpRegProvider)
16782+
{
16783+
assert(varNum != BAD_VAR_NUM);
16784+
assert(isVectorRegister(dataReg));
16785+
16786+
// store lower 8 bytes
16787+
emitIns_S_R(INS_str, EA_8BYTE, dataReg, varNum, offset);
16788+
16789+
// Extract upper 4-bytes from data
16790+
regNumber tmpReg = tmpRegProvider->GetSingleTempReg();
16791+
emitIns_R_R_I(INS_mov, EA_4BYTE, tmpReg, dataReg, 2);
16792+
16793+
// 4-byte write
16794+
emitIns_S_R(INS_str, EA_4BYTE, tmpReg, varNum, offset + 8);
16795+
}
16796+
#endif // FEATURE_SIMD
16797+
1677016798
#endif // defined(TARGET_ARM64)

src/coreclr/jit/emitarm64.h

Lines changed: 0 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -1003,30 +1003,4 @@ inline bool emitIsLoadConstant(instrDesc* jmp)
10031003
(jmp->idInsFmt() == IF_LARGELDC));
10041004
}
10051005

1006-
#if defined(FEATURE_SIMD)
1007-
//-----------------------------------------------------------------------------------
1008-
// emitStoreSimd12ToLclOffset: store SIMD12 value from dataReg to varNum+offset.
1009-
//
1010-
// Arguments:
1011-
// varNum - the variable on the stack to use as a base;
1012-
// offset - the offset from the varNum;
1013-
// dataReg - the src reg with SIMD12 value;
1014-
// tmpReg - a tmp reg to use for the write, can be general or float.
1015-
//
1016-
void emitStoreSimd12ToLclOffset(unsigned varNum, unsigned offset, regNumber dataReg, regNumber tmpReg)
1017-
{
1018-
assert(varNum != BAD_VAR_NUM);
1019-
assert(isVectorRegister(dataReg));
1020-
1021-
// store lower 8 bytes
1022-
emitIns_S_R(INS_str, EA_8BYTE, dataReg, varNum, offset);
1023-
1024-
// Extract upper 4-bytes from data
1025-
emitIns_R_R_I(INS_mov, EA_4BYTE, tmpReg, dataReg, 2);
1026-
1027-
// 4-byte write
1028-
emitIns_S_R(INS_str, EA_4BYTE, tmpReg, varNum, offset + 8);
1029-
}
1030-
#endif // FEATURE_SIMD
1031-
10321006
#endif // TARGET_ARM64

src/coreclr/jit/emitxarch.cpp

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5648,6 +5648,43 @@ void emitter::emitIns_R(instruction ins, emitAttr attr, regNumber reg)
56485648
emitAdjustStackDepthPushPop(ins);
56495649
}
56505650

5651+
#if defined(FEATURE_SIMD)
5652+
//-----------------------------------------------------------------------------------
5653+
// emitStoreSimd12ToLclOffset: store SIMD12 value from dataReg to varNum+offset.
5654+
//
5655+
// Arguments:
5656+
// varNum - the variable on the stack to use as a base;
5657+
// offset - the offset from the varNum;
5658+
// dataReg - the src reg with SIMD12 value;
5659+
// tmpRegProvider - a tree to grab a tmp reg from if needed.
5660+
//
5661+
void emitter::emitStoreSimd12ToLclOffset(unsigned varNum, unsigned offset, regNumber dataReg, GenTree* tmpRegProvider)
5662+
{
5663+
assert(varNum != BAD_VAR_NUM);
5664+
assert(isFloatReg(dataReg));
5665+
5666+
// Store lower 8 bytes
5667+
emitIns_S_R(INS_movsd_simd, EA_8BYTE, dataReg, varNum, offset);
5668+
5669+
if (emitComp->compOpportunisticallyDependsOn(InstructionSet_SSE41))
5670+
{
5671+
// Extract and store upper 4 bytes
5672+
emitIns_S_R_I(INS_extractps, EA_16BYTE, varNum, offset + 8, dataReg, 2);
5673+
}
5674+
else
5675+
{
5676+
regNumber tmpReg = tmpRegProvider->GetSingleTempReg();
5677+
assert(isFloatReg(tmpReg));
5678+
5679+
// Extract upper 4 bytes from data
5680+
emitIns_R_R(INS_movhlps, EA_16BYTE, tmpReg, dataReg);
5681+
5682+
// Store upper 4 bytes
5683+
emitIns_S_R(INS_movss, EA_4BYTE, tmpReg, varNum, offset + 8);
5684+
}
5685+
}
5686+
#endif // FEATURE_SIMD
5687+
56515688
/*****************************************************************************
56525689
*
56535690
* Add an instruction referencing a register and a constant.

src/coreclr/jit/lsraarmarch.cpp

Lines changed: 4 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -425,15 +425,11 @@ int LinearScan::BuildPutArgStk(GenTreePutArgStk* argNode)
425425
srcCount++;
426426

427427
#if defined(FEATURE_SIMD)
428-
if (compMacOsArm64Abi())
428+
if (use.GetType() == TYP_SIMD12)
429429
{
430-
if (use.GetType() == TYP_SIMD12)
431-
{
432-
// Vector3 is read/written as two reads/writes: 8 byte and 4 byte.
433-
// To assemble the vector properly we would need an additional int register.
434-
// The other platforms can write it as 16-byte using 1 write.
435-
buildInternalIntRegisterDefForNode(use.GetNode());
436-
}
430+
// Vector3 is read/written as two reads/writes: 8 byte and 4 byte.
431+
// To assemble the vector properly we would need an additional int register.
432+
buildInternalIntRegisterDefForNode(use.GetNode());
437433
}
438434
#endif // FEATURE_SIMD
439435
}

src/coreclr/jit/lsraxarch.cpp

Lines changed: 15 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1651,12 +1651,22 @@ int LinearScan::BuildPutArgStk(GenTreePutArgStk* putArgStk)
16511651
#endif // TARGET_X86
16521652

16531653
#if defined(FEATURE_SIMD)
1654-
// Note that we need to check the field type, not the type of the node. This is because the
1655-
// field type will be TYP_SIMD12 whereas the node type might be TYP_SIMD16 for lclVar, where
1656-
// we "round up" to 16.
1657-
if ((fieldType == TYP_SIMD12) && (simdTemp == nullptr))
1654+
if (fieldType == TYP_SIMD12)
16581655
{
1659-
simdTemp = buildInternalFloatRegisterDefForNode(putArgStk);
1656+
// Note that we need to check the field type, not the type of the node. This is because the
1657+
// field type will be TYP_SIMD12 whereas the node type might be TYP_SIMD16 for lclVar, where
1658+
// we "round up" to 16.
1659+
if (simdTemp == nullptr)
1660+
{
1661+
simdTemp = buildInternalFloatRegisterDefForNode(putArgStk);
1662+
}
1663+
1664+
if (!compiler->compOpportunisticallyDependsOn(InstructionSet_SSE41))
1665+
{
1666+
// To store SIMD12 without SSE4.1 (extractps) we will need
1667+
// a temp xmm reg to do the shuffle.
1668+
buildInternalFloatRegisterDefForNode(use.GetNode());
1669+
}
16601670
}
16611671
#endif // defined(FEATURE_SIMD)
16621672

Lines changed: 67 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,67 @@
1+
// Licensed to the .NET Foundation under one or more agreements.
2+
// The .NET Foundation licenses this file to you under the MIT license.
3+
4+
using System.Collections.Generic;
5+
using System.Numerics;
6+
using System.Runtime.CompilerServices;
7+
using Xunit;
8+
9+
public readonly struct Ray
10+
{
11+
public readonly Vector3 Origin;
12+
public readonly Vector3 Direction;
13+
14+
public Ray(Vector3 origin, Vector3 direction)
15+
{
16+
Origin = origin;
17+
Direction = direction;
18+
}
19+
}
20+
21+
public class RayTracer
22+
{
23+
private static IEnumerable<object> hittables;
24+
25+
static RayTracer()
26+
{
27+
var list = new List<object>();
28+
list.Add(new object());
29+
hittables = list;
30+
}
31+
32+
[Fact]
33+
public static int TestEntryPoint()
34+
{
35+
Ray r = GetRay();
36+
Consume(r.Direction);
37+
traceRay(r);
38+
return 100;
39+
}
40+
41+
[MethodImpl(MethodImplOptions.NoInlining)]
42+
private static void Consume(object o)
43+
{
44+
var _ = o.ToString();
45+
}
46+
47+
private static Ray GetRay()
48+
{
49+
return new Ray(Vector3.Zero, -Vector3.UnitY);
50+
}
51+
52+
private static Vector3 traceRay(Ray r)
53+
{
54+
foreach (object h in hittables)
55+
{
56+
TestHit(r);
57+
return Vector3.One;
58+
}
59+
return Vector3.Zero;
60+
}
61+
62+
[MethodImpl(MethodImplOptions.NoInlining)]
63+
private static void TestHit(Ray ray)
64+
{
65+
return;
66+
}
67+
}

0 commit comments

Comments
 (0)