@@ -7528,6 +7528,13 @@ void SIInstrInfo::legalizeOperandsVALUt16(MachineInstr &MI, unsigned OpIdx,
7528
7528
7529
7529
unsigned Opcode = MI.getOpcode ();
7530
7530
MachineBasicBlock *MBB = MI.getParent ();
7531
+ switch (Opcode) {
7532
+ case AMDGPU::REG_SEQUENCE:
7533
+ case AMDGPU::INSERT_SUBREG:
7534
+ legalizeSpecialInst_t16 (MI, MRI);
7535
+ return ;
7536
+ }
7537
+
7531
7538
// Legalize operands and check for size mismatch
7532
7539
if (!OpIdx || OpIdx >= MI.getNumExplicitOperands () ||
7533
7540
OpIdx >= get (Opcode).getNumOperands () ||
@@ -7565,50 +7572,66 @@ void SIInstrInfo::legalizeOperandsVALUt16(MachineInstr &MI,
7565
7572
legalizeOperandsVALUt16 (MI, OpIdx, MRI);
7566
7573
}
7567
7574
7568
- // Legalize size mismatches between 16bit and 32bit registers in v2s copy
7569
- // lowering (lower the copy itself). Including cases:
7570
- // 1. sreg32 = copy vgpr16 => vgpr32 = REG_SEQUENCE(vgpr16, lo16)
7571
- // 2. sreg32 = copy .lo16:vgpr32 / sreg32 = copy .hi16:vgpr32
7572
- // => vgpr16 = copy .hi/lo16:vgpr32
7573
- // vgpr32 = REG_SEQUENCE(vgpr16, lo16)
7575
+ // Legalize operands of special instructions whose 16-bit and 32-bit register
7576
+ // sizes mismatch during moveToVALU lowering in true16 mode. This is caused by
7577
+ // isel placing 16-bit values in both vgpr16 and sreg32. Cases include:
7578
+ // Copy
7579
+ // 1. dst32 = copy vgpr16 => dst32 = REG_SEQUENCE(vgpr16, lo16)
7580
+ // 2. dst32 = copy .lo16:vgpr32 / dst32 = copy .hi16:vgpr32
7581
+ // => dst32 = REG_SEQUENCE(.lo16/hi16:vgpr32, lo16)
7574
7582
// 3. sgpr16 = copy vgpr32/... (skipped, isel does not generate sgpr16)
7583
+ //
7584
+ // Reg_sequence / Insert_subreg
7585
+ // dst32 = reg_sequence(vgpr32, lo16/hi16) /
7586
+ // dst32 = insert_subreg(vgpr32,lo16/hi16)
7587
+ // => dst32 = reg_sequence(.lo16:vgpr32, lo16/hi16) /
7588
+ // dst32 = insert_subreg(.lo16:vgpr32, lo16/hi16)
7589
+ //
7575
7590
// This can be removed after we have sgpr16 in place.
7576
- bool SIInstrInfo::legalizeV2SCopyt16 (MachineInstr &Copy,
7577
- MachineRegisterInfo &MRI,
7578
- SIInstrWorklist &Worklist) const {
7579
- Register DstReg = Copy.getOperand (0 ).getReg ();
7580
- Register SrcReg = Copy.getOperand (1 ).getReg ();
7581
- Register SrcSubReg = Copy.getOperand (1 ).getSubReg ();
7582
- const TargetRegisterClass *NewDstRC = getDestEquivalentVGPRClass (Copy);
7583
- const TargetRegisterClass *SrcRegRC = getOpRegClass (Copy, 1 );
7584
- bool KeepCopy;
7585
-
7586
- if (RI.getMatchingSuperRegClass (NewDstRC, SrcRegRC, AMDGPU::lo16)) {
7587
- KeepCopy = 0 ;
7588
- } else if (NewDstRC == &AMDGPU::VGPR_32RegClass &&
7589
- (SrcSubReg == AMDGPU::hi16 || SrcSubReg == AMDGPU::lo16)) {
7590
- KeepCopy = 1 ;
7591
- Register NewDstReg = MRI.createVirtualRegister (&AMDGPU::VGPR_16RegClass);
7592
- Copy.getOperand (0 ).setReg (NewDstReg);
7593
- SrcReg = NewDstReg;
7594
- } else
7595
- return false ;
7591
// legalizeSpecialInst_t16: rewrites Inst in place according to its opcode.
//   COPY                         -> may be mutated into a REG_SEQUENCE
//   REG_SEQUENCE / INSERT_SUBREG -> source operands may get a lo16 subreg index
// Any other opcode passes through the switch unchanged. Nothing is returned and
// no worklist is touched here.
// (Lines prefixed '-' below belong to the removed legalizeV2SCopyt16 body shown
// by this diff; they are not part of this function.)
+ void SIInstrInfo::legalizeSpecialInst_t16 (MachineInstr &Inst,
7592
+ MachineRegisterInfo &MRI) const {
7593
+ unsigned Opcode = Inst.getOpcode ();
7594
+ const TargetRegisterClass *NewDstRC = getDestEquivalentVGPRClass (Inst); // computed for every opcode, only read in the COPY case
7595
+ switch (Opcode) {
7596
+ case AMDGPU::COPY: {
7597
+ Register SrcReg = Inst.getOperand (1 ).getReg ();
7598
+ if (!SrcReg.isVirtual () || !RI.isVGPR (MRI, SrcReg))
7599
+ return ; // source is not a virtual VGPR: leave the COPY alone
7596
7600
7597
- Register NewDstReg = MRI.createVirtualRegister (NewDstRC);
7598
- Register Undef = MRI.createVirtualRegister (&AMDGPU::VGPR_16RegClass);
7599
- BuildMI (*Copy.getParent (), &Copy, Copy.getDebugLoc (),
7600
- get (AMDGPU::IMPLICIT_DEF), Undef);
7601
- BuildMI (*Copy.getParent (), std::next (Copy.getIterator ()), Copy.getDebugLoc (),
7602
- get (AMDGPU::REG_SEQUENCE), NewDstReg)
7603
- .addReg (SrcReg)
7604
- .addImm (AMDGPU::lo16)
7605
- .addReg (Undef)
7606
- .addImm (AMDGPU::hi16);
7607
- if (!KeepCopy)
7608
- Copy.eraseFromParent ();
7609
- MRI.replaceRegWith (DstReg, NewDstReg);
7610
- addUsersToMoveToVALUWorklist (NewDstReg, MRI, Worklist);
7611
- return true ;
7601
+ bool SetSubReg = false ;
7602
+ Register SrcSubReg = Inst.getOperand (1 ).getSubReg ();
7603
+ const TargetRegisterClass *SrcRegRC = getOpRegClass (Inst, 1 );
// Two accepted shapes. The first branch is intentionally empty: a match simply
// continues to the rewrite below with SetSubReg == false.
7604
+ if (RI.getMatchingSuperRegClass (NewDstRC, SrcRegRC, AMDGPU::lo16)) {
7605
+ } else if (NewDstRC == &AMDGPU::VGPR_32RegClass &&
7606
+ (SrcSubReg == AMDGPU::hi16 || SrcSubReg == AMDGPU::lo16)) {
7607
+ SetSubReg = true ;
7608
+ } else
7609
+ return ; // neither shape applies: the COPY is left unchanged
7610
+
// A fresh VGPR_16 defined by IMPLICIT_DEF (inserted before Inst) fills the hi16
// slot appended below.
7611
+ Register Undef = MRI.createVirtualRegister (&AMDGPU::VGPR_16RegClass);
7612
+ BuildMI (*Inst.getParent (), &Inst, Inst.getDebugLoc (),
7613
+ get (AMDGPU::IMPLICIT_DEF), Undef);
7614
+ Inst.setDesc (get (AMDGPU::REG_SEQUENCE)); // Inst itself becomes the REG_SEQUENCE; its existing operands are kept
// NOTE(review): SrcSubReg was read from this same operand above and nothing
// visible in between changes it, so this setSubReg looks like a no-op - confirm.
7615
+ if (SetSubReg)
7616
+ Inst.getOperand (1 ).setSubReg (SrcSubReg);
7617
+
// Operand list after these appends: dst, src, lo16, Undef, hi16.
7618
+ Inst.addOperand (MachineOperand::CreateImm (AMDGPU::lo16));
7619
+ Inst.addOperand (MachineOperand::CreateReg (Undef, 0 ));
7620
+ Inst.addOperand (MachineOperand::CreateImm (AMDGPU::hi16));
7621
+ } break ;
7622
+ case AMDGPU::REG_SEQUENCE:
7623
+ case AMDGPU::INSERT_SUBREG: {
// Walks operands in (register, immediate) pairs starting at operand 1:
// operand 1+2*I is read as a register and operand 2+2*I as a subreg index.
// NOTE(review): that pairing fits REG_SEQUENCE. For INSERT_SUBREG the operand
// after the base register is presumably the inserted register, with the index
// last, so getImm() on it looks wrong - confirm against the opcode definition.
7624
+ for (unsigned I = 0 , E = (Inst.getNumOperands () - 1 ) / 2 ; I < E; ++I) {
7625
+ Register SrcReg = Inst.getOperand (1 + 2 * I).getReg ();
7626
+ auto SubReg = Inst.getOperand (1 + 2 * I + 1 ).getImm ();
// constrainRegClass sits before the subreg-index test in the && chain, so it is
// invoked for every virtual VGPR source, whatever the index turns out to be.
7627
+ if (SrcReg.isVirtual () && RI.isVGPR (MRI, SrcReg) &&
7628
+ MRI.constrainRegClass (SrcReg, &AMDGPU::VGPR_32RegClass) &&
7629
+ (SubReg == AMDGPU::lo16 || SubReg == AMDGPU::hi16)) {
7630
+ Inst.getOperand (1 + 2 * I).setSubReg (AMDGPU::lo16); // the source always gets lo16, whether the destination index is lo16 or hi16
7631
+ }
7632
+ }
7633
+ } break ;
7634
+ }
7612
7635
}
7613
7636
7614
7637
void SIInstrInfo::moveToVALU (SIInstrWorklist &Worklist,
@@ -8129,14 +8152,8 @@ void SIInstrInfo::moveToVALUImpl(SIInstrWorklist &Worklist,
8129
8152
return ;
8130
8153
}
8131
8154
8132
- // If this is a v2s copy between 16bit and 32bit reg,
8133
- // replace vgpr copy to reg_sequence
8134
- if (ST.useRealTrue16Insts () && Inst.isCopy () &&
8135
- Inst.getOperand (1 ).getReg ().isVirtual () &&
8136
- RI.isVGPR (MRI, Inst.getOperand (1 ).getReg ())) {
8137
- if (legalizeV2SCopyt16 (Inst, MRI, Worklist))
8138
- return ;
8139
- }
8155
+ if (ST.useRealTrue16Insts ())
8156
+ legalizeSpecialInst_t16 (Inst, MRI);
8140
8157
8141
8158
if (Inst.isCopy () && Inst.getOperand (1 ).getReg ().isVirtual () &&
8142
8159
NewDstRC == RI.getRegClassForReg (MRI, Inst.getOperand (1 ).getReg ())) {
0 commit comments