Unverified Commit fe8335ba authored by Stanislav Mekhanoshin's avatar Stanislav Mekhanoshin Committed by GitHub
Browse files

[AMDGPU] Select 64-bit imm moves if can be encoded as 32 bit operand (#70395)

This allows folding of 64-bit operands if fit into 32-bit. Fixes
https://github.com/llvm/llvm-project/issues/67781
parent ee6d62db
Loading
Loading
Loading
Loading
+9 −4
Original line number Diff line number Diff line
@@ -595,11 +595,15 @@ void AMDGPUDAGToDAGISel::Select(SDNode *N) {
      break;

    uint64_t Imm;
    if (ConstantFPSDNode *FP = dyn_cast<ConstantFPSDNode>(N))
    if (ConstantFPSDNode *FP = dyn_cast<ConstantFPSDNode>(N)) {
      Imm = FP->getValueAPF().bitcastToAPInt().getZExtValue();
    else {
      if (AMDGPU::isValid32BitLiteral(Imm, true))
        break;
    } else {
      ConstantSDNode *C = cast<ConstantSDNode>(N);
      Imm = C->getZExtValue();
      if (AMDGPU::isValid32BitLiteral(Imm, false))
        break;
    }

    SDLoc DL(N);
@@ -3014,7 +3018,7 @@ bool AMDGPUDAGToDAGISel::isVGPRImm(const SDNode * N) const {
    if (!RC || SIRI->isSGPRClass(RC))
      return false;

    if (RC != &AMDGPU::VS_32RegClass) {
    if (RC != &AMDGPU::VS_32RegClass && RC != &AMDGPU::VS_64RegClass) {
      AllUsesAcceptSReg = false;
      SDNode * User = *U;
      if (User->isMachineOpcode()) {
@@ -3026,7 +3030,8 @@ bool AMDGPUDAGToDAGISel::isVGPRImm(const SDNode * N) const {
          if (SII->findCommutedOpIndices(Desc, OpIdx, CommuteIdx1)) {
            unsigned CommutedOpNo = CommuteIdx1 - Desc.getNumDefs();
            const TargetRegisterClass *CommutedRC = getOperandRegClass(*U, CommutedOpNo);
            if (CommutedRC == &AMDGPU::VS_32RegClass)
            if (CommutedRC == &AMDGPU::VS_32RegClass ||
                CommutedRC == &AMDGPU::VS_64RegClass)
              AllUsesAcceptSReg = true;
          }
        }
+8 −0
Original line number Diff line number Diff line
@@ -2551,11 +2551,13 @@ bool AMDGPUInstructionSelector::selectG_CONSTANT(MachineInstr &I) const {
  MachineOperand &ImmOp = I.getOperand(1);
  Register DstReg = I.getOperand(0).getReg();
  unsigned Size = MRI->getType(DstReg).getSizeInBits();
  bool IsFP = false;

  // The AMDGPU backend only supports Imm operands and not CImm or FPImm.
  if (ImmOp.isFPImm()) {
    const APInt &Imm = ImmOp.getFPImm()->getValueAPF().bitcastToAPInt();
    ImmOp.ChangeToImmediate(Imm.getZExtValue());
    IsFP = true;
  } else if (ImmOp.isCImm()) {
    ImmOp.ChangeToImmediate(ImmOp.getCImm()->getSExtValue());
  } else {
@@ -2568,6 +2570,12 @@ bool AMDGPUInstructionSelector::selectG_CONSTANT(MachineInstr &I) const {
  unsigned Opcode;
  if (DstRB->getID() == AMDGPU::VCCRegBankID) {
    Opcode = STI.isWave32() ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64;
  } else if (Size == 64 &&
             AMDGPU::isValid32BitLiteral(I.getOperand(1).getImm(), IsFP)) {
    Opcode = IsSgpr ? AMDGPU::S_MOV_B64_IMM_PSEUDO : AMDGPU::V_MOV_B64_PSEUDO;
    I.setDesc(TII.get(Opcode));
    I.addImplicitDefUseOperands(*MF);
    return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
  } else {
    Opcode = IsSgpr ? AMDGPU::S_MOV_B32 : AMDGPU::V_MOV_B32_e32;

+1 −1
Original line number Diff line number Diff line
@@ -367,7 +367,7 @@ static bool isSafeToFoldImmIntoCopy(const MachineInstr *Copy,
    SMovOp = AMDGPU::S_MOV_B32;
    break;
  case AMDGPU::V_MOV_B64_PSEUDO:
    SMovOp = AMDGPU::S_MOV_B64;
    SMovOp = AMDGPU::S_MOV_B64_IMM_PSEUDO;
    break;
  }
  Imm = ImmOp->getImm();
+23 −0
Original line number Diff line number Diff line
@@ -1966,6 +1966,29 @@ def : GCNPat <
  (V_MOV_B32_e32 (f16 (bitcast_fpimm_to_i32 $imm)))
>;

// V_MOV_B64_PSEUDO and S_MOV_B64_IMM_PSEUDO can be used with any 64-bit
// immediate and wil be expanded as needed, but we will only use these patterns
// for values which can be encoded.
def : GCNPat <
  (VGPRImm<(i64 imm)>:$imm),
  (V_MOV_B64_PSEUDO imm:$imm)
>;

def : GCNPat <
  (VGPRImm<(f64 fpimm)>:$imm),
  (V_MOV_B64_PSEUDO (f64 (bitcast_fpimm_to_i64 $imm)))
>;

def : GCNPat <
  (i64 imm:$imm),
  (S_MOV_B64_IMM_PSEUDO imm:$imm)
>;

def : GCNPat <
  (f64 fpimm:$imm),
  (S_MOV_B64_IMM_PSEUDO (i64 (bitcast_fpimm_to_i64 fpimm:$imm)))
>;

def : GCNPat <
  (f32 fpimm:$imm),
  (S_MOV_B32 (f32 (bitcast_fpimm_to_i32 $imm)))
+3 −1
Original line number Diff line number Diff line
@@ -41,11 +41,12 @@ entry:
}

; GCN-LABEL: {{^}}v_clamp_i64_i16_invalid_lower
; GFX6789: v_mov_b32_e32 v{{[0-9]+}}, 0x8001
; GFX6789: v_mov_b32_e32 [[B:v[0-9]+]], 0x8001
; GFX6789: v_cndmask_b32_e32 [[A:v[0-9]+]], [[B]], [[A]], vcc
; GFX6789: v_cndmask_b32_e32 [[C:v[0-9]+]], 0, [[C]], vcc

; GFX10: v_cndmask_b32_e32 [[A:v[0-9]+]], 0x8001, [[A]], vcc_lo
; GFX10: v_{{(dual_)?}}cndmask_b32{{(_e32)?}} [[A:v[0-9]+]], 0x8001, [[A]]
; GFX10: v_cndmask_b32_e32 [[B:v[0-9]+]], 0, [[B]], vcc_lo
define i16 @v_clamp_i64_i16_invalid_lower(i64 %in) #0 {
entry:
@@ -56,6 +57,7 @@ entry:
}

; GCN-LABEL: {{^}}v_clamp_i64_i16_invalid_lower_and_higher
; GFX6789: v_mov_b32_e32 v{{[0-9]+}}, 0x8000
; GFX6789: v_mov_b32_e32 [[B:v[0-9]+]], 0x8000
; GFX6789: v_cndmask_b32_e32 [[A:v[0-9]+]], [[B]], [[A]], vcc
; GFX10: v_cndmask_b32_e32 [[A:v[0-9]+]], 0x8000, [[A]], vcc_lo
Loading