Commit 3b724f63 authored by Hans Wennborg
Browse files

Merging r323909:

------------------------------------------------------------------------
r323909 | mareko | 2018-01-31 21:18:11 +0100 (Wed, 31 Jan 2018) | 13 lines

AMDGPU: Fold inline offset for loads properly in moveToVALU on GFX9

Summary:
This enables merging loads into dwordx2 and dwordx4 loads, which is driven by inline offsets.

6500 shaders are affected:
Code Size in affected shaders: -15.14 %

Reviewers: arsenm, nhaehnle

Subscribers: kzhuravl, wdng, yaxunl, dstuttard, tpr, t-tye, llvm-commits

Differential Revision: https://reviews.llvm.org/D42078
------------------------------------------------------------------------

llvm-svn: 324089
parent 9f3da91d
Loading
Loading
Loading
Loading
+31 −22
Original line number Diff line number Diff line
@@ -3756,18 +3756,24 @@ void SIInstrInfo::moveToVALU(MachineInstr &TopInst) const {
      // FIXME: This isn't safe because the addressing mode doesn't work
      // correctly if vaddr is negative.
      //
      // FIXME: Handle v_add_u32 and VOP3 form. Also don't rely on immediate
      // being in src0.
      //
      // FIXME: Should probably be done somewhere else, maybe SIFoldOperands.
      //
      // See if we can extract an immediate offset by recognizing one of these:
      //   V_ADD_I32_e32 dst, imm, src1
      //   V_ADD_I32_e32 dst, (S_MOV_B32 imm), src1
      // V_ADD will be removed by "Remove dead machine instructions".
      if (Add && Add->getOpcode() == AMDGPU::V_ADD_I32_e32) {
      if (Add &&
          (Add->getOpcode() == AMDGPU::V_ADD_I32_e32 ||
           Add->getOpcode() == AMDGPU::V_ADD_U32_e64)) {
        static const unsigned SrcNames[2] = {
          AMDGPU::OpName::src0,
          AMDGPU::OpName::src1,
        };

        // Find a literal offset in one of source operands.
        for (int i = 0; i < 2; i++) {
          const MachineOperand *Src =
          getNamedOperand(*Add, AMDGPU::OpName::src0);
            getNamedOperand(*Add, SrcNames[i]);

          if (Src->isReg()) {
            auto Mov = MRI.getUniqueVRegDef(Src->getReg());
@@ -3782,11 +3788,14 @@ void SIInstrInfo::moveToVALU(MachineInstr &TopInst) const {
              Offset = Src->getCImm()->getZExtValue();
          }

        if (Offset && isLegalMUBUFImmOffset(Offset))
          VAddr = getNamedOperand(*Add, AMDGPU::OpName::src1);
        else
          if (Offset && isLegalMUBUFImmOffset(Offset)) {
            VAddr = getNamedOperand(*Add, SrcNames[!i]);
            break;
          }

          Offset = 0;
        }
      }

      BuildMI(*MBB, Inst, Inst.getDebugLoc(),
              get(AMDGPU::BUFFER_LOAD_DWORD_OFFEN), VDst)
+3 −15
Original line number Diff line number Diff line
@@ -194,11 +194,7 @@ main_body:

; GCN-LABEL: {{^}}smrd_vgpr_offset_imm:
; GCN-NEXT: %bb.

; SICIVI-NEXT: buffer_load_dword v{{[0-9]}}, v0, s[0:3], 0 offen offset:4095 ;

; GFX9-NEXT: v_add_u32_e32 [[ADD:v[0-9]+]], 0xfff, v0
; GFX9-NEXT: buffer_load_dword v{{[0-9]}}, [[ADD]], s[0:3], 0 offen ;
; GCN-NEXT: buffer_load_dword v{{[0-9]}}, v0, s[0:3], 0 offen offset:4095 ;
define amdgpu_ps float @smrd_vgpr_offset_imm(<4 x i32> inreg %desc, i32 %offset) #0 {
main_body:
  %off = add i32 %offset, 4095
@@ -244,16 +240,8 @@ main_body:

; GCN-LABEL: {{^}}smrd_vgpr_merged:
; GCN-NEXT: %bb.

; SICIVI-NEXT: buffer_load_dwordx4 v[{{[0-9]}}:{{[0-9]}}], v0, s[0:3], 0 offen offset:4
; SICIVI-NEXT: buffer_load_dwordx2 v[{{[0-9]}}:{{[0-9]}}], v0, s[0:3], 0 offen offset:28

; GFX9: buffer_load_dword
; GFX9: buffer_load_dword
; GFX9: buffer_load_dword
; GFX9: buffer_load_dword
; GFX9: buffer_load_dword
; GFX9: buffer_load_dword
; GCN-NEXT: buffer_load_dwordx4 v[{{[0-9]}}:{{[0-9]}}], v0, s[0:3], 0 offen offset:4
; GCN-NEXT: buffer_load_dwordx2 v[{{[0-9]}}:{{[0-9]}}], v0, s[0:3], 0 offen offset:28
define amdgpu_ps void @smrd_vgpr_merged(<4 x i32> inreg %desc, i32 %a) #0 {
main_body:
  %a1 = add i32 %a, 4