Commit d07a7895 authored by Jay Foad's avatar Jay Foad
Browse files

[AMDGPU] Cluster FLAT instructions with both vaddr and saddr

Reviewers: rampitec, arsenm

Subscribers: kzhuravl, jvesely, wdng, nhaehnle, yaxunl, dstuttard, tpr, t-tye, hiraditya, kerbowa, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D73634
parent 6e82d0df
Loading
Loading
Loading
Loading
+7 −16
Original line number Diff line number Diff line
@@ -365,23 +365,14 @@ bool SIInstrInfo::getMemOperandsWithOffset(
  }

  if (isFLAT(LdSt)) {
    const MachineOperand *VAddr = getNamedOperand(LdSt, AMDGPU::OpName::vaddr);
    if (VAddr) {
      // Can't analyze 2 offsets.
      // FIXME remove this restriction!
      if (getNamedOperand(LdSt, AMDGPU::OpName::saddr))
        return false;

      BaseOp = VAddr;
    } else {
      // scratch instructions have either vaddr or saddr.
    // Instructions have either vaddr or saddr or both.
    BaseOp = getNamedOperand(LdSt, AMDGPU::OpName::vaddr);
    if (BaseOp)
      BaseOps.push_back(BaseOp);
    BaseOp = getNamedOperand(LdSt, AMDGPU::OpName::saddr);
    }

    Offset = getNamedOperand(LdSt, AMDGPU::OpName::offset)->getImm();
    if (!BaseOp->isReg())
      return false;
    if (BaseOp)
      BaseOps.push_back(BaseOp);
    Offset = getNamedOperand(LdSt, AMDGPU::OpName::offset)->getImm();
    return true;
  }

+3 −3
Original line number Diff line number Diff line
@@ -2,11 +2,11 @@

; Test for a conv2d like sequence of loads.

; GFX9: global_load_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}} offset:16{{$}}
; GFX9: global_load_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}{{$}}
; GFX9: global_load_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}} offset:16{{$}}
; GFX9: global_load_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}} offset:-16{{$}}
; GFX9: global_load_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}} offset:32{{$}}
; GFX9: global_load_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}} offset:-32{{$}}
; GFX9: global_load_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}} offset:32{{$}}
; GFX9: global_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}} offset:8{{$}}

define hidden amdgpu_kernel void @simpleSaddrs(i64 addrspace(1)* %dst_image, i64 addrspace(1)* %src_image ) {
@@ -45,9 +45,9 @@ entry:
  store volatile i64 %add7, i64 addrspace(1)* %ptr9

; Test various offset boundaries.
; GFX9: global_load_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, s[{{[0-9]+}}:{{[0-9]+}}] offset:4088{{$}}
; GFX9: global_load_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, off offset:4088{{$}}
; GFX9: global_load_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, s[{{[0-9]+}}:{{[0-9]+}}] offset:2040{{$}}
; GFX9: global_load_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, s[{{[0-9]+}}:{{[0-9]+}}] offset:4088{{$}}
  %gep11 = getelementptr inbounds i64, i64 addrspace(1)* %gep, i64 511
  %load11 = load i64, i64 addrspace(1)* %gep11
  %gep12 = getelementptr inbounds i64, i64 addrspace(1)* %gep, i64 1023
+2 −2
Original line number Diff line number Diff line
@@ -264,11 +264,11 @@ define amdgpu_kernel void @reorder_global_offsets(i32 addrspace(1)* nocapture %o
; CI-NEXT: buffer_store_dword v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:52{{$}}

; GFX9: global_load_dword {{v[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}} offset:12
; GFX9: global_load_dword {{v[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}} offset:28
; GFX9: global_load_dword {{v[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}} offset:44

; GFX9: global_store_dword v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}{{$}}
; GFX9: global_store_dword v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}} offset:20
; GFX9: global_load_dword {{v[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}} offset:28

; GFX9: global_store_dword v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}} offset:36
; GFX9: global_store_dword v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}} offset:52