Commit 106959ac authored by Vang Thao's avatar Vang Thao
Browse files

[AMDGPU] Inline non-kernel functions using extern lds

In https://reviews.llvm.org/D100481, forced inlining of all non-kernel
functions using LDS was disabled, since the AMDGPULowerModuleLDS pass now
handles static LDS. However, that pass does not handle extern LDS, so
non-kernel functions using extern LDS must still be inlined.

Reviewed By: hsmhsm, arsenm

Differential Revision: https://reviews.llvm.org/D109773
parent d49cb5b3
Loading
Loading
Loading
Loading
+1 −1
Original line number Diff line number Diff line
@@ -122,7 +122,7 @@ static bool alwaysInlineImpl(Module &M, bool GlobalOpt) {
    unsigned AS = GV.getAddressSpace();
    if ((AS == AMDGPUAS::REGION_ADDRESS) ||
        (AS == AMDGPUAS::LOCAL_ADDRESS &&
         !AMDGPUTargetMachine::EnableLowerModuleLDS))
         (!AMDGPUTargetMachine::EnableLowerModuleLDS || !GV.hasInitializer())))
      recursivelyVisitUsers(GV, FuncsToAlwaysInline);
  }

+21 −0
Original line number Diff line number Diff line
@@ -135,4 +135,25 @@ define amdgpu_kernel void @dynamic_shared_array_6(i32 %idx) {
  ret void
}

; CHECK-LABEL: dynamic_shared_array_with_call:
; CHECK-NOT: s_swappc_b64
; Kernel that loads one float from @lds0, indexed by the x workitem id, and
; passes it to the non-kernel function @store_value. The test directives just
; above require that no s_swappc_b64 shows up in the output for this kernel.
; The %out argument is not referenced in the body.
define amdgpu_kernel void @dynamic_shared_array_with_call(float addrspace(1)* nocapture readnone %out) local_unnamed_addr {
  ; Sign-extend the x workitem id and use it as the element index into @lds0.
  %tid.x = tail call i32 @llvm.amdgcn.workitem.id.x()
  %1 = sext i32 %tid.x to i64
  %arrayidx0 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds0, i64 0, i64 %1
  %val0 = load float, float addrspace(3)* %arrayidx0, align 4
  ; Hand the loaded value to @store_value, which writes it to @dynamic_shared0.
  tail call void @store_value(float %val0)
  ret void
}

; CHECK-NOT: store_value
; Non-kernel helper that stores %val1 into @dynamic_shared0 at the element
; selected by the x workitem id. @dynamic_shared0 is accessed as a zero-length
; float array in addrspace(3); presumably it is the extern (dynamically sized)
; LDS array declared earlier in this test file -- its declaration is not
; visible in this hunk.
define linkonce_odr hidden void @store_value(float %val1) local_unnamed_addr {
entry:
  ; Sign-extend the x workitem id to form the element index.
  %tid.x = tail call i32 @llvm.amdgcn.workitem.id.x()
  %0 = sext i32 %tid.x to i64
  ; Address element %0 of @dynamic_shared0 and store the argument there.
  %arrayidx1 = getelementptr inbounds [0 x float], [0 x float] addrspace(3)* @dynamic_shared0, i64 0, i64 %0
  store float %val1, float addrspace(3)* %arrayidx1, align 4
  ret void
}

declare i32 @llvm.amdgcn.workitem.id.x()