Commit 2c04ddab authored by Tom Stellard
Browse files

Merging r259911:

------------------------------------------------------------------------
r259911 | Matthew.Arsenault | 2016-02-05 11:47:23 -0800 (Fri, 05 Feb 2016) | 5 lines

AMDGPU: Preserve alignments on newly created globals

Also switch to internal linkage, and include the name of the function in
the name.

------------------------------------------------------------------------

llvm-svn: 271639
parent eb699d0a
Loading
Loading
Loading
Loading
+10 −2
Original line number Diff line number Diff line
@@ -355,10 +355,18 @@ void AMDGPUPromoteAlloca::visitAlloca(AllocaInst &I) {
  DEBUG(dbgs() << "Promoting alloca to local memory\n");
  LocalMemAvailable -= AllocaSize;

  Function *F = I.getParent()->getParent();

  Type *GVTy = ArrayType::get(I.getAllocatedType(), 256);
  GlobalVariable *GV = new GlobalVariable(
      *Mod, GVTy, false, GlobalValue::ExternalLinkage, 0, I.getName(), 0,
      GlobalVariable::NotThreadLocal, AMDGPUAS::LOCAL_ADDRESS);
      *Mod, GVTy, false, GlobalValue::InternalLinkage,
      UndefValue::get(GVTy),
      Twine(F->getName()) + Twine('.') + I.getName(),
      nullptr,
      GlobalVariable::NotThreadLocal,
      AMDGPUAS::LOCAL_ADDRESS);
  GV->setUnnamedAddr(true);
  GV->setAlignment(I.getAlignment());

  FunctionType *FTy = FunctionType::get(
      Type::getInt32Ty(Mod->getContext()), false);
+26 −0
Original line number Diff line number Diff line
@@ -8,6 +8,10 @@

declare i32 @llvm.r600.read.tidig.x() nounwind readnone

; HSAOPT: @mova_same_clause.stack = internal unnamed_addr addrspace(3) global [256 x [5 x i32]] undef, align 4
; HSAOPT: @high_alignment.stack = internal unnamed_addr addrspace(3) global [256 x [8 x i32]] undef, align 16


; FUNC-LABEL: {{^}}mova_same_clause:

; R600: LDS_WRITE
@@ -52,6 +56,28 @@ entry:
  ret void
}

; Alignment-preservation test: %stack below is an `alloca [8 x i32]` with an
; explicit `align 16`. The HSAOPT check near the top of this file expects the
; replacement global @high_alignment.stack to be emitted in addrspace(3) as
; [256 x [8 x i32]] with that same `align 16`; the checks here verify that the
; accesses are rewritten to index into that global.
; OPT-LABEL: @high_alignment(
; OPT: getelementptr inbounds [256 x [8 x i32]], [256 x [8 x i32]] addrspace(3)* @high_alignment.stack, i32 0, i32 %{{[0-9]+}}
define void @high_alignment(i32 addrspace(1)* nocapture %out, i32 addrspace(1)* nocapture %in) {
entry:
  ; The array whose 16-byte alignment must carry over to the new global.
  %stack = alloca [8 x i32], align 16
  ; Store 4 into %stack at an index loaded from %in[0].
  %0 = load i32, i32 addrspace(1)* %in, align 4
  %arrayidx1 = getelementptr inbounds [8 x i32], [8 x i32]* %stack, i32 0, i32 %0
  store i32 4, i32* %arrayidx1, align 4
  ; Store 5 into %stack at an index loaded from %in[1].
  %arrayidx2 = getelementptr inbounds i32, i32 addrspace(1)* %in, i32 1
  %1 = load i32, i32 addrspace(1)* %arrayidx2, align 4
  %arrayidx3 = getelementptr inbounds [8 x i32], [8 x i32]* %stack, i32 0, i32 %1
  store i32 5, i32* %arrayidx3, align 4
  ; Copy %stack[0] to %out[0].
  %arrayidx10 = getelementptr inbounds [8 x i32], [8 x i32]* %stack, i32 0, i32 0
  %2 = load i32, i32* %arrayidx10, align 4
  store i32 %2, i32 addrspace(1)* %out, align 4
  ; Copy %stack[1] to %out[1]; this load and store carry no explicit alignment.
  %arrayidx12 = getelementptr inbounds [8 x i32], [8 x i32]* %stack, i32 0, i32 1
  %3 = load i32, i32* %arrayidx12
  %arrayidx13 = getelementptr inbounds i32, i32 addrspace(1)* %out, i32 1
  store i32 %3, i32 addrspace(1)* %arrayidx13
  ret void
}

; This test checks that the stack offset is calculated correctly for structs.
; All register loads/stores should be optimized away, so there shouldn't be
; any MOVA instructions.
+4 −4
Original line number Diff line number Diff line
@@ -11,7 +11,7 @@ declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i32, i1) #0
declare i32 @llvm.objectsize.i32.p0i8(i8*, i1) #1

; CHECK-LABEL: @promote_with_memcpy(
; CHECK: getelementptr [256 x [17 x i32]], [256 x [17 x i32]] addrspace(3)* @alloca, i32 0, i32 %{{[0-9]+}}
; CHECK: getelementptr [256 x [17 x i32]], [256 x [17 x i32]] addrspace(3)* @promote_with_memcpy.alloca, i32 0, i32 %{{[0-9]+}}
; CHECK: call void @llvm.memcpy.p3i8.p1i8.i32(i8 addrspace(3)* %alloca.bc, i8 addrspace(1)* %in.bc, i32 68, i32 4, i1 false)
; CHECK: call void @llvm.memcpy.p1i8.p3i8.i32(i8 addrspace(1)* %out.bc, i8 addrspace(3)* %alloca.bc, i32 68, i32 4, i1 false)
define void @promote_with_memcpy(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
@@ -25,7 +25,7 @@ define void @promote_with_memcpy(i32 addrspace(1)* %out, i32 addrspace(1)* %in)
}

; CHECK-LABEL: @promote_with_memmove(
; CHECK: getelementptr [256 x [17 x i32]], [256 x [17 x i32]] addrspace(3)* @alloca.1, i32 0, i32 %{{[0-9]+}}
; CHECK: getelementptr [256 x [17 x i32]], [256 x [17 x i32]] addrspace(3)* @promote_with_memmove.alloca, i32 0, i32 %{{[0-9]+}}
; CHECK: call void @llvm.memmove.p3i8.p1i8.i32(i8 addrspace(3)* %alloca.bc, i8 addrspace(1)* %in.bc, i32 68, i32 4, i1 false)
; CHECK: call void @llvm.memmove.p1i8.p3i8.i32(i8 addrspace(1)* %out.bc, i8 addrspace(3)* %alloca.bc, i32 68, i32 4, i1 false)
define void @promote_with_memmove(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
@@ -39,7 +39,7 @@ define void @promote_with_memmove(i32 addrspace(1)* %out, i32 addrspace(1)* %in)
}

; CHECK-LABEL: @promote_with_memset(
; CHECK: getelementptr [256 x [17 x i32]], [256 x [17 x i32]] addrspace(3)* @alloca.2, i32 0, i32 %{{[0-9]+}}
; CHECK: getelementptr [256 x [17 x i32]], [256 x [17 x i32]] addrspace(3)* @promote_with_memset.alloca, i32 0, i32 %{{[0-9]+}}
; CHECK: call void @llvm.memset.p3i8.i32(i8 addrspace(3)* %alloca.bc, i8 7, i32 68, i32 4, i1 false)
define void @promote_with_memset(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
  %alloca = alloca [17 x i32], align 16
@@ -51,7 +51,7 @@ define void @promote_with_memset(i32 addrspace(1)* %out, i32 addrspace(1)* %in)
}

; CHECK-LABEL: @promote_with_objectsize(
; CHECK: [[PTR:%[0-9]+]] = getelementptr [256 x [17 x i32]], [256 x [17 x i32]] addrspace(3)* @alloca.3, i32 0, i32 %{{[0-9]+}}
; CHECK: [[PTR:%[0-9]+]] = getelementptr [256 x [17 x i32]], [256 x [17 x i32]] addrspace(3)* @promote_with_objectsize.alloca, i32 0, i32 %{{[0-9]+}}
; CHECK: call i32 @llvm.objectsize.i32.p3i8(i8 addrspace(3)* %alloca.bc, i1 false)
define void @promote_with_objectsize(i32 addrspace(1)* %out) #0 {
  %alloca = alloca [17 x i32], align 16