Commit 9f3da91d authored by Hans Wennborg's avatar Hans Wennborg
Browse files

Merging r323907 and r323913:

------------------------------------------------------------------------
r323907 | mareko | 2018-01-31 21:17:52 +0100 (Wed, 31 Jan 2018) | 11 lines

[SeparateConstOffsetFromGEP] Preserve metadata when splitting GEPs

Summary:
!amdgpu.uniform needs to be preserved for AMDGPU, otherwise bad things
happen.

Reviewers: arsenm, nhaehnle, jingyue, broune, majnemer, bjarke.roune, dblaikie

Subscribers: wdng, tpr, llvm-commits

Differential Revision: https://reviews.llvm.org/D42744
------------------------------------------------------------------------

------------------------------------------------------------------------
r323913 | mareko | 2018-01-31 21:49:19 +0100 (Wed, 31 Jan 2018) | 1 line

[SeparateConstOffsetFromGEP] Fix up addrspace in the AMDGPU test
------------------------------------------------------------------------

llvm-svn: 324088
parent 87627246
Loading
Loading
Loading
Loading
+2 −0
Original line number Diff line number Diff line
@@ -1071,6 +1071,7 @@ bool SeparateConstOffsetFromGEP::splitGEP(GetElementPtrInst *GEP) {
    NewGEP = GetElementPtrInst::Create(GEP->getResultElementType(), NewGEP,
                                       ConstantInt::get(IntPtrTy, Index, true),
                                       GEP->getName(), GEP);
    NewGEP->copyMetadata(*GEP);
    // Inherit the inbounds attribute of the original GEP.
    cast<GetElementPtrInst>(NewGEP)->setIsInBounds(GEPWasInBounds);
  } else {
@@ -1095,6 +1096,7 @@ bool SeparateConstOffsetFromGEP::splitGEP(GetElementPtrInst *GEP) {
        Type::getInt8Ty(GEP->getContext()), NewGEP,
        ConstantInt::get(IntPtrTy, AccumulativeByteOffset, true), "uglygep",
        GEP);
    NewGEP->copyMetadata(*GEP);
    // Inherit the inbounds attribute of the original GEP.
    cast<GetElementPtrInst>(NewGEP)->setIsInBounds(GEPWasInBounds);
    if (GEP->getType() != I8PtrTy)
+45 −0
Original line number Diff line number Diff line
@@ -92,3 +92,48 @@ define amdgpu_kernel void @sum_of_lds_array_over_max_mubuf_offset(i32 %x, i32 %y
  store float %tmp21, float addrspace(1)* %output, align 4
  ret void
}

; IR-LABEL: @keep_metadata(
; IR: getelementptr {{.*}} !amdgpu.uniform
; IR: getelementptr {{.*}} !amdgpu.uniform
; IR: getelementptr {{.*}} !amdgpu.uniform
define amdgpu_ps <{ i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> @keep_metadata([0 x <4 x i32>] addrspace(2)* inreg noalias dereferenceable(18446744073709551615), [0 x <8 x i32>] addrspace(2)* inreg noalias dereferenceable(18446744073709551615), [0 x <4 x i32>] addrspace(2)* inreg noalias dereferenceable(18446744073709551615), [0 x <8 x i32>] addrspace(2)* inreg noalias dereferenceable(18446744073709551615), float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, i32, i32, float, i32) #5 {
main_body:
  %22 = call nsz float @llvm.amdgcn.interp.mov(i32 2, i32 0, i32 0, i32 %5) #8
  %23 = bitcast float %22 to i32
  %24 = shl i32 %23, 1
  %25 = getelementptr [0 x <8 x i32>], [0 x <8 x i32>] addrspace(2)* %1, i32 0, i32 %24, !amdgpu.uniform !0
  %26 = load <8 x i32>, <8 x i32> addrspace(2)* %25, align 32, !invariant.load !0
  %27 = shl i32 %23, 2
  %28 = or i32 %27, 3
  %29 = bitcast [0 x <8 x i32>] addrspace(2)* %1 to [0 x <4 x i32>] addrspace(2)*
  %30 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %29, i32 0, i32 %28, !amdgpu.uniform !0
  %31 = load <4 x i32>, <4 x i32> addrspace(2)* %30, align 16, !invariant.load !0
  %32 = call nsz <4 x float> @llvm.amdgcn.image.sample.v4f32.v2f32.v8i32(<2 x float> zeroinitializer, <8 x i32> %26, <4 x i32> %31, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) #8
  %33 = extractelement <4 x float> %32, i32 0
  %34 = extractelement <4 x float> %32, i32 1
  %35 = extractelement <4 x float> %32, i32 2
  %36 = extractelement <4 x float> %32, i32 3
  %37 = bitcast float %4 to i32
  %38 = insertvalue <{ i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> undef, i32 %37, 4
  %39 = insertvalue <{ i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %38, float %33, 5
  %40 = insertvalue <{ i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %39, float %34, 6
  %41 = insertvalue <{ i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %40, float %35, 7
  %42 = insertvalue <{ i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %41, float %36, 8
  %43 = insertvalue <{ i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %42, float %20, 19
  ret <{ i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %43
}

; Function Attrs: nounwind readnone speculatable
declare float @llvm.amdgcn.interp.mov(i32, i32, i32, i32) #6

; Function Attrs: nounwind readonly
declare <4 x float> @llvm.amdgcn.image.sample.v4f32.v2f32.v8i32(<2 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #7


!0 = !{}

attributes #5 = { "InitialPSInputAddr"="45175" }
attributes #6 = { nounwind readnone speculatable }
attributes #7 = { nounwind readonly }
attributes #8 = { nounwind readnone }