[SeparateConstOffsetFromGEP] strengthen the inbounds attribute (13a80eac) · Commits · llvm-doe / llvm-project

llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp

+9 −4

Original line number	Diff line number	Diff line
		@@ -892,12 +892,12 @@ bool SeparateConstOffsetFromGEP::splitGEP(GetElementPtrInst *GEP) {
		// e.g.,
		//
		// b = add i64 a, 5
		// addr = gep inbounds float* p, i64 b
		// addr = gep inbounds float, float* p, i64 b
		//
		// is transformed to:
		//
		// addr2 = gep float* p, i64 a
		// addr = gep float* addr2, i64 5
		// addr2 = gep float, float* p, i64 a ; inbounds removed
		// addr = gep inbounds float, float* addr2, i64 5
		//
		// If a is -4, although the old index b is in bounds, the new index a is
		// out of bounds. http://llvm.org/docs/LangRef.html#id181 says "if the
		@@ -907,6 +907,7 @@ bool SeparateConstOffsetFromGEP::splitGEP(GetElementPtrInst *GEP) {
		//
		// TODO(jingyue): do some range analysis to keep as many inbounds as
		// possible. GEPs with inbounds are more friendly to alias analysis.
		bool GEPWasInBounds = GEP->isInBounds();
		GEP->setIsInBounds(false);

		// Lowers a GEP to either GEPs with a single index or arithmetic operations.
		@@ -968,6 +969,8 @@ bool SeparateConstOffsetFromGEP::splitGEP(GetElementPtrInst *GEP) {
		NewGEP = GetElementPtrInst::Create(GEP->getResultElementType(), NewGEP,
		ConstantInt::get(IntPtrTy, Index, true),
		GEP->getName(), GEP);
		// Inherit the inbounds attribute of the original GEP.
		cast<GetElementPtrInst>(NewGEP)->setIsInBounds(GEPWasInBounds);
		} else {
		// Unlikely but possible. For example,
		// #pragma pack(1)
		@@ -990,6 +993,8 @@ bool SeparateConstOffsetFromGEP::splitGEP(GetElementPtrInst *GEP) {
		Type::getInt8Ty(GEP->getContext()), NewGEP,
		ConstantInt::get(IntPtrTy, AccumulativeByteOffset, true), "uglygep",
		GEP);
		// Inherit the inbounds attribute of the original GEP.
		cast<GetElementPtrInst>(NewGEP)->setIsInBounds(GEPWasInBounds);
		if (GEP->getType() != I8PtrTy)
		NewGEP = new BitCastInst(NewGEP, GEP->getType(), GEP->getName(), GEP);
		}

llvm/test/Transforms/SeparateConstOffsetFromGEP/AMDGPU/split-gep-and-gvn-addrspace-addressing-modes.ll

+7 −7

Original line number	Diff line number	Diff line
		@@ -6,9 +6,9 @@ target datalayout = "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-p24:

		; IR-LABEL: @sum_of_array(
		; IR: [[BASE_PTR:%[a-zA-Z0-9]+]] = getelementptr inbounds [4096 x [32 x float]], [4096 x [32 x float]] addrspace(2)* @array, i64 0, i64 %{{[a-zA-Z0-9]+}}, i64 %{{[a-zA-Z0-9]+}}
		; IR: getelementptr float, float addrspace(2)* [[BASE_PTR]], i64 1
		; IR: getelementptr float, float addrspace(2)* [[BASE_PTR]], i64 32
		; IR: getelementptr float, float addrspace(2)* [[BASE_PTR]], i64 33
		; IR: getelementptr inbounds float, float addrspace(2)* [[BASE_PTR]], i64 1
		; IR: getelementptr inbounds float, float addrspace(2)* [[BASE_PTR]], i64 32
		; IR: getelementptr inbounds float, float addrspace(2)* [[BASE_PTR]], i64 33
		define void @sum_of_array(i32 %x, i32 %y, float addrspace(1)* nocapture %output) {
		%tmp = sext i32 %y to i64
		%tmp1 = sext i32 %x to i64
		@@ -38,7 +38,7 @@ define void @sum_of_array(i32 %x, i32 %y, float addrspace(1)* nocapture %output)

		; IR-LABEL: @sum_of_array_over_max_mubuf_offset(
		; IR: [[BASE_PTR:%[a-zA-Z0-9]+]] = getelementptr inbounds [4096 x [4 x float]], [4096 x [4 x float]] addrspace(2)* @array2, i64 0, i64 %{{[a-zA-Z0-9]+}}, i64 %{{[a-zA-Z0-9]+}}
		; IR: getelementptr float, float addrspace(2)* [[BASE_PTR]], i64 255
		; IR: getelementptr inbounds float, float addrspace(2)* [[BASE_PTR]], i64 255
		; IR: add i32 %x, 256
		; IR: getelementptr inbounds [4096 x [4 x float]], [4096 x [4 x float]] addrspace(2)* @array2, i64 0, i64 %{{[a-zA-Z0-9]+}}, i64 %{{[a-zA-Z0-9]+}}
		; IR: getelementptr inbounds [4096 x [4 x float]], [4096 x [4 x float]] addrspace(2)* @array2, i64 0, i64 %{{[a-zA-Z0-9]+}}, i64 %{{[a-zA-Z0-9]+}}
		@@ -71,9 +71,9 @@ define void @sum_of_array_over_max_mubuf_offset(i32 %x, i32 %y, float addrspace(
		; DS instructions have a larger immediate offset, so make sure these are OK.
		; IR-LABEL: @sum_of_lds_array_over_max_mubuf_offset(
		; IR: [[BASE_PTR:%[a-zA-Z0-9]+]] = getelementptr inbounds [4096 x [4 x float]], [4096 x [4 x float]] addrspace(3)* @lds_array, i32 0, i32 %{{[a-zA-Z0-9]+}}, i32 %{{[a-zA-Z0-9]+}}
		; IR: getelementptr float, float addrspace(3)* [[BASE_PTR]], i32 255
		; IR: getelementptr float, float addrspace(3)* [[BASE_PTR]], i32 16128
		; IR: getelementptr float, float addrspace(3)* [[BASE_PTR]], i32 16383
		; IR: getelementptr inbounds float, float addrspace(3)* [[BASE_PTR]], i32 255
		; IR: getelementptr inbounds float, float addrspace(3)* [[BASE_PTR]], i32 16128
		; IR: getelementptr inbounds float, float addrspace(3)* [[BASE_PTR]], i32 16383
		define void @sum_of_lds_array_over_max_mubuf_offset(i32 %x, i32 %y, float addrspace(1)* nocapture %output) {
		%tmp2 = getelementptr inbounds [4096 x [4 x float]], [4096 x [4 x float]] addrspace(3)* @lds_array, i32 0, i32 %x, i32 %y
		%tmp4 = load float, float addrspace(3)* %tmp2, align 4

llvm/test/Transforms/SeparateConstOffsetFromGEP/NVPTX/split-gep-and-gvn.ll

+12 −12

Original line number	Diff line number	Diff line
		@@ -52,9 +52,9 @@ define void @sum_of_array(i32 %x, i32 %y, float* nocapture %output) {

		; IR-LABEL: @sum_of_array(
		; IR: [[BASE_PTR:%[a-zA-Z0-9]+]] = getelementptr inbounds [32 x [32 x float]], [32 x [32 x float]] addrspace(3)* @array, i64 0, i64 %{{[a-zA-Z0-9]+}}, i64 %{{[a-zA-Z0-9]+}}
		; IR: getelementptr float, float addrspace(3)* [[BASE_PTR]], i64 1
		; IR: getelementptr float, float addrspace(3)* [[BASE_PTR]], i64 32
		; IR: getelementptr float, float addrspace(3)* [[BASE_PTR]], i64 33
		; IR: getelementptr inbounds float, float addrspace(3)* [[BASE_PTR]], i64 1
		; IR: getelementptr inbounds float, float addrspace(3)* [[BASE_PTR]], i64 32
		; IR: getelementptr inbounds float, float addrspace(3)* [[BASE_PTR]], i64 33

		; @sum_of_array2 is very similar to @sum_of_array. The only difference is in
		; the order of "sext" and "add" when computing the array indices. @sum_of_array
		@@ -95,9 +95,9 @@ define void @sum_of_array2(i32 %x, i32 %y, float* nocapture %output) {

		; IR-LABEL: @sum_of_array2(
		; IR: [[BASE_PTR:%[a-zA-Z0-9]+]] = getelementptr inbounds [32 x [32 x float]], [32 x [32 x float]] addrspace(3)* @array, i64 0, i64 %{{[a-zA-Z0-9]+}}, i64 %{{[a-zA-Z0-9]+}}
		; IR: getelementptr float, float addrspace(3)* [[BASE_PTR]], i64 1
		; IR: getelementptr float, float addrspace(3)* [[BASE_PTR]], i64 32
		; IR: getelementptr float, float addrspace(3)* [[BASE_PTR]], i64 33
		; IR: getelementptr inbounds float, float addrspace(3)* [[BASE_PTR]], i64 1
		; IR: getelementptr inbounds float, float addrspace(3)* [[BASE_PTR]], i64 32
		; IR: getelementptr inbounds float, float addrspace(3)* [[BASE_PTR]], i64 33


		; This function loads
		@@ -145,9 +145,9 @@ define void @sum_of_array3(i32 %x, i32 %y, float* nocapture %output) {

		; IR-LABEL: @sum_of_array3(
		; IR: [[BASE_PTR:%[a-zA-Z0-9]+]] = getelementptr inbounds [32 x [32 x float]], [32 x [32 x float]] addrspace(3)* @array, i64 0, i64 %{{[a-zA-Z0-9]+}}, i64 %{{[a-zA-Z0-9]+}}
		; IR: getelementptr float, float addrspace(3)* [[BASE_PTR]], i64 1
		; IR: getelementptr float, float addrspace(3)* [[BASE_PTR]], i64 32
		; IR: getelementptr float, float addrspace(3)* [[BASE_PTR]], i64 33
		; IR: getelementptr inbounds float, float addrspace(3)* [[BASE_PTR]], i64 1
		; IR: getelementptr inbounds float, float addrspace(3)* [[BASE_PTR]], i64 32
		; IR: getelementptr inbounds float, float addrspace(3)* [[BASE_PTR]], i64 33


		; This function loads
		@@ -191,6 +191,6 @@ define void @sum_of_array4(i32 %x, i32 %y, float* nocapture %output) {

		; IR-LABEL: @sum_of_array4(
		; IR: [[BASE_PTR:%[a-zA-Z0-9]+]] = getelementptr inbounds [32 x [32 x float]], [32 x [32 x float]] addrspace(3)* @array, i64 0, i64 %{{[a-zA-Z0-9]+}}, i64 %{{[a-zA-Z0-9]+}}
		; IR: getelementptr float, float addrspace(3)* [[BASE_PTR]], i64 1
		; IR: getelementptr float, float addrspace(3)* [[BASE_PTR]], i64 32
		; IR: getelementptr float, float addrspace(3)* [[BASE_PTR]], i64 33
		; IR: getelementptr inbounds float, float addrspace(3)* [[BASE_PTR]], i64 1
		; IR: getelementptr inbounds float, float addrspace(3)* [[BASE_PTR]], i64 32
		; IR: getelementptr inbounds float, float addrspace(3)* [[BASE_PTR]], i64 33

llvm/test/Transforms/SeparateConstOffsetFromGEP/NVPTX/split-gep.ll

+8 −8

Original line number	Diff line number	Diff line
		@@ -44,7 +44,7 @@ entry:
		; CHECK: add i32 %j, -2
		; CHECK: sext
		; CHECK: getelementptr [32 x [32 x float]], [32 x [32 x float]]* @float_2d_array, i64 0, i64 %{{[a-zA-Z0-9]+}}, i64 %{{[a-zA-Z0-9]+}}
		; CHECK: getelementptr float, float* %{{[a-zA-Z0-9]+}}, i64 32
		; CHECK: getelementptr inbounds float, float* %{{[a-zA-Z0-9]+}}, i64 32

		; We should be able to trace into sext/zext if it can be distributed to both
		; operands, e.g., sext (add nsw a, b) == add nsw (sext a), (sext b)
		@@ -65,7 +65,7 @@ define float* @ext_add_no_overflow(i64 %a, i32 %b, i64 %c, i32 %d) {
		}
		; CHECK-LABEL: @ext_add_no_overflow(
		; CHECK: [[BASE_PTR:%[a-zA-Z0-9]+]] = getelementptr [32 x [32 x float]], [32 x [32 x float]]* @float_2d_array, i64 0, i64 %{{[a-zA-Z0-9]+}}, i64 %{{[a-zA-Z0-9]+}}
		; CHECK: getelementptr float, float* [[BASE_PTR]], i64 33
		; CHECK: getelementptr inbounds float, float* [[BASE_PTR]], i64 33

		; Verifies we handle nested sext/zext correctly.
		define void @sext_zext(i32 %a, i32 %b, float %out1, float %out2) {
		@@ -110,7 +110,7 @@ entry:
		}
		; CHECK-LABEL: @sext_or(
		; CHECK: [[BASE_PTR:%[a-zA-Z0-9]+]] = getelementptr [32 x [32 x float]], [32 x [32 x float]]* @float_2d_array, i64 0, i64 %{{[a-zA-Z0-9]+}}, i64 %{{[a-zA-Z0-9]+}}
		; CHECK: getelementptr float, float* [[BASE_PTR]], i64 32
		; CHECK: getelementptr inbounds float, float* [[BASE_PTR]], i64 32

		; The subexpression (b + 5) is used in both "i = a + (b + 5)" and "*out = b +
		; 5". When extracting the constant offset 5, make sure "*out = b + 5" isn't
		@@ -125,7 +125,7 @@ entry:
		}
		; CHECK-LABEL: @expr(
		; CHECK: [[BASE_PTR:%[a-zA-Z0-9]+]] = getelementptr [32 x [32 x float]], [32 x [32 x float]]* @float_2d_array, i64 0, i64 %{{[a-zA-Z0-9]+}}, i64 0
		; CHECK: getelementptr float, float* [[BASE_PTR]], i64 160
		; CHECK: getelementptr inbounds float, float* [[BASE_PTR]], i64 160
		; CHECK: store i64 %b5, i64* %out

		; d + sext(a +nsw (b +nsw (c +nsw 8))) => (d + sext(a) + sext(b) + sext(c)) + 8
		@@ -143,7 +143,7 @@ entry:
		; CHECK: sext i32
		; CHECK: sext i32
		; CHECK: sext i32
		; CHECK: getelementptr float, float* %{{[a-zA-Z0-9]+}}, i64 8
		; CHECK: getelementptr inbounds float, float* %{{[a-zA-Z0-9]+}}, i64 8

		; Verifies we handle "sub" correctly.
		define float* @sub(i64 %i, i64 %j) {
		@@ -155,7 +155,7 @@ define float* @sub(i64 %i, i64 %j) {
		; CHECK-LABEL: @sub(
		; CHECK: %[[j2:[a-zA-Z0-9]+]] = sub i64 0, %j
		; CHECK: [[BASE_PTR:%[a-zA-Z0-9]+]] = getelementptr [32 x [32 x float]], [32 x [32 x float]]* @float_2d_array, i64 0, i64 %i, i64 %[[j2]]
		; CHECK: getelementptr float, float* [[BASE_PTR]], i64 -155
		; CHECK: getelementptr inbounds float, float* [[BASE_PTR]], i64 -155

		%struct.Packed = type <{ [3 x i32], [8 x i64] }> ; <> means packed

		@@ -173,7 +173,7 @@ entry:
		; CHECK-LABEL: @packed_struct(
		; CHECK: [[BASE_PTR:%[a-zA-Z0-9]+]] = getelementptr [1024 x %struct.Packed], [1024 x %struct.Packed]* %s, i64 0, i64 %{{[a-zA-Z0-9]+}}, i32 1, i64 %{{[a-zA-Z0-9]+}}
		; CHECK: [[CASTED_PTR:%[a-zA-Z0-9]+]] = bitcast i64* [[BASE_PTR]] to i8*
		; CHECK: %uglygep = getelementptr i8, i8* [[CASTED_PTR]], i64 100
		; CHECK: %uglygep = getelementptr inbounds i8, i8* [[CASTED_PTR]], i64 100
		; CHECK: bitcast i8* %uglygep to i64*

		; We shouldn't be able to extract the 8 from "zext(a +nuw (b + 8))",
		@@ -272,7 +272,7 @@ entry:
		%ptr2 = getelementptr inbounds %struct0, %struct0* %ptr, i64 0, i32 3, i64 %arrayidx, i32 1
		; CHECK: [[PTR:%[a-zA-Z0-9]+]] = getelementptr %struct0, %struct0* %ptr, i64 0, i32 3, i64 %idx, i32 1
		; CHECK: [[PTR1:%[a-zA-Z0-9]+]] = bitcast %struct2* [[PTR]] to i8*
		; CHECK: getelementptr i8, i8* [[PTR1]], i64 -64
		; CHECK: getelementptr inbounds i8, i8* [[PTR1]], i64 -64
		; CHECK: bitcast
		ret %struct2* %ptr2
		; CHECK-NEXT: ret

llvm/test/Transforms/SeparateConstOffsetFromGEP/NVPTX/value-tracking-domtree.ll

+1 −1

Original line number	Diff line number	Diff line
		@@ -25,7 +25,7 @@ then:
		%or = or i64 %i, 3
		%p = getelementptr inbounds float, float* %input, i64 %or
		; CHECK: [[base:[^ ]+]] = getelementptr float, float* %input, i64 %i
		; CHECK: getelementptr float, float* [[base]], i64 3
		; CHECK: getelementptr inbounds float, float* [[base]], i64 3
		ret float* %p

		exit: