Commit 096cd991 authored by Matt Arsenault's avatar Matt Arsenault
Browse files

AMDGPU: Fix divergence analysis of control flow intrinsics

The mask results of these should be uniform. The trickier part is the
dummy booleans used as IR glue need to be treated as divergent. This
should make the divergence analysis results correct for the IR the DAG
is constructed from.

This should allow us to eliminate requiresUniformRegister, which has
an expensive, recursive scan over all users looking for control flow
intrinsics. This should avoid recent compile time regressions.
parent ca1fd460
Loading
Loading
Loading
Loading
+8 −0
Original line number Diff line number Diff line
@@ -270,5 +270,13 @@ def : SourceOfDivergence<int_amdgcn_mfma_i32_32x32x8i8>;
def : SourceOfDivergence<int_amdgcn_mfma_f32_32x32x2bf16>;
def : SourceOfDivergence<int_amdgcn_mfma_f32_32x32x4bf16>;

// The dummy boolean output is divergent from the IR's perspective,
// but the mask results are uniform. These produce a divergent and
// uniform result, so the returned struct is collectively divergent.
// isAlwaysUniform can override the extract of the uniform component.
def : SourceOfDivergence<int_amdgcn_if>;
def : SourceOfDivergence<int_amdgcn_else>;
def : SourceOfDivergence<int_amdgcn_loop>;

foreach intr = AMDGPUImageDimAtomicIntrinsics in
def : SourceOfDivergence<intr>;
+21 −6
Original line number Diff line number Diff line
@@ -706,6 +706,7 @@ bool GCNTTIImpl::isAlwaysUniform(const Value *V) const {
    case Intrinsic::amdgcn_readlane:
    case Intrinsic::amdgcn_icmp:
    case Intrinsic::amdgcn_fcmp:
    case Intrinsic::amdgcn_if_break:
      return true;
    }
  }
@@ -720,13 +721,27 @@ bool GCNTTIImpl::isAlwaysUniform(const Value *V) const {
  if (!ExtValue)
    return false;

  if (const CallInst *CI = dyn_cast<CallInst>(ExtValue->getOperand(0))) {
  const CallInst *CI = dyn_cast<CallInst>(ExtValue->getOperand(0));
  if (!CI)
    return false;

  if (const IntrinsicInst *Intrinsic = dyn_cast<IntrinsicInst>(CI)) {
    switch (Intrinsic->getIntrinsicID()) {
    default:
      return false;
    case Intrinsic::amdgcn_if:
    case Intrinsic::amdgcn_else: {
      ArrayRef<unsigned> Indices = ExtValue->getIndices();
      return Indices.size() == 1 && Indices[0] == 1;
    }
    }
  }

  // If we have inline asm returning mixed SGPR and VGPR results, we inferred
  // divergent for the overall struct return. We need to override it in the
  // case we're extracting an SGPR component here.
  if (isa<InlineAsm>(CI->getCalledValue()))
    return !isInlineAsmSourceOfDivergence(CI, ExtValue->getIndices());
  }

  return false;
}
+102 −0
Original line number Diff line number Diff line
; RUN: opt -mtriple=amdgcn-mesa-mesa3d -analyze -divergence -use-gpu-divergence-analysis %s | FileCheck %s

; Tests divergence analysis of control flow intrinsics: the i64 mask results
; should be treated as uniform, while the dummy i1 "glue" outputs (and the
; overall returned structs) should be treated as divergent.

; CHECK: Printing analysis 'Legacy Divergence Analysis' for function 'test_if_break':
; CHECK: DIVERGENT: %cond = icmp eq i32 %arg0, 0
; CHECK-NOT: DIVERGENT
; CHECK: ret void
define amdgpu_ps void @test_if_break(i32 %arg0, i64 inreg %saved) {
entry:
  ; %arg0 is a non-inreg (VGPR) argument, so the compare is divergent.
  %cond = icmp eq i32 %arg0, 0
  ; The i64 mask produced by if.break must be uniform even though its i1
  ; condition operand is divergent (isAlwaysUniform overrides it).
  %break = call i64 @llvm.amdgcn.if.break.i64.i64(i1 %cond, i64 %saved)
  ; Volatile store keeps %break live so the analysis has to classify it.
  store volatile i64 %break, i64 addrspace(1)* undef
  ret void
}

; CHECK: Printing analysis 'Legacy Divergence Analysis' for function 'test_if':
; CHECK: DIVERGENT: %cond = icmp eq i32 %arg0, 0
; CHECK-NEXT: DIVERGENT: %if = call { i1, i64 } @llvm.amdgcn.if.i64(i1 %cond)
; CHECK-NEXT: DIVERGENT: %if.bool = extractvalue { i1, i64 } %if, 0
; CHECK-NOT: DIVERGENT
; CHECK: DIVERGENT: %if.bool.ext = zext i1 %if.bool to i32
define void @test_if(i32 %arg0) {
entry:
  ; Divergent condition: %arg0 is an ordinary (VGPR) argument.
  %cond = icmp eq i32 %arg0, 0
  ; The struct return is collectively divergent (SourceOfDivergence).
  %if = call { i1, i64 } @llvm.amdgcn.if.i64(i1 %cond)
  ; Element 0 is the dummy boolean glue: stays divergent.
  %if.bool = extractvalue { i1, i64 } %if, 0
  ; Element 1 is the exec mask: extract is overridden to uniform.
  %if.mask = extractvalue { i1, i64 } %if, 1
  %if.bool.ext = zext i1 %if.bool to i32
  ; Volatile stores keep both extracted values live.
  store volatile i32 %if.bool.ext, i32 addrspace(1)* undef
  store volatile i64 %if.mask, i64 addrspace(1)* undef
  ret void
}

; The result should still be treated as divergent, even with a uniform source.
; CHECK: Printing analysis 'Legacy Divergence Analysis' for function 'test_if_uniform':
; CHECK-NOT: DIVERGENT
; CHECK: DIVERGENT: %if = call { i1, i64 } @llvm.amdgcn.if.i64(i1 %cond)
; CHECK-NEXT: DIVERGENT: %if.bool = extractvalue { i1, i64 } %if, 0
; CHECK-NOT: DIVERGENT
; CHECK: DIVERGENT: %if.bool.ext = zext i1 %if.bool to i32
define amdgpu_ps void @test_if_uniform(i32 inreg %arg0) {
entry:
  ; inreg argument => SGPR, so the compare itself is uniform.
  %cond = icmp eq i32 %arg0, 0
  ; Even with a uniform condition the intrinsic's struct result is still
  ; divergent, because the dummy boolean component must be divergent.
  %if = call { i1, i64 } @llvm.amdgcn.if.i64(i1 %cond)
  ; Dummy boolean glue: divergent.
  %if.bool = extractvalue { i1, i64 } %if, 0
  ; Exec mask component: uniform via the isAlwaysUniform override.
  %if.mask = extractvalue { i1, i64 } %if, 1
  %if.bool.ext = zext i1 %if.bool to i32
  ; Volatile stores keep both extracted values live.
  store volatile i32 %if.bool.ext, i32 addrspace(1)* undef
  store volatile i64 %if.mask, i64 addrspace(1)* undef
  ret void
}

; CHECK: Printing analysis 'Legacy Divergence Analysis' for function 'test_loop_uniform':
; CHECK: DIVERGENT: %loop = call i1 @llvm.amdgcn.loop.i64(i64 %mask)
define amdgpu_ps void @test_loop_uniform(i64 inreg %mask) {
entry:
  ; amdgcn.loop returns only the dummy boolean, so even with a uniform
  ; (inreg/SGPR) mask operand the result is divergent.
  %loop = call i1 @llvm.amdgcn.loop.i64(i64 %mask)
  %loop.ext = zext i1 %loop to i32
  ; Volatile store keeps the result live for the analysis printout.
  store volatile i32 %loop.ext, i32 addrspace(1)* undef
  ret void
}

; CHECK: Printing analysis 'Legacy Divergence Analysis' for function 'test_else':
; CHECK: DIVERGENT: %else = call { i1, i64 } @llvm.amdgcn.else.i64.i64(i64 %mask)
; CHECK: DIVERGENT:       %else.bool = extractvalue { i1, i64 } %else, 0
; CHECK: {{^[ \t]+}}%else.mask = extractvalue { i1, i64 } %else, 1
define amdgpu_ps void @test_else(i64 inreg %mask) {
entry:
  ; Uniform (inreg) incoming mask; the struct result is still divergent
  ; because of the dummy boolean component.
  %else = call { i1, i64 } @llvm.amdgcn.else.i64.i64(i64 %mask)
  ; Dummy boolean glue: divergent.
  %else.bool = extractvalue { i1, i64 } %else, 0
  ; Mask component: uniform (the CHECK above requires no DIVERGENT prefix).
  %else.mask = extractvalue { i1, i64 } %else, 1
  %else.bool.ext = zext i1 %else.bool to i32
  ; Volatile stores keep both extracted values live.
  store volatile i32 %else.bool.ext, i32 addrspace(1)* undef
  store volatile i64 %else.mask, i64 addrspace(1)* undef
  ret void
}

; Passing a divergent mask to amdgcn.else violates the intrinsic's implicit
; contract (the mask should be a uniform exec value), so codegen for this
; case is probably always broken; the analysis results here just document
; the current behavior.
; CHECK: Printing analysis 'Legacy Divergence Analysis' for function 'test_else_divergent_mask':
; CHECK: DIVERGENT: %if = call { i1, i64 } @llvm.amdgcn.else.i64.i64(i64 %mask)
; CHECK-NEXT: DIVERGENT: %if.bool = extractvalue { i1, i64 } %if, 0
; CHECK-NOT: DIVERGENT
; CHECK: DIVERGENT: %if.bool.ext = zext i1 %if.bool to i32
define void @test_else_divergent_mask(i64 %mask) {
entry:
  ; %mask is a plain (VGPR) argument, i.e. a divergent mask input.
  ; The extract of element 1 is still reported uniform by the override,
  ; even though the incoming mask is divergent.
  %if = call { i1, i64 } @llvm.amdgcn.else.i64.i64(i64 %mask)
  ; Dummy boolean glue: divergent.
  %if.bool = extractvalue { i1, i64 } %if, 0
  ; Mask component: treated as uniform regardless of the divergent operand.
  %if.mask = extractvalue { i1, i64 } %if, 1
  %if.bool.ext = zext i1 %if.bool to i32
  ; Volatile stores keep both extracted values live.
  store volatile i32 %if.bool.ext, i32 addrspace(1)* undef
  store volatile i64 %if.mask, i64 addrspace(1)* undef
  ret void
}

declare { i1, i64 } @llvm.amdgcn.if.i64(i1) #0
declare { i1, i64 } @llvm.amdgcn.else.i64.i64(i64) #0
declare i64 @llvm.amdgcn.if.break.i64.i64(i1, i64) #1
declare i1 @llvm.amdgcn.loop.i64(i64) #1

attributes #0 = { convergent nounwind }
attributes #1 = { convergent nounwind readnone }