Loading llvm/lib/Target/AMDGPU/AMDGPUUnifyDivergentExitNodes.cpp +3 −1 Original line number Diff line number Diff line Loading @@ -195,6 +195,8 @@ static BasicBlock *unifyReturnBlockSet(Function &F, bool AMDGPUUnifyDivergentExitNodes::runOnFunction(Function &F) { auto &PDT = getAnalysis<PostDominatorTreeWrapperPass>().getPostDomTree(); if (PDT.getRoots().size() <= 1) return false; LegacyDivergenceAnalysis &DA = getAnalysis<LegacyDivergenceAnalysis>(); Loading Loading @@ -319,7 +321,7 @@ bool AMDGPUUnifyDivergentExitNodes::runOnFunction(Function &F) { if (ReturningBlocks.empty()) return false; // No blocks return if (ReturningBlocks.size() == 1 && !InsertExport) if (ReturningBlocks.size() == 1) return false; // Already has a single return block const TargetTransformInfo &TTI Loading llvm/test/CodeGen/AMDGPU/kill-infinite-loop.ll +0 −16 Original line number Diff line number Diff line Loading @@ -45,22 +45,6 @@ end: ret void } ; test the case where there's only a kill in an infinite loop ; CHECK-LABEL: only_kill ; CHECK: exp null off, off, off, off done vm ; CHECK-NEXT: s_endpgm ; SIInsertSkips inserts an extra null export here, but it should be harmless. ; CHECK: exp null off, off, off, off done vm ; CHECK-NEXT: s_endpgm define amdgpu_ps void @only_kill() #0 { main_body: br label %loop loop: call void @llvm.amdgcn.kill(i1 false) #3 br label %loop } ; In case there's an epilog, we shouldn't have to do this. ; CHECK-LABEL: return_nonvoid ; CHECK-NOT: exp null off, off, off, off done vm Loading Loading
llvm/lib/Target/AMDGPU/AMDGPUUnifyDivergentExitNodes.cpp +3 −1 Original line number Diff line number Diff line Loading @@ -195,6 +195,8 @@ static BasicBlock *unifyReturnBlockSet(Function &F, bool AMDGPUUnifyDivergentExitNodes::runOnFunction(Function &F) { auto &PDT = getAnalysis<PostDominatorTreeWrapperPass>().getPostDomTree(); if (PDT.getRoots().size() <= 1) return false; LegacyDivergenceAnalysis &DA = getAnalysis<LegacyDivergenceAnalysis>(); Loading Loading @@ -319,7 +321,7 @@ bool AMDGPUUnifyDivergentExitNodes::runOnFunction(Function &F) { if (ReturningBlocks.empty()) return false; // No blocks return if (ReturningBlocks.size() == 1 && !InsertExport) if (ReturningBlocks.size() == 1) return false; // Already has a single return block const TargetTransformInfo &TTI Loading
llvm/test/CodeGen/AMDGPU/kill-infinite-loop.ll +0 −16 Original line number Diff line number Diff line Loading @@ -45,22 +45,6 @@ end: ret void } ; test the case where there's only a kill in an infinite loop ; CHECK-LABEL: only_kill ; CHECK: exp null off, off, off, off done vm ; CHECK-NEXT: s_endpgm ; SIInsertSkips inserts an extra null export here, but it should be harmless. ; CHECK: exp null off, off, off, off done vm ; CHECK-NEXT: s_endpgm define amdgpu_ps void @only_kill() #0 { main_body: br label %loop loop: call void @llvm.amdgcn.kill(i1 false) #3 br label %loop } ; In case there's an epilog, we shouldn't have to do this. ; CHECK-LABEL: return_nonvoid ; CHECK-NOT: exp null off, off, off, off done vm Loading