Loading llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp +10 −0 Original line number Diff line number Diff line Loading @@ -4433,6 +4433,16 @@ Instruction *InstCombinerImpl::visitSelectInst(SelectInst &SI) { if (Instruction *I = canonicalizeScalarSelectOfVecs(SI, *this)) return I; // Fold: select (icmp ult X, 2), X, ctpop(X) --> ctpop(X) // ctpop(0)==0 and ctpop(1)==1, so the guard is always redundant. if (match(FalseVal, m_Ctpop(m_Specific(TrueVal))) && match(CondVal, m_SpecificICmp(ICmpInst::ICMP_ULT, m_Specific(TrueVal), m_SpecificInt(2)))) { cast<Instruction>(FalseVal)->dropPoisonGeneratingAnnotations(); addToWorklist(cast<Instruction>(FalseVal)); return replaceInstUsesWith(SI, FalseVal); } // If the type of select is not an integer type or if the condition and // the selection type are not both scalar nor both vector types, there is no // point in attempting to match these patterns. Loading llvm/test/Transforms/InstCombine/select-ctpop-fold.ll 0 → 100644 +94 −0 Original line number Diff line number Diff line ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; ; Test folding of redundant early-exit guard around ctpop: ; select (icmp ule X, 1), X, ctpop(X) --> ctpop(X) ; ; This fold is valid because ctpop(0)==0 and ctpop(1)==1, so the guard ; is always redundant. The guard only existed to skip slow software emulation. 
; RUN: opt < %s -S -passes=instcombine | FileCheck %s

;------------------------------------------------------------------------------
; Positive tests: select (icmp ule X, 1), X, ctpop(X) --> ctpop(X)
;------------------------------------------------------------------------------
; Each function below guards a ctpop call with "X u<= 1 ? X : ctpop(X)".
; Because ctpop(0) == 0 and ctpop(1) == 1, both arms agree whenever the guard
; is true, so the expected output is the bare ctpop call with no compare and
; no select.
; NOTE(review): the fold in InstCombineSelect.cpp matches the `icmp ult X, 2`
; spelling; these inputs use `ule 1` and rely on that form being rewritten to
; `ult 2` before the select is visited -- confirm if the expected output changes.

; Scalar i64: compare and select are expected to disappear.
define i64 @fold_ule1_i64(i64 %x) {
; CHECK-LABEL: @fold_ule1_i64(
; CHECK-NEXT: [[POP:%.*]] = call range(i64 0, 65) i64 @llvm.ctpop.i64(i64 [[X:%.*]])
; CHECK-NEXT: ret i64 [[POP]]
;
  %cmp = icmp ule i64 %x, 1
  %pop = call i64 @llvm.ctpop.i64(i64 %x)
  %res = select i1 %cmp, i64 %x, i64 %pop
  ret i64 %res
}

; Scalar i32: same pattern at a different bit width.
define i32 @fold_ule1_i32(i32 %x) {
; CHECK-LABEL: @fold_ule1_i32(
; CHECK-NEXT: [[POP:%.*]] = call range(i32 0, 33) i32 @llvm.ctpop.i32(i32 [[X:%.*]])
; CHECK-NEXT: ret i32 [[POP]]
;
  %cmp = icmp ule i32 %x, 1
  %pop = call i32 @llvm.ctpop.i32(i32 %x)
  %res = select i1 %cmp, i32 %x, i32 %pop
  ret i32 %res
}

; <4 x i32> with a splat(i32 1) bound: the vector select is expected to fold
; the same way as the scalar ones.
define <4 x i32> @fold_vector_ule1(<4 x i32> %x) {
; CHECK-LABEL: @fold_vector_ule1(
; CHECK-NEXT: [[POP:%.*]] = call range(i32 0, 33) <4 x i32> @llvm.ctpop.v4i32(<4 x i32> [[X:%.*]])
; CHECK-NEXT: ret <4 x i32> [[POP]]
;
  %cmp = icmp ule <4 x i32> %x, splat(i32 1)
  %pop = call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %x)
  %res = select <4 x i1> %cmp, <4 x i32> %x, <4 x i32> %pop
  ret <4 x i32> %res
}

;------------------------------------------------------------------------------
; Negative tests: should NOT fold.
;------------------------------------------------------------------------------
; In every function below the expected output still contains the icmp, the
; ctpop call and the select, i.e. the input must survive unchanged.

; Wrong predicate (ugt instead of ult/ule)
; With ugt the select yields X for X > 1, where X and ctpop(X) differ
; (e.g. X == 2 gives ctpop == 1), so the select cannot be dropped.
define i32 @no_fold_wrong_pred(i32 %x) {
; CHECK-LABEL: @no_fold_wrong_pred(
; CHECK-NEXT: [[CMP:%.*]] = icmp ugt i32 [[X:%.*]], 1
; CHECK-NEXT: [[POP:%.*]] = call range(i32 0, 33) i32 @llvm.ctpop.i32(i32 [[X]])
; CHECK-NEXT: [[RES:%.*]] = select i1 [[CMP]], i32 [[X]], i32 [[POP]]
; CHECK-NEXT: ret i32 [[RES]]
;
  %cmp = icmp ugt i32 %x, 1
  %pop = call i32 @llvm.ctpop.i32(i32 %x)
  %res = select i1 %cmp, i32 %x, i32 %pop
  ret i32 %res
}

; Wrong constant (ult 3 instead of ult 2)
; The guard now also covers X == 2, where the select yields 2 but
; ctpop(2) == 1, so the guard is not redundant.
define i32 @no_fold_wrong_const(i32 %x) {
; CHECK-LABEL: @no_fold_wrong_const(
; CHECK-NEXT: [[CMP:%.*]] = icmp ult i32 [[X:%.*]], 3
; CHECK-NEXT: [[POP:%.*]] = call range(i32 0, 33) i32 @llvm.ctpop.i32(i32 [[X]])
; CHECK-NEXT: [[RES:%.*]] = select i1 [[CMP]], i32 [[X]], i32 [[POP]]
; CHECK-NEXT: ret i32 [[RES]]
;
  %cmp = icmp ult i32 %x, 3
  %pop = call i32 @llvm.ctpop.i32(i32 %x)
  %res = select i1 %cmp, i32 %x, i32 %pop
  ret i32 %res
}

; Mismatched variables (condition uses Y, ctpop uses X)
; The compare and the true arm use Y while ctpop is taken of X, so nothing
; relates the two arms of the select.
define i32 @no_fold_mismatch_var(i32 %x, i32 %y) {
; CHECK-LABEL: @no_fold_mismatch_var(
; CHECK-NEXT: [[CMP:%.*]] = icmp ult i32 [[Y:%.*]], 2
; CHECK-NEXT: [[POP:%.*]] = call range(i32 0, 33) i32 @llvm.ctpop.i32(i32 [[X:%.*]])
; CHECK-NEXT: [[RES:%.*]] = select i1 [[CMP]], i32 [[Y]], i32 [[POP]]
; CHECK-NEXT: ret i32 [[RES]]
;
  %cmp = icmp ult i32 %y, 2
  %pop = call i32 @llvm.ctpop.i32(i32 %x)
  %res = select i1 %cmp, i32 %y, i32 %pop
  ret i32 %res
}
Loading
llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp +10 −0 Original line number Diff line number Diff line Loading @@ -4433,6 +4433,16 @@ Instruction *InstCombinerImpl::visitSelectInst(SelectInst &SI) { if (Instruction *I = canonicalizeScalarSelectOfVecs(SI, *this)) return I; // Fold: select (icmp ult X, 2), X, ctpop(X) --> ctpop(X) // ctpop(0)==0 and ctpop(1)==1, so the guard is always redundant. if (match(FalseVal, m_Ctpop(m_Specific(TrueVal))) && match(CondVal, m_SpecificICmp(ICmpInst::ICMP_ULT, m_Specific(TrueVal), m_SpecificInt(2)))) { cast<Instruction>(FalseVal)->dropPoisonGeneratingAnnotations(); addToWorklist(cast<Instruction>(FalseVal)); return replaceInstUsesWith(SI, FalseVal); } // If the type of select is not an integer type or if the condition and // the selection type are not both scalar nor both vector types, there is no // point in attempting to match these patterns. Loading
llvm/test/Transforms/InstCombine/select-ctpop-fold.ll 0 → 100644 +94 −0 Original line number Diff line number Diff line
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
;
; Test folding of redundant early-exit guard around ctpop:
;   select (icmp ule X, 1), X, ctpop(X) --> ctpop(X)
;
; This fold is valid because ctpop(0)==0 and ctpop(1)==1, so the guard
; is always redundant. The guard only existed to skip slow software emulation.
;
; NOTE(review): the fold in InstCombineSelect.cpp matches the `icmp ult X, 2`
; spelling; the inputs here use `ule 1` and rely on that form being rewritten
; to `ult 2` before the select is visited -- confirm if the expected output
; changes.

; RUN: opt < %s -S -passes=instcombine | FileCheck %s

;------------------------------------------------------------------------------
; Positive tests: select (icmp ule X, 1), X, ctpop(X) --> ctpop(X)
;------------------------------------------------------------------------------
; In each function below the expected output is the bare ctpop call with no
; compare and no select.

; Scalar i64: compare and select are expected to disappear.
define i64 @fold_ule1_i64(i64 %x) {
; CHECK-LABEL: @fold_ule1_i64(
; CHECK-NEXT: [[POP:%.*]] = call range(i64 0, 65) i64 @llvm.ctpop.i64(i64 [[X:%.*]])
; CHECK-NEXT: ret i64 [[POP]]
;
  %cmp = icmp ule i64 %x, 1
  %pop = call i64 @llvm.ctpop.i64(i64 %x)
  %res = select i1 %cmp, i64 %x, i64 %pop
  ret i64 %res
}

; Scalar i32: same pattern at a different bit width.
define i32 @fold_ule1_i32(i32 %x) {
; CHECK-LABEL: @fold_ule1_i32(
; CHECK-NEXT: [[POP:%.*]] = call range(i32 0, 33) i32 @llvm.ctpop.i32(i32 [[X:%.*]])
; CHECK-NEXT: ret i32 [[POP]]
;
  %cmp = icmp ule i32 %x, 1
  %pop = call i32 @llvm.ctpop.i32(i32 %x)
  %res = select i1 %cmp, i32 %x, i32 %pop
  ret i32 %res
}

; <4 x i32> with a splat(i32 1) bound: the vector select is expected to fold
; the same way as the scalar ones.
define <4 x i32> @fold_vector_ule1(<4 x i32> %x) {
; CHECK-LABEL: @fold_vector_ule1(
; CHECK-NEXT: [[POP:%.*]] = call range(i32 0, 33) <4 x i32> @llvm.ctpop.v4i32(<4 x i32> [[X:%.*]])
; CHECK-NEXT: ret <4 x i32> [[POP]]
;
  %cmp = icmp ule <4 x i32> %x, splat(i32 1)
  %pop = call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %x)
  %res = select <4 x i1> %cmp, <4 x i32> %x, <4 x i32> %pop
  ret <4 x i32> %res
}

;------------------------------------------------------------------------------
; Negative tests: should NOT fold.
;------------------------------------------------------------------------------
; In every function below the expected output still contains the icmp, the
; ctpop call and the select, i.e. the input must survive unchanged.

; Wrong predicate (ugt instead of ult/ule)
; With ugt the select yields X for X > 1, where X and ctpop(X) differ
; (e.g. X == 2 gives ctpop == 1), so the select cannot be dropped.
define i32 @no_fold_wrong_pred(i32 %x) {
; CHECK-LABEL: @no_fold_wrong_pred(
; CHECK-NEXT: [[CMP:%.*]] = icmp ugt i32 [[X:%.*]], 1
; CHECK-NEXT: [[POP:%.*]] = call range(i32 0, 33) i32 @llvm.ctpop.i32(i32 [[X]])
; CHECK-NEXT: [[RES:%.*]] = select i1 [[CMP]], i32 [[X]], i32 [[POP]]
; CHECK-NEXT: ret i32 [[RES]]
;
  %cmp = icmp ugt i32 %x, 1
  %pop = call i32 @llvm.ctpop.i32(i32 %x)
  %res = select i1 %cmp, i32 %x, i32 %pop
  ret i32 %res
}

; Wrong constant (ult 3 instead of ult 2)
; The guard now also covers X == 2, where the select yields 2 but
; ctpop(2) == 1, so the guard is not redundant.
define i32 @no_fold_wrong_const(i32 %x) {
; CHECK-LABEL: @no_fold_wrong_const(
; CHECK-NEXT: [[CMP:%.*]] = icmp ult i32 [[X:%.*]], 3
; CHECK-NEXT: [[POP:%.*]] = call range(i32 0, 33) i32 @llvm.ctpop.i32(i32 [[X]])
; CHECK-NEXT: [[RES:%.*]] = select i1 [[CMP]], i32 [[X]], i32 [[POP]]
; CHECK-NEXT: ret i32 [[RES]]
;
  %cmp = icmp ult i32 %x, 3
  %pop = call i32 @llvm.ctpop.i32(i32 %x)
  %res = select i1 %cmp, i32 %x, i32 %pop
  ret i32 %res
}

; Mismatched variables (condition uses Y, ctpop uses X)
; The compare and the true arm use Y while ctpop is taken of X, so nothing
; relates the two arms of the select.
define i32 @no_fold_mismatch_var(i32 %x, i32 %y) {
; CHECK-LABEL: @no_fold_mismatch_var(
; CHECK-NEXT: [[CMP:%.*]] = icmp ult i32 [[Y:%.*]], 2
; CHECK-NEXT: [[POP:%.*]] = call range(i32 0, 33) i32 @llvm.ctpop.i32(i32 [[X:%.*]])
; CHECK-NEXT: [[RES:%.*]] = select i1 [[CMP]], i32 [[Y]], i32 [[POP]]
; CHECK-NEXT: ret i32 [[RES]]
;
  %cmp = icmp ult i32 %y, 2
  %pop = call i32 @llvm.ctpop.i32(i32 %x)
  %res = select i1 %cmp, i32 %y, i32 %pop
  ret i32 %res
}