Unverified Commit 29dfc683 authored by Sai Sanjay Chikne's avatar Sai Sanjay Chikne Committed by GitHub
Browse files

[InstCombine] Fold redundant select guards for ctpop (#195443)

Fixes #194207

Folds select patterns that guard ctpop for edge cases:
  `select (icmp ult X, 2), X, ctpop(X)`       --> `ctpop(X)`

These guards are always redundant: `ctpop(0) == 0` and `ctpop(1) == 1`,
so for X < 2 the guarded value X already equals `ctpop(X)`.
parent d1e84bb9
Loading
Loading
Loading
Loading
+10 −0
Original line number Diff line number Diff line
@@ -4433,6 +4433,16 @@ Instruction *InstCombinerImpl::visitSelectInst(SelectInst &SI) {
  if (Instruction *I = canonicalizeScalarSelectOfVecs(SI, *this))
    return I;

  // Fold: select (icmp ult X, 2), X, ctpop(X)  -->  ctpop(X)
  // ctpop(0)==0 and ctpop(1)==1, so the guard is always redundant.
  if (match(FalseVal, m_Ctpop(m_Specific(TrueVal))) &&
      match(CondVal, m_SpecificICmp(ICmpInst::ICMP_ULT, m_Specific(TrueVal),
                                    m_SpecificInt(2)))) {
    cast<Instruction>(FalseVal)->dropPoisonGeneratingAnnotations();
    addToWorklist(cast<Instruction>(FalseVal));
    return replaceInstUsesWith(SI, FalseVal);
  }

  // If the type of select is not an integer type or if the condition and
  // the selection type are not both scalar nor both vector types, there is no
  // point in attempting to match these patterns.
+94 −0
Original line number Diff line number Diff line
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
;
; Test folding of a redundant early-exit guard around ctpop:
;   select (icmp ule X, 1), X, ctpop(X)  --> ctpop(X)
; The guard is written here as `ule 1`; it is the same condition as
; `ult 2`, since both hold exactly when X is 0 or 1.
;
; This fold is valid because ctpop(0)==0 and ctpop(1)==1, so the guard
; is always redundant. The guard only existed to skip slow software emulation.


; RUN: opt < %s -S -passes=instcombine | FileCheck %s


;------------------------------------------------------------------------------
; Positive tests: select (icmp ule X, 1), X, ctpop(X)  -->  ctpop(X)
;------------------------------------------------------------------------------

; Scalar i64: the guard returns %x only when %x is 0 or 1, where ctpop(%x)
; equals %x, so the select is expected to collapse to the ctpop call alone.
define i64 @fold_ule1_i64(i64 %x) {
; CHECK-LABEL: @fold_ule1_i64(
; CHECK-NEXT:    [[POP:%.*]] = call range(i64 0, 65) i64 @llvm.ctpop.i64(i64 [[X:%.*]])
; CHECK-NEXT:    ret i64 [[POP]]
;
  %cmp = icmp ule i64 %x, 1
  %pop = call i64 @llvm.ctpop.i64(i64 %x)
  %res = select i1 %cmp, i64 %x, i64 %pop
  ret i64 %res
}

; Scalar i32 variant of the same guard; the select is expected to be removed
; and the ctpop result returned directly.
define i32 @fold_ule1_i32(i32 %x) {
; CHECK-LABEL: @fold_ule1_i32(
; CHECK-NEXT:    [[POP:%.*]] = call range(i32 0, 33) i32 @llvm.ctpop.i32(i32 [[X:%.*]])
; CHECK-NEXT:    ret i32 [[POP]]
;
  %cmp = icmp ule i32 %x, 1
  %pop = call i32 @llvm.ctpop.i32(i32 %x)
  %res = select i1 %cmp, i32 %x, i32 %pop
  ret i32 %res
}

; Vector variant: the comparison bound is a splat of 1, so every lane has the
; same 0-or-1 guard and the per-lane select is expected to fold away.
define <4 x i32> @fold_vector_ule1(<4 x i32> %x) {
; CHECK-LABEL: @fold_vector_ule1(
; CHECK-NEXT:    [[POP:%.*]] = call range(i32 0, 33) <4 x i32> @llvm.ctpop.v4i32(<4 x i32> [[X:%.*]])
; CHECK-NEXT:    ret <4 x i32> [[POP]]
;
  %cmp = icmp ule <4 x i32> %x, splat(i32 1)
  %pop = call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %x)
  %res = select <4 x i1> %cmp, <4 x i32> %x, <4 x i32> %pop
  ret <4 x i32> %res
}

;------------------------------------------------------------------------------
; Negative tests: should NOT fold.
;------------------------------------------------------------------------------

; Wrong predicate (ugt instead of ult/ule).
; Here %x is returned when %x > 1, where ctpop(%x) need not equal %x
; (e.g. ctpop(2) == 1), so the select must be preserved.
define i32 @no_fold_wrong_pred(i32 %x) {
; CHECK-LABEL: @no_fold_wrong_pred(
; CHECK-NEXT:    [[CMP:%.*]] = icmp ugt i32 [[X:%.*]], 1
; CHECK-NEXT:    [[POP:%.*]] = call range(i32 0, 33) i32 @llvm.ctpop.i32(i32 [[X]])
; CHECK-NEXT:    [[RES:%.*]] = select i1 [[CMP]], i32 [[X]], i32 [[POP]]
; CHECK-NEXT:    ret i32 [[RES]]
;
  %cmp = icmp ugt i32 %x, 1
  %pop = call i32 @llvm.ctpop.i32(i32 %x)
  %res = select i1 %cmp, i32 %x, i32 %pop
  ret i32 %res
}

; Wrong constant (ult 3 instead of ult 2).
; The guard now also covers %x == 2, but ctpop(2) == 1 != 2, so replacing the
; select with ctpop(%x) would change the result; the select must be preserved.
define i32 @no_fold_wrong_const(i32 %x) {
; CHECK-LABEL: @no_fold_wrong_const(
; CHECK-NEXT:    [[CMP:%.*]] = icmp ult i32 [[X:%.*]], 3
; CHECK-NEXT:    [[POP:%.*]] = call range(i32 0, 33) i32 @llvm.ctpop.i32(i32 [[X]])
; CHECK-NEXT:    [[RES:%.*]] = select i1 [[CMP]], i32 [[X]], i32 [[POP]]
; CHECK-NEXT:    ret i32 [[RES]]
;
  %cmp = icmp ult i32 %x, 3
  %pop = call i32 @llvm.ctpop.i32(i32 %x)
  %res = select i1 %cmp, i32 %x, i32 %pop
  ret i32 %res
}

; Mismatched variables (condition uses Y, ctpop uses X).
; The guard tests and returns %y while ctpop is applied to the unrelated %x,
; so the true arm is not implied by the ctpop value; the select must be
; preserved.
define i32 @no_fold_mismatch_var(i32 %x, i32 %y) {
; CHECK-LABEL: @no_fold_mismatch_var(
; CHECK-NEXT:    [[CMP:%.*]] = icmp ult i32 [[Y:%.*]], 2
; CHECK-NEXT:    [[POP:%.*]] = call range(i32 0, 33) i32 @llvm.ctpop.i32(i32 [[X:%.*]])
; CHECK-NEXT:    [[RES:%.*]] = select i1 [[CMP]], i32 [[Y]], i32 [[POP]]
; CHECK-NEXT:    ret i32 [[RES]]
;
  %cmp = icmp ult i32 %y, 2
  %pop = call i32 @llvm.ctpop.i32(i32 %x)
  %res = select i1 %cmp, i32 %y, i32 %pop
  ret i32 %res
}