Unverified Commit 3aed0816 authored by Usha Gupta's avatar Usha Gupta Committed by GitHub
Browse files

[AggressiveInstCombine] Fold split-width i32 cttz/ctlz patterns into wide i64 intrinsics (#192296)

This patch teaches `AggressiveInstCombine` to recognize and fold common split-width i32 cttz/ctlz intrinsic calls into a single full-width i64
cttz/ctlz intrinsic.

For example:
```
define i32 @src(i64 %val) {
 %lo = trunc i64 %val to i32
 %cmp = icmp eq i32 %lo, 0
 %shr = lshr i64 %val, 32
 %hi = trunc i64 %shr to i32
 %cttz_hi = call i32 @llvm.cttz.i32(i32 %hi, i1 true)
 %hi_plus32 = or i32 %cttz_hi, 32
 %cttz_lo = call i32 @llvm.cttz.i32(i32 %lo, i1 true)
 %result = select i1 %cmp, i32 %hi_plus32, i32 %cttz_lo
 ret i32 %result
}

define i32 @tgt(i64 %val)  {
%cttz64 = call i64 @llvm.cttz.i64(i64 %val, i1 false)
%result = trunc i64 %cttz64 to i32
ret i32 %result
}
```
and similarly for ctlz intrinsic.

Alive2 proofs for the two folds added by this patch:
cttz:
https://alive2.llvm.org/ce/z/-s14-s

ctlz:
https://alive2.llvm.org/ce/z/WfQepH
parent 521f5534
Loading
Loading
Loading
Loading
+190 −0
Original line number Diff line number Diff line
@@ -53,6 +53,10 @@ STATISTIC(NumGuardedRotates,
STATISTIC(NumGuardedFunnelShifts,
          "Number of guarded funnel shifts transformed into funnel shifts");
STATISTIC(NumPopCountRecognized, "Number of popcount idioms recognized");
STATISTIC(NumSelectCTTZFolded,
          "Number of select-based split cttz patterns folded");
STATISTIC(NumSelectCTLZFolded,
          "Number of select-based split ctlz patterns folded");

static cl::opt<unsigned> MaxInstrsToScan(
    "aggressive-instcombine-max-scan-instrs", cl::init(64), cl::Hidden,
@@ -68,6 +72,190 @@ static cl::opt<unsigned>
                          cl::desc("The maximum length of a constant string to "
                                   "inline a memchr call."));

/// Try to fold a select-based split cttz pattern into a single full-width
/// cttz intrinsic call:
///
///   %lo = trunc iN %val to i(N/2)
///   %cmp = icmp eq i(N/2) %lo, 0
///   %shr = lshr iN %val, N/2
///   %hi = trunc iN %shr to i(N/2)
///   %cttz_hi = call i(N/2) @llvm.cttz.i(N/2)(i(N/2) %hi, ...)
///   %hi_plus = add/or_disjoint i(N/2) %cttz_hi, N/2
///   %cttz_lo = call i(N/2) @llvm.cttz.i(N/2)(i(N/2) %lo, ...)
///   %result = select i1 %cmp, i(N/2) %hi_plus, i(N/2) %cttz_lo
/// -->
///   %cttz_wide = call iN @llvm.cttz.iN(iN %val, i1 false)
///   %result = trunc iN %cttz_wide to i(N/2)
///
/// Returns true (and rewrites all uses of \p I) when the fold applied; the
/// now-dead narrow instructions are left for later cleanup.
/// Alive proof (for i64/i32):  https://alive2.llvm.org/ce/z/-s14-s
static bool foldSelectSplitCTTZ(Instruction &I) {
  Value *SelCond, *SelTrue, *SelFalse;
  if (!match(&I,
             m_Select(m_Value(SelCond), m_Value(SelTrue), m_Value(SelFalse))))
    return false;

  Type *NarrowTy = I.getType();
  if (!NarrowTy->isIntegerTy())
    return false;
  const unsigned NarrowBits = NarrowTy->getIntegerBitWidth();

  // Reject tiny types (i1, i2): the full-width cttz can produce a result
  // (2 * NarrowBits) that does not fit in the narrow type (e.g. cttz.i4 may
  // return 4, which i2 cannot hold).
  if (NarrowBits <= 2)
    return false;

  const unsigned WideBits = NarrowBits * 2;

  // The condition must test the (truncated) low half against zero. An eq
  // compare selects the hi-based arm; an ne compare swaps the arms.
  Value *LoHalf;
  Value *HiArm, *LoArm;
  if (match(SelCond,
            m_SpecificICmp(CmpInst::ICMP_EQ, m_Value(LoHalf), m_ZeroInt()))) {
    HiArm = SelTrue;
    LoArm = SelFalse;
  } else if (match(SelCond, m_SpecificICmp(CmpInst::ICMP_NE, m_Value(LoHalf),
                                           m_ZeroInt()))) {
    HiArm = SelFalse;
    LoArm = SelTrue;
  } else {
    return false;
  }

  // LoHalf must be a trunc of a value of exactly twice the narrow width.
  Value *SrcVal;
  if (!match(LoHalf, m_Trunc(m_Value(SrcVal))) ||
      !SrcVal->getType()->isIntegerTy(WideBits))
    return false;

  // Lo arm: cttz applied to the very same truncated value the compare tested.
  if (!match(LoArm, m_OneUse(m_Intrinsic<Intrinsic::cttz>(m_Specific(LoHalf),
                                                          m_Value()))))
    return false;

  // Hi arm: cttz of the high half, biased by NarrowBits via add or
  // disjoint-or (both are equivalent here since cttz.i(N/2) < N/2 bits).
  Value *HiCttz;
  if (!match(HiArm, m_OneUse(m_AddLike(m_Value(HiCttz),
                                       m_SpecificInt(NarrowBits)))))
    return false;

  Value *HiHalf;
  if (!match(HiCttz, m_OneUse(m_Intrinsic<Intrinsic::cttz>(m_Value(HiHalf),
                                                           m_Value()))))
    return false;

  if (!match(HiHalf,
             m_Trunc(m_LShr(m_Specific(SrcVal), m_SpecificInt(NarrowBits)))))
    return false;

  // Pattern matched: emit the wide cttz. It must be defined for zero input
  // (is_zero_poison = false) because the original select covered that case.
  IRBuilder<> Builder(&I);
  Value *WideCttz = Builder.CreateIntrinsic(
      Intrinsic::cttz, {SrcVal->getType()}, {SrcVal, Builder.getFalse()});
  I.replaceAllUsesWith(Builder.CreateTrunc(WideCttz, NarrowTy));
  ++NumSelectCTTZFolded;
  return true;
}

/// Same as foldSelectSplitCTTZ but for leading zeros (ctlz).
///
///   %shr = lshr iN %val, N/2
///   %hi = trunc iN %shr to i(N/2)
///   %cmp = icmp eq i(N/2) %hi, 0   (or icmp eq iN %shr, 0)
///   %lo = trunc iN %val to i(N/2)
///   %ctlz_lo = call i(N/2) @llvm.ctlz.i(N/2)(i(N/2) %lo, ...)
///   %lo_plus = add/or_disjoint i(N/2) %ctlz_lo, N/2
///   %ctlz_hi = call i(N/2) @llvm.ctlz.i(N/2)(i(N/2) %hi, ...)
///   %result = select i1 %cmp, i(N/2) %lo_plus, i(N/2) %ctlz_hi
/// -->
///   %ctlz_wide = call iN @llvm.ctlz.iN(iN %val, i1 false)
///   %result = trunc iN %ctlz_wide to i(N/2)
///
/// Returns true (and rewrites all uses of \p I) when the fold applied; the
/// now-dead narrow instructions are left for later cleanup passes.
/// Alive proof (for i64/i32): https://alive2.llvm.org/ce/z/WfQepH
static bool foldSelectSplitCTLZ(Instruction &I) {
  Value *Cond, *TrueVal, *FalseVal;
  if (!match(&I, m_Select(m_Value(Cond), m_Value(TrueVal), m_Value(FalseVal))))
    return false;

  // The select produces the narrow (half-width) result type.
  Type *HalfTy = I.getType();
  if (!HalfTy->isIntegerTy())
    return false;
  unsigned HalfWidth = HalfTy->getIntegerBitWidth();

  // Bail out on very small types (i1, i2): the full-width ctlz can return
  // values not representable in the half type (e.g., ctlz.i4 can return 4,
  // which doesn't fit in i2).
  if (HalfWidth <= 2)
    return false;

  unsigned FullWidth = HalfWidth * 2;

  // select (icmp eq HiPart, 0), LoResult, HiResult
  // HiPart could be (trunc (lshr SrcVal, N/2) to i(N/2)) or (lshr SrcVal, N/2)
  Value *HiPart;
  Value *LoResult, *HiResult;
  if (match(Cond,
            m_SpecificICmp(CmpInst::ICMP_EQ, m_Value(HiPart), m_ZeroInt()))) {
    LoResult = TrueVal;  // upper is zero: count in lower + N/2
    HiResult = FalseVal; // upper non-zero: count in upper
  } else if (match(Cond, m_SpecificICmp(CmpInst::ICMP_NE, m_Value(HiPart),
                                        m_ZeroInt()))) {
    // Inverted condition: the arms swap roles.
    LoResult = FalseVal;
    HiResult = TrueVal;
  } else {
    return false;
  }

  // Extract SrcVal from HiPart: either trunc(lshr(SrcVal, N/2)) or
  // lshr(SrcVal, N/2). Both forms test the same thing, since shifting right
  // by N/2 leaves only the high half's bits.
  Value *SrcVal;
  if (match(HiPart,
            m_Trunc(m_LShr(m_Value(SrcVal), m_SpecificInt(HalfWidth))))) {
    // HiPart is trunc(lshr(SrcVal, N/2))
  } else if (match(HiPart, m_LShr(m_Value(SrcVal), m_SpecificInt(HalfWidth)))) {
    // HiPart is lshr(SrcVal, N/2)
  } else {
    return false;
  }
  // SrcVal must be exactly twice the narrow width.
  if (!SrcVal->getType()->isIntegerTy(FullWidth))
    return false;

  // HiResult: ctlz(trunc(lshr(SrcVal, N/2)), _)
  // The one-use checks keep the fold profitable: without them the narrow
  // intrinsics would survive alongside the new wide one.
  Value *HiCtlzArg;
  if (!match(HiResult, m_OneUse(m_Intrinsic<Intrinsic::ctlz>(m_Value(HiCtlzArg),
                                                             m_Value()))))
    return false;

  if (!match(HiCtlzArg,
             m_Trunc(m_LShr(m_Specific(SrcVal), m_SpecificInt(HalfWidth)))))
    return false;

  // LoResult: add/or_disjoint(ctlz(trunc(SrcVal), _), N/2)
  // add and disjoint-or are interchangeable since ctlz.i(N/2) needs < N/2 bits.
  Value *CtlzLoCall;
  if (!match(LoResult, m_OneUse(m_AddLike(m_Value(CtlzLoCall),
                                          m_SpecificInt(HalfWidth)))))
    return false;

  Value *LoCtlzArg;
  if (!match(CtlzLoCall, m_OneUse(m_Intrinsic<Intrinsic::ctlz>(
                             m_Value(LoCtlzArg), m_Value()))))
    return false;

  if (!match(LoCtlzArg, m_Trunc(m_Specific(SrcVal))))
    return false;

  // Match successful. The wide ctlz must be defined at zero
  // (is_zero_poison = false) because the original select handled that case.
  IRBuilder<> Builder(&I);
  Value *CtlzWide = Builder.CreateIntrinsic(
      Intrinsic::ctlz, {SrcVal->getType()}, {SrcVal, Builder.getFalse()});
  Value *Trunc = Builder.CreateTrunc(CtlzWide, HalfTy);

  I.replaceAllUsesWith(Trunc);
  ++NumSelectCTLZFolded;
  return true;
}

/// Match a pattern for a bitwise funnel/rotate operation that partially guards
/// against undefined behavior by branching around the funnel-shift/rotation
/// when the shift amount is 0.
@@ -2115,6 +2303,8 @@ static bool foldUnusualPatterns(Function &F, DominatorTree &DT,
    for (Instruction &I : make_early_inc_range(llvm::reverse(BB))) {
      MadeChange |= foldAnyOrAllBitsSet(I);
      MadeChange |= foldGuardedFunnelShift(I, DT);
      MadeChange |= foldSelectSplitCTTZ(I);
      MadeChange |= foldSelectSplitCTLZ(I);
      MadeChange |= tryToRecognizePopCount(I);
      MadeChange |= tryToRecognizePopCount2n3(I);
      MadeChange |= tryToFPToSat(I, TTI);
+273 −0
Original line number Diff line number Diff line
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -passes=aggressive-instcombine -S | FileCheck %s

; Select-based split i64 ctlz: ctlz of each i32 half, combined by selecting
; on whether the high half is zero. Should fold to a single @llvm.ctlz.i64.
define i32 @split_ctlz_select(i64 %val) {
; CHECK-LABEL: @split_ctlz_select(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = call i64 @llvm.ctlz.i64(i64 [[VAL:%.*]], i1 false)
; CHECK-NEXT:    [[TMP1:%.*]] = trunc i64 [[TMP0]] to i32
; CHECK-NEXT:    ret i32 [[TMP1]]
;
entry:
  %shr = lshr i64 %val, 32
  %conv_hi = trunc i64 %shr to i32
  %cmp = icmp eq i32 %conv_hi, 0
  %conv_lo = trunc i64 %val to i32
  %0 = call i32 @llvm.ctlz.i32(i32 %conv_lo, i1 false)
  %add = add i32 %0, 32
  %1 = call i32 @llvm.ctlz.i32(i32 %conv_hi, i1 false)
  %retval = select i1 %cmp, i32 %add, i32 %1
  ret i32 %retval
}

; Select-based with i64 comparison on the lshr result (not truncated).
; The compare on the untruncated shifted value is equivalent, so this
; should still fold to a single @llvm.ctlz.i64.
define i32 @split_ctlz_select_i64cmp(i64 %val) {
; CHECK-LABEL: @split_ctlz_select_i64cmp(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = call i64 @llvm.ctlz.i64(i64 [[VAL:%.*]], i1 false)
; CHECK-NEXT:    [[TMP1:%.*]] = trunc i64 [[TMP0]] to i32
; CHECK-NEXT:    ret i32 [[TMP1]]
;
entry:
  %shr = lshr i64 %val, 32
  %cmp = icmp eq i64 %shr, 0
  %conv_hi = trunc i64 %shr to i32
  %conv_lo = trunc i64 %val to i32
  %0 = call i32 @llvm.ctlz.i32(i32 %conv_lo, i1 false)
  %add = add i32 %0, 32
  %1 = call i32 @llvm.ctlz.i32(i32 %conv_hi, i1 false)
  %retval = select i1 %cmp, i32 %add, i32 %1
  ret i32 %retval
}

; Select-based with icmp ne (inverted condition): the select arms are
; swapped relative to the eq form. Should still fold.
define i32 @split_ctlz_select_ne(i64 %val) {
; CHECK-LABEL: @split_ctlz_select_ne(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = call i64 @llvm.ctlz.i64(i64 [[VAL:%.*]], i1 false)
; CHECK-NEXT:    [[TMP1:%.*]] = trunc i64 [[TMP0]] to i32
; CHECK-NEXT:    ret i32 [[TMP1]]
;
entry:
  %shr = lshr i64 %val, 32
  %conv_hi = trunc i64 %shr to i32
  %cmp = icmp ne i32 %conv_hi, 0
  %conv_lo = trunc i64 %val to i32
  %0 = call i32 @llvm.ctlz.i32(i32 %conv_lo, i1 false)
  %add = add i32 %0, 32
  %1 = call i32 @llvm.ctlz.i32(i32 %conv_hi, i1 false)
  %retval = select i1 %cmp, i32 %1, i32 %add
  ret i32 %retval
}

; Select-based with "or disjoint" instead of add: equivalent to the add
; because ctlz.i32 fits in fewer than 6 bits, so bit 5 (value 32) is free.
; Should fold.
define i32 @split_ctlz_select_or(i64 %val) {
; CHECK-LABEL: @split_ctlz_select_or(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = call i64 @llvm.ctlz.i64(i64 [[VAL:%.*]], i1 false)
; CHECK-NEXT:    [[TMP1:%.*]] = trunc i64 [[TMP0]] to i32
; CHECK-NEXT:    ret i32 [[TMP1]]
;
entry:
  %shr = lshr i64 %val, 32
  %conv_hi = trunc i64 %shr to i32
  %cmp = icmp eq i32 %conv_hi, 0
  %conv_lo = trunc i64 %val to i32
  %0 = call i32 @llvm.ctlz.i32(i32 %conv_lo, i1 false)
  %add = or disjoint i32 %0, 32
  %1 = call i32 @llvm.ctlz.i32(i32 %conv_hi, i1 false)
  %retval = select i1 %cmp, i32 %add, i32 %1
  ret i32 %retval
}

; i32 split into two i16 ctlz: the fold is width-agnostic, not i64-specific.
define i16 @split_ctlz_select_i32(i32 %val) {
; CHECK-LABEL: @split_ctlz_select_i32(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = call i32 @llvm.ctlz.i32(i32 [[VAL:%.*]], i1 false)
; CHECK-NEXT:    [[TMP1:%.*]] = trunc i32 [[TMP0]] to i16
; CHECK-NEXT:    ret i16 [[TMP1]]
;
entry:
  %shr = lshr i32 %val, 16
  %conv_hi = trunc i32 %shr to i16
  %cmp = icmp eq i16 %conv_hi, 0
  %conv_lo = trunc i32 %val to i16
  %0 = call i16 @llvm.ctlz.i16(i16 %conv_lo, i1 false)
  %add = add i16 %0, 16
  %1 = call i16 @llvm.ctlz.i16(i16 %conv_hi, i1 false)
  %retval = select i1 %cmp, i16 %add, i16 %1
  ret i16 %retval
}

; i16 split into two i8 ctlz: i8 is above the i1/i2 bail-out threshold,
; so the fold still applies.
define i8 @split_ctlz_select_i16(i16 %val) {
; CHECK-LABEL: @split_ctlz_select_i16(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = call i16 @llvm.ctlz.i16(i16 [[VAL:%.*]], i1 false)
; CHECK-NEXT:    [[TMP1:%.*]] = trunc i16 [[TMP0]] to i8
; CHECK-NEXT:    ret i8 [[TMP1]]
;
entry:
  %shr = lshr i16 %val, 8
  %conv_hi = trunc i16 %shr to i8
  %cmp = icmp eq i8 %conv_hi, 0
  %conv_lo = trunc i16 %val to i8
  %0 = call i8 @llvm.ctlz.i8(i8 %conv_lo, i1 false)
  %add = add i8 %0, 8
  %1 = call i8 @llvm.ctlz.i8(i8 %conv_hi, i1 false)
  %retval = select i1 %cmp, i8 %add, i8 %1
  ret i8 %retval
}

; i128 split into two i64 ctlz: exercises widths beyond the native i64 case.
define i64 @split_ctlz_select_i128(i128 %val) {
; CHECK-LABEL: @split_ctlz_select_i128(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = call i128 @llvm.ctlz.i128(i128 [[VAL:%.*]], i1 false)
; CHECK-NEXT:    [[TMP1:%.*]] = trunc i128 [[TMP0]] to i64
; CHECK-NEXT:    ret i64 [[TMP1]]
;
entry:
  %shr = lshr i128 %val, 64
  %conv_hi = trunc i128 %shr to i64
  %cmp = icmp eq i64 %conv_hi, 0
  %conv_lo = trunc i128 %val to i64
  %0 = call i64 @llvm.ctlz.i64(i64 %conv_lo, i1 false)
  %add = add i64 %0, 64
  %1 = call i64 @llvm.ctlz.i64(i64 %conv_hi, i1 false)
  %retval = select i1 %cmp, i64 %add, i64 %1
  ret i64 %retval
}

; i256 split into two i128 ctlz.
; This tests m_APInt matching for shift amounts > 64 bits: the shift
; constant 128 still fits the matcher even though the type is wider than i64.
define i128 @split_ctlz_select_i256(i256 %val) {
; CHECK-LABEL: @split_ctlz_select_i256(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = call i256 @llvm.ctlz.i256(i256 [[VAL:%.*]], i1 false)
; CHECK-NEXT:    [[TMP1:%.*]] = trunc i256 [[TMP0]] to i128
; CHECK-NEXT:    ret i128 [[TMP1]]
;
entry:
  %shr = lshr i256 %val, 128
  %conv_hi = trunc i256 %shr to i128
  %cmp = icmp eq i128 %conv_hi, 0
  %conv_lo = trunc i256 %val to i128
  %0 = call i128 @llvm.ctlz.i128(i128 %conv_lo, i1 false)
  %add = add i128 %0, 128
  %1 = call i128 @llvm.ctlz.i128(i128 %conv_hi, i1 false)
  %retval = select i1 %cmp, i128 %add, i128 %1
  ret i128 %retval
}

; Negative test: mismatched source values (%val vs %val2), should NOT be
; folded — the low and high halves must come from the same wide value.
define i32 @split_ctlz_different_sources(i64 %val, i64 %val2) {
; CHECK-LABEL: @split_ctlz_different_sources(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[SHR:%.*]] = lshr i64 [[VAL:%.*]], 32
; CHECK-NEXT:    [[CONV_HI:%.*]] = trunc i64 [[SHR]] to i32
; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[CONV_HI]], 0
; CHECK-NEXT:    [[CONV_LO:%.*]] = trunc i64 [[VAL2:%.*]] to i32
; CHECK-NEXT:    [[TMP0:%.*]] = call i32 @llvm.ctlz.i32(i32 [[CONV_LO]], i1 false)
; CHECK-NEXT:    [[ADD:%.*]] = add i32 [[TMP0]], 32
; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @llvm.ctlz.i32(i32 [[CONV_HI]], i1 false)
; CHECK-NEXT:    [[RETVAL:%.*]] = select i1 [[CMP]], i32 [[ADD]], i32 [[TMP1]]
; CHECK-NEXT:    ret i32 [[RETVAL]]
;
entry:
  %shr = lshr i64 %val, 32
  %conv_hi = trunc i64 %shr to i32
  %cmp = icmp eq i32 %conv_hi, 0
  %conv_lo = trunc i64 %val2 to i32
  %0 = call i32 @llvm.ctlz.i32(i32 %conv_lo, i1 false)
  %add = add i32 %0, 32
  %1 = call i32 @llvm.ctlz.i32(i32 %conv_hi, i1 false)
  %retval = select i1 %cmp, i32 %add, i32 %1
  ret i32 %retval
}

; Negative test: wrong shift amount (16 instead of 32 for an i64 split),
; should NOT be folded — the shift must be exactly half the wide width.
define i32 @split_ctlz_wrong_shift(i64 %val) {
; CHECK-LABEL: @split_ctlz_wrong_shift(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[SHR:%.*]] = lshr i64 [[VAL:%.*]], 16
; CHECK-NEXT:    [[CONV_HI:%.*]] = trunc i64 [[SHR]] to i32
; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[CONV_HI]], 0
; CHECK-NEXT:    [[CONV_LO:%.*]] = trunc i64 [[VAL]] to i32
; CHECK-NEXT:    [[TMP0:%.*]] = call i32 @llvm.ctlz.i32(i32 [[CONV_LO]], i1 false)
; CHECK-NEXT:    [[ADD:%.*]] = add i32 [[TMP0]], 32
; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @llvm.ctlz.i32(i32 [[CONV_HI]], i1 false)
; CHECK-NEXT:    [[RETVAL:%.*]] = select i1 [[CMP]], i32 [[ADD]], i32 [[TMP1]]
; CHECK-NEXT:    ret i32 [[RETVAL]]
;
entry:
  %shr = lshr i64 %val, 16
  %conv_hi = trunc i64 %shr to i32
  %cmp = icmp eq i32 %conv_hi, 0
  %conv_lo = trunc i64 %val to i32
  %0 = call i32 @llvm.ctlz.i32(i32 %conv_lo, i1 false)
  %add = add i32 %0, 32
  %1 = call i32 @llvm.ctlz.i32(i32 %conv_hi, i1 false)
  %retval = select i1 %cmp, i32 %add, i32 %1
  ret i32 %retval
}

; Negative test: plain or (without the disjoint flag), should NOT be folded
; in the select-based pattern because or(32, 32)=32 != add(32, 32)=64 —
; without "disjoint" the or is not guaranteed equivalent to an add.
define i32 @split_ctlz_select_plain_or(i64 %val) {
; CHECK-LABEL: @split_ctlz_select_plain_or(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[SHR:%.*]] = lshr i64 [[VAL:%.*]], 32
; CHECK-NEXT:    [[CONV_HI:%.*]] = trunc i64 [[SHR]] to i32
; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[CONV_HI]], 0
; CHECK-NEXT:    [[CONV_LO:%.*]] = trunc i64 [[VAL]] to i32
; CHECK-NEXT:    [[TMP0:%.*]] = call i32 @llvm.ctlz.i32(i32 [[CONV_LO]], i1 false)
; CHECK-NEXT:    [[ADD:%.*]] = or i32 [[TMP0]], 32
; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @llvm.ctlz.i32(i32 [[CONV_HI]], i1 false)
; CHECK-NEXT:    [[RETVAL:%.*]] = select i1 [[CMP]], i32 [[ADD]], i32 [[TMP1]]
; CHECK-NEXT:    ret i32 [[RETVAL]]
;
entry:
  %shr = lshr i64 %val, 32
  %conv_hi = trunc i64 %shr to i32
  %cmp = icmp eq i32 %conv_hi, 0
  %conv_lo = trunc i64 %val to i32
  %0 = call i32 @llvm.ctlz.i32(i32 %conv_lo, i1 false)
  %add = or i32 %0, 32
  %1 = call i32 @llvm.ctlz.i32(i32 %conv_hi, i1 false)
  %retval = select i1 %cmp, i32 %add, i32 %1
  ret i32 %retval
}

; Negative test: an extra use of ctlz_hi prevents folding — the transform's
; one-use checks require the narrow intrinsics to die with the select.
define i32 @split_ctlz_select_multiuse(i64 %val) {
; CHECK-LABEL: @split_ctlz_select_multiuse(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[SHR:%.*]] = lshr i64 [[VAL:%.*]], 32
; CHECK-NEXT:    [[CONV_HI:%.*]] = trunc i64 [[SHR]] to i32
; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[CONV_HI]], 0
; CHECK-NEXT:    [[CONV_LO:%.*]] = trunc i64 [[VAL]] to i32
; CHECK-NEXT:    [[TMP0:%.*]] = call i32 @llvm.ctlz.i32(i32 [[CONV_LO]], i1 false)
; CHECK-NEXT:    [[ADD:%.*]] = add i32 [[TMP0]], 32
; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @llvm.ctlz.i32(i32 [[CONV_HI]], i1 false)
; CHECK-NEXT:    [[RETVAL:%.*]] = select i1 [[CMP]], i32 [[ADD]], i32 [[TMP1]]
; CHECK-NEXT:    call void @use_i32(i32 [[TMP1]])
; CHECK-NEXT:    ret i32 [[RETVAL]]
;
entry:
  %shr = lshr i64 %val, 32
  %conv_hi = trunc i64 %shr to i32
  %cmp = icmp eq i32 %conv_hi, 0
  %conv_lo = trunc i64 %val to i32
  %0 = call i32 @llvm.ctlz.i32(i32 %conv_lo, i1 false)
  %add = add i32 %0, 32
  %1 = call i32 @llvm.ctlz.i32(i32 %conv_hi, i1 false)
  %retval = select i1 %cmp, i32 %add, i32 %1
  call void @use_i32(i32 %1)
  ret i32 %retval
}

declare void @use_i32(i32)
+274 −0

File added.

Preview size limit exceeded, changes collapsed.