Unverified Commit 3aed0816 authored by Usha Gupta's avatar Usha Gupta Committed by GitHub
Browse files

[AggressiveInstCombine] Fold split-width i32 cttz/ctlz patterns into wide i64 intrinsics (#192296)

This patch teaches `AggressiveInstCombine` to recognize and fold common split-width i32 cttz/ctlz intrinsic calls into a single full-width i64
cttz/ctlz intrinsic.

For example:
```
define i32 @src(i64 %val) {
 %lo = trunc i64 %val to i32
 %cmp = icmp eq i32 %lo, 0
 %shr = lshr i64 %val, 32
 %hi = trunc i64 %shr to i32
 %cttz_hi = call i32 @llvm.cttz.i32(i32 %hi, i1 true)
 %hi_plus32 = or i32 %cttz_hi, 32
 %cttz_lo = call i32 @llvm.cttz.i32(i32 %lo, i1 true)
 %result = select i1 %cmp, i32 %hi_plus32, i32 %cttz_lo
 ret i32 %result
}

define i32 @tgt(i64 %val)  {
%cttz64 = call i64 @llvm.cttz.i64(i64 %val, i1 false)
%result = trunc i64 %cttz64 to i32
ret i32 %result
}
```
and similarly for ctlz intrinsic.

Alive2 proofs for the two folds added by this patch:
cttz:
https://alive2.llvm.org/ce/z/-s14-s

ctlz:
https://alive2.llvm.org/ce/z/WfQepH
parent 521f5534
Loading
Loading
Loading
Loading
+190 −0
Original line number Diff line number Diff line
@@ -53,6 +53,10 @@ STATISTIC(NumGuardedRotates,
STATISTIC(NumGuardedFunnelShifts,
          "Number of guarded funnel shifts transformed into funnel shifts");
STATISTIC(NumPopCountRecognized, "Number of popcount idioms recognized");
STATISTIC(NumSelectCTTZFolded,
          "Number of select-based split cttz patterns folded");
STATISTIC(NumSelectCTLZFolded,
          "Number of select-based split ctlz patterns folded");

static cl::opt<unsigned> MaxInstrsToScan(
    "aggressive-instcombine-max-scan-instrs", cl::init(64), cl::Hidden,
@@ -68,6 +72,190 @@ static cl::opt<unsigned>
                          cl::desc("The maximum length of a constant string to "
                                   "inline a memchr call."));

/// Try to fold a select-based split cttz pattern into a single full-width
/// cttz intrinsic call:
///
///   %lo = trunc iN %val to i(N/2)
///   %cmp = icmp eq i(N/2) %lo, 0
///   %shr = lshr iN %val, N/2
///   %hi = trunc iN %shr to i(N/2)
///   %cttz_hi = call i(N/2) @llvm.cttz.i(N/2)(i(N/2) %hi, ...)
///   %hi_plus = add/or_disjoint i(N/2) %cttz_hi, N/2
///   %cttz_lo = call i(N/2) @llvm.cttz.i(N/2)(i(N/2) %lo, ...)
///   %result = select i1 %cmp, i(N/2) %hi_plus, i(N/2) %cttz_lo
/// -->
///   %cttz_wide = call iN @llvm.cttz.iN(iN %val, i1 false)
///   %result = trunc iN %cttz_wide to i(N/2)
///
/// Returns true (and rewrites all uses of \p I) when the fold applied; the
/// now-dead narrow instructions are left for later cleanup.
/// Alive proof (for i64/i32):  https://alive2.llvm.org/ce/z/-s14-s
static bool foldSelectSplitCTTZ(Instruction &I) {
  Value *SelCond, *SelTrue, *SelFalse;
  if (!match(&I,
             m_Select(m_Value(SelCond), m_Value(SelTrue), m_Value(SelFalse))))
    return false;

  Type *NarrowTy = I.getType();
  if (!NarrowTy->isIntegerTy())
    return false;
  const unsigned NarrowBits = NarrowTy->getIntegerBitWidth();

  // Reject tiny types (i1, i2): the full-width cttz can produce a result
  // (2 * NarrowBits) that does not fit in the narrow type (e.g. cttz.i4 may
  // return 4, which i2 cannot hold).
  if (NarrowBits <= 2)
    return false;

  const unsigned WideBits = NarrowBits * 2;

  // The condition must test the (truncated) low half against zero. An eq
  // compare selects the hi-based arm; an ne compare swaps the arms.
  Value *LoHalf;
  Value *HiArm, *LoArm;
  if (match(SelCond,
            m_SpecificICmp(CmpInst::ICMP_EQ, m_Value(LoHalf), m_ZeroInt()))) {
    HiArm = SelTrue;
    LoArm = SelFalse;
  } else if (match(SelCond, m_SpecificICmp(CmpInst::ICMP_NE, m_Value(LoHalf),
                                           m_ZeroInt()))) {
    HiArm = SelFalse;
    LoArm = SelTrue;
  } else {
    return false;
  }

  // LoHalf must be a trunc of a value of exactly twice the narrow width.
  Value *SrcVal;
  if (!match(LoHalf, m_Trunc(m_Value(SrcVal))) ||
      !SrcVal->getType()->isIntegerTy(WideBits))
    return false;

  // Lo arm: cttz applied to the very same truncated value the compare tested.
  if (!match(LoArm, m_OneUse(m_Intrinsic<Intrinsic::cttz>(m_Specific(LoHalf),
                                                          m_Value()))))
    return false;

  // Hi arm: cttz of the high half, biased by NarrowBits via add or
  // disjoint-or (both are equivalent here since cttz.i(N/2) < N/2 bits).
  Value *HiCttz;
  if (!match(HiArm, m_OneUse(m_AddLike(m_Value(HiCttz),
                                       m_SpecificInt(NarrowBits)))))
    return false;

  Value *HiHalf;
  if (!match(HiCttz, m_OneUse(m_Intrinsic<Intrinsic::cttz>(m_Value(HiHalf),
                                                           m_Value()))))
    return false;

  if (!match(HiHalf,
             m_Trunc(m_LShr(m_Specific(SrcVal), m_SpecificInt(NarrowBits)))))
    return false;

  // Pattern matched: emit the wide cttz. It must be defined for zero input
  // (is_zero_poison = false) because the original select covered that case.
  IRBuilder<> Builder(&I);
  Value *WideCttz = Builder.CreateIntrinsic(
      Intrinsic::cttz, {SrcVal->getType()}, {SrcVal, Builder.getFalse()});
  I.replaceAllUsesWith(Builder.CreateTrunc(WideCttz, NarrowTy));
  ++NumSelectCTTZFolded;
  return true;
}

/// Same as foldSelectSplitCTTZ but for leading zeros (ctlz).
///
///   %shr = lshr iN %val, N/2
///   %hi = trunc iN %shr to i(N/2)
///   %cmp = icmp eq i(N/2) %hi, 0   (or icmp eq iN %shr, 0)
///   %lo = trunc iN %val to i(N/2)
///   %ctlz_lo = call i(N/2) @llvm.ctlz.i(N/2)(i(N/2) %lo, ...)
///   %lo_plus = add/or_disjoint i(N/2) %ctlz_lo, N/2
///   %ctlz_hi = call i(N/2) @llvm.ctlz.i(N/2)(i(N/2) %hi, ...)
///   %result = select i1 %cmp, i(N/2) %lo_plus, i(N/2) %ctlz_hi
/// -->
///   %ctlz_wide = call iN @llvm.ctlz.iN(iN %val, i1 false)
///   %result = trunc iN %ctlz_wide to i(N/2)
///
/// Returns true (and rewrites all uses of \p I) when the fold applied; the
/// now-dead narrow instructions are left for later cleanup passes.
/// Alive proof (for i64/i32): https://alive2.llvm.org/ce/z/WfQepH
static bool foldSelectSplitCTLZ(Instruction &I) {
  Value *Cond, *TrueVal, *FalseVal;
  if (!match(&I, m_Select(m_Value(Cond), m_Value(TrueVal), m_Value(FalseVal))))
    return false;

  // The select produces the narrow (half-width) result type.
  Type *HalfTy = I.getType();
  if (!HalfTy->isIntegerTy())
    return false;
  unsigned HalfWidth = HalfTy->getIntegerBitWidth();

  // Bail out on very small types (i1, i2): the full-width ctlz can return
  // values not representable in the half type (e.g., ctlz.i4 can return 4,
  // which doesn't fit in i2).
  if (HalfWidth <= 2)
    return false;

  unsigned FullWidth = HalfWidth * 2;

  // select (icmp eq HiPart, 0), LoResult, HiResult
  // HiPart could be (trunc (lshr SrcVal, N/2) to i(N/2)) or (lshr SrcVal, N/2)
  Value *HiPart;
  Value *LoResult, *HiResult;
  if (match(Cond,
            m_SpecificICmp(CmpInst::ICMP_EQ, m_Value(HiPart), m_ZeroInt()))) {
    LoResult = TrueVal;  // upper is zero: count in lower + N/2
    HiResult = FalseVal; // upper non-zero: count in upper
  } else if (match(Cond, m_SpecificICmp(CmpInst::ICMP_NE, m_Value(HiPart),
                                        m_ZeroInt()))) {
    // Inverted condition: the arms swap roles.
    LoResult = FalseVal;
    HiResult = TrueVal;
  } else {
    return false;
  }

  // Extract SrcVal from HiPart: either trunc(lshr(SrcVal, N/2)) or
  // lshr(SrcVal, N/2). Both forms test the same thing, since shifting right
  // by N/2 leaves only the high half's bits.
  Value *SrcVal;
  if (match(HiPart,
            m_Trunc(m_LShr(m_Value(SrcVal), m_SpecificInt(HalfWidth))))) {
    // HiPart is trunc(lshr(SrcVal, N/2))
  } else if (match(HiPart, m_LShr(m_Value(SrcVal), m_SpecificInt(HalfWidth)))) {
    // HiPart is lshr(SrcVal, N/2)
  } else {
    return false;
  }
  // SrcVal must be exactly twice the narrow width.
  if (!SrcVal->getType()->isIntegerTy(FullWidth))
    return false;

  // HiResult: ctlz(trunc(lshr(SrcVal, N/2)), _)
  // The one-use checks keep the fold profitable: without them the narrow
  // intrinsics would survive alongside the new wide one.
  Value *HiCtlzArg;
  if (!match(HiResult, m_OneUse(m_Intrinsic<Intrinsic::ctlz>(m_Value(HiCtlzArg),
                                                             m_Value()))))
    return false;

  if (!match(HiCtlzArg,
             m_Trunc(m_LShr(m_Specific(SrcVal), m_SpecificInt(HalfWidth)))))
    return false;

  // LoResult: add/or_disjoint(ctlz(trunc(SrcVal), _), N/2)
  // add and disjoint-or are interchangeable since ctlz.i(N/2) needs < N/2 bits.
  Value *CtlzLoCall;
  if (!match(LoResult, m_OneUse(m_AddLike(m_Value(CtlzLoCall),
                                          m_SpecificInt(HalfWidth)))))
    return false;

  Value *LoCtlzArg;
  if (!match(CtlzLoCall, m_OneUse(m_Intrinsic<Intrinsic::ctlz>(
                             m_Value(LoCtlzArg), m_Value()))))
    return false;

  if (!match(LoCtlzArg, m_Trunc(m_Specific(SrcVal))))
    return false;

  // Match successful. The wide ctlz must be defined at zero
  // (is_zero_poison = false) because the original select handled that case.
  IRBuilder<> Builder(&I);
  Value *CtlzWide = Builder.CreateIntrinsic(
      Intrinsic::ctlz, {SrcVal->getType()}, {SrcVal, Builder.getFalse()});
  Value *Trunc = Builder.CreateTrunc(CtlzWide, HalfTy);

  I.replaceAllUsesWith(Trunc);
  ++NumSelectCTLZFolded;
  return true;
}

/// Match a pattern for a bitwise funnel/rotate operation that partially guards
/// against undefined behavior by branching around the funnel-shift/rotation
/// when the shift amount is 0.
@@ -2115,6 +2303,8 @@ static bool foldUnusualPatterns(Function &F, DominatorTree &DT,
    for (Instruction &I : make_early_inc_range(llvm::reverse(BB))) {
      MadeChange |= foldAnyOrAllBitsSet(I);
      MadeChange |= foldGuardedFunnelShift(I, DT);
      MadeChange |= foldSelectSplitCTTZ(I);
      MadeChange |= foldSelectSplitCTLZ(I);
      MadeChange |= tryToRecognizePopCount(I);
      MadeChange |= tryToRecognizePopCount2n3(I);
      MadeChange |= tryToFPToSat(I, TTI);
+273 −0
Original line number Diff line number Diff line
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -passes=aggressive-instcombine -S | FileCheck %s

; Select-based split i64 ctlz: ctlz of each i32 half, combined by selecting
; on whether the high half is zero. Should fold to a single @llvm.ctlz.i64.
define i32 @split_ctlz_select(i64 %val) {
; CHECK-LABEL: @split_ctlz_select(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = call i64 @llvm.ctlz.i64(i64 [[VAL:%.*]], i1 false)
; CHECK-NEXT:    [[TMP1:%.*]] = trunc i64 [[TMP0]] to i32
; CHECK-NEXT:    ret i32 [[TMP1]]
;
entry:
  %shr = lshr i64 %val, 32
  %conv_hi = trunc i64 %shr to i32
  %cmp = icmp eq i32 %conv_hi, 0
  %conv_lo = trunc i64 %val to i32
  %0 = call i32 @llvm.ctlz.i32(i32 %conv_lo, i1 false)
  %add = add i32 %0, 32
  %1 = call i32 @llvm.ctlz.i32(i32 %conv_hi, i1 false)
  %retval = select i1 %cmp, i32 %add, i32 %1
  ret i32 %retval
}

; Select-based with i64 comparison on the lshr result (not truncated).
; The compare on the untruncated shifted value is equivalent, so this
; should still fold to a single @llvm.ctlz.i64.
define i32 @split_ctlz_select_i64cmp(i64 %val) {
; CHECK-LABEL: @split_ctlz_select_i64cmp(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = call i64 @llvm.ctlz.i64(i64 [[VAL:%.*]], i1 false)
; CHECK-NEXT:    [[TMP1:%.*]] = trunc i64 [[TMP0]] to i32
; CHECK-NEXT:    ret i32 [[TMP1]]
;
entry:
  %shr = lshr i64 %val, 32
  %cmp = icmp eq i64 %shr, 0
  %conv_hi = trunc i64 %shr to i32
  %conv_lo = trunc i64 %val to i32
  %0 = call i32 @llvm.ctlz.i32(i32 %conv_lo, i1 false)
  %add = add i32 %0, 32
  %1 = call i32 @llvm.ctlz.i32(i32 %conv_hi, i1 false)
  %retval = select i1 %cmp, i32 %add, i32 %1
  ret i32 %retval
}

; Select-based with icmp ne (inverted condition): the select arms are
; swapped relative to the eq form. Should still fold.
define i32 @split_ctlz_select_ne(i64 %val) {
; CHECK-LABEL: @split_ctlz_select_ne(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = call i64 @llvm.ctlz.i64(i64 [[VAL:%.*]], i1 false)
; CHECK-NEXT:    [[TMP1:%.*]] = trunc i64 [[TMP0]] to i32
; CHECK-NEXT:    ret i32 [[TMP1]]
;
entry:
  %shr = lshr i64 %val, 32
  %conv_hi = trunc i64 %shr to i32
  %cmp = icmp ne i32 %conv_hi, 0
  %conv_lo = trunc i64 %val to i32
  %0 = call i32 @llvm.ctlz.i32(i32 %conv_lo, i1 false)
  %add = add i32 %0, 32
  %1 = call i32 @llvm.ctlz.i32(i32 %conv_hi, i1 false)
  %retval = select i1 %cmp, i32 %1, i32 %add
  ret i32 %retval
}

; Select-based with "or disjoint" instead of add: equivalent to the add
; because ctlz.i32 fits in fewer than 6 bits, so bit 5 (value 32) is free.
; Should fold.
define i32 @split_ctlz_select_or(i64 %val) {
; CHECK-LABEL: @split_ctlz_select_or(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = call i64 @llvm.ctlz.i64(i64 [[VAL:%.*]], i1 false)
; CHECK-NEXT:    [[TMP1:%.*]] = trunc i64 [[TMP0]] to i32
; CHECK-NEXT:    ret i32 [[TMP1]]
;
entry:
  %shr = lshr i64 %val, 32
  %conv_hi = trunc i64 %shr to i32
  %cmp = icmp eq i32 %conv_hi, 0
  %conv_lo = trunc i64 %val to i32
  %0 = call i32 @llvm.ctlz.i32(i32 %conv_lo, i1 false)
  %add = or disjoint i32 %0, 32
  %1 = call i32 @llvm.ctlz.i32(i32 %conv_hi, i1 false)
  %retval = select i1 %cmp, i32 %add, i32 %1
  ret i32 %retval
}

; i32 split into two i16 ctlz: the fold is width-agnostic, not i64-specific.
define i16 @split_ctlz_select_i32(i32 %val) {
; CHECK-LABEL: @split_ctlz_select_i32(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = call i32 @llvm.ctlz.i32(i32 [[VAL:%.*]], i1 false)
; CHECK-NEXT:    [[TMP1:%.*]] = trunc i32 [[TMP0]] to i16
; CHECK-NEXT:    ret i16 [[TMP1]]
;
entry:
  %shr = lshr i32 %val, 16
  %conv_hi = trunc i32 %shr to i16
  %cmp = icmp eq i16 %conv_hi, 0
  %conv_lo = trunc i32 %val to i16
  %0 = call i16 @llvm.ctlz.i16(i16 %conv_lo, i1 false)
  %add = add i16 %0, 16
  %1 = call i16 @llvm.ctlz.i16(i16 %conv_hi, i1 false)
  %retval = select i1 %cmp, i16 %add, i16 %1
  ret i16 %retval
}

; i16 split into two i8 ctlz: i8 is above the i1/i2 bail-out threshold,
; so the fold still applies.
define i8 @split_ctlz_select_i16(i16 %val) {
; CHECK-LABEL: @split_ctlz_select_i16(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = call i16 @llvm.ctlz.i16(i16 [[VAL:%.*]], i1 false)
; CHECK-NEXT:    [[TMP1:%.*]] = trunc i16 [[TMP0]] to i8
; CHECK-NEXT:    ret i8 [[TMP1]]
;
entry:
  %shr = lshr i16 %val, 8
  %conv_hi = trunc i16 %shr to i8
  %cmp = icmp eq i8 %conv_hi, 0
  %conv_lo = trunc i16 %val to i8
  %0 = call i8 @llvm.ctlz.i8(i8 %conv_lo, i1 false)
  %add = add i8 %0, 8
  %1 = call i8 @llvm.ctlz.i8(i8 %conv_hi, i1 false)
  %retval = select i1 %cmp, i8 %add, i8 %1
  ret i8 %retval
}

; i128 split into two i64 ctlz: exercises widths beyond the native i64 case.
define i64 @split_ctlz_select_i128(i128 %val) {
; CHECK-LABEL: @split_ctlz_select_i128(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = call i128 @llvm.ctlz.i128(i128 [[VAL:%.*]], i1 false)
; CHECK-NEXT:    [[TMP1:%.*]] = trunc i128 [[TMP0]] to i64
; CHECK-NEXT:    ret i64 [[TMP1]]
;
entry:
  %shr = lshr i128 %val, 64
  %conv_hi = trunc i128 %shr to i64
  %cmp = icmp eq i64 %conv_hi, 0
  %conv_lo = trunc i128 %val to i64
  %0 = call i64 @llvm.ctlz.i64(i64 %conv_lo, i1 false)
  %add = add i64 %0, 64
  %1 = call i64 @llvm.ctlz.i64(i64 %conv_hi, i1 false)
  %retval = select i1 %cmp, i64 %add, i64 %1
  ret i64 %retval
}

; i256 split into two i128 ctlz.
; This tests m_APInt matching for shift amounts > 64 bits: the shift
; constant 128 still fits the matcher even though the type is wider than i64.
define i128 @split_ctlz_select_i256(i256 %val) {
; CHECK-LABEL: @split_ctlz_select_i256(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = call i256 @llvm.ctlz.i256(i256 [[VAL:%.*]], i1 false)
; CHECK-NEXT:    [[TMP1:%.*]] = trunc i256 [[TMP0]] to i128
; CHECK-NEXT:    ret i128 [[TMP1]]
;
entry:
  %shr = lshr i256 %val, 128
  %conv_hi = trunc i256 %shr to i128
  %cmp = icmp eq i128 %conv_hi, 0
  %conv_lo = trunc i256 %val to i128
  %0 = call i128 @llvm.ctlz.i128(i128 %conv_lo, i1 false)
  %add = add i128 %0, 128
  %1 = call i128 @llvm.ctlz.i128(i128 %conv_hi, i1 false)
  %retval = select i1 %cmp, i128 %add, i128 %1
  ret i128 %retval
}

; Negative test: mismatched source values (%val vs %val2), should NOT be
; folded — the low and high halves must come from the same wide value.
define i32 @split_ctlz_different_sources(i64 %val, i64 %val2) {
; CHECK-LABEL: @split_ctlz_different_sources(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[SHR:%.*]] = lshr i64 [[VAL:%.*]], 32
; CHECK-NEXT:    [[CONV_HI:%.*]] = trunc i64 [[SHR]] to i32
; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[CONV_HI]], 0
; CHECK-NEXT:    [[CONV_LO:%.*]] = trunc i64 [[VAL2:%.*]] to i32
; CHECK-NEXT:    [[TMP0:%.*]] = call i32 @llvm.ctlz.i32(i32 [[CONV_LO]], i1 false)
; CHECK-NEXT:    [[ADD:%.*]] = add i32 [[TMP0]], 32
; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @llvm.ctlz.i32(i32 [[CONV_HI]], i1 false)
; CHECK-NEXT:    [[RETVAL:%.*]] = select i1 [[CMP]], i32 [[ADD]], i32 [[TMP1]]
; CHECK-NEXT:    ret i32 [[RETVAL]]
;
entry:
  %shr = lshr i64 %val, 32
  %conv_hi = trunc i64 %shr to i32
  %cmp = icmp eq i32 %conv_hi, 0
  %conv_lo = trunc i64 %val2 to i32
  %0 = call i32 @llvm.ctlz.i32(i32 %conv_lo, i1 false)
  %add = add i32 %0, 32
  %1 = call i32 @llvm.ctlz.i32(i32 %conv_hi, i1 false)
  %retval = select i1 %cmp, i32 %add, i32 %1
  ret i32 %retval
}

; Negative test: wrong shift amount (16 instead of 32 for an i64 split),
; should NOT be folded — the shift must be exactly half the wide width.
define i32 @split_ctlz_wrong_shift(i64 %val) {
; CHECK-LABEL: @split_ctlz_wrong_shift(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[SHR:%.*]] = lshr i64 [[VAL:%.*]], 16
; CHECK-NEXT:    [[CONV_HI:%.*]] = trunc i64 [[SHR]] to i32
; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[CONV_HI]], 0
; CHECK-NEXT:    [[CONV_LO:%.*]] = trunc i64 [[VAL]] to i32
; CHECK-NEXT:    [[TMP0:%.*]] = call i32 @llvm.ctlz.i32(i32 [[CONV_LO]], i1 false)
; CHECK-NEXT:    [[ADD:%.*]] = add i32 [[TMP0]], 32
; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @llvm.ctlz.i32(i32 [[CONV_HI]], i1 false)
; CHECK-NEXT:    [[RETVAL:%.*]] = select i1 [[CMP]], i32 [[ADD]], i32 [[TMP1]]
; CHECK-NEXT:    ret i32 [[RETVAL]]
;
entry:
  %shr = lshr i64 %val, 16
  %conv_hi = trunc i64 %shr to i32
  %cmp = icmp eq i32 %conv_hi, 0
  %conv_lo = trunc i64 %val to i32
  %0 = call i32 @llvm.ctlz.i32(i32 %conv_lo, i1 false)
  %add = add i32 %0, 32
  %1 = call i32 @llvm.ctlz.i32(i32 %conv_hi, i1 false)
  %retval = select i1 %cmp, i32 %add, i32 %1
  ret i32 %retval
}

; Negative test: plain or (without the disjoint flag), should NOT be folded
; in the select-based pattern because or(32, 32)=32 != add(32, 32)=64 —
; without "disjoint" the or is not guaranteed equivalent to an add.
define i32 @split_ctlz_select_plain_or(i64 %val) {
; CHECK-LABEL: @split_ctlz_select_plain_or(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[SHR:%.*]] = lshr i64 [[VAL:%.*]], 32
; CHECK-NEXT:    [[CONV_HI:%.*]] = trunc i64 [[SHR]] to i32
; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[CONV_HI]], 0
; CHECK-NEXT:    [[CONV_LO:%.*]] = trunc i64 [[VAL]] to i32
; CHECK-NEXT:    [[TMP0:%.*]] = call i32 @llvm.ctlz.i32(i32 [[CONV_LO]], i1 false)
; CHECK-NEXT:    [[ADD:%.*]] = or i32 [[TMP0]], 32
; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @llvm.ctlz.i32(i32 [[CONV_HI]], i1 false)
; CHECK-NEXT:    [[RETVAL:%.*]] = select i1 [[CMP]], i32 [[ADD]], i32 [[TMP1]]
; CHECK-NEXT:    ret i32 [[RETVAL]]
;
entry:
  %shr = lshr i64 %val, 32
  %conv_hi = trunc i64 %shr to i32
  %cmp = icmp eq i32 %conv_hi, 0
  %conv_lo = trunc i64 %val to i32
  %0 = call i32 @llvm.ctlz.i32(i32 %conv_lo, i1 false)
  %add = or i32 %0, 32
  %1 = call i32 @llvm.ctlz.i32(i32 %conv_hi, i1 false)
  %retval = select i1 %cmp, i32 %add, i32 %1
  ret i32 %retval
}

; Negative test: an extra use of ctlz_hi prevents folding — the transform's
; one-use checks require the narrow intrinsics to die with the select.
define i32 @split_ctlz_select_multiuse(i64 %val) {
; CHECK-LABEL: @split_ctlz_select_multiuse(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[SHR:%.*]] = lshr i64 [[VAL:%.*]], 32
; CHECK-NEXT:    [[CONV_HI:%.*]] = trunc i64 [[SHR]] to i32
; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[CONV_HI]], 0
; CHECK-NEXT:    [[CONV_LO:%.*]] = trunc i64 [[VAL]] to i32
; CHECK-NEXT:    [[TMP0:%.*]] = call i32 @llvm.ctlz.i32(i32 [[CONV_LO]], i1 false)
; CHECK-NEXT:    [[ADD:%.*]] = add i32 [[TMP0]], 32
; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @llvm.ctlz.i32(i32 [[CONV_HI]], i1 false)
; CHECK-NEXT:    [[RETVAL:%.*]] = select i1 [[CMP]], i32 [[ADD]], i32 [[TMP1]]
; CHECK-NEXT:    call void @use_i32(i32 [[TMP1]])
; CHECK-NEXT:    ret i32 [[RETVAL]]
;
entry:
  %shr = lshr i64 %val, 32
  %conv_hi = trunc i64 %shr to i32
  %cmp = icmp eq i32 %conv_hi, 0
  %conv_lo = trunc i64 %val to i32
  %0 = call i32 @llvm.ctlz.i32(i32 %conv_lo, i1 false)
  %add = add i32 %0, 32
  %1 = call i32 @llvm.ctlz.i32(i32 %conv_hi, i1 false)
  %retval = select i1 %cmp, i32 %add, i32 %1
  call void @use_i32(i32 %1)
  ret i32 %retval
}

declare void @use_i32(i32)
+274 −0

File added.

Preview size limit exceeded, changes collapsed.