[InstCombine] dropRedundantMaskingOfLeftShiftInput(): truncation (PR42563) (ccf1a5f4) · Commits · llvm-doe / llvm-project

llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp

+33 −2

Original line number	Diff line number	Diff line
		@@ -162,10 +162,20 @@ dropRedundantMaskingOfLeftShiftInput(BinaryOperator *OuterShift,
		"The input must be 'shl'!");

		Value *Masked, *ShiftShAmt;
		match(OuterShift, m_Shift(m_Value(Masked), m_Value(ShiftShAmt)));
		match(OuterShift,
		m_Shift(m_Value(Masked), m_ZExtOrSelf(m_Value(ShiftShAmt))));

		// If there is a truncation between an outer shift and a possibly-mask,
		// then said truncation must be one-use, else we can't perform the fold.
		Value *Trunc;
		if (match(Masked, m_CombineAnd(m_Trunc(m_Value(Masked)), m_Value(Trunc))) &&
		!Trunc->hasOneUse())
		return nullptr;

		Type *NarrowestTy = OuterShift->getType();
		Type *WidestTy = Masked->getType();
		bool HadTrunc = WidestTy != NarrowestTy;

		// The mask must be computed in a type twice as wide to ensure
		// that no bits are lost if the sum-of-shifts is wider than the base type.
		Type *ExtendedTy = WidestTy->getExtendedType();
		@@ -186,6 +196,14 @@ dropRedundantMaskingOfLeftShiftInput(BinaryOperator *OuterShift,
		Constant *NewMask;

		if (match(Masked, m_c_And(m_CombineOr(MaskA, MaskB), m_Value(X)))) {
		// Peek through an optional zext of the shift amount.
		match(MaskShAmt, m_ZExtOrSelf(m_Value(MaskShAmt)));

		// We have two shift amounts from two different shifts. The types of those
		// shift amounts may not match. If that's the case let's bailout now.
		if (MaskShAmt->getType() != ShiftShAmt->getType())
		return nullptr;

		// Can we simplify (MaskShAmt+ShiftShAmt) ?
		auto *SumOfShAmts = dyn_cast_or_null<Constant>(SimplifyAddInst(
		MaskShAmt, ShiftShAmt, /*IsNSW=*/false, /*IsNUW=*/false, Q));
		@@ -210,6 +228,14 @@ dropRedundantMaskingOfLeftShiftInput(BinaryOperator *OuterShift,
		} else if (match(Masked, m_c_And(m_CombineOr(MaskC, MaskD), m_Value(X))) ||
		match(Masked, m_Shr(m_Shl(m_Value(X), m_Value(MaskShAmt)),
		m_Deferred(MaskShAmt)))) {
		// Peek through an optional zext of the shift amount.
		match(MaskShAmt, m_ZExtOrSelf(m_Value(MaskShAmt)));

		// We have two shift amounts from two different shifts. The types of those
		// shift amounts may not match. If that's the case let's bailout now.
		if (MaskShAmt->getType() != ShiftShAmt->getType())
		return nullptr;

		// Can we simplify (ShiftShAmt-MaskShAmt) ?
		auto *ShAmtsDiff = dyn_cast_or_null<Constant>(SimplifySubInst(
		ShiftShAmt, MaskShAmt, /*IsNSW=*/false, /*IsNUW=*/false, Q));
		@@ -254,10 +280,15 @@ dropRedundantMaskingOfLeftShiftInput(BinaryOperator *OuterShift,
		return nullptr;
		}

		// If we need to apply truncation, let's do it first, since we can.
		// We have already ensured that the old truncation will go away.
		if (HadTrunc)
		X = Builder.CreateTrunc(X, NarrowestTy);

		// No 'NUW'/'NSW'! We no longer know that we won't shift-out non-0 bits.
		// We didn't change the Type of this outermost shift, so we can just do it.
		auto *NewShift = BinaryOperator::Create(OuterShift->getOpcode(), X,
		OuterShift->getOperand(1));

		if (!NeedMask)
		return NewShift;

llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-after-truncation-variant-a.ll

+12 −12

Original line number	Diff line number	Diff line
		@@ -26,9 +26,9 @@ define i32 @t0_basic(i64 %x, i32 %nbits) {
		; CHECK-NEXT: call void @use64(i64 [[T2]])
		; CHECK-NEXT: call void @use64(i64 [[T3]])
		; CHECK-NEXT: call void @use32(i32 [[T4]])
		; CHECK-NEXT: [[T5:%.*]] = and i64 [[T3]], [[X:%.*]]
		; CHECK-NEXT: [[T6:%.*]] = trunc i64 [[T5]] to i32
		; CHECK-NEXT: [[T7:%.*]] = shl i32 [[T6]], [[T4]]
		; CHECK-NEXT: [[TMP1:%.*]] = trunc i64 [[X:%.*]] to i32
		; CHECK-NEXT: [[TMP2:%.*]] = shl i32 [[TMP1]], [[T4]]
		; CHECK-NEXT: [[T7:%.*]] = and i32 [[TMP2]], 2147483647
		; CHECK-NEXT: ret i32 [[T7]]
		;
		%t0 = add i32 %nbits, -1
		@@ -66,9 +66,9 @@ define <8 x i32> @t1_vec_splat(<8 x i64> %x, <8 x i32> %nbits) {
		; CHECK-NEXT: call void @use8xi64(<8 x i64> [[T2]])
		; CHECK-NEXT: call void @use8xi64(<8 x i64> [[T3]])
		; CHECK-NEXT: call void @use8xi32(<8 x i32> [[T4]])
		; CHECK-NEXT: [[T5:%.*]] = and <8 x i64> [[T3]], [[X:%.*]]
		; CHECK-NEXT: [[T6:%.*]] = trunc <8 x i64> [[T5]] to <8 x i32>
		; CHECK-NEXT: [[T7:%.*]] = shl <8 x i32> [[T6]], [[T4]]
		; CHECK-NEXT: [[TMP1:%.*]] = trunc <8 x i64> [[X:%.*]] to <8 x i32>
		; CHECK-NEXT: [[TMP2:%.*]] = shl <8 x i32> [[TMP1]], [[T4]]
		; CHECK-NEXT: [[T7:%.*]] = and <8 x i32> [[TMP2]], <i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647>
		; CHECK-NEXT: ret <8 x i32> [[T7]]
		;
		%t0 = add <8 x i32> %nbits, <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>
		@@ -101,9 +101,9 @@ define <8 x i32> @t2_vec_splat_undef(<8 x i64> %x, <8 x i32> %nbits) {
		; CHECK-NEXT: call void @use8xi64(<8 x i64> [[T2]])
		; CHECK-NEXT: call void @use8xi64(<8 x i64> [[T3]])
		; CHECK-NEXT: call void @use8xi32(<8 x i32> [[T4]])
		; CHECK-NEXT: [[T5:%.*]] = and <8 x i64> [[T3]], [[X:%.*]]
		; CHECK-NEXT: [[T6:%.*]] = trunc <8 x i64> [[T5]] to <8 x i32>
		; CHECK-NEXT: [[T7:%.*]] = shl <8 x i32> [[T6]], [[T4]]
		; CHECK-NEXT: [[TMP1:%.*]] = trunc <8 x i64> [[X:%.*]] to <8 x i32>
		; CHECK-NEXT: [[TMP2:%.*]] = shl <8 x i32> [[TMP1]], [[T4]]
		; CHECK-NEXT: [[T7:%.*]] = and <8 x i32> [[TMP2]], <i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647, i32 undef, i32 2147483647>
		; CHECK-NEXT: ret <8 x i32> [[T7]]
		;
		%t0 = add <8 x i32> %nbits, <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 undef, i32 -1>
		@@ -136,9 +136,9 @@ define <8 x i32> @t3_vec_nonsplat(<8 x i64> %x, <8 x i32> %nbits) {
		; CHECK-NEXT: call void @use8xi64(<8 x i64> [[T2]])
		; CHECK-NEXT: call void @use8xi64(<8 x i64> [[T3]])
		; CHECK-NEXT: call void @use8xi32(<8 x i32> [[T4]])
		; CHECK-NEXT: [[T5:%.*]] = and <8 x i64> [[T3]], [[X:%.*]]
		; CHECK-NEXT: [[T6:%.*]] = trunc <8 x i64> [[T5]] to <8 x i32>
		; CHECK-NEXT: [[T7:%.*]] = shl <8 x i32> [[T6]], [[T4]]
		; CHECK-NEXT: [[TMP1:%.*]] = trunc <8 x i64> [[X:%.*]] to <8 x i32>
		; CHECK-NEXT: [[TMP2:%.*]] = shl <8 x i32> [[TMP1]], [[T4]]
		; CHECK-NEXT: [[T7:%.*]] = and <8 x i32> [[TMP2]], <i32 undef, i32 0, i32 1, i32 2147483647, i32 -1, i32 -1, i32 -1, i32 -1>
		; CHECK-NEXT: ret <8 x i32> [[T7]]
		;
		%t0 = add <8 x i32> %nbits, <i32 -33, i32 -32, i32 -31, i32 -1, i32 0, i32 1, i32 31, i32 32>

llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-after-truncation-variant-b.ll

+12 −12

Original line number	Diff line number	Diff line
		@@ -26,9 +26,9 @@ define i32 @t0_basic(i64 %x, i32 %nbits) {
		; CHECK-NEXT: call void @use64(i64 [[T2]])
		; CHECK-NEXT: call void @use64(i64 [[T3]])
		; CHECK-NEXT: call void @use32(i32 [[T4]])
		; CHECK-NEXT: [[T5:%.*]] = and i64 [[T3]], [[X:%.*]]
		; CHECK-NEXT: [[T6:%.*]] = trunc i64 [[T5]] to i32
		; CHECK-NEXT: [[T7:%.*]] = shl i32 [[T6]], [[T4]]
		; CHECK-NEXT: [[TMP1:%.*]] = trunc i64 [[X:%.*]] to i32
		; CHECK-NEXT: [[TMP2:%.*]] = shl i32 [[TMP1]], [[T4]]
		; CHECK-NEXT: [[T7:%.*]] = and i32 [[TMP2]], 2147483647
		; CHECK-NEXT: ret i32 [[T7]]
		;
		%t0 = add i32 %nbits, -1
		@@ -66,9 +66,9 @@ define <8 x i32> @t1_vec_splat(<8 x i64> %x, <8 x i32> %nbits) {
		; CHECK-NEXT: call void @use8xi64(<8 x i64> [[T2]])
		; CHECK-NEXT: call void @use8xi64(<8 x i64> [[T3]])
		; CHECK-NEXT: call void @use8xi32(<8 x i32> [[T4]])
		; CHECK-NEXT: [[T5:%.*]] = and <8 x i64> [[T3]], [[X:%.*]]
		; CHECK-NEXT: [[T6:%.*]] = trunc <8 x i64> [[T5]] to <8 x i32>
		; CHECK-NEXT: [[T7:%.*]] = shl <8 x i32> [[T6]], [[T4]]
		; CHECK-NEXT: [[TMP1:%.*]] = trunc <8 x i64> [[X:%.*]] to <8 x i32>
		; CHECK-NEXT: [[TMP2:%.*]] = shl <8 x i32> [[TMP1]], [[T4]]
		; CHECK-NEXT: [[T7:%.*]] = and <8 x i32> [[TMP2]], <i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647>
		; CHECK-NEXT: ret <8 x i32> [[T7]]
		;
		%t0 = add <8 x i32> %nbits, <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>
		@@ -101,9 +101,9 @@ define <8 x i32> @t2_vec_splat_undef(<8 x i64> %x, <8 x i32> %nbits) {
		; CHECK-NEXT: call void @use8xi64(<8 x i64> [[T2]])
		; CHECK-NEXT: call void @use8xi64(<8 x i64> [[T3]])
		; CHECK-NEXT: call void @use8xi32(<8 x i32> [[T4]])
		; CHECK-NEXT: [[T5:%.*]] = and <8 x i64> [[T3]], [[X:%.*]]
		; CHECK-NEXT: [[T6:%.*]] = trunc <8 x i64> [[T5]] to <8 x i32>
		; CHECK-NEXT: [[T7:%.*]] = shl <8 x i32> [[T6]], [[T4]]
		; CHECK-NEXT: [[TMP1:%.*]] = trunc <8 x i64> [[X:%.*]] to <8 x i32>
		; CHECK-NEXT: [[TMP2:%.*]] = shl <8 x i32> [[TMP1]], [[T4]]
		; CHECK-NEXT: [[T7:%.*]] = and <8 x i32> [[TMP2]], <i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647, i32 undef, i32 2147483647>
		; CHECK-NEXT: ret <8 x i32> [[T7]]
		;
		%t0 = add <8 x i32> %nbits, <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 undef, i32 -1>
		@@ -136,9 +136,9 @@ define <8 x i32> @t3_vec_nonsplat(<8 x i64> %x, <8 x i32> %nbits) {
		; CHECK-NEXT: call void @use8xi64(<8 x i64> [[T2]])
		; CHECK-NEXT: call void @use8xi64(<8 x i64> [[T3]])
		; CHECK-NEXT: call void @use8xi32(<8 x i32> [[T4]])
		; CHECK-NEXT: [[T5:%.*]] = and <8 x i64> [[T3]], [[X:%.*]]
		; CHECK-NEXT: [[T6:%.*]] = trunc <8 x i64> [[T5]] to <8 x i32>
		; CHECK-NEXT: [[T7:%.*]] = shl <8 x i32> [[T6]], [[T4]]
		; CHECK-NEXT: [[TMP1:%.*]] = trunc <8 x i64> [[X:%.*]] to <8 x i32>
		; CHECK-NEXT: [[TMP2:%.*]] = shl <8 x i32> [[TMP1]], [[T4]]
		; CHECK-NEXT: [[T7:%.*]] = and <8 x i32> [[TMP2]], <i32 undef, i32 0, i32 1, i32 2147483647, i32 -1, i32 -1, i32 -1, i32 -1>
		; CHECK-NEXT: ret <8 x i32> [[T7]]
		;
		%t0 = add <8 x i32> %nbits, <i32 -33, i32 -32, i32 -31, i32 -1, i32 0, i32 1, i32 31, i32 32>

llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-after-truncation-variant-c.ll

+12 −12

Original line number	Diff line number	Diff line
		@@ -22,9 +22,9 @@ define i32 @t0_basic(i64 %x, i32 %nbits) {
		; CHECK-NEXT: call void @use64(i64 [[T0]])
		; CHECK-NEXT: call void @use64(i64 [[T1]])
		; CHECK-NEXT: call void @use32(i32 [[T2]])
		; CHECK-NEXT: [[T3:%.*]] = and i64 [[T1]], [[X:%.*]]
		; CHECK-NEXT: [[T4:%.*]] = trunc i64 [[T3]] to i32
		; CHECK-NEXT: [[T5:%.*]] = shl i32 [[T4]], [[T2]]
		; CHECK-NEXT: [[TMP1:%.*]] = trunc i64 [[X:%.*]] to i32
		; CHECK-NEXT: [[TMP2:%.*]] = shl i32 [[TMP1]], [[T2]]
		; CHECK-NEXT: [[T5:%.*]] = and i32 [[TMP2]], 2147483647
		; CHECK-NEXT: ret i32 [[T5]]
		;
		%t0 = zext i32 %nbits to i64
		@@ -54,9 +54,9 @@ define <8 x i32> @t1_vec_splat(<8 x i64> %x, <8 x i32> %nbits) {
		; CHECK-NEXT: call void @use8xi64(<8 x i64> [[T0]])
		; CHECK-NEXT: call void @use8xi64(<8 x i64> [[T1]])
		; CHECK-NEXT: call void @use8xi32(<8 x i32> [[T2]])
		; CHECK-NEXT: [[T3:%.*]] = and <8 x i64> [[T1]], [[X:%.*]]
		; CHECK-NEXT: [[T4:%.*]] = trunc <8 x i64> [[T3]] to <8 x i32>
		; CHECK-NEXT: [[T5:%.*]] = shl <8 x i32> [[T4]], [[T2]]
		; CHECK-NEXT: [[TMP1:%.*]] = trunc <8 x i64> [[X:%.*]] to <8 x i32>
		; CHECK-NEXT: [[TMP2:%.*]] = shl <8 x i32> [[TMP1]], [[T2]]
		; CHECK-NEXT: [[T5:%.*]] = and <8 x i32> [[TMP2]], <i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647>
		; CHECK-NEXT: ret <8 x i32> [[T5]]
		;
		%t0 = zext <8 x i32> %nbits to <8 x i64>
		@@ -81,9 +81,9 @@ define <8 x i32> @t2_vec_splat_undef(<8 x i64> %x, <8 x i32> %nbits) {
		; CHECK-NEXT: call void @use8xi64(<8 x i64> [[T0]])
		; CHECK-NEXT: call void @use8xi64(<8 x i64> [[T1]])
		; CHECK-NEXT: call void @use8xi32(<8 x i32> [[T2]])
		; CHECK-NEXT: [[T3:%.*]] = and <8 x i64> [[T1]], [[X:%.*]]
		; CHECK-NEXT: [[T4:%.*]] = trunc <8 x i64> [[T3]] to <8 x i32>
		; CHECK-NEXT: [[T5:%.*]] = shl <8 x i32> [[T4]], [[T2]]
		; CHECK-NEXT: [[TMP1:%.*]] = trunc <8 x i64> [[X:%.*]] to <8 x i32>
		; CHECK-NEXT: [[TMP2:%.*]] = shl <8 x i32> [[TMP1]], [[T2]]
		; CHECK-NEXT: [[T5:%.*]] = and <8 x i32> [[TMP2]], <i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647, i32 undef, i32 2147483647>
		; CHECK-NEXT: ret <8 x i32> [[T5]]
		;
		%t0 = zext <8 x i32> %nbits to <8 x i64>
		@@ -108,9 +108,9 @@ define <8 x i32> @t3_vec_nonsplat(<8 x i64> %x, <8 x i32> %nbits) {
		; CHECK-NEXT: call void @use8xi64(<8 x i64> [[T0]])
		; CHECK-NEXT: call void @use8xi64(<8 x i64> [[T1]])
		; CHECK-NEXT: call void @use8xi32(<8 x i32> [[T2]])
		; CHECK-NEXT: [[T3:%.*]] = and <8 x i64> [[T1]], [[X:%.*]]
		; CHECK-NEXT: [[T4:%.*]] = trunc <8 x i64> [[T3]] to <8 x i32>
		; CHECK-NEXT: [[T5:%.*]] = shl <8 x i32> [[T4]], [[T2]]
		; CHECK-NEXT: [[TMP1:%.*]] = trunc <8 x i64> [[X:%.*]] to <8 x i32>
		; CHECK-NEXT: [[TMP2:%.*]] = shl <8 x i32> [[TMP1]], [[T2]]
		; CHECK-NEXT: [[T5:%.*]] = and <8 x i32> [[TMP2]], <i32 undef, i32 1, i32 2147483647, i32 -1, i32 -1, i32 -1, i32 undef, i32 undef>
		; CHECK-NEXT: ret <8 x i32> [[T5]]
		;
		%t0 = zext <8 x i32> %nbits to <8 x i64>

llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-after-truncation-variant-d.ll

+12 −12

Original line number	Diff line number	Diff line
		@@ -24,9 +24,9 @@ define i32 @t0_basic(i64 %x, i32 %nbits) {
		; CHECK-NEXT: call void @use64(i64 [[T1]])
		; CHECK-NEXT: call void @use64(i64 [[T2]])
		; CHECK-NEXT: call void @use32(i32 [[T3]])
		; CHECK-NEXT: [[T4:%.*]] = and i64 [[T2]], [[X:%.*]]
		; CHECK-NEXT: [[T5:%.*]] = trunc i64 [[T4]] to i32
		; CHECK-NEXT: [[T6:%.*]] = shl i32 [[T5]], [[T3]]
		; CHECK-NEXT: [[TMP1:%.*]] = trunc i64 [[X:%.*]] to i32
		; CHECK-NEXT: [[TMP2:%.*]] = shl i32 [[TMP1]], [[T3]]
		; CHECK-NEXT: [[T6:%.*]] = and i32 [[TMP2]], 2147483647
		; CHECK-NEXT: ret i32 [[T6]]
		;
		%t0 = zext i32 %nbits to i64
		@@ -60,9 +60,9 @@ define <8 x i32> @t1_vec_splat(<8 x i64> %x, <8 x i32> %nbits) {
		; CHECK-NEXT: call void @use8xi64(<8 x i64> [[T1]])
		; CHECK-NEXT: call void @use8xi64(<8 x i64> [[T2]])
		; CHECK-NEXT: call void @use8xi32(<8 x i32> [[T3]])
		; CHECK-NEXT: [[T4:%.*]] = and <8 x i64> [[T2]], [[X:%.*]]
		; CHECK-NEXT: [[T5:%.*]] = trunc <8 x i64> [[T4]] to <8 x i32>
		; CHECK-NEXT: [[T6:%.*]] = shl <8 x i32> [[T5]], [[T3]]
		; CHECK-NEXT: [[TMP1:%.*]] = trunc <8 x i64> [[X:%.*]] to <8 x i32>
		; CHECK-NEXT: [[TMP2:%.*]] = shl <8 x i32> [[TMP1]], [[T3]]
		; CHECK-NEXT: [[T6:%.*]] = and <8 x i32> [[TMP2]], <i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647>
		; CHECK-NEXT: ret <8 x i32> [[T6]]
		;
		%t0 = zext <8 x i32> %nbits to <8 x i64>
		@@ -91,9 +91,9 @@ define <8 x i32> @t2_vec_splat_undef(<8 x i64> %x, <8 x i32> %nbits) {
		; CHECK-NEXT: call void @use8xi64(<8 x i64> [[T1]])
		; CHECK-NEXT: call void @use8xi64(<8 x i64> [[T2]])
		; CHECK-NEXT: call void @use8xi32(<8 x i32> [[T3]])
		; CHECK-NEXT: [[T4:%.*]] = and <8 x i64> [[T2]], [[X:%.*]]
		; CHECK-NEXT: [[T5:%.*]] = trunc <8 x i64> [[T4]] to <8 x i32>
		; CHECK-NEXT: [[T6:%.*]] = shl <8 x i32> [[T5]], [[T3]]
		; CHECK-NEXT: [[TMP1:%.*]] = trunc <8 x i64> [[X:%.*]] to <8 x i32>
		; CHECK-NEXT: [[TMP2:%.*]] = shl <8 x i32> [[TMP1]], [[T3]]
		; CHECK-NEXT: [[T6:%.*]] = and <8 x i32> [[TMP2]], <i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647, i32 undef, i32 2147483647>
		; CHECK-NEXT: ret <8 x i32> [[T6]]
		;
		%t0 = zext <8 x i32> %nbits to <8 x i64>
		@@ -122,9 +122,9 @@ define <8 x i32> @t3_vec_nonsplat(<8 x i64> %x, <8 x i32> %nbits) {
		; CHECK-NEXT: call void @use8xi64(<8 x i64> [[T1]])
		; CHECK-NEXT: call void @use8xi64(<8 x i64> [[T2]])
		; CHECK-NEXT: call void @use8xi32(<8 x i32> [[T3]])
		; CHECK-NEXT: [[T4:%.*]] = and <8 x i64> [[T2]], [[X:%.*]]
		; CHECK-NEXT: [[T5:%.*]] = trunc <8 x i64> [[T4]] to <8 x i32>
		; CHECK-NEXT: [[T6:%.*]] = shl <8 x i32> [[T5]], [[T3]]
		; CHECK-NEXT: [[TMP1:%.*]] = trunc <8 x i64> [[X:%.*]] to <8 x i32>
		; CHECK-NEXT: [[TMP2:%.*]] = shl <8 x i32> [[TMP1]], [[T3]]
		; CHECK-NEXT: [[T6:%.*]] = and <8 x i32> [[TMP2]], <i32 undef, i32 1, i32 2147483647, i32 -1, i32 -1, i32 -1, i32 undef, i32 undef>
		; CHECK-NEXT: ret <8 x i32> [[T6]]
		;
		%t0 = zext <8 x i32> %nbits to <8 x i64>