Recommit "[DAGCombiner] Transform `(icmp eq/ne (and X,C0),(shift X,C1))` to... (ed7c97e0) · Commits · llvm-doe / llvm-project

llvm/include/llvm/CodeGen/TargetLowering.h

+18 −0

Original line number	Diff line number	Diff line
		@@ -832,6 +832,24 @@ public:
		return N->getOpcode() == ISD::FDIV;
		}

		// Given:
		// (icmp eq/ne (and X, C0), (shift X, C1))
		// or
		// (icmp eq/ne X, (rotate X, CPow2))
		//
		// If C0 is a mask or shifted mask and the shift amt (C1) isolates the
		// remaining bits (i.e., something like `(x64 & UINT32_MAX) == (x64 >> 32)`),
		// do we prefer the shift to be shift-right, shift-left, or rotate?
		// Note: It's only valid to convert the rotate version to the shift version
		// iff the shift-amt (`C1`) is a power of 2 (including 0).
		// If ShiftOpc (the current opcode) is returned, do nothing.
		//
		// Parameters:
		//   VT                - value type passed by the caller.
		//   ShiftOpc          - opcode of the shift/rotate currently in use.
		//   MayTransformRotate - whether the caller allows interchanging the
		//                        rotate and shift forms.
		//   ShiftOrRotateAmt  - the constant shift/rotate amount (C1).
		//   AndMask           - the AND mask (C0); std::optional, so it may be
		//                       empty.
		//
		// This default implementation expresses no preference: it returns ShiftOpc
		// unchanged.
		virtual unsigned preferedOpcodeForCmpEqPiecesOfOperand(
		EVT VT, unsigned ShiftOpc, bool MayTransformRotate,
		const APInt &ShiftOrRotateAmt,
		const std::optional<APInt> &AndMask) const {
		return ShiftOpc;
		}

		/// These two forms are equivalent:
		/// sub %y, (xor %x, -1)
		/// add (add %x, 1), %y

llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

+120 −15

Original line number	Diff line number	Diff line
		@@ -12466,13 +12466,11 @@ SDValue DAGCombiner::visitSETCC(SDNode *N) {

		ISD::CondCode Cond = cast<CondCodeSDNode>(N->getOperand(2))->get();
		EVT VT = N->getValueType(0);
		SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);

		SDValue Combined = SimplifySetCC(VT, N->getOperand(0), N->getOperand(1), Cond,
		SDLoc(N), !PreferSetCC);

		if (!Combined)
		return SDValue();
		SDValue Combined = SimplifySetCC(VT, N0, N1, Cond, SDLoc(N), !PreferSetCC);

		if (Combined) {
		// If we prefer to have a setcc, and we don't, we'll try our best to
		// recreate one using rebuildSetCC.
		if (PreferSetCC && Combined.getOpcode() != ISD::SETCC) {
		@@ -12485,10 +12483,117 @@ SDValue DAGCombiner::visitSETCC(SDNode *N) {
		if (NewSetCC)
		return NewSetCC;
		}

		return Combined;
		}

		// Optimize
		// 1) (icmp eq/ne (and X, C0), (shift X, C1))
		// or
		// 2) (icmp eq/ne X, (rotate X, C1))
		// If C0 is a mask or shifted mask and the shift amt (C1) isolates the
		// remaining bits (i.e., something like `(x64 & UINT32_MAX) == (x64 >> 32)`)
		// Then:
		// If C1 is a power of 2, then the rotate and shift+and versions are
		// equivalent, so we can interchange them depending on target preference.
		// Otherwise, if we have the shift+and version we can interchange srl/shl,
		// which in turn affects the constant C0. We can use this to get better
		// constants, again determined by target preference.
		if (Cond == ISD::SETNE \|\| Cond == ISD::SETEQ) {
		// Returns true when A is an AND and B is a logical shift (SRL or SHL)
		// whose first operand is the same value as A's first operand, i.e. the
		// pair has the shape `(and X, ...)` / `(shift X, ...)`.
		auto IsAndWithShift = [](SDValue A, SDValue B) {
		  return A.getOpcode() == ISD::AND &&
		         (B.getOpcode() == ISD::SRL || B.getOpcode() == ISD::SHL) &&
		         A.getOperand(0) == B.getOperand(0);
		};
		// Returns true when B is a rotate (ROTL or ROTR) whose first operand is
		// A itself, i.e. the pair has the shape `X` / `(rotate X, ...)`.
		auto IsRotateWithOp = [](SDValue A, SDValue B) {
		  return (B.getOpcode() == ISD::ROTL || B.getOpcode() == ISD::ROTR) &&
		         B.getOperand(0) == A;
		};
		SDValue AndOrOp = SDValue(), ShiftOrRotate = SDValue();
		bool IsRotate = false;

		// Find either shift+and or rotate pattern.
		if (IsAndWithShift(N0, N1)) {
		AndOrOp = N0;
		ShiftOrRotate = N1;
		} else if (IsAndWithShift(N1, N0)) {
		AndOrOp = N1;
		ShiftOrRotate = N0;
		} else if (IsRotateWithOp(N0, N1)) {
		IsRotate = true;
		AndOrOp = N0;
		ShiftOrRotate = N1;
		} else if (IsRotateWithOp(N1, N0)) {
		IsRotate = true;
		AndOrOp = N1;
		ShiftOrRotate = N0;
		}

		if (AndOrOp && ShiftOrRotate && ShiftOrRotate.hasOneUse() &&
		(IsRotate \|\| AndOrOp.hasOneUse())) {
		EVT OpVT = N0.getValueType();
		// Get constant shift/rotate amount and possibly mask (if it's the
		// shift+and variant).
		// Yields the APInt value of Op when isConstOrConstSplat finds a constant
		// node for it (undefs and truncation disallowed), std::nullopt otherwise.
		auto GetAPIntValue = [](SDValue Op) -> std::optional<APInt> {
		  ConstantSDNode *CNode = isConstOrConstSplat(Op, /*AllowUndefs*/ false,
		                                              /*AllowTrunc*/ false);
		  if (CNode == nullptr)
		    return std::nullopt;
		  return CNode->getAPIntValue();
		};
		std::optional<APInt> AndCMask =
		IsRotate ? std::nullopt : GetAPIntValue(AndOrOp.getOperand(1));
		std::optional<APInt> ShiftCAmt =
		GetAPIntValue(ShiftOrRotate.getOperand(1));
		unsigned NumBits = OpVT.getScalarSizeInBits();

		// We found constants.
		if (ShiftCAmt && (IsRotate \|\| AndCMask) && ShiftCAmt->ult(NumBits)) {
		unsigned ShiftOpc = ShiftOrRotate.getOpcode();
		// Check that the constants meet the constraints.
		bool CanTransform = IsRotate;
		if (!CanTransform) {
		// Check that mask and shift complement each other
		CanTransform = *ShiftCAmt == (~*AndCMask).popcount();
		// Check that we are comparing all bits
		CanTransform &= (*ShiftCAmt + AndCMask->popcount()) == NumBits;
		// Check that the and mask is correct for the shift
		CanTransform &=
		ShiftOpc == ISD::SHL ? (~*AndCMask).isMask() : AndCMask->isMask();
		}

		// See if target prefers another shift/rotate opcode.
		unsigned NewShiftOpc = TLI.preferedOpcodeForCmpEqPiecesOfOperand(
		OpVT, ShiftOpc, ShiftCAmt->isPowerOf2(), *ShiftCAmt, AndCMask);
		// Transform is valid and we have a new preference.
		if (CanTransform && NewShiftOpc != ShiftOpc) {
		SDLoc DL(N);
		SDValue NewShiftOrRotate =
		DAG.getNode(NewShiftOpc, DL, OpVT, ShiftOrRotate.getOperand(0),
		ShiftOrRotate.getOperand(1));
		SDValue NewAndOrOp = SDValue();

		if (NewShiftOpc == ISD::SHL \|\| NewShiftOpc == ISD::SRL) {
		APInt NewMask =
		NewShiftOpc == ISD::SHL
		? APInt::getHighBitsSet(NumBits,
		NumBits - ShiftCAmt->getZExtValue())
		: APInt::getLowBitsSet(NumBits,
		NumBits - ShiftCAmt->getZExtValue());
		NewAndOrOp =
		DAG.getNode(ISD::AND, DL, OpVT, ShiftOrRotate.getOperand(0),
		DAG.getConstant(NewMask, DL, OpVT));
		} else {
		NewAndOrOp = ShiftOrRotate.getOperand(0);
		}

		return DAG.getSetCC(DL, VT, NewAndOrOp, NewShiftOrRotate, Cond);
		}
		}
		}
		}
		return SDValue();
		}

		SDValue DAGCombiner::visitSETCCCARRY(SDNode *N) {
		SDValue LHS = N->getOperand(0);
		SDValue RHS = N->getOperand(1);

llvm/lib/Target/X86/X86ISelLowering.cpp

+67 −0

Original line number	Diff line number	Diff line
		@@ -3263,6 +3263,73 @@ bool X86TargetLowering::
		return NewShiftOpcode == ISD::SHL;
		}

		/// X86 preference for the opcode used when comparing "pieces" of one operand:
		///   (icmp eq/ne (and X, C0), (shift X, C1))  or  (icmp eq/ne X, (rotate X, C1))
		///
		/// \param VT                 Type of the compared value.
		/// \param ShiftOpc           Opcode currently in use (SHL, SRL, ROTL or ROTR).
		/// \param MayTransformRotate True when the caller allows converting between
		///                           the rotate form and the shift+and form.
		/// \param ShiftOrRotateAmt   The constant shift/rotate amount.
		/// \param AndMask            The AND mask; must hold a value when \p ShiftOpc
		///                           is SHL or SRL.
		/// \returns The preferred opcode; returning \p ShiftOpc means "no change".
		unsigned X86TargetLowering::preferedOpcodeForCmpEqPiecesOfOperand(
		    EVT VT, unsigned ShiftOpc, bool MayTransformRotate,
		    const APInt &ShiftOrRotateAmt, const std::optional<APInt> &AndMask) const {
		  if (!VT.isInteger())
		    return ShiftOpc;

		  bool PreferRotate = false;
		  if (VT.isVector()) {
		    // For vectors, if we have rotate instruction support, then it's definitely
		    // best. Otherwise it's not clear what is best, so just don't make changes.
		    PreferRotate = Subtarget.hasAVX512() && (VT.getScalarType() == MVT::i32 ||
		                                             VT.getScalarType() == MVT::i64);
		  } else {
		    // For scalars, if we have BMI2 prefer rotate (rorx). Otherwise prefer
		    // rotate unless we would get a zext-style mask (8/16/32 bits) with shr.
		    PreferRotate = Subtarget.hasBMI2();
		    if (!PreferRotate) {
		      unsigned MaskBits =
		          VT.getScalarSizeInBits() - ShiftOrRotateAmt.getZExtValue();
		      PreferRotate = (MaskBits != 8) && (MaskBits != 16) && (MaskBits != 32);
		    }
		  }

		  if (ShiftOpc == ISD::SHL || ShiftOpc == ISD::SRL) {
		    assert(AndMask.has_value() && "Null andmask when querying about shift+and");

		    if (PreferRotate && MayTransformRotate)
		      return ISD::ROTL;

		    // If vector we don't really get much benefit swapping around constants.
		    // Maybe we could check if the DAG has the flipped node already in the
		    // future.
		    if (VT.isVector())
		      return ShiftOpc;

		    // See if it is beneficial to swap the shift type.
		    if (ShiftOpc == ISD::SHL) {
		      // If the current setup has imm64 mask, then inverse will have
		      // at least imm32 mask (or be zext i32 -> i64).
		      if (VT == MVT::i64)
		        return AndMask->getSignificantBits() > 32 ? (unsigned)ISD::SRL
		                                                  : ShiftOpc;

		      // We can only benefit if req at least 7-bit for the mask. We
		      // don't want to replace shl of 1,2,3 as they can be implemented
		      // with lea/add.
		      return ShiftOrRotateAmt.uge(7) ? (unsigned)ISD::SRL : ShiftOpc;
		    }

		    if (VT == MVT::i64)
		      // Keep exactly 32-bit imm64, this is zext i32 -> i64 which is
		      // extremely efficient.
		      return AndMask->getSignificantBits() > 33 ? (unsigned)ISD::SHL : ShiftOpc;

		    // Keep small shifts as shl so we can generate add/lea.
		    return ShiftOrRotateAmt.ult(7) ? (unsigned)ISD::SHL : ShiftOpc;
		  }

		  // Rotate input. Keep the rotate when:
		  //  - we prefer rotate (PreferRotate is also set whenever SRL would not
		  //    yield a zext mask),
		  //  - the caller says the rotate may not be turned into shift+and. Without
		  //    this check a rotate by a non-power-of-2 amount (e.g. i32 rotate by
		  //    24) would be rewritten to srl+and, which compares fewer bits and is
		  //    not equivalent, or
		  //  - the type is a vector.
		  if (PreferRotate || !MayTransformRotate || VT.isVector())
		    return ShiftOpc;

		  // Non-vector type and we have a zext mask with SRL.
		  return ISD::SRL;
		}

		/// Returns true for every node except one whose opcode is ISD::FP_EXTEND.
		bool X86TargetLowering::preferScalarizeSplat(SDNode *N) const {
		  const bool IsFPExtend = N->getOpcode() == ISD::FP_EXTEND;
		  return !IsFPExtend;
		}

llvm/lib/Target/X86/X86ISelLowering.h

+5 −0

Original line number	Diff line number	Diff line
		@@ -1138,6 +1138,11 @@ namespace llvm {
		unsigned OldShiftOpcode, unsigned NewShiftOpcode,
		SelectionDAG &DAG) const override;

		// Override of the preferedOpcodeForCmpEqPiecesOfOperand hook. Takes the
		// value type, the current shift/rotate opcode, whether the rotate form may
		// be transformed, the shift/rotate amount and the optional AND mask, and
		// returns an opcode as `unsigned`.
		unsigned preferedOpcodeForCmpEqPiecesOfOperand(
		EVT VT, unsigned ShiftOpc, bool MayTransformRotate,
		const APInt &ShiftOrRotateAmt,
		const std::optional<APInt> &AndMask) const override;

		bool preferScalarizeSplat(SDNode *N) const override;

		bool shouldFoldConstantShiftPairToMask(const SDNode *N,

llvm/test/CodeGen/X86/cmp-shiftX-maskX.ll

+95 −85

Original line number	Diff line number	Diff line
		@@ -20,9 +20,8 @@ define i1 @shr_to_shl_eq_i8_s2(i8 %x) {
		; CHECK-LABEL: shr_to_shl_eq_i8_s2:
		; CHECK: # %bb.0:
		; CHECK-NEXT: movl %edi, %eax
		; CHECK-NEXT: andb $63, %al
		; CHECK-NEXT: shrb $2, %dil
		; CHECK-NEXT: cmpb %dil, %al
		; CHECK-NEXT: rolb $2, %al
		; CHECK-NEXT: cmpb %al, %dil
		; CHECK-NEXT: sete %al
		; CHECK-NEXT: retq
		%and = and i8 %x, 63
		@@ -35,9 +34,9 @@ define i1 @shl_to_shr_ne_i8_s7(i8 %x) {
		; CHECK-LABEL: shl_to_shr_ne_i8_s7:
		; CHECK: # %bb.0:
		; CHECK-NEXT: movl %edi, %eax
		; CHECK-NEXT: shlb $7, %al
		; CHECK-NEXT: andb $-128, %dil
		; CHECK-NEXT: cmpb %dil, %al
		; CHECK-NEXT: shrb $7, %al
		; CHECK-NEXT: andb $1, %dil
		; CHECK-NEXT: cmpb %al, %dil
		; CHECK-NEXT: setne %al
		; CHECK-NEXT: retq
		%shl = shl i8 %x, 7
		@@ -63,9 +62,8 @@ define i1 @shr_to_shl_eq_i8_s1(i8 %x) {
		; CHECK-LABEL: shr_to_shl_eq_i8_s1:
		; CHECK: # %bb.0:
		; CHECK-NEXT: movl %edi, %eax
		; CHECK-NEXT: andb $127, %al
		; CHECK-NEXT: shrb %dil
		; CHECK-NEXT: cmpb %dil, %al
		; CHECK-NEXT: rolb %al
		; CHECK-NEXT: cmpb %al, %dil
		; CHECK-NEXT: sete %al
		; CHECK-NEXT: retq
		%and = and i8 %x, 127
		@@ -77,10 +75,10 @@ define i1 @shr_to_shl_eq_i8_s1(i8 %x) {
		define i1 @shr_to_shl_eq_i32_s3(i32 %x) {
		; CHECK-LABEL: shr_to_shl_eq_i32_s3:
		; CHECK: # %bb.0:
		; CHECK-NEXT: movl %edi, %eax
		; CHECK-NEXT: andl $536870911, %eax # imm = 0x1FFFFFFF
		; CHECK-NEXT: shrl $3, %edi
		; CHECK-NEXT: cmpl %edi, %eax
		; CHECK-NEXT: # kill: def $edi killed $edi def $rdi
		; CHECK-NEXT: leal (,%rdi,8), %eax
		; CHECK-NEXT: andl $-8, %edi
		; CHECK-NEXT: cmpl %eax, %edi
		; CHECK-NEXT: sete %al
		; CHECK-NEXT: retq
		%and = and i32 %x, 536870911
		@@ -105,14 +103,20 @@ define i1 @shl_to_shr_eq_i32_s3_fail(i32 %x) {
		}

		define i1 @shl_to_shr_ne_i32_s16(i32 %x) {
		; CHECK-LABEL: shl_to_shr_ne_i32_s16:
		; CHECK: # %bb.0:
		; CHECK-NEXT: movl %edi, %eax
		; CHECK-NEXT: shll $16, %eax
		; CHECK-NEXT: andl $-65536, %edi # imm = 0xFFFF0000
		; CHECK-NEXT: cmpl %edi, %eax
		; CHECK-NEXT: setne %al
		; CHECK-NEXT: retq
		; CHECK-NOBMI-LABEL: shl_to_shr_ne_i32_s16:
		; CHECK-NOBMI: # %bb.0:
		; CHECK-NOBMI-NEXT: movzwl %di, %eax
		; CHECK-NOBMI-NEXT: shrl $16, %edi
		; CHECK-NOBMI-NEXT: cmpl %edi, %eax
		; CHECK-NOBMI-NEXT: setne %al
		; CHECK-NOBMI-NEXT: retq
		;
		; CHECK-BMI2-LABEL: shl_to_shr_ne_i32_s16:
		; CHECK-BMI2: # %bb.0:
		; CHECK-BMI2-NEXT: rorxl $16, %edi, %eax
		; CHECK-BMI2-NEXT: cmpl %eax, %edi
		; CHECK-BMI2-NEXT: setne %al
		; CHECK-BMI2-NEXT: retq
		%shl = shl i32 %x, 16
		%and = and i32 %x, 4294901760
		%r = icmp ne i32 %shl, %and
		@@ -137,9 +141,8 @@ define i1 @shl_to_shr_ne_i32_s16_fail(i32 %x) {
		define i1 @shr_to_shl_eq_i16_s1(i16 %x) {
		; CHECK-LABEL: shr_to_shl_eq_i16_s1:
		; CHECK: # %bb.0:
		; CHECK-NEXT: movzwl %di, %eax
		; CHECK-NEXT: andl $32767, %edi # imm = 0x7FFF
		; CHECK-NEXT: shrl %eax
		; CHECK-NEXT: movl %edi, %eax
		; CHECK-NEXT: rolw %ax
		; CHECK-NEXT: cmpw %ax, %di
		; CHECK-NEXT: sete %al
		; CHECK-NEXT: retq
		@@ -167,9 +170,9 @@ define i1 @shr_to_shl_eq_i16_s1_fail(i16 %x) {
		define i1 @shl_to_shr_eq_i64_s44(i64 %x) {
		; CHECK-LABEL: shl_to_shr_eq_i64_s44:
		; CHECK: # %bb.0:
		; CHECK-NEXT: movabsq $-17592186044416, %rax # imm = 0xFFFFF00000000000
		; CHECK-NEXT: andq %rdi, %rax
		; CHECK-NEXT: shlq $44, %rdi
		; CHECK-NEXT: movq %rdi, %rax
		; CHECK-NEXT: shrq $44, %rax
		; CHECK-NEXT: andl $1048575, %edi # imm = 0xFFFFF
		; CHECK-NEXT: cmpq %rax, %rdi
		; CHECK-NEXT: sete %al
		; CHECK-NEXT: retq
		@@ -180,13 +183,20 @@ define i1 @shl_to_shr_eq_i64_s44(i64 %x) {
		}

		define i1 @shr_to_shl_ne_i64_s32(i64 %x) {
		; CHECK-LABEL: shr_to_shl_ne_i64_s32:
		; CHECK: # %bb.0:
		; CHECK-NEXT: movl %edi, %eax
		; CHECK-NEXT: shrq $32, %rdi
		; CHECK-NEXT: cmpq %rdi, %rax
		; CHECK-NEXT: setne %al
		; CHECK-NEXT: retq
		; CHECK-NOBMI-LABEL: shr_to_shl_ne_i64_s32:
		; CHECK-NOBMI: # %bb.0:
		; CHECK-NOBMI-NEXT: movl %edi, %eax
		; CHECK-NOBMI-NEXT: shrq $32, %rdi
		; CHECK-NOBMI-NEXT: cmpq %rdi, %rax
		; CHECK-NOBMI-NEXT: setne %al
		; CHECK-NOBMI-NEXT: retq
		;
		; CHECK-BMI2-LABEL: shr_to_shl_ne_i64_s32:
		; CHECK-BMI2: # %bb.0:
		; CHECK-BMI2-NEXT: rorxq $32, %rdi, %rax
		; CHECK-BMI2-NEXT: cmpq %rax, %rdi
		; CHECK-BMI2-NEXT: setne %al
		; CHECK-BMI2-NEXT: retq
		%and = and i64 %x, 4294967295
		%shr = lshr i64 %x, 32
		%r = icmp ne i64 %and, %shr
		@@ -230,9 +240,9 @@ define i1 @ashr_to_shl_ne_i64_s32_fail(i64 %x) {
		define i1 @shl_to_shr_eq_i64_s63(i64 %x) {
		; CHECK-LABEL: shl_to_shr_eq_i64_s63:
		; CHECK: # %bb.0:
		; CHECK-NEXT: movabsq $-9223372036854775808, %rax # imm = 0x8000000000000000
		; CHECK-NEXT: andq %rdi, %rax
		; CHECK-NEXT: shlq $63, %rdi
		; CHECK-NEXT: movq %rdi, %rax
		; CHECK-NEXT: shrq $63, %rax
		; CHECK-NEXT: andl $1, %edi
		; CHECK-NEXT: cmpq %rax, %rdi
		; CHECK-NEXT: sete %al
		; CHECK-NEXT: retq
		@@ -258,23 +268,14 @@ define i1 @shl_to_shr_eq_i64_s63_fail(i64 %x) {
		}

		define i1 @shr_to_shl_eq_i64_s7(i64 %x) {
		; CHECK-NOBMI-LABEL: shr_to_shl_eq_i64_s7:
		; CHECK-NOBMI: # %bb.0:
		; CHECK-NOBMI-NEXT: movabsq $144115188075855871, %rax # imm = 0x1FFFFFFFFFFFFFF
		; CHECK-NOBMI-NEXT: andq %rdi, %rax
		; CHECK-NOBMI-NEXT: shrq $7, %rdi
		; CHECK-NOBMI-NEXT: cmpq %rdi, %rax
		; CHECK-NOBMI-NEXT: sete %al
		; CHECK-NOBMI-NEXT: retq
		;
		; CHECK-BMI2-LABEL: shr_to_shl_eq_i64_s7:
		; CHECK-BMI2: # %bb.0:
		; CHECK-BMI2-NEXT: movb $57, %al
		; CHECK-BMI2-NEXT: bzhiq %rax, %rdi, %rax
		; CHECK-BMI2-NEXT: shrq $7, %rdi
		; CHECK-BMI2-NEXT: cmpq %rdi, %rax
		; CHECK-BMI2-NEXT: sete %al
		; CHECK-BMI2-NEXT: retq
		; CHECK-LABEL: shr_to_shl_eq_i64_s7:
		; CHECK: # %bb.0:
		; CHECK-NEXT: movq %rdi, %rax
		; CHECK-NEXT: shlq $7, %rax
		; CHECK-NEXT: andq $-128, %rdi
		; CHECK-NEXT: cmpq %rax, %rdi
		; CHECK-NEXT: sete %al
		; CHECK-NEXT: retq
		%and = and i64 %x, 144115188075855871
		%shr = lshr i64 %x, 7
		%r = icmp eq i64 %and, %shr
		@@ -284,9 +285,8 @@ define i1 @shr_to_shl_eq_i64_s7(i64 %x) {
		define i1 @shl_to_shr_ne_i32_s24(i32 %x) {
		; CHECK-LABEL: shl_to_shr_ne_i32_s24:
		; CHECK: # %bb.0:
		; CHECK-NEXT: movl %edi, %eax
		; CHECK-NEXT: shll $24, %eax
		; CHECK-NEXT: andl $-16777216, %edi # imm = 0xFF000000
		; CHECK-NEXT: movzbl %dil, %eax
		; CHECK-NEXT: shrl $24, %edi
		; CHECK-NEXT: cmpl %edi, %eax
		; CHECK-NEXT: setne %al
		; CHECK-NEXT: retq
		@@ -312,14 +312,20 @@ define i1 @shr_to_shl_ne_i32_s24_fail(i32 %x) {
		}

		define i1 @shr_to_shl_ne_i32_s8(i32 %x) {
		; CHECK-LABEL: shr_to_shl_ne_i32_s8:
		; CHECK: # %bb.0:
		; CHECK-NEXT: movl %edi, %eax
		; CHECK-NEXT: andl $16777215, %eax # imm = 0xFFFFFF
		; CHECK-NEXT: shrl $8, %edi
		; CHECK-NEXT: cmpl %edi, %eax
		; CHECK-NEXT: setne %al
		; CHECK-NEXT: retq
		; CHECK-NOBMI-LABEL: shr_to_shl_ne_i32_s8:
		; CHECK-NOBMI: # %bb.0:
		; CHECK-NOBMI-NEXT: movl %edi, %eax
		; CHECK-NOBMI-NEXT: roll $8, %eax
		; CHECK-NOBMI-NEXT: cmpl %eax, %edi
		; CHECK-NOBMI-NEXT: setne %al
		; CHECK-NOBMI-NEXT: retq
		;
		; CHECK-BMI2-LABEL: shr_to_shl_ne_i32_s8:
		; CHECK-BMI2: # %bb.0:
		; CHECK-BMI2-NEXT: rorxl $24, %edi, %eax
		; CHECK-BMI2-NEXT: cmpl %eax, %edi
		; CHECK-BMI2-NEXT: setne %al
		; CHECK-BMI2-NEXT: retq
		%and = and i32 %x, 16777215
		%shr = lshr i32 %x, 8
		%r = icmp ne i32 %and, %shr
		@@ -359,9 +365,8 @@ define <4 x i1> @shr_to_ror_eq_4xi32_s4(<4 x i32> %x) {
		;
		; CHECK-AVX512-LABEL: shr_to_ror_eq_4xi32_s4:
		; CHECK-AVX512: # %bb.0:
		; CHECK-AVX512-NEXT: vpsrld $4, %xmm0, %xmm1
		; CHECK-AVX512-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
		; CHECK-AVX512-NEXT: vpcmpeqd %xmm0, %xmm1, %xmm0
		; CHECK-AVX512-NEXT: vprold $4, %xmm0, %xmm1
		; CHECK-AVX512-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
		; CHECK-AVX512-NEXT: vpternlogq $15, %xmm0, %xmm0, %xmm0
		; CHECK-AVX512-NEXT: retq
		%shr = lshr <4 x i32> %x, <i32 4, i32 4, i32 4, i32 4>
		@@ -402,9 +407,8 @@ define <4 x i1> @shl_to_ror_eq_4xi32_s8(<4 x i32> %x) {
		;
		; CHECK-AVX512-LABEL: shl_to_ror_eq_4xi32_s8:
		; CHECK-AVX512: # %bb.0:
		; CHECK-AVX512-NEXT: vpslld $8, %xmm0, %xmm1
		; CHECK-AVX512-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
		; CHECK-AVX512-NEXT: vpcmpeqd %xmm0, %xmm1, %xmm0
		; CHECK-AVX512-NEXT: vprold $8, %xmm0, %xmm1
		; CHECK-AVX512-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
		; CHECK-AVX512-NEXT: vpternlogq $15, %xmm0, %xmm0, %xmm0
		; CHECK-AVX512-NEXT: retq
		%shr = shl <4 x i32> %x, <i32 8, i32 8, i32 8, i32 8>
		@@ -754,9 +758,9 @@ define i1 @shl_to_shr_eq_i32_s9(i32 %x) {
		; CHECK-LABEL: shl_to_shr_eq_i32_s9:
		; CHECK: # %bb.0:
		; CHECK-NEXT: movl %edi, %eax
		; CHECK-NEXT: andl $-512, %eax # imm = 0xFE00
		; CHECK-NEXT: shll $9, %edi
		; CHECK-NEXT: cmpl %edi, %eax
		; CHECK-NEXT: shrl $9, %eax
		; CHECK-NEXT: andl $8388607, %edi # imm = 0x7FFFFF
		; CHECK-NEXT: cmpl %eax, %edi
		; CHECK-NEXT: sete %al
		; CHECK-NEXT: retq
		%and = and i32 %x, -512
		@@ -769,9 +773,9 @@ define i1 @shr_to_shl_eq_i32_s5(i32 %x) {
		; CHECK-LABEL: shr_to_shl_eq_i32_s5:
		; CHECK: # %bb.0:
		; CHECK-NEXT: movl %edi, %eax
		; CHECK-NEXT: andl $134217727, %eax # imm = 0x7FFFFFF
		; CHECK-NEXT: shrl $5, %edi
		; CHECK-NEXT: cmpl %edi, %eax
		; CHECK-NEXT: shll $5, %eax
		; CHECK-NEXT: andl $-32, %edi
		; CHECK-NEXT: cmpl %eax, %edi
		; CHECK-NEXT: sete %al
		; CHECK-NEXT: retq
		%and = and i32 %x, 134217727
		@@ -781,14 +785,20 @@ define i1 @shr_to_shl_eq_i32_s5(i32 %x) {
		}

		define i1 @shr_to_rotate_eq_i32_s5(i32 %x) {
		; CHECK-LABEL: shr_to_rotate_eq_i32_s5:
		; CHECK: # %bb.0:
		; CHECK-NEXT: movl %edi, %eax
		; CHECK-NEXT: andl $268435455, %eax # imm = 0xFFFFFFF
		; CHECK-NEXT: shrl $4, %edi
		; CHECK-NEXT: cmpl %edi, %eax
		; CHECK-NEXT: sete %al
		; CHECK-NEXT: retq
		; CHECK-NOBMI-LABEL: shr_to_rotate_eq_i32_s5:
		; CHECK-NOBMI: # %bb.0:
		; CHECK-NOBMI-NEXT: movl %edi, %eax
		; CHECK-NOBMI-NEXT: roll $4, %eax
		; CHECK-NOBMI-NEXT: cmpl %eax, %edi
		; CHECK-NOBMI-NEXT: sete %al
		; CHECK-NOBMI-NEXT: retq
		;
		; CHECK-BMI2-LABEL: shr_to_rotate_eq_i32_s5:
		; CHECK-BMI2: # %bb.0:
		; CHECK-BMI2-NEXT: rorxl $28, %edi, %eax
		; CHECK-BMI2-NEXT: cmpl %eax, %edi
		; CHECK-BMI2-NEXT: sete %al
		; CHECK-BMI2-NEXT: retq
		%and = and i32 %x, 268435455
		%sh = lshr i32 %x, 4
		%r = icmp eq i32 %and, %sh