Merging r197492: (849a06ce) · Commits · llvm-doe / llvm-project

llvm/lib/Target/X86/X86ISelLowering.cpp

+10 −3

Original line number	Diff line number	Diff line
		@@ -5395,7 +5395,8 @@ LowerAsSplatVectorLoad(SDValue SrcOp, MVT VT, SDLoc dl, SelectionDAG &DAG) {
		/// rather than undef via VZEXT_LOAD, but we do not detect that case today.
		/// There's even a handy isZeroNode for that purpose.
		static SDValue EltsFromConsecutiveLoads(EVT VT, SmallVectorImpl<SDValue> &Elts,
		SDLoc &DL, SelectionDAG &DAG) {
		SDLoc &DL, SelectionDAG &DAG,
		bool isAfterLegalize) {
		EVT EltVT = VT.getVectorElementType();
		unsigned NumElems = Elts.size();

		@@ -5431,7 +5432,13 @@ static SDValue EltsFromConsecutiveLoads(EVT VT, SmallVectorImpl<SDValue> &Elts,
		// load of the entire vector width starting at the base pointer. If we found
		// consecutive loads for the low half, generate a vzext_load node.
		if (LastLoadedElt == NumElems - 1) {

		if (isAfterLegalize &&
		!DAG.getTargetLoweringInfo().isOperationLegal(ISD::LOAD, VT))
		return SDValue();

		SDValue NewLd = SDValue();

		if (DAG.InferPtrAlignment(LDBase->getBasePtr()) >= 16)
		NewLd = DAG.getLoad(VT, DL, LDBase->getChain(), LDBase->getBasePtr(),
		LDBase->getPointerInfo(),
		@@ -6075,7 +6082,7 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
		V[i] = Op.getOperand(i);

		// Check for elements which are consecutive loads.
		SDValue LD = EltsFromConsecutiveLoads(VT, V, dl, DAG);
		SDValue LD = EltsFromConsecutiveLoads(VT, V, dl, DAG, false);
		if (LD.getNode())
		return LD;

		@@ -16263,7 +16270,7 @@ static SDValue PerformShuffleCombine(SDNode *N, SelectionDAG &DAG,
		for (unsigned i = 0, e = VT.getVectorNumElements(); i != e; ++i)
		Elts.push_back(getShuffleScalarElt(N, i, DAG, 0));

		return EltsFromConsecutiveLoads(VT, Elts, dl, DAG);
		return EltsFromConsecutiveLoads(VT, Elts, dl, DAG, true);
		}

		/// PerformTruncateCombine - Converts truncate operation to

llvm/test/CodeGen/X86/v4i32load-crash.ll

0 → 100644

+27 −0

Original line number	Diff line number	Diff line
		; RUN: llc --mcpu=x86-64 --mattr=ssse3 < %s

		;PR18045:
		;Issue of selection for 'v4i32 load'.
		;This instruction is not legal for X86 CPUs with sse < 'sse4.1'.
		;This node was generated by the static function EltsFromConsecutiveLoads
		;in X86ISelLowering.cpp after the legalize stage.

		@e = external global [4 x i32], align 4
		@f = external global [4 x i32], align 4

		; Function Attrs: nounwind
		; @fn3 - reproducer body for the crash described in the header comment.
		; It reads the four i32 elements of @e with separate scalar loads, assembles
		; them into <4 x i32> values with insertelement, adds two of those vectors and
		; stores the sum to @f. The RUN line pipes llc's output nowhere and uses no
		; FileCheck, so the test passes as long as llc exits without crashing.
		; The i32 %el parameter is never referenced in the body.
		define void @fn3(i32 %el) {
		entry:
		; Four scalar loads from consecutive indices 0..3 of @e. @e is declared with
		; "align 4" only, and these loads carry no explicit alignment.
		%0 = load i32* getelementptr inbounds ([4 x i32]* @e, i32 0, i32 0)
		%1 = load i32* getelementptr inbounds ([4 x i32]* @e, i32 0, i32 1)
		%2 = load i32* getelementptr inbounds ([4 x i32]* @e, i32 0, i32 2)
		%3 = load i32* getelementptr inbounds ([4 x i32]* @e, i32 0, i32 3)
		; Build the vector one lane at a time, starting from undef.
		%4 = insertelement <4 x i32> undef, i32 %0, i32 0
		%5 = insertelement <4 x i32> %4, i32 %1, i32 1
		; %6 has lanes 0-2 filled; lane 3 is still the undef from the base vector.
		%6 = insertelement <4 x i32> %5, i32 %2, i32 2
		; %7 has all four lanes filled from the loads above.
		%7 = insertelement <4 x i32> %6, i32 %3, i32 3
		; Both the partially-built (%6) and fully-built (%7) vectors are used here.
		; NOTE(review): presumably this dual use is what steers codegen into the
		; post-legalization path named in the header comment - confirm against PR18045.
		%8 = add <4 x i32> %6, %7
		; Store the sum through @f reinterpreted as a <4 x i32> pointer.
		store <4 x i32> %8, <4 x i32>* bitcast ([4 x i32]* @f to <4 x i32>*)
		ret void
		}