Revert "[MS] Overhaul how clang passes overaligned args on x86_32" (eaabaf7e) · Commits · llvm-doe / llvm-project

clang/include/clang/CodeGen/CGFunctionInfo.h

+3 −14

Original line number	Diff line number	Diff line
		@@ -88,7 +88,6 @@ private:
		Kind TheKind;
		bool PaddingInReg : 1;
		bool InAllocaSRet : 1; // isInAlloca()
		bool InAllocaIndirect : 1;// isInAlloca()
		bool IndirectByVal : 1; // isIndirect()
		bool IndirectRealign : 1; // isIndirect()
		bool SRetAfterThis : 1; // isIndirect()
		@@ -111,8 +110,8 @@ private:

		public:
		ABIArgInfo(Kind K = Direct)
		: TypeData(nullptr), PaddingType(nullptr), DirectOffset(0), TheKind(K),
		PaddingInReg(false), InAllocaSRet(false), InAllocaIndirect(false),
		: TypeData(nullptr), PaddingType(nullptr), DirectOffset(0),
		TheKind(K), PaddingInReg(false), InAllocaSRet(false),
		IndirectByVal(false), IndirectRealign(false), SRetAfterThis(false),
		InReg(false), CanBeFlattened(false), SignExt(false) {}

		@@ -186,10 +185,9 @@ public:
		AI.setInReg(true);
		return AI;
		}
		static ABIArgInfo getInAlloca(unsigned FieldIndex, bool Indirect = false) {
		static ABIArgInfo getInAlloca(unsigned FieldIndex) {
		auto AI = ABIArgInfo(InAlloca);
		AI.setInAllocaFieldIndex(FieldIndex);
		AI.setInAllocaIndirect(Indirect);
		return AI;
		}
		static ABIArgInfo getExpand() {
		@@ -382,15 +380,6 @@ public:
		AllocaFieldIndex = FieldIndex;
		}

		unsigned getInAllocaIndirect() const {
		assert(isInAlloca() && "Invalid kind!");
		return InAllocaIndirect;
		}
		void setInAllocaIndirect(bool Indirect) {
		assert(isInAlloca() && "Invalid kind!");
		InAllocaIndirect = Indirect;
		}

		/// Return true if this field of an inalloca struct should be returned
		/// to implement a struct return calling convention.
		bool getInAllocaSRet() const {

clang/lib/CodeGen/CGCall.cpp

+6 −30

Original line number	Diff line number	Diff line
		@@ -2339,9 +2339,6 @@ void CodeGenFunction::EmitFunctionProlog(const CGFunctionInfo &FI,
		auto FieldIndex = ArgI.getInAllocaFieldIndex();
		Address V =
		Builder.CreateStructGEP(ArgStruct, FieldIndex, Arg->getName());
		if (ArgI.getInAllocaIndirect())
		V = Address(Builder.CreateLoad(V),
		getContext().getTypeAlignInChars(Ty));
		ArgVals.push_back(ParamValue::forIndirect(V));
		break;
		}
		@@ -4041,39 +4038,18 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo,
		assert(NumIRArgs == 0);
		assert(getTarget().getTriple().getArch() == llvm::Triple::x86);
		if (I->isAggregate()) {
		// Replace the placeholder with the appropriate argument slot GEP.
		Address Addr = I->hasLValue()
		? I->getKnownLValue().getAddress(*this)
		: I->getKnownRValue().getAggregateAddress();
		llvm::Instruction *Placeholder =
		cast<llvm::Instruction>(Addr.getPointer());

		if (!ArgInfo.getInAllocaIndirect()) {
		// Replace the placeholder with the appropriate argument slot GEP.
		CGBuilderTy::InsertPoint IP = Builder.saveIP();
		Builder.SetInsertPoint(Placeholder);
		Addr = Builder.CreateStructGEP(ArgMemory,
		ArgInfo.getInAllocaFieldIndex());
		Addr =
		Builder.CreateStructGEP(ArgMemory, ArgInfo.getInAllocaFieldIndex());
		Builder.restoreIP(IP);
		} else {
		// For indirect things such as overaligned structs, replace the
		// placeholder with a regular aggregate temporary alloca. Store the
		// address of this alloca into the struct.
		Addr = CreateMemTemp(info_it->type, "inalloca.indirect.tmp");
		Address ArgSlot = Builder.CreateStructGEP(
		ArgMemory, ArgInfo.getInAllocaFieldIndex());
		Builder.CreateStore(Addr.getPointer(), ArgSlot);
		}
		deferPlaceholderReplacement(Placeholder, Addr.getPointer());
		} else if (ArgInfo.getInAllocaIndirect()) {
		// Make a temporary alloca and store the address of it into the argument
		// struct.
		Address Addr = CreateMemTempWithoutCast(
		I->Ty, getContext().getTypeAlignInChars(I->Ty),
		"indirect-arg-temp");
		I->copyInto(*this, Addr);
		Address ArgSlot =
		Builder.CreateStructGEP(ArgMemory, ArgInfo.getInAllocaFieldIndex());
		Builder.CreateStore(Addr.getPointer(), ArgSlot);
		} else {
		// Store the RValue into the argument struct.
		Address Addr =

clang/lib/CodeGen/TargetInfo.cpp

+24 −50

Original line number	Diff line number	Diff line
		@@ -1676,7 +1676,6 @@ ABIArgInfo X86_32ABIInfo::classifyArgumentType(QualType Ty,
		bool IsVectorCall = State.CC == llvm::CallingConv::X86_VectorCall;

		Ty = useFirstFieldIfTransparentUnion(Ty);
		TypeInfo TI = getContext().getTypeInfo(Ty);

		// Check with the C++ ABI first.
		const RecordType *RT = Ty->getAs<RecordType>();
		@@ -1726,7 +1725,7 @@ ABIArgInfo X86_32ABIInfo::classifyArgumentType(QualType Ty,
		bool NeedsPadding = false;
		bool InReg;
		if (shouldAggregateUseDirect(Ty, State, InReg, NeedsPadding)) {
		unsigned SizeInRegs = (TI.Width + 31) / 32;
		unsigned SizeInRegs = (getContext().getTypeSize(Ty) + 31) / 32;
		SmallVector<llvm::Type*, 3> Elements(SizeInRegs, Int32);
		llvm::Type *Result = llvm::StructType::get(LLVMContext, Elements);
		if (InReg)
		@@ -1736,19 +1735,14 @@ ABIArgInfo X86_32ABIInfo::classifyArgumentType(QualType Ty,
		}
		llvm::IntegerType *PaddingType = NeedsPadding ? Int32 : nullptr;

		// Pass over-aligned aggregates on Windows indirectly. This behavior was
		// added in MSVC 2015.
		if (IsWin32StructABI && TI.AlignIsRequired && TI.Align > 32)
		return getIndirectResult(Ty, /ByVal=/false, State);

		// Expand small (<= 128-bit) record types when we know that the stack layout
		// of those arguments will match the struct. This is important because the
		// LLVM backend isn't smart enough to remove byval, which inhibits many
		// optimizations.
		// Don't do this for the MCU if there are still free integer registers
		// (see X86_64 ABI for full explanation).
		if (TI.Width <= 4 * 32 && (!IsMCUABI \|\| State.FreeRegs == 0) &&
		canExpandIndirectArgument(Ty))
		if (getContext().getTypeSize(Ty) <= 4 * 32 &&
		(!IsMCUABI \|\| State.FreeRegs == 0) && canExpandIndirectArgument(Ty))
		return ABIArgInfo::getExpandWithPadding(
		IsFastCall \|\| IsVectorCall \|\| IsRegCall, PaddingType);

		@@ -1756,24 +1750,14 @@ ABIArgInfo X86_32ABIInfo::classifyArgumentType(QualType Ty,
		}

		if (const VectorType *VT = Ty->getAs<VectorType>()) {
		// On Windows, vectors are passed directly if registers are available, or
		// indirectly if not. This avoids the need to align argument memory. Pass
		// user-defined vector types larger than 512 bits indirectly for simplicity.
		if (IsWin32StructABI) {
		if (TI.Width <= 512 && State.FreeSSERegs > 0) {
		--State.FreeSSERegs;
		return ABIArgInfo::getDirectInReg();
		}
		return getIndirectResult(Ty, /ByVal=/false, State);
		}

		// On Darwin, some vectors are passed in memory, we handle this by passing
		// it as an i8/i16/i32/i64.
		if (IsDarwinVectorABI) {
		if ((TI.Width == 8 \|\| TI.Width == 16 \|\| TI.Width == 32) \|\|
		(TI.Width == 64 && VT->getNumElements() == 1))
		return ABIArgInfo::getDirect(
		llvm::IntegerType::get(getVMContext(), TI.Width));
		uint64_t Size = getContext().getTypeSize(Ty);
		if ((Size == 8 \|\| Size == 16 \|\| Size == 32) \|\|
		(Size == 64 && VT->getNumElements() == 1))
		return ABIArgInfo::getDirect(llvm::IntegerType::get(getVMContext(),
		Size));
		}

		if (IsX86_MMXType(CGT.ConvertType(Ty)))
		@@ -1803,10 +1787,9 @@ void X86_32ABIInfo::computeInfo(CGFunctionInfo &FI) const {
		CCState State(FI);
		if (IsMCUABI)
		State.FreeRegs = 3;
		else if (State.CC == llvm::CallingConv::X86_FastCall) {
		else if (State.CC == llvm::CallingConv::X86_FastCall)
		State.FreeRegs = 2;
		State.FreeSSERegs = 3;
		} else if (State.CC == llvm::CallingConv::X86_VectorCall) {
		else if (State.CC == llvm::CallingConv::X86_VectorCall) {
		State.FreeRegs = 2;
		State.FreeSSERegs = 6;
		} else if (FI.getHasRegParm())
		@@ -1814,11 +1797,6 @@ void X86_32ABIInfo::computeInfo(CGFunctionInfo &FI) const {
		else if (State.CC == llvm::CallingConv::X86_RegCall) {
		State.FreeRegs = 5;
		State.FreeSSERegs = 8;
		} else if (IsWin32StructABI) {
		// Since MSVC 2015, the first three SSE vectors have been passed in
		// registers. The rest are passed indirectly.
		State.FreeRegs = DefaultNumRegisterParameters;
		State.FreeSSERegs = 3;
		} else
		State.FreeRegs = DefaultNumRegisterParameters;

		@@ -1865,25 +1843,16 @@ X86_32ABIInfo::addFieldToArgStruct(SmallVector<llvm::Type *, 6> &FrameFields,
		CharUnits &StackOffset, ABIArgInfo &Info,
		QualType Type) const {
		// Arguments are always 4-byte-aligned.
		CharUnits WordSize = CharUnits::fromQuantity(4);
		assert(StackOffset.isMultipleOf(WordSize) && "unaligned inalloca struct");
		CharUnits FieldAlign = CharUnits::fromQuantity(4);

		// sret pointers and indirect things will require an extra pointer
		// indirection, unless they are byval. Most things are byval, and will not
		// require this indirection.
		bool IsIndirect = false;
		if (Info.isIndirect() && !Info.getIndirectByVal())
		IsIndirect = true;
		Info = ABIArgInfo::getInAlloca(FrameFields.size(), IsIndirect);
		llvm::Type *LLTy = CGT.ConvertTypeForMem(Type);
		if (IsIndirect)
		LLTy = LLTy->getPointerTo(0);
		FrameFields.push_back(LLTy);
		StackOffset += IsIndirect ? WordSize : getContext().getTypeSizeInChars(Type);
		assert(StackOffset.isMultipleOf(FieldAlign) && "unaligned inalloca struct");
		Info = ABIArgInfo::getInAlloca(FrameFields.size());
		FrameFields.push_back(CGT.ConvertTypeForMem(Type));
		StackOffset += getContext().getTypeSizeInChars(Type);

		// Insert padding bytes to respect alignment.
		CharUnits FieldEnd = StackOffset;
		StackOffset = FieldEnd.alignTo(WordSize);
		StackOffset = FieldEnd.alignTo(FieldAlign);
		if (StackOffset != FieldEnd) {
		CharUnits NumBytes = StackOffset - FieldEnd;
		llvm::Type *Ty = llvm::Type::getInt8Ty(getVMContext());
		@@ -1897,12 +1866,16 @@ static bool isArgInAlloca(const ABIArgInfo &Info) {
		switch (Info.getKind()) {
		case ABIArgInfo::InAlloca:
		return true;
		case ABIArgInfo::Indirect:
		assert(Info.getIndirectByVal());
		return true;
		case ABIArgInfo::Ignore:
		return false;
		case ABIArgInfo::Indirect:
		case ABIArgInfo::Direct:
		case ABIArgInfo::Extend:
		return !Info.getInReg();
		if (Info.getInReg())
		return false;
		return true;
		case ABIArgInfo::Expand:
		case ABIArgInfo::CoerceAndExpand:
		// These are aggregate types which are never passed in registers when
		@@ -1936,7 +1909,8 @@ void X86_32ABIInfo::rewriteWithInAlloca(CGFunctionInfo &FI) const {

		// Put the sret parameter into the inalloca struct if it's in memory.
		if (Ret.isIndirect() && !Ret.getInReg()) {
		addFieldToArgStruct(FrameFields, StackOffset, Ret, FI.getReturnType());
		CanQualType PtrTy = getContext().getPointerType(FI.getReturnType());
		addFieldToArgStruct(FrameFields, StackOffset, Ret, PtrTy);
		// On Windows, the hidden sret parameter is always returned in eax.
		Ret.setInAllocaSRet(IsWin32StructABI);
		}

clang/test/CodeGen/x86_32-arguments-win32.c

+0 −44

Original line number	Diff line number	Diff line
		@@ -46,47 +46,3 @@ struct s6 {
		struct s6 f6_1(void) { while (1) {} }
		void f6_2(struct s6 a0) {}


		// MSVC passes up to three vectors in registers, and the rest indirectly. We
		// (arbitrarily) pass oversized vectors indirectly, since that is the safest way
		// to do it.
		typedef float __m128 __attribute__((__vector_size__(16), __aligned__(16)));
		typedef float __m256 __attribute__((__vector_size__(32), __aligned__(32)));
		typedef float __m512 __attribute__((__vector_size__(64), __aligned__(64)));
		typedef float __m1024 __attribute__((__vector_size__(128), __aligned__(128)));

		__m128 gv128;
		__m256 gv256;
		__m512 gv512;
		__m1024 gv1024;

		void receive_vec_128(__m128 x, __m128 y, __m128 z, __m128 w, __m128 q) {
		gv128 = x + y + z + w + q;
		}
		void receive_vec_256(__m256 x, __m256 y, __m256 z, __m256 w, __m256 q) {
		gv256 = x + y + z + w + q;
		}
		void receive_vec_512(__m512 x, __m512 y, __m512 z, __m512 w, __m512 q) {
		gv512 = x + y + z + w + q;
		}
		void receive_vec_1024(__m1024 x, __m1024 y, __m1024 z, __m1024 w, __m1024 q) {
		gv1024 = x + y + z + w + q;
		}
		// CHECK-LABEL: define dso_local void @receive_vec_128(<4 x float> inreg %x, <4 x float> inreg %y, <4 x float> inreg %z, <4 x float>* %0, <4 x float>* %1)
		// CHECK-LABEL: define dso_local void @receive_vec_256(<8 x float> inreg %x, <8 x float> inreg %y, <8 x float> inreg %z, <8 x float>* %0, <8 x float>* %1)
		// CHECK-LABEL: define dso_local void @receive_vec_512(<16 x float> inreg %x, <16 x float> inreg %y, <16 x float> inreg %z, <16 x float>* %0, <16 x float>* %1)
		// CHECK-LABEL: define dso_local void @receive_vec_1024(<32 x float>* %0, <32 x float>* %1, <32 x float>* %2, <32 x float>* %3, <32 x float>* %4)

		void pass_vec_128() {
		__m128 z = {0};
		receive_vec_128(z, z, z, z, z);
		}

		// CHECK-LABEL: define dso_local void @pass_vec_128()
		// CHECK: call void @receive_vec_128(<4 x float> inreg %{{[^,)]}}, <4 x float> inreg %{{[^,)]}}, <4 x float> inreg %{{[^,)]}}, <4 x float> %{{[^,)]}}, <4 x float> %{{[^,)]*}})


		void __fastcall fastcall_indirect_vec(__m128 x, __m128 y, __m128 z, __m128 w, int edx, __m128 q) {
		gv128 = x + y + z + w + q;
		}
		// CHECK-LABEL: define dso_local x86_fastcallcc void @"\01@fastcall_indirect_vec@84"(<4 x float> inreg %x, <4 x float> inreg %y, <4 x float> inreg %z, <4 x float>* inreg %0, i32 inreg %edx, <4 x float>* %1)

clang/test/CodeGenCXX/inalloca-overaligned.cpp

deleted100644 → 0

+0 −52

Original line number	Diff line number	Diff line
		// RUN: %clang_cc1 -fms-extensions -w -triple i386-pc-win32 -emit-llvm -o - %s \| FileCheck %s

		// PR44395
		// MSVC passes overaligned types indirectly since MSVC 2015. Make sure that
		// works with inalloca.

		// FIXME: Pass non-trivial and overaligned types indirectly. Right now the C++
		// ABI rules say to use inalloca, and they take precedence, so it's not easy to
		// implement this.


		struct NonTrivial {
		NonTrivial();
		NonTrivial(const NonTrivial &o);
		int x;
		};

		struct __declspec(align(64)) OverAligned {
		OverAligned();
		int buf[16];
		};

		extern int gvi32;

		int receive_inalloca_overaligned(NonTrivial nt, OverAligned o) {
		return nt.x + o.buf[0];
		}

		// CHECK-LABEL: define dso_local i32 @"?receive_inalloca_overaligned@@Y{{.*}}"
		// CHECK-SAME: (<{ %struct.NonTrivial, %struct.OverAligned* }>* inalloca %0)

		int pass_inalloca_overaligned() {
		gvi32 = receive_inalloca_overaligned(NonTrivial(), OverAligned());
		return gvi32;
		}

		// CHECK-LABEL: define dso_local i32 @"?pass_inalloca_overaligned@@Y{{.*}}"
		// CHECK: [[TMP:%[^ ]*]] = alloca %struct.OverAligned, align 64
		// CHECK: call i8* @llvm.stacksave()
		// CHECK: alloca inalloca <{ %struct.NonTrivial, %struct.OverAligned* }>

		// Construct OverAligned into TMP.
		// CHECK: call x86_thiscallcc %struct.OverAligned* @"??0OverAligned@@QAE@XZ"(%struct.OverAligned* [[TMP]])

		// Construct NonTrivial into the GEP.
		// CHECK: [[GEP:%[^ ]]] = getelementptr inbounds <{ %struct.NonTrivial, %struct.OverAligned }>, <{ %struct.NonTrivial, %struct.OverAligned* }>* %{{.*}}, i32 0, i32 0
		// CHECK: call x86_thiscallcc %struct.NonTrivial* @"??0NonTrivial@@QAE@XZ"(%struct.NonTrivial* [[GEP]])

		// Store the address of an OverAligned temporary into the struct.
		// CHECK: getelementptr inbounds <{ %struct.NonTrivial, %struct.OverAligned* }>, <{ %struct.NonTrivial, %struct.OverAligned* }>* %{{.*}}, i32 0, i32 1
		// CHECK: store %struct.OverAligned* [[TMP]], %struct.OverAligned** %{{.*}}, align 4
		// CHECK: call i32 @"?receive_inalloca_overaligned@@Y{{.}}"(<{ %struct.NonTrivial, %struct.OverAligned }>* inalloca %argmem)