Commit eaabaf7e authored by Hans Wennborg's avatar Hans Wennborg
Browse files

Revert "[MS] Overhaul how clang passes overaligned args on x86_32"

It broke some Chromium tests, so let's revert until it can be fixed; see
https://crbug.com/1046362

This reverts commit 2af74e27.
parent 8e21d7b9
Loading
Loading
Loading
Loading
+3 −14
Original line number Diff line number Diff line
@@ -88,7 +88,6 @@ private:
  Kind TheKind;
  bool PaddingInReg : 1;
  bool InAllocaSRet : 1;    // isInAlloca()
  bool InAllocaIndirect : 1;// isInAlloca()
  bool IndirectByVal : 1;   // isIndirect()
  bool IndirectRealign : 1; // isIndirect()
  bool SRetAfterThis : 1;   // isIndirect()
@@ -111,8 +110,8 @@ private:

public:
  ABIArgInfo(Kind K = Direct)
      : TypeData(nullptr), PaddingType(nullptr), DirectOffset(0), TheKind(K),
        PaddingInReg(false), InAllocaSRet(false), InAllocaIndirect(false),
      : TypeData(nullptr), PaddingType(nullptr), DirectOffset(0),
        TheKind(K), PaddingInReg(false), InAllocaSRet(false),
        IndirectByVal(false), IndirectRealign(false), SRetAfterThis(false),
        InReg(false), CanBeFlattened(false), SignExt(false) {}

@@ -186,10 +185,9 @@ public:
    AI.setInReg(true);
    return AI;
  }
  static ABIArgInfo getInAlloca(unsigned FieldIndex, bool Indirect = false) {
  static ABIArgInfo getInAlloca(unsigned FieldIndex) {
    auto AI = ABIArgInfo(InAlloca);
    AI.setInAllocaFieldIndex(FieldIndex);
    AI.setInAllocaIndirect(Indirect);
    return AI;
  }
  static ABIArgInfo getExpand() {
@@ -382,15 +380,6 @@ public:
    AllocaFieldIndex = FieldIndex;
  }

  unsigned getInAllocaIndirect() const {
    assert(isInAlloca() && "Invalid kind!");
    return InAllocaIndirect;
  }
  void setInAllocaIndirect(bool Indirect) {
    assert(isInAlloca() && "Invalid kind!");
    InAllocaIndirect = Indirect;
  }

  /// Return true if this field of an inalloca struct should be returned
  /// to implement a struct return calling convention.
  bool getInAllocaSRet() const {
+6 −30
Original line number Diff line number Diff line
@@ -2339,9 +2339,6 @@ void CodeGenFunction::EmitFunctionProlog(const CGFunctionInfo &FI,
      auto FieldIndex = ArgI.getInAllocaFieldIndex();
      Address V =
          Builder.CreateStructGEP(ArgStruct, FieldIndex, Arg->getName());
      if (ArgI.getInAllocaIndirect())
        V = Address(Builder.CreateLoad(V),
                    getContext().getTypeAlignInChars(Ty));
      ArgVals.push_back(ParamValue::forIndirect(V));
      break;
    }
@@ -4041,39 +4038,18 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo,
      assert(NumIRArgs == 0);
      assert(getTarget().getTriple().getArch() == llvm::Triple::x86);
      if (I->isAggregate()) {
        // Replace the placeholder with the appropriate argument slot GEP.
        Address Addr = I->hasLValue()
                           ? I->getKnownLValue().getAddress(*this)
                           : I->getKnownRValue().getAggregateAddress();
        llvm::Instruction *Placeholder =
            cast<llvm::Instruction>(Addr.getPointer());

        if (!ArgInfo.getInAllocaIndirect()) {
          // Replace the placeholder with the appropriate argument slot GEP.
        CGBuilderTy::InsertPoint IP = Builder.saveIP();
        Builder.SetInsertPoint(Placeholder);
          Addr = Builder.CreateStructGEP(ArgMemory,
                                         ArgInfo.getInAllocaFieldIndex());
        Addr =
            Builder.CreateStructGEP(ArgMemory, ArgInfo.getInAllocaFieldIndex());
        Builder.restoreIP(IP);
        } else {
          // For indirect things such as overaligned structs, replace the
          // placeholder with a regular aggregate temporary alloca. Store the
          // address of this alloca into the struct.
          Addr = CreateMemTemp(info_it->type, "inalloca.indirect.tmp");
          Address ArgSlot = Builder.CreateStructGEP(
              ArgMemory, ArgInfo.getInAllocaFieldIndex());
          Builder.CreateStore(Addr.getPointer(), ArgSlot);
        }
        deferPlaceholderReplacement(Placeholder, Addr.getPointer());
      } else if (ArgInfo.getInAllocaIndirect()) {
        // Make a temporary alloca and store the address of it into the argument
        // struct.
        Address Addr = CreateMemTempWithoutCast(
            I->Ty, getContext().getTypeAlignInChars(I->Ty),
            "indirect-arg-temp");
        I->copyInto(*this, Addr);
        Address ArgSlot =
            Builder.CreateStructGEP(ArgMemory, ArgInfo.getInAllocaFieldIndex());
        Builder.CreateStore(Addr.getPointer(), ArgSlot);
      } else {
        // Store the RValue into the argument struct.
        Address Addr =
+24 −50
Original line number Diff line number Diff line
@@ -1676,7 +1676,6 @@ ABIArgInfo X86_32ABIInfo::classifyArgumentType(QualType Ty,
  bool IsVectorCall = State.CC == llvm::CallingConv::X86_VectorCall;

  Ty = useFirstFieldIfTransparentUnion(Ty);
  TypeInfo TI = getContext().getTypeInfo(Ty);

  // Check with the C++ ABI first.
  const RecordType *RT = Ty->getAs<RecordType>();
@@ -1726,7 +1725,7 @@ ABIArgInfo X86_32ABIInfo::classifyArgumentType(QualType Ty,
    bool NeedsPadding = false;
    bool InReg;
    if (shouldAggregateUseDirect(Ty, State, InReg, NeedsPadding)) {
      unsigned SizeInRegs = (TI.Width + 31) / 32;
      unsigned SizeInRegs = (getContext().getTypeSize(Ty) + 31) / 32;
      SmallVector<llvm::Type*, 3> Elements(SizeInRegs, Int32);
      llvm::Type *Result = llvm::StructType::get(LLVMContext, Elements);
      if (InReg)
@@ -1736,19 +1735,14 @@ ABIArgInfo X86_32ABIInfo::classifyArgumentType(QualType Ty,
    }
    llvm::IntegerType *PaddingType = NeedsPadding ? Int32 : nullptr;

    // Pass over-aligned aggregates on Windows indirectly. This behavior was
    // added in MSVC 2015.
    if (IsWin32StructABI && TI.AlignIsRequired && TI.Align > 32)
      return getIndirectResult(Ty, /*ByVal=*/false, State);

    // Expand small (<= 128-bit) record types when we know that the stack layout
    // of those arguments will match the struct. This is important because the
    // LLVM backend isn't smart enough to remove byval, which inhibits many
    // optimizations.
    // Don't do this for the MCU if there are still free integer registers
    // (see X86_64 ABI for full explanation).
    if (TI.Width <= 4 * 32 && (!IsMCUABI || State.FreeRegs == 0) &&
        canExpandIndirectArgument(Ty))
    if (getContext().getTypeSize(Ty) <= 4 * 32 &&
        (!IsMCUABI || State.FreeRegs == 0) && canExpandIndirectArgument(Ty))
      return ABIArgInfo::getExpandWithPadding(
          IsFastCall || IsVectorCall || IsRegCall, PaddingType);

@@ -1756,24 +1750,14 @@ ABIArgInfo X86_32ABIInfo::classifyArgumentType(QualType Ty,
  }

  if (const VectorType *VT = Ty->getAs<VectorType>()) {
    // On Windows, vectors are passed directly if registers are available, or
    // indirectly if not. This avoids the need to align argument memory. Pass
    // user-defined vector types larger than 512 bits indirectly for simplicity.
    if (IsWin32StructABI) {
      if (TI.Width <= 512 && State.FreeSSERegs > 0) {
        --State.FreeSSERegs;
        return ABIArgInfo::getDirectInReg();
      }
      return getIndirectResult(Ty, /*ByVal=*/false, State);
    }

    // On Darwin, some vectors are passed in memory, we handle this by passing
    // it as an i8/i16/i32/i64.
    if (IsDarwinVectorABI) {
      if ((TI.Width == 8 || TI.Width == 16 || TI.Width == 32) ||
          (TI.Width == 64 && VT->getNumElements() == 1))
        return ABIArgInfo::getDirect(
            llvm::IntegerType::get(getVMContext(), TI.Width));
      uint64_t Size = getContext().getTypeSize(Ty);
      if ((Size == 8 || Size == 16 || Size == 32) ||
          (Size == 64 && VT->getNumElements() == 1))
        return ABIArgInfo::getDirect(llvm::IntegerType::get(getVMContext(),
                                                            Size));
    }

    if (IsX86_MMXType(CGT.ConvertType(Ty)))
@@ -1803,10 +1787,9 @@ void X86_32ABIInfo::computeInfo(CGFunctionInfo &FI) const {
  CCState State(FI);
  if (IsMCUABI)
    State.FreeRegs = 3;
  else if (State.CC == llvm::CallingConv::X86_FastCall) {
  else if (State.CC == llvm::CallingConv::X86_FastCall)
    State.FreeRegs = 2;
    State.FreeSSERegs = 3;
  } else if (State.CC == llvm::CallingConv::X86_VectorCall) {
  else if (State.CC == llvm::CallingConv::X86_VectorCall) {
    State.FreeRegs = 2;
    State.FreeSSERegs = 6;
  } else if (FI.getHasRegParm())
@@ -1814,11 +1797,6 @@ void X86_32ABIInfo::computeInfo(CGFunctionInfo &FI) const {
  else if (State.CC == llvm::CallingConv::X86_RegCall) {
    State.FreeRegs = 5;
    State.FreeSSERegs = 8;
  } else if (IsWin32StructABI) {
    // Since MSVC 2015, the first three SSE vectors have been passed in
    // registers. The rest are passed indirectly.
    State.FreeRegs = DefaultNumRegisterParameters;
    State.FreeSSERegs = 3;
  } else
    State.FreeRegs = DefaultNumRegisterParameters;

@@ -1865,25 +1843,16 @@ X86_32ABIInfo::addFieldToArgStruct(SmallVector<llvm::Type *, 6> &FrameFields,
                                   CharUnits &StackOffset, ABIArgInfo &Info,
                                   QualType Type) const {
  // Arguments are always 4-byte-aligned.
  CharUnits WordSize = CharUnits::fromQuantity(4);
  assert(StackOffset.isMultipleOf(WordSize) && "unaligned inalloca struct");
  CharUnits FieldAlign = CharUnits::fromQuantity(4);

  // sret pointers and indirect things will require an extra pointer
  // indirection, unless they are byval. Most things are byval, and will not
  // require this indirection.
  bool IsIndirect = false;
  if (Info.isIndirect() && !Info.getIndirectByVal())
    IsIndirect = true;
  Info = ABIArgInfo::getInAlloca(FrameFields.size(), IsIndirect);
  llvm::Type *LLTy = CGT.ConvertTypeForMem(Type);
  if (IsIndirect)
    LLTy = LLTy->getPointerTo(0);
  FrameFields.push_back(LLTy);
  StackOffset += IsIndirect ? WordSize : getContext().getTypeSizeInChars(Type);
  assert(StackOffset.isMultipleOf(FieldAlign) && "unaligned inalloca struct");
  Info = ABIArgInfo::getInAlloca(FrameFields.size());
  FrameFields.push_back(CGT.ConvertTypeForMem(Type));
  StackOffset += getContext().getTypeSizeInChars(Type);

  // Insert padding bytes to respect alignment.
  CharUnits FieldEnd = StackOffset;
  StackOffset = FieldEnd.alignTo(WordSize);
  StackOffset = FieldEnd.alignTo(FieldAlign);
  if (StackOffset != FieldEnd) {
    CharUnits NumBytes = StackOffset - FieldEnd;
    llvm::Type *Ty = llvm::Type::getInt8Ty(getVMContext());
@@ -1897,12 +1866,16 @@ static bool isArgInAlloca(const ABIArgInfo &Info) {
  switch (Info.getKind()) {
  case ABIArgInfo::InAlloca:
    return true;
  case ABIArgInfo::Indirect:
    assert(Info.getIndirectByVal());
    return true;
  case ABIArgInfo::Ignore:
    return false;
  case ABIArgInfo::Indirect:
  case ABIArgInfo::Direct:
  case ABIArgInfo::Extend:
    return !Info.getInReg();
    if (Info.getInReg())
      return false;
    return true;
  case ABIArgInfo::Expand:
  case ABIArgInfo::CoerceAndExpand:
    // These are aggregate types which are never passed in registers when
@@ -1936,7 +1909,8 @@ void X86_32ABIInfo::rewriteWithInAlloca(CGFunctionInfo &FI) const {

  // Put the sret parameter into the inalloca struct if it's in memory.
  if (Ret.isIndirect() && !Ret.getInReg()) {
    addFieldToArgStruct(FrameFields, StackOffset, Ret, FI.getReturnType());
    CanQualType PtrTy = getContext().getPointerType(FI.getReturnType());
    addFieldToArgStruct(FrameFields, StackOffset, Ret, PtrTy);
    // On Windows, the hidden sret parameter is always returned in eax.
    Ret.setInAllocaSRet(IsWin32StructABI);
  }
+0 −44
Original line number Diff line number Diff line
@@ -46,47 +46,3 @@ struct s6 {
struct s6 f6_1(void) { while (1) {} }
void f6_2(struct s6 a0) {}

// MSVC passes up to three vectors in registers, and the rest indirectly.  We
// (arbitrarily) pass oversized vectors indirectly, since that is the safest way
// to do it.
typedef float __m128 __attribute__((__vector_size__(16), __aligned__(16)));
typedef float __m256 __attribute__((__vector_size__(32), __aligned__(32)));
typedef float __m512 __attribute__((__vector_size__(64), __aligned__(64)));
typedef float __m1024 __attribute__((__vector_size__(128), __aligned__(128)));

__m128 gv128;
__m256 gv256;
__m512 gv512;
__m1024 gv1024;

void receive_vec_128(__m128 x, __m128 y, __m128 z, __m128 w, __m128 q) {
  gv128 = x + y + z + w + q;
}
void receive_vec_256(__m256 x, __m256 y, __m256 z, __m256 w, __m256 q) {
  gv256 = x + y + z + w + q;
}
void receive_vec_512(__m512 x, __m512 y, __m512 z, __m512 w, __m512 q) {
  gv512 = x + y + z + w + q;
}
void receive_vec_1024(__m1024 x, __m1024 y, __m1024 z, __m1024 w, __m1024 q) {
  gv1024 = x + y + z + w + q;
}
// CHECK-LABEL: define dso_local void @receive_vec_128(<4 x float> inreg %x, <4 x float> inreg %y, <4 x float> inreg %z, <4 x float>* %0, <4 x float>* %1)
// CHECK-LABEL: define dso_local void @receive_vec_256(<8 x float> inreg %x, <8 x float> inreg %y, <8 x float> inreg %z, <8 x float>* %0, <8 x float>* %1)
// CHECK-LABEL: define dso_local void @receive_vec_512(<16 x float> inreg %x, <16 x float> inreg %y, <16 x float> inreg %z, <16 x float>* %0, <16 x float>* %1)
// CHECK-LABEL: define dso_local void @receive_vec_1024(<32 x float>* %0, <32 x float>* %1, <32 x float>* %2, <32 x float>* %3, <32 x float>* %4)

void pass_vec_128() {
  __m128 z = {0};
  receive_vec_128(z, z, z, z, z);
}

// CHECK-LABEL: define dso_local void @pass_vec_128()
// CHECK: call void @receive_vec_128(<4 x float> inreg %{{[^,)]*}}, <4 x float> inreg %{{[^,)]*}}, <4 x float> inreg %{{[^,)]*}}, <4 x float>* %{{[^,)]*}}, <4 x float>* %{{[^,)]*}})


void __fastcall fastcall_indirect_vec(__m128 x, __m128 y, __m128 z, __m128 w, int edx, __m128 q) {
  gv128 = x + y + z + w + q;
}
// CHECK-LABEL: define dso_local x86_fastcallcc void @"\01@fastcall_indirect_vec@84"(<4 x float> inreg %x, <4 x float> inreg %y, <4 x float> inreg %z, <4 x float>* inreg %0, i32 inreg %edx, <4 x float>* %1)
+0 −52
Original line number Diff line number Diff line
// RUN: %clang_cc1 -fms-extensions -w -triple i386-pc-win32 -emit-llvm -o - %s | FileCheck %s

// PR44395
// MSVC passes overaligned types indirectly since MSVC 2015. Make sure that
// works with inalloca.

// FIXME: Pass non-trivial *and* overaligned types indirectly. Right now the C++
// ABI rules say to use inalloca, and they take precedence, so it's not easy to
// implement this.


struct NonTrivial {
  NonTrivial();
  NonTrivial(const NonTrivial &o);
  int x;
};

struct __declspec(align(64)) OverAligned {
  OverAligned();
  int buf[16];
};

extern int gvi32;

int receive_inalloca_overaligned(NonTrivial nt, OverAligned o) {
  return nt.x + o.buf[0];
}

// CHECK-LABEL: define dso_local i32 @"?receive_inalloca_overaligned@@Y{{.*}}"
// CHECK-SAME: (<{ %struct.NonTrivial, %struct.OverAligned* }>* inalloca %0)

int pass_inalloca_overaligned() {
  gvi32 = receive_inalloca_overaligned(NonTrivial(), OverAligned());
  return gvi32;
}

// CHECK-LABEL: define dso_local i32 @"?pass_inalloca_overaligned@@Y{{.*}}"
// CHECK: [[TMP:%[^ ]*]] = alloca %struct.OverAligned, align 64
// CHECK: call i8* @llvm.stacksave()
// CHECK: alloca inalloca <{ %struct.NonTrivial, %struct.OverAligned* }>

// Construct OverAligned into TMP.
// CHECK: call x86_thiscallcc %struct.OverAligned* @"??0OverAligned@@QAE@XZ"(%struct.OverAligned* [[TMP]])

// Construct NonTrivial into the GEP.
// CHECK: [[GEP:%[^ ]*]] = getelementptr inbounds <{ %struct.NonTrivial, %struct.OverAligned* }>, <{ %struct.NonTrivial, %struct.OverAligned* }>* %{{.*}}, i32 0, i32 0
// CHECK: call x86_thiscallcc %struct.NonTrivial* @"??0NonTrivial@@QAE@XZ"(%struct.NonTrivial* [[GEP]])

// Store the address of an OverAligned temporary into the struct.
// CHECK: getelementptr inbounds <{ %struct.NonTrivial, %struct.OverAligned* }>, <{ %struct.NonTrivial, %struct.OverAligned* }>* %{{.*}}, i32 0, i32 1
// CHECK: store %struct.OverAligned* [[TMP]], %struct.OverAligned** %{{.*}}, align 4
// CHECK: call i32 @"?receive_inalloca_overaligned@@Y{{.*}}"(<{ %struct.NonTrivial, %struct.OverAligned* }>* inalloca %argmem)
Loading