Unverified Commit a51597f3 authored by Nathan Gauër's avatar Nathan Gauër Committed by GitHub
Browse files

[HLSL] Handle logical pointer for array assign (#193227)

This commit adds SPIR-V testing to an existing test (almost-NFC on DXIL
testing). It also copies it and invokes Clang using the experimental
logical pointer flag.
Adding this flag shows a missing case in the frontend, handled with this
commit.

Due to the difference in index handling between the structured.gep and
legacy one, the Cbuffer load codegen had to be rewritten. It's a bit
more naive, as we get one gep per level, but this will be handled by
optimizations later on.
parent f60c5d98
Loading
Loading
Loading
Loading
+22 −0
Original line number Diff line number Diff line
@@ -362,6 +362,28 @@ public:
                      ElementType, Align, Addr.isKnownNonNull());
  }

  using CGBuilderBaseTy::CreateStructuredGEP;
  /// Emit an access chain from \p PtrBase through \p IdxList.
  ///
  /// \param Logical  selects the instruction flavour: a structured GEP when
  ///                 true, an inbounds GEP otherwise.
  /// \param BaseType type passed through to the underlying GEP builder.
  /// \param PtrBase  pointer the chain starts from.
  /// \param IdxList  indices forwarded unchanged to the chosen builder.
  /// \param Name     name given to the resulting value.
  /// \returns the value produced by the selected GEP builder.
  llvm::Value *CreateAccessChain(bool Logical, llvm::Type *BaseType,
                                 llvm::Value *PtrBase,
                                 ArrayRef<llvm::Value *> IdxList,
                                 const Twine &Name = "") {
    // Legacy (non-logical) pointers keep using the regular inbounds GEP.
    if (!Logical)
      return CreateInBoundsGEP(BaseType, PtrBase, IdxList, Name);

    return CreateStructuredGEP(BaseType, PtrBase, IdxList, Name);
  }

  /// Address-based flavour of CreateAccessChain.
  ///
  /// The chain is built on the raw pointer of \p Addr, using the element type
  /// of \p Addr as base type. The result is wrapped in an address carrying
  /// \p ElementType and \p Align, and inheriting the non-null knowledge of
  /// \p Addr.
  Address CreateAccessChain(bool Logical, Address Addr,
                            ArrayRef<llvm::Value *> IdxList,
                            llvm::Type *ElementType, CharUnits Align,
                            const Twine &Name = "") {
    llvm::Value *Chain =
        CreateAccessChain(Logical, Addr.getElementType(),
                          emitRawPointerFromAddress(Addr), IdxList, Name);
    return RawAddress(Chain, ElementType, Align, Addr.isKnownNonNull());
  }

  using CGBuilderBaseTy::CreateIsNull;
  llvm::Value *CreateIsNull(Address Addr, const Twine &Name = "") {
    if (!Addr.hasOffset())
+9 −3
Original line number Diff line number Diff line
@@ -652,9 +652,15 @@ void AggExprEmitter::EmitArrayInit(Address DestPtr, llvm::ArrayType *AType,
  auto Emit = [&](Expr *Init, uint64_t ArrayIndex) {
    llvm::Value *element = begin;
    if (ArrayIndex > 0) {
      if (CGF.getLangOpts().EmitLogicalPointer)
        element = Builder.CreateStructuredGEP(
            AType, begin, llvm::ConstantInt::get(CGF.SizeTy, ArrayIndex),
            "arrayinit.element");
      else
        element = Builder.CreateInBoundsGEP(
            llvmElementType, begin,
          llvm::ConstantInt::get(CGF.SizeTy, ArrayIndex), "arrayinit.element");
            llvm::ConstantInt::get(CGF.SizeTy, ArrayIndex),
            "arrayinit.element");

      // Tell the cleanup that it needs to destroy up to this
      // element.  TODO: some of these stores can be trivially
+107 −84
Original line number Diff line number Diff line
@@ -1605,64 +1605,34 @@ namespace {
/// copying out of a cbuffer).
class HLSLBufferCopyEmitter {
  CodeGenFunction &CGF;
  Address DestPtr;
  Address DstPtr;
  Address SrcPtr;
  llvm::Type *LayoutTy = nullptr;

  SmallVector<llvm::Value *> CurStoreIndices;
  SmallVector<llvm::Value *> CurLoadIndices;

  void emitCopyAtIndices(llvm::Type *FieldTy, llvm::ConstantInt *StoreIndex,
                         llvm::ConstantInt *LoadIndex) {
    CurStoreIndices.push_back(StoreIndex);
    CurLoadIndices.push_back(LoadIndex);
    llvm::scope_exit RestoreIndices([&]() {
      CurStoreIndices.pop_back();
      CurLoadIndices.pop_back();
    });

    // First, see if this is some kind of aggregate and recurse.
    if (processArray(FieldTy))
      return;
    if (processBufferLayoutArray(FieldTy))
      return;
    if (processStruct(FieldTy))
      return;

    // When we have a scalar or vector element we can emit the copy.
    CharUnits Align = CharUnits::fromQuantity(
        CGF.CGM.getDataLayout().getABITypeAlign(FieldTy));
    Address SrcGEP = RawAddress(
        CGF.Builder.CreateInBoundsGEP(LayoutTy, SrcPtr.getBasePointer(),
                                      CurLoadIndices, "cbuf.src"),
        FieldTy, Align, SrcPtr.isKnownNonNull());
    Address DestGEP = CGF.Builder.CreateInBoundsGEP(
        DestPtr, CurStoreIndices, FieldTy, Align, "cbuf.dest");
    llvm::Value *Load = CGF.Builder.CreateLoad(SrcGEP, "cbuf.load");
    CGF.Builder.CreateStore(Load, DestGEP);
  }

  bool processArray(llvm::Type *FieldTy) {
    auto *AT = dyn_cast<llvm::ArrayType>(FieldTy);
    if (!AT)
      return false;
  // Creates & returns either a structured.gep or a ptradd/gep depending on
  // langopts.
  llvm::Value *emitAccessChain(llvm::Type *BaseTy, llvm::Value *Base,
                               ArrayRef<llvm::Value *> Indices) {
    bool EmitLogical = CGF.getLangOpts().EmitLogicalPointer;
    if (EmitLogical)
      return CGF.Builder.CreateAccessChain(EmitLogical, BaseTy, Base, Indices);

    // If we have an llvm::ArrayType this is just a regular array with no top
    // level padding, so all we need to do is copy each member.
    for (unsigned I = 0, E = AT->getNumElements(); I < E; ++I)
      emitCopyAtIndices(AT->getElementType(),
                        llvm::ConstantInt::get(CGF.SizeTy, I),
                        llvm::ConstantInt::get(CGF.SizeTy, I));
    return true;
    llvm::SmallVector<llvm::Value *> GEPIndices;
    GEPIndices.reserve(Indices.size() + 1);
    GEPIndices.push_back(llvm::ConstantInt::get(CGF.IntTy, 0));
    GEPIndices.append(Indices.begin(), Indices.end());
    return CGF.Builder.CreateAccessChain(EmitLogical, BaseTy, Base, GEPIndices);
  }

  bool processBufferLayoutArray(llvm::Type *FieldTy) {
  bool isBufferLayoutArray(llvm::StructType *ST) {
    // A buffer layout array is a struct with two elements: the padded array,
    // and the last element. That is, it should look something like this:
    //
    //   { [%n x { %type, %padding }], %type }
    //
    auto *ST = dyn_cast<llvm::StructType>(FieldTy);
    if (!ST || ST->getNumElements() != 2)
      return false;

@@ -1681,51 +1651,104 @@ class HLSLBufferCopyEmitter {
    llvm::Type *ElementTy = ST->getElementType(1);
    if (PaddedTy->getElementType(0) != ElementTy)
      return false;
    return true;
  }

    // All but the last of the logical array elements are in the padded array.
    unsigned NumElts = PaddedEltsTy->getNumElements() + 1;
  /// Copy a buffer layout array into a regular array, element by element.
  ///
  /// \p SrcTy is expected to have the buffer layout array shape
  ///   { [%n x { %type, %padding }], %type }
  /// i.e. all but the last logical element live in a padded array (field 0)
  /// and the last element is stored unpadded (field 1). \p DstTy is the
  /// destination array type and must hold exactly one more element than the
  /// padded array.
  ///
  /// \param Src   pointer to the source buffer layout array.
  /// \param SrcTy struct type describing the source layout.
  /// \param Dst   pointer to the destination array.
  /// \param DstTy type of the destination array.
  void emitBufferLayoutCopy(Value *Src, llvm::StructType *SrcTy, Value *Dst,
                            llvm::ArrayType *DstTy) {
    // Those assumptions are checked by isBufferLayoutArray.
    auto *SrcPaddedArrayTy = cast<llvm::ArrayType>(SrcTy->getElementType(0));
    // Only read by the assert below: keep NDEBUG builds warning-free.
    [[maybe_unused]] auto *SrcPaddedEltTy =
        cast<llvm::StructType>(SrcPaddedArrayTy->getElementType());
    assert(SrcPaddedArrayTy->getNumElements() + 1 == DstTy->getNumElements());
    assert(SrcPaddedEltTy->getElementType(0) == SrcTy->getElementType(1));

    llvm::Type *SrcDataTy = SrcTy->getElementType(1);
    auto *Zero = llvm::ConstantInt::get(CGF.IntTy, 0);

    // Elements stored in the padded array: select field 0 of the layout
    // struct, then the I-th padded element, then its data member (field 0),
    // which leaves the padding out of the copy.
    for (unsigned I = 0; I < SrcPaddedArrayTy->getNumElements(); ++I) {
      auto *Index = llvm::ConstantInt::get(CGF.IntTy, I);
      auto *SrcElt = emitAccessChain(SrcTy, Src, {Zero, Index, Zero});
      auto *DstElt = emitAccessChain(DstTy, Dst, {Index});
      emitElementCopy(SrcElt, SrcDataTy, DstElt, DstTy->getElementType());
    }

    // The last logical element is field 1 of the layout struct and maps to
    // the last slot of the destination array.
    auto *SrcElt =
        emitAccessChain(SrcTy, Src, {llvm::ConstantInt::get(CGF.IntTy, 1)});
    auto *DstElt = emitAccessChain(
        DstTy, Dst,
        {llvm::ConstantInt::get(CGF.IntTy, DstTy->getNumElements() - 1)});
    emitElementCopy(SrcElt, SrcDataTy, DstElt, DstTy->getElementType());
  }

  void emitCopy(Value *Src, llvm::StructType *SrcTy, Value *Dst,
                llvm::Type *DstTy) {
    if (isBufferLayoutArray(SrcTy))
      return emitBufferLayoutCopy(Src, SrcTy, Dst,
                                  cast<llvm::ArrayType>(DstTy));

    unsigned SrcIndex = 0;
    unsigned DstIndex = 0;

    auto *DstST = cast<llvm::StructType>(DstTy);
    while (SrcIndex < SrcTy->getNumElements() &&
           DstIndex < DstST->getNumElements()) {
      if (CGF.CGM.getTargetCodeGenInfo().isHLSLPadding(
              SrcTy->getElementType(SrcIndex))) {
        SrcIndex += 1;
        continue;
      }

    // Add an extra indirection to the load for the struct and walk the
    // array prefix.
    CurLoadIndices.push_back(llvm::ConstantInt::get(CGF.Int32Ty, 0));
    for (unsigned I = 0; I < NumElts - 1; ++I) {
      // We need to copy the element itself, without the padding.
      CurLoadIndices.push_back(llvm::ConstantInt::get(CGF.SizeTy, I));
      emitCopyAtIndices(ElementTy, llvm::ConstantInt::get(CGF.SizeTy, I),
                        llvm::ConstantInt::get(CGF.Int32Ty, 0));
      CurLoadIndices.pop_back();
      if (CGF.CGM.getTargetCodeGenInfo().isHLSLPadding(
              DstST->getElementType(DstIndex))) {
        DstIndex += 1;
        continue;
      }
    CurLoadIndices.pop_back();

    // Now copy the last element.
    emitCopyAtIndices(ElementTy,
                      llvm::ConstantInt::get(CGF.SizeTy, NumElts - 1),
                      llvm::ConstantInt::get(CGF.Int32Ty, 1));
      auto *SrcElt = emitAccessChain(
          SrcTy, Src, {llvm::ConstantInt::get(CGF.IntTy, SrcIndex)});
      auto *DstElt = emitAccessChain(
          DstTy, Dst, {llvm::ConstantInt::get(CGF.IntTy, DstIndex)});
      emitElementCopy(SrcElt, SrcTy->getElementType(SrcIndex), DstElt,
                      DstST->getElementType(DstIndex));
      DstIndex += 1;
      SrcIndex += 1;
    }
  }

    return true;
  /// Copy an array element by element.
  ///
  /// For every index of \p SrcTy, an access chain is emitted into both
  /// \p Src and \p Dst and the pair is handed over to emitElementCopy.
  /// \p DstTy is cast to an array type for each copied element.
  void emitCopy(Value *Src, llvm::ArrayType *SrcTy, Value *Dst,
                llvm::Type *DstTy) {
    llvm::Type *SrcEltTy = SrcTy->getElementType();
    const unsigned Count = SrcTy->getNumElements();
    for (unsigned Idx = 0; Idx != Count; ++Idx) {
      llvm::Value *SrcElt =
          emitAccessChain(SrcTy, Src, {llvm::ConstantInt::get(CGF.IntTy, Idx)});
      llvm::Value *DstElt =
          emitAccessChain(DstTy, Dst, {llvm::ConstantInt::get(CGF.IntTy, Idx)});
      llvm::Type *DstEltTy = cast<llvm::ArrayType>(DstTy)->getElementType();
      emitElementCopy(SrcElt, SrcEltTy, DstElt, DstEltTy);
    }
  }

  bool processStruct(llvm::Type *FieldTy) {
    auto *ST = dyn_cast<llvm::StructType>(FieldTy);
    if (!ST)
      return false;
  void emitElementCopy(Value *Src, llvm::Type *SrcTy, Value *Dst,
                       llvm::Type *DstTy) {
    if (auto *AT = dyn_cast<llvm::ArrayType>(SrcTy))
      return emitCopy(Src, AT, Dst, DstTy);
    if (auto *ST = dyn_cast<llvm::StructType>(SrcTy))
      return emitCopy(Src, ST, Dst, DstTy);

    // Copy the struct field by field, but skip any explicit padding.
    unsigned Skipped = 0;
    for (unsigned I = 0, E = ST->getNumElements(); I < E; ++I) {
      llvm::Type *ElementTy = ST->getElementType(I);
      if (CGF.CGM.getTargetCodeGenInfo().isHLSLPadding(ElementTy))
        ++Skipped;
      else
        emitCopyAtIndices(ElementTy, llvm::ConstantInt::get(CGF.Int32Ty, I),
                          llvm::ConstantInt::get(CGF.Int32Ty, I + Skipped));
    }
    return true;
    // When we have a scalar or vector element we can emit the copy.
    CharUnits SrcAlign =
        CharUnits::fromQuantity(CGF.CGM.getDataLayout().getABITypeAlign(SrcTy));
    CharUnits DstAlign =
        CharUnits::fromQuantity(CGF.CGM.getDataLayout().getABITypeAlign(DstTy));
    Address SrcAddr(Src, SrcTy, SrcAlign);
    Address DstAddr(Dst, DstTy, DstAlign);
    llvm::Value *Load = CGF.Builder.CreateLoad(SrcAddr, "cbuf.load");
    CGF.Builder.CreateStore(Load, DstAddr);
  }

public:
  HLSLBufferCopyEmitter(CodeGenFunction &CGF, Address DestPtr, Address SrcPtr)
      : CGF(CGF), DestPtr(DestPtr), SrcPtr(SrcPtr) {}
  HLSLBufferCopyEmitter(CodeGenFunction &CGF, Address DstPtr, Address SrcPtr)
      : CGF(CGF), DstPtr(DstPtr), SrcPtr(SrcPtr) {}

  bool emitCopy(QualType CType) {
    LayoutTy = HLSLBufferLayoutBuilder(CGF.CGM).layOutType(CType);
@@ -1735,16 +1758,16 @@ public:
    // currently.
    //
    // See https://github.com/llvm/wg-hlsl/issues/351
    emitCopyAtIndices(LayoutTy, llvm::ConstantInt::get(CGF.SizeTy, 0),
                      llvm::ConstantInt::get(CGF.SizeTy, 0));
    emitElementCopy(SrcPtr.getBasePointer(), LayoutTy, DstPtr.getBasePointer(),
                    DstPtr.getElementType());
    return true;
  }
};
} // namespace

bool CGHLSLRuntime::emitBufferCopy(CodeGenFunction &CGF, Address DestPtr,
bool CGHLSLRuntime::emitBufferCopy(CodeGenFunction &CGF, Address DstPtr,
                                   Address SrcPtr, QualType CType) {
  return HLSLBufferCopyEmitter(CGF, DestPtr, SrcPtr).emitCopy(CType);
  return HLSLBufferCopyEmitter(CGF, DstPtr, SrcPtr).emitCopy(CType);
}

LValue CGHLSLRuntime::emitBufferMemberExpr(CodeGenFunction &CGF,
+19 −15
Original line number Diff line number Diff line
@@ -172,18 +172,20 @@ void arr_assign9() {

// CHECK-LABEL: define hidden void {{.*}}arr_assign10
// CHECK: [[C:%.*]] = alloca [2 x [2 x i32]], align 4
// CHECK-NEXT: [[V0:%.*]] = getelementptr inbounds [2 x [2 x i32]], ptr [[C]], i32 0, i32 0, i32 0
// CHECK-NEXT: [[V0:%.*]] = getelementptr inbounds [2 x [2 x i32]], ptr [[C]], i32 0, i32 0
// CHECK-NEXT: [[V1:%.*]] = getelementptr inbounds [2 x i32], ptr [[V0]], i32 0, i32 0
// CHECK-NEXT: [[L0:%.*]] = load i32, ptr addrspace(2) @c3, align 4
// CHECK-NEXT: store i32 [[L0]], ptr [[V0]], align 4
// CHECK-NEXT: [[V1:%.*]] = getelementptr inbounds [2 x [2 x i32]], ptr [[C]], i32 0, i32 0, i32 1
// CHECK-NEXT: store i32 [[L0]], ptr [[V1]], align 4
// CHECK-NEXT: [[V1:%.*]] = getelementptr inbounds [2 x i32], ptr [[V0]], i32 0, i32 1
// CHECK-NEXT: [[L1:%.*]] = load i32, ptr addrspace(2) getelementptr inbounds nuw (i8, ptr addrspace(2) @c3, i32 16), align 4
// CHECK-NEXT: store i32 [[L1]], ptr [[V1]], align 4
// CHECK-NEXT: [[V2:%.*]] = getelementptr inbounds [2 x [2 x i32]], ptr [[C]], i32 0, i32 1, i32 0
// CHECK-NEXT: [[V2:%.*]] = getelementptr inbounds [2 x [2 x i32]], ptr [[C]], i32 0, i32 1
// CHECK-NEXT: [[V3:%.*]] = getelementptr inbounds [2 x i32], ptr [[V2]], i32 0, i32 0
// CHECK-NEXT: [[L2:%.*]] = load i32, ptr addrspace(2) getelementptr inbounds nuw (i8, ptr addrspace(2) @c3, i32 32), align 4
// CHECK-NEXT: store i32 [[L2]], ptr [[V2]], align 4
// CHECK-NEXT: [[V3:%.*]] = getelementptr inbounds [2 x [2 x i32]], ptr [[C]], i32 0, i32 1, i32 1
// CHECK-NEXT: store i32 [[L2]], ptr [[V3]], align 4
// CHECK-NEXT: [[V4:%.*]] = getelementptr inbounds [2 x i32], ptr [[V2]], i32 0, i32 1
// CHECK-NEXT: [[L3:%.*]] = load i32, ptr addrspace(2) getelementptr inbounds nuw (i8, ptr addrspace(2) @c3, i32 48), align 4
// CHECK-NEXT: store i32 [[L3]], ptr [[V3]], align 4
// CHECK-NEXT: store i32 [[L3]], ptr [[V4]], align 4
// CHECK-NEXT: ret void
void arr_assign10() {
  int C[2][2];
@@ -192,18 +194,20 @@ void arr_assign10() {

// CHECK-LABEL: define hidden void {{.*}}arr_assign11
// CHECK: [[C:%.*]] = alloca [2 x %struct.S], align 1
// CHECK-NEXT: [[V0:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[C]], i32 0, i32 0, i32 0
// CHECK-NEXT: [[V0:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[C]], i32 0, i32 0
// CHECK-NEXT: [[V1:%.*]] = getelementptr inbounds %struct.S, ptr [[V0]], i32 0, i32 0
// CHECK-NEXT: [[L0:%.*]] = load i32, ptr addrspace(2) @c4, align 4
// CHECK-NEXT: store i32 [[L0]], ptr [[V0]], align 4
// CHECK-NEXT: [[V1:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[C]], i32 0, i32 0, i32 1
// CHECK-NEXT: store i32 [[L0]], ptr [[V1]], align 4
// CHECK-NEXT: [[V2:%.*]] = getelementptr inbounds %struct.S, ptr [[V0]], i32 0, i32 1
// CHECK-NEXT: [[L1:%.*]] = load float, ptr addrspace(2) getelementptr inbounds nuw (i8, ptr addrspace(2) @c4, i32 4), align 4
// CHECK-NEXT: store float [[L1]], ptr [[V1]], align 4
// CHECK-NEXT: [[V2:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[C]], i32 0, i32 1, i32 0
// CHECK-NEXT: store float [[L1]], ptr [[V2]], align 4
// CHECK-NEXT: [[V3:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[C]], i32 0, i32 1
// CHECK-NEXT: [[V4:%.*]] = getelementptr inbounds %struct.S, ptr [[V3]], i32 0, i32 0
// CHECK-NEXT: [[L2:%.*]] = load i32, ptr addrspace(2) getelementptr inbounds nuw (i8, ptr addrspace(2) @c4, i32 16), align 4
// CHECK-NEXT: store i32 [[L2]], ptr [[V2]], align 4
// CHECK-NEXT: [[V3:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[C]], i32 0, i32 1, i32 1
// CHECK-NEXT: store i32 [[L2]], ptr [[V4]], align 4
// CHECK-NEXT: [[V5:%.*]] = getelementptr inbounds %struct.S, ptr [[V3]], i32 0, i32 1
// CHECK-NEXT: [[L3:%.*]] = load float, ptr addrspace(2) getelementptr inbounds nuw (i8, ptr addrspace(2) @c4, i32 20), align 4
// CHECK-NEXT: store float [[L3]], ptr [[V3]], align 4
// CHECK-NEXT: store float [[L3]], ptr [[V5]], align 4
// CHECK-NEXT: ret void
void arr_assign11() {
  S C[2];
+443 −0

File added.

Preview size limit exceeded, changes collapsed.

Loading