Unverified Commit 21861991 authored by Saiyedul Islam's avatar Saiyedul Islam Committed by GitHub
Browse files

[OpenMP] Cleanup and fixes for ABI agnostic DeviceRTL (#71234)

Fixes the DeviceRTL compilation to ensure it is ABI agnostic. Uses
already available global variable "oclc_ABI_version" instead of
"llvm.amdgcn.abi.verion".

It also adds some minor fields in ImplicitArg structure.
parent 1f21e498
Loading
Loading
Loading
Loading
Loading
+3 −3
Original line number Diff line number Diff line
@@ -17432,11 +17432,11 @@ Value *EmitAMDGPUImplicitArgPtr(CodeGenFunction &CGF) {
/// Emit code based on Code Object ABI version.
/// COV_4    : Emit code to use dispatch ptr
/// COV_5    : Emit code to use implicitarg ptr
/// COV_NONE : Emit code to load a global variable "llvm.amdgcn.abi.version"
/// COV_NONE : Emit code to load a global variable "__oclc_ABI_version"
///            and use its value for COV_4 or COV_5 approach. It is used for
///            compiling device libraries in an ABI-agnostic way.
///
/// Note: "llvm.amdgcn.abi.version" is supposed to be emitted and intialized by
/// Note: "__oclc_ABI_version" is supposed to be emitted and intialized by
///       clang during compilation of user code.
Value *EmitAMDGPUWorkGroupSize(CodeGenFunction &CGF, unsigned Index) {
  llvm::LoadInst *LD;
@@ -17444,7 +17444,7 @@ Value *EmitAMDGPUWorkGroupSize(CodeGenFunction &CGF, unsigned Index) {
  auto Cov = CGF.getTarget().getTargetOpts().CodeObjectVersion;
  if (Cov == clang::TargetOptions::COV_None) {
    StringRef Name = "llvm.amdgcn.abi.version";
    StringRef Name = "__oclc_ABI_version";
    auto *ABIVersionC = CGF.CGM.getModule().getNamedGlobal(Name);
    if (!ABIVersionC)
      ABIVersionC = new llvm::GlobalVariable(
+5 −1
Original line number Diff line number Diff line
@@ -362,11 +362,15 @@ void AMDGPUTargetCodeGenInfo::setFunctionDeclAttributes(
/// AMDGPU ROCm device libraries.
void AMDGPUTargetCodeGenInfo::emitTargetGlobals(
    CodeGen::CodeGenModule &CGM) const {
  StringRef Name = "llvm.amdgcn.abi.version";
  StringRef Name = "__oclc_ABI_version";
  llvm::GlobalVariable *OriginalGV = CGM.getModule().getNamedGlobal(Name);
  if (OriginalGV && !llvm::GlobalVariable::isExternalLinkage(OriginalGV->getLinkage()))
    return;

  if (CGM.getTarget().getTargetOpts().CodeObjectVersion ==
      clang::TargetOptions::COV_None)
    return;

  auto *Type = llvm::IntegerType::getIntNTy(CGM.getModule().getContext(), 32);
  llvm::Constant *COV = llvm::ConstantInt::get(
      Type, CGM.getTarget().getTargetOpts().CodeObjectVersion);
+2 −2
Original line number Diff line number Diff line
@@ -2,14 +2,14 @@
// RUN: %clang_cc1 -cc1 -triple amdgcn-amd-amdhsa -emit-llvm -mcode-object-version=none %s -o - | FileCheck %s

//.
// CHECK: @llvm.amdgcn.abi.version = weak_odr hidden local_unnamed_addr addrspace(4) constant i32 0
// CHECK: @__oclc_ABI_version = external addrspace(4) global i32
//.
// CHECK-LABEL: define dso_local i32 @foo(
// CHECK-SAME: ) #[[ATTR0:[0-9]+]] {
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[RETVAL:%.*]] = alloca i32, align 4, addrspace(5)
// CHECK-NEXT:    [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
// CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr addrspace(4) @llvm.amdgcn.abi.version, align 4
// CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr addrspace(4) @__oclc_ABI_version, align 4
// CHECK-NEXT:    [[TMP1:%.*]] = icmp sge i32 [[TMP0]], 500
// CHECK-NEXT:    [[TMP2:%.*]] = call align 8 dereferenceable(256) ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
// CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr addrspace(4) [[TMP2]], i32 12
+1 −1
Original line number Diff line number Diff line
@@ -29,7 +29,7 @@ int [[clang::address_space(999)]] bbb = 1234;
// CHECK: @u = addrspace(5) global i32 undef, align 4
// CHECK: @aaa = addrspace(6) global i32 1000, align 4
// CHECK: @bbb = addrspace(999) global i32 1234, align 4
// CHECK: @llvm.amdgcn.abi.version = weak_odr hidden local_unnamed_addr addrspace(4) constant i32 400
// CHECK: @__oclc_ABI_version = weak_odr hidden local_unnamed_addr addrspace(4) constant i32 400
//.
// CHECK-LABEL: define dso_local amdgpu_kernel void @foo(
// CHECK-SAME: ) #[[ATTR0:[0-9]+]] {
+8 −8
Original line number Diff line number Diff line
@@ -17,9 +17,9 @@

#include "Inputs/cuda.h"

// LINKED4: @llvm.amdgcn.abi.version = weak_odr hidden local_unnamed_addr addrspace(4) constant i32 400
// LINKED4: @__oclc_ABI_version = weak_odr hidden local_unnamed_addr addrspace(4) constant i32 400
// LINKED4-LABEL: bar
// LINKED4-NOT: load i32, ptr addrspacecast (ptr addrspace(4) @llvm.amdgcn.abi.version to ptr), align {{.*}}
// LINKED4-NOT: load i32, ptr addrspacecast (ptr addrspace(4) @__oclc_ABI_version to ptr), align {{.*}}
// LINKED4-NOT: icmp sge i32 %{{.*}}, 500
// LINKED4: call align 8 dereferenceable(256) ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
// LINKED4: [[GEP_5_X:%.*]] = getelementptr i8, ptr addrspace(4) %{{.*}}, i32 12
@@ -28,7 +28,7 @@
// LINKED4: select i1 false, ptr addrspace(4) [[GEP_5_X]], ptr addrspace(4) [[GEP_4_X]]
// LINKED4: load i16, ptr addrspace(4) %{{.*}}, align 2, !range [[$WS_RANGE:![0-9]*]], !invariant.load{{.*}}, !noundef

// LINKED4-NOT: load i32, ptr addrspacecast (ptr addrspace(4) @llvm.amdgcn.abi.version to ptr), align {{.*}}
// LINKED4-NOT: load i32, ptr addrspacecast (ptr addrspace(4) @__oclc_ABI_version to ptr), align {{.*}}
// LINKED4-NOT: icmp sge i32 %{{.*}}, 500
// LINKED4: call align 8 dereferenceable(256) ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
// LINKED4: [[GEP_5_Y:%.*]] = getelementptr i8, ptr addrspace(4) %{{.*}}, i32 14
@@ -37,7 +37,7 @@
// LINKED4: select i1 false, ptr addrspace(4) [[GEP_5_Y]], ptr addrspace(4) [[GEP_4_Y]]
// LINKED4: load i16, ptr addrspace(4) %{{.*}}, align 2, !range [[$WS_RANGE:![0-9]*]], !invariant.load{{.*}}, !noundef

// LINKED4-NOT: load i32, ptr addrspacecast (ptr addrspace(4) @llvm.amdgcn.abi.version to ptr), align {{.*}}
// LINKED4-NOT: load i32, ptr addrspacecast (ptr addrspace(4) @__oclc_ABI_version to ptr), align {{.*}}
// LINKED4-NOT: icmp sge i32 %{{.*}}, 500
// LINKED4: call align 8 dereferenceable(256) ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
// LINKED4: [[GEP_5_Z:%.*]] = getelementptr i8, ptr addrspace(4) %{{.*}}, i32 16
@@ -47,9 +47,9 @@
// LINKED4: load i16, ptr addrspace(4) %{{.*}}, align 2, !range [[$WS_RANGE:![0-9]*]], !invariant.load{{.*}}, !noundef
// LINKED4: "amdgpu_code_object_version", i32 400

// LINKED5: llvm.amdgcn.abi.version = weak_odr hidden local_unnamed_addr addrspace(4) constant i32 500
// LINKED5: __oclc_ABI_version = weak_odr hidden local_unnamed_addr addrspace(4) constant i32 500
// LINKED5-LABEL: bar
// LINKED5-NOT: load i32, ptr addrspacecast (ptr addrspace(4) @llvm.amdgcn.abi.version to ptr), align {{.*}}
// LINKED5-NOT: load i32, ptr addrspacecast (ptr addrspace(4) @__oclc_ABI_version to ptr), align {{.*}}
// LINKED5-NOT: icmp sge i32 %{{.*}}, 500
// LINKED5: call align 8 dereferenceable(256) ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
// LINKED5: [[GEP_5_X:%.*]] = getelementptr i8, ptr addrspace(4) %{{.*}}, i32 12
@@ -58,7 +58,7 @@
// LINKED5: select i1 true, ptr addrspace(4) [[GEP_5_X]], ptr addrspace(4) [[GEP_4_X]]
// LINKED5: load i16, ptr addrspace(4) %{{.*}}, align 2, !range [[$WS_RANGE:![0-9]*]], !invariant.load{{.*}}, !noundef

// LINKED5-NOT: load i32, ptr addrspacecast (ptr addrspace(4) @llvm.amdgcn.abi.version to ptr), align {{.*}}
// LINKED5-NOT: load i32, ptr addrspacecast (ptr addrspace(4) @__oclc_ABI_version to ptr), align {{.*}}
// LINKED5-NOT: icmp sge i32 %{{.*}}, 500
// LINKED5: call align 8 dereferenceable(256) ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
// LINKED5: [[GEP_5_Y:%.*]] = getelementptr i8, ptr addrspace(4) %{{.*}}, i32 14
@@ -67,7 +67,7 @@
// LINKED5: select i1 true, ptr addrspace(4) [[GEP_5_Y]], ptr addrspace(4) [[GEP_4_Y]]
// LINKED5: load i16, ptr addrspace(4) %{{.*}}, align 2, !range [[$WS_RANGE:![0-9]*]], !invariant.load{{.*}}, !noundef

// LINKED5-NOT: load i32, ptr addrspacecast (ptr addrspace(4) @llvm.amdgcn.abi.version to ptr), align {{.*}}
// LINKED5-NOT: load i32, ptr addrspacecast (ptr addrspace(4) @__oclc_ABI_version to ptr), align {{.*}}
// LINKED5-NOT: icmp sge i32 %{{.*}}, 500
// LINKED5: call align 8 dereferenceable(256) ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
// LINKED5: [[GEP_5_Z:%.*]] = getelementptr i8, ptr addrspace(4) %{{.*}}, i32 16
Loading