Commit bad44d5f authored by Huber, Joseph's avatar Huber, Joseph
Browse files

[OpenMP] Add RTL function for getting number of threads in block.

This patch adds support for the
`__kmpc_get_hardware_num_threads_in_block` function that returns the
number of threads. This was missing in the new runtime and was used by
the AMDGPU plugin which prevented it from using the new runtime. This
patch also unifies the interface for getting the thread numbers in the
frontend.

Originally authored by jdoerfert.

Reviewed By: JonChesterfield

Differential Revision: https://reviews.llvm.org/D111475
parent f45d5e71
Loading
Loading
Loading
Loading
+0 −13
Original line number Diff line number Diff line
@@ -46,16 +46,3 @@ llvm::Value *CGOpenMPRuntimeAMDGCN::getGPUThreadID(CodeGenFunction &CGF) {
      CGF.CGM.getIntrinsic(llvm::Intrinsic::amdgcn_workitem_id_x);
  return Bld.CreateCall(F, llvm::None, "nvptx_tid");
}

/// Emit a call to the `__kmpc_amdgcn_gpu_num_threads` runtime function and
/// return the call's result. If the current module has no function of that
/// name yet, an externally linked `i32()` declaration is created first.
llvm::Value *CGOpenMPRuntimeAMDGCN::getGPUNumThreads(CodeGenFunction &CGF) {
  llvm::Module &Mod = CGF.CGM.getModule();
  const char *FnName = "__kmpc_amdgcn_gpu_num_threads";
  llvm::Function *Callee = Mod.getFunction(FnName);
  if (Callee == nullptr) {
    // Not present in this module yet: add a declaration to call.
    llvm::FunctionType *FnTy =
        llvm::FunctionType::get(CGF.Int32Ty, llvm::None, false);
    Callee = llvm::Function::Create(
        FnTy, llvm::GlobalVariable::ExternalLinkage, FnName, &Mod);
  }
  return CGF.Builder.CreateCall(Callee, llvm::None, "nvptx_num_threads");
}
+0 −3
Original line number Diff line number Diff line
@@ -32,9 +32,6 @@ public:

  /// Get the id of the current thread on the GPU.
  /// Declared `override`, so this implements the virtual function of the same
  /// name inherited from the base runtime class.
  llvm::Value *getGPUThreadID(CodeGenFunction &CGF) override;

  /// Get the maximum number of threads in a block of the GPU.
  /// Declared `override`, so this implements the virtual function of the same
  /// name inherited from the base runtime class.
  llvm::Value *getGPUNumThreads(CodeGenFunction &CGF) override;
};

} // namespace CodeGen
+13 −0
Original line number Diff line number Diff line
@@ -3947,3 +3947,16 @@ void CGOpenMPRuntimeGPU::clear() {
  }
  CGOpenMPRuntime::clear();
}

/// Emit a call to the `__kmpc_get_hardware_num_threads_in_block` runtime
/// function and return the call's result.
///
/// The callee is looked up by name in the current module; when it is absent,
/// an externally linked declaration taking no arguments and returning a
/// 32-bit integer is added to the module before the call is emitted.
llvm::Value *CGOpenMPRuntimeGPU::getGPUNumThreads(CodeGenFunction &CGF) {
  llvm::Module &TheModule = CGF.CGM.getModule();
  const char *RTLName = "__kmpc_get_hardware_num_threads_in_block";

  llvm::Function *RTLFn = TheModule.getFunction(RTLName);
  if (!RTLFn) {
    llvm::FunctionType *RTLFnTy =
        llvm::FunctionType::get(CGF.Int32Ty, llvm::None, false);
    RTLFn = llvm::Function::Create(
        RTLFnTy, llvm::GlobalVariable::ExternalLinkage, RTLName, &TheModule);
  }

  CGBuilderTy &Builder = CGF.Builder;
  return Builder.CreateCall(RTLFn, llvm::None, "nvptx_num_threads");
}
+1 −1
Original line number Diff line number Diff line
@@ -182,7 +182,7 @@ public:
  virtual llvm::Value *getGPUThreadID(CodeGenFunction &CGF) = 0;

  /// Get the maximum number of threads in a block of the GPU.
  /// NOTE(review): the two declarations below are the removed (pure virtual)
  /// and added (non-virtual) sides of this one-line change; they are not meant
  /// to coexist in the header.
  virtual llvm::Value *getGPUNumThreads(CodeGenFunction &CGF) = 0;
  llvm::Value *getGPUNumThreads(CodeGenFunction &CGF);

  /// Emit call to void __kmpc_push_proc_bind(ident_t *loc, kmp_int32
  /// global_tid, int proc_bind) to generate code for 'proc_bind' clause.
+0 −8
Original line number Diff line number Diff line
@@ -46,11 +46,3 @@ llvm::Value *CGOpenMPRuntimeNVPTX::getGPUThreadID(CodeGenFunction &CGF) {
      &CGF.CGM.getModule(), llvm::Intrinsic::nvvm_read_ptx_sreg_tid_x);
  return Bld.CreateCall(F, llvm::None, "nvptx_tid");
}

/// Emit a call to the `nvvm_read_ptx_sreg_ntid_x` intrinsic in the current
/// module and return the call's result.
llvm::Value *CGOpenMPRuntimeNVPTX::getGPUNumThreads(CodeGenFunction &CGF) {
  llvm::Function *NTidX = llvm::Intrinsic::getDeclaration(
      &CGF.CGM.getModule(), llvm::Intrinsic::nvvm_read_ptx_sreg_ntid_x);
  return CGF.Builder.CreateCall(NTidX, llvm::None, "nvptx_num_threads");
}
Loading