Unverified Commit 82f25499 authored by Sergio Afonso's avatar Sergio Afonso Committed by GitHub
Browse files

[OpenMP][OMPIRBuilder] Use device shared memory for arg structures (#150925)

Argument structures are created when sections of the LLVM IR
corresponding to an OpenMP construct are outlined into their own
function. For this, stack allocations are used.

This patch modifies this behavior when compiling for a target device and
outlining `parallel`-related IR, so that it uses device shared memory
instead of private stack space. This is needed in order for threads to
have access to these arguments.
parent 3c39478e
Loading
Loading
Loading
Loading
+15 −2
Original line number Diff line number Diff line
@@ -34,6 +34,7 @@

namespace llvm {
class CanonicalLoopInfo;
class CodeExtractor;
class ScanInfo;
struct TargetRegionEntryInfo;
class OffloadEntriesInfoManager;
@@ -2556,17 +2557,27 @@ public:
    // TODO: this should be safe to enable by default
    bool FixUpNonEntryAllocas = false;

    LLVM_ABI virtual ~OutlineInfo() = default;

    /// Collect all blocks in between EntryBB and ExitBB in both the given
    /// vector and set.
    LLVM_ABI void collectBlocks(SmallPtrSetImpl<BasicBlock *> &BlockSet,
                                SmallVectorImpl<BasicBlock *> &BlockVector);

    /// Create a CodeExtractor instance based on the information stored in this
    /// structure.
    ///
    /// Declared virtual, so subclasses of OutlineInfo may override it.
    ///
    /// \param Blocks the list of collected blocks from a previous call to
    ///        \c collectBlocks.
    /// \param ArgsInZeroAddressSpace flag stating whether arguments must be
    ///        passed in address space 0.
    /// \param Suffix defaults to an empty Twine; presumably forwarded to the
    ///        CodeExtractor as the suffix of the extracted function's name
    ///        (TODO confirm against the implementation).
    LLVM_ABI virtual std::unique_ptr<CodeExtractor>
    createCodeExtractor(ArrayRef<BasicBlock *> Blocks,
                        bool ArgsInZeroAddressSpace, Twine Suffix = Twine(""));

    /// Return the function that contains the region to be outlined, i.e. the
    /// parent of \c EntryBB. \c EntryBB is dereferenced unconditionally, so it
    /// must be non-null when this is called.
    Function *getFunction() const { return EntryBB->getParent(); }
  };

  /// Collection of regions that need to be outlined during finalization.
  SmallVector<OutlineInfo, 16> OutlineInfos;
  SmallVector<std::unique_ptr<OutlineInfo>, 16> OutlineInfos;

  /// A collection of candidate target functions whose constant allocas will
  /// attempt to be raised on a call of finalize after all currently enqueued
@@ -2581,7 +2592,9 @@ public:
  std::forward_list<ScanInfo> ScanInfos;

  /// Add a new region that will be outlined later.
  void addOutlineInfo(OutlineInfo &&OI) { OutlineInfos.emplace_back(OI); }
  /// Takes ownership of \p OI and appends it to \c OutlineInfos.
  void addOutlineInfo(std::unique_ptr<OutlineInfo> &&OI) {
    OutlineInfos.emplace_back(std::move(OI));
  }

  /// An ordered map of auto-generated variables to their unique names.
  /// It stores variables with the following names: 1) ".gomp_critical_user_" +
+38 −12
Original line number Diff line number Diff line
@@ -17,14 +17,15 @@
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/Support/Compiler.h"
#include <limits>

namespace llvm {

template <typename PtrType> class SmallPtrSetImpl;
class AddrSpaceCastInst;
class AllocaInst;
class BasicBlock;
class BlockFrequency;
class BlockFrequencyInfo;
class BranchProbabilityInfo;
@@ -94,15 +95,23 @@ class LLVM_ABI CodeExtractor {
  BranchProbabilityInfo *BPI;
  AssumptionCache *AC;

  // A block outside of the extraction set where any intermediate allocations
  // will be placed inside. If this is null, allocations will be placed in the
  // entry block of the function.
  /// A block outside of the extraction set in which any intermediate
  /// allocations will be placed. If this is null, allocations will be placed in
  /// the entry block of the function.
  BasicBlock *AllocationBlock;

  // If true, varargs functions can be extracted.
  /// A block outside of the extraction set in which deallocations for
  /// intermediate allocations can be placed. Not used for automatically
  /// deallocated memory (e.g. `alloca`), which is the default.
  ///
  /// If it is null and deallocations are needed, they are placed at the end of
  /// the replacement basic block.
  BasicBlock *DeallocationBlock;

  /// If true, varargs functions can be extracted.
  bool AllowVarArgs;

  // Bits of intermediate state computed at various phases of extraction.
  /// Bits of intermediate state computed at various phases of extraction.
  SetVector<BasicBlock *> Blocks;

  /// Lists of blocks that are branched from the code region to be extracted,
@@ -123,13 +132,13 @@ class LLVM_ABI CodeExtractor {
  /// 1, etc.
  SmallVector<BasicBlock *> ExtractedFuncRetVals;

  // Suffix to use when creating extracted function (appended to the original
  // function name + "."). If empty, the default is to use the entry block
  // label, if non-empty, otherwise "extracted".
  /// Suffix to use when creating extracted function (appended to the original
  /// function name + "."). If empty, the default is to use the entry block
  /// label, if non-empty, otherwise "extracted".
  std::string Suffix;

  // If true, the outlined function has aggregate argument in zero address
  // space.
  /// If true, the outlined function has aggregate argument in zero address
  /// space.
  bool ArgsInZeroAddressSpace;

  // If true, the outlined function always returns void even when there is only
@@ -152,7 +161,9 @@ public:
  /// however code extractor won't validate whether extraction is legal. Any new
  /// allocations will be placed in the AllocationBlock, unless it is null, in
  /// which case it will be placed in the entry block of the function from which
  /// the code is being extracted. If ArgsInZeroAddressSpace param is set to
  /// the code is being extracted. Explicit deallocations for the aforementioned
  /// allocations, if needed, will be placed in the DeallocationBlock or, if it
  /// is null, at the end of the replacement block. If ArgsInZeroAddressSpace
  /// param is set to
  /// true, then the aggregate param pointer of the outlined function is
  /// declared in zero address space. If VoidReturnWithSingleOutput is set to
  /// true, then the return type of the outlined function is set void even if
@@ -162,9 +173,12 @@ public:
                BranchProbabilityInfo *BPI = nullptr,
                AssumptionCache *AC = nullptr, bool AllowVarArgs = false,
                bool AllowAlloca = false, BasicBlock *AllocationBlock = nullptr,
                BasicBlock *DeallocationBlock = nullptr,
                std::string Suffix = "", bool ArgsInZeroAddressSpace = false,
                bool VoidReturnWithSingleOutput = true);

  virtual ~CodeExtractor() = default;

  /// Perform the extraction, returning the new function.
  ///
  /// Returns zero when called on a CodeExtractor instance where isEligible
@@ -244,6 +258,18 @@ public:
  /// region, passing it instead as a scalar.
  void excludeArgFromAggregate(Value *Arg);

protected:
  /// Allocate an intermediate variable at the specified point.
  ///
  /// Declared virtual and protected, so subclasses may override how
  /// intermediate variables are allocated.
  ///
  /// \param BB the block associated with the allocation point.
  /// \param AllocIP the insertion point for the allocation; presumably an
  ///        iterator into \p BB (TODO confirm).
  /// \param VarType the type of the variable to allocate.
  /// \param Name name for the allocated variable; empty by default.
  /// \param CastedAlloc optional (may be null); presumably receives an address
  ///        space cast of the allocation when one is created (TODO confirm
  ///        against the implementation).
  /// \returns an instruction for the allocation; presumably the allocated
  ///          variable itself (TODO confirm).
  virtual Instruction *allocateVar(BasicBlock *BB, BasicBlock::iterator AllocIP,
                                   Type *VarType, const Twine &Name = Twine(""),
                                   AddrSpaceCastInst **CastedAlloc = nullptr);

  /// Deallocate a previously-allocated intermediate variable at the specified
  /// point.
  ///
  /// Declared virtual and protected, so subclasses may override how
  /// intermediate variables are released.
  ///
  /// \param BB the block associated with the deallocation point.
  /// \param DeallocIP the insertion point for the deallocation; presumably an
  ///        iterator into \p BB (TODO confirm).
  /// \param Var the variable to deallocate; presumably a value obtained from
  ///        \c allocateVar (TODO confirm).
  /// \param VarType the type of \p Var's allocation.
  /// \returns an instruction for the deallocation; NOTE(review): whether this
  ///          may be null when no explicit deallocation is required (e.g. for
  ///          `alloca`) is not visible here -- confirm against the
  ///          implementation.
  virtual Instruction *deallocateVar(BasicBlock *BB,
                                     BasicBlock::iterator DeallocIP, Value *Var,
                                     Type *VarType);

private:
  struct LifetimeMarkerInfo {
    bool SinkLifeStart = false;
+278 −96

File changed.

Preview size limit exceeded, changes collapsed.

+1 −0
Original line number Diff line number Diff line
@@ -721,6 +721,7 @@ bool HotColdSplitting::outlineColdRegions(Function &F, bool HasProfileSummary) {
            SubRegion, &*DT, /* AggregateArgs */ false, /* BFI */ nullptr,
            /* BPI */ nullptr, AC, /* AllowVarArgs */ false,
            /* AllowAlloca */ false, /* AllocaBlock */ nullptr,
            /* DeallocationBlock */ nullptr,
            /* Suffix */ "cold." + std::to_string(OutlinedFunctionID),
            /* ArgsInZeroAddressSpace */ false,
            /* VoidReturnWithSingleOutput */ false);
+2 −2
Original line number Diff line number Diff line
@@ -2789,7 +2789,7 @@ unsigned IROutliner::doOutline(Module &M) {
      OS->Candidate->getBasicBlocks(BlocksInRegion, BE);
      OS->CE = new (ExtractorAllocator.Allocate())
          CodeExtractor(BE, nullptr, false, nullptr, nullptr, nullptr, false,
                        false, nullptr, "outlined");
                        false, nullptr, nullptr, "outlined");
      findAddInputsOutputs(M, *OS, NotSame);
      if (!OS->IgnoreRegion)
        OutlinedRegions.push_back(OS);
@@ -2900,7 +2900,7 @@ unsigned IROutliner::doOutline(Module &M) {
      OS->Candidate->getBasicBlocks(BlocksInRegion, BE);
      OS->CE = new (ExtractorAllocator.Allocate())
          CodeExtractor(BE, nullptr, false, nullptr, nullptr, nullptr, false,
                        false, nullptr, "outlined");
                        false, nullptr, nullptr, "outlined");
      bool FunctionOutlined = extractSection(*OS);
      if (FunctionOutlined) {
        unsigned StartIdx = OS->Candidate->getStartIdx();
Loading