Unverified Commit c94db1af authored by Sergio Afonso's avatar Sergio Afonso Committed by GitHub
Browse files

[MLIR][OpenMP] Unify device shared memory logic, NFCI (#182856)

This patch creates a utils library for the OpenMP dialect with functions
used by MLIR to LLVM IR translation as well as the stack-to-shared pass
to determine which allocations must use local stack memory or device
shared memory.
parent fad06a41
Loading
Loading
Loading
Loading
+53 −0
Original line number Diff line number Diff line
//===- Utils.h - OpenMP dialect utilities -----------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This header file defines prototypes for various OpenMP utilities.
//
//===----------------------------------------------------------------------===//

#ifndef MLIR_DIALECT_OPENMP_UTILS_UTILS_H_
#define MLIR_DIALECT_OPENMP_UTILS_UTILS_H_

#include "mlir/IR/Operation.h"
#include "mlir/IR/Value.h"

namespace mlir {
namespace omp {

/// Check whether the value representing an allocation, assumed to have been
/// defined in a shared device context, is used in a manner that would require
/// device shared memory for correctness.
///
/// When a use takes place inside an omp.parallel region and it's not as a
/// private clause argument, or when it is a reduction argument passed to
/// omp.parallel or a function call argument, then the defining allocation is
/// eligible for replacement with shared memory.
///
/// This function only inspects the uses of \p alloc; whether the allocation
/// itself is located in a shared device context must be checked separately.
///
/// \param alloc The value produced by the allocation whose uses are inspected.
/// \return true if the uses of \p alloc require device shared memory.
///
/// \see mlir::omp::opInSharedDeviceContext().
bool allocaUsesRequireSharedMem(Value alloc);

/// Check whether the given operation is located in a context where an
/// allocation to be used by multiple threads in a parallel region would have to
/// be placed in device shared memory to be accessible.
///
/// That means that all of the following hold:
///   - it is inside of a target device module;
///   - it is a non-SPMD target region, it is inside of one, or it is located
///     in a device function; and
///   - it is not inside of a parallel region.
///
/// This represents a necessary but not sufficient set of conditions to use
/// device shared memory in place of regular allocas. For some variables, the
/// associated OpenMP construct or their uses might also need to be taken into
/// account.
///
/// \param op The operation whose location is checked.
/// \return true if \p op is located in such a shared device context.
///
/// \see mlir::omp::allocaUsesRequireSharedMem().
bool opInSharedDeviceContext(Operation &op);

} // namespace omp
} // namespace mlir

#endif // MLIR_DIALECT_OPENMP_UTILS_UTILS_H_
+1 −0
Original line number Diff line number Diff line
add_subdirectory(IR)
add_subdirectory(Transforms)
add_subdirectory(Utils)
+1 −0
Original line number Diff line number Diff line
@@ -19,6 +19,7 @@ add_mlir_dialect_library(MLIROpenMPTransforms
  MLIRLLVMDialect
  MLIROpenACCMPCommon
  MLIROpenMPDialect
  MLIROpenMPUtils
  MLIRPass
  MLIRSupport
  MLIRTransforms
+13 −85
Original line number Diff line number Diff line
@@ -15,7 +15,9 @@

#include "mlir/Dialect/LLVMIR/LLVMDialect.h"
#include "mlir/Dialect/OpenMP/OpenMPDialect.h"
#include "mlir/Dialect/OpenMP/Utils/Utils.h"
#include "mlir/Pass/Pass.h"
#include "llvm/ADT/STLExtras.h"

namespace mlir {
namespace omp {
@@ -26,94 +28,20 @@ namespace omp {

using namespace mlir;

/// When a use takes place inside an omp.parallel region and it's not as a
/// private clause argument, or when it is a reduction argument passed to
/// omp.parallel or a function call argument, then the defining allocation is
/// eligible for replacement with shared memory.
static bool allocaUseRequiresDeviceSharedMem(const OpOperand &use) {
  Operation *owner = use.getOwner();
  if (auto parallelOp = dyn_cast<omp::ParallelOp>(owner)) {
    if (llvm::is_contained(parallelOp.getReductionVars(), use.get()))
      return true;
  } else if (auto callOp = dyn_cast<CallOpInterface>(owner)) {
    if (llvm::is_contained(callOp.getArgOperands(), use.get()))
      return true;
  }

  // If it is used directly inside of a parallel region, it has to be replaced
  // unless the use is a private clause.
  if (owner->getParentOfType<omp::ParallelOp>()) {
    if (auto argIface = dyn_cast<omp::BlockArgOpenMPOpInterface>(owner)) {
      if (auto privateSyms =
              cast_or_null<ArrayAttr>(owner->getAttr("private_syms"))) {
        for (auto [var, sym] :
             llvm::zip_equal(argIface.getPrivateVars(), privateSyms)) {
          if (var != use.get())
            continue;

          auto moduleOp = owner->getParentOfType<ModuleOp>();
          auto privateOp = cast<omp::PrivateClauseOp>(
              moduleOp.lookupSymbol(cast<SymbolRefAttr>(sym)));
          return privateOp.getDataSharingType() !=
                 omp::DataSharingClauseType::Private;
        }
      }
    }
    return true;
  }
  return false;
}

/// Tell whether any of the given uses requires the defining allocation to be
/// placed in device shared memory.
///
/// Uses by `llvm.addrspacecast` and `llvm.getelementptr` ops are not checked
/// themselves; instead, the uses of those ops are checked recursively.
static bool shouldReplaceAllocaWithUses(const Operation::use_range &uses) {
  return llvm::any_of(uses, [](const OpOperand &use) {
    Operation *user = use.getOwner();
    // Look through address space casts and GEPs to the uses of their results.
    if (llvm::isa<LLVM::AddrSpaceCastOp, LLVM::GEPOp>(user))
      return shouldReplaceAllocaWithUses(user->getUses());
    return allocaUseRequiresDeviceSharedMem(use);
  });
}

// TODO: Refactor the logic in `shouldReplaceAllocaWithDeviceSharedMem`,
// `shouldReplaceAllocaWithUses` and `allocaUseRequiresDeviceSharedMem` to
// be reusable by the MLIR to LLVM IR translation stage, as something very
// similar is also implemented there to choose between allocas and device
// shared memory allocations when processing OpenMP reductions, mapping and
// privatization.
/// Tell whether to replace an operation representing a stack allocation with a
/// device shared memory allocation/deallocation pair based on the location of
/// the allocation and its uses.
///
/// \param op The operation representing the stack allocation.
/// \return true if \p op is located in a context that requires device shared
/// memory and its uses require it as well.
static bool shouldReplaceAllocaWithDeviceSharedMem(Operation &op) {
  // Only applies when an enclosing offload module reports that it is being
  // compiled for the target device.
  auto offloadIface = op.getParentOfType<omp::OffloadModuleInterface>();
  if (!offloadIface || !offloadIface.getIsTargetDevice())
    return false;

  // Null if `op` is not nested inside of an omp.target region.
  auto targetOp = op.getParentOfType<omp::TargetOp>();

  // It must be inside of a generic omp.target or in a target device function,
  // and not inside of omp.parallel.
  if (auto parallelOp = op.getParentOfType<omp::ParallelOp>()) {
    // An enclosing omp.parallel is only tolerated when there is an enclosing
    // omp.target that is not a proper ancestor of that omp.parallel.
    if (!targetOp || targetOp->isProperAncestor(parallelOp))
      return false;
  }

  if (targetOp) {
    // Inside of omp.target: the kernel must execute in generic mode.
    if (targetOp.getKernelExecFlags(targetOp.getInnermostCapturedOmpOp()) !=
        omp::TargetExecMode::generic)
      return false;
  } else {
    // Outside of omp.target: the enclosing declare-target-capable operation
    // must be marked declare target for a device type other than host.
    auto declTargetIface = op.getParentOfType<omp::DeclareTargetInterface>();
    if (!declTargetIface || !declTargetIface.isDeclareTarget() ||
        declTargetIface.getDeclareTargetDeviceType() ==
            omp::DeclareTargetDeviceType::host)
      return false;
  }

  // The location checks passed, so the decision depends on the uses of `op`.
  return shouldReplaceAllocaWithUses(op.getUses());
  // NOTE(review): as written, the statement below is unreachable because it
  // follows an unconditional return. It looks like an alternative body for
  // this function that delegates to the shared helpers declared in
  // mlir/Dialect/OpenMP/Utils/Utils.h, presumably interleaved with the
  // previous implementation by a diff rendering -- confirm which version is
  // intended and remove the other one.
  return omp::opInSharedDeviceContext(op) &&
         llvm::any_of(op.getResults(), [&](Value result) {
           return omp::allocaUsesRequireSharedMem(result);
         });
}

/// Based on the location of the definition of the given value representing the
/// result of a device shared memory allocation, find the corresponding points
/// where its deallocation should be placed and introduce `omp.free_shared_mem`
/// ops at those points.
static void insertDeviceSharedMemDeallocation(OpBuilder &builder,
                                              TypeAttr elemType,
                                              Value arraySize,
+13 −0
Original line number Diff line number Diff line
# Library with utilities for the OpenMP dialect, built from Utils.cpp.
add_mlir_dialect_library(MLIROpenMPUtils
  Utils.cpp

  # Directory holding the public OpenMP dialect headers.
  ADDITIONAL_HEADER_DIRS
  ${MLIR_MAIN_INCLUDE_DIR}/mlir/Dialect/OpenMP

  # Libraries linked publicly, so they propagate to users of this library.
  LINK_LIBS PUBLIC
  MLIRIR
  MLIRLLVMDialect
  MLIROpenACCMPCommon
  MLIROpenMPDialect
  MLIRSupport
  )
Loading