Commit 26927518 authored by Stephan Herhut's avatar Stephan Herhut
Browse files

Add 'gpu.terminator' operation.

Summary:
The 'gpu.terminator' operation is used as the terminator for the
regions of gpu.launch. This is to disambiguate them from the
return operation on 'gpu.func' functions.

This is a breaking change and users of the gpu dialect will need
to adapt their code when producing 'gpu.launch' operations.

Reviewers: nicolasvasilache

Subscribers: mehdi_amini, rriddle, jpienaar, burmako, shauheen, antiagainst, csigg, arpith-jacob, mgester, lucyrfox, liufengdb, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D73620
parent 2930dab3
Loading
Loading
Loading
Loading
+18 −2
Original line number Diff line number Diff line
@@ -472,8 +472,24 @@ def GPU_LaunchOp : GPU_Op<"launch", [IsolatedFromAbove]>,
  let verifier = [{ return ::verify(*this); }];
}

def GPU_ReturnOp : GPU_Op<"return", [Terminator]>, Arguments<(ins)>,
    Results<(outs)> {
// `gpu.return`: terminator for the body of a `gpu.func`. It carries a variadic
// list of operands; the C++ verifier checks them against the result types of
// the enclosing function.
def GPU_ReturnOp : GPU_Op<"return", [HasParent<"GPUFuncOp">, Terminator]>,
    Arguments<(ins Variadic<AnyType>:$operands)>, Results<(outs)> {
  let summary = "Terminator for GPU functions.";
  let description = [{
    A terminator operation for regions that appear in the body of `gpu.func`
    functions. The operands to the `gpu.return` are the result values returned
    by an invocation of the `gpu.func`.
  }];

  // Builder for the operand-less form; its body is intentionally empty.
  let builders = [OpBuilder<"Builder *builder, OperationState &result", " // empty">];

  let parser = [{ return parseReturnOp(parser, result); }];
  // Print the operands and their types as well as the operation name.
  // Printing only the name would silently drop the returned values and the
  // output could not be parsed back into the same operation.
  let printer = [{
    p << getOperationName();
    if (getNumOperands() != 0) {
      p << ' ';
      p.printOperands(operands());
      p << " : ";
      interleaveComma(getOperandTypes(), p);
    }
  }];
  let verifier = [{ return ::verify(*this); }];
}

def GPU_TerminatorOp : GPU_Op<"terminator", [HasParent<"LaunchOp">, Terminator]>,
    Arguments<(ins)>, Results<(outs)> {
  let summary = "Terminator for GPU launch regions.";
  let description = [{
    A terminator operation for regions that appear in the body of `gpu.launch`
+3 −3
Original line number Diff line number Diff line
@@ -306,9 +306,9 @@ createLaunchBody(OpBuilder &builder, OpTy rootForOp, gpu::LaunchOp launchOp,
                 unsigned numBlockDims, unsigned numThreadDims) {
  OpBuilder::InsertionGuard bodyInsertionGuard(builder);
  builder.setInsertionPointToEnd(&launchOp.body().front());
  auto returnOp = builder.create<gpu::ReturnOp>(launchOp.getLoc());
  auto terminatorOp = builder.create<gpu::TerminatorOp>(launchOp.getLoc());

  rootForOp.getOperation()->moveBefore(returnOp);
  rootForOp.getOperation()->moveBefore(terminatorOp);
  SmallVector<Value, 3> workgroupID, numWorkGroups;
  packIdAndNumId(launchOp.getBlockIds(), launchOp.getGridSize(), numBlockDims,
                 workgroupID, numWorkGroups);
@@ -435,7 +435,7 @@ void LoopToGpuConverter::createLaunch(OpTy rootForOp, OpTy innermostForOp,
  Location terminatorLoc = terminator.getLoc();
  terminator.erase();
  builder.setInsertionPointToEnd(innermostForOp.getBody());
  builder.create<gpu::ReturnOp>(terminatorLoc);
  builder.create<gpu::TerminatorOp>(terminatorLoc, llvm::None);
  launchOp.body().front().getOperations().splice(
      launchOp.body().front().begin(),
      innermostForOp.getBody()->getOperations());
+51 −7
Original line number Diff line number Diff line
@@ -270,18 +270,19 @@ static LogicalResult verify(LaunchOp op) {
  }

  // Block terminators without successors are expected to exit the kernel region
  // and must be `gpu.launch`.
  // and must be `gpu.terminator`.
  for (Block &block : op.body()) {
    if (block.empty())
      continue;
    if (block.back().getNumSuccessors() != 0)
      continue;
    if (!isa<gpu::ReturnOp>(&block.back())) {
    if (!isa<gpu::TerminatorOp>(&block.back())) {
      return block.back()
                 .emitError("expected 'gpu.terminator' or a terminator with "
                            "successors")
          .emitError()
          .append("expected '", gpu::TerminatorOp::getOperationName(),
                  "' or a terminator with successors")
          .attachNote(op.getLoc())
             << "in '" << LaunchOp::getOperationName() << "' body region";
          .append("in '", LaunchOp::getOperationName(), "' body region");
    }
  }

@@ -680,7 +681,7 @@ static ParseResult parseGPUFuncOp(OpAsmParser &parser, OperationState &result) {
           << "gpu.func requires named arguments";

  // Construct the function type. More types will be added to the region, but
  // not to the functiont type.
  // not to the function type.
  Builder &builder = parser.getBuilder();
  auto type = builder.getFunctionType(argTypes, resultTypes);
  result.addAttribute(GPUFuncOp::getTypeAttrName(), TypeAttr::get(type));
@@ -767,6 +768,10 @@ LogicalResult GPUFuncOp::verifyType() {
  if (!type.isa<FunctionType>())
    return emitOpError("requires '" + getTypeAttrName() +
                       "' attribute of function type");

  if (isKernel() && getType().getNumResults() != 0)
    return emitOpError() << "expected void return type for kernel function";

  return success();
}

@@ -814,6 +819,45 @@ LogicalResult GPUFuncOp::verifyBody() {
  return success();
}

//===----------------------------------------------------------------------===//
// ReturnOp
//===----------------------------------------------------------------------===//

/// Parses the custom assembly form of `gpu.return`: an optional
/// comma-separated operand list followed by an optional `:` and type list.
///
/// The parsed operands are resolved against the parsed types and appended to
/// `result.operands`. Returns failure if any parsing step or the operand
/// resolution fails, and success otherwise.
static ParseResult parseReturnOp(OpAsmParser &parser, OperationState &result) {
  llvm::SmallVector<OpAsmParser::OperandType, 4> operands;
  llvm::SmallVector<Type, 4> types;

  // Capture the location before any token is consumed so that diagnostics
  // emitted while resolving operands point at the start of the operand list
  // instead of past the end of the type list.
  auto operandsLoc = parser.getCurrentLocation();
  if (parser.parseOperandList(operands) ||
      parser.parseOptionalColonTypeList(types) ||
      parser.resolveOperands(operands, types, operandsLoc, result.operands))
    return failure();

  return success();
}

/// Verifies that a `gpu.return` agrees with the signature of the `gpu.func`
/// that encloses it: the number of operands must equal the number of function
/// results, and each operand type must equal the corresponding result type.
/// Emits an op error (with a note at the function for count mismatches) and
/// returns failure on the first violation found.
static LogicalResult verify(gpu::ReturnOp returnOp) {
  GPUFuncOp function = returnOp.getParentOfType<GPUFuncOp>();
  FunctionType funType = function.getType();
  unsigned numResults = funType.getNumResults();

  // The operand count has to match before types can be compared pairwise.
  if (numResults != returnOp.operands().size())
    return returnOp.emitOpError()
        .append("expected ", numResults, " result operands")
        .attachNote(function.getLoc())
        .append("return type declared here");

  // Compare each operand type against the declared result type in order.
  for (unsigned idx = 0; idx != numResults; ++idx) {
    Type expectedType = funType.getResult(idx);
    Type actualType = returnOp.getOperand(idx).getType();
    if (expectedType == actualType)
      continue;
    return returnOp.emitOpError() << "unexpected type `" << actualType
                                  << "' for operand #" << idx;
  }
  return success();
}

//===----------------------------------------------------------------------===//
// GPUModuleOp
//===----------------------------------------------------------------------===//
+7 −1
Original line number Diff line number Diff line
@@ -99,7 +99,7 @@ static gpu::LaunchFuncOp inlineBeneficiaryOps(gpu::GPUFuncOp kernelFunc,
}

// Outline the `gpu.launch` operation body into a kernel function. Replace
// `gpu.return` operations by `std.return` in the generated function.
// `gpu.terminator` operations by `gpu.return` in the generated function.
static gpu::GPUFuncOp outlineKernelFunc(gpu::LaunchOp launchOp) {
  Location loc = launchOp.getLoc();
  // Create a builder with no insertion point, insertion will happen separately
@@ -116,6 +116,12 @@ static gpu::GPUFuncOp outlineKernelFunc(gpu::LaunchOp launchOp) {
                       builder.getUnitAttr());
  outlinedFunc.body().takeBody(launchOp.body());
  injectGpuIndexOperations(loc, outlinedFunc.body());
  outlinedFunc.walk([](gpu::TerminatorOp op) {
    OpBuilder replacer(op);
    replacer.create<gpu::ReturnOp>(op.getLoc());
    op.erase();
  });

  return outlinedFunc;
}

+1 −1
Original line number Diff line number Diff line
@@ -23,7 +23,7 @@ func @foo(%arg0: memref<?xf32>, %arg1 : index) {
      // CHECK: %[[prod_j:.*]] = muli %{{.*}}, %{{.*}} : index
      // CHECK: addi %{{.*}}, %[[prod_j]] : index

      // CHECK: gpu.return
      // CHECK: gpu.terminator
    }
  }
  return
Loading