[mlir][gpu] Add the `gpu-module-to-binary` pass. (43752a2a) · Commits · llvm-doe / llvm-project

mlir/include/mlir/Dialect/GPU/Transforms/Passes.h

+7 −0

Original line number	Diff line number	Diff line
		@@ -70,6 +70,13 @@ inline void populateGpuRewritePatterns(RewritePatternSet &patterns) {
		}

		namespace gpu {
		/// Transforms the GPU modules held by `op` into GPU binary operations. The
		/// implementation visits the blocks of `op`'s own regions, so only
		/// `gpu.module` ops that are direct children of `op` are converted. Each
		/// module is serialized once per target attribute using `options`, replaced
		/// by a `gpu.binary` with the same name, and then erased. The resulting
		/// `gpu.binary` has `handler` as its offloading handler attribute (null by
		/// default). Returns failure as soon as one module fails to serialize.
		LogicalResult transformGpuModulesToBinaries(
		Operation *op, OffloadingLLVMTranslationAttrInterface handler = nullptr,
		const gpu::TargetOptions &options = {});

		/// Base pass class to serialize kernel functions through LLVM into
		/// user-specified IR and add the resulting blob as module attribute.
		class SerializeToBlobPass : public OperationPass<gpu::GPUModuleOp> {

mlir/include/mlir/Dialect/GPU/Transforms/Passes.td

+27 −0

Original line number	Diff line number	Diff line
		@@ -55,4 +55,31 @@ def GpuDecomposeMemrefsPass : Pass<"gpu-decompose-memrefs"> {
		];
		}

		// Declares the `gpu-module-to-binary` pass. The operation-name argument of
		// `Pass` is left as the empty string.
		def GpuModuleToBinaryPass
		: Pass<"gpu-module-to-binary", ""> {
		let summary = "Transforms a GPU module into a GPU binary.";
		let description = [{
		This pass searches for all nested GPU modules and serializes the module
		using the target attributes attached to the module, producing a GPU binary
		with an object for every target.

		The `format` argument can have the following values:
		1. `offloading`, `llvm`: producing an offloading representation.
		2. `assembly`, `isa`: producing assembly code.
		3. `binary`, `bin`: producing binaries.
		}];
		// Each option below lists, in order: the C++ member name, the command line
		// name, the C++ type, the default value and the help text.
		let options = [
		// `handler`: attribute forwarded to the created `gpu.binary`; the pass
		// casts it to `OffloadingLLVMTranslationAttrInterface`. Defaults to null.
		Option<"offloadingHandler", "handler", "Attribute", "nullptr",
		"Offloading handler to be attached to the resulting binary op.">,
		// `toolkit`, `l` and `opts` are passed through unchanged to the
		// `TargetOptions` object built by the pass.
		Option<"toolkitPath", "toolkit", "std::string", [{""}],
		"Toolkit path.">,
		ListOption<"linkFiles", "l", "std::string",
		"Extra files to link to.">,
		Option<"cmdOptions", "opts", "std::string", [{""}],
		"Command line options to pass to the tools.">,
		// `format`: one of the spellings listed in the description; defaults to
		// `bin`. Any other value makes the pass emit "Invalid format specified.".
		Option<"compilationTarget", "format", "std::string", [{"bin"}],
		"The target representation of the compilation process.">
		];
		}

		#endif // MLIR_DIALECT_GPU_PASSES

mlir/lib/Dialect/GPU/CMakeLists.txt

+3 −0

Original line number	Diff line number	Diff line
		@@ -51,6 +51,7 @@ add_mlir_dialect_library(MLIRGPUTransforms
		Transforms/GlobalIdRewriter.cpp
		Transforms/KernelOutlining.cpp
		Transforms/MemoryPromotion.cpp
		Transforms/ModuleToBinary.cpp
		Transforms/ParallelLoopMapper.cpp
		Transforms/SerializeToBlob.cpp
		Transforms/SerializeToCubin.cpp
		@@ -85,10 +86,12 @@ add_mlir_dialect_library(MLIRGPUTransforms
		MLIRGPUToLLVMIRTranslation
		MLIRLLVMToLLVMIRTranslation
		MLIRMemRefDialect
		MLIRNVVMTarget
		MLIRPass
		MLIRSCFDialect
		MLIRSideEffectInterfaces
		MLIRSupport
		MLIRROCDLTarget
		MLIRTransformUtils
		)

mlir/lib/Dialect/GPU/Transforms/ModuleToBinary.cpp

0 → 100644

+122 −0

Original line number	Diff line number	Diff line
		//===- ModuleToBinary.cpp - Transforms GPU modules to GPU binaries ----------=//
		//
		// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
		// See https://llvm.org/LICENSE.txt for license information.
		// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
		//
		//===----------------------------------------------------------------------===//
		//
		// This file implements the `GpuModuleToBinaryPass` pass, transforming GPU
		// modules into GPU binaries.
		//
		//===----------------------------------------------------------------------===//

		#include "mlir/Dialect/GPU/Transforms/Passes.h"

		#include "mlir/Dialect/Func/IR/FuncOps.h"
		#include "mlir/Dialect/GPU/IR/GPUDialect.h"
		#include "mlir/IR/BuiltinOps.h"
		#include "mlir/Target/LLVM/NVVM/Target.h"
		#include "mlir/Target/LLVM/ROCDL/Target.h"
		#include "mlir/Target/LLVMIR/Dialect/GPU/GPUToLLVMIRTranslation.h"
		#include "mlir/Target/LLVMIR/Dialect/LLVMIR/LLVMToLLVMIRTranslation.h"
		#include "mlir/Transforms/GreedyPatternRewriteDriver.h"

		#include "llvm/ADT/STLExtras.h"
		#include "llvm/ADT/StringSwitch.h"

		using namespace mlir;
		using namespace mlir::gpu;

		namespace mlir {
		#define GEN_PASS_DEF_GPUMODULETOBINARYPASS
		#include "mlir/Dialect/GPU/Transforms/Passes.h.inc"
		} // namespace mlir

		namespace {
		/// Pass implementation for `gpu-module-to-binary`. It derives from the
		/// `GpuModuleToBinaryPassBase` class pulled in by the
		/// `GEN_PASS_DEF_GPUMODULETOBINARYPASS` include above, which supplies the
		/// option members (`compilationTarget`, `toolkitPath`, ...) read by
		/// `runOnOperation`.
		class GpuModuleToBinaryPass
		: public impl::GpuModuleToBinaryPassBase<GpuModuleToBinaryPass> {
		public:
		// Reuse the constructors of the generated base class.
		using Base::Base;
		// Registers the translations and targets needed to serialize GPU modules.
		void getDependentDialects(DialectRegistry &registry) const override;
		// Builds the target options from the pass options and converts the GPU
		// modules of the current operation into GPU binaries.
		void runOnOperation() final;
		};
		} // namespace

		/// Adds to `registry` everything the serialization step relies on: the LLVM
		/// and GPU dialect translations unconditionally, plus the NVVM and ROCDL
		/// targets when the corresponding build-time macro is set to 1.
		void GpuModuleToBinaryPass::getDependentDialects(
		DialectRegistry &registry) const {
		// Register all GPU related translations.
		registerLLVMDialectTranslation(registry);
		registerGPUDialectTranslation(registry);
		// The NVVM target is only registered when CUDA conversions are enabled.
		#if MLIR_CUDA_CONVERSIONS_ENABLED == 1
		registerNVVMTarget(registry);
		#endif
		// The ROCDL target is only registered when ROCm conversions are enabled.
		#if MLIR_ROCM_CONVERSIONS_ENABLED == 1
		registerROCDLTarget(registry);
		#endif
		}

		/// Converts the GPU modules of the current operation into GPU binaries.
		///
		/// The `format` option is parsed into a `TargetOptions` compilation target;
		/// an unrecognized value emits an error and fails the pass without touching
		/// the IR. The remaining options (`toolkit`, `l`, `opts`) are forwarded
		/// unchanged through `TargetOptions`, and the `handler` attribute, when set,
		/// is attached to every created `gpu.binary`.
		void GpuModuleToBinaryPass::runOnOperation() {
		  // -1 is the sentinel for "none of the accepted spellings matched".
		  int targetFormat = llvm::StringSwitch<int>(compilationTarget)
		                         .Cases("offloading", "llvm", TargetOptions::offload)
		                         .Cases("assembly", "isa", TargetOptions::assembly)
		                         .Cases("binary", "bin", TargetOptions::binary)
		                         .Default(-1);
		  if (targetFormat == -1) {
		    // Stop here: casting the sentinel to the enum below would hand an
		    // out-of-range compilation target to the serializers.
		    getOperation()->emitError() << "Invalid format specified.";
		    return signalPassFailure();
		  }
		  TargetOptions targetOptions(
		      toolkitPath, linkFiles, cmdOptions,
		      static_cast<TargetOptions::CompilationTarget>(targetFormat));
		  // A handler attribute that does not implement the interface yields a null
		  // handler, exactly like an unset option.
		  if (failed(transformGpuModulesToBinaries(
		          getOperation(),
		          offloadingHandler ? dyn_cast<OffloadingLLVMTranslationAttrInterface>(
		                                  offloadingHandler.getValue())
		                            : OffloadingLLVMTranslationAttrInterface(nullptr),
		          targetOptions)))
		    return signalPassFailure();
		}

		namespace {
		/// Replaces `op` with a `gpu.binary` holding one object per target attribute.
		///
		/// Every target attribute of `op` serializes the module with
		/// `targetOptions`; the resulting bytes are wrapped in a `gpu.object`. The
		/// `gpu.binary` is created right after `op`, reuses its name and location,
		/// and carries `handler` as offloading handler. `op` is erased on success.
		///
		/// Returns failure, after emitting an error on `op`, when the module has no
		/// target attributes or when any target fails to serialize; `op` is left in
		/// place in that case.
		LogicalResult moduleSerializer(GPUModuleOp op,
		                               OffloadingLLVMTranslationAttrInterface handler,
		                               const TargetOptions &targetOptions) {
		  // The targets attribute is null on a module without targets; iterating a
		  // null array attribute would dereference a null pointer.
		  auto targets = op.getTargetsAttr();
		  if (!targets)
		    return op.emitError("the module has no target attributes");

		  OpBuilder builder(op->getContext());
		  SmallVector<Attribute> objects;
		  // Serialize all targets.
		  for (auto targetAttr : targets) {
		    assert(targetAttr && "Target attribute cannot be null.");
		    auto target = dyn_cast<gpu::TargetAttrInterface>(targetAttr);
		    assert(target &&
		           "Target attribute doesn't implements `TargetAttrInterface`.");
		    std::optional<SmallVector<char, 0>> object =
		        target.serializeToObject(op, targetOptions);

		    if (!object) {
		      op.emitError("An error happened while serializing the module.");
		      return failure();
		    }

		    // The serialized bytes are stored as a string attribute in the object.
		    objects.push_back(builder.getAttr<gpu::ObjectAttr>(
		        target,
		        builder.getStringAttr(StringRef(object->data(), object->size()))));
		  }
		  // Insert the binary next to the module before removing the module itself.
		  builder.setInsertionPointAfter(op);
		  builder.create<gpu::BinaryOp>(op.getLoc(), op.getName(), handler,
		                                builder.getArrayAttr(objects));
		  op->erase();
		  return success();
		}
		} // namespace

		/// Walks the blocks of every region directly attached to `op` and turns each
		/// `gpu.module` found there into a `gpu.binary` via `moduleSerializer`,
		/// stopping with failure at the first module that cannot be serialized.
		LogicalResult mlir::gpu::transformGpuModulesToBinaries(
		    Operation *op, OffloadingLLVMTranslationAttrInterface handler,
		    const gpu::TargetOptions &targetOptions) {
		  for (Region &nestedRegion : op->getRegions()) {
		    for (Block &body : nestedRegion.getBlocks()) {
		      // The early-increment range keeps the iteration valid while the
		      // serializer erases the module currently being visited.
		      for (auto gpuModule :
		           llvm::make_early_inc_range(body.getOps<GPUModuleOp>())) {
		        LogicalResult status =
		            moduleSerializer(gpuModule, handler, targetOptions);
		        if (failed(status))
		          return failure();
		      }
		    }
		  }
		  return success();
		}

mlir/test/Dialect/GPU/module-to-binary-nvvm.mlir

0 → 100644

+25 −0

Original line number	Diff line number	Diff line
		// Checks that `gpu-module-to-binary` replaces each `gpu.module` with a
		// `gpu.binary` of the same name holding one `gpu.object` per target, for both
		// the `llvm` and the `isa` output formats.
		// REQUIRES: host-supports-nvptx
		// RUN: mlir-opt %s --gpu-module-to-binary="format=llvm" | FileCheck %s
		// RUN: mlir-opt %s --gpu-module-to-binary="format=isa" | FileCheck %s

		module attributes {gpu.container_module} {
		  // Single target: exactly one object is expected.
		  // CHECK-LABEL:gpu.binary @kernel_module1
		  // CHECK:[#gpu.object<#nvvm.target<chip = "sm_70">, "{{.*}}">]
		  gpu.module @kernel_module1 [#nvvm.target<chip = "sm_70">] {
		    llvm.func @kernel(%arg0: i32, %arg1: !llvm.ptr<f32>,
		        %arg2: !llvm.ptr<f32>, %arg3: i64, %arg4: i64,
		        %arg5: i64) attributes {gpu.kernel} {
		      llvm.return
		    }
		  }

		  // Two targets: one object per target, in the order they are listed.
		  // CHECK-LABEL:gpu.binary @kernel_module2
		  // CHECK:[#gpu.object<#nvvm.target<flags = {fast}>, "{{.*}}">, #gpu.object<#nvvm.target, "{{.*}}">]
		  gpu.module @kernel_module2 [#nvvm.target<flags = {fast}>, #nvvm.target] {
		    llvm.func @kernel(%arg0: i32, %arg1: !llvm.ptr<f32>,
		        %arg2: !llvm.ptr<f32>, %arg3: i64, %arg4: i64,
		        %arg5: i64) attributes {gpu.kernel} {
		      llvm.return
		    }
		  }
		}