[mlir][gpu] Add passes to attach (NVVM|ROCDL) target attributes to GPU Modules (fbbb8ade) · Commits · llvm-doe / llvm-project

mlir/include/mlir/Dialect/GPU/Transforms/Passes.td

+105 −0

Original line number	Diff line number	Diff line
		@@ -82,4 +82,109 @@ def GpuModuleToBinaryPass
		];
		}

		def GpuNVVMAttachTarget: Pass<"nvvm-attach-target", ""> {
		  let summary = "Attaches an NVVM target attribute to a GPU Module.";
		  let description = [{
		    This pass searches for all GPU Modules in the immediate regions and attaches
		    an NVVM target if the module matches the name specified by the `module` argument.
		    The `module` argument is a regular expression; when it is empty (the default)
		    the target is attached to every GPU Module found.

		    Example:
		    ```
		    // File: in.mlir:
		    gpu.module @nvvm_module_1 {...}
		    gpu.module @nvvm_module_2 {...}
		    gpu.module @rocdl_module_1 {...}
		    // mlir-opt --nvvm-attach-target="module=nvvm.* chip=sm_90" in.mlir
		    gpu.module @nvvm_module_1 [#nvvm.target<chip = "sm_90">] {...}
		    gpu.module @nvvm_module_2 [#nvvm.target<chip = "sm_90">] {...}
		    gpu.module @rocdl_module_1 {...}
		    ```
		  }];
		  let options = [
		    Option<"moduleMatcher", "module", "std::string",
		           /*default=*/ [{""}],
		           "Regex used to identify the modules to attach the target to.">,
		    Option<"triple", "triple", "std::string",
		           /*default=*/ "\"nvptx64-nvidia-cuda\"",
		           "Target triple.">,
		    Option<"chip", "chip", "std::string",
		           /*default=*/"\"sm_50\"",
		           "Target chip.">,
		    Option<"features", "features", "std::string",
		           /*default=*/"\"+ptx60\"",
		           "Target features.">,
		    Option<"optLevel", "O", "unsigned",
		           /*default=*/"2",
		           "Optimization level.">,
		    Option<"fastFlag", "fast", "bool",
		           /*default=*/"false",
		           "Enable fast math mode.">,
		    Option<"ftzFlag", "ftz", "bool",
		           /*default=*/"false",
		           "Enable flush to zero for denormals.">,
		    ListOption<"linkLibs", "l", "std::string",
		               "Extra bitcode libraries paths to link to.">,
		  ];
		}

		def GpuROCDLAttachTarget: Pass<"rocdl-attach-target", ""> {
		  let summary = "Attaches a ROCDL target attribute to a GPU Module.";
		  let description = [{
		    This pass searches for all GPU Modules in the immediate regions and attaches
		    a ROCDL target if the module matches the name specified by the `module` argument.
		    The `module` argument is a regular expression; when it is empty (the default)
		    the target is attached to every GPU Module found.

		    Example:
		    ```
		    // File: in.mlir:
		    gpu.module @nvvm_module_1 {...}
		    gpu.module @nvvm_module_2 {...}
		    gpu.module @rocdl_module_1 {...}
		    // mlir-opt --rocdl-attach-target="module=rocdl.* chip=gfx90a" in.mlir
		    gpu.module @nvvm_module_1 {...}
		    gpu.module @nvvm_module_2 {...}
		    gpu.module @rocdl_module_1 [#rocdl.target<chip = "gfx90a">] {...}
		    ```
		  }];
		  let options = [
		    Option<"moduleMatcher", "module", "std::string",
		           /*default=*/ [{""}],
		           "Regex used to identify the modules to attach the target to.">,
		    Option<"triple", "triple", "std::string",
		           /*default=*/ "\"amdgcn-amd-amdhsa\"",
		           "Target triple.">,
		    Option<"chip", "chip", "std::string",
		           /*default=*/"\"gfx900\"",
		           "Target chip.">,
		    Option<"features", "features", "std::string",
		           /*default=*/"\"\"",
		           "Target features.">,
		    Option<"abiVersion", "abi", "std::string",
		           /*default=*/"\"400\"",
		           "ABI version.">,
		    Option<"optLevel", "O", "unsigned",
		           /*default=*/"2",
		           "Optimization level.">,
		    Option<"wave64Flag", "wave64", "bool",
		           /*default=*/"true",
		           "Use Wave64 mode.">,
		    Option<"fastFlag", "fast", "bool",
		           /*default=*/"false",
		           "Enable fast relaxed math opt.">,
		    Option<"dazFlag", "daz", "bool",
		           /*default=*/"false",
		           "Enable denormals are zero opt.">,
		    Option<"finiteOnlyFlag", "finite-only", "bool",
		           /*default=*/"false",
		           "Enable finite only opt.">,
		    Option<"unsafeMathFlag", "unsafe-math", "bool",
		           /*default=*/"false",
		           "Enable unsafe math opt.">,
		    Option<"correctSqrtFlag", "correct-sqrt", "bool",
		           /*default=*/"true",
		           "Enable correct rounded sqrt.">,
		    ListOption<"linkLibs", "l", "std::string",
		               "Extra bitcode libraries paths to link to.">,
		  ];
		}

		#endif // MLIR_DIALECT_GPU_PASSES

mlir/lib/Dialect/GPU/CMakeLists.txt

+2 −0

Original line number	Diff line number	Diff line
		@@ -52,11 +52,13 @@ add_mlir_dialect_library(MLIRGPUTransforms
		Transforms/KernelOutlining.cpp
		Transforms/MemoryPromotion.cpp
		Transforms/ModuleToBinary.cpp
		Transforms/NVVMAttachTarget.cpp
		Transforms/ParallelLoopMapper.cpp
		Transforms/SerializeToBlob.cpp
		Transforms/SerializeToCubin.cpp
		Transforms/SerializeToHsaco.cpp
		Transforms/ShuffleRewriter.cpp
		Transforms/ROCDLAttachTarget.cpp

		ADDITIONAL_HEADER_DIRS
		${MLIR_MAIN_INCLUDE_DIR}/mlir/Dialect/GPU

mlir/lib/Dialect/GPU/Transforms/NVVMAttachTarget.cpp

0 → 100644

+86 −0

Original line number	Diff line number	Diff line
		//===- NVVMAttachTarget.cpp - Attach an NVVM target -----------------------===//
		//
		// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
		// See https://llvm.org/LICENSE.txt for license information.
		// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
		//
		//===----------------------------------------------------------------------===//
		//
		// This file implements the `GpuNVVMAttachTarget` pass, attaching `#nvvm.target`
		// attributes to GPU modules.
		//
		//===----------------------------------------------------------------------===//

		#include "mlir/Dialect/GPU/Transforms/Passes.h"

		#include "mlir/Dialect/GPU/IR/GPUDialect.h"
		#include "mlir/Dialect/LLVMIR/NVVMDialect.h"
		#include "mlir/IR/Builders.h"
		#include "mlir/Pass/Pass.h"
		#include "mlir/Target/LLVM/NVVM/Target.h"
		#include "llvm/Support/Regex.h"

		namespace mlir {
		#define GEN_PASS_DEF_GPUNVVMATTACHTARGET
		#include "mlir/Dialect/GPU/Transforms/Passes.h.inc"
		} // namespace mlir

		using namespace mlir;
		using namespace mlir::NVVM;

		namespace {
		/// Pass implementation that appends an `#nvvm.target` attribute, built from
		/// the pass options, to the `targets` array of the matching GPU modules.
		struct NVVMAttachTarget
		    : public impl::GpuNVVMAttachTargetBase<NVVMAttachTarget> {
		  using Base::Base;

		  /// Returns a dictionary holding one unit attribute per enabled flag option
		  /// (`fast`, `ftz`), or a null attribute when no flag is enabled.
		  DictionaryAttr getFlags(OpBuilder &builder) const;

		  void runOnOperation() override;

		  void getDependentDialects(DialectRegistry &registry) const override {
		    // The pass creates `#nvvm.target` attributes, so the NVVM dialect itself
		    // has to be declared as a dependency; registering only the target
		    // extension does not load the dialect.
		    registry.insert<NVVM::NVVMDialect>();
		    registerNVVMTarget(registry);
		  }
		};
		} // namespace

		/// Translates the boolean pass options into the `flags` dictionary of the
		/// target attribute: every enabled option becomes a unit attribute keyed by the
		/// flag name. A null attribute is returned when nothing is enabled.
		DictionaryAttr NVVMAttachTarget::getFlags(OpBuilder &builder) const {
		  SmallVector<NamedAttribute, 2> enabledFlags;
		  UnitAttr marker = builder.getUnitAttr();
		  if (fastFlag)
		    enabledFlags.push_back(builder.getNamedAttr("fast", marker));
		  if (ftzFlag)
		    enabledFlags.push_back(builder.getNamedAttr("ftz", marker));
		  if (enabledFlags.empty())
		    return nullptr;
		  return builder.getDictionaryAttr(enabledFlags);
		}

		void NVVMAttachTarget::runOnOperation() {
		OpBuilder builder(&getContext());
		ArrayRef<std::string> libs(linkLibs);
		SmallVector<StringRef> filesToLink(libs.begin(), libs.end());
		auto target = builder.getAttr<NVVMTargetAttr>(
		optLevel, triple, chip, features, getFlags(builder),
		filesToLink.size() ? builder.getStrArrayAttr(filesToLink) : nullptr);
		llvm::Regex matcher(moduleMatcher);
		for (Region &region : getOperation()->getRegions())
		for (Block &block : region.getBlocks())
		for (auto module : block.getOps<gpu::GPUModuleOp>()) {
		// Check if the name of the module matches.
		if (!moduleMatcher.empty() && !matcher.match(module.getName()))
		continue;
		// Create the target array.
		SmallVector<Attribute> targets;
		if (std::optional<ArrayAttr> attrs = module.getTargets())
		targets.append(attrs->getValue().begin(), attrs->getValue().end());
		targets.push_back(target);
		// Remove any duplicate targets.
		targets.erase(std::unique(targets.begin(), targets.end()),
		targets.end());
		// Update the target attribute array.
		module.setTargetsAttr(builder.getArrayAttr(targets));
		}
		}

mlir/lib/Dialect/GPU/Transforms/ROCDLAttachTarget.cpp

0 → 100644

+94 −0

Original line number	Diff line number	Diff line
		//===- ROCDLAttachTarget.cpp - Attach a ROCDL target ----------------------===//
		//
		// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
		// See https://llvm.org/LICENSE.txt for license information.
		// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
		//
		//===----------------------------------------------------------------------===//
		//
		// This file implements the `GpuROCDLAttachTarget` pass, attaching
		// `#rocdl.target` attributes to GPU modules.
		//
		//===----------------------------------------------------------------------===//

		#include "mlir/Dialect/GPU/Transforms/Passes.h"

		#include "mlir/Dialect/GPU/IR/GPUDialect.h"
		#include "mlir/Dialect/LLVMIR/ROCDLDialect.h"
		#include "mlir/IR/Builders.h"
		#include "mlir/Pass/Pass.h"
		#include "mlir/Target/LLVM/ROCDL/Target.h"
		#include "llvm/Support/Regex.h"

		namespace mlir {
		#define GEN_PASS_DEF_GPUROCDLATTACHTARGET
		#include "mlir/Dialect/GPU/Transforms/Passes.h.inc"
		} // namespace mlir

		using namespace mlir;
		using namespace mlir::ROCDL;

		namespace {
		/// Pass implementation that appends a `#rocdl.target` attribute, built from
		/// the pass options, to the `targets` array of the matching GPU modules.
		struct ROCDLAttachTarget
		    : public impl::GpuROCDLAttachTargetBase<ROCDLAttachTarget> {
		  using Base::Base;

		  /// Returns a dictionary holding one unit attribute per flag selected by the
		  /// boolean pass options, or a null attribute when no flag is selected.
		  DictionaryAttr getFlags(OpBuilder &builder) const;

		  void runOnOperation() override;

		  void getDependentDialects(DialectRegistry &registry) const override {
		    // The pass creates `#rocdl.target` attributes, so the ROCDL dialect itself
		    // has to be declared as a dependency; registering only the target
		    // extension does not load the dialect.
		    registry.insert<ROCDL::ROCDLDialect>();
		    registerROCDLTarget(registry);
		  }
		};
		} // namespace

		/// Translates the boolean pass options into the `flags` dictionary of the
		/// target attribute. Options that default to true (`wave64`, `correct-sqrt`)
		/// are recorded in negated form (`no_wave64`, `unsafe_sqrt`) when disabled; the
		/// remaining options are recorded under their flag name when enabled. A null
		/// attribute is returned when no flag has to be recorded.
		DictionaryAttr ROCDLAttachTarget::getFlags(OpBuilder &builder) const {
		  SmallVector<NamedAttribute, 6> selectedFlags;
		  UnitAttr marker = builder.getUnitAttr();
		  auto addIf = [&](bool enabled, StringRef name) {
		    if (enabled)
		      selectedFlags.push_back(builder.getNamedAttr(name, marker));
		  };
		  addIf(!wave64Flag, "no_wave64");
		  addIf(fastFlag, "fast");
		  addIf(dazFlag, "daz");
		  addIf(finiteOnlyFlag, "finite_only");
		  addIf(unsafeMathFlag, "unsafe_math");
		  addIf(!correctSqrtFlag, "unsafe_sqrt");
		  if (selectedFlags.empty())
		    return nullptr;
		  return builder.getDictionaryAttr(selectedFlags);
		}

		void ROCDLAttachTarget::runOnOperation() {
		OpBuilder builder(&getContext());
		ArrayRef<std::string> libs(linkLibs);
		SmallVector<StringRef> filesToLink(libs.begin(), libs.end());
		auto target = builder.getAttr<ROCDLTargetAttr>(
		optLevel, triple, chip, features, abiVersion, getFlags(builder),
		filesToLink.size() ? builder.getStrArrayAttr(filesToLink) : nullptr);
		llvm::Regex matcher(moduleMatcher);
		for (Region &region : getOperation()->getRegions())
		for (Block &block : region.getBlocks())
		for (auto module : block.getOps<gpu::GPUModuleOp>()) {
		// Check if the name of the module matches.
		if (!moduleMatcher.empty() && !matcher.match(module.getName()))
		continue;
		// Create the target array.
		SmallVector<Attribute> targets;
		if (std::optional<ArrayAttr> attrs = module.getTargets())
		targets.append(attrs->getValue().begin(), attrs->getValue().end());
		targets.push_back(target);
		// Remove any duplicate targets.
		targets.erase(std::unique(targets.begin(), targets.end()),
		targets.end());
		// Update the target attribute array.
		module.setTargetsAttr(builder.getArrayAttr(targets));
		}
		}

mlir/test/Dialect/LLVMIR/attach-targets.mlir

0 → 100644

+29 −0

Original line number	Diff line number	Diff line
		// RUN: mlir-opt %s --nvvm-attach-target='module=nvvm.* O=3 chip=sm_90' --rocdl-attach-target='module=rocdl.* O=3 chip=gfx90a' | FileCheck %s
		// RUN: mlir-opt %s --nvvm-attach-target='module=options.* O=1 chip=sm_70 fast=true ftz=true' --rocdl-attach-target='module=options.* l=file1.bc,file2.bc wave64=false finite-only=true' | FileCheck %s --check-prefix=CHECK_OPTS

		module attributes {gpu.container_module} {
		  // Verify the target is attached to a module that has no targets yet.
		  // CHECK: @nvvm_module_1 [#nvvm.target<O = 3, chip = "sm_90">] {
		  gpu.module @nvvm_module_1 {
		  }
		  // Verify the target is appended after the pre-existing target.
		  // CHECK: @nvvm_module_2 [#nvvm.target<chip = "sm_60">, #nvvm.target<O = 3, chip = "sm_90">] {
		  gpu.module @nvvm_module_2 [#nvvm.target<chip = "sm_60">] {
		  }
		  // Verify the target is not added multiple times.
		  // CHECK: @nvvm_module_3 [#nvvm.target<O = 3, chip = "sm_90">] {
		  gpu.module @nvvm_module_3 [#nvvm.target<O = 3, chip = "sm_90">] {
		  }
		  // Verify the NVVM target is not added as it fails to match the regex, but the ROCDL does get appended.
		  // CHECK: @rocdl_module [#rocdl.target<O = 3, chip = "gfx90a">] {
		  gpu.module @rocdl_module {
		  }
		  // Check the options were added.
		  // CHECK_OPTS: @options_module_1 [#nvvm.target<O = 1, chip = "sm_70", flags = {fast, ftz}>, #rocdl.target<flags = {finite_only, no_wave64}, link = ["file1.bc", "file2.bc"]>] {
		  gpu.module @options_module_1 {
		  }
		  // Check the options were added and that the first target was preserved.
		  // CHECK_OPTS: @options_module_2 [#nvvm.target<O = 3, chip = "sm_90">, #nvvm.target<O = 1, chip = "sm_70", flags = {fast, ftz}>, #rocdl.target<flags = {finite_only, no_wave64}, link = ["file1.bc", "file2.bc"]>] {
		  gpu.module @options_module_2 [#nvvm.target<O = 3, chip = "sm_90">] {
		  }
		}