Merge pull request #267209 from Madouura/pr/triton-llvm (4774c536) · Commits · nix / nixpkgs

pkgs/by-name/op/openai-triton-llvm/package.nix

+89 −38

Original line number	Diff line number	Diff line
		{ config
		, lib
		{ lib
		, stdenv
		, fetchFromGitHub
		, pkgsBuildBuild
		, pkg-config
		, cmake
		, ninja
		, git
		, doxygen
		, sphinx
		, libxml2
		, libxcrypt
		, libedit
		, libffi
		, libpfm
		, mpfr
		, zlib
		, ncurses
		, doxygen
		, sphinx
		, which
		, sysctl
		, python3Packages
		, buildDocs ? true
		, buildMan ? true
		, buildTests ? true
		, llvmTargetsToBuild ? [ "NATIVE" ] # "NATIVE" resolves into x86 or aarch64 depending on stdenv
		, llvmProjectsToBuild ? [ "llvm" "mlir" ]
		}:

		stdenv.mkDerivation (finalAttrs: {
		let
		llvmNativeTarget =
		if stdenv.hostPlatform.isx86_64 then "X86"
		else if stdenv.hostPlatform.isAarch64 then "AArch64"
		else throw "Currently unsupported LLVM platform '${stdenv.hostPlatform.config}'";

		inferNativeTarget = t: if t == "NATIVE" then llvmNativeTarget else t;
		llvmTargetsToBuild' = [ "AMDGPU" "NVPTX" ] ++ builtins.map inferNativeTarget llvmTargetsToBuild;

		# This LLVM version can't seem to find pygments/pyyaml,
		# but a later update will likely fix this (openai-triton-2.1.0)
		python =
		if buildTests
		then python3Packages.python.withPackages (p: with p; [ psutil pygments pyyaml ])
		else python3Packages.python;

		isNative = stdenv.hostPlatform == stdenv.buildPlatform;
		in stdenv.mkDerivation (finalAttrs: {
		pname = "openai-triton-llvm";
		version = "14.0.6-f28c006a5895";

		@@ -33,7 +55,8 @@ stdenv.mkDerivation (finalAttrs: {
		"man"
		];

		# See https://github.com/openai/triton/blob/main/python/setup.py and https://github.com/ptillet/triton-llvm-releases/releases
		# See https://github.com/openai/triton/blob/main/python/setup.py
		# and https://github.com/ptillet/triton-llvm-releases/releases
		src = fetchFromGitHub {
		owner = "llvm";
		repo = "llvm-project";
		@@ -46,7 +69,7 @@ stdenv.mkDerivation (finalAttrs: {
		cmake
		ninja
		git
		python3Packages.python
		python
		] ++ lib.optionals (buildDocs || buildMan) [
		doxygen
		sphinx
		@@ -58,6 +81,7 @@ stdenv.mkDerivation (finalAttrs: {
		libxcrypt
		libedit
		libffi
		libpfm
		mpfr
		];

		@@ -69,37 +93,55 @@ stdenv.mkDerivation (finalAttrs: {
		sourceRoot = "${finalAttrs.src.name}/llvm";

		cmakeFlags = [
		"-DLLVM_TARGETS_TO_BUILD=${
		let
		# Targets can be found in
		# https://github.com/llvm/llvm-project/tree/f28c006a5895fc0e329fe15fead81e37457cb1d1/clang/lib/Basic/Targets
		# NOTE: Unsure of how "host" would function, especially given that we might be cross-compiling.
		llvmTargets = [ "AMDGPU" "NVPTX" ]
		++ lib.optionals stdenv.isAarch64 [ "AArch64" ]
		++ lib.optionals stdenv.isx86_64 [ "X86" ];
		in
		lib.concatStringsSep ";" llvmTargets
		}"
		"-DLLVM_ENABLE_PROJECTS=llvm;mlir"
		"-DLLVM_INSTALL_UTILS=ON"
		] ++ lib.optionals (buildDocs || buildMan) [
		"-DLLVM_INCLUDE_DOCS=ON"
		"-DMLIR_INCLUDE_DOCS=ON"
		"-DLLVM_BUILD_DOCS=ON"
		# "-DLLVM_ENABLE_DOXYGEN=ON" Way too slow, only uses one core
		"-DLLVM_ENABLE_SPHINX=ON"
		"-DSPHINX_OUTPUT_HTML=ON"
		"-DSPHINX_OUTPUT_MAN=ON"
		"-DSPHINX_WARNINGS_AS_ERRORS=OFF"
		] ++ lib.optionals buildTests [
		"-DLLVM_INCLUDE_TESTS=ON"
		"-DMLIR_INCLUDE_TESTS=ON"
		"-DLLVM_BUILD_TESTS=ON"
		(lib.cmakeFeature "LLVM_TARGETS_TO_BUILD" (lib.concatStringsSep ";" llvmTargetsToBuild'))
		(lib.cmakeFeature "LLVM_ENABLE_PROJECTS" (lib.concatStringsSep ";" llvmProjectsToBuild))
		(lib.cmakeFeature "LLVM_HOST_TRIPLE" stdenv.hostPlatform.config)
		(lib.cmakeFeature "LLVM_DEFAULT_TARGET_TRIPLE" stdenv.hostPlatform.config)
		(lib.cmakeBool "LLVM_INSTALL_UTILS" true)
		(lib.cmakeBool "LLVM_INCLUDE_DOCS" (buildDocs || buildMan))
		(lib.cmakeBool "MLIR_INCLUDE_DOCS" (buildDocs || buildMan))
		(lib.cmakeBool "LLVM_BUILD_DOCS" (buildDocs || buildMan))
		# Way too slow, only uses one core
		# (lib.cmakeBool "LLVM_ENABLE_DOXYGEN" (buildDocs || buildMan))
		(lib.cmakeBool "LLVM_ENABLE_SPHINX" (buildDocs || buildMan))
		(lib.cmakeBool "SPHINX_OUTPUT_HTML" buildDocs)
		(lib.cmakeBool "SPHINX_OUTPUT_MAN" buildMan)
		(lib.cmakeBool "SPHINX_WARNINGS_AS_ERRORS" false)
		(lib.cmakeBool "LLVM_INCLUDE_TESTS" buildTests)
		(lib.cmakeBool "MLIR_INCLUDE_TESTS" buildTests)
		(lib.cmakeBool "LLVM_BUILD_TESTS" buildTests)
		# Cross compilation code taken/modified from LLVM 16 derivation
		] ++ lib.optionals (!isNative) (let
		nativeToolchainFlags = let
		nativeCC = pkgsBuildBuild.targetPackages.stdenv.cc;
		nativeBintools = nativeCC.bintools.bintools;
		in [
		(lib.cmakeFeature "CMAKE_C_COMPILER" "${nativeCC}/bin/${nativeCC.targetPrefix}cc")
		(lib.cmakeFeature "CMAKE_CXX_COMPILER" "${nativeCC}/bin/${nativeCC.targetPrefix}c++")
		(lib.cmakeFeature "CMAKE_AR" "${nativeBintools}/bin/${nativeBintools.targetPrefix}ar")
		(lib.cmakeFeature "CMAKE_STRIP" "${nativeBintools}/bin/${nativeBintools.targetPrefix}strip")
		(lib.cmakeFeature "CMAKE_RANLIB" "${nativeBintools}/bin/${nativeBintools.targetPrefix}ranlib")
		];

		# We need to repass the custom GNUInstallDirs values, otherwise CMake
		# will choose them for us, leading to wrong results in llvm-config-native
		nativeInstallFlags = [
		(lib.cmakeFeature "CMAKE_INSTALL_PREFIX" (placeholder "out"))
		(lib.cmakeFeature "CMAKE_INSTALL_BINDIR" "${placeholder "out"}/bin")
		(lib.cmakeFeature "CMAKE_INSTALL_INCLUDEDIR" "${placeholder "out"}/include")
		(lib.cmakeFeature "CMAKE_INSTALL_LIBDIR" "${placeholder "out"}/lib")
		(lib.cmakeFeature "CMAKE_INSTALL_LIBEXECDIR" "${placeholder "out"}/libexec")
		];
		in [
		(lib.cmakeBool "CMAKE_CROSSCOMPILING" true)
		(lib.cmakeFeature "CROSS_TOOLCHAIN_FLAGS_NATIVE" (lib.concatStringsSep ";"
		(lib.concatLists [ nativeToolchainFlags nativeInstallFlags ])))
		]);

		postPatch = ''
		# `CMake Error: cannot write to file "/build/source/llvm/build/lib/cmake/mlir/MLIRTargets.cmake": Permission denied`
		chmod +w -R ../mlir
		patchShebangs ../mlir/test/mlir-reduce

		# FileSystem permissions tests fail with various special bits
		rm test/tools/llvm-objcopy/ELF/mirror-permissions-unix.test
		@@ -107,9 +149,21 @@ stdenv.mkDerivation (finalAttrs: {

		substituteInPlace unittests/Support/CMakeLists.txt \
		--replace "Path.cpp" ""
		'' + lib.optionalString stdenv.isAarch64 ''
		# Not sure why this fails
		rm test/tools/llvm-exegesis/AArch64/latency-by-opcode-name.s
		'';

		postInstall = lib.optionalString (!isNative) ''
		cp -a NATIVE/bin/llvm-config $out/bin/llvm-config-native
		'';

		doCheck = buildTests;

		nativeCheckInputs = [ which ]
		++ lib.optionals stdenv.isDarwin [ sysctl ];

		checkTarget = "check-all";
		requiredSystemFeatures = [ "big-parallel" ];

		meta = with lib; {
		@@ -117,9 +171,6 @@ stdenv.mkDerivation (finalAttrs: {
		homepage = "https://github.com/llvm/llvm-project";
		license = with licenses; [ ncsa ];
		maintainers = with maintainers; [ SomeoneSerge Madouura ];
		platforms = platforms.linux;
		# Consider the derivation broken if we're not building for CUDA or ROCm, or if we're building for aarch64
		# and ROCm is enabled. See https://github.com/RadeonOpenCompute/ROCm/issues/1831#issuecomment-1278205344.
		broken = stdenv.isAarch64 && !config.cudaSupport;
		platforms = with platforms; aarch64 ++ x86;
		};
		})

pkgs/development/python-modules/torch/default.nix

+6 −6

Original line number	Diff line number	Diff line
		@@ -16,9 +16,10 @@
		filelock,
		jinja2,
		networkx,
		openai-triton,
		sympy,
		numpy, pyyaml, cffi, click, typing-extensions,
		# ROCm build and `torch.compile` requires `openai-triton`
		tritonSupport ? (!stdenv.isDarwin), openai-triton,

		# Unit tests
		hypothesis, psutil,
		@@ -303,12 +304,13 @@ in buildPythonPackage rec {
		"-Wno-pass-failed"
		] ++ [
		"-Wno-unused-command-line-argument"
		"-Wno-maybe-uninitialized"
		"-Wno-uninitialized"
		"-Wno-array-bounds"
		"-Wno-stringop-overflow"
		"-Wno-free-nonheap-object"
		"-Wno-unused-result"
		] ++ lib.optionals stdenv.cc.isGNU [
		"-Wno-maybe-uninitialized"
		"-Wno-stringop-overflow"
		]));

		nativeBuildInputs = [
		@@ -377,12 +379,10 @@ in buildPythonPackage rec {
		# the following are required for tensorboard support
		pillow six future tensorboard protobuf

		# ROCm build and `torch.compile` requires openai-triton
		openai-triton

		# torch/csrc requires `pybind11` at runtime
		pybind11
		]
		++ lib.optionals tritonSupport [ openai-triton ]
		++ lib.optionals MPISupport [ mpi ]
		++ lib.optionals rocmSupport [ rocmtoolkit_joined ];