Loading pkgs/by-name/tr/triton-llvm/package.nix +7 −3 Original line number Diff line number Diff line Loading @@ -25,6 +25,10 @@ buildTests ? true, llvmTargetsToBuild ? [ "NATIVE" ], # "NATIVE" resolves into x86 or aarch64 depending on stdenv llvmProjectsToBuild ? [ # Required for building triton>=3.5.0 # https://github.com/triton-lang/triton/blob/c3c476f357f1e9768ea4e45aa5c17528449ab9ef/third_party/amd/CMakeLists.txt#L6 "lld" "llvm" "mlir" ], Loading Loading @@ -64,7 +68,7 @@ let in stdenv.mkDerivation (finalAttrs: { pname = "triton-llvm"; version = "21.0.0-unstable-2025-06-10"; # See https://github.com/llvm/llvm-project/blob/main/cmake/Modules/LLVMVersion.cmake version = "22.0.0-unstable-2025-07-15"; # See https://github.com/llvm/llvm-project/blob/main/cmake/Modules/LLVMVersion.cmake outputs = [ "out" Loading @@ -80,8 +84,8 @@ stdenv.mkDerivation (finalAttrs: { src = fetchFromGitHub { owner = "llvm"; repo = "llvm-project"; rev = "8957e64a20fc7f4277565c6cfe3e555c119783ce"; hash = "sha256-ljdwHPLGZv72RBPBg5rs7pZczsB+WJhdCeHJxoi4gJQ="; rev = "7d5de3033187c8a3bb4d2e322f5462cdaf49808f"; hash = "sha256-ayW6sOZGvP3SBjfmpXvYQJrPOAElY0MEHPFvj2fq+bM="; }; nativeBuildInputs = [ Loading pkgs/development/python-modules/triton/0001-_build-allow-extra-cc-flags.patch +3 −2 Original line number Diff line number Diff line diff --git a/python/triton/runtime/build.py b/python/triton/runtime/build.py index 1b76548d4..2756dccdb 100644 index 7614fe2ae..203db996b 100644 --- a/python/triton/runtime/build.py +++ b/python/triton/runtime/build.py @@ -33,5 +33,13 @@ def _build(name, src, srcdir, library_dirs, include_dirs, libraries): @@ -47,6 +47,14 @@ def _build(name: str, src: str, srcdir: str, library_dirs: list[str], include_di cc_cmd += [f'-l{lib}' for lib in libraries] cc_cmd += [f"-L{dir}" for dir in library_dirs] cc_cmd += [f"-I{dir}" for dir in include_dirs if dir is not None] Loading @@ -14,5 +14,6 @@ index 1b76548d4..2756dccdb 100644 + import shlex + cc_cmd.extend(shlex.split(cc_cmd_extra_flags)) + cc_cmd.extend(ccflags) subprocess.check_call(cc_cmd, stdout=subprocess.DEVNULL) return so pkgs/development/python-modules/triton/0002-nvidia-driver-short-circuit-before-ldconfig.patch +3 −3 Original line number Diff line number Diff line diff --git a/third_party/nvidia/backend/driver.py b/third_party/nvidia/backend/driver.py index d088ec092..625de2db8 100644 index e6fd6a968..2b39fea29 100644 --- a/third_party/nvidia/backend/driver.py +++ b/third_party/nvidia/backend/driver.py @@ -23,6 +23,9 @@ def libcuda_dirs(): if env_libcuda_path: if env_libcuda_path := knobs.nvidia.libcuda_path: return [env_libcuda_path] + if os.path.exists("@libcudaStubsDir@"): + return ["@libcudaStubsDir@"] + libs = subprocess.check_output(["/sbin/ldconfig", "-p"]).decode() libs = subprocess.check_output(["/sbin/ldconfig", "-p"]).decode(errors="ignore") # each line looks like the following: # libcuda.so.1 (libc6,x86-64) => /lib/x86_64-linux-gnu/libcuda.so.1 pkgs/development/python-modules/triton/0003-nvidia-cudart-a-systempath.patch +8 −8 Original line number Diff line number Diff line diff --git a/third_party/nvidia/backend/driver.c b/third_party/nvidia/backend/driver.c index ab24f7657..46dbaceb0 100644 index bff09d8c1..a5c341711 100644 --- a/third_party/nvidia/backend/driver.c +++ b/third_party/nvidia/backend/driver.c @@ -1,4 +1,4 @@ Loading @@ -7,9 +7,9 @@ index ab24f7657..46dbaceb0 100644 +#include <cuda.h> #include <dlfcn.h> #include <stdbool.h> #define PY_SSIZE_T_CLEAN #include <stdlib.h> diff --git a/third_party/nvidia/backend/driver.py b/third_party/nvidia/backend/driver.py index 47544bd8e..d57c6a70f 100644 index 2b39fea29..3346eb954 100644 --- a/third_party/nvidia/backend/driver.py +++ b/third_party/nvidia/backend/driver.py @@ -12,7 +12,8 @@ from triton.backends.compiler import GPUTarget Loading @@ -21,13 +21,13 @@ index 47544bd8e..d57c6a70f 100644 +include_dirs = [*shlex.split("@cudaToolkitIncludeDirs@"), os.path.join(dirname, "include")] libdevice_dir = os.path.join(dirname, "lib") libraries = ['cuda'] @@ -256,7 +257,7 @@ def make_launcher(constants, signature, tensordesc_meta): params = [f"&arg{i}" for i, ty in signature.items() if ty != "constexpr"] PyCUtensorMap = None @@ -265,7 +266,7 @@ def make_launcher(constants, signature, tensordesc_meta): params.append("&global_scratch") params.append("&profile_scratch") src = f""" -#include \"cuda.h\" +#include <cuda.h> #include <stdbool.h> #include <Python.h> #include <dlfcn.h> #include <stdbool.h> #include <stdlib.h> pkgs/development/python-modules/triton/0004-nvidia-allow-static-ptxas-path.patch +7 −7 Original line number Diff line number Diff line diff --git a/python/triton/knobs.py b/python/triton/knobs.py index 30804b170..c6a3a737d 100644 index 161f739bd..047b19d69 100644 --- a/python/triton/knobs.py +++ b/python/triton/knobs.py @@ -203,6 +203,8 @@ class env_nvidia_tool(env_base[str, NvidiaTool]): # accessible. self.default(), ] @@ -208,6 +208,8 @@ class env_nvidia_tool(env_base[str, NvidiaTool]): else: paths = [self.default_path] + import shlex + paths.extend(shlex.split("@nixpkgsExtraBinaryPaths@")) for path in paths: if not path or not os.access(path, os.X_OK): continue if tool := NvidiaTool.from_path(path): return tool Loading
pkgs/by-name/tr/triton-llvm/package.nix +7 −3 Original line number Diff line number Diff line Loading @@ -25,6 +25,10 @@ buildTests ? true, llvmTargetsToBuild ? [ "NATIVE" ], # "NATIVE" resolves into x86 or aarch64 depending on stdenv llvmProjectsToBuild ? [ # Required for building triton>=3.5.0 # https://github.com/triton-lang/triton/blob/c3c476f357f1e9768ea4e45aa5c17528449ab9ef/third_party/amd/CMakeLists.txt#L6 "lld" "llvm" "mlir" ], Loading Loading @@ -64,7 +68,7 @@ let in stdenv.mkDerivation (finalAttrs: { pname = "triton-llvm"; version = "21.0.0-unstable-2025-06-10"; # See https://github.com/llvm/llvm-project/blob/main/cmake/Modules/LLVMVersion.cmake version = "22.0.0-unstable-2025-07-15"; # See https://github.com/llvm/llvm-project/blob/main/cmake/Modules/LLVMVersion.cmake outputs = [ "out" Loading @@ -80,8 +84,8 @@ stdenv.mkDerivation (finalAttrs: { src = fetchFromGitHub { owner = "llvm"; repo = "llvm-project"; rev = "8957e64a20fc7f4277565c6cfe3e555c119783ce"; hash = "sha256-ljdwHPLGZv72RBPBg5rs7pZczsB+WJhdCeHJxoi4gJQ="; rev = "7d5de3033187c8a3bb4d2e322f5462cdaf49808f"; hash = "sha256-ayW6sOZGvP3SBjfmpXvYQJrPOAElY0MEHPFvj2fq+bM="; }; nativeBuildInputs = [ Loading
pkgs/development/python-modules/triton/0001-_build-allow-extra-cc-flags.patch +3 −2 Original line number Diff line number Diff line diff --git a/python/triton/runtime/build.py b/python/triton/runtime/build.py index 1b76548d4..2756dccdb 100644 index 7614fe2ae..203db996b 100644 --- a/python/triton/runtime/build.py +++ b/python/triton/runtime/build.py @@ -33,5 +33,13 @@ def _build(name, src, srcdir, library_dirs, include_dirs, libraries): @@ -47,6 +47,14 @@ def _build(name: str, src: str, srcdir: str, library_dirs: list[str], include_di cc_cmd += [f'-l{lib}' for lib in libraries] cc_cmd += [f"-L{dir}" for dir in library_dirs] cc_cmd += [f"-I{dir}" for dir in include_dirs if dir is not None] Loading @@ -14,5 +14,6 @@ index 1b76548d4..2756dccdb 100644 + import shlex + cc_cmd.extend(shlex.split(cc_cmd_extra_flags)) + cc_cmd.extend(ccflags) subprocess.check_call(cc_cmd, stdout=subprocess.DEVNULL) return so
pkgs/development/python-modules/triton/0002-nvidia-driver-short-circuit-before-ldconfig.patch +3 −3 Original line number Diff line number Diff line diff --git a/third_party/nvidia/backend/driver.py b/third_party/nvidia/backend/driver.py index d088ec092..625de2db8 100644 index e6fd6a968..2b39fea29 100644 --- a/third_party/nvidia/backend/driver.py +++ b/third_party/nvidia/backend/driver.py @@ -23,6 +23,9 @@ def libcuda_dirs(): if env_libcuda_path: if env_libcuda_path := knobs.nvidia.libcuda_path: return [env_libcuda_path] + if os.path.exists("@libcudaStubsDir@"): + return ["@libcudaStubsDir@"] + libs = subprocess.check_output(["/sbin/ldconfig", "-p"]).decode() libs = subprocess.check_output(["/sbin/ldconfig", "-p"]).decode(errors="ignore") # each line looks like the following: # libcuda.so.1 (libc6,x86-64) => /lib/x86_64-linux-gnu/libcuda.so.1
pkgs/development/python-modules/triton/0003-nvidia-cudart-a-systempath.patch +8 −8 Original line number Diff line number Diff line diff --git a/third_party/nvidia/backend/driver.c b/third_party/nvidia/backend/driver.c index ab24f7657..46dbaceb0 100644 index bff09d8c1..a5c341711 100644 --- a/third_party/nvidia/backend/driver.c +++ b/third_party/nvidia/backend/driver.c @@ -1,4 +1,4 @@ Loading @@ -7,9 +7,9 @@ index ab24f7657..46dbaceb0 100644 +#include <cuda.h> #include <dlfcn.h> #include <stdbool.h> #define PY_SSIZE_T_CLEAN #include <stdlib.h> diff --git a/third_party/nvidia/backend/driver.py b/third_party/nvidia/backend/driver.py index 47544bd8e..d57c6a70f 100644 index 2b39fea29..3346eb954 100644 --- a/third_party/nvidia/backend/driver.py +++ b/third_party/nvidia/backend/driver.py @@ -12,7 +12,8 @@ from triton.backends.compiler import GPUTarget Loading @@ -21,13 +21,13 @@ index 47544bd8e..d57c6a70f 100644 +include_dirs = [*shlex.split("@cudaToolkitIncludeDirs@"), os.path.join(dirname, "include")] libdevice_dir = os.path.join(dirname, "lib") libraries = ['cuda'] @@ -256,7 +257,7 @@ def make_launcher(constants, signature, tensordesc_meta): params = [f"&arg{i}" for i, ty in signature.items() if ty != "constexpr"] PyCUtensorMap = None @@ -265,7 +266,7 @@ def make_launcher(constants, signature, tensordesc_meta): params.append("&global_scratch") params.append("&profile_scratch") src = f""" -#include \"cuda.h\" +#include <cuda.h> #include <stdbool.h> #include <Python.h> #include <dlfcn.h> #include <stdbool.h> #include <stdlib.h>
pkgs/development/python-modules/triton/0004-nvidia-allow-static-ptxas-path.patch +7 −7 Original line number Diff line number Diff line diff --git a/python/triton/knobs.py b/python/triton/knobs.py index 30804b170..c6a3a737d 100644 index 161f739bd..047b19d69 100644 --- a/python/triton/knobs.py +++ b/python/triton/knobs.py @@ -203,6 +203,8 @@ class env_nvidia_tool(env_base[str, NvidiaTool]): # accessible. self.default(), ] @@ -208,6 +208,8 @@ class env_nvidia_tool(env_base[str, NvidiaTool]): else: paths = [self.default_path] + import shlex + paths.extend(shlex.split("@nixpkgsExtraBinaryPaths@")) for path in paths: if not path or not os.access(path, os.X_OK): continue if tool := NvidiaTool.from_path(path): return tool