# pkgs/development/python-modules/deep-gemm/default.nix
# DeepGEMM: FP8 GEMM kernels from deepseek-ai, built against the system
# CUDA toolchain (see ./use-system-libraries.patch).
{
  lib,
  buildPythonPackage,
  fetchFromGitHub,

  # build-system
  setuptools,
  torch,

  # buildInputs
  fmt,
  pybind11,

  # nativeBuildInputs
  autoAddDriverRunpath,

  # tests
  pytestCheckHook,
  writableTmpDirAsHomeHook,

  # passthru
  deep-gemm,

  config,
  cudaPackages,
  cudaSupport ? config.cudaSupport,
}:

let
  inherit (lib)
    getBin
    optionalAttrs
    optionals
    ;
in
# Use torch's stdenv so the extension is built with a compiler ABI-compatible
# with the torch the module links against.
buildPythonPackage.override { inherit (torch) stdenv; } (finalAttrs: {
  pname = "deep-gemm";
  version = "2.1.1.post3";
  pyproject = true;

  src = fetchFromGitHub {
    owner = "deepseek-ai";
    repo = "DeepGEMM";
    tag = "v${finalAttrs.version}";
    hash = "sha256-2yEHiuTaNUodWlZk7waqBsVMip2qiVJPgQHwsY0I63k=";
  };

  # Drop the vendored third-party copies and CUDA_HOME-relative paths from
  # setup.py; headers and libraries come from nixpkgs instead.
  patches = [ ./use-system-libraries.patch ];

  env = optionalAttrs cudaSupport {
    CUDA_HOME = (getBin cudaPackages.cuda_nvcc).outPath;
    LDFLAGS = toString [
      # Fake libcuda.so (the real one is deployed impurely)
      "-L${lib.getOutput "stubs" cudaPackages.cuda_cudart}/lib/stubs"
    ];
  };

  build-system = [
    setuptools
    torch
  ];

  nativeBuildInputs = [ autoAddDriverRunpath ];

  buildInputs = [
    fmt
    pybind11
  ]
  ++ optionals cudaSupport (
    with cudaPackages;
    [
      cuda_cudart # cuda_runtime_api.h
      cuda_nvrtc # nvrtc.h
      cutlass # cute/arch/mma_sm100_desc.hpp
      libcublas # cublas_v2.h
      libcusolver # cusolverDn.h
      libcusparse # cusparse.h
    ]
  );

  nativeCheckInputs = [
    pytestCheckHook
    writableTmpDirAsHomeHook
  ];

  # Tests require GPU access
  doCheck = false;

  # GPU-only checks, runnable on builders exposing the "cuda" system feature.
  passthru.gpuCheck = deep-gemm.overridePythonAttrs {
    requiredSystemFeatures = [ "cuda" ];
    # dlopens libcuda.so at import time
    pythonImportsCheck = [ "deep_gemm" ];
    doCheck = true;
  };

  meta = {
    description = "Clean and efficient FP8 GEMM kernels with fine-grained scaling";
    homepage = "https://github.com/deepseek-ai/DeepGEMM";
    license = lib.licenses.mit;
    maintainers = with lib.maintainers; [ GaetanLepage ];
    broken = !cudaSupport;
  };
})
pkgs/development/python-modules/deep-gemm/use-system-libraries.patch
NOTE(review): reconstructed from a collapsed diff view; hunk headers are as
shown upstream, but exact blank-line placement inside hunks is inferred —
verify against the original PR before applying.

diff --git a/setup.py b/setup.py
index 38e891c..e846847 100644
--- a/setup.py
+++ b/setup.py
@@ -14,7 +14,7 @@
 from setuptools import find_packages
 from setuptools.command.build_py import build_py
 from packaging.version import parse
 from pathlib import Path
-from torch.utils.cpp_extension import CUDAExtension, CUDA_HOME
+from torch.utils.cpp_extension import CUDAExtension
 
 from wheel.bdist_wheel import bdist_wheel as _bdist_wheel
@@ -33,20 +33,10 @@ if DG_JIT_USE_RUNTIME_API:
 current_dir = os.path.dirname(os.path.realpath(__file__))
 sources = ['csrc/python_api.cpp']
 build_include_dirs = [
-    f'{CUDA_HOME}/include',
-    f'{CUDA_HOME}/include/cccl',
     'deep_gemm/include',
-    'third-party/cutlass/include',
-    'third-party/fmt/include',
 ]
 build_libraries = ['cuda', 'cudart', 'nvrtc']
 build_library_dirs = [
-    f'{CUDA_HOME}/lib64',
-    f'{CUDA_HOME}/lib64/stubs'
-]
-third_party_include_dirs = [
-    'third-party/cutlass/include/cute',
-    'third-party/cutlass/include/cutlass',
 ]
 
 # Release
@@ -142,19 +132,6 @@ class CustomBuildPy(build_py):
         build_include_dir = os.path.join(self.build_lib, 'deep_gemm/include')
         os.makedirs(build_include_dir, exist_ok=True)
 
-        # Copy third-party includes to the build directory
-        for d in third_party_include_dirs:
-            dirname = d.split('/')[-1]
-            src_dir = os.path.join(current_dir, d)
-            dst_dir = os.path.join(build_include_dir, dirname)
-
-            # Remove existing directory if it exists
-            if os.path.exists(dst_dir):
-                shutil.rmtree(dst_dir)
-
-            # Copy the directory
-            shutil.copytree(src_dir, dst_dir)
-
 
 class CachedWheelsCommand(_bdist_wheel):
     def run(self):

pkgs/top-level/python-packages.nix (+2 −0)
@@ -3748,6 +3748,8 @@ self: super: with self; {
   deep-ep = callPackage ../development/python-modules/deep-ep { };
 
+  deep-gemm = callPackage ../development/python-modules/deep-gemm { };
+
   deep-translator = callPackage ../development/python-modules/deep-translator { };
 
   deepdiff = callPackage ../development/python-modules/deepdiff { };
# pkgs/development/python-modules/deep-gemm/default.nix (duplicate rendering
# of the same new file in the diff view).
# Builds deepseek-ai's DeepGEMM FP8 GEMM kernels against nixpkgs-provided
# CUDA, cutlass, fmt and pybind11 instead of the vendored copies.
{
  lib,
  buildPythonPackage,
  fetchFromGitHub,

  # build-system
  setuptools,
  torch,

  # buildInputs
  fmt,
  pybind11,

  # nativeBuildInputs
  autoAddDriverRunpath,

  # tests
  pytestCheckHook,
  writableTmpDirAsHomeHook,

  # passthru
  deep-gemm,

  config,
  cudaPackages,
  cudaSupport ? config.cudaSupport,
}:

let
  inherit (lib)
    getBin
    optionalAttrs
    optionals
    ;
in
# Inherit torch's stdenv so the compiled extension matches torch's ABI.
buildPythonPackage.override { inherit (torch) stdenv; } (finalAttrs: {
  pname = "deep-gemm";
  version = "2.1.1.post3";
  pyproject = true;

  src = fetchFromGitHub {
    owner = "deepseek-ai";
    repo = "DeepGEMM";
    tag = "v${finalAttrs.version}";
    hash = "sha256-2yEHiuTaNUodWlZk7waqBsVMip2qiVJPgQHwsY0I63k=";
  };

  patches = [ ./use-system-libraries.patch ];

  env = optionalAttrs cudaSupport {
    CUDA_HOME = (getBin cudaPackages.cuda_nvcc).outPath;
    LDFLAGS = toString [
      # Fake libcuda.so (the real one is deployed impurely)
      "-L${lib.getOutput "stubs" cudaPackages.cuda_cudart}/lib/stubs"
    ];
  };

  build-system = [
    setuptools
    torch
  ];

  nativeBuildInputs = [ autoAddDriverRunpath ];

  buildInputs = [
    fmt
    pybind11
  ]
  ++ optionals cudaSupport (
    with cudaPackages;
    [
      cuda_cudart # cuda_runtime_api.h
      cuda_nvrtc # nvrtc.h
      cutlass # cute/arch/mma_sm100_desc.hpp
      libcublas # cublas_v2.h
      libcusolver # cusolverDn.h
      libcusparse # cusparse.h
    ]
  );

  nativeCheckInputs = [
    pytestCheckHook
    writableTmpDirAsHomeHook
  ];

  # Tests require GPU access
  doCheck = false;

  passthru.gpuCheck = deep-gemm.overridePythonAttrs {
    requiredSystemFeatures = [ "cuda" ];
    # dlopens libcuda.so at import time
    pythonImportsCheck = [ "deep_gemm" ];
    doCheck = true;
  };

  meta = {
    description = "Clean and efficient FP8 GEMM kernels with fine-grained scaling";
    homepage = "https://github.com/deepseek-ai/DeepGEMM";
    license = lib.licenses.mit;
    maintainers = with lib.maintainers; [ GaetanLepage ];
    broken = !cudaSupport;
  };
})
pkgs/development/python-modules/deep-gemm/use-system-libraries.patch
(duplicate rendering of the same new file in the diff view)
NOTE(review): reconstructed from a collapsed diff view; hunk counts match the
upstream headers, but blank-line placement inside hunks is inferred — confirm
against the original PR.

diff --git a/setup.py b/setup.py
index 38e891c..e846847 100644
--- a/setup.py
+++ b/setup.py
@@ -14,7 +14,7 @@
 from setuptools import find_packages
 from setuptools.command.build_py import build_py
 from packaging.version import parse
 from pathlib import Path
-from torch.utils.cpp_extension import CUDAExtension, CUDA_HOME
+from torch.utils.cpp_extension import CUDAExtension
 
 from wheel.bdist_wheel import bdist_wheel as _bdist_wheel
@@ -33,20 +33,10 @@ if DG_JIT_USE_RUNTIME_API:
 current_dir = os.path.dirname(os.path.realpath(__file__))
 sources = ['csrc/python_api.cpp']
 build_include_dirs = [
-    f'{CUDA_HOME}/include',
-    f'{CUDA_HOME}/include/cccl',
     'deep_gemm/include',
-    'third-party/cutlass/include',
-    'third-party/fmt/include',
 ]
 build_libraries = ['cuda', 'cudart', 'nvrtc']
 build_library_dirs = [
-    f'{CUDA_HOME}/lib64',
-    f'{CUDA_HOME}/lib64/stubs'
-]
-third_party_include_dirs = [
-    'third-party/cutlass/include/cute',
-    'third-party/cutlass/include/cutlass',
 ]
 
 # Release
@@ -142,19 +132,6 @@ class CustomBuildPy(build_py):
         build_include_dir = os.path.join(self.build_lib, 'deep_gemm/include')
         os.makedirs(build_include_dir, exist_ok=True)
 
-        # Copy third-party includes to the build directory
-        for d in third_party_include_dirs:
-            dirname = d.split('/')[-1]
-            src_dir = os.path.join(current_dir, d)
-            dst_dir = os.path.join(build_include_dir, dirname)
-
-            # Remove existing directory if it exists
-            if os.path.exists(dst_dir):
-                shutil.rmtree(dst_dir)
-
-            # Copy the directory
-            shutil.copytree(src_dir, dst_dir)
-
 
 class CachedWheelsCommand(_bdist_wheel):
     def run(self):

pkgs/top-level/python-packages.nix (+2 −0)
@@ -3748,6 +3748,8 @@ self: super: with self; {
   deep-ep = callPackage ../development/python-modules/deep-ep { };
 
+  deep-gemm = callPackage ../development/python-modules/deep-gemm { };
+
   deep-translator = callPackage ../development/python-modules/deep-translator { };
 
   deepdiff = callPackage ../development/python-modules/deepdiff { };