python3Packages.deep-gemm: init at 2.1.1.post3 (#508313) (828c59b3) · Commits · nix / nixpkgs

pkgs/development/python-modules/deep-gemm/default.nix

0 → 100644

+110 −0

Original line number	Diff line number	Diff line
		# Package expression for DeepGEMM (deepseek-ai/DeepGEMM), a PyTorch
		# C++/CUDA extension. The package is marked broken unless `cudaSupport` is
		# enabled, and its test-suite is only run through `passthru.gpuCheck`.
		{
		  lib,
		  buildPythonPackage,
		  fetchFromGitHub,

		  # build-system
		  setuptools,
		  torch,

		  # buildInputs
		  fmt,
		  pybind11,

		  # nativeBuildInputs
		  autoAddDriverRunpath,

		  # tests
		  pytestCheckHook,
		  writableTmpDirAsHomeHook,

		  # passthru
		  deep-gemm,

		  config,
		  cudaPackages,
		  cudaSupport ? config.cudaSupport,
		}:

		let
		  inherit (lib)
		    getBin
		    getOutput
		    optionalAttrs
		    optionals
		    ;
		in
		# Build with the very same stdenv as torch.
		# NOTE(review): presumably required so that the extension is compiled with a
		# toolchain compatible with the one torch (and CUDA) was built with.
		buildPythonPackage.override { inherit (torch) stdenv; } (finalAttrs: {
		  pname = "deep-gemm";
		  version = "2.1.1.post3";
		  pyproject = true;

		  src = fetchFromGitHub {
		    owner = "deepseek-ai";
		    repo = "DeepGEMM";
		    tag = "v${finalAttrs.version}";
		    hash = "sha256-2yEHiuTaNUodWlZk7waqBsVMip2qiVJPgQHwsY0I63k=";
		  };

		  patches = [
		    # Drops the CUDA_HOME-derived and vendored `third-party/` include/library
		    # directories from setup.py so that the headers and libraries listed in
		    # `buildInputs` are used instead.
		    ./use-system-libraries.patch
		  ];

		  # Only set when building with CUDA.
		  env = optionalAttrs cudaSupport {
		    CUDA_HOME = (getBin cudaPackages.cuda_nvcc).outPath;

		    LDFLAGS = toString [
		      # Fake libcuda.so (the real one is deployed impurely)
		      "-L${getOutput "stubs" cudaPackages.cuda_cudart}/lib/stubs"
		    ];
		  };

		  build-system = [
		    setuptools
		    torch
		  ];

		  nativeBuildInputs = [
		    autoAddDriverRunpath
		  ];

		  buildInputs = [
		    fmt
		    pybind11
		  ]
		  ++ optionals cudaSupport (
		    with cudaPackages;
		    [
		      cuda_cudart # cuda_runtime_api.h
		      cuda_nvrtc # nvrtc.h
		      cutlass # cute/arch/mma_sm100_desc.hpp
		      libcublas # cublas_v2.h
		      libcusolver # cusolverDn.h
		      libcusparse # cusparse.h
		    ]
		  );

		  # The module is built as a torch extension (setup.py uses
		  # torch.utils.cpp_extension.CUDAExtension), so torch is needed at runtime as
		  # well. Listing it only in `build-system` does not propagate it to
		  # consumers of this package.
		  dependencies = [
		    torch
		  ];

		  nativeCheckInputs = [
		    pytestCheckHook
		    writableTmpDirAsHomeHook
		  ];

		  # Tests require GPU access
		  doCheck = false;

		  # Variant of this package with the checks enabled. It requests the "cuda"
		  # system feature, so it is only built where that feature is offered.
		  passthru.gpuCheck = deep-gemm.overridePythonAttrs {
		    requiredSystemFeatures = [ "cuda" ];

		    # dlopens libcuda.so at import time
		    pythonImportsCheck = [ "deep_gemm" ];

		    doCheck = true;
		  };

		  meta = {
		    description = "Clean and efficient FP8 GEMM kernels with fine-grained scaling";
		    homepage = "https://github.com/deepseek-ai/DeepGEMM";
		    license = lib.licenses.mit;
		    maintainers = with lib.maintainers; [ GaetanLepage ];
		    # All CUDA inputs above are gated on cudaSupport.
		    broken = !cudaSupport;
		  };
		})

pkgs/development/python-modules/deep-gemm/use-system-libraries.patch

0 → 100644

+54 −0

Original line number	Diff line number	Diff line
		diff --git a/setup.py b/setup.py
		index 38e891c..e846847 100644
		--- a/setup.py
		+++ b/setup.py
		@@ -14,7 +14,7 @@ from setuptools import find_packages
		from setuptools.command.build_py import build_py
		from packaging.version import parse
		from pathlib import Path
		-from torch.utils.cpp_extension import CUDAExtension, CUDA_HOME
		+from torch.utils.cpp_extension import CUDAExtension
		from wheel.bdist_wheel import bdist_wheel as _bdist_wheel


		@@ -33,20 +33,10 @@ if DG_JIT_USE_RUNTIME_API:
		current_dir = os.path.dirname(os.path.realpath(__file__))
		sources = ['csrc/python_api.cpp']
		build_include_dirs = [
		- f'{CUDA_HOME}/include',
		- f'{CUDA_HOME}/include/cccl',
		'deep_gemm/include',
		- 'third-party/cutlass/include',
		- 'third-party/fmt/include',
		]
		build_libraries = ['cuda', 'cudart', 'nvrtc']
		build_library_dirs = [
		- f'{CUDA_HOME}/lib64',
		- f'{CUDA_HOME}/lib64/stubs'
		-]
		-third_party_include_dirs = [
		- 'third-party/cutlass/include/cute',
		- 'third-party/cutlass/include/cutlass',
		]

		# Release
		@@ -142,19 +132,6 @@ class CustomBuildPy(build_py):
		build_include_dir = os.path.join(self.build_lib, 'deep_gemm/include')
		os.makedirs(build_include_dir, exist_ok=True)

		- # Copy third-party includes to the build directory
		- for d in third_party_include_dirs:
		- dirname = d.split('/')[-1]
		- src_dir = os.path.join(current_dir, d)
		- dst_dir = os.path.join(build_include_dir, dirname)
		-
		- # Remove existing directory if it exists
		- if os.path.exists(dst_dir):
		- shutil.rmtree(dst_dir)
		-
		- # Copy the directory
		- shutil.copytree(src_dir, dst_dir)
		-

		class CachedWheelsCommand(_bdist_wheel):
		def run(self):

pkgs/top-level/python-packages.nix

+2 −0

Original line number	Diff line number	Diff line
		@@ -3748,6 +3748,8 @@ self: super: with self; {

		deep-ep = callPackage ../development/python-modules/deep-ep { };

		deep-gemm = callPackage ../development/python-modules/deep-gemm { };

		deep-translator = callPackage ../development/python-modules/deep-translator { };

		deepdiff = callPackage ../development/python-modules/deepdiff { };