Unverified Commit 828c59b3 authored by Nick Cao's avatar Nick Cao Committed by GitHub
Browse files

python3Packages.deep-gemm: init at 2.1.1.post3 (#508313)

parents 8ba92153 06e8add5
Loading
Loading
Loading
Loading
+110 −0
Original line number Diff line number Diff line
{
  lib,
  buildPythonPackage,
  fetchFromGitHub,

  # build-system
  setuptools,
  torch,

  # buildInputs
  fmt,
  pybind11,

  # nativeBuildInputs
  autoAddDriverRunpath,

  # tests
  pytestCheckHook,
  writableTmpDirAsHomeHook,

  # passthru
  deep-gemm,

  config,
  cudaPackages,
  cudaSupport ? config.cudaSupport,
}:

# Package expression for DeepGEMM's Python module (`deep_gemm`).
#
# The package is marked broken unless `cudaSupport` is enabled; every
# CUDA-specific input below is only added in that case.
let
  # Directory with the stub libcuda.so used to satisfy the linker at build
  # time (the real one is deployed impurely).
  cudaStubsDir = "${lib.getOutput "stubs" cudaPackages.cuda_cudart}/lib/stubs";

  # CUDA libraries needed at build time; each entry names a header it provides.
  cudaBuildInputs = [
    cudaPackages.cuda_cudart # cuda_runtime_api.h
    cudaPackages.cuda_nvrtc # nvrtc.h
    cudaPackages.cutlass # cute/arch/mma_sm100_desc.hpp
    cudaPackages.libcublas # cublas_v2.h
    cudaPackages.libcusolver # cusolverDn.h
    cudaPackages.libcusparse # cusparse.h
  ];
in
# Build with the same stdenv that torch uses.
buildPythonPackage.override { inherit (torch) stdenv; } (finalAttrs: {
  pname = "deep-gemm";
  version = "2.1.1.post3";
  pyproject = true;

  src = fetchFromGitHub {
    owner = "deepseek-ai";
    repo = "DeepGEMM";
    tag = "v${finalAttrs.version}";
    hash = "sha256-2yEHiuTaNUodWlZk7waqBsVMip2qiVJPgQHwsY0I63k=";
  };

  # Drops the vendored third-party include paths and the CUDA_HOME-derived
  # search paths from setup.py so that the inputs listed below are used.
  patches = [ ./use-system-libraries.patch ];

  # Environment is only populated for CUDA-enabled builds.
  env = lib.optionalAttrs cudaSupport {
    CUDA_HOME = (lib.getBin cudaPackages.cuda_nvcc).outPath;

    # Fake libcuda.so (the real one is deployed impurely)
    LDFLAGS = "-L${cudaStubsDir}";
  };

  build-system = [
    setuptools
    torch
  ];

  nativeBuildInputs = [ autoAddDriverRunpath ];

  buildInputs = [
    fmt
    pybind11
  ]
  ++ lib.optionals cudaSupport cudaBuildInputs;

  nativeCheckInputs = [
    pytestCheckHook
    writableTmpDirAsHomeHook
  ];

  # Tests require GPU access
  doCheck = false;

  passthru = {
    # Variant of this package with the test suite and import check enabled,
    # requesting a builder that advertises the "cuda" system feature.
    gpuCheck = deep-gemm.overridePythonAttrs {
      requiredSystemFeatures = [ "cuda" ];

      # dlopens libcuda.so at import time
      pythonImportsCheck = [ "deep_gemm" ];

      doCheck = true;
    };
  };

  meta = {
    description = "Clean and efficient FP8 GEMM kernels with fine-grained scaling";
    homepage = "https://github.com/deepseek-ai/DeepGEMM";
    license = lib.licenses.mit;
    maintainers = [ lib.maintainers.GaetanLepage ];
    # Not buildable without CUDA.
    broken = !cudaSupport;
  };
})
+54 −0
Original line number Diff line number Diff line
diff --git a/setup.py b/setup.py
index 38e891c..e846847 100644
--- a/setup.py
+++ b/setup.py
@@ -14,7 +14,7 @@ from setuptools import find_packages
 from setuptools.command.build_py import build_py
 from packaging.version import parse
 from pathlib import Path
-from torch.utils.cpp_extension import CUDAExtension, CUDA_HOME
+from torch.utils.cpp_extension import CUDAExtension
 from wheel.bdist_wheel import bdist_wheel as _bdist_wheel
 
 
@@ -33,20 +33,10 @@ if DG_JIT_USE_RUNTIME_API:
 current_dir = os.path.dirname(os.path.realpath(__file__))
 sources = ['csrc/python_api.cpp']
 build_include_dirs = [
-    f'{CUDA_HOME}/include',
-    f'{CUDA_HOME}/include/cccl',
     'deep_gemm/include',
-    'third-party/cutlass/include',
-    'third-party/fmt/include',
 ]
 build_libraries = ['cuda', 'cudart', 'nvrtc']
 build_library_dirs = [
-    f'{CUDA_HOME}/lib64',
-    f'{CUDA_HOME}/lib64/stubs'
-]
-third_party_include_dirs = [
-    'third-party/cutlass/include/cute',
-    'third-party/cutlass/include/cutlass',
 ]
 
 # Release
@@ -142,19 +132,6 @@ class CustomBuildPy(build_py):
         build_include_dir = os.path.join(self.build_lib, 'deep_gemm/include')
         os.makedirs(build_include_dir, exist_ok=True)
 
-        # Copy third-party includes to the build directory
-        for d in third_party_include_dirs:
-            dirname = d.split('/')[-1]
-            src_dir = os.path.join(current_dir, d)
-            dst_dir = os.path.join(build_include_dir, dirname)
-
-            # Remove existing directory if it exists
-            if os.path.exists(dst_dir):
-                shutil.rmtree(dst_dir)
-
-            # Copy the directory
-            shutil.copytree(src_dir, dst_dir)
-
 
 class CachedWheelsCommand(_bdist_wheel):
     def run(self):
+2 −0
Original line number Diff line number Diff line
@@ -3748,6 +3748,8 @@ self: super: with self; {
  deep-ep = callPackage ../development/python-modules/deep-ep { };
  deep-gemm = callPackage ../development/python-modules/deep-gemm { };
  deep-translator = callPackage ../development/python-modules/deep-translator { };
  deepdiff = callPackage ../development/python-modules/deepdiff { };