Loading
+8 −20
Original line number Diff line number Diff line
From 10b7e8330bdba319a4162cceb8e5dd4280215b04 Mon Sep 17 00:00:00 2001
From: SomeoneSerge <else@someonex.net>
Date: Wed, 31 Jul 2024 12:06:15 +0000
Subject: [PATCH 2/2] setup.py: nix-support (respect cmakeFlags)

---
 setup.py | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/setup.py b/setup.py
index 01e006f9..14762146 100644
index e9b36e2a2..bc9e2f1e3 100644
--- a/setup.py
+++ b/setup.py
@@ -15,6 +15,15 @@ from setuptools import Extension, find_packages, setup
 from setuptools.command.build_ext import build_ext
 from torch.utils.cpp_extension import CUDA_HOME
@@ -20,6 +20,15 @@ from setuptools.command.build_ext import build_ext
 from setuptools_scm import get_version
 from torch.utils.cpp_extension import CUDA_HOME, ROCM_HOME
 
+import os
+import json
@@ -27,14 +18,11 @@ index 01e006f9..14762146 100644
 
 def load_module_from_path(module_name, path):
     spec = importlib.util.spec_from_file_location(module_name, path)
@@ -159,6 +168,7 @@ class cmake_build_ext(build_ext):
             '-DCMAKE_LIBRARY_OUTPUT_DIRECTORY={}'.format(outdir),
             '-DCMAKE_ARCHIVE_OUTPUT_DIRECTORY={}'.format(self.build_temp),
             '-DVLLM_TARGET_DEVICE={}'.format(VLLM_TARGET_DEVICE),
@@ -151,6 +160,7 @@ class cmake_build_ext(build_ext):
         cmake_args = [
             "-DCMAKE_BUILD_TYPE={}".format(cfg),
             "-DVLLM_TARGET_DEVICE={}".format(VLLM_TARGET_DEVICE),
+            *NIX_ATTRS["cmakeFlags"],
         ]
 
         verbose = envs.VERBOSE
-- 
2.45.1
+7 −6
Original line number Diff line number Diff line
diff --git a/vllm/model_executor/models/registry.py b/vllm/model_executor/models/registry.py
index 81623def..2a6e2c92 100644
index a2de597c8..4c2410209 100644
--- a/vllm/model_executor/models/registry.py
+++ b/vllm/model_executor/models/registry.py
@@ -521,6 +521,7 @@ def _run_in_subprocess(fn: Callable[[], _T]) -> _T:
@@ -1121,7 +1121,7 @@ def _run_in_subprocess(fn: Callable[[], _T]) -> _T:
         # cannot use `sys.executable __file__` here because the script
         # contains relative imports
         returned = subprocess.run(_SUBPROCESS_COMMAND,
                                   input=input_bytes,
+                                  env={'PYTHONPATH': ':'.join(sys.path)},
                                   capture_output=True)
         returned = subprocess.run(
-            _SUBPROCESS_COMMAND, input=input_bytes, capture_output=True
+            _SUBPROCESS_COMMAND, input=input_bytes, capture_output=True, env={'PYTHONPATH': ':'.join(sys.path)},
         )
 
         # check if the subprocess is successful
+2 −2
Original line number Diff line number Diff line
diff --git a/requirements/cpu.txt b/requirements/cpu.txt
index 2db6d87ee..37f816170 100644
index d11787df4..71575d707 100644
--- a/requirements/cpu.txt
+++ b/requirements/cpu.txt
@@ -21,9 +21,6 @@ torchvision; platform_machine != "ppc64le" and platform_machine != "s390x"
@@ -20,9 +20,6 @@ torchvision; platform_machine != "ppc64le" and platform_machine != "s390x"
 torchvision==0.23.0; platform_machine == "ppc64le"
 datasets # for benchmark scripts
 
+34 −23
Original line number Diff line number Diff line
@@ -34,6 +34,7 @@
  uvicorn,
  pydantic,
  aioprometheus,
  anthropic,
  nvidia-ml-py,
  openai,
  pyzmq,
@@ -53,6 +54,7 @@
  compressed-tensors,
  mistral-common,
  msgspec,
  model-hosting-container-standards,
  numactl,
  tokenizers,
  oneDNN,
@@ -98,10 +100,11 @@ let
  # see CMakeLists.txt, grepping for CUTLASS_REVISION
  # https://github.com/vllm-project/vllm/blob/v${version}/CMakeLists.txt
  cutlass = fetchFromGitHub {
    name = "cutlass-source";
    owner = "NVIDIA";
    repo = "cutlass";
    tag = "v4.0.0";
    hash = "sha256-HJY+Go1viPkSVZPEs/NyMtYJzas4mMLiIZF3kNX+WgA=";
    tag = "v4.2.1";
    hash = "sha256-iP560D5Vwuj6wX1otJhwbvqe/X4mYVeKTpK533Wr5gY=";
  };

  # FlashMLA's Blackwell (SM100) kernels were developed against CUTLASS v3.9.0
@@ -126,10 +129,11 @@ let
    # grep for GIT_TAG in the following file
    # https://github.com/vllm-project/vllm/blob/v${version}/cmake/external_projects/flashmla.cmake
    src = fetchFromGitHub {
      name = "FlashMLA-source";
      owner = "vllm-project";
      repo = "FlashMLA";
      rev = "5f65b85703c7ed75fda01e06495077caad207c3f";
      hash = "sha256-DO9EFNSoAgyfRRc095v1UjT+Zdzk4cFY0+n28FVEwI0=";
      rev = "46d64a8ebef03fa50b4ae74937276a5c940e3f95";
      hash = "sha256-jtMzWB5hKz8mJGsdK6q4YpQbGp9IrQxbwmB3a64DIl0=";
    };

    dontConfigure = true;
@@ -145,6 +149,16 @@ let
    '';
  };

  # grep for GIT_TAG in the following file
  # https://github.com/vllm-project/vllm/blob/v${version}/cmake/external_projects/qutlass.cmake
  qutlass = fetchFromGitHub {
    name = "qutlass-source";
    owner = "IST-DASLab";
    repo = "qutlass";
    rev = "830d2c4537c7396e14a02a46fbddd18b5d107c65";
    hash = "sha256-aG4qd0vlwP+8gudfvHwhtXCFmBOJKQQTvcwahpEqC84=";
  };

  vllm-flash-attn' = lib.defaultTo (stdenv.mkDerivation {
    pname = "vllm-flash-attn";
    # https://github.com/vllm-project/flash-attention/blob/${src.rev}/vllm_flash_attn/__init__.py
@@ -153,10 +167,11 @@ let
    # grep for GIT_TAG in the following file
    # https://github.com/vllm-project/vllm/blob/v${version}/cmake/external_projects/vllm_flash_attn.cmake
    src = fetchFromGitHub {
      name = "flash-attention-source";
      owner = "vllm-project";
      repo = "flash-attention";
      rev = "ee4d25bd84e0cbc7e0b9b9685085fd5db2dcb62a";
      hash = "sha256-2r0Habd/kBpvM4/aQFIYyj+uQAa3M9gjk3DcBZHFNfA=";
      rev = "58e0626a692f09241182582659e3bf8f16472659";
      hash = "sha256-ewdZd7LuBKBV0y3AaGRWISJzjg6cu59D2OtgqoDjrbM=";
    };

    patches = [
@@ -284,7 +299,7 @@ in

buildPythonPackage rec {
  pname = "vllm";
  version = "0.11.0";
  version = "0.11.2";
  pyproject = true;

  stdenv = torch.stdenv;
@@ -293,38 +308,31 @@ buildPythonPackage rec {
    owner = "vllm-project";
    repo = "vllm";
    tag = "v${version}";
    hash = "sha256-47TPvvPQvVbh6Gm2yvi+xhWZ8tSma91rp9hp/SBrEY8=";
    hash = "sha256-DoSlkFmR3KKEtfSfdRB++0CZeeXgxmM3zZjONlxbe8U=";
  };

  patches = [
    ./0002-setup.py-nix-support-respect-cmakeFlags.patch
    ./0003-propagate-pythonpath.patch
    ./0005-drop-intel-reqs.patch
    # TODO: Remove the below patches when included in vLLM release
    (fetchpatch {
      url = "https://github.com/vllm-project/vllm/commit/9705fba7b727a3b9c275b012258608531e2223d1.patch";
      hash = "sha256-DxRGLiwkegMlMjqFmFc0igpaVv06/Y2WjL+ISoIOET4=";
    })
    # patch above is previous commit needed to apply patch below
    # oneDNN / CPU fix from https://github.com/vllm-project/vllm/pull/26401
    (fetchpatch {
      url = "https://github.com/vllm-project/vllm/commit/d7be1f2a480bdc62a6a1ec0126a401e3d42985fe.patch";
      hash = "sha256-Zi1k5wiOPjsbWHFKpcLq9Ns43wIP37Mbvesi5K80zaQ=";
    })
  ];

  postPatch = ''
    # Remove vendored pynvml entirely
    rm vllm/third_party/pynvml.py
    substituteInPlace tests/utils.py \
      --replace-fail "from vllm.third_party.pynvml import" "from pynvml import"
    substituteInPlace vllm/utils/__init__.py \
      --replace-fail "import vllm.third_party.pynvml" "import pynvml"
      --replace-fail \
        "from vllm.third_party.pynvml import" \
        "from pynvml import"
    substituteInPlace vllm/utils/import_utils.py \
      --replace-fail \
        "import vllm.third_party.pynvml as pynvml" \
        "import pynvml"

    # pythonRelaxDeps does not cover build-system
    substituteInPlace pyproject.toml \
      --replace-fail "torch ==" "torch >=" \
      --replace-fail "setuptools>=77.0.3,<80.0.0" "setuptools"
      --replace-fail "setuptools>=77.0.3,<81.0.0" "setuptools"

    # Ignore the python version check because it hard-codes minor versions and
    # lags behind `ray`'s python interpreter support
@@ -393,6 +401,7 @@ buildPythonPackage rec {

  dependencies = [
    aioprometheus
    anthropic
    blake3
    cachetools
    cbor2
@@ -424,6 +433,7 @@ buildPythonPackage rec {
    partial-json-parser
    compressed-tensors
    mistral-common
    model-hosting-container-standards
    torch
    torchaudio
    torchvision
@@ -460,6 +470,7 @@ buildPythonPackage rec {
    (lib.cmakeFeature "FETCHCONTENT_SOURCE_DIR_CUTLASS" "${lib.getDev cutlass}")
    (lib.cmakeFeature "FLASH_MLA_SRC_DIR" "${lib.getDev flashmla}")
    (lib.cmakeFeature "VLLM_FLASH_ATTN_SRC_DIR" "${lib.getDev vllm-flash-attn'}")
    (lib.cmakeFeature "QUTLASS_SRC_DIR" "${lib.getDev qutlass}")
    (lib.cmakeFeature "TORCH_CUDA_ARCH_LIST" "${gpuTargetString}")
    (lib.cmakeFeature "CUTLASS_NVCC_ARCHS_ENABLED" "${cudaPackages.flags.cmakeCudaArchitecturesString}")
    (lib.cmakeFeature "CUDA_TOOLKIT_ROOT_DIR" "${symlinkJoin {