Commit e58aa1c6 authored by Conroy Cheers
Browse files

python312Packages.vllm: 0.9.0.1 -> 0.9.1

parent 5b0471b7
Loading
Loading
Loading
Loading
+17 −4
Original line number Diff line number Diff line
From 7511784ceb9252091a9d63ac6b54dcc67dd2b262 Mon Sep 17 00:00:00 2001
From: Conroy Cheers <conroy@corncheese.org>
Date: Fri, 13 Jun 2025 17:42:10 +1000
Subject: [PATCH] drop intel reqs

---
 requirements/cpu.txt | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/requirements/cpu.txt b/requirements/cpu.txt
index 121330158..d41918883 100644
index d7b0fc6d8..be2df751b 100644
--- a/requirements/cpu.txt
+++ b/requirements/cpu.txt
@@ -20,7 +20,3 @@ datasets # for benchmark scripts
 
@@ -24,8 +24,5 @@ datasets # for benchmark scripts
 # cpu cannot use triton 3.3.0
 triton==3.2.0; platform_machine == "x86_64"
-
 
-# Intel Extension for PyTorch, only for x86_64 CPUs
-intel-openmp==2024.2.1; platform_machine == "x86_64"
-intel_extension_for_pytorch==2.7.0; platform_machine == "x86_64"
 py-libnuma; platform_system != "Darwin"
 psutil; platform_system != "Darwin"
-- 
2.49.0
+36 −27
Original line number Diff line number Diff line
@@ -3,6 +3,7 @@
  stdenv,
  python,
  buildPythonPackage,
  pythonAtLeast,
  fetchFromGitHub,
  fetchpatch,
  symlinkJoin,
@@ -67,6 +68,7 @@
  opentelemetry-exporter-otlp,
  bitsandbytes,
  flashinfer,
  py-libnuma,

  # internal dependency - for overriding in overlays
  vllm-flash-attn ? null,
@@ -246,16 +248,19 @@ in

buildPythonPackage rec {
  pname = "vllm";
  version = "0.9.0.1";
  version = "0.9.1";
  pyproject = true;

  # https://github.com/vllm-project/vllm/issues/12083
  disabled = pythonAtLeast "3.13";

  stdenv = torch.stdenv;

  src = fetchFromGitHub {
    owner = "vllm-project";
    repo = "vllm";
    tag = "v${version}";
    hash = "sha256-gNe/kdsDQno8Fd6mo29feWmbyC0c2+kljlVxY4v7R9U=";
    hash = "sha256-sp7rDpewTPXTVRBJHJMj+8pJDS6wAu0/OTJZwbPPqKc=";
  };

  patches = [
@@ -264,14 +269,18 @@ buildPythonPackage rec {
      url = "https://github.com/vllm-project/vllm/commit/6a5d7e45f52c3a13de43b8b4fa9033e3b342ebd2.patch";
      hash = "sha256-KYthqu+6XwsYYd80PtfrMMjuRV9+ionccr7EbjE4jJE=";
    })
    (fetchpatch {
      name = "fall-back-to-gloo-when-nccl-unavailable.patch";
      url = "https://github.com/vllm-project/vllm/commit/aa131a94410683b0a02e74fed2ce95e6c2b6b030.patch";
      hash = "sha256-jNlQZQ8xiW85JWyBjsPZ6FoRQsiG1J8bwzmQjnaWFBg=";
    })
    ./0002-setup.py-nix-support-respect-cmakeFlags.patch
    ./0003-propagate-pythonpath.patch
    ./0004-drop-lsmod.patch
    ./0005-drop-intel-reqs.patch
  ];

  postPatch =
    ''
  postPatch = ''
    # pythonRelaxDeps does not cover build-system
    substituteInPlace pyproject.toml \
      --replace-fail "torch ==" "torch >="
@@ -286,11 +295,6 @@ buildPythonPackage rec {
    # Pass build environment PYTHONPATH to vLLM's Python configuration scripts
    substituteInPlace CMakeLists.txt \
      --replace-fail '$PYTHONPATH' '$ENV{PYTHONPATH}'
    ''
    + lib.optionalString (nccl == null) ''
      # On platforms where NCCL is not supported (e.g. Jetson), substitute Gloo (provided by Torch)
      substituteInPlace vllm/distributed/parallel_state.py \
        --replace-fail '"nccl"' '"gloo"'
  '';

  nativeBuildInputs =
@@ -362,7 +366,6 @@ buildPythonPackage rec {
      outlines
      pandas
      prometheus-fastapi-instrumentator
      psutil
      py-cpuinfo
      pyarrow
      pydantic
@@ -392,9 +395,15 @@ buildPythonPackage rec {
      opentelemetry-api
      opentelemetry-exporter-otlp
      bitsandbytes
      # vLLM needs Torch's compiler to be present in order to use torch.compile
      torch.stdenv.cc
    ]
    ++ uvicorn.optional-dependencies.standard
    ++ aioprometheus.optional-dependencies.starlette
    ++ lib.optionals stdenv.targetPlatform.isLinux [
      py-libnuma
      psutil
    ]
    ++ lib.optionals cudaSupport [
      cupy
      pynvml
@@ -404,11 +413,11 @@ buildPythonPackage rec {
  dontUseCmakeConfigure = true;
  cmakeFlags =
    [
    ]
    ++ lib.optionals cudaSupport [
      (lib.cmakeFeature "FETCHCONTENT_SOURCE_DIR_CUTLASS" "${lib.getDev cutlass}")
      (lib.cmakeFeature "FLASH_MLA_SRC_DIR" "${lib.getDev flashmla}")
      (lib.cmakeFeature "VLLM_FLASH_ATTN_SRC_DIR" "${lib.getDev vllm-flash-attn'}")
    ]
    ++ lib.optionals cudaSupport [
      (lib.cmakeFeature "TORCH_CUDA_ARCH_LIST" "${gpuTargetString}")
      (lib.cmakeFeature "CUTLASS_NVCC_ARCHS_ENABLED" "${cudaPackages.flags.cmakeCudaArchitecturesString}")
      (lib.cmakeFeature "CUDA_TOOLKIT_ROOT_DIR" "${symlinkJoin {