pkgs/development/python-modules/vllm/0005-drop-intel-reqs.patch +17 −4

From 7511784ceb9252091a9d63ac6b54dcc67dd2b262 Mon Sep 17 00:00:00 2001
From: Conroy Cheers <conroy@corncheese.org>
Date: Fri, 13 Jun 2025 17:42:10 +1000
Subject: [PATCH] drop intel reqs

---
 requirements/cpu.txt | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/requirements/cpu.txt b/requirements/cpu.txt
index d7b0fc6d8..be2df751b 100644
--- a/requirements/cpu.txt
+++ b/requirements/cpu.txt
@@ -24,8 +24,5 @@ datasets # for benchmark scripts
 
 # cpu cannot use triton 3.3.0
 triton==3.2.0; platform_machine == "x86_64"
-# Intel Extension for PyTorch, only for x86_64 CPUs
-intel-openmp==2024.2.1; platform_machine == "x86_64"
-intel_extension_for_pytorch==2.7.0; platform_machine == "x86_64"
 py-libnuma; platform_system != "Darwin"
 psutil; platform_system != "Darwin"
-- 
2.49.0
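For reference, the same cleanup could be expressed inline rather than as a vendored patch. A minimal postPatch sketch (an illustration only, not what this PR does) that deletes the same three Intel-only pins from requirements/cpu.txt:

  postPatch = ''
    # Drop requirements the Nix build does not provide
    # (same effect as 0005-drop-intel-reqs.patch).
    sed -i \
      -e '/^# Intel Extension for PyTorch/d' \
      -e '/^intel-openmp==/d' \
      -e '/^intel_extension_for_pytorch==/d' \
      requirements/cpu.txt
  '';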
pkgs/development/python-modules/vllm/default.nix +36 −27

@@ -3,6 +3,7 @@
   stdenv,
   python,
   buildPythonPackage,
+  pythonAtLeast,
   fetchFromGitHub,
   fetchpatch,
   symlinkJoin,
@@ -67,6 +68,7 @@
   opentelemetry-exporter-otlp,
   bitsandbytes,
   flashinfer,
+  py-libnuma,
 
   # internal dependency - for overriding in overlays
   vllm-flash-attn ? null,
@@ -246,16 +248,19 @@ in
 buildPythonPackage rec {
   pname = "vllm";
-  version = "0.9.0.1";
+  version = "0.9.1";
   pyproject = true;
 
+  # https://github.com/vllm-project/vllm/issues/12083
+  disabled = pythonAtLeast "3.13";
+
   stdenv = torch.stdenv;
 
   src = fetchFromGitHub {
     owner = "vllm-project";
     repo = "vllm";
     tag = "v${version}";
-    hash = "sha256-gNe/kdsDQno8Fd6mo29feWmbyC0c2+kljlVxY4v7R9U=";
+    hash = "sha256-sp7rDpewTPXTVRBJHJMj+8pJDS6wAu0/OTJZwbPPqKc=";
   };
 
   patches = [
@@ -264,14 +269,18 @@ buildPythonPackage rec {
       url = "https://github.com/vllm-project/vllm/commit/6a5d7e45f52c3a13de43b8b4fa9033e3b342ebd2.patch";
       hash = "sha256-KYthqu+6XwsYYd80PtfrMMjuRV9+ionccr7EbjE4jJE=";
     })
+    (fetchpatch {
+      name = "fall-back-to-gloo-when-nccl-unavailable.patch";
+      url = "https://github.com/vllm-project/vllm/commit/aa131a94410683b0a02e74fed2ce95e6c2b6b030.patch";
+      hash = "sha256-jNlQZQ8xiW85JWyBjsPZ6FoRQsiG1J8bwzmQjnaWFBg=";
+    })
     ./0002-setup.py-nix-support-respect-cmakeFlags.patch
     ./0003-propagate-pythonpath.patch
     ./0004-drop-lsmod.patch
+    ./0005-drop-intel-reqs.patch
   ];
 
   postPatch = ''
     # pythonRelaxDeps does not cover build-system
     substituteInPlace pyproject.toml \
       --replace-fail "torch ==" "torch >="
@@ -286,11 +295,6 @@ buildPythonPackage rec {
     # Pass build environment PYTHONPATH to vLLM's Python configuration scripts
     substituteInPlace CMakeLists.txt \
       --replace-fail '$PYTHONPATH' '$ENV{PYTHONPATH}'
-  ''
-  + lib.optionalString (nccl == null) ''
-    # On platforms where NCCL is not supported (e.g. Jetson), substitute Gloo (provided by Torch)
-    substituteInPlace vllm/distributed/parallel_state.py \
-      --replace-fail '"nccl"' '"gloo"'
   '';
 
   nativeBuildInputs =
@@ -362,7 +366,6 @@ buildPythonPackage rec {
     outlines
     pandas
     prometheus-fastapi-instrumentator
-    psutil
     py-cpuinfo
     pyarrow
     pydantic
@@ -392,9 +395,15 @@ buildPythonPackage rec {
     opentelemetry-api
     opentelemetry-exporter-otlp
     bitsandbytes
+    # vLLM needs Torch's compiler to be present in order to use torch.compile
+    torch.stdenv.cc
   ]
   ++ uvicorn.optional-dependencies.standard
   ++ aioprometheus.optional-dependencies.starlette
+  ++ lib.optionals stdenv.targetPlatform.isLinux [
+    py-libnuma
+    psutil
+  ]
   ++ lib.optionals cudaSupport [
     cupy
     pynvml
@@ -404,11 +413,11 @@ buildPythonPackage rec {
   dontUseCmakeConfigure = true;
   cmakeFlags =
     [
-    ]
-    ++ lib.optionals cudaSupport [
       (lib.cmakeFeature "FETCHCONTENT_SOURCE_DIR_CUTLASS" "${lib.getDev cutlass}")
       (lib.cmakeFeature "FLASH_MLA_SRC_DIR" "${lib.getDev flashmla}")
       (lib.cmakeFeature "VLLM_FLASH_ATTN_SRC_DIR" "${lib.getDev vllm-flash-attn'}")
+    ]
+    ++ lib.optionals cudaSupport [
       (lib.cmakeFeature "TORCH_CUDA_ARCH_LIST" "${gpuTargetString}")
       (lib.cmakeFeature "CUTLASS_NVCC_ARCHS_ENABLED" "${cudaPackages.flags.cmakeCudaArchitecturesString}")
       (lib.cmakeFeature "CUDA_TOOLKIT_ROOT_DIR" "${symlinkJoin {
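The vllm-flash-attn ? null argument is the override hook called out in the comment above ("internal dependency - for overriding in overlays"). A minimal overlay sketch using it; my-vllm-flash-attn.nix here is a hypothetical replacement derivation, not something this PR adds:

  final: prev: {
    pythonPackagesExtensions = prev.pythonPackagesExtensions ++ [
      (pyFinal: pyPrev: {
        vllm = pyPrev.vllm.override {
          # Substitute a custom flash-attention build (hypothetical path).
          vllm-flash-attn = pyFinal.callPackage ./my-vllm-flash-attn.nix { };
        };
      })
    ];
  }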