python3Packages.vllm: 0.11.0 -> 0.11.2 (3e1a4d4e) · Commits · nix / nixpkgs

pkgs/development/python-modules/vllm/0002-setup.py-nix-support-respect-cmakeFlags.patch

+8 −20

Original line number	Diff line number	Diff line
		From 10b7e8330bdba319a4162cceb8e5dd4280215b04 Mon Sep 17 00:00:00 2001
		From: SomeoneSerge <else@someonex.net>
		Date: Wed, 31 Jul 2024 12:06:15 +0000
		Subject: [PATCH 2/2] setup.py: nix-support (respect cmakeFlags)

		---
		setup.py | 10 ++++++++++
		1 file changed, 10 insertions(+)

		diff --git a/setup.py b/setup.py
		index 01e006f9..14762146 100644
		index e9b36e2a2..bc9e2f1e3 100644
		--- a/setup.py
		+++ b/setup.py
		@@ -15,6 +15,15 @@ from setuptools import Extension, find_packages, setup
		from setuptools.command.build_ext import build_ext
		from torch.utils.cpp_extension import CUDA_HOME
		@@ -20,6 +20,15 @@ from setuptools.command.build_ext import build_ext
		from setuptools_scm import get_version
		from torch.utils.cpp_extension import CUDA_HOME, ROCM_HOME

		+import os
		+import json
		@@ -27,14 +18,11 @@ index 01e006f9..14762146 100644

		def load_module_from_path(module_name, path):
		spec = importlib.util.spec_from_file_location(module_name, path)
		@@ -159,6 +168,7 @@ class cmake_build_ext(build_ext):
		'-DCMAKE_LIBRARY_OUTPUT_DIRECTORY={}'.format(outdir),
		'-DCMAKE_ARCHIVE_OUTPUT_DIRECTORY={}'.format(self.build_temp),
		'-DVLLM_TARGET_DEVICE={}'.format(VLLM_TARGET_DEVICE),
		@@ -151,6 +160,7 @@ class cmake_build_ext(build_ext):
		cmake_args = [
		"-DCMAKE_BUILD_TYPE={}".format(cfg),
		"-DVLLM_TARGET_DEVICE={}".format(VLLM_TARGET_DEVICE),
		+ *NIX_ATTRS["cmakeFlags"],
		]

		verbose = envs.VERBOSE
		--
		2.45.1

pkgs/development/python-modules/vllm/0003-propagate-pythonpath.patch

+7 −6

Original line number	Diff line number	Diff line
		diff --git a/vllm/model_executor/models/registry.py b/vllm/model_executor/models/registry.py
		index 81623def..2a6e2c92 100644
		index a2de597c8..4c2410209 100644
		--- a/vllm/model_executor/models/registry.py
		+++ b/vllm/model_executor/models/registry.py
		@@ -521,6 +521,7 @@ def _run_in_subprocess(fn: Callable[[], _T]) -> _T:
		@@ -1121,7 +1121,7 @@ def _run_in_subprocess(fn: Callable[[], _T]) -> _T:
		# cannot use `sys.executable __file__` here because the script
		# contains relative imports
		returned = subprocess.run(_SUBPROCESS_COMMAND,
		input=input_bytes,
		+ env={'PYTHONPATH': ':'.join(sys.path)},
		capture_output=True)
		returned = subprocess.run(
		- _SUBPROCESS_COMMAND, input=input_bytes, capture_output=True
		+ _SUBPROCESS_COMMAND, input=input_bytes, capture_output=True, env={'PYTHONPATH': ':'.join(sys.path)},
		)

		# check if the subprocess is successful

pkgs/development/python-modules/vllm/0005-drop-intel-reqs.patch

+2 −2

Original line number	Diff line number	Diff line
		diff --git a/requirements/cpu.txt b/requirements/cpu.txt
		index 2db6d87ee..37f816170 100644
		index d11787df4..71575d707 100644
		--- a/requirements/cpu.txt
		+++ b/requirements/cpu.txt
		@@ -21,9 +21,6 @@ torchvision; platform_machine != "ppc64le" and platform_machine != "s390x"
		@@ -20,9 +20,6 @@ torchvision; platform_machine != "ppc64le" and platform_machine != "s390x"
		torchvision==0.23.0; platform_machine == "ppc64le"
		datasets # for benchmark scripts

pkgs/development/python-modules/vllm/default.nix

+34 −23

Original line number	Diff line number	Diff line
		@@ -34,6 +34,7 @@
		uvicorn,
		pydantic,
		aioprometheus,
		anthropic,
		nvidia-ml-py,
		openai,
		pyzmq,
		@@ -53,6 +54,7 @@
		compressed-tensors,
		mistral-common,
		msgspec,
		model-hosting-container-standards,
		numactl,
		tokenizers,
		oneDNN,
		@@ -98,10 +100,11 @@ let
		# see CMakeLists.txt, grepping for CUTLASS_REVISION
		# https://github.com/vllm-project/vllm/blob/v${version}/CMakeLists.txt
		cutlass = fetchFromGitHub {
		name = "cutlass-source";
		owner = "NVIDIA";
		repo = "cutlass";
		tag = "v4.0.0";
		hash = "sha256-HJY+Go1viPkSVZPEs/NyMtYJzas4mMLiIZF3kNX+WgA=";
		tag = "v4.2.1";
		hash = "sha256-iP560D5Vwuj6wX1otJhwbvqe/X4mYVeKTpK533Wr5gY=";
		};

		# FlashMLA's Blackwell (SM100) kernels were developed against CUTLASS v3.9.0
		@@ -126,10 +129,11 @@ let
		# grep for GIT_TAG in the following file
		# https://github.com/vllm-project/vllm/blob/v${version}/cmake/external_projects/flashmla.cmake
		src = fetchFromGitHub {
		name = "FlashMLA-source";
		owner = "vllm-project";
		repo = "FlashMLA";
		rev = "5f65b85703c7ed75fda01e06495077caad207c3f";
		hash = "sha256-DO9EFNSoAgyfRRc095v1UjT+Zdzk4cFY0+n28FVEwI0=";
		rev = "46d64a8ebef03fa50b4ae74937276a5c940e3f95";
		hash = "sha256-jtMzWB5hKz8mJGsdK6q4YpQbGp9IrQxbwmB3a64DIl0=";
		};

		dontConfigure = true;
		@@ -145,6 +149,16 @@ let
		'';
		};

		# grep for GIT_TAG in the following file
		# https://github.com/vllm-project/vllm/blob/v${version}/cmake/external_projects/qutlass.cmake
		qutlass = fetchFromGitHub {
		name = "qutlass-source";
		owner = "IST-DASLab";
		repo = "qutlass";
		rev = "830d2c4537c7396e14a02a46fbddd18b5d107c65";
		hash = "sha256-aG4qd0vlwP+8gudfvHwhtXCFmBOJKQQTvcwahpEqC84=";
		};

		vllm-flash-attn' = lib.defaultTo (stdenv.mkDerivation {
		pname = "vllm-flash-attn";
		# https://github.com/vllm-project/flash-attention/blob/${src.rev}/vllm_flash_attn/__init__.py
		@@ -153,10 +167,11 @@ let
		# grep for GIT_TAG in the following file
		# https://github.com/vllm-project/vllm/blob/v${version}/cmake/external_projects/vllm_flash_attn.cmake
		src = fetchFromGitHub {
		name = "flash-attention-source";
		owner = "vllm-project";
		repo = "flash-attention";
		rev = "ee4d25bd84e0cbc7e0b9b9685085fd5db2dcb62a";
		hash = "sha256-2r0Habd/kBpvM4/aQFIYyj+uQAa3M9gjk3DcBZHFNfA=";
		rev = "58e0626a692f09241182582659e3bf8f16472659";
		hash = "sha256-ewdZd7LuBKBV0y3AaGRWISJzjg6cu59D2OtgqoDjrbM=";
		};

		patches = [
		@@ -284,7 +299,7 @@ in

		buildPythonPackage rec {
		pname = "vllm";
		version = "0.11.0";
		version = "0.11.2";
		pyproject = true;

		stdenv = torch.stdenv;
		@@ -293,38 +308,31 @@ buildPythonPackage rec {
		owner = "vllm-project";
		repo = "vllm";
		tag = "v${version}";
		hash = "sha256-47TPvvPQvVbh6Gm2yvi+xhWZ8tSma91rp9hp/SBrEY8=";
		hash = "sha256-DoSlkFmR3KKEtfSfdRB++0CZeeXgxmM3zZjONlxbe8U=";
		};

		patches = [
		./0002-setup.py-nix-support-respect-cmakeFlags.patch
		./0003-propagate-pythonpath.patch
		./0005-drop-intel-reqs.patch
		# TODO: Remove the below patches when included in vLLM release
		(fetchpatch {
		url = "https://github.com/vllm-project/vllm/commit/9705fba7b727a3b9c275b012258608531e2223d1.patch";
		hash = "sha256-DxRGLiwkegMlMjqFmFc0igpaVv06/Y2WjL+ISoIOET4=";
		})
		# patch above is previous commit needed to apply patch below
		# oneDNN / CPU fix from https://github.com/vllm-project/vllm/pull/26401
		(fetchpatch {
		url = "https://github.com/vllm-project/vllm/commit/d7be1f2a480bdc62a6a1ec0126a401e3d42985fe.patch";
		hash = "sha256-Zi1k5wiOPjsbWHFKpcLq9Ns43wIP37Mbvesi5K80zaQ=";
		})
		];

		postPatch = ''
		# Remove vendored pynvml entirely
		rm vllm/third_party/pynvml.py
		substituteInPlace tests/utils.py \
		--replace-fail "from vllm.third_party.pynvml import" "from pynvml import"
		substituteInPlace vllm/utils/__init__.py \
		--replace-fail "import vllm.third_party.pynvml" "import pynvml"
		--replace-fail \
		"from vllm.third_party.pynvml import" \
		"from pynvml import"
		substituteInPlace vllm/utils/import_utils.py \
		--replace-fail \
		"import vllm.third_party.pynvml as pynvml" \
		"import pynvml"

		# pythonRelaxDeps does not cover build-system
		substituteInPlace pyproject.toml \
		--replace-fail "torch ==" "torch >=" \
		--replace-fail "setuptools>=77.0.3,<80.0.0" "setuptools"
		--replace-fail "setuptools>=77.0.3,<81.0.0" "setuptools"

		# Ignore the python version check because it hard-codes minor versions and
		# lags behind `ray`'s python interpreter support
		@@ -393,6 +401,7 @@ buildPythonPackage rec {

		dependencies = [
		aioprometheus
		anthropic
		blake3
		cachetools
		cbor2
		@@ -424,6 +433,7 @@ buildPythonPackage rec {
		partial-json-parser
		compressed-tensors
		mistral-common
		model-hosting-container-standards
		torch
		torchaudio
		torchvision
		@@ -460,6 +470,7 @@ buildPythonPackage rec {
		(lib.cmakeFeature "FETCHCONTENT_SOURCE_DIR_CUTLASS" "${lib.getDev cutlass}")
		(lib.cmakeFeature "FLASH_MLA_SRC_DIR" "${lib.getDev flashmla}")
		(lib.cmakeFeature "VLLM_FLASH_ATTN_SRC_DIR" "${lib.getDev vllm-flash-attn'}")
		(lib.cmakeFeature "QUTLASS_SRC_DIR" "${lib.getDev qutlass}")
		(lib.cmakeFeature "TORCH_CUDA_ARCH_LIST" "${gpuTargetString}")
		(lib.cmakeFeature "CUTLASS_NVCC_ARCHS_ENABLED" "${cudaPackages.flags.cmakeCudaArchitecturesString}")
		(lib.cmakeFeature "CUDA_TOOLKIT_ROOT_DIR" "${symlinkJoin {