pkgs/development/python-modules/vllm/default.nix  (+19 −12)

@@ -3,7 +3,7 @@
   stdenv,
   python,
   buildPythonPackage,
-  pythonAtLeast,
+  pythonOlder,
   fetchFromGitHub,
   fetchpatch,
   symlinkJoin,
@@ -115,8 +115,8 @@ let
     src = fetchFromGitHub {
       owner = "vllm-project";
       repo = "FlashMLA";
-      rev = "0e43e774597682284358ff2c54530757b654b8d1";
-      hash = "sha256-wxL/jtq/lsLg1o+4392KNgfw5TYlW6lqEVbmR3Jl4/Q=";
+      rev = "a757314c04eedd166e329e846c820eb1bdd702de";
+      hash = "sha256-KT9R6ju7XzgqKHPGQwzw0yNiKL3DNW6qJrEBvmLn4hY=";
     };
 
     dontConfigure = true;
@@ -135,15 +135,15 @@ let
   vllm-flash-attn' = lib.defaultTo (stdenv.mkDerivation {
     pname = "vllm-flash-attn";
     # https://github.com/vllm-project/flash-attention/blob/${src.rev}/vllm_flash_attn/__init__.py
-    version = "2.7.4.post1";
+    version = "2.7.2.post1";
     # grep for GIT_TAG in the following file
     # https://github.com/vllm-project/vllm/blob/v${version}/cmake/external_projects/vllm_flash_attn.cmake
     src = fetchFromGitHub {
       owner = "vllm-project";
       repo = "flash-attention";
-      rev = "57b4e68b9f9d94750b46de8f8dbd2bfcc86edd4f";
-      hash = "sha256-c7L7WZVVEnXMOTPBoSp7jhkl9d4TA4sj11QvOSWTDIE=";
+      rev = "ee4d25bd84e0cbc7e0b9b9685085fd5db2dcb62a";
+      hash = "sha256-2r0Habd/kBpvM4/aQFIYyj+uQAa3M9gjk3DcBZHFNfA=";
     };
 
     patches = [
@@ -179,7 +179,7 @@ let
   cpuSupport = !cudaSupport && !rocmSupport;
 
-  # https://github.com/pytorch/pytorch/blob/v2.7.1/torch/utils/cpp_extension.py#L2343-L2345
+  # https://github.com/pytorch/pytorch/blob/v2.8.0/torch/utils/cpp_extension.py#L2411-L2414
   supportedTorchCudaCapabilities =
     let
       real = [
@@ -204,8 +204,12 @@ let
         "10.0a"
         "10.1"
         "10.1a"
+        "10.3"
+        "10.3a"
         "12.0"
         "12.0a"
+        "12.1"
+        "12.1a"
       ];
       ptx = lists.map (x: "${x}+PTX") real;
     in
@@ -245,6 +249,7 @@ let
   mergedCudaLibraries = with cudaPackages; [
     cuda_cudart # cuda_runtime.h, -lcudart
+    cuda_cccl
     libcurand # curand_kernel.h
     libcusparse # cusparse.h
     libcusolver # cusolverDn.h
     cuda_nvtx
@@ -266,19 +271,16 @@ in
 buildPythonPackage rec {
   pname = "vllm";
-  version = "0.10.1.1";
+  version = "0.10.2";
   pyproject = true;
 
-  # https://github.com/vllm-project/vllm/issues/12083
-  disabled = pythonAtLeast "3.13";
-
   stdenv = torch.stdenv;
 
   src = fetchFromGitHub {
     owner = "vllm-project";
     repo = "vllm";
     tag = "v${version}";
-    hash = "sha256-lLNjBv5baER0AArX3IV4HWjDZ2jTGXyGIvnHupR8MGM=";
+    hash = "sha256-m9P4cxxdAToGKKIyTQdFupG3vZ3sEueMMxjugYfjKbo=";
   };
 
   patches = [
@@ -481,7 +483,12 @@ buildPythonPackage rec {
     maintainers = with lib.maintainers; [
       happysalada
       lach
       daniel-fahey
     ];
+    # Python 3.12 vLLM v0.10.2+CPU blake3 1.0.7 incompatibility
+    # discovered during https://github.com/NixOS/nixpkgs/pull/447722
+    # reported upstream in https://github.com/vllm-project/vllm/issues/26229
+    broken = (cpuSupport && pythonOlder "3.13");
+
     badPlatforms = [
       # CMake Error at cmake/cpu_extension.cmake:78 (find_isa):
       # find_isa Function invoked with incorrect arguments for function named:
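The four capability entries added in the @@ -204 hunk also gain "+PTX" variants through the ptx mapping shown there. A minimal sketch of that expansion, evaluable with nix-instantiate --eval --strict; the names real/ptx mirror default.nix, but the final real ++ ptx combination is an assumption, since the hunk cuts off at "in":

let
  # the four entries added alongside the PyTorch v2.8.0 cpp_extension.py reference above
  real = [ "10.3" "10.3a" "12.1" "12.1a" ];
  # default.nix calls lists.map from the nixpkgs lib; the map builtin behaves identically here
  ptx = map (x: "${x}+PTX") real;
in
real ++ ptx
# => [ "10.3" "10.3a" "12.1" "12.1a" "10.3+PTX" "10.3a+PTX" "12.1+PTX" "12.1a+PTX" ]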
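The rev/hash bumps for FlashMLA, flash-attention, and vllm itself all follow the usual nixpkgs fetcher workflow; a hedged sketch of it, not part of this diff: point rev at the new commit, set hash to lib.fakeHash, attempt a build, and copy the "got: sha256-..." value from the hash-mismatch error back into the expression.

{ lib, fetchFromGitHub }:

fetchFromGitHub {
  owner = "vllm-project";
  repo = "FlashMLA";
  # new pin from the @@ -115 hunk above
  rev = "a757314c04eedd166e329e846c820eb1bdd702de";
  # placeholder; the first build attempt reports the real sha256 to substitute here
  hash = lib.fakeHash;
}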