llama-cpp: 2249 -> 2294; bring upstream flake (03173009) · Commits · nix / nixpkgs

pkgs/by-name/ll/llama-cpp/package.nix

+96 −87

Original line number	Diff line number	Diff line
		@@ -15,12 +15,16 @@
		, openclSupport ? false
		, clblast

		, blasSupport ? !rocmSupport && !cudaSupport
		, openblas
		, blasSupport ? builtins.all (x: !x) [ cudaSupport metalSupport openclSupport rocmSupport vulkanSupport ]
		, pkg-config
		, metalSupport ? stdenv.isDarwin && stdenv.isAarch64 && !openclSupport
		, patchelf
		, static ? true # if false will build the shared objects as well
		, vulkanSupport ? false
		, mpiSupport ? false # Increases the runtime closure by ~700M
		, vulkan-headers
		, vulkan-loader
		, ninja
		, git
		, mpi
		}:

		let
		@@ -28,16 +32,50 @@ let
		# otherwise we get libstdc++ errors downstream.
		# cuda imposes an upper bound on the gcc version, e.g. the latest gcc compatible with cudaPackages_11 is gcc11
		effectiveStdenv = if cudaSupport then cudaPackages.backendStdenv else stdenv;
		inherit (lib) cmakeBool cmakeFeature optionals;

		# Apple SDK frameworks used on Darwin; MetalKit is appended only when
		# metalSupport is enabled.
		darwinBuildInputs =
		let
		inherit (darwin.apple_sdk.frameworks) Accelerate CoreVideo CoreGraphics MetalKit;
		in
		[ Accelerate CoreVideo CoreGraphics ] ++ optionals metalSupport [ MetalKit ];

		# CUDA inputs, listed per output rather than as whole packages.
		cudaBuildInputs = [
		cudaPackages.cuda_cccl.dev # <nv/target>

		# A temporary hack for reducing the closure size, remove once cudaPackages
		# have stopped using lndir: https://github.com/NixOS/nixpkgs/issues/271792
		cudaPackages.cuda_cudart.dev
		cudaPackages.cuda_cudart.lib
		cudaPackages.cuda_cudart.static
		cudaPackages.libcublas.dev
		cudaPackages.libcublas.lib
		cudaPackages.libcublas.static
		];

		# ROCm packages added to buildInputs when rocmSupport is enabled.
		rocmBuildInputs = [
		rocmPackages.clr
		rocmPackages.hipblas
		rocmPackages.rocblas
		];

		# Vulkan headers and loader, added to buildInputs when vulkanSupport is enabled.
		vulkanBuildInputs = [ vulkan-headers vulkan-loader ];
		in
		effectiveStdenv.mkDerivation (finalAttrs: {
		pname = "llama-cpp";
		version = "2249";
		version = "2294";

		src = fetchFromGitHub {
		owner = "ggerganov";
		repo = "llama.cpp";
		rev = "refs/tags/b${finalAttrs.version}";
		hash = "sha256-ikJUToUbA60u/8azR6dPmPyodq/nQe5L2aotlYBclaE=";
		hash = "sha256-uZi4Bj03PgfFV+jS5M+A1sMCWC/GMY5IyyrlR1b4Sh4=";
		};

		postPatch = ''
		@@ -45,7 +83,8 @@ effectiveStdenv.mkDerivation (finalAttrs: {
		--replace '[bundle pathForResource:@"ggml-metal" ofType:@"metal"];' "@\"$out/bin/ggml-metal.metal\";"
		'';

		nativeBuildInputs = [ cmake ] ++ lib.optionals blasSupport [ pkg-config ] ++ lib.optionals cudaSupport [
		nativeBuildInputs = [ cmake ninja pkg-config git ]
		++ optionals cudaSupport [
		cudaPackages.cuda_nvcc

		# TODO: Replace with autoAddDriverRunpath
		@@ -53,86 +92,55 @@ effectiveStdenv.mkDerivation (finalAttrs: {
		cudaPackages.autoAddOpenGLRunpathHook
		];

		buildInputs = lib.optionals effectiveStdenv.isDarwin
		(with darwin.apple_sdk.frameworks; [
		Accelerate
		CoreGraphics
		CoreVideo
		Foundation
		])
		++ lib.optionals metalSupport (with darwin.apple_sdk.frameworks; [
		MetalKit
		])
		++ lib.optionals cudaSupport (with cudaPackages; [
		cuda_cccl.dev # <nv/target>

		# A temporary hack for reducing the closure size, remove once cudaPackages
		# have stopped using lndir: https://github.com/NixOS/nixpkgs/issues/271792
		cuda_cudart.dev
		cuda_cudart.lib
		cuda_cudart.static
		libcublas.dev
		libcublas.lib
		libcublas.static
		]) ++ lib.optionals rocmSupport [
		rocmPackages.clr
		rocmPackages.hipblas
		rocmPackages.rocblas
		] ++ lib.optionals openclSupport [
		clblast
		] ++ lib.optionals blasSupport [
		openblas
		];
		# Backend-specific inputs; each group is only included when the matching
		# feature flag is enabled.
		buildInputs = optionals effectiveStdenv.isDarwin darwinBuildInputs
		++ optionals cudaSupport cudaBuildInputs
		# `optionals` takes a list: passing the bare `mpi` derivation makes the
		# `++` chain fail to evaluate as soon as mpiSupport is true.
		++ optionals mpiSupport [ mpi ]
		++ optionals openclSupport [ clblast ]
		++ optionals rocmSupport rocmBuildInputs
		++ optionals vulkanSupport vulkanBuildInputs;

		cmakeFlags = [
		"-DLLAMA_NATIVE=OFF"
		"-DLLAMA_BUILD_SERVER=ON"
		]
		++ lib.optionals metalSupport [
		"-DCMAKE_C_FLAGS=-D__ARM_FEATURE_DOTPROD=1"
		"-DLLAMA_METAL=ON"
		# -march=native is non-deterministic; override with platform-specific flags if needed
		(cmakeBool "LLAMA_NATIVE" false)
		# Build the server example explicitly; postInstall renames its binary.
		# (The option is LLAMA_BUILD_SERVER, not BUILD_SHARED_SERVER.)
		(cmakeBool "LLAMA_BUILD_SERVER" true)
		(cmakeBool "BUILD_SHARED_LIBS" true)
		# One toggle per optional backend, driven by the package's feature flags.
		(cmakeBool "LLAMA_BLAS" blasSupport)
		(cmakeBool "LLAMA_CLBLAST" openclSupport)
		(cmakeBool "LLAMA_CUBLAS" cudaSupport)
		(cmakeBool "LLAMA_HIPBLAS" rocmSupport)
		(cmakeBool "LLAMA_METAL" metalSupport)
		(cmakeBool "LLAMA_MPI" mpiSupport)
		(cmakeBool "LLAMA_VULKAN" vulkanSupport)
		]
		++ lib.optionals cudaSupport [
		"-DLLAMA_CUBLAS=ON"
		++ optionals cudaSupport [
		(
		with cudaPackages.flags;
		cmakeFeature "CMAKE_CUDA_ARCHITECTURES" (
		builtins.concatStringsSep ";" (map dropDot cudaCapabilities)
		)
		)
		]
		++ lib.optionals rocmSupport [
		"-DLLAMA_HIPBLAS=1"
		"-DCMAKE_C_COMPILER=hipcc"
		"-DCMAKE_CXX_COMPILER=hipcc"
		"-DCMAKE_POSITION_INDEPENDENT_CODE=ON"
		++ optionals rocmSupport [
		(cmakeFeature "CMAKE_C_COMPILER" "hipcc")
		(cmakeFeature "CMAKE_CXX_COMPILER" "hipcc")

		# Build all targets supported by rocBLAS. When updating search for TARGET_LIST_ROCM
		# in https://github.com/ROCmSoftwarePlatform/rocBLAS/blob/develop/CMakeLists.txt
		# and select the line that matches the current nixpkgs version of rocBLAS.
		# Should likely use `rocmPackages.clr.gpuTargets`.
		"-DAMDGPU_TARGETS=gfx803;gfx900;gfx906:xnack-;gfx908:xnack-;gfx90a:xnack+;gfx90a:xnack-;gfx940;gfx941;gfx942;gfx1010;gfx1012;gfx1030;gfx1100;gfx1101;gfx1102"
		]
		++ lib.optionals openclSupport [
		"-DLLAMA_CLBLAST=ON"
		]
		++ lib.optionals blasSupport [
		"-DLLAMA_BLAS=ON"
		"-DLLAMA_BLAS_VENDOR=OpenBLAS"
		]
		++ lib.optionals (!static) [
		(lib.cmakeBool "BUILD_SHARED_LIBS" true)
		];

		installPhase = ''
		runHook preInstall

		mkdir -p $out/bin
		${lib.optionalString (!static) ''
		mkdir $out/lib
		cp libggml_shared.so $out/lib
		cp libllama.so $out/lib
		''}

		for f in bin/*; do
		test -x "$f" \|\| continue
		${lib.optionalString (!static) ''
		${patchelf}/bin/patchelf "$f" --set-rpath "$out/lib"
		''}
		cp "$f" $out/bin/llama-cpp-"$(basename "$f")"
		done

		${lib.optionalString metalSupport "cp ./bin/ggml-metal.metal $out/bin/ggml-metal.metal"}

		runHook postInstall
		++ optionals metalSupport [ (cmakeFeature "CMAKE_C_FLAGS" "-D__ARM_FEATURE_DOTPROD=1") ]
		++ optionals blasSupport [ (cmakeFeature "LLAMA_BLAS_VENDOR" "OpenBLAS") ];

		# Rename the generically named `main` and `server` binaries to `llama` and
		# `llama-server`, and install the `llama.h` header from the source tree.
		# Upstream plans to add these targets at the CMake level; remove these
		# additional steps once that has happened.
		postInstall = ''
		mv $out/bin/main $out/bin/llama
		mv $out/bin/server $out/bin/llama-server
		mkdir -p $out/include
		cp $src/llama.h $out/include/
		'';

		passthru.updateScript = nix-update-script {
		@@ -144,9 +152,10 @@ effectiveStdenv.mkDerivation (finalAttrs: {
		description = "Port of Facebook's LLaMA model in C/C++";
		homepage = "https://github.com/ggerganov/llama.cpp/";
		license = licenses.mit;
		mainProgram = "llama-cpp-main";
		maintainers = with maintainers; [ dit7ya elohmeier ];
		# Broken on x86_64 Darwin, or when none of the OpenCL/BLAS/ROCm/CUDA backends is enabled.
		broken = (effectiveStdenv.isDarwin && effectiveStdenv.isx86_64) || lib.count lib.id [openclSupport blasSupport rocmSupport cudaSupport] == 0;
		mainProgram = "llama";
		maintainers = with maintainers; [ dit7ya elohmeier philiptaron ];
		platforms = platforms.unix;
		# Darwin is listed as a bad platform when the CUDA or OpenCL backend is enabled.
		badPlatforms = optionals (cudaSupport || openclSupport) lib.platforms.darwin;
		broken = (metalSupport && !effectiveStdenv.isDarwin);
		};
		})

Admin message