Unverified Commit b854c515 authored by Christian Kögler's avatar Christian Kögler Committed by GitHub
Browse files

Merge pull request #330331 from ck3d/localai-2192

local-ai: 2.18.1 -> 2.19.4
parents dec8f7b2 c875450f
Loading
Loading
Loading
Loading
+52 −43
Original line number Diff line number Diff line
@@ -17,6 +17,10 @@
, buildGoModule
, makeWrapper
, ncurses
, which

, enable_upx ? true
, upx

  # apply feature parameter names according to
  # https://github.com/NixOS/rfcs/pull/169
@@ -115,8 +119,8 @@ let
    src = fetchFromGitHub {
      owner = "ggerganov";
      repo = "llama.cpp";
      rev = "cb5fad4c6c2cbef92e9b8b63449e1cb7664e4846";
      hash = "sha256-cIJuDC+MFLd5hkA1kUxuaw2dZagHqn5fi5Q2XKvDEII=";
      rev = "ed9d2854c9de4ae1f448334294e61167b04bec2a";
      hash = "sha256-Xu2h9Zu+Q9utfFFmDWBOEu/EXth4xWRNoTMvPF5Fo/A=";
      fetchSubmodules = true;
    };
    postPatch = prev.postPatch + ''
@@ -269,8 +273,8 @@ let
    src = fetchFromGitHub {
      owner = "ggerganov";
      repo = "whisper.cpp";
      rev = "b29b3b29240aac8b71ce8e5a4360c1f1562ad66f";
      hash = "sha256-vSd+AP9AexbG4wvdkk6wjxYQBZdKWGK2Ix7c86MUfB8=";
      rev = "6739eb83c3ca5cf40d24c6fe8442a761a1eb6248";
      hash = "sha256-1yDdJVjIwYDJKn93zn4xOJXMoDTqaG2TvakjdHIMCxk=";
    };

    nativeBuildInputs = [ cmake pkg-config ]
@@ -388,36 +392,40 @@ let
      stdenv;

  pname = "local-ai";
  version = "2.18.1";
  version = "2.19.4";
  src = fetchFromGitHub {
    owner = "go-skynet";
    repo = "LocalAI";
    rev = "v${version}";
    hash = "sha256-hRrbGUUawQV4fqxAn3eFBvn4/lZ+NrKhxnGHqpljrec=";
    hash = "sha256-aKq6/DI+4+BvIEw6eONqPr3mZXuz7rMFN+FBypVj0Gc=";
  };

  prepare-sources =
    let
      cp = "cp -r --no-preserve=mode,ownership";
    in
    ''
      mkdir sources
      ${cp} ${go-llama} sources/go-llama.cpp
      ${cp} ${gpt4all} sources/gpt4all
      ${cp} ${if with_tts then go-piper else go-piper.src} sources/go-piper
      ${cp} ${go-rwkv} sources/go-rwkv.cpp
      ${cp} ${whisper-cpp.src} sources/whisper.cpp
      cp ${whisper-cpp}/lib/lib*.a sources/whisper.cpp
      ${cp} ${go-bert} sources/go-bert.cpp
      ${cp} ${if with_stablediffusion then go-stable-diffusion else go-stable-diffusion.src} sources/go-stable-diffusion
      ${cp} ${if with_tinydream then go-tiny-dream else go-tiny-dream.src} sources/go-tiny-dream
    '';

  self = buildGoModule.override { stdenv = effectiveStdenv; } {
    inherit pname version src;

    vendorHash = "sha256-uvko1PQWW5P+6cgmwVKocKBm5GndszqCsSbxlXANqJs=";
    vendorHash = "sha256-HEKE75+ixuNbM+KEuhbQQ/NYYEzVlGYOttPavftWKhk=";

    env.NIX_CFLAGS_COMPILE = lib.optionalString with_stablediffusion " -isystem ${opencv}/include/opencv4";

    postPatch =
      let
        cp = "cp -r --no-preserve=mode,ownership";
      in
      ''
    postPatch = ''
      sed -i Makefile \
          -e 's;git clone.*go-llama\.cpp$;${cp} ${go-llama} sources/go-llama\.cpp;' \
          -e 's;git clone.*gpt4all$;${cp} ${gpt4all} sources/gpt4all;' \
          -e 's;git clone.*go-piper$;${cp} ${if with_tts then go-piper else go-piper.src} sources/go-piper;' \
          -e 's;git clone.*go-rwkv\.cpp$;${cp} ${go-rwkv} sources/go-rwkv\.cpp;' \
          -e 's;git clone.*whisper\.cpp$;${cp} ${whisper-cpp.src} sources/whisper\.cpp;' \
          -e 's;git clone.*go-bert\.cpp$;${cp} ${go-bert} sources/go-bert\.cpp;' \
          -e 's;git clone.*diffusion$;${cp} ${if with_stablediffusion then go-stable-diffusion else go-stable-diffusion.src} sources/go-stable-diffusion;' \
          -e 's;git clone.*go-tiny-dream$;${cp} ${if with_tinydream then go-tiny-dream else go-tiny-dream.src} sources/go-tiny-dream;' \
          -e 's, && git checkout.*,,g' \
        -e '/mod download/ d' \
        -e '/^ALL_GRPC_BACKENDS+=backend-assets\/grpc\/llama-cpp-fallback/ d' \
        -e '/^ALL_GRPC_BACKENDS+=backend-assets\/grpc\/llama-cpp-avx/ d' \
@@ -428,18 +436,23 @@ let
        -e '/^CGO_LDFLAGS_WHISPER?=/ s;$;-L${libcufft}/lib -L${cuda_cudart}/lib;'
    '';

    postConfigure = ''
    postConfigure = prepare-sources + ''
      shopt -s extglob
      mkdir -p backend-assets/grpc
      cp ${llama-cpp-grpc}/bin/grpc-server backend-assets/grpc/llama-cpp-avx2
      cp ${llama-cpp-rpc}/bin/grpc-server backend-assets/grpc/llama-cpp-grpc

      mkdir -p backend/cpp/llama/llama.cpp

      mkdir -p backend-assets/util
      cp ${llama-cpp-rpc}/bin/llama-rpc-server backend-assets/util/llama-cpp-rpc-server

      # avoid rebuild of prebuilt make targets
      touch backend-assets/grpc/* backend-assets/util/* sources/**/lib*.a
    '';

    buildInputs = [ ]
      ++ lib.optionals with_cublas [ libcublas ]
      ++ lib.optionals with_cublas [ cuda_cudart libcublas libcufft ]
      ++ lib.optionals with_clblas [ clblast ocl-icd opencl-headers ]
      ++ lib.optionals with_openblas [ openblas.dev ]
      ++ lib.optionals with_stablediffusion go-stable-diffusion.buildInputs
@@ -451,14 +464,15 @@ let
      protoc-gen-go-grpc
      makeWrapper
      ncurses # tput
      which
    ]
    ++ lib.optional enable_upx upx
    ++ lib.optionals with_cublas [ cuda_nvcc ];

    enableParallelBuilding = false;

    modBuildPhase = ''
      mkdir sources
      make prepare-sources protogen-go
    modBuildPhase = prepare-sources + ''
      make protogen-go
      go mod tidy -v
    '';

@@ -478,12 +492,6 @@ let
    buildPhase = ''
      runHook preBuild

      mkdir sources
      make prepare-sources
      # avoid rebuild of prebuilt libraries
      touch sources/**/lib*.a
      cp ${whisper-cpp}/lib/static/lib*.a sources/whisper.cpp

      local flagsArray=(
        ''${enableParallelBuilding:+-j''${NIX_BUILD_CORES}}
        SHELL=$SHELL
@@ -518,7 +526,8 @@ let
        ]
          ++ lib.optionals with_clblas [ clblast ocl-icd ]
          ++ lib.optionals with_openblas [ openblas ]
          ++ lib.optionals with_tts [ piper-phonemize ];
          ++ lib.optionals with_tts [ piper-phonemize ]
          ++ lib.optionals (with_tts && enable_upx) [ fmt spdlog ];
      in
      ''
        wrapProgram $out/bin/${pname} \
+9 −8
Original line number Diff line number Diff line
@@ -101,17 +101,16 @@ in

      # https://localai.io/advanced/#full-config-model-file-reference
      model-configs.${model} = rec {
        context_size = 8192;
        context_size = 16 * 1024; # 128k tokens is possible, but needs 16GB RAM
        backend = "llama-cpp";
        parameters = {
          # https://huggingface.co/lmstudio-community/Meta-Llama-3-8B-Instruct-GGUF
          # https://ai.meta.com/blog/meta-llama-3/
          # https://ai.meta.com/blog/meta-llama-3-1/
          model = fetchurl {
            url = "https://huggingface.co/lmstudio-community/Meta-Llama-3-8B-Instruct-GGUF/resolve/main/Meta-Llama-3-8B-Instruct-Q4_K_M.gguf";
            sha256 = "ab9e4eec7e80892fd78f74d9a15d0299f1e22121cea44efd68a7a02a3fe9a1da";
            url = "https://huggingface.co/lmstudio-community/Meta-Llama-3.1-8B-Instruct-GGUF/resolve/main/Meta-Llama-3.1-8B-Instruct-Q4_K_M.gguf";
            sha256 = "f2be3e1a239c12c9f3f01a962b11fb2807f8032fdb63b0a5502ea42ddef55e44";
          };
          # defaults from:
          # https://deepinfra.com/meta-llama/Meta-Llama-3-8B-Instruct
          # https://deepinfra.com/meta-llama/Meta-Llama-3.1-8B-Instruct
          temperature = 0.7;
          top_p = 0.9;
          top_k = 0;
@@ -135,7 +134,9 @@ in

            {{.Content}}${builtins.head stopwords}'';

          chat = "<|begin_of_text|>{{.Input}}<|start_header_id|>assistant<|end_header_id|>";
          chat = "{{.Input}}<|start_header_id|>assistant<|end_header_id|>";

          completion = "{{.Input}}";
        };
      };

@@ -185,7 +186,7 @@ in
          machine.succeed("curl -f http://localhost:${port}/v1/chat/completions --json @${writers.writeJSON "request-chat-completions.json" requests.chat-completions} --output chat-completions.json")
          machine.copy_from_vm("chat-completions.json")
          machine.succeed("${jq}/bin/jq --exit-status 'debug | .object == \"chat.completion\"' chat-completions.json")
          machine.succeed("${jq}/bin/jq --exit-status 'debug | .choices | first.message.content | tonumber == 3' chat-completions.json")
          machine.succeed("${jq}/bin/jq --exit-status 'debug | .choices | first.message.content | split(\" \") | last | tonumber == 3' chat-completions.json")

          machine.succeed("curl -f http://localhost:${port}/v1/edits --json @${writers.writeJSON "request-edit-completions.json" requests.edit-completions} --output edit-completions.json")
          machine.copy_from_vm("edit-completions.json")