Unverified commit 56cfc87a authored by Christian Kögler, committed by GitHub

Merge pull request #307020 from ck3d/local-ai-2130

local-ai: 2.12.4 -> 2.13.0
parents adc9566b 729264e1
lib.nix (new file): +30 −0
{ lib
, writers
, writeText
, linkFarmFromDrvs
}: {
  genModels = configs:
    let
      name = lib.strings.sanitizeDerivationName
        (builtins.concatStringsSep "_" ([ "local-ai-models" ] ++ (builtins.attrNames configs)));

      genModelFiles = name: config:
        let
          templateName = type: name + "_" + type;

          config' = lib.recursiveUpdate config ({
            inherit name;
          } // lib.optionalAttrs (lib.isDerivation config.parameters.model) {
            parameters.model = config.parameters.model.name;
          } // lib.optionalAttrs (config ? template) {
            template = builtins.mapAttrs (n: _: templateName n) config.template;
          });
        in
        [ (writers.writeYAML "${name}.yaml" config') ]
        ++ lib.optional (lib.isDerivation config.parameters.model)
          config.parameters.model
        ++ lib.optionals (config ? template)
          (lib.mapAttrsToList (n: writeText "${templateName n}.tmpl") config.template);
    in
    linkFarmFromDrvs name (lib.flatten (lib.mapAttrsToList genModelFiles configs));
}
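A minimal usage sketch of the genModels helper above, assuming it is reached through the package's passthru.lib attribute; the attribute name, URL, and hash below are placeholders, not values from this change. genModels turns an attribute set of LocalAI model configurations into one linked directory that can serve as the models path.

# Hypothetical sketch; "example-model", the URL and lib.fakeHash are placeholders.
{ lib, fetchurl, local-ai }:
local-ai.lib.genModels {
  example-model = {
    backend = "bert-embeddings";
    # A derivation given here is linked into the result and replaced by its
    # file name inside the generated "example-model.yaml".
    parameters.model = fetchurl {
      url = "https://example.org/ggml-model-f16.bin";
      hash = lib.fakeHash;
    };
  };
}

The generated YAML files, model blobs, and template files all land in a single linkFarm, which is what the tests further below pass to services.local-ai.models.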
module.nix (new file): +56 −0
{ pkgs, config, lib, ... }:
let
  cfg = config.services.local-ai;
  inherit (lib) mkOption types;
in
{
  options.services.local-ai = {
    enable = lib.mkEnableOption "local-ai";

    package = lib.mkPackageOption pkgs "local-ai" { };

    extraArgs = mkOption {
      type = types.listOf types.str;
      default = [ ];
    };

    port = mkOption {
      type = types.port;
      default = 8080;
    };

    threads = mkOption {
      type = types.int;
      default = 1;
    };

    models = mkOption {
      type = types.either types.package types.str;
      default = "models";
    };
  };

  config = lib.mkIf cfg.enable {
    systemd.services.local-ai = {
      wantedBy = [ "multi-user.target" ];
      serviceConfig = {
        DynamicUser = true;
        ExecStart = lib.escapeShellArgs ([
          "${cfg.package}/bin/local-ai"
          "--debug"
          "--address"
          ":${toString cfg.port}"
          "--threads"
          (toString cfg.threads)
          "--localai-config-dir"
          "."
          "--models-path"
          (toString cfg.models)
        ]
        ++ cfg.extraArgs);
        RuntimeDirectory = "local-ai";
        WorkingDirectory = "%t/local-ai";
      };
    };
  };
}
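Roughly how the module above might be used in a NixOS configuration; a sketch only, with the import path and the models location as placeholders. The module is not added to the NixOS module list by this change, so in this sketch it is imported explicitly.

# Hypothetical configuration fragment; ./module.nix and the models path are placeholders.
{ pkgs, ... }:
{
  imports = [ ./module.nix ];
  services.local-ai = {
    enable = true;
    port = 8080;
    threads = 4;
    # Either a package (for example the output of local-ai.lib.genModels)
    # or a directory name, per the option type above.
    models = "/var/lib/local-ai/models";
  };
}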
package.nix: +24 −16
@@ -6,6 +6,8 @@
 , fetchpatch
 , fetchFromGitHub
 , protobuf
+, protoc-gen-go
+, protoc-gen-go-grpc
 , grpc
 , openssl
 , llama-cpp
@@ -61,8 +63,8 @@ let

   inherit (cudaPackages) libcublas cuda_nvcc cuda_cccl cuda_cudart cudatoolkit;
 
-  go-llama-ggml = effectiveStdenv.mkDerivation {
-    name = "go-llama-ggml";
+  go-llama = effectiveStdenv.mkDerivation {
+    name = "go-llama";
     src = fetchFromGitHub {
       owner = "go-skynet";
       repo = "go-llama.cpp";
@@ -98,8 +100,8 @@ let
     src = fetchFromGitHub {
       owner = "ggerganov";
       repo = "llama.cpp";
-      rev = "1b67731e184e27a465b8c5476061294a4af668ea";
-      hash = "sha256-0WWbsklpW6HhFRkvWpYh8Lhi8VIansS/zmyIKNQRkIs=";
+      rev = "784e11dea1f5ce9638851b2b0dddb107e2a609c8";
+      hash = "sha256-yAQAUo5J+a6O2kTqhFL1UH0tANxpQn3JhAd3MByaC6I=";
       fetchSubmodules = true;
     };
     postPatch = prev.postPatch + ''
@@ -252,8 +254,8 @@ let
     src = fetchFromGitHub {
       owner = "ggerganov";
       repo = "whisper.cpp";
-      rev = "8f253ef3af1c62c04316ba4afa7145fc4d701a8c";
-      hash = "sha256-yHHjhpQIn99A/hqFwAb7TfTf4Q9KnKat93zyXS70bT8=";
+      rev = "858452d58dba3acdc3431c9bced2bb8cfd9bf418";
+      hash = "sha256-2fT3RgGpBex1mF6GJsVDo4rb0F31YqxTymsXcrpQAZk=";
     };
 
     nativeBuildInputs = [ cmake pkg-config ]
@@ -371,18 +373,18 @@ let
       stdenv;
 
   pname = "local-ai";
-  version = "2.12.4";
+  version = "2.13.0";
   src = fetchFromGitHub {
     owner = "go-skynet";
     repo = "LocalAI";
     rev = "v${version}";
-    hash = "sha256-piu2B6u4ZfxiOd9SXrE7jiiiwL2SM8EqXo2s5qeKRl0=";
+    hash = "sha256-jZE8Ow9FFhnx/jvsURLYlYtSuKpE4UWBezxg/mpHs9g=";
   };
 
   self = buildGoModule.override { stdenv = effectiveStdenv; } {
     inherit pname version src;
 
-    vendorHash = "sha256-8Hu1y/PK21twnB7D22ltslFFzRrsB8d1R2hkgIFB/XY=";
+    vendorHash = "sha256-nWNK2YekQnBSLx4ouNSe6esIe0yFuo69E0HStYLQANg=";
 
     env.NIX_CFLAGS_COMPILE = lib.optionalString with_stablediffusion " -isystem ${opencv}/include/opencv4";
 
@@ -392,12 +394,12 @@ let
       in
       ''
         sed -i Makefile \
-          -e 's;git clone.*go-llama-ggml$;${cp} ${go-llama-ggml} sources/go-llama-ggml;' \
+          -e 's;git clone.*go-llama\.cpp$;${cp} ${go-llama} sources/go-llama\.cpp;' \
           -e 's;git clone.*gpt4all$;${cp} ${gpt4all} sources/gpt4all;' \
           -e 's;git clone.*go-piper$;${cp} ${if with_tts then go-piper else go-piper.src} sources/go-piper;' \
-          -e 's;git clone.*go-rwkv$;${cp} ${go-rwkv} sources/go-rwkv;' \
+          -e 's;git clone.*go-rwkv\.cpp$;${cp} ${go-rwkv} sources/go-rwkv\.cpp;' \
           -e 's;git clone.*whisper\.cpp$;${cp} ${whisper-cpp.src} sources/whisper\.cpp;' \
-          -e 's;git clone.*go-bert$;${cp} ${go-bert} sources/go-bert;' \
+          -e 's;git clone.*go-bert\.cpp$;${cp} ${go-bert} sources/go-bert\.cpp;' \
           -e 's;git clone.*diffusion$;${cp} ${if with_stablediffusion then go-stable-diffusion else go-stable-diffusion.src} sources/go-stable-diffusion;' \
           -e 's;git clone.*go-tiny-dream$;${cp} ${if with_tinydream then go-tiny-dream else go-tiny-dream.src} sources/go-tiny-dream;' \
           -e 's, && git checkout.*,,g' \
@@ -415,14 +417,19 @@ let
       ++ lib.optionals with_stablediffusion go-stable-diffusion.buildInputs
       ++ lib.optionals with_tts go-piper.buildInputs;
 
-    nativeBuildInputs = [ makeWrapper ]
+    nativeBuildInputs = [
+      protobuf
+      protoc-gen-go
+      protoc-gen-go-grpc
+      makeWrapper
+    ]
     ++ lib.optionals with_cublas [ cuda_nvcc ];
 
     enableParallelBuilding = false;
 
     modBuildPhase = ''
       mkdir sources
-      make prepare-sources
+      make prepare-sources protogen-go
       go mod tidy -v
     '';
 
@@ -486,7 +493,7 @@ let

     passthru.local-packages = {
       inherit
-        go-tiny-dream go-rwkv go-bert go-llama-ggml gpt4all go-piper
+        go-tiny-dream go-rwkv go-bert go-llama gpt4all go-piper
         llama-cpp-grpc whisper-cpp go-tiny-dream-ncnn espeak-ng' piper-phonemize
         piper-tts';
     };
@@ -498,6 +505,7 @@ let
     };
 
     passthru.tests = callPackages ./tests.nix { inherit self; };
+    passthru.lib = callPackages ./lib.nix { };
 
     meta = with lib; {
       description = "OpenAI alternative to run local LLMs, image and audio generation";
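The with_cublas, with_clblas, with_tts and related flags referenced above, and in the feature guards of the tests below, are package arguments; what follows is a hedged sketch of how a variant would typically be selected with override. Only the flag names appear in this diff, their defaults do not, and the chosen values are illustrative.

# Hypothetical override; flag names are taken from the expressions above,
# the values are illustrative only.
{ pkgs }:
pkgs.local-ai.override {
  with_cublas = true; # CUDA (cuBLAS) accelerated backends
  with_tts = true;    # piper text-to-speech backend
}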
tests.nix: +188 −100
@@ -5,156 +5,244 @@
, fetchurl
, writers
, symlinkJoin
, linkFarmFromDrvs
, jq
}:
let
  common-config = { config, ... }: {
    imports = [ ./module.nix ];
    services.local-ai = {
      enable = true;
      package = self;
      threads = config.virtualisation.cores;
    };
  };

  inherit (self.lib) genModels;
in
{
  version = testers.testVersion {
    package = self;
    version = "v" + self.version;
    command = "local-ai --help";
  };

  health =
  health = testers.runNixOSTest ({ config, ... }: {
    name = self.name + "-health";
    nodes.machine = common-config;
    testScript =
      let
        port = "8080";
      in
    testers.runNixOSTest {
      name = self.name + "-health";
      nodes.machine = {
        systemd.services.local-ai = {
          wantedBy = [ "multi-user.target" ];
          serviceConfig.ExecStart = "${self}/bin/local-ai --debug --localai-config-dir . --address :${port}";
        };
      };
      testScript = ''
      ''
        machine.wait_for_open_port(${port})
        machine.succeed("curl -f http://localhost:${port}/readyz")
      '';
    };
  });

  # https://localai.io/docs/getting-started/manual/
  llama =
  # https://localai.io/features/embeddings/#bert-embeddings
  bert =
    let
      port = "8080";
      gguf = fetchurl {
        url = "https://huggingface.co/TheBloke/Luna-AI-Llama2-Uncensored-GGUF/resolve/main/luna-ai-llama2-uncensored.Q4_K_M.gguf";
        sha256 = "6a9dc401c84f0d48996eaa405174999c3a33bf12c2bfd8ea4a1e98f376de1f15";
      model = "embedding";
      model-configs.${model} = {
        # Note: q4_0 and q4_1 models cannot be loaded
        parameters.model = fetchurl {
          url = "https://huggingface.co/skeskinen/ggml/resolve/main/all-MiniLM-L6-v2/ggml-model-f16.bin";
          sha256 = "9c195b2453a4fef60a4f6be3a88a39211366214df6498a4fe4885c9e22314f50";
        };
        backend = "bert-embeddings";
        embeddings = true;
      };

      models = genModels model-configs;

      requests.request = {
        inherit model;
        input = "Your text string goes here";
      };
      models = linkFarmFromDrvs "models" [
        gguf
      ];
    in
    testers.runNixOSTest {
      name = self.name + "-llama";
      nodes.machine =
      name = self.name + "-bert";
      nodes.machine = {
        imports = [ common-config ];
        virtualisation.cores = 2;
        virtualisation.memorySize = 2048;
        services.local-ai.models = models;
      };
      passthru = { inherit models requests; };
      testScript =
        let
          cores = 4;
          port = "8080";
        in
        {
          virtualisation = {
            inherit cores;
            memorySize = 8192;
        ''
          machine.wait_for_open_port(${port})
          machine.succeed("curl -f http://localhost:${port}/readyz")
          machine.succeed("curl -f http://localhost:${port}/v1/models --output models.json")
          machine.succeed("${jq}/bin/jq --exit-status 'debug | .data[].id == \"${model}\"' models.json")
          machine.succeed("curl -f http://localhost:${port}/embeddings --json @${writers.writeJSON "request.json" requests.request} --output embeddings.json")
          machine.succeed("${jq}/bin/jq --exit-status 'debug | .model == \"${model}\"' embeddings.json")
        '';
    };
          systemd.services.local-ai = {
            wantedBy = [ "multi-user.target" ];
            serviceConfig.ExecStart = "${self}/bin/local-ai --debug --threads ${toString cores} --models-path ${models} --localai-config-dir . --address :${port}";

} // lib.optionalAttrs (!self.features.with_cublas && !self.features.with_clblas) {
  # https://localai.io/docs/getting-started/manual/
  llama =
    let
      model = "gpt-3.5-turbo";

      # https://localai.io/advanced/#full-config-model-file-reference
      model-configs.${model} = rec {
        context_size = 8192;
        parameters = {
          # https://huggingface.co/lmstudio-community/Meta-Llama-3-8B-Instruct-GGUF
          # https://ai.meta.com/blog/meta-llama-3/
          model = fetchurl {
            url = "https://huggingface.co/lmstudio-community/Meta-Llama-3-8B-Instruct-GGUF/resolve/main/Meta-Llama-3-8B-Instruct-Q4_K_M.gguf";
            sha256 = "ab9e4eec7e80892fd78f74d9a15d0299f1e22121cea44efd68a7a02a3fe9a1da";
          };
          # defaults from:
          # https://deepinfra.com/meta-llama/Meta-Llama-3-8B-Instruct
          temperature = 0.7;
          top_p = 0.9;
          top_k = 0;
          # the following parameter leads to outputs like: !!!!!!!!!!!!!!!!!!!
          #repeat_penalty = 1;
          presence_penalty = 0;
          frequency_penalty = 0;
          max_tokens = 100;
        };
        stopwords = [ "<|eot_id|>" ];
        template = {
          # Templates implement the following specifications
          # https://github.com/meta-llama/llama3/tree/main?tab=readme-ov-file#instruction-tuned-models
          # ... and are inspired by:
          # https://github.com/mudler/LocalAI/blob/master/embedded/models/llama3-instruct.yaml
          #
          # The rules for template evaluation are defined here:
          # https://pkg.go.dev/text/template
          chat_message = ''
            <|start_header_id|>{{.RoleName}}<|end_header_id|>

            {{.Content}}${builtins.head stopwords}'';

          chat = "<|begin_of_text|>{{.Input}}<|start_header_id|>assistant<|end_header_id|>";
        };
      };
      testScript =
        let

      models = genModels model-configs;

      requests = {
        # https://localai.io/features/text-generation/#chat-completions
          request-chat-completions = {
            model = gguf.name;
            messages = [{ role = "user"; content = "Say this is a test!"; }];
            temperature = 0.7;
        chat-completions = {
          inherit model;
          messages = [{ role = "user"; content = "1 + 2 = ?"; }];
        };
        # https://localai.io/features/text-generation/#edit-completions
          request-edit-completions = {
            model = gguf.name;
        edit-completions = {
          inherit model;
          instruction = "rephrase";
          input = "Black cat jumped out of the window";
            temperature = 0.7;
          max_tokens = 50;
        };
        # https://localai.io/features/text-generation/#completions
          request-completions = {
            model = gguf.name;
        completions = {
          inherit model;
          prompt = "A long time ago in a galaxy far, far away";
            temperature = 0.7;
        };
      };
    in
    testers.runNixOSTest {
      name = self.name + "-llama";
      nodes.machine = {
        imports = [ common-config ];
        virtualisation.cores = 4;
        virtualisation.memorySize = 8192;
        services.local-ai.models = models;
      };
      passthru = { inherit models requests; };
      testScript =
        let
          port = "8080";
        in
        ''
          machine.wait_for_open_port(${port})
          machine.succeed("curl -f http://localhost:${port}/readyz")
          machine.succeed("curl -f http://localhost:${port}/v1/models --output models.json")
          machine.succeed("${jq}/bin/jq --exit-status 'debug | .data[].id == \"${gguf.name}\"' models.json")
          machine.succeed("curl -f http://localhost:${port}/v1/chat/completions --json @${writers.writeJSON "request-chat-completions.json" request-chat-completions} --output chat-completions.json")
          machine.succeed("${jq}/bin/jq --exit-status 'debug | .data[].id == \"${model}\"' models.json")

          machine.succeed("curl -f http://localhost:${port}/v1/chat/completions --json @${writers.writeJSON "request-chat-completions.json" requests.chat-completions} --output chat-completions.json")
          machine.succeed("${jq}/bin/jq --exit-status 'debug | .object == \"chat.completion\"' chat-completions.json")
          machine.succeed("curl -f http://localhost:${port}/v1/edits --json @${writers.writeJSON "request-edit-completions.json" request-edit-completions} --output edit-completions.json")
          machine.succeed("${jq}/bin/jq --exit-status 'debug | .choices | first.message.content | tonumber == 3' chat-completions.json")

          machine.succeed("curl -f http://localhost:${port}/v1/edits --json @${writers.writeJSON "request-edit-completions.json" requests.edit-completions} --output edit-completions.json")
          machine.succeed("${jq}/bin/jq --exit-status 'debug | .object == \"edit\"' edit-completions.json")
          machine.succeed("curl -f http://localhost:${port}/v1/completions --json @${writers.writeJSON "request-completions.json" request-completions} --output completions.json")
          machine.succeed("${jq}/bin/jq --exit-status '.usage.completion_tokens | debug == ${toString requests.edit-completions.max_tokens}' edit-completions.json")

          machine.succeed("curl -f http://localhost:${port}/v1/completions --json @${writers.writeJSON "request-completions.json" requests.completions} --output completions.json")
          machine.succeed("${jq}/bin/jq --exit-status 'debug | .object ==\"text_completion\"' completions.json")
          machine.succeed("${jq}/bin/jq --exit-status '.usage.completion_tokens | debug == ${toString model-configs.${model}.parameters.max_tokens}' completions.json")
        '';
    };

} // lib.optionalAttrs self.features.with_tts {
} // lib.optionalAttrs (self.features.with_tts && !self.features.with_cublas && !self.features.with_clblas) {
  # https://localai.io/features/text-to-audio/#piper
  tts =
    let
      port = "8080";
      voice-en-us = fetchzip {
        url = "https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-en-us-danny-low.tar.gz";
        hash = "sha256-5wf+6H5HeQY0qgdqnAG1vSqtjIFM9lXH53OgouuPm0M=";
        stripRoot = false;
      };
      ggml-tiny-en = fetchurl {
      model-stt = "whisper-en";
      model-configs.${model-stt} = {
        backend = "whisper";
        parameters.model = fetchurl {
          url = "https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-tiny.en-q5_1.bin";
          hash = "sha256-x3xXZvHO8JtrfUfyG1Rsvd1BV4hrO11tT3CekeZsfCs=";
        };
      whisper-en = {
        name = "whisper-en";
        backend = "whisper";
        parameters.model = ggml-tiny-en.name;
      };
      models = symlinkJoin {
        name = "models";

      model-tts = "piper-en";
      model-configs.${model-tts} = {
        backend = "piper";
        parameters.model = "en-us-danny-low.onnx";
      };

      models =
        let
          models = genModels model-configs;
        in
        symlinkJoin {
          inherit (models) name;
          paths = [
          voice-en-us
          (linkFarmFromDrvs "whisper-en" [
            (writers.writeYAML "whisper-en.yaml" whisper-en)
            ggml-tiny-en
          ])
            models
            (fetchzip {
              url = "https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-en-us-danny-low.tar.gz";
              hash = "sha256-5wf+6H5HeQY0qgdqnAG1vSqtjIFM9lXH53OgouuPm0M=";
              stripRoot = false;
            })
          ];
        };

      requests.request = {
        model = model-tts;
        input = "Hello, how are you?";
      };
    in
    testers.runNixOSTest {
      name = self.name + "-tts";
      nodes.machine =
        let
          cores = 2;
        in
        {
          virtualisation = {
            inherit cores;
          };
          systemd.services.local-ai = {
            wantedBy = [ "multi-user.target" ];
            serviceConfig.ExecStart = "${self}/bin/local-ai --debug --threads ${toString cores} --models-path ${models} --localai-config-dir . --address :${port}";
          };
      nodes.machine = {
        imports = [ common-config ];
        virtualisation.cores = 2;
        services.local-ai.models = models;
      };
      passthru = { inherit models requests; };
      testScript =
        let
          request = {
            model = "en-us-danny-low.onnx";
            backend = "piper";
            input = "Hello, how are you?";
          };
          port = "8080";
        in
        ''
          machine.wait_for_open_port(${port})
          machine.succeed("curl -f http://localhost:${port}/readyz")
          machine.succeed("curl -f http://localhost:${port}/tts --json @${writers.writeJSON "request.json" request} --output out.wav")
          machine.succeed("curl -f http://localhost:${port}/v1/audio/transcriptions --header 'Content-Type: multipart/form-data' --form file=@out.wav --form model=${whisper-en.name} --output transcription.json")
          machine.succeed("${jq}/bin/jq --exit-status 'debug | .segments | first.text == \"${request.input}\"' transcription.json")
          machine.succeed("curl -f http://localhost:${port}/v1/models --output models.json")
          machine.succeed("${jq}/bin/jq --exit-status 'debug' models.json")
          machine.succeed("curl -f http://localhost:${port}/tts --json @${writers.writeJSON "request.json" requests.request} --output out.wav")
          machine.succeed("curl -f http://localhost:${port}/v1/audio/transcriptions --header 'Content-Type: multipart/form-data' --form file=@out.wav --form model=${model-stt} --output transcription.json")
          machine.succeed("${jq}/bin/jq --exit-status 'debug | .segments | first.text == \"${requests.request.input}\"' transcription.json")
        '';
    };
}
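Finally, a small sketch of selecting the VM tests defined above from a nixpkgs checkout; the ./. import path is a placeholder. Note that llama and tts only exist under the feature conditions in the optionalAttrs guards.

# Hypothetical expression; ./. stands for a nixpkgs checkout.
let
  pkgs = import ./. { };
in
{
  inherit (pkgs.local-ai.tests) version health bert;
  # llama additionally requires a build without cuBLAS/CLBlast;
  # tts requires the same plus with_tts; include them only for such variants.
}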