pkgs/by-name/lo/local-ai/lib.nix (new file, +30 −0)

{ lib
, writers
, writeText
, linkFarmFromDrvs
}:

{
  genModels = configs:
    let
      name = lib.strings.sanitizeDerivationName
        (builtins.concatStringsSep "_" ([ "local-ai-models" ] ++ (builtins.attrNames configs)));

      genModelFiles = name: config:
        let
          templateName = type: name + "_" + type;

          config' = lib.recursiveUpdate config ({
            inherit name;
          } // lib.optionalAttrs (lib.isDerivation config.parameters.model) {
            parameters.model = config.parameters.model.name;
          } // lib.optionalAttrs (config ? template) {
            template = builtins.mapAttrs (n: _: templateName n) config.template;
          });
        in
        [ (writers.writeYAML "${name}.yaml" config') ]
        ++ lib.optional (lib.isDerivation config.parameters.model)
          config.parameters.model
        ++ lib.optionals (config ? template)
          (lib.mapAttrsToList (n: writeText "${templateName n}.tmpl") config.template);
    in
    linkFarmFromDrvs name (lib.flatten (lib.mapAttrsToList genModelFiles configs));
}
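To make the helper's behaviour concrete, here is a minimal sketch of how genModels (exposed further down in this PR as passthru.lib) could be consumed. The model name, URL and hash below are illustrative placeholders, not part of this change:

{ pkgs ? import <nixpkgs> { } }:

# Hypothetical caller of the genModels helper above; "example" and the GGUF
# URL/hash are placeholders.
pkgs.local-ai.lib.genModels {
  example = {
    backend = "llama";
    # a derivation here is replaced by its file name in the generated YAML,
    # and the fetched file itself is linked into the resulting farm
    parameters.model = pkgs.fetchurl {
      url = "https://example.org/example.Q4_K_M.gguf"; # placeholder
      hash = pkgs.lib.fakeHash;                        # placeholder
    };
    # each template attribute becomes an example_<name>.tmpl file,
    # referenced by name from example.yaml
    template.chat = "{{.Input}}";
  };
}
# => link farm "local-ai-models_example" containing example.yaml,
#    example_chat.tmpl and the fetched model file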
pkgs/by-name/lo/local-ai/module.nix (new file, +56 −0)

{ pkgs, config, lib, ... }:
let
  cfg = config.services.local-ai;
  inherit (lib) mkOption types;
in
{
  options.services.local-ai = {
    enable = lib.mkEnableOption "Enable service";

    package = lib.mkPackageOption pkgs "local-ai" { };

    extraArgs = mkOption {
      type = types.listOf types.str;
      default = [ ];
    };

    port = mkOption {
      type = types.port;
      default = 8080;
    };

    threads = mkOption {
      type = types.int;
      default = 1;
    };

    models = mkOption {
      type = types.either types.package types.str;
      default = "models";
    };
  };

  config = lib.mkIf cfg.enable {
    systemd.services.local-ai = {
      wantedBy = [ "multi-user.target" ];
      serviceConfig = {
        DynamicUser = true;
        ExecStart = lib.escapeShellArgs ([
          "${cfg.package}/bin/local-ai"
          "--debug"
          "--address"
          ":${toString cfg.port}"
          "--threads"
          (toString cfg.threads)
          "--localai-config-dir"
          "."
          "--models-path"
          (toString cfg.models)
        ] ++ cfg.extraArgs);
        RuntimeDirectory = "local-ai";
        WorkingDirectory = "%t/local-ai";
      };
    };
  };
}
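Since the module lives next to the package rather than under nixos/modules, a consumer has to import it explicitly. A minimal sketch of a NixOS configuration using it, assuming a nixpkgs checkout is reachable via pkgs.path; the option values are illustrative:

{ pkgs, ... }:
{
  # module.nix ships alongside the package, so import it by path
  imports = [ "${pkgs.path}/pkgs/by-name/lo/local-ai/module.nix" ];

  services.local-ai = {
    enable = true;
    port = 8080;
    threads = 4;
    # either a derivation/path containing model definitions (e.g. built with
    # genModels from lib.nix) or a directory name resolved inside the
    # service's working directory
    models = pkgs.local-ai.lib.genModels { /* model configs as in the lib.nix sketch above */ };
  };
}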
pkgs/by-name/lo/local-ai/package.nix +24 −16

@@ -6,6 +6,8 @@
 , fetchpatch
 , fetchFromGitHub
 , protobuf
+, protoc-gen-go
+, protoc-gen-go-grpc
 , grpc
 , openssl
 , llama-cpp

@@ -61,8 +63,8 @@ let
   inherit (cudaPackages) libcublas cuda_nvcc cuda_cccl cuda_cudart cudatoolkit;

-  go-llama-ggml = effectiveStdenv.mkDerivation {
-    name = "go-llama-ggml";
+  go-llama = effectiveStdenv.mkDerivation {
+    name = "go-llama";
     src = fetchFromGitHub {
       owner = "go-skynet";
       repo = "go-llama.cpp";

@@ -98,8 +100,8 @@ let
     src = fetchFromGitHub {
       owner = "ggerganov";
       repo = "llama.cpp";
-      rev = "1b67731e184e27a465b8c5476061294a4af668ea";
-      hash = "sha256-0WWbsklpW6HhFRkvWpYh8Lhi8VIansS/zmyIKNQRkIs=";
+      rev = "784e11dea1f5ce9638851b2b0dddb107e2a609c8";
+      hash = "sha256-yAQAUo5J+a6O2kTqhFL1UH0tANxpQn3JhAd3MByaC6I=";
       fetchSubmodules = true;
     };
     postPatch = prev.postPatch + ''

@@ -252,8 +254,8 @@ let
     src = fetchFromGitHub {
       owner = "ggerganov";
       repo = "whisper.cpp";
-      rev = "8f253ef3af1c62c04316ba4afa7145fc4d701a8c";
-      hash = "sha256-yHHjhpQIn99A/hqFwAb7TfTf4Q9KnKat93zyXS70bT8=";
+      rev = "858452d58dba3acdc3431c9bced2bb8cfd9bf418";
+      hash = "sha256-2fT3RgGpBex1mF6GJsVDo4rb0F31YqxTymsXcrpQAZk=";
     };

     nativeBuildInputs = [ cmake pkg-config ]

@@ -371,18 +373,18 @@ let
     stdenv;

   pname = "local-ai";
-  version = "2.12.4";
+  version = "2.13.0";

   src = fetchFromGitHub {
     owner = "go-skynet";
     repo = "LocalAI";
     rev = "v${version}";
-    hash = "sha256-piu2B6u4ZfxiOd9SXrE7jiiiwL2SM8EqXo2s5qeKRl0=";
+    hash = "sha256-jZE8Ow9FFhnx/jvsURLYlYtSuKpE4UWBezxg/mpHs9g=";
   };

   self = buildGoModule.override { stdenv = effectiveStdenv; } {
     inherit pname version src;
-    vendorHash = "sha256-8Hu1y/PK21twnB7D22ltslFFzRrsB8d1R2hkgIFB/XY=";
+    vendorHash = "sha256-nWNK2YekQnBSLx4ouNSe6esIe0yFuo69E0HStYLQANg=";

     env.NIX_CFLAGS_COMPILE = lib.optionalString with_stablediffusion " -isystem ${opencv}/include/opencv4";

@@ -392,12 +394,12 @@ let
   in
   ''
     sed -i Makefile \
-      -e 's;git clone.*go-llama-ggml$;${cp} ${go-llama-ggml} sources/go-llama-ggml;' \
+      -e 's;git clone.*go-llama\.cpp$;${cp} ${go-llama} sources/go-llama\.cpp;' \
       -e 's;git clone.*gpt4all$;${cp} ${gpt4all} sources/gpt4all;' \
       -e 's;git clone.*go-piper$;${cp} ${if with_tts then go-piper else go-piper.src} sources/go-piper;' \
-      -e 's;git clone.*go-rwkv$;${cp} ${go-rwkv} sources/go-rwkv;' \
+      -e 's;git clone.*go-rwkv\.cpp$;${cp} ${go-rwkv} sources/go-rwkv\.cpp;' \
       -e 's;git clone.*whisper\.cpp$;${cp} ${whisper-cpp.src} sources/whisper\.cpp;' \
-      -e 's;git clone.*go-bert$;${cp} ${go-bert} sources/go-bert;' \
+      -e 's;git clone.*go-bert\.cpp$;${cp} ${go-bert} sources/go-bert\.cpp;' \
       -e 's;git clone.*diffusion$;${cp} ${if with_stablediffusion then go-stable-diffusion else go-stable-diffusion.src} sources/go-stable-diffusion;' \
       -e 's;git clone.*go-tiny-dream$;${cp} ${if with_tinydream then go-tiny-dream else go-tiny-dream.src} sources/go-tiny-dream;' \
       -e 's, && git checkout.*,,g' \

@@ -415,14 +417,19 @@ let
     ++ lib.optionals with_stablediffusion go-stable-diffusion.buildInputs
     ++ lib.optionals with_tts go-piper.buildInputs;

-  nativeBuildInputs = [ makeWrapper ]
+  nativeBuildInputs = [ protobuf protoc-gen-go protoc-gen-go-grpc makeWrapper ]
     ++ lib.optionals with_cublas [ cuda_nvcc ];

   enableParallelBuilding = false;

   modBuildPhase = ''
     mkdir sources
-    make prepare-sources
+    make prepare-sources protogen-go
+    go mod tidy -v
   '';

@@ -486,7 +493,7 @@ let
   passthru.local-packages = {
     inherit
-      go-tiny-dream go-rwkv go-bert go-llama-ggml gpt4all go-piper
+      go-tiny-dream go-rwkv go-bert go-llama gpt4all go-piper
       llama-cpp-grpc whisper-cpp go-tiny-dream-ncnn espeak-ng' piper-phonemize piper-tts';
   };

@@ -498,6 +505,7 @@ let
   };

   passthru.tests = callPackages ./tests.nix { inherit self; };
+  passthru.lib = callPackages ./lib.nix { };

   meta = with lib; {
     description = "OpenAI alternative to run local LLMs, image and audio generation";
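The with_* flags referenced in these hunks, and surfaced as self.features in tests.nix, are arguments of the package expression, so the optional backends can be toggled from the outside. A sketch, assuming they behave like ordinary callPackage arguments:

# Sketch: toggling optional backends via override; the argument names are
# taken from the identifiers used in this diff (with_tts, with_cublas, ...).
local-ai.override {
  with_tts = true;     # piper / espeak-ng text-to-speech backend
  with_cublas = true;  # CUDA acceleration, pulls in cudaPackages (cuda_nvcc, libcublas, ...)
}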
pkgs/by-name/lo/local-ai/tests.nix +188 −100

@@ -5,156 +5,244 @@
, fetchurl
, writers
, symlinkJoin
, linkFarmFromDrvs
, jq
}:
let
  common-config = { config, ... }: {
    imports = [ ./module.nix ];
    services.local-ai = {
      enable = true;
      package = self;
      threads = config.virtualisation.cores;
    };
  };

  inherit (self.lib) genModels;
in
{
  version = testers.testVersion {
    package = self;
    version = "v" + self.version;
    command = "local-ai --help";
  };

  health = testers.runNixOSTest ({ config, ... }: {
    name = self.name + "-health";
    nodes.machine = common-config;
    testScript =
      let
        port = "8080";
      in
      ''
        machine.wait_for_open_port(${port})
        machine.succeed("curl -f http://localhost:${port}/readyz")
      '';
  });

  # https://localai.io/features/embeddings/#bert-embeddings
  bert =
    let
      model = "embedding";
      model-configs.${model} = {
        # Note: q4_0 and q4_1 models can not be loaded
        parameters.model = fetchurl {
          url = "https://huggingface.co/skeskinen/ggml/resolve/main/all-MiniLM-L6-v2/ggml-model-f16.bin";
          sha256 = "9c195b2453a4fef60a4f6be3a88a39211366214df6498a4fe4885c9e22314f50";
        };
        backend = "bert-embeddings";
        embeddings = true;
      };

      models = genModels model-configs;

      requests.request = {
        inherit model;
        input = "Your text string goes here";
      };
    in
    testers.runNixOSTest {
      name = self.name + "-bert";
      nodes.machine = {
        imports = [ common-config ];
        virtualisation.cores = 2;
        virtualisation.memorySize = 2048;
        services.local-ai.models = models;
      };
      passthru = { inherit models requests; };
      testScript =
        let
          port = "8080";
        in
        ''
          machine.wait_for_open_port(${port})
          machine.succeed("curl -f http://localhost:${port}/readyz")
          machine.succeed("curl -f http://localhost:${port}/v1/models --output models.json")
          machine.succeed("${jq}/bin/jq --exit-status 'debug | .data[].id == \"${model}\"' models.json")
          machine.succeed("curl -f http://localhost:${port}/embeddings --json @${writers.writeJSON "request.json" requests.request} --output embeddings.json")
          machine.succeed("${jq}/bin/jq --exit-status 'debug | .model == \"${model}\"' embeddings.json")
        '';
    };

} // lib.optionalAttrs (!self.features.with_cublas && !self.features.with_clblas) {

  # https://localai.io/docs/getting-started/manual/
  llama =
    let
      model = "gpt-3.5-turbo";

      # https://localai.io/advanced/#full-config-model-file-reference
      model-configs.${model} = rec {
        context_size = 8192;
        parameters = {
          # https://huggingface.co/lmstudio-community/Meta-Llama-3-8B-Instruct-GGUF
          # https://ai.meta.com/blog/meta-llama-3/
          model = fetchurl {
            url = "https://huggingface.co/lmstudio-community/Meta-Llama-3-8B-Instruct-GGUF/resolve/main/Meta-Llama-3-8B-Instruct-Q4_K_M.gguf";
            sha256 = "ab9e4eec7e80892fd78f74d9a15d0299f1e22121cea44efd68a7a02a3fe9a1da";
          };
          # defaults from:
          # https://deepinfra.com/meta-llama/Meta-Llama-3-8B-Instruct
          temperature = 0.7;
          top_p = 0.9;
          top_k = 0;
          # following parameter leads to outputs like: !!!!!!!!!!!!!!!!!!!
          #repeat_penalty = 1;
          presence_penalty = 0;
          frequency_penalty = 0;
          max_tokens = 100;
        };
        stopwords = [ "<|eot_id|>" ];
        template = {
          # Templates implement the following specifications:
          # https://github.com/meta-llama/llama3/tree/main?tab=readme-ov-file#instruction-tuned-models
          # ... and are inspired by:
          # https://github.com/mudler/LocalAI/blob/master/embedded/models/llama3-instruct.yaml
          #
          # The rules for template evaluation are defined here:
          # https://pkg.go.dev/text/template
          chat_message = ''
            <|start_header_id|>{{.RoleName}}<|end_header_id|>

            {{.Content}}${builtins.head stopwords}'';
          chat = "<|begin_of_text|>{{.Input}}<|start_header_id|>assistant<|end_header_id|>";
        };
      };

      models = genModels model-configs;

      requests = {
        # https://localai.io/features/text-generation/#chat-completions
        chat-completions = {
          inherit model;
          messages = [{ role = "user"; content = "1 + 2 = ?"; }];
        };
        # https://localai.io/features/text-generation/#edit-completions
        edit-completions = {
          inherit model;
          instruction = "rephrase";
          input = "Black cat jumped out of the window";
          max_tokens = 50;
        };
        # https://localai.io/features/text-generation/#completions
        completions = {
          inherit model;
          prompt = "A long time ago in a galaxy far, far away";
        };
      };
    in
    testers.runNixOSTest {
      name = self.name + "-llama";
      nodes.machine = {
        imports = [ common-config ];
        virtualisation.cores = 4;
        virtualisation.memorySize = 8192;
        services.local-ai.models = models;
      };
      passthru = { inherit models requests; };
      testScript =
        let
          port = "8080";
        in
        ''
          machine.wait_for_open_port(${port})
          machine.succeed("curl -f http://localhost:${port}/readyz")

          machine.succeed("curl -f http://localhost:${port}/v1/models --output models.json")
          machine.succeed("${jq}/bin/jq --exit-status 'debug | .data[].id == \"${model}\"' models.json")

          machine.succeed("curl -f http://localhost:${port}/v1/chat/completions --json @${writers.writeJSON "request-chat-completions.json" requests.chat-completions} --output chat-completions.json")
          machine.succeed("${jq}/bin/jq --exit-status 'debug | .object == \"chat.completion\"' chat-completions.json")
          machine.succeed("${jq}/bin/jq --exit-status 'debug | .choices | first.message.content | tonumber == 3' chat-completions.json")

          machine.succeed("curl -f http://localhost:${port}/v1/edits --json @${writers.writeJSON "request-edit-completions.json" requests.edit-completions} --output edit-completions.json")
          machine.succeed("${jq}/bin/jq --exit-status 'debug | .object == \"edit\"' edit-completions.json")
          machine.succeed("${jq}/bin/jq --exit-status '.usage.completion_tokens | debug == ${toString requests.edit-completions.max_tokens}' edit-completions.json")

          machine.succeed("curl -f http://localhost:${port}/v1/completions --json @${writers.writeJSON "request-completions.json" requests.completions} --output completions.json")
          machine.succeed("${jq}/bin/jq --exit-status 'debug | .object ==\"text_completion\"' completions.json")
          machine.succeed("${jq}/bin/jq --exit-status '.usage.completion_tokens | debug == ${toString model-configs.${model}.parameters.max_tokens}' completions.json")
        '';
    };

} // lib.optionalAttrs (self.features.with_tts && !self.features.with_cublas && !self.features.with_clblas) {

  # https://localai.io/features/text-to-audio/#piper
  tts =
    let
      model-stt = "whisper-en";
      model-configs.${model-stt} = {
        backend = "whisper";
        parameters.model = fetchurl {
          url = "https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-tiny.en-q5_1.bin";
          hash = "sha256-x3xXZvHO8JtrfUfyG1Rsvd1BV4hrO11tT3CekeZsfCs=";
        };
      };

      model-tts = "piper-en";
      model-configs.${model-tts} = {
        backend = "piper";
        parameters.model = "en-us-danny-low.onnx";
      };

      models =
        let
          models = genModels model-configs;
        in
        symlinkJoin {
          inherit (models) name;
          paths = [
            models
            (fetchzip {
              url = "https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-en-us-danny-low.tar.gz";
              hash = "sha256-5wf+6H5HeQY0qgdqnAG1vSqtjIFM9lXH53OgouuPm0M=";
              stripRoot = false;
            })
          ];
        };

      requests.request = {
        model = model-tts;
        input = "Hello, how are you?";
      };
    in
    testers.runNixOSTest {
      name = self.name + "-tts";
      nodes.machine = {
        imports = [ common-config ];
        virtualisation.cores = 2;
        services.local-ai.models = models;
      };
      passthru = { inherit models requests; };
      testScript =
        let
          port = "8080";
        in
        ''
          machine.wait_for_open_port(${port})
          machine.succeed("curl -f http://localhost:${port}/readyz")
          machine.succeed("curl -f http://localhost:${port}/v1/models --output models.json")
          machine.succeed("${jq}/bin/jq --exit-status 'debug' models.json")

          machine.succeed("curl -f http://localhost:${port}/tts --json @${writers.writeJSON "request.json" requests.request} --output out.wav")
          machine.succeed("curl -f http://localhost:${port}/v1/audio/transcriptions --header 'Content-Type: multipart/form-data' --form file=@out.wav --form model=${model-stt} --output transcription.json")
          machine.succeed("${jq}/bin/jq --exit-status 'debug | .segments | first.text == \"${requests.request.input}\"' transcription.json")
        '';
    };
}