pkgs/by-name/lo/local-ai/lib.nix +29 −20

{
  lib,
  writers,
  writeText,
  linkFarmFromDrvs,
}:
{
  genModels =
    configs:
    let
      name = lib.strings.sanitizeDerivationName (
        builtins.concatStringsSep "_" ([ "local-ai-models" ] ++ (builtins.attrNames configs))
      );

      genModelFiles =
        name: config:
        let
          templateName = type: name + "_" + type;

          config' = lib.recursiveUpdate config (
            {
              inherit name;
            }
            // lib.optionalAttrs (lib.isDerivation config.parameters.model) {
              parameters.model = config.parameters.model.name;
            }
            // lib.optionalAttrs (config ? template) {
              template = builtins.mapAttrs (n: _: templateName n) config.template;
            }
          );
        in
        [ (writers.writeYAML "${name}.yaml" config') ]
        ++ lib.optional (lib.isDerivation config.parameters.model) config.parameters.model
        ++ lib.optionals (config ? template) (
          lib.mapAttrsToList (n: writeText "${templateName n}.tmpl") config.template
        );
    in
    linkFarmFromDrvs name (lib.flatten (lib.mapAttrsToList genModelFiles configs));
}
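For context on what the reformatted helper does: genModels takes an attribute set of model configurations keyed by model name, writes each config as YAML, collects any model derivations and template files, and links everything into one farm. A minimal usage sketch, assuming lib.nix is instantiated with callPackage; the model name, URL, and template body are illustrative placeholders, not part of this PR:

# Hypothetical caller of genModels; "phi-2", the URL, and the
# template string are placeholders.
{ pkgs }:
let
  local-ai-lib = pkgs.callPackage ./lib.nix { };
in
local-ai-lib.genModels {
  phi-2 = {
    parameters.model = pkgs.fetchurl {
      url = "https://example.org/phi-2.Q4_0.gguf"; # placeholder URL
      hash = pkgs.lib.fakeHash; # replace with the real hash
    };
    # each template value is written out as a <name>_<type>.tmpl file
    template.chat = "{{.Input}}";
  };
}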
pkgs/by-name/lo/local-ai/module.nix +25 −12

{
  pkgs,
  config,
  lib,
  ...
}:
let
  cfg = config.services.local-ai;
  inherit (lib) mkOption types;

@@ -35,7 +40,13 @@ in
    };

    logLevel = mkOption {
      type = types.enum [
        "error"
        "warn"
        "info"
        "debug"
        "trace"
      ];
      default = "warn";
    };
  };

@@ -46,7 +57,8 @@ in
    environment.LLAMACPP_PARALLEL = toString cfg.parallelRequests;
    serviceConfig = {
      DynamicUser = true;
      ExecStart = lib.escapeShellArgs (
        [
          "${cfg.package}/bin/local-ai"
          "--address=:${toString cfg.port}"
          "--threads=${toString cfg.threads}"
@@ -55,7 +67,8 @@ in
          "--log-level=${cfg.logLevel}"
        ]
        ++ lib.optional (cfg.parallelRequests > 1) "--parallel-requests"
        ++ cfg.extraArgs
      );
      RuntimeDirectory = "local-ai";
      WorkingDirectory = "%t/local-ai";
    };
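The module's options map directly onto the flags assembled in ExecStart. A sketch of a NixOS configuration exercising them; the specific values are examples only:

# Illustrative NixOS configuration using the options defined above.
{
  services.local-ai = {
    enable = true;
    port = 8080;
    threads = 4;
    logLevel = "info";
    # a value > 1 adds --parallel-requests and sets LLAMACPP_PARALLEL
    parallelRequests = 2;
  };
}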
pkgs/by-name/lo/local-ai/package.nix +288 −209

{
  config,
  callPackages,
  stdenv,
  lib,
  addDriverRunpath,
  fetchFromGitHub,
  protobuf,
  protoc-gen-go,
  protoc-gen-go-grpc,
  grpc,
  openssl,
  llama-cpp,

  # needed for audio-to-text
  ffmpeg,

  cmake,
  pkg-config,
  buildGoModule,
  makeWrapper,
  ncurses,
  which,

  enable_upx ? true,
  upx,

  # apply feature parameter names according to
  # https://github.com/NixOS/rfcs/pull/169

  # CPU extensions
  enable_avx ? true,
  enable_avx2 ? true,
  enable_avx512 ? stdenv.hostPlatform.avx512Support,
  enable_f16c ? true,
  enable_fma ? true,

  with_openblas ? false,
  openblas,

  with_cublas ? config.cudaSupport,
  cudaPackages,

  with_clblas ? false,
  clblast,
  ocl-icd,
  opencl-headers,

  with_tinydream ? false, # do not compile with cublas
  ncnn,

  with_stablediffusion ? true,
  opencv,

  with_tts ? true,
  onnxruntime,
  sonic,
  spdlog,
  fmt,
  espeak-ng,
  piper-tts,
}:
let
  BUILD_TYPE =
    assert (lib.count lib.id [
      with_openblas
      with_cublas
      with_clblas
    ]) <= 1;
    if with_openblas then
      "openblas"
    else if with_cublas then
      "cublas"
    else if with_clblas then
      "clblas"
    else
      "";

  inherit (cudaPackages)
    libcublas
    cuda_nvcc
    cuda_cccl
    cuda_cudart
    libcufft
    ;

  go-llama = effectiveStdenv.mkDerivation {
    name = "go-llama";

@@ -81,13 +97,21 @@ let
      "BUILD_TYPE=${BUILD_TYPE}"
    ];

    buildInputs =
      [ ]
      ++ lib.optionals with_cublas [
        cuda_cccl
        cuda_cudart
        libcublas
      ]
      ++ lib.optionals with_clblas [
        clblast
        ocl-icd
        opencl-headers
      ]
      ++ lib.optionals with_openblas [ openblas.dev ];

    nativeBuildInputs = [ cmake ] ++ lib.optionals with_cublas [ cuda_nvcc ];

    dontUseCmakeConfigure = true;

@@ -98,7 +122,8 @@ let
    '';
  };

  llama-cpp-rpc =
    (llama-cpp-grpc.overrideAttrs (prev: {
      name = "llama-cpp-rpc";
      cmakeFlags = prev.cmakeFlags ++ [
        (lib.cmakeBool "GGML_AVX" false)
@@ -107,23 +132,28 @@ let
        (lib.cmakeBool "GGML_FMA" false)
        (lib.cmakeBool "GGML_F16C" false)
      ];
    })).override
      {
        cudaSupport = false;
        openclSupport = false;
        blasSupport = false;
        rpcSupport = true;
      };

  llama-cpp-grpc =
    (llama-cpp.overrideAttrs (
      final: prev: {
        name = "llama-cpp-grpc";
        src = fetchFromGitHub {
          owner = "ggerganov";
          repo = "llama.cpp";
-         rev = "ed9d2854c9de4ae1f448334294e61167b04bec2a";
-         hash = "sha256-Xu2h9Zu+Q9utfFFmDWBOEu/EXth4xWRNoTMvPF5Fo/A=";
+         rev = "fc54ef0d1c138133a01933296d50a36a1ab64735";
+         hash = "sha256-o87EhrA2Oa98pwyb6GSUgwERY0/GWJiX7kvlxDv4zb4=";
          fetchSubmodules = true;
        };
        postPatch =
          prev.postPatch
          + ''
            cd examples
            cp -r --no-preserve=mode ${src}/backend/cpp/llama grpc-server
            cp llava/clip.* llava/llava.* grpc-server
@@ -148,33 +178,15 @@ let
          grpc
          openssl
        ];
      }
    )).override
      {
        cudaSupport = with_cublas;
        rocmSupport = false;
        openclSupport = with_clblas;
        blasSupport = with_openblas;
      };

-  gpt4all = stdenv.mkDerivation {
-    name = "gpt4all";
-    src = fetchFromGitHub {
-      owner = "nomic-ai";
-      repo = "gpt4all";
-      rev = "27a8b020c36b0df8f8b82a252d261cda47cf44b8";
-      hash = "sha256-djq1eK6ncvhkO3MNDgasDBUY/7WWcmZt/GJsHAulLdI=";
-      fetchSubmodules = true;
-    };
-    makeFlags = [ "-C gpt4all-bindings/golang" ];
-    buildFlags = [ "libgpt4all.a" ];
-    dontUseCmakeConfigure = true;
-    nativeBuildInputs = [ cmake ];
-    installPhase = ''
-      mkdir $out
-      tar cf - --exclude=CMakeFiles . \
-        | tar xf - -C $out
-    '';
-  };

  espeak-ng' = espeak-ng.overrideAttrs (self: {
    name = "espeak-ng'";
    inherit (go-piper) src;

@@ -199,8 +211,14 @@ let
    name = "piper-phonemize";
    inherit (go-piper) src;
    sourceRoot = "${go-piper.src.name}/piper-phonemize";
    buildInputs = [
      espeak-ng'
      onnxruntime
    ];
    nativeBuildInputs = [
      cmake
      pkg-config
    ];
    cmakeFlags = [
      (lib.cmakeFeature "ONNXRUNTIME_DIR" "${onnxruntime.dev}")
      (lib.cmakeFeature "ESPEAK_NG_DIR" "${espeak-ng'}")

@@ -240,7 +258,15 @@ let
        -e '/CXXFLAGS *= / s;$; -DSPDLOG_FMT_EXTERNAL=1;'
    '';
    buildFlags = [ "libpiper_binding.a" ];
    buildInputs = [
      piper-tts'
      espeak-ng'
      piper-phonemize
      sonic
      fmt
      spdlog
      onnxruntime
    ];
    installPhase = ''
      cp -r --no-preserve=mode $src $out
      mkdir -p $out/piper-phonemize/pi

@@ -273,16 +299,28 @@ let
    src = fetchFromGitHub {
      owner = "ggerganov";
      repo = "whisper.cpp";
-     rev = "6739eb83c3ca5cf40d24c6fe8442a761a1eb6248";
-     hash = "sha256-1yDdJVjIwYDJKn93zn4xOJXMoDTqaG2TvakjdHIMCxk=";
+     rev = "9e3c5345cd46ea718209db53464e426c3fe7a25e";
+     hash = "sha256-JOptyveuaKRLzeZ6GuB3A70IM7dk4we95g5o25XVXJI=";
    };

    nativeBuildInputs = [
      cmake
      pkg-config
    ] ++ lib.optionals with_cublas [ cuda_nvcc ];

    buildInputs =
      [ ]
      ++ lib.optionals with_cublas [
        cuda_cccl
        cuda_cudart
        libcublas
        libcufft
      ]
      ++ lib.optionals with_clblas [
        clblast
        ocl-icd
        opencl-headers
      ]
      ++ lib.optionals with_openblas [ openblas.dev ];

    cmakeFlags = [

@@ -379,7 +417,8 @@ let
    meta.broken = lib.versionOlder go-tiny-dream.stdenv.cc.version "13";
  };

  GO_TAGS =
    lib.optional with_tinydream "tinydream"
    ++ lib.optional with_tts "tts"
    ++ lib.optional with_stablediffusion "stablediffusion";

@@ -392,12 +431,12 @@ let
      stdenv;

  pname = "local-ai";
- version = "2.19.4";
+ version = "2.20.1";

  src = fetchFromGitHub {
    owner = "go-skynet";
    repo = "LocalAI";
    rev = "v${version}";
-   hash = "sha256-aKq6/DI+4+BvIEw6eONqPr3mZXuz7rMFN+FBypVj0Gc=";
+   hash = "sha256-FeZZC0Tg9JT9Yj0e27GOLSdHEtWl17AHK3j7epwPyY8=";
  };

  prepare-sources =

@@ -407,36 +446,41 @@ let
    ''
      mkdir sources
      ${cp} ${go-llama} sources/go-llama.cpp
-     ${cp} ${gpt4all} sources/gpt4all
      ${cp} ${if with_tts then go-piper else go-piper.src} sources/go-piper
      ${cp} ${go-rwkv} sources/go-rwkv.cpp
      ${cp} ${whisper-cpp.src} sources/whisper.cpp
      cp ${whisper-cpp}/lib/lib*.a sources/whisper.cpp
      ${cp} ${go-bert} sources/go-bert.cpp
      ${cp} ${
        if with_stablediffusion then go-stable-diffusion else go-stable-diffusion.src
      } sources/go-stable-diffusion
      ${cp} ${if with_tinydream then go-tiny-dream else go-tiny-dream.src} sources/go-tiny-dream
    '';

  self = buildGoModule.override { stdenv = effectiveStdenv; } {
    inherit pname version src;

-   vendorHash = "sha256-HEKE75+ixuNbM+KEuhbQQ/NYYEzVlGYOttPavftWKhk=";
+   vendorHash = "sha256-mDxp5frUIECSHKjxaJVqIP7mnIusvdT45Xlxc9+P5tE=";

    env.NIX_CFLAGS_COMPILE = lib.optionalString with_stablediffusion " -isystem ${opencv}/include/opencv4";

    postPatch =
      ''
        sed -i Makefile \
          -e '/mod download/ d' \
          -e '/^ALL_GRPC_BACKENDS+=backend-assets\/grpc\/llama-cpp-fallback/ d' \
          -e '/^ALL_GRPC_BACKENDS+=backend-assets\/grpc\/llama-cpp-avx/ d' \
          -e '/^ALL_GRPC_BACKENDS+=backend-assets\/grpc\/llama-cpp-cuda/ d' \
      ''
      + lib.optionalString with_cublas ''
        sed -i Makefile \
          -e '/^CGO_LDFLAGS_WHISPER?=/ s;$;-L${libcufft}/lib -L${cuda_cudart}/lib;'
      '';

    postConfigure =
      prepare-sources
      + ''
        shopt -s extglob
        mkdir -p backend-assets/grpc
        cp ${llama-cpp-grpc}/bin/grpc-server backend-assets/grpc/llama-cpp-avx2

@@ -451,9 +495,18 @@ let
        touch backend-assets/grpc/* backend-assets/util/* sources/**/lib*.a
      '';

    buildInputs =
      [ ]
      ++ lib.optionals with_cublas [
        cuda_cudart
        libcublas
        libcufft
      ]
      ++ lib.optionals with_clblas [
        clblast
        ocl-icd
        opencl-headers
      ]
      ++ lib.optionals with_openblas [ openblas.dev ]
      ++ lib.optionals with_stablediffusion go-stable-diffusion.buildInputs
      ++ lib.optionals with_tts go-piper.buildInputs;

@@ -465,13 +518,13 @@ let
      makeWrapper
      ncurses # tput
      which
    ] ++ lib.optional enable_upx upx ++ lib.optionals with_cublas [ cuda_nvcc ];

    enableParallelBuilding = false;

    modBuildPhase =
      prepare-sources
      + ''
        make protogen-go
        go mod tidy -v
      '';

@@ -482,7 +535,8 @@ let
    # containing spaces
    env.GO_TAGS = builtins.concatStringsSep " " GO_TAGS;

    makeFlags =
      [
        "VERSION=v${version}"
        "BUILD_TYPE=${BUILD_TYPE}"
      ]

@@ -516,7 +570,8 @@ let
    # raises an segmentation fault
    postFixup =
      let
        LD_LIBRARY_PATH =
          [ ]
          ++ lib.optionals with_cublas [
            # driverLink has to be first to avoid loading the stub version of libcuda.so
            # https://github.com/NixOS/nixpkgs/issues/320145#issuecomment-2190319327
@@ -524,10 +579,16 @@ let
            (lib.getLib libcublas)
            cuda_cudart
          ]
          ++ lib.optionals with_clblas [
            clblast
            ocl-icd
          ]
          ++ lib.optionals with_openblas [ openblas ]
          ++ lib.optionals with_tts [ piper-phonemize ]
          ++ lib.optionals (with_tts && enable_upx) [
            fmt
            spdlog
          ];
      in
      ''
        wrapProgram $out/bin/${pname} \

@@ -537,15 +598,30 @@ let
    passthru.local-packages = {
      inherit
        go-tiny-dream
        go-rwkv
        go-bert
        go-llama
        go-piper
        llama-cpp-grpc
        whisper-cpp
        go-tiny-dream-ncnn
        espeak-ng'
        piper-phonemize
        piper-tts'
        llama-cpp-rpc
        ;
    };

    passthru.features = {
      inherit
        with_cublas
        with_openblas
        with_tts
        with_stablediffusion
        with_tinydream
        with_clblas
        ;
    };

    passthru.tests = callPackages ./tests.nix { inherit self; };

@@ -555,7 +631,10 @@ let
      description = "OpenAI alternative to run local LLMs, image and audio generation";
      homepage = "https://localai.io";
      license = licenses.mit;
      maintainers = with maintainers; [
        onny
        ck3d
      ];
      platforms = platforms.linux;
    };
  };
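The feature parameters above are ordinary override knobs on the package. A sketch of toggling them; note that the assert in BUILD_TYPE permits at most one of with_openblas, with_cublas, and with_clblas:

# Illustrative override; picks the cublas backend and drops TTS.
# At most one of the three BLAS-style backends may be enabled.
local-ai.override {
  with_cublas = true;
  with_openblas = false;
  with_clblas = false;
  with_tts = false;
}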
pkgs/by-name/lo/local-ai/tests.nix +106 −85

{
  self,
  lib,
  testers,
  fetchzip,
  fetchurl,
  writers,
  symlinkJoin,
  jq,
  prom2json,
}:
let
  common-config =
    { config, ... }:
    {
      imports = [ ./module.nix ];
      services.local-ai = {
        enable = true;

@@ -73,7 +76,9 @@ in
      virtualisation.memorySize = 2048;
      services.local-ai.models = models;
    };
    passthru = {
      inherit models requests;
    };
    testScript =
      let
        port = "8080";

@@ -93,7 +98,8 @@ in
      '';
  };
}
// lib.optionalAttrs (!self.features.with_cublas && !self.features.with_clblas) {
  # https://localai.io/docs/getting-started/manual/
  llama =
    let

@@ -146,7 +152,12 @@ in
        # https://localai.io/features/text-generation/#chat-completions
        chat-completions = {
          inherit model;
          messages = [
            {
              role = "user";
              content = "1 + 2 = ?";
            }
          ];
        };
        # https://localai.io/features/text-generation/#edit-completions
        edit-completions = {

@@ -172,7 +183,9 @@ in
        # TODO: Add test case parallel requests
        services.local-ai.parallelRequests = 2;
      };
      passthru = {
        inherit models requests;
      };
      testScript =
        let
          port = "8080";

@@ -196,14 +209,20 @@ in
        machine.succeed("curl -f http://localhost:${port}/v1/completions --json @${writers.writeJSON "request-completions.json" requests.completions} --output completions.json")
        machine.copy_from_vm("completions.json")
        machine.succeed("${jq}/bin/jq --exit-status 'debug | .object ==\"text_completion\"' completions.json")
        machine.succeed("${jq}/bin/jq --exit-status '.usage.completion_tokens | debug == ${
          toString model-configs.${model}.parameters.max_tokens
        }' completions.json")
        machine.succeed("${prom2json}/bin/prom2json http://localhost:${port}/metrics > metrics.json")
        machine.copy_from_vm("metrics.json")
      '';
  };
}
// lib.optionalAttrs (self.features.with_tts && !self.features.with_cublas && !self.features.with_clblas) {
  # https://localai.io/features/text-to-audio/#piper
  tts =
    let

@@ -250,7 +269,9 @@ in
      virtualisation.cores = 2;
      services.local-ai.models = models;
    };
    passthru = {
      inherit models requests;
    };
    testScript =
      let
        port = "8080";
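Because the test attributes are merged in with lib.optionalAttrs over self.features, they only exist when the package was built with matching flags. A sketch of selecting them from an evaluation, assuming a nixpkgs checkout; the `or null` fallbacks are illustrative:

# Illustrative selection of the gated tests; attributes such as
# tests.llama exist only when the feature gates above hold.
with import <nixpkgs> { };
{
  llama = local-ai.tests.llama or null; # needs !with_cublas && !with_clblas
  tts = local-ai.tests.tts or null; # additionally needs with_tts
}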