local-ai: 2.19.4 -> 2.20.1 (#336871) (14f8e6c6) · Commits · mdf / nixpkgs

pkgs/by-name/lo/local-ai/lib.nix

+29 −20

Original line number	Diff line number	Diff line
		{ lib
		, writers
		, writeText
		, linkFarmFromDrvs
		}: {
		genModels = configs:
		{
		lib,
		writers,
		writeText,
		linkFarmFromDrvs,
		}:
		{
		genModels =
		configs:
		let
		name = lib.strings.sanitizeDerivationName
		(builtins.concatStringsSep "_" ([ "local-ai-models" ] ++ (builtins.attrNames configs)));
		name = lib.strings.sanitizeDerivationName (
		builtins.concatStringsSep "_" ([ "local-ai-models" ] ++ (builtins.attrNames configs))
		);

		genModelFiles = name: config:
		genModelFiles =
		name: config:
		let
		templateName = type: name + "_" + type;

		config' = lib.recursiveUpdate config ({
		config' = lib.recursiveUpdate config (
		{
		inherit name;
		} // lib.optionalAttrs (lib.isDerivation config.parameters.model) {
		}
		// lib.optionalAttrs (lib.isDerivation config.parameters.model) {
		parameters.model = config.parameters.model.name;
		} // lib.optionalAttrs (config ? template) {
		}
		// lib.optionalAttrs (config ? template) {
		template = builtins.mapAttrs (n: _: templateName n) config.template;
		});
		}
		);
		in
		[ (writers.writeYAML "${name}.yaml" config') ]
		++ lib.optional (lib.isDerivation config.parameters.model)
		config.parameters.model
		++ lib.optionals (config ? template)
		(lib.mapAttrsToList (n: writeText "${templateName n}.tmpl") config.template);
		++ lib.optional (lib.isDerivation config.parameters.model) config.parameters.model
		++ lib.optionals (config ? template) (
		lib.mapAttrsToList (n: writeText "${templateName n}.tmpl") config.template
		);
		in
		linkFarmFromDrvs name (lib.flatten (lib.mapAttrsToList genModelFiles configs));
		}

pkgs/by-name/lo/local-ai/module.nix

+25 −12

Original line number	Diff line number	Diff line
		{ pkgs, config, lib, ... }:
		{
		pkgs,
		config,
		lib,
		...
		}:
		let
		cfg = config.services.local-ai;
		inherit (lib) mkOption types;
		@@ -35,7 +40,13 @@ in
		};

		logLevel = mkOption {
		type = types.enum [ "error" "warn" "info" "debug" "trace" ];
		type = types.enum [
		"error"
		"warn"
		"info"
		"debug"
		"trace"
		];
		default = "warn";
		};
		};
		@@ -46,7 +57,8 @@ in
		environment.LLAMACPP_PARALLEL = toString cfg.parallelRequests;
		serviceConfig = {
		DynamicUser = true;
		ExecStart = lib.escapeShellArgs ([
		ExecStart = lib.escapeShellArgs (
		[
		"${cfg.package}/bin/local-ai"
		"--address=:${toString cfg.port}"
		"--threads=${toString cfg.threads}"
		@@ -55,7 +67,8 @@ in
		"--log-level=${cfg.logLevel}"
		]
		++ lib.optional (cfg.parallelRequests > 1) "--parallel-requests"
		++ cfg.extraArgs);
		++ cfg.extraArgs
		);
		RuntimeDirectory = "local-ai";
		WorkingDirectory = "%t/local-ai";
		};

pkgs/by-name/lo/local-ai/package.nix

+288 −209

Original line number	Diff line number	Diff line
		{ config
		, callPackages
		, stdenv
		, lib
		, addDriverRunpath
		, fetchFromGitHub
		, protobuf
		, protoc-gen-go
		, protoc-gen-go-grpc
		, grpc
		, openssl
		, llama-cpp
		{
		config,
		callPackages,
		stdenv,
		lib,
		addDriverRunpath,
		fetchFromGitHub,
		protobuf,
		protoc-gen-go,
		protoc-gen-go-grpc,
		grpc,
		openssl,
		llama-cpp,
		# needed for audio-to-text
		, ffmpeg
		, cmake
		, pkg-config
		, buildGoModule
		, makeWrapper
		, ncurses
		, which
		ffmpeg,
		cmake,
		pkg-config,
		buildGoModule,
		makeWrapper,
		ncurses,
		which,

		, enable_upx ? true
		, upx
		enable_upx ? true,
		upx,

		# apply feature parameter names according to
		# https://github.com/NixOS/rfcs/pull/169

		# CPU extensions
		, enable_avx ? true
		, enable_avx2 ? true
		, enable_avx512 ? stdenv.hostPlatform.avx512Support
		, enable_f16c ? true
		, enable_fma ? true

		, with_openblas ? false
		, openblas

		, with_cublas ? config.cudaSupport
		, cudaPackages

		, with_clblas ? false
		, clblast
		, ocl-icd
		, opencl-headers

		, with_tinydream ? false # do not compile with cublas
		, ncnn

		, with_stablediffusion ? true
		, opencv

		, with_tts ? true
		, onnxruntime
		, sonic
		, spdlog
		, fmt
		, espeak-ng
		, piper-tts
		enable_avx ? true,
		enable_avx2 ? true,
		enable_avx512 ? stdenv.hostPlatform.avx512Support,
		enable_f16c ? true,
		enable_fma ? true,

		with_openblas ? false,
		openblas,

		with_cublas ? config.cudaSupport,
		cudaPackages,

		with_clblas ? false,
		clblast,
		ocl-icd,
		opencl-headers,

		with_tinydream ? false, # do not compile with cublas
		ncnn,

		with_stablediffusion ? true,
		opencv,

		with_tts ? true,
		onnxruntime,
		sonic,
		spdlog,
		fmt,
		espeak-ng,
		piper-tts,
		}:
		let
		BUILD_TYPE =
		assert (lib.count lib.id [ with_openblas with_cublas with_clblas ]) <= 1;
		if with_openblas then "openblas"
		else if with_cublas then "cublas"
		else if with_clblas then "clblas"
		else "";
		assert
		(lib.count lib.id [
		with_openblas
		with_cublas
		with_clblas
		]) <= 1;
		if with_openblas then
		"openblas"
		else if with_cublas then
		"cublas"
		else if with_clblas then
		"clblas"
		else
		"";

		inherit (cudaPackages) libcublas cuda_nvcc cuda_cccl cuda_cudart libcufft;
		inherit (cudaPackages)
		libcublas
		cuda_nvcc
		cuda_cccl
		cuda_cudart
		libcufft
		;

		go-llama = effectiveStdenv.mkDerivation {
		name = "go-llama";
		@@ -81,13 +97,21 @@ let
		"BUILD_TYPE=${BUILD_TYPE}"
		];

		buildInputs = [ ]
		++ lib.optionals with_cublas [ cuda_cccl cuda_cudart libcublas ]
		++ lib.optionals with_clblas [ clblast ocl-icd opencl-headers ]
		buildInputs =
		[ ]
		++ lib.optionals with_cublas [
		cuda_cccl
		cuda_cudart
		libcublas
		]
		++ lib.optionals with_clblas [
		clblast
		ocl-icd
		opencl-headers
		]
		++ lib.optionals with_openblas [ openblas.dev ];

		nativeBuildInputs = [ cmake ]
		++ lib.optionals with_cublas [ cuda_nvcc ];
		nativeBuildInputs = [ cmake ] ++ lib.optionals with_cublas [ cuda_nvcc ];

		dontUseCmakeConfigure = true;

		@@ -98,7 +122,8 @@ let
		'';
		};

		llama-cpp-rpc = (llama-cpp-grpc.overrideAttrs (prev: {
		llama-cpp-rpc =
		(llama-cpp-grpc.overrideAttrs (prev: {
		name = "llama-cpp-rpc";
		cmakeFlags = prev.cmakeFlags ++ [
		(lib.cmakeBool "GGML_AVX" false)
		@@ -107,23 +132,28 @@ let
		(lib.cmakeBool "GGML_FMA" false)
		(lib.cmakeBool "GGML_F16C" false)
		];
		})).override {
		})).override
		{
		cudaSupport = false;
		openclSupport = false;
		blasSupport = false;
		rpcSupport = true;
		};

		llama-cpp-grpc = (llama-cpp.overrideAttrs (final: prev: {
		llama-cpp-grpc =
		(llama-cpp.overrideAttrs (
		final: prev: {
		name = "llama-cpp-grpc";
		src = fetchFromGitHub {
		owner = "ggerganov";
		repo = "llama.cpp";
		rev = "ed9d2854c9de4ae1f448334294e61167b04bec2a";
		hash = "sha256-Xu2h9Zu+Q9utfFFmDWBOEu/EXth4xWRNoTMvPF5Fo/A=";
		rev = "fc54ef0d1c138133a01933296d50a36a1ab64735";
		hash = "sha256-o87EhrA2Oa98pwyb6GSUgwERY0/GWJiX7kvlxDv4zb4=";
		fetchSubmodules = true;
		};
		postPatch = prev.postPatch + ''
		postPatch =
		prev.postPatch
		+ ''
		cd examples
		cp -r --no-preserve=mode ${src}/backend/cpp/llama grpc-server
		cp llava/clip.* llava/llava.* grpc-server
		@@ -148,33 +178,15 @@ let
		grpc
		openssl
		];
		})).override {
		}
		)).override
		{
		cudaSupport = with_cublas;
		rocmSupport = false;
		openclSupport = with_clblas;
		blasSupport = with_openblas;
		};

		gpt4all = stdenv.mkDerivation {
		name = "gpt4all";
		src = fetchFromGitHub {
		owner = "nomic-ai";
		repo = "gpt4all";
		rev = "27a8b020c36b0df8f8b82a252d261cda47cf44b8";
		hash = "sha256-djq1eK6ncvhkO3MNDgasDBUY/7WWcmZt/GJsHAulLdI=";
		fetchSubmodules = true;
		};
		makeFlags = [ "-C gpt4all-bindings/golang" ];
		buildFlags = [ "libgpt4all.a" ];
		dontUseCmakeConfigure = true;
		nativeBuildInputs = [ cmake ];
		installPhase = ''
		mkdir $out
		tar cf - --exclude=CMakeFiles . \
		| tar xf - -C $out
		'';
		};

		espeak-ng' = espeak-ng.overrideAttrs (self: {
		name = "espeak-ng'";
		inherit (go-piper) src;
		@@ -199,8 +211,14 @@ let
		name = "piper-phonemize";
		inherit (go-piper) src;
		sourceRoot = "${go-piper.src.name}/piper-phonemize";
		buildInputs = [ espeak-ng' onnxruntime ];
		nativeBuildInputs = [ cmake pkg-config ];
		buildInputs = [
		espeak-ng'
		onnxruntime
		];
		nativeBuildInputs = [
		cmake
		pkg-config
		];
		cmakeFlags = [
		(lib.cmakeFeature "ONNXRUNTIME_DIR" "${onnxruntime.dev}")
		(lib.cmakeFeature "ESPEAK_NG_DIR" "${espeak-ng'}")
		@@ -240,7 +258,15 @@ let
		-e '/CXXFLAGS *= / s;$; -DSPDLOG_FMT_EXTERNAL=1;'
		'';
		buildFlags = [ "libpiper_binding.a" ];
		buildInputs = [ piper-tts' espeak-ng' piper-phonemize sonic fmt spdlog onnxruntime ];
		buildInputs = [
		piper-tts'
		espeak-ng'
		piper-phonemize
		sonic
		fmt
		spdlog
		onnxruntime
		];
		installPhase = ''
		cp -r --no-preserve=mode $src $out
		mkdir -p $out/piper-phonemize/pi
		@@ -273,16 +299,28 @@ let
		src = fetchFromGitHub {
		owner = "ggerganov";
		repo = "whisper.cpp";
		rev = "6739eb83c3ca5cf40d24c6fe8442a761a1eb6248";
		hash = "sha256-1yDdJVjIwYDJKn93zn4xOJXMoDTqaG2TvakjdHIMCxk=";
		rev = "9e3c5345cd46ea718209db53464e426c3fe7a25e";
		hash = "sha256-JOptyveuaKRLzeZ6GuB3A70IM7dk4we95g5o25XVXJI=";
		};

		nativeBuildInputs = [ cmake pkg-config ]
		++ lib.optionals with_cublas [ cuda_nvcc ];
		nativeBuildInputs = [
		cmake
		pkg-config
		] ++ lib.optionals with_cublas [ cuda_nvcc ];

		buildInputs = [ ]
		++ lib.optionals with_cublas [ cuda_cccl cuda_cudart libcublas libcufft ]
		++ lib.optionals with_clblas [ clblast ocl-icd opencl-headers ]
		buildInputs =
		[ ]
		++ lib.optionals with_cublas [
		cuda_cccl
		cuda_cudart
		libcublas
		libcufft
		]
		++ lib.optionals with_clblas [
		clblast
		ocl-icd
		opencl-headers
		]
		++ lib.optionals with_openblas [ openblas.dev ];

		cmakeFlags = [
		@@ -379,7 +417,8 @@ let
		meta.broken = lib.versionOlder go-tiny-dream.stdenv.cc.version "13";
		};

		GO_TAGS = lib.optional with_tinydream "tinydream"
		GO_TAGS =
		lib.optional with_tinydream "tinydream"
		++ lib.optional with_tts "tts"
		++ lib.optional with_stablediffusion "stablediffusion";

		@@ -392,12 +431,12 @@ let
		stdenv;

		pname = "local-ai";
		version = "2.19.4";
		version = "2.20.1";
		src = fetchFromGitHub {
		owner = "go-skynet";
		repo = "LocalAI";
		rev = "v${version}";
		hash = "sha256-aKq6/DI+4+BvIEw6eONqPr3mZXuz7rMFN+FBypVj0Gc=";
		hash = "sha256-FeZZC0Tg9JT9Yj0e27GOLSdHEtWl17AHK3j7epwPyY8=";
		};

		prepare-sources =
		@@ -407,36 +446,41 @@ let
		''
		mkdir sources
		${cp} ${go-llama} sources/go-llama.cpp
		${cp} ${gpt4all} sources/gpt4all
		${cp} ${if with_tts then go-piper else go-piper.src} sources/go-piper
		${cp} ${go-rwkv} sources/go-rwkv.cpp
		${cp} ${whisper-cpp.src} sources/whisper.cpp
		cp ${whisper-cpp}/lib/lib*.a sources/whisper.cpp
		${cp} ${go-bert} sources/go-bert.cpp
		${cp} ${if with_stablediffusion then go-stable-diffusion else go-stable-diffusion.src} sources/go-stable-diffusion
		${cp} ${
		if with_stablediffusion then go-stable-diffusion else go-stable-diffusion.src
		} sources/go-stable-diffusion
		${cp} ${if with_tinydream then go-tiny-dream else go-tiny-dream.src} sources/go-tiny-dream
		'';

		self = buildGoModule.override { stdenv = effectiveStdenv; } {
		inherit pname version src;

		vendorHash = "sha256-HEKE75+ixuNbM+KEuhbQQ/NYYEzVlGYOttPavftWKhk=";
		vendorHash = "sha256-mDxp5frUIECSHKjxaJVqIP7mnIusvdT45Xlxc9+P5tE=";

		env.NIX_CFLAGS_COMPILE = lib.optionalString with_stablediffusion " -isystem ${opencv}/include/opencv4";

		postPatch = ''
		postPatch =
		''
		sed -i Makefile \
		-e '/mod download/ d' \
		-e '/^ALL_GRPC_BACKENDS+=backend-assets\/grpc\/llama-cpp-fallback/ d' \
		-e '/^ALL_GRPC_BACKENDS+=backend-assets\/grpc\/llama-cpp-avx/ d' \
		-e '/^ALL_GRPC_BACKENDS+=backend-assets\/grpc\/llama-cpp-cuda/ d' \

		'' + lib.optionalString with_cublas ''
		''
		+ lib.optionalString with_cublas ''
		sed -i Makefile \
		-e '/^CGO_LDFLAGS_WHISPER?=/ s;$;-L${libcufft}/lib -L${cuda_cudart}/lib;'
		'';

		postConfigure = prepare-sources + ''
		postConfigure =
		prepare-sources
		+ ''
		shopt -s extglob
		mkdir -p backend-assets/grpc
		cp ${llama-cpp-grpc}/bin/grpc-server backend-assets/grpc/llama-cpp-avx2
		@@ -451,9 +495,18 @@ let
		touch backend-assets/grpc/* backend-assets/util/* sources/*/lib.a
		'';

		buildInputs = [ ]
		++ lib.optionals with_cublas [ cuda_cudart libcublas libcufft ]
		++ lib.optionals with_clblas [ clblast ocl-icd opencl-headers ]
		buildInputs =
		[ ]
		++ lib.optionals with_cublas [
		cuda_cudart
		libcublas
		libcufft
		]
		++ lib.optionals with_clblas [
		clblast
		ocl-icd
		opencl-headers
		]
		++ lib.optionals with_openblas [ openblas.dev ]
		++ lib.optionals with_stablediffusion go-stable-diffusion.buildInputs
		++ lib.optionals with_tts go-piper.buildInputs;
		@@ -465,13 +518,13 @@ let
		makeWrapper
		ncurses # tput
		which
		]
		++ lib.optional enable_upx upx
		++ lib.optionals with_cublas [ cuda_nvcc ];
		] ++ lib.optional enable_upx upx ++ lib.optionals with_cublas [ cuda_nvcc ];

		enableParallelBuilding = false;

		modBuildPhase = prepare-sources + ''
		modBuildPhase =
		prepare-sources
		+ ''
		make protogen-go
		go mod tidy -v
		'';
		@@ -482,7 +535,8 @@ let
		# containing spaces
		env.GO_TAGS = builtins.concatStringsSep " " GO_TAGS;

		makeFlags = [
		makeFlags =
		[
		"VERSION=v${version}"
		"BUILD_TYPE=${BUILD_TYPE}"
		]
		@@ -516,7 +570,8 @@ let
		# raises a segmentation fault
		postFixup =
		let
		LD_LIBRARY_PATH = [ ]
		LD_LIBRARY_PATH =
		[ ]
		++ lib.optionals with_cublas [
		# driverLink has to be first to avoid loading the stub version of libcuda.so
		# https://github.com/NixOS/nixpkgs/issues/320145#issuecomment-2190319327
		@@ -524,10 +579,16 @@ let
		(lib.getLib libcublas)
		cuda_cudart
		]
		++ lib.optionals with_clblas [ clblast ocl-icd ]
		++ lib.optionals with_clblas [
		clblast
		ocl-icd
		]
		++ lib.optionals with_openblas [ openblas ]
		++ lib.optionals with_tts [ piper-phonemize ]
		++ lib.optionals (with_tts && enable_upx) [ fmt spdlog ];
		++ lib.optionals (with_tts && enable_upx) [
		fmt
		spdlog
		];
		in
		''
		wrapProgram $out/bin/${pname} \
		@@ -537,15 +598,30 @@ let

		passthru.local-packages = {
		inherit
		go-tiny-dream go-rwkv go-bert go-llama gpt4all go-piper
		llama-cpp-grpc whisper-cpp go-tiny-dream-ncnn espeak-ng' piper-phonemize
		piper-tts' llama-cpp-rpc;
		go-tiny-dream
		go-rwkv
		go-bert
		go-llama
		go-piper
		llama-cpp-grpc
		whisper-cpp
		go-tiny-dream-ncnn
		espeak-ng'
		piper-phonemize
		piper-tts'
		llama-cpp-rpc
		;
		};

		passthru.features = {
		inherit
		with_cublas with_openblas with_tts with_stablediffusion
		with_tinydream with_clblas;
		with_cublas
		with_openblas
		with_tts
		with_stablediffusion
		with_tinydream
		with_clblas
		;
		};

		passthru.tests = callPackages ./tests.nix { inherit self; };
		@@ -555,7 +631,10 @@ let
		description = "OpenAI alternative to run local LLMs, image and audio generation";
		homepage = "https://localai.io";
		license = licenses.mit;
		maintainers = with maintainers; [ onny ck3d ];
		maintainers = with maintainers; [
		onny
		ck3d
		];
		platforms = platforms.linux;
		};
		};

pkgs/by-name/lo/local-ai/tests.nix

+106 −85

Original line number	Diff line number	Diff line
		{ self
		, lib
		, testers
		, fetchzip
		, fetchurl
		, writers
		, symlinkJoin
		, jq
		, prom2json
		{
		self,
		lib,
		testers,
		fetchzip,
		fetchurl,
		writers,
		symlinkJoin,
		jq,
		prom2json,
		}:
		let
		common-config = { config, ... }: {
		common-config =
		{ config, ... }:
		{
		imports = [ ./module.nix ];
		services.local-ai = {
		enable = true;
		@@ -73,7 +76,9 @@ in
		virtualisation.memorySize = 2048;
		services.local-ai.models = models;
		};
		passthru = { inherit models requests; };
		passthru = {
		inherit models requests;
		};
		testScript =
		let
		port = "8080";
		@@ -93,7 +98,8 @@ in
		'';
		};

		} // lib.optionalAttrs (!self.features.with_cublas && !self.features.with_clblas) {
		}
		// lib.optionalAttrs (!self.features.with_cublas && !self.features.with_clblas) {
		# https://localai.io/docs/getting-started/manual/
		llama =
		let
		@@ -146,7 +152,12 @@ in
		# https://localai.io/features/text-generation/#chat-completions
		chat-completions = {
		inherit model;
		messages = [{ role = "user"; content = "1 + 2 = ?"; }];
		messages = [
		{
		role = "user";
		content = "1 + 2 = ?";
		}
		];
		};
		# https://localai.io/features/text-generation/#edit-completions
		edit-completions = {
		@@ -172,7 +183,9 @@ in
		# TODO: Add a test case for parallel requests
		services.local-ai.parallelRequests = 2;
		};
		passthru = { inherit models requests; };
		passthru = {
		inherit models requests;
		};
		testScript =
		let
		port = "8080";
		@@ -196,14 +209,20 @@ in
		machine.succeed("curl -f http://localhost:${port}/v1/completions --json @${writers.writeJSON "request-completions.json" requests.completions} --output completions.json")
		machine.copy_from_vm("completions.json")
		machine.succeed("${jq}/bin/jq --exit-status 'debug | .object ==\"text_completion\"' completions.json")
		machine.succeed("${jq}/bin/jq --exit-status '.usage.completion_tokens | debug == ${toString model-configs.${model}.parameters.max_tokens}' completions.json")
		machine.succeed("${jq}/bin/jq --exit-status '.usage.completion_tokens | debug == ${
		toString model-configs.${model}.parameters.max_tokens
		}' completions.json")

		machine.succeed("${prom2json}/bin/prom2json http://localhost:${port}/metrics > metrics.json")
		machine.copy_from_vm("metrics.json")
		'';
		};

		} // lib.optionalAttrs (self.features.with_tts && !self.features.with_cublas && !self.features.with_clblas) {
		}
		//
		lib.optionalAttrs
		(self.features.with_tts && !self.features.with_cublas && !self.features.with_clblas)
		{
		# https://localai.io/features/text-to-audio/#piper
		tts =
		let
		@@ -250,7 +269,9 @@ in
		virtualisation.cores = 2;
		services.local-ai.models = models;
		};
		passthru = { inherit models requests; };
		passthru = {
		inherit models requests;
		};
		testScript =
		let
		port = "8080";