Unverified commit 6d0d47e8 authored by Christian Kögler, committed by GitHub

Merge pull request #302907 from ck3d/localai-2121

local-ai: 2.11.0 -> 2.12.3
parents 0e535479 0114be05
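
Beyond the version bump, this change flips the with_stablediffusion and with_tts defaults to true, replaces the shared typedBuiltInputs list with per-derivation buildInputs/nativeBuildInputs, and moves passthru.tests out into a new tests.nix. A minimal sketch of toggling the feature flags from an overlay (the flag names are taken from the diff below; the overlay itself is only illustrative):

    final: prev: {
      # callPackage-style arguments such as with_cublas and with_tts are
      # exposed through .override, so features can be flipped per consumer
      local-ai = prev.local-ai.override {
        with_cublas = true; # pulls in cudaPackages (cuda_nvcc, libcublas, ...)
        with_tts = false;   # drops the piper-tts/espeak-ng dependency chain
      };
    }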
local-ai/package.nix: +49 −129
{ config
+, callPackages
, stdenv
, lib
, addDriverRunpath
@@ -14,8 +15,6 @@
, pkg-config
, buildGoModule
, makeWrapper
-, runCommand
-, testers

  # apply feature parameter names according to
  # https://github.com/NixOS/rfcs/pull/169
@@ -27,9 +26,6 @@
, enable_f16c ? true
, enable_fma ? true

-, with_tinydream ? false
-, ncnn

, with_openblas ? false
, openblas

@@ -41,24 +37,19 @@
, ocl-icd
, opencl-headers

-, with_stablediffusion ? false
+, with_tinydream ? false # do not compile with cublas
+, ncnn

+, with_stablediffusion ? true
, opencv

-, with_tts ? false
+, with_tts ? true
, onnxruntime
, sonic
, spdlog
, fmt
, espeak-ng
, piper-tts

-  # tests
-, fetchzip
-, fetchurl
-, writeText
-, symlinkJoin
-, linkFarmFromDrvs
-, jq
}:
let
  BUILD_TYPE =
@@ -68,21 +59,7 @@ let
    else if with_clblas then "clblas"
    else "";

-  inherit (cudaPackages) libcublas cuda_nvcc cuda_cccl cuda_cudart;
-
-  typedBuiltInputs =
-    lib.optionals with_cublas
-      [
-        cuda_nvcc # should be part of nativeBuildInputs
-        cuda_cudart
-        cuda_cccl
-        (lib.getDev libcublas)
-        (lib.getLib libcublas)
-      ]
-    ++ lib.optionals with_clblas
-      [ clblast ocl-icd opencl-headers ]
-    ++ lib.optionals with_openblas
-      [ openblas.dev ];
+  inherit (cudaPackages) libcublas cuda_nvcc cuda_cccl cuda_cudart cudatoolkit;

  go-llama-ggml = effectiveStdenv.mkDerivation {
    name = "go-llama-ggml";
@@ -97,9 +74,18 @@ let
      "libbinding.a"
      "BUILD_TYPE=${BUILD_TYPE}"
    ];
-    buildInputs = typedBuiltInputs;

+    buildInputs = [ ]
+      ++ lib.optionals with_clblas [ clblast ocl-icd opencl-headers ]
+      ++ lib.optionals with_openblas [ openblas.dev ];

+    nativeBuildInputs = [ cmake ]
+      # backward compatibility with nixos-23.11
+      # use cuda_nvcc after release of nixos-24.05
+      ++ lib.optionals with_cublas [ cudatoolkit ];

     dontUseCmakeConfigure = true;
-    nativeBuildInputs = [ cmake ];

    installPhase = ''
      mkdir $out
      tar cf - --exclude=build --exclude=CMakeFiles --exclude="*.o" . \
@@ -112,8 +98,8 @@ let
    src = fetchFromGitHub {
      owner = "ggerganov";
      repo = "llama.cpp";
-      rev = "b06c16ef9f81d84da520232c125d4d8a1d273736";
-      hash = "sha256-t1AIx/Ir5RhasjblH4BSpGOXVvO84SJPSqa7rXWj6b4=";
+      rev = "1b67731e184e27a465b8c5476061294a4af668ea";
+      hash = "sha256-0WWbsklpW6HhFRkvWpYh8Lhi8VIansS/zmyIKNQRkIs=";
      fetchSubmodules = true;
    };
    postPatch = prev.postPatch + ''
@@ -266,13 +252,20 @@ let
    src = fetchFromGitHub {
      owner = "ggerganov";
      repo = "whisper.cpp";
-      rev = "1558ec5a16cb2b2a0bf54815df1d41f83dc3815b";
-      hash = "sha256-UAqWU3kvkHM+fV+T6gFVsAKuOG6N4FoFgTKGUptwjmE=";
+      rev = "8f253ef3af1c62c04316ba4afa7145fc4d701a8c";
+      hash = "sha256-yHHjhpQIn99A/hqFwAb7TfTf4Q9KnKat93zyXS70bT8=";
    };
-    nativeBuildInputs = [ cmake pkg-config ];
-    buildInputs = typedBuiltInputs;

+    nativeBuildInputs = [ cmake pkg-config ]
+      ++ lib.optionals with_cublas [ cuda_nvcc ];

+    buildInputs = [ ]
+      ++ lib.optionals with_cublas [ cuda_cccl cuda_cudart libcublas ]
+      ++ lib.optionals with_clblas [ clblast ocl-icd opencl-headers ]
+      ++ lib.optionals with_openblas [ openblas.dev ];

    cmakeFlags = [
-      (lib.cmakeBool "WHISPER_CUBLAS" with_cublas)
+      (lib.cmakeBool "WHISPER_CUDA" with_cublas)
      (lib.cmakeBool "WHISPER_CLBLAST" with_clblas)
      (lib.cmakeBool "WHISPER_OPENBLAS" with_openblas)
      (lib.cmakeBool "WHISPER_NO_AVX" (!enable_avx))
@@ -342,19 +335,19 @@ let
    ];
  });

-  go-tiny-dream = stdenv.mkDerivation {
+  go-tiny-dream = effectiveStdenv.mkDerivation {
    name = "go-tiny-dream";
    src = fetchFromGitHub {
      owner = "M0Rf30";
      repo = "go-tiny-dream";
-      rev = "772a9c0d9aaf768290e63cca3c904fe69faf677a";
-      hash = "sha256-r+wzFIjaI6cxAm/eXN3q8LRZZz+lE5EA4lCTk5+ZnIY=";
+      rev = "22a12a4bc0ac5455856f28f3b771331a551a4293";
+      hash = "sha256-DAVHD6E0OKHf4C2ldoI0Mm7813DIrmWFONUhSCQPCfc=";
      fetchSubmodules = true;
    };
    postUnpack = ''
      rm -rf source/ncnn
-      mkdir -p source/ncnn/build
-      cp -r --no-preserve=mode ${go-tiny-dream-ncnn} source/ncnn/build/install
+      mkdir -p source/ncnn/build/src
+      cp -r --no-preserve=mode ${go-tiny-dream-ncnn}/lib/. ${go-tiny-dream-ncnn}/include/. source/ncnn/build/src
    '';
    buildFlags = [ "libtinydream.a" ];
    installPhase = ''
@@ -378,18 +371,18 @@ let
      stdenv;

  pname = "local-ai";
-  version = "2.11.0";
+  version = "2.12.3";
  src = fetchFromGitHub {
    owner = "go-skynet";
    repo = "LocalAI";
    rev = "v${version}";
-    hash = "sha256-Sqo4NOggUNb1ZemT9TRknBmz8dThe/X43R+4JFfQJ4M=";
+    hash = "sha256-/Q0t5OozpgqmjUOYHvVAj1k7VnIixfOS8gNAguuu6p0=";
  };

  self = buildGoModule.override { stdenv = effectiveStdenv; } {
    inherit pname version src;

-    vendorHash = "sha256-3bOr8DnAjTzOpVDB5wmlPxECNteWw3tI0yc1f2Wt4y0=";
+    vendorHash = "sha256-8Hu1y/PK21twnB7D22ltslFFzRrsB8d1R2hkgIFB/XY=";

    env.NIX_CFLAGS_COMPILE = lib.optionalString with_stablediffusion " -isystem ${opencv}/include/opencv4";

@@ -415,11 +408,15 @@ let
      ''
    ;

-    buildInputs = typedBuiltInputs
-      ++ lib.optional with_stablediffusion go-stable-diffusion.buildInputs
-      ++ lib.optional with_tts go-piper.buildInputs;
+    buildInputs = [ ]
+      ++ lib.optionals with_cublas [ libcublas ]
+      ++ lib.optionals with_clblas [ clblast ocl-icd opencl-headers ]
+      ++ lib.optionals with_openblas [ openblas.dev ]
+      ++ lib.optionals with_stablediffusion go-stable-diffusion.buildInputs
+      ++ lib.optionals with_tts go-piper.buildInputs;

-    nativeBuildInputs = [ makeWrapper ];
+    nativeBuildInputs = [ makeWrapper ]
+      ++ lib.optionals with_cublas [ cuda_nvcc ];

    enableParallelBuilding = false;

@@ -500,84 +497,7 @@ let
        with_tinydream with_clblas;
    };

-    passthru.tests = {
-      version = testers.testVersion {
-        package = self;
-        version = "v" + version;
-      };
-      health =
-        let
-          port = "8080";
-        in
-        testers.runNixOSTest {
-          name = pname + "-health";
-          nodes.machine = {
-            systemd.services.local-ai = {
-              wantedBy = [ "multi-user.target" ];
-              serviceConfig.ExecStart = "${self}/bin/local-ai --debug --localai-config-dir . --address :${port}";
-            };
-          };
-          testScript = ''
-            machine.wait_for_open_port(${port})
-            machine.succeed("curl -f http://localhost:${port}/readyz")
-          '';
-        };
-    }
-    // lib.optionalAttrs with_tts {
-      # https://localai.io/features/text-to-audio/#piper
-      tts =
-        let
-          port = "8080";
-          voice-en-us = fetchzip {
-            url = "https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-en-us-danny-low.tar.gz";
-            hash = "sha256-5wf+6H5HeQY0qgdqnAG1vSqtjIFM9lXH53OgouuPm0M=";
-            stripRoot = false;
-          };
-          ggml-tiny-en = fetchurl {
-            url = "https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-tiny.en-q5_1.bin";
-            hash = "sha256-x3xXZvHO8JtrfUfyG1Rsvd1BV4hrO11tT3CekeZsfCs=";
-          };
-          whisper-en = {
-            name = "whisper-en";
-            backend = "whisper";
-            parameters.model = ggml-tiny-en.name;
-          };
-          models = symlinkJoin {
-            name = "models";
-            paths = [
-              voice-en-us
-              (linkFarmFromDrvs "whisper-en" [
-                (writeText "whisper-en.yaml" (builtins.toJSON whisper-en))
-                ggml-tiny-en
-              ])
-            ];
-          };
-        in
-        testers.runNixOSTest {
-          name = pname + "-tts";
-          nodes.machine = {
-            systemd.services.local-ai = {
-              wantedBy = [ "multi-user.target" ];
-              serviceConfig.ExecStart = "${self}/bin/local-ai --debug --models-path ${models} --localai-config-dir . --address :${port}";
-            };
-          };
-          testScript =
-            let
-              request = {
-                model = "en-us-danny-low.onnx";
-                backend = "piper";
-                input = "Hello, how are you?";
-              };
-            in
-            ''
-              machine.wait_for_open_port(${port})
-              machine.succeed("curl -f http://localhost:${port}/readyz")
-              machine.succeed("curl -f http://localhost:${port}/tts --json @${writeText "request.json" (builtins.toJSON request)} --output out.wav")
-              machine.succeed("curl -f http://localhost:${port}/v1/audio/transcriptions --header 'Content-Type: multipart/form-data' --form file=@out.wav --form model=${whisper-en.name} --output transcription.json")
-              machine.succeed("${jq}/bin/jq --exit-status 'debug | .segments | first.text == \"${request.input}\"' transcription.json")
-            '';
-        };
-    };
+    passthru.tests = callPackages ./tests.nix { inherit self; };

    meta = with lib; {
      description = "OpenAI alternative to run local LLMs, image and audio generation";
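
The tests deleted above are reintroduced below as a separate file, wired in through callPackages so they still receive the final package as self. A sketch of building one of them from the top of a nixpkgs checkout, assuming the attribute path implied by the passthru.tests wiring above:

    # evaluates to the "health" NixOS VM test defined in tests.nix
    (import ./. { }).local-ai.tests.health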
local-ai/tests.nix (new file): +160 −0
{ self
, lib
, testers
, fetchzip
, fetchurl
, writers
, symlinkJoin
, linkFarmFromDrvs
, jq
}:
{
  version = testers.testVersion {
    package = self;
    version = "v" + self.version;
  };

  health =
    let
      port = "8080";
    in
    testers.runNixOSTest {
      name = self.name + "-health";
      nodes.machine = {
        systemd.services.local-ai = {
          wantedBy = [ "multi-user.target" ];
          serviceConfig.ExecStart = "${self}/bin/local-ai --debug --localai-config-dir . --address :${port}";
        };
      };
      testScript = ''
        machine.wait_for_open_port(${port})
        machine.succeed("curl -f http://localhost:${port}/readyz")
      '';
    };

  # https://localai.io/docs/getting-started/manual/
  llama =
    let
      port = "8080";
      gguf = fetchurl {
        url = "https://huggingface.co/TheBloke/Luna-AI-Llama2-Uncensored-GGUF/resolve/main/luna-ai-llama2-uncensored.Q4_K_M.gguf";
        sha256 = "6a9dc401c84f0d48996eaa405174999c3a33bf12c2bfd8ea4a1e98f376de1f15";
      };
      models = linkFarmFromDrvs "models" [
        gguf
      ];
    in
    testers.runNixOSTest {
      name = self.name + "-llama";
      nodes.machine =
        let
          cores = 4;
        in
        {
          virtualisation = {
            inherit cores;
            memorySize = 8192;
          };
          systemd.services.local-ai = {
            wantedBy = [ "multi-user.target" ];
            serviceConfig.ExecStart = "${self}/bin/local-ai --debug --threads ${toString cores} --models-path ${models} --localai-config-dir . --address :${port}";
          };
        };
      testScript =
        let
          # https://localai.io/features/text-generation/#chat-completions
          request-chat-completions = {
            model = gguf.name;
            messages = [{ role = "user"; content = "Say this is a test!"; }];
            temperature = 0.7;
          };
          # https://localai.io/features/text-generation/#edit-completions
          request-edit-completions = {
            model = gguf.name;
            instruction = "rephrase";
            input = "Black cat jumped out of the window";
            temperature = 0.7;
          };
          # https://localai.io/features/text-generation/#completions
          request-completions = {
            model = gguf.name;
            prompt = "A long time ago in a galaxy far, far away";
            temperature = 0.7;
          };
        in
        ''
          machine.wait_for_open_port(${port})
          machine.succeed("curl -f http://localhost:${port}/readyz")
          machine.succeed("curl -f http://localhost:${port}/v1/models --output models.json")
          machine.succeed("${jq}/bin/jq --exit-status 'debug | .data[].id == \"${gguf.name}\"' models.json")
          machine.succeed("curl -f http://localhost:${port}/v1/chat/completions --json @${writers.writeJSON "request-chat-completions.json" request-chat-completions} --output chat-completions.json")
          machine.succeed("${jq}/bin/jq --exit-status 'debug | .object == \"chat.completion\"' chat-completions.json")
          machine.succeed("curl -f http://localhost:${port}/v1/edits --json @${writers.writeJSON "request-edit-completions.json" request-edit-completions} --output edit-completions.json")
          machine.succeed("${jq}/bin/jq --exit-status 'debug | .object == \"edit\"' edit-completions.json")
          machine.succeed("curl -f http://localhost:${port}/v1/completions --json @${writers.writeJSON "request-completions.json" request-completions} --output completions.json")
          machine.succeed("${jq}/bin/jq --exit-status 'debug | .object == \"text_completion\"' completions.json")
        '';
    };

} // lib.optionalAttrs self.features.with_tts {
  # https://localai.io/features/text-to-audio/#piper
  tts =
    let
      port = "8080";
      voice-en-us = fetchzip {
        url = "https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-en-us-danny-low.tar.gz";
        hash = "sha256-5wf+6H5HeQY0qgdqnAG1vSqtjIFM9lXH53OgouuPm0M=";
        stripRoot = false;
      };
      ggml-tiny-en = fetchurl {
        url = "https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-tiny.en-q5_1.bin";
        hash = "sha256-x3xXZvHO8JtrfUfyG1Rsvd1BV4hrO11tT3CekeZsfCs=";
      };
      whisper-en = {
        name = "whisper-en";
        backend = "whisper";
        parameters.model = ggml-tiny-en.name;
      };
      models = symlinkJoin {
        name = "models";
        paths = [
          voice-en-us
          (linkFarmFromDrvs "whisper-en" [
            (writers.writeYAML "whisper-en.yaml" whisper-en)
            ggml-tiny-en
          ])
        ];
      };
    in
    testers.runNixOSTest {
      name = self.name + "-tts";
      nodes.machine =
        let
          cores = 2;
        in
        {
          virtualisation = {
            inherit cores;
          };
          systemd.services.local-ai = {
            wantedBy = [ "multi-user.target" ];
            serviceConfig.ExecStart = "${self}/bin/local-ai --debug --threads ${toString cores} --models-path ${models} --localai-config-dir . --address :${port}";
          };
        };
      testScript =
        let
          request = {
            model = "en-us-danny-low.onnx";
            backend = "piper";
            input = "Hello, how are you?";
          };
        in
        ''
          machine.wait_for_open_port(${port})
          machine.succeed("curl -f http://localhost:${port}/readyz")
          machine.succeed("curl -f http://localhost:${port}/tts --json @${writers.writeJSON "request.json" request} --output out.wav")
          machine.succeed("curl -f http://localhost:${port}/v1/audio/transcriptions --header 'Content-Type: multipart/form-data' --form file=@out.wav --form model=${whisper-en.name} --output transcription.json")
          machine.succeed("${jq}/bin/jq --exit-status 'debug | .segments | first.text == \"${request.input}\"' transcription.json")
        '';
    };
}
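
The trailing // lib.optionalAttrs self.features.with_tts join means the tts attribute only exists when the package was built with TTS support; since with_tts now defaults to true, the gated test is normally present. A hedged sketch of selecting it explicitly for a custom build, reusing the override pattern from above:

    # force-enable TTS, then pick up the feature-gated VM test
    ((import ./. { }).local-ai.override { with_tts = true; }).tests.tts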