nixos/llama-cpp: support model presets (#487436) (b807b258) · Commits · nix / nixpkgs

nixos/modules/services/misc/llama-cpp.nix

+40 −0

Original line number	Diff line number	Diff line
		@@ -8,6 +8,12 @@

		let
		cfg = config.services.llama-cpp;

		# INI rendering of `cfg.modelsPreset`, written out with pkgs.writeText.
		# Stays null when no preset is configured.
		modelsPresetFile =
		  if cfg.modelsPreset == null then
		    null
		  else
		    pkgs.writeText "llama-models.ini" (lib.generators.toINI { } cfg.modelsPreset);
		in
		{

		@@ -32,6 +38,32 @@ in
		default = null;
		};

		# Optional model presets. The attribute set is rendered with
		# lib.generators.toINI and handed to llama-server on its command line.
		# null (the default) means no preset file is generated or passed.
		modelsPreset = lib.mkOption {
		  type = lib.types.nullOr (lib.types.attrsOf lib.types.attrs);
		  default = null;
		  description = ''
		    Models preset configuration as a Nix attribute set.
		    This is converted to an INI file and passed to llama-server via `--models-preset`.
		    See llama-server documentation for available options.
		  '';
		  example = lib.literalExpression ''
		    {
		      "Qwen3-Coder-Next" = {
		        hf-repo = "unsloth/Qwen3-Coder-Next-GGUF";
		        hf-file = "Qwen3-Coder-Next-UD-Q4_K_XL.gguf";
		        alias = "unsloth/Qwen3-Coder-Next";
		        fit = "on";
		        seed = "3407";
		        temp = "1.0";
		        top-p = "0.95";
		        min-p = "0.01";
		        top-k = "40";
		        jinja = "on";
		      };
		    }
		  '';
		};

		extraFlags = lib.mkOption {
		type = lib.types.listOf lib.types.str;
		description = "Extra flags passed to llama-cpp-server.";
		@@ -77,6 +109,10 @@ in
		serviceConfig = {
		Type = "idle";
		KillSignal = "SIGINT";
		StateDirectory = "llama-cpp";
		CacheDirectory = "llama-cpp";
		WorkingDirectory = "/var/lib/llama-cpp";
		Environment = [ "LLAMA_CACHE=/var/cache/llama-cpp" ];
		ExecStart =
		let
		args = [
		@@ -93,6 +129,10 @@ in
		"--models-dir"
		cfg.modelsDir
		]
		++ lib.optionals (cfg.modelsPreset != null) [
		"--models-preset"
		modelsPresetFile
		]
		++ cfg.extraFlags;
		in
		"${cfg.package}/bin/llama-server ${utils.escapeSystemdExecArgs args}";