Commit c8fd4604 authored by Christian Kögler
Browse files

local-ai: Switch test to llama 3.1

parent d83edf31
Loading
Loading
Loading
Loading
+9 −8
Original line number Diff line number Diff line
@@ -101,17 +101,16 @@ in

      # https://localai.io/advanced/#full-config-model-file-reference
      model-configs.${model} = rec {
        context_size = 8192;
        context_size = 16 * 1024; # a 128k-token context is possible, but needs 16GB RAM
        backend = "llama-cpp";
        parameters = {
          # https://huggingface.co/lmstudio-community/Meta-Llama-3-8B-Instruct-GGUF
          # https://ai.meta.com/blog/meta-llama-3/
          # https://ai.meta.com/blog/meta-llama-3-1/
          model = fetchurl {
            url = "https://huggingface.co/lmstudio-community/Meta-Llama-3-8B-Instruct-GGUF/resolve/main/Meta-Llama-3-8B-Instruct-Q4_K_M.gguf";
            sha256 = "ab9e4eec7e80892fd78f74d9a15d0299f1e22121cea44efd68a7a02a3fe9a1da";
            url = "https://huggingface.co/lmstudio-community/Meta-Llama-3.1-8B-Instruct-GGUF/resolve/main/Meta-Llama-3.1-8B-Instruct-Q4_K_M.gguf";
            sha256 = "f2be3e1a239c12c9f3f01a962b11fb2807f8032fdb63b0a5502ea42ddef55e44";
          };
          # defaults from:
          # https://deepinfra.com/meta-llama/Meta-Llama-3-8B-Instruct
          # https://deepinfra.com/meta-llama/Meta-Llama-3.1-8B-Instruct
          temperature = 0.7;
          top_p = 0.9;
          top_k = 0;
@@ -135,7 +134,9 @@ in

            {{.Content}}${builtins.head stopwords}'';

          chat = "<|begin_of_text|>{{.Input}}<|start_header_id|>assistant<|end_header_id|>";
          chat = "{{.Input}}<|start_header_id|>assistant<|end_header_id|>";

          completion = "{{.Input}}";
        };
      };

@@ -185,7 +186,7 @@ in
          machine.succeed("curl -f http://localhost:${port}/v1/chat/completions --json @${writers.writeJSON "request-chat-completions.json" requests.chat-completions} --output chat-completions.json")
          machine.copy_from_vm("chat-completions.json")
          machine.succeed("${jq}/bin/jq --exit-status 'debug | .object == \"chat.completion\"' chat-completions.json")
          machine.succeed("${jq}/bin/jq --exit-status 'debug | .choices | first.message.content | tonumber == 3' chat-completions.json")
          machine.succeed("${jq}/bin/jq --exit-status 'debug | .choices | first.message.content | split(\" \") | last | tonumber == 3' chat-completions.json")

          machine.succeed("curl -f http://localhost:${port}/v1/edits --json @${writers.writeJSON "request-edit-completions.json" requests.edit-completions} --output edit-completions.json")
          machine.copy_from_vm("edit-completions.json")