Unverified commit 14f8e6c6, authored by Christian Kögler and committed by GitHub
Browse files

local-ai: 2.19.4 -> 2.20.1 (#336871)

parents c98ddb92 e8f5d6f5
Loading
Loading
Loading
Loading
+29 −20
Original line number Diff line number Diff line
{ lib
, writers
, writeText
, linkFarmFromDrvs
}: {
  genModels = configs:
{
  lib,
  writers,
  writeText,
  linkFarmFromDrvs,
}:
{
  genModels =
    configs:
    let
      name = lib.strings.sanitizeDerivationName
        (builtins.concatStringsSep "_" ([ "local-ai-models" ] ++ (builtins.attrNames configs)));
      name = lib.strings.sanitizeDerivationName (
        builtins.concatStringsSep "_" ([ "local-ai-models" ] ++ (builtins.attrNames configs))
      );

      genModelFiles = name: config:
      genModelFiles =
        name: config:
        let
          templateName = type: name + "_" + type;

          config' = lib.recursiveUpdate config ({
          config' = lib.recursiveUpdate config (
            {
              inherit name;
          } // lib.optionalAttrs (lib.isDerivation config.parameters.model) {
            }
            // lib.optionalAttrs (lib.isDerivation config.parameters.model) {
              parameters.model = config.parameters.model.name;
          } // lib.optionalAttrs (config ? template) {
            }
            // lib.optionalAttrs (config ? template) {
              template = builtins.mapAttrs (n: _: templateName n) config.template;
          });
            }
          );
        in
        [ (writers.writeYAML "${name}.yaml" config') ]
        ++ lib.optional (lib.isDerivation config.parameters.model)
          config.parameters.model
        ++ lib.optionals (config ? template)
          (lib.mapAttrsToList (n: writeText "${templateName n}.tmpl") config.template);
        ++ lib.optional (lib.isDerivation config.parameters.model) config.parameters.model
        ++ lib.optionals (config ? template) (
          lib.mapAttrsToList (n: writeText "${templateName n}.tmpl") config.template
        );
    in
    linkFarmFromDrvs name (lib.flatten (lib.mapAttrsToList genModelFiles configs));
}
+25 −12
Original line number Diff line number Diff line
{ pkgs, config, lib, ... }:
{
  pkgs,
  config,
  lib,
  ...
}:
let
  cfg = config.services.local-ai;
  inherit (lib) mkOption types;
@@ -35,7 +40,13 @@ in
    };

    logLevel = mkOption {
      type = types.enum [ "error" "warn" "info" "debug" "trace" ];
      type = types.enum [
        "error"
        "warn"
        "info"
        "debug"
        "trace"
      ];
      default = "warn";
    };
  };
@@ -46,7 +57,8 @@ in
      environment.LLAMACPP_PARALLEL = toString cfg.parallelRequests;
      serviceConfig = {
        DynamicUser = true;
        ExecStart = lib.escapeShellArgs ([
        ExecStart = lib.escapeShellArgs (
          [
            "${cfg.package}/bin/local-ai"
            "--address=:${toString cfg.port}"
            "--threads=${toString cfg.threads}"
@@ -55,7 +67,8 @@ in
            "--log-level=${cfg.logLevel}"
          ]
          ++ lib.optional (cfg.parallelRequests > 1) "--parallel-requests"
        ++ cfg.extraArgs);
          ++ cfg.extraArgs
        );
        RuntimeDirectory = "local-ai";
        WorkingDirectory = "%t/local-ai";
      };
+288 −209
Original line number Diff line number Diff line
{ config
, callPackages
, stdenv
, lib
, addDriverRunpath
, fetchFromGitHub
, protobuf
, protoc-gen-go
, protoc-gen-go-grpc
, grpc
, openssl
, llama-cpp
{
  config,
  callPackages,
  stdenv,
  lib,
  addDriverRunpath,
  fetchFromGitHub,
  protobuf,
  protoc-gen-go,
  protoc-gen-go-grpc,
  grpc,
  openssl,
  llama-cpp,
  # needed for audio-to-text
, ffmpeg
, cmake
, pkg-config
, buildGoModule
, makeWrapper
, ncurses
, which
  ffmpeg,
  cmake,
  pkg-config,
  buildGoModule,
  makeWrapper,
  ncurses,
  which,

, enable_upx ? true
, upx
  enable_upx ? true,
  upx,

  # apply feature parameter names according to
  # https://github.com/NixOS/rfcs/pull/169

  # CPU extensions
, enable_avx ? true
, enable_avx2 ? true
, enable_avx512 ? stdenv.hostPlatform.avx512Support
, enable_f16c ? true
, enable_fma ? true

, with_openblas ? false
, openblas

, with_cublas ? config.cudaSupport
, cudaPackages

, with_clblas ? false
, clblast
, ocl-icd
, opencl-headers

, with_tinydream ? false # do not compile with cublas
, ncnn

, with_stablediffusion ? true
, opencv

, with_tts ? true
, onnxruntime
, sonic
, spdlog
, fmt
, espeak-ng
, piper-tts
  enable_avx ? true,
  enable_avx2 ? true,
  enable_avx512 ? stdenv.hostPlatform.avx512Support,
  enable_f16c ? true,
  enable_fma ? true,

  with_openblas ? false,
  openblas,

  with_cublas ? config.cudaSupport,
  cudaPackages,

  with_clblas ? false,
  clblast,
  ocl-icd,
  opencl-headers,

  with_tinydream ? false, # do not compile with cublas
  ncnn,

  with_stablediffusion ? true,
  opencv,

  with_tts ? true,
  onnxruntime,
  sonic,
  spdlog,
  fmt,
  espeak-ng,
  piper-tts,
}:
let
  BUILD_TYPE =
    assert (lib.count lib.id [ with_openblas with_cublas with_clblas ]) <= 1;
    if with_openblas then "openblas"
    else if with_cublas then "cublas"
    else if with_clblas then "clblas"
    else "";
    assert
      (lib.count lib.id [
        with_openblas
        with_cublas
        with_clblas
      ]) <= 1;
    if with_openblas then
      "openblas"
    else if with_cublas then
      "cublas"
    else if with_clblas then
      "clblas"
    else
      "";

  inherit (cudaPackages) libcublas cuda_nvcc cuda_cccl cuda_cudart libcufft;
  inherit (cudaPackages)
    libcublas
    cuda_nvcc
    cuda_cccl
    cuda_cudart
    libcufft
    ;

  go-llama = effectiveStdenv.mkDerivation {
    name = "go-llama";
@@ -81,13 +97,21 @@ let
      "BUILD_TYPE=${BUILD_TYPE}"
    ];

    buildInputs = [ ]
      ++ lib.optionals with_cublas [ cuda_cccl cuda_cudart libcublas ]
      ++ lib.optionals with_clblas [ clblast ocl-icd opencl-headers ]
    buildInputs =
      [ ]
      ++ lib.optionals with_cublas [
        cuda_cccl
        cuda_cudart
        libcublas
      ]
      ++ lib.optionals with_clblas [
        clblast
        ocl-icd
        opencl-headers
      ]
      ++ lib.optionals with_openblas [ openblas.dev ];

    nativeBuildInputs = [ cmake ]
      ++ lib.optionals with_cublas [ cuda_nvcc ];
    nativeBuildInputs = [ cmake ] ++ lib.optionals with_cublas [ cuda_nvcc ];

    dontUseCmakeConfigure = true;

@@ -98,7 +122,8 @@ let
    '';
  };

  llama-cpp-rpc = (llama-cpp-grpc.overrideAttrs (prev: {
  llama-cpp-rpc =
    (llama-cpp-grpc.overrideAttrs (prev: {
      name = "llama-cpp-rpc";
      cmakeFlags = prev.cmakeFlags ++ [
        (lib.cmakeBool "GGML_AVX" false)
@@ -107,23 +132,28 @@ let
        (lib.cmakeBool "GGML_FMA" false)
        (lib.cmakeBool "GGML_F16C" false)
      ];
  })).override {
    })).override
      {
        cudaSupport = false;
        openclSupport = false;
        blasSupport = false;
        rpcSupport = true;
      };

  llama-cpp-grpc = (llama-cpp.overrideAttrs (final: prev: {
  llama-cpp-grpc =
    (llama-cpp.overrideAttrs (
      final: prev: {
        name = "llama-cpp-grpc";
        src = fetchFromGitHub {
          owner = "ggerganov";
          repo = "llama.cpp";
      rev = "ed9d2854c9de4ae1f448334294e61167b04bec2a";
      hash = "sha256-Xu2h9Zu+Q9utfFFmDWBOEu/EXth4xWRNoTMvPF5Fo/A=";
          rev = "fc54ef0d1c138133a01933296d50a36a1ab64735";
          hash = "sha256-o87EhrA2Oa98pwyb6GSUgwERY0/GWJiX7kvlxDv4zb4=";
          fetchSubmodules = true;
        };
    postPatch = prev.postPatch + ''
        postPatch =
          prev.postPatch
          + ''
            cd examples
            cp -r --no-preserve=mode ${src}/backend/cpp/llama grpc-server
            cp llava/clip.* llava/llava.* grpc-server
@@ -148,33 +178,15 @@ let
          grpc
          openssl
        ];
  })).override {
      }
    )).override
      {
        cudaSupport = with_cublas;
        rocmSupport = false;
        openclSupport = with_clblas;
        blasSupport = with_openblas;
      };

  gpt4all = stdenv.mkDerivation {
    name = "gpt4all";
    src = fetchFromGitHub {
      owner = "nomic-ai";
      repo = "gpt4all";
      rev = "27a8b020c36b0df8f8b82a252d261cda47cf44b8";
      hash = "sha256-djq1eK6ncvhkO3MNDgasDBUY/7WWcmZt/GJsHAulLdI=";
      fetchSubmodules = true;
    };
    makeFlags = [ "-C gpt4all-bindings/golang" ];
    buildFlags = [ "libgpt4all.a" ];
    dontUseCmakeConfigure = true;
    nativeBuildInputs = [ cmake ];
    installPhase = ''
      mkdir $out
      tar cf - --exclude=CMakeFiles . \
        | tar xf - -C $out
    '';
  };

  espeak-ng' = espeak-ng.overrideAttrs (self: {
    name = "espeak-ng'";
    inherit (go-piper) src;
@@ -199,8 +211,14 @@ let
    name = "piper-phonemize";
    inherit (go-piper) src;
    sourceRoot = "${go-piper.src.name}/piper-phonemize";
    buildInputs = [ espeak-ng' onnxruntime ];
    nativeBuildInputs = [ cmake pkg-config ];
    buildInputs = [
      espeak-ng'
      onnxruntime
    ];
    nativeBuildInputs = [
      cmake
      pkg-config
    ];
    cmakeFlags = [
      (lib.cmakeFeature "ONNXRUNTIME_DIR" "${onnxruntime.dev}")
      (lib.cmakeFeature "ESPEAK_NG_DIR" "${espeak-ng'}")
@@ -240,7 +258,15 @@ let
        -e '/CXXFLAGS *= / s;$; -DSPDLOG_FMT_EXTERNAL=1;'
    '';
    buildFlags = [ "libpiper_binding.a" ];
    buildInputs = [ piper-tts' espeak-ng' piper-phonemize sonic fmt spdlog onnxruntime ];
    buildInputs = [
      piper-tts'
      espeak-ng'
      piper-phonemize
      sonic
      fmt
      spdlog
      onnxruntime
    ];
    installPhase = ''
      cp -r --no-preserve=mode $src $out
      mkdir -p $out/piper-phonemize/pi
@@ -273,16 +299,28 @@ let
    src = fetchFromGitHub {
      owner = "ggerganov";
      repo = "whisper.cpp";
      rev = "6739eb83c3ca5cf40d24c6fe8442a761a1eb6248";
      hash = "sha256-1yDdJVjIwYDJKn93zn4xOJXMoDTqaG2TvakjdHIMCxk=";
      rev = "9e3c5345cd46ea718209db53464e426c3fe7a25e";
      hash = "sha256-JOptyveuaKRLzeZ6GuB3A70IM7dk4we95g5o25XVXJI=";
    };

    nativeBuildInputs = [ cmake pkg-config ]
      ++ lib.optionals with_cublas [ cuda_nvcc ];
    nativeBuildInputs = [
      cmake
      pkg-config
    ] ++ lib.optionals with_cublas [ cuda_nvcc ];

    buildInputs = [ ]
      ++ lib.optionals with_cublas [ cuda_cccl cuda_cudart libcublas libcufft ]
      ++ lib.optionals with_clblas [ clblast ocl-icd opencl-headers ]
    buildInputs =
      [ ]
      ++ lib.optionals with_cublas [
        cuda_cccl
        cuda_cudart
        libcublas
        libcufft
      ]
      ++ lib.optionals with_clblas [
        clblast
        ocl-icd
        opencl-headers
      ]
      ++ lib.optionals with_openblas [ openblas.dev ];

    cmakeFlags = [
@@ -379,7 +417,8 @@ let
    meta.broken = lib.versionOlder go-tiny-dream.stdenv.cc.version "13";
  };

  GO_TAGS = lib.optional with_tinydream "tinydream"
  GO_TAGS =
    lib.optional with_tinydream "tinydream"
    ++ lib.optional with_tts "tts"
    ++ lib.optional with_stablediffusion "stablediffusion";

@@ -392,12 +431,12 @@ let
      stdenv;

  pname = "local-ai";
  version = "2.19.4";
  version = "2.20.1";
  src = fetchFromGitHub {
    owner = "go-skynet";
    repo = "LocalAI";
    rev = "v${version}";
    hash = "sha256-aKq6/DI+4+BvIEw6eONqPr3mZXuz7rMFN+FBypVj0Gc=";
    hash = "sha256-FeZZC0Tg9JT9Yj0e27GOLSdHEtWl17AHK3j7epwPyY8=";
  };

  prepare-sources =
@@ -407,36 +446,41 @@ let
    ''
      mkdir sources
      ${cp} ${go-llama} sources/go-llama.cpp
      ${cp} ${gpt4all} sources/gpt4all
      ${cp} ${if with_tts then go-piper else go-piper.src} sources/go-piper
      ${cp} ${go-rwkv} sources/go-rwkv.cpp
      ${cp} ${whisper-cpp.src} sources/whisper.cpp
      cp ${whisper-cpp}/lib/lib*.a sources/whisper.cpp
      ${cp} ${go-bert} sources/go-bert.cpp
      ${cp} ${if with_stablediffusion then go-stable-diffusion else go-stable-diffusion.src} sources/go-stable-diffusion
      ${cp} ${
        if with_stablediffusion then go-stable-diffusion else go-stable-diffusion.src
      } sources/go-stable-diffusion
      ${cp} ${if with_tinydream then go-tiny-dream else go-tiny-dream.src} sources/go-tiny-dream
    '';

  self = buildGoModule.override { stdenv = effectiveStdenv; } {
    inherit pname version src;

    vendorHash = "sha256-HEKE75+ixuNbM+KEuhbQQ/NYYEzVlGYOttPavftWKhk=";
    vendorHash = "sha256-mDxp5frUIECSHKjxaJVqIP7mnIusvdT45Xlxc9+P5tE=";

    env.NIX_CFLAGS_COMPILE = lib.optionalString with_stablediffusion " -isystem ${opencv}/include/opencv4";

    postPatch = ''
    postPatch =
      ''
        sed -i Makefile \
          -e '/mod download/ d' \
          -e '/^ALL_GRPC_BACKENDS+=backend-assets\/grpc\/llama-cpp-fallback/ d' \
          -e '/^ALL_GRPC_BACKENDS+=backend-assets\/grpc\/llama-cpp-avx/ d' \
          -e '/^ALL_GRPC_BACKENDS+=backend-assets\/grpc\/llama-cpp-cuda/ d' \

    '' + lib.optionalString with_cublas ''
      ''
      + lib.optionalString with_cublas ''
        sed -i Makefile \
          -e '/^CGO_LDFLAGS_WHISPER?=/ s;$;-L${libcufft}/lib -L${cuda_cudart}/lib;'
      '';

    postConfigure = prepare-sources + ''
    postConfigure =
      prepare-sources
      + ''
        shopt -s extglob
        mkdir -p backend-assets/grpc
        cp ${llama-cpp-grpc}/bin/grpc-server backend-assets/grpc/llama-cpp-avx2
@@ -451,9 +495,18 @@ let
        touch backend-assets/grpc/* backend-assets/util/* sources/**/lib*.a
      '';

    buildInputs = [ ]
      ++ lib.optionals with_cublas [ cuda_cudart libcublas libcufft ]
      ++ lib.optionals with_clblas [ clblast ocl-icd opencl-headers ]
    buildInputs =
      [ ]
      ++ lib.optionals with_cublas [
        cuda_cudart
        libcublas
        libcufft
      ]
      ++ lib.optionals with_clblas [
        clblast
        ocl-icd
        opencl-headers
      ]
      ++ lib.optionals with_openblas [ openblas.dev ]
      ++ lib.optionals with_stablediffusion go-stable-diffusion.buildInputs
      ++ lib.optionals with_tts go-piper.buildInputs;
@@ -465,13 +518,13 @@ let
      makeWrapper
      ncurses # tput
      which
    ]
    ++ lib.optional enable_upx upx
    ++ lib.optionals with_cublas [ cuda_nvcc ];
    ] ++ lib.optional enable_upx upx ++ lib.optionals with_cublas [ cuda_nvcc ];

    enableParallelBuilding = false;

    modBuildPhase = prepare-sources + ''
    modBuildPhase =
      prepare-sources
      + ''
        make protogen-go
        go mod tidy -v
      '';
@@ -482,7 +535,8 @@ let
    # containing spaces
    env.GO_TAGS = builtins.concatStringsSep " " GO_TAGS;

    makeFlags = [
    makeFlags =
      [
        "VERSION=v${version}"
        "BUILD_TYPE=${BUILD_TYPE}"
      ]
@@ -516,7 +570,8 @@ let
    # raises a segmentation fault
    postFixup =
      let
        LD_LIBRARY_PATH = [ ]
        LD_LIBRARY_PATH =
          [ ]
          ++ lib.optionals with_cublas [
            # driverLink has to be first to avoid loading the stub version of libcuda.so
            # https://github.com/NixOS/nixpkgs/issues/320145#issuecomment-2190319327
@@ -524,10 +579,16 @@ let
            (lib.getLib libcublas)
            cuda_cudart
          ]
          ++ lib.optionals with_clblas [ clblast ocl-icd ]
          ++ lib.optionals with_clblas [
            clblast
            ocl-icd
          ]
          ++ lib.optionals with_openblas [ openblas ]
          ++ lib.optionals with_tts [ piper-phonemize ]
          ++ lib.optionals (with_tts && enable_upx) [ fmt spdlog ];
          ++ lib.optionals (with_tts && enable_upx) [
            fmt
            spdlog
          ];
      in
      ''
        wrapProgram $out/bin/${pname} \
@@ -537,15 +598,30 @@ let

    passthru.local-packages = {
      inherit
        go-tiny-dream go-rwkv go-bert go-llama gpt4all go-piper
        llama-cpp-grpc whisper-cpp go-tiny-dream-ncnn espeak-ng' piper-phonemize
        piper-tts' llama-cpp-rpc;
        go-tiny-dream
        go-rwkv
        go-bert
        go-llama
        go-piper
        llama-cpp-grpc
        whisper-cpp
        go-tiny-dream-ncnn
        espeak-ng'
        piper-phonemize
        piper-tts'
        llama-cpp-rpc
        ;
    };

    passthru.features = {
      inherit
        with_cublas with_openblas with_tts with_stablediffusion
        with_tinydream with_clblas;
        with_cublas
        with_openblas
        with_tts
        with_stablediffusion
        with_tinydream
        with_clblas
        ;
    };

    passthru.tests = callPackages ./tests.nix { inherit self; };
@@ -555,7 +631,10 @@ let
      description = "OpenAI alternative to run local LLMs, image and audio generation";
      homepage = "https://localai.io";
      license = licenses.mit;
      maintainers = with maintainers; [ onny ck3d ];
      maintainers = with maintainers; [
        onny
        ck3d
      ];
      platforms = platforms.linux;
    };
  };
+106 −85
Original line number Diff line number Diff line
{ self
, lib
, testers
, fetchzip
, fetchurl
, writers
, symlinkJoin
, jq
, prom2json
{
  self,
  lib,
  testers,
  fetchzip,
  fetchurl,
  writers,
  symlinkJoin,
  jq,
  prom2json,
}:
let
  common-config = { config, ... }: {
  common-config =
    { config, ... }:
    {
      imports = [ ./module.nix ];
      services.local-ai = {
        enable = true;
@@ -73,7 +76,9 @@ in
        virtualisation.memorySize = 2048;
        services.local-ai.models = models;
      };
      passthru = { inherit models requests; };
      passthru = {
        inherit models requests;
      };
      testScript =
        let
          port = "8080";
@@ -93,7 +98,8 @@ in
        '';
    };

} // lib.optionalAttrs (!self.features.with_cublas && !self.features.with_clblas) {
}
// lib.optionalAttrs (!self.features.with_cublas && !self.features.with_clblas) {
  # https://localai.io/docs/getting-started/manual/
  llama =
    let
@@ -146,7 +152,12 @@ in
        # https://localai.io/features/text-generation/#chat-completions
        chat-completions = {
          inherit model;
          messages = [{ role = "user"; content = "1 + 2 = ?"; }];
          messages = [
            {
              role = "user";
              content = "1 + 2 = ?";
            }
          ];
        };
        # https://localai.io/features/text-generation/#edit-completions
        edit-completions = {
@@ -172,7 +183,9 @@ in
        # TODO: Add test case for parallel requests
        services.local-ai.parallelRequests = 2;
      };
      passthru = { inherit models requests; };
      passthru = {
        inherit models requests;
      };
      testScript =
        let
          port = "8080";
@@ -196,14 +209,20 @@ in
          machine.succeed("curl -f http://localhost:${port}/v1/completions --json @${writers.writeJSON "request-completions.json" requests.completions} --output completions.json")
          machine.copy_from_vm("completions.json")
          machine.succeed("${jq}/bin/jq --exit-status 'debug | .object ==\"text_completion\"' completions.json")
          machine.succeed("${jq}/bin/jq --exit-status '.usage.completion_tokens | debug == ${toString model-configs.${model}.parameters.max_tokens}' completions.json")
          machine.succeed("${jq}/bin/jq --exit-status '.usage.completion_tokens | debug == ${
            toString model-configs.${model}.parameters.max_tokens
          }' completions.json")

          machine.succeed("${prom2json}/bin/prom2json http://localhost:${port}/metrics > metrics.json")
          machine.copy_from_vm("metrics.json")
        '';
    };

} // lib.optionalAttrs (self.features.with_tts && !self.features.with_cublas && !self.features.with_clblas) {
}
//
  lib.optionalAttrs
    (self.features.with_tts && !self.features.with_cublas && !self.features.with_clblas)
    {
      # https://localai.io/features/text-to-audio/#piper
      tts =
        let
@@ -250,7 +269,9 @@ in
            virtualisation.cores = 2;
            services.local-ai.models = models;
          };
      passthru = { inherit models requests; };
          passthru = {
            inherit models requests;
          };
          testScript =
            let
              port = "8080";