Unverified Commit d382cf7a authored by Gaétan Lepage's avatar Gaétan Lepage Committed by GitHub
Browse files

whisper-cpp: add ROCm and Vulkan support, rename from openai-whisper-cpp (#365033)

parents a61a1ee9 89de5a96
Loading
Loading
Loading
Loading
+199 −0
Original line number Diff line number Diff line
{
  lib,
  stdenv,
  cmake,
  apple-sdk_11,
  ninja,
  fetchFromGitHub,
  SDL2,
  wget,
  which,
  autoAddDriverRunpath,
  makeWrapper,

  metalSupport ? stdenv.hostPlatform.isDarwin && stdenv.hostPlatform.isAarch64,
  coreMLSupport ? stdenv.hostPlatform.isDarwin && false, # FIXME currently broken

  config,
  cudaSupport ? config.cudaSupport,
  cudaPackages ? { },

  rocmSupport ? config.rocmSupport,
  rocmPackages ? { },
  rocmGpuTargets ? builtins.concatStringsSep ";" rocmPackages.clr.gpuTargets,

  vulkanSupport ? false,
  shaderc,
  vulkan-headers,
  vulkan-loader,

  withSDL ? true,
}:

assert metalSupport -> stdenv.hostPlatform.isDarwin;
assert coreMLSupport -> stdenv.hostPlatform.isDarwin;

let
  # It's necessary to consistently use backendStdenv when building with CUDA support,
  # otherwise we get libstdc++ errors downstream.
  # cuda imposes an upper bound on the gcc version, e.g. the latest gcc compatible with cudaPackages_11 is gcc11
  effectiveStdenv = if cudaSupport then cudaPackages.backendStdenv else stdenv;
  inherit (lib)
    cmakeBool
    cmakeFeature
    optional
    optionals
    ;

  darwinBuildInputs = [ apple-sdk_11 ];

  cudaBuildInputs = with cudaPackages; [
    cuda_cccl # <nv/target>

    # A temporary hack for reducing the closure size, remove once cudaPackages
    # have stopped using lndir: https://github.com/NixOS/nixpkgs/issues/271792
    cuda_cudart
    libcublas
  ];

  rocmBuildInputs = with rocmPackages; [
    clr
    hipblas
    rocblas
  ];

  vulkanBuildInputs = [
    shaderc
    vulkan-headers
    vulkan-loader
  ];

in
effectiveStdenv.mkDerivation (finalAttrs: {
  pname = "whisper-cpp";
  version = "1.7.2";

  src = fetchFromGitHub {
    owner = "ggerganov";
    repo = "whisper.cpp";
    rev = "refs/tags/v${finalAttrs.version}";
    hash = "sha256-y30ZccpF3SCdRGa+P3ddF1tT1KnvlI4Fexx81wZxfTk=";
  };

  # The upstream download script tries to download the models to the
  # directory of the script, which is not writable due to being
  # inside the nix store. This patch changes the script to download
  # the models to the current directory of where it is being run from.
  patches = [ ./download-models.patch ];

  # Some example CMakeLists.txt files lack an install rule, so those example
  # binaries would be built but never installed. Append a default rule only
  # where one is genuinely missing.
  # NOTE: grep needs the file operand ("$target"); without it grep reads
  # (empty) stdin, always fails, and the rule gets appended unconditionally,
  # duplicating install() in files that already have one.
  postPatch = ''
    for target in examples/{bench,command,main,quantize,server,stream,talk}/CMakeLists.txt; do
      if ! grep -q -F 'install(' "$target"; then
        echo 'install(TARGETS ''${TARGET} RUNTIME)' >> $target
      fi
    done
  '';

  nativeBuildInputs =
    [
      cmake
      ninja
      which
      makeWrapper
    ]
    ++ lib.optionals cudaSupport [
      cudaPackages.cuda_nvcc
      autoAddDriverRunpath
    ];

  buildInputs =
    optional withSDL SDL2
    ++ optionals effectiveStdenv.hostPlatform.isDarwin darwinBuildInputs
    ++ optionals cudaSupport cudaBuildInputs
    ++ optionals rocmSupport rocmBuildInputs
    ++ optionals vulkanSupport vulkanBuildInputs;

  cmakeFlags =
    [
      (cmakeBool "WHISPER_BUILD_EXAMPLES" true)
      (cmakeBool "GGML_CUDA" cudaSupport)
      (cmakeBool "GGML_HIPBLAS" rocmSupport)
      (cmakeBool "GGML_VULKAN" vulkanSupport)
      (cmakeBool "WHISPER_SDL2" withSDL)
      (cmakeBool "GGML_LTO" true)
      # Never build -march=native binaries in the (shared) binary cache.
      (cmakeBool "GGML_NATIVE" false)
      (cmakeBool "BUILD_SHARED_LIBS" (!effectiveStdenv.hostPlatform.isStatic))
    ]
    ++ optionals (effectiveStdenv.hostPlatform.isx86 && !effectiveStdenv.hostPlatform.isStatic) [
      # Build every x86 CPU-feature variant of the ggml backend and pick the
      # best one at runtime via dlopen (requires shared libraries).
      (cmakeBool "GGML_BACKEND_DL" true)
      (cmakeBool "GGML_CPU_ALL_VARIANTS" true)
    ]
    ++ optionals cudaSupport [
      (cmakeFeature "CMAKE_CUDA_ARCHITECTURES" cudaPackages.flags.cmakeCudaArchitecturesString)
    ]
    ++ optionals rocmSupport [
      (cmakeFeature "CMAKE_C_COMPILER" "hipcc")
      (cmakeFeature "CMAKE_CXX_COMPILER" "hipcc")

      # Build all targets supported by rocBLAS. When updating search for TARGET_LIST_ROCM
      # in https://github.com/ROCmSoftwarePlatform/rocBLAS/blob/develop/CMakeLists.txt
      # and select the line that matches the current nixpkgs version of rocBLAS.
      "-DAMDGPU_TARGETS=${rocmGpuTargets}"
    ]
    ++ optionals coreMLSupport [
      (cmakeBool "WHISPER_COREML" true)
      (cmakeBool "WHISPER_COREML_ALLOW_FALLBACK" true)
    ]
    ++ optionals metalSupport [
      (cmakeFeature "CMAKE_C_FLAGS" "-D__ARM_FEATURE_DOTPROD=1")
      (cmakeBool "GGML_METAL" true)
      (cmakeBool "GGML_METAL_EMBED_LIBRARY" true)
    ];

  postInstall = ''
    # Add "whisper-cpp" prefix before every command
    mv -v $out/bin/{main,whisper-cpp}

    for file in $out/bin/*; do
      if [[ -x "$file" && -f "$file" && "$(basename $file)" != "whisper-cpp" ]]; then
        mv -v "$file" "$out/bin/whisper-cpp-$(basename $file)"
      fi
    done

    install -v -D -m755 $src/models/download-ggml-model.sh $out/bin/whisper-cpp-download-ggml-model

    wrapProgram $out/bin/whisper-cpp-download-ggml-model \
      --prefix PATH : ${lib.makeBinPath [ wget ]}
  '';

  # rocmSupport multiplies build time by the number of GPU targets, which takes
  # around 30 minutes on a 16-core system to build
  requiredSystemFeatures = optionals rocmSupport [ "big-parallel" ];

  doInstallCheck = true;

  # Smoke test: the renamed main binary must at least print its help text.
  installCheckPhase = ''
    runHook preInstallCheck
    $out/bin/whisper-cpp --help >/dev/null
    runHook postInstallCheck
  '';

  meta = {
    description = "Port of OpenAI's Whisper model in C/C++";
    longDescription = ''
      To download the models as described in the project's readme, you may
      use the `whisper-cpp-download-ggml-model` binary from this package.
    '';
    homepage = "https://github.com/ggerganov/whisper.cpp";
    license = lib.licenses.mit;
    mainProgram = "whisper-cpp";
    platforms = lib.platforms.all;
    broken = coreMLSupport;
    badPlatforms = optionals cudaSupport lib.platforms.darwin;
    maintainers = with lib.maintainers; [
      dit7ya
      hughobrien
      aviallon
    ];
  };
})
+0 −113
Original line number Diff line number Diff line
{
  lib,
  stdenv,
  fetchFromGitHub,
  SDL2,
  makeWrapper,
  wget,
  which,
  Accelerate,
  CoreGraphics,
  CoreML,
  CoreVideo,
  MetalKit,

  config,
  autoAddDriverRunpath,
  cudaSupport ? config.cudaSupport,
  cudaPackages ? { },
}:

let
  # It's necessary to consistently use backendStdenv when building with CUDA support,
  # otherwise we get libstdc++ errors downstream.
  # cuda imposes an upper bound on the gcc version, e.g. the latest gcc compatible with cudaPackages_11 is gcc11
  effectiveStdenv = if cudaSupport then cudaPackages.backendStdenv else stdenv;
in
effectiveStdenv.mkDerivation (finalAttrs: {
  pname = "whisper-cpp";
  version = "1.7.2";

  src = fetchFromGitHub {
    owner = "ggerganov";
    repo = "whisper.cpp";
    rev = "refs/tags/v${finalAttrs.version}";
    hash = "sha256-y30ZccpF3SCdRGa+P3ddF1tT1KnvlI4Fexx81wZxfTk=";
  };

  # The upstream download script tries to download the models to the
  # directory of the script, which is not writable due to being
  # inside the nix store. This patch changes the script to download
  # the models to the current directory of where it is being run from.
  patches = [ ./download-models.patch ];

  nativeBuildInputs =
    [
      which
      makeWrapper
    ]
    ++ lib.optionals cudaSupport [
      cudaPackages.cuda_nvcc
      autoAddDriverRunpath
    ];

  buildInputs =
    [ SDL2 ]
    ++ lib.optionals stdenv.hostPlatform.isDarwin [
      Accelerate
      CoreGraphics
      CoreML
      CoreVideo
      MetalKit
    ]
    ++ lib.optionals cudaSupport (
      with cudaPackages;
      [
        cuda_cccl # provides nv/target
        cuda_cudart
        libcublas
      ]
    );

  # Point the Makefile's CUDA link line at the cudart driver stubs so linking
  # -lcuda succeeds inside the sandbox (no real driver available at build time).
  postPatch = lib.optionalString cudaSupport ''
    substituteInPlace Makefile \
      --replace-fail '-lcuda ' '-lcuda -L${cudaPackages.cuda_cudart}/lib/stubs '
  '';

  env =
    lib.optionalAttrs stdenv.hostPlatform.isDarwin {
      WHISPER_COREML = "1";
      WHISPER_COREML_ALLOW_FALLBACK = "1";
      WHISPER_METAL_EMBED_LIBRARY = "1";
    }
    // lib.optionalAttrs cudaSupport {
      GGML_CUDA = "1";
    };

  installPhase = ''
    runHook preInstall

    mkdir -p $out/bin

    cp ./main $out/bin/whisper-cpp

    for file in *; do
      if [[ -x "$file" && -f "$file" && "$file" != "main" ]]; then
        cp "$file" "$out/bin/whisper-cpp-$file"
      fi
    done

    cp models/download-ggml-model.sh $out/bin/whisper-cpp-download-ggml-model

    wrapProgram $out/bin/whisper-cpp-download-ggml-model \
      --prefix PATH : ${lib.makeBinPath [ wget ]}

    runHook postInstall
  '';

  meta = {
    description = "Port of OpenAI's Whisper model in C/C++";
    longDescription = ''
      To download the models as described in the project's readme, you may
      use the `whisper-cpp-download-ggml-model` binary from this package.
    '';
    homepage = "https://github.com/ggerganov/whisper.cpp";
    license = lib.licenses.mit;
    platforms = lib.platforms.all;
    maintainers = with lib.maintainers; [
      dit7ya
      hughobrien
    ];
  };
})
+1 −0
Original line number Diff line number Diff line
@@ -999,6 +999,7 @@ mapAliases {
  oauth2_proxy = throw "'oauth2_proxy' has been renamed to/replaced by 'oauth2-proxy'"; # Converted to throw 2024-10-17
  oil = lib.warnOnInstantiate "Oil has been replaced with the faster native C++ version and renamed to 'oils-for-unix'. See also https://github.com/oils-for-unix/oils/wiki/Oils-Deployments" oils-for-unix; # Added 2024-10-22
  onevpl-intel-gpu = lib.warnOnInstantiate "onevpl-intel-gpu has been renamed to vpl-gpu-rt" vpl-gpu-rt; # Added 2024-06-04
  openai-whisper-cpp = whisper-cpp; # Added 2024-12-13
  opencv2 = throw "opencv2 has been removed as it is obsolete and was not used by any other package; please migrate to OpenCV 4"; # Added 2024-08-20
  opencv3 = throw "opencv3 has been removed as it is obsolete and was not used by any other package; please migrate to OpenCV 4"; # Added 2024-08-20
  openafs_1_8 = openafs; # Added 2022-08-22
+4 −4
Original line number Diff line number Diff line
@@ -8137,10 +8137,6 @@ with pkgs;
  openai-whisper = with python3.pkgs; toPythonApplication openai-whisper;
  openai-whisper-cpp = darwin.apple_sdk_11_0.callPackage ../tools/audio/openai-whisper-cpp {
    inherit (darwin.apple_sdk_11_0.frameworks) Accelerate CoreGraphics CoreML CoreVideo MetalKit;
  };
  openocd-rp2040 = openocd.overrideAttrs (old: {
    pname = "openocd-rp2040";
    src = fetchFromGitHub {
@@ -8412,6 +8408,10 @@ with pkgs;
    inherit (llvmPackages) clang-unwrapped;
  };
  whisper-cpp-vulkan = whisper-cpp.override {
    vulkanSupport = true;
  };
  watson-ruby = callPackage ../development/tools/misc/watson-ruby { };
  xmake = darwin.apple_sdk_11_0.callPackage ../development/tools/build-managers/xmake {