Unverified Commit f7b4d574 authored by Rafael Fernández López's avatar Rafael Fernández López
Browse files

virtualisation/docker: fix nvidia wrapper

Although CDI should be used so that container runtime
wrappers are no longer required, fix the nvidia-container-runtime
integration with Docker for setups running Docker < 25.
parent df2df4c3
Loading
Loading
Loading
Loading
+5 −1
Original line number Diff line number Diff line
@@ -256,7 +256,11 @@ in
        live-restore = mkDefault cfg.liveRestore;
        runtimes = mkIf cfg.enableNvidia {
          nvidia = {
            path = "${pkgs.nvidia-docker}/bin/nvidia-container-runtime";
            # Use the legacy nvidia-container-runtime wrapper to allow
            # the `--runtime=nvidia` approach to expose
            # GPUs. Starting with Docker > 25, CDI can be used
            # instead, removing the need for runtime wrappers.
            path = lib.getExe' pkgs.nvidia-docker "nvidia-container-runtime.legacy";
          };
        };
      };
+3 −48
Original line number Diff line number Diff line
@@ -3,10 +3,7 @@
, fetchFromGitLab
, makeWrapper
, buildGoModule
, linkFarm
, writeShellScript
, formats
, containerRuntimePath ? null
, configTemplate ? null
, configTemplatePath ? null
, libnvidia-container
@@ -17,20 +14,6 @@ assert configTemplate != null -> (lib.isAttrs configTemplate && configTemplatePa
assert configTemplatePath != null -> (lib.isStringLike configTemplatePath && configTemplate == null);

let
  isolatedContainerRuntimePath = linkFarm "isolated_container_runtime_path" [
    {
      name = "runc";
      path = containerRuntimePath;
    }
  ];
  warnIfXdgConfigHomeIsSet = writeShellScript "warn_if_xdg_config_home_is_set" ''
    set -eo pipefail

    if [ -n "$XDG_CONFIG_HOME" ]; then
      echo >&2 "$(tput setaf 3)warning: \$XDG_CONFIG_HOME=$XDG_CONFIG_HOME$(tput sgr 0)"
    fi
  '';

  configToml = if configTemplatePath != null then configTemplatePath else (formats.toml { }).generate "config.toml" configTemplate;

  # From https://gitlab.com/nvidia/container-toolkit/container-toolkit/-/blob/03cbf9c6cd26c75afef8a2dd68e0306aace80401/Makefile#L54
@@ -93,14 +76,6 @@ buildGoModule rec {
    makeWrapper
  ];

  preConfigure = lib.optionalString (containerRuntimePath != null) ''
    # Ensure the runc symlink isn't broken:
    if ! readlink --quiet --canonicalize-existing "${isolatedContainerRuntimePath}/runc" ; then
      echo "${isolatedContainerRuntimePath}/runc: broken symlink" >&2
      exit 1
    fi
  '';

  checkFlags =
    let
      skippedTests = [
@@ -112,38 +87,18 @@ buildGoModule rec {
    [ "-skip" "${builtins.concatStringsSep "|" skippedTests}" ];

  postInstall = ''
    mkdir -p $tools/bin
    mv $out/bin/{containerd,crio,docker,nvidia-toolkit,toolkit} -t $tools/bin

    wrapProgram $out/bin/nvidia-container-runtime-hook \
      --prefix PATH : ${libnvidia-container}/bin

    mkdir -p $tools/bin
    mv $out/bin/{containerd,crio,docker,nvidia-toolkit,toolkit} $tools/bin
  '' + lib.optionalString (configTemplate != null || configTemplatePath != null) ''
    mkdir -p $out/etc/nvidia-container-runtime

    # nvidia-container-runtime invokes docker-runc or runc if that isn't
    # available on PATH.
    #
    # Also set XDG_CONFIG_HOME if it isn't already to allow overriding
    # configuration. This in turn allows users to have the nvidia container
    # runtime enabled for any number of higher level runtimes like docker and
    # podman, i.e., there's no need for mutual exclusivity on what high
    # level runtime can enable the nvidia runtime because each high level
    # runtime has its own config.toml file.
    wrapProgram $out/bin/nvidia-container-runtime \
      --run "${warnIfXdgConfigHomeIsSet}" \
      --prefix PATH : ${isolatedContainerRuntimePath}:${libnvidia-container}/bin \
      --set-default XDG_CONFIG_HOME $out/etc

    cp ${configToml} $out/etc/nvidia-container-runtime/config.toml

    substituteInPlace $out/etc/nvidia-container-runtime/config.toml \
      --subst-var-by glibcbin ${lib.getBin glibc}

    # See: https://gitlab.com/nvidia/container-toolkit/container-toolkit/-/blob/03cbf9c6cd26c75afef8a2dd68e0306aace80401/packaging/debian/nvidia-container-toolkit.postinst#L12
    ln -s $out/bin/nvidia-container-runtime-hook $out/bin/nvidia-container-toolkit

    wrapProgram $out/bin/nvidia-container-toolkit \
      --add-flags "-config ${placeholder "out"}/etc/nvidia-container-runtime/config.toml"
  '';

  meta = with lib; {
+0 −8
Original line number Diff line number Diff line
{
  lib,
  newScope,
  docker,
  libnvidia-container,
  runc,
  symlinkJoin,
}:

@@ -30,7 +27,6 @@ lib.makeScope newScope (
      };
    };
    nvidia-container-toolkit-docker = self.callPackage ./package.nix {
      containerRuntimePath = "${docker}/libexec/docker/docker";
      configTemplate = self.dockerConfig;
    };

@@ -51,15 +47,12 @@ lib.makeScope newScope (
      };
    };
    nvidia-container-toolkit-podman = self.nvidia-container-toolkit-docker.override {
      containerRuntimePath = lib.getExe runc;

      configTemplate = self.podmanConfig;
    };

    nvidia-docker = symlinkJoin {
      name = "nvidia-docker";
      paths = [
        libnvidia-container
        self.nvidia-docker-unwrapped
        self.nvidia-container-toolkit-docker
      ];
@@ -71,7 +64,6 @@ lib.makeScope newScope (
    nvidia-podman = symlinkJoin {
      name = "nvidia-podman";
      paths = [
        libnvidia-container
        self.nvidia-container-toolkit-podman
      ];
      inherit (self.nvidia-container-toolkit-podman) meta;