Unverified commit 4c930c06 authored by Someone, committed by GitHub

Merge pull request #331071 from ereslibre/fix-nvidia-container-toolkit-docker-contamination

Fix nvidia container toolkit docker contamination
parents b9459bb5 058e8f5e
+5 −1
@@ -256,7 +256,11 @@ in
        live-restore = mkDefault cfg.liveRestore;
        runtimes = mkIf cfg.enableNvidia {
          nvidia = {
            path = "${pkgs.nvidia-docker}/bin/nvidia-container-runtime";
            # Use the legacy nvidia-container-runtime wrapper to allow
            # the `--runtime=nvidia` approach to expose
            # GPUs. Starting with Docker > 25, CDI can be used
            # instead, removing the need for runtime wrappers.
            path = lib.getExe' pkgs.nvidia-docker "nvidia-container-runtime.legacy";
          };
        };
      };
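For reference, the CDI-based setup that the new comment points to looks roughly like the following NixOS snippet. This is an illustrative sketch, not part of this diff; it assumes a nixpkgs revision that has the hardware.nvidia-container-toolkit option recommended by the deprecation warnings below.

    { ... }:
    {
      # Generate CDI specifications for the available NVIDIA GPUs.
      hardware.nvidia-container-toolkit.enable = true;
      virtualisation.docker.enable = true;
      # With Docker >= 25, containers request GPUs by CDI device name, e.g.
      #   docker run --device nvidia.com/gpu=all ubuntu nvidia-smi
      # instead of relying on the legacy `--runtime=nvidia` wrapper configured above.
    }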
+1 −6
@@ -170,7 +170,7 @@ in
    lib.mkIf cfg.enable {
      warnings = lib.optionals cfg.enableNvidia [
        ''
          You have set virtualisation.podman.enableNvidia. This option is deprecated, please set virtualisation.containers.cdi.dynamic.nvidia.enable instead.
          You have set virtualisation.podman.enableNvidia. This option is deprecated, please set hardware.nvidia-container-toolkit.enable instead.
        ''
      ];

@@ -189,11 +189,6 @@ in
        enable = true; # Enable common /etc/containers configuration
        containersConf.settings = {
          network.network_backend = "netavark";
        } // lib.optionalAttrs cfg.enableNvidia {
          engine = {
            conmon_env_vars = [ "PATH=${lib.makeBinPath [ pkgs.nvidia-podman ]}" ];
            runtimes.nvidia = [ "${pkgs.nvidia-podman}/bin/nvidia-container-runtime" ];
          };
        };
      };
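The removed engine.runtimes.nvidia and conmon_env_vars wiring has no direct replacement in this module: as the updated warning above says, the intended path is CDI. A minimal migration sketch, illustrative and not part of this diff:

    { ... }:
    {
      # Previously: virtualisation.podman.enableNvidia = true; (now deprecated)
      hardware.nvidia-container-toolkit.enable = true;
      virtualisation.podman.enable = true;
      # Containers then select GPUs through CDI, e.g.
      #   podman run --device nvidia.com/gpu=all ubuntu nvidia-smi
      # so no per-engine nvidia runtime entry or conmon PATH override is needed.
    }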

+9 −46
@@ -3,10 +3,7 @@
, fetchFromGitLab
, makeWrapper
, buildGoModule
, linkFarm
, writeShellScript
, formats
, containerRuntimePath ? null
, configTemplate ? null
, configTemplatePath ? null
, libnvidia-container
@@ -17,20 +14,6 @@ assert configTemplate != null -> (lib.isAttrs configTemplate && configTemplatePath == null);
assert configTemplatePath != null -> (lib.isStringLike configTemplatePath && configTemplate == null);

let
  isolatedContainerRuntimePath = linkFarm "isolated_container_runtime_path" [
    {
      name = "runc";
      path = containerRuntimePath;
    }
  ];
  warnIfXdgConfigHomeIsSet = writeShellScript "warn_if_xdg_config_home_is_set" ''
    set -eo pipefail

    if [ -n "$XDG_CONFIG_HOME" ]; then
      echo >&2 "$(tput setaf 3)warning: \$XDG_CONFIG_HOME=$XDG_CONFIG_HOME$(tput sgr 0)"
    fi
  '';

  configToml = if configTemplatePath != null then configTemplatePath else (formats.toml { }).generate "config.toml" configTemplate;

  # From https://gitlab.com/nvidia/container-toolkit/container-toolkit/-/blob/03cbf9c6cd26c75afef8a2dd68e0306aace80401/Makefile#L54
@@ -48,6 +31,8 @@ buildGoModule rec {

  };

  outputs = [ "out" "tools" ];

  vendorHash = null;

  patches = [
@@ -91,14 +76,6 @@ buildGoModule rec {
    makeWrapper
  ];

  preConfigure = lib.optionalString (containerRuntimePath != null) ''
    # Ensure the runc symlink isn't broken:
    if ! readlink --quiet --canonicalize-existing "${isolatedContainerRuntimePath}/runc" ; then
      echo "${isolatedContainerRuntimePath}/runc: broken symlink" >&2
      exit 1
    fi
  '';

  checkFlags =
    let
      skippedTests = [
@@ -109,33 +86,19 @@ buildGoModule rec {
    in
    [ "-skip" "${builtins.concatStringsSep "|" skippedTests}" ];

  postInstall = lib.optionalString (containerRuntimePath != null) ''
    mkdir -p $out/etc/nvidia-container-runtime
  postInstall = ''
    wrapProgram $out/bin/nvidia-container-runtime-hook \
      --prefix PATH : ${libnvidia-container}/bin

    # nvidia-container-runtime invokes docker-runc or runc if that isn't
    # available on PATH.
    #
    # Also set XDG_CONFIG_HOME if it isn't already to allow overriding
    # configuration. This in turn allows users to have the nvidia container
    # runtime enabled for any number of higher level runtimes like docker and
    # podman, i.e., there's no need for mutual exclusivity on what high
    # level runtime can enable the nvidia runtime because each high level
    # runtime has its own config.toml file.
    wrapProgram $out/bin/nvidia-container-runtime \
      --run "${warnIfXdgConfigHomeIsSet}" \
      --prefix PATH : ${isolatedContainerRuntimePath}:${libnvidia-container}/bin \
      --set-default XDG_CONFIG_HOME $out/etc
    mkdir -p $tools/bin
    mv $out/bin/{containerd,crio,docker,nvidia-toolkit,toolkit} $tools/bin
  '' + lib.optionalString (configTemplate != null || configTemplatePath != null) ''
    mkdir -p $out/etc/nvidia-container-runtime

    cp ${configToml} $out/etc/nvidia-container-runtime/config.toml

    substituteInPlace $out/etc/nvidia-container-runtime/config.toml \
      --subst-var-by glibcbin ${lib.getBin glibc}

    # See: https://gitlab.com/nvidia/container-toolkit/container-toolkit/-/blob/03cbf9c6cd26c75afef8a2dd68e0306aace80401/packaging/debian/nvidia-container-toolkit.postinst#L12
    ln -s $out/bin/nvidia-container-runtime-hook $out/bin/nvidia-container-toolkit

    wrapProgram $out/bin/nvidia-container-toolkit \
      --add-flags "-config ${placeholder "out"}/etc/nvidia-container-runtime/config.toml"
  '';

  meta = with lib; {
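With the new split outputs and the kept configTemplate/configTemplatePath arguments, a downstream configuration could consume the package roughly as below. Illustrative only: the top-level attribute name nvidia-container-toolkit and the template contents are assumptions, not taken from this diff.

    { pkgs, ... }:
    let
      toolkit = pkgs.nvidia-container-toolkit.override {
        # Hypothetical template; rendered to config.toml via (formats.toml { }).generate
        # and patched by the substituteInPlace call above.
        configTemplate = {
          nvidia-container-cli.ldconfig = "@@glibcbin@/bin/ldconfig";
        };
      };
    in
    {
      environment.systemPackages = [
        toolkit       # $out: nvidia-container-runtime-hook plus the wrapped nvidia-container-toolkit
        toolkit.tools # $tools: the containerd/crio/docker/nvidia-toolkit/toolkit helpers moved out of $out/bin
      ];
    }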
+0 −36
{
  lib,
  newScope,
  docker,
  libnvidia-container,
  runc,
  symlinkJoin,
}:

@@ -30,36 +27,12 @@ lib.makeScope newScope (
      };
    };
    nvidia-container-toolkit-docker = self.callPackage ./package.nix {
      containerRuntimePath = "${docker}/libexec/docker/docker";
      configTemplate = self.dockerConfig;
    };

    podmanConfig = {
      disable-require = true;
      #swarm-resource = "DOCKER_RESOURCE_GPU";

      nvidia-container-cli = {
        #root = "/run/nvidia/driver";
        #path = "/usr/bin/nvidia-container-cli";
        environment = [ ];
        #debug = "/var/log/nvidia-container-runtime-hook.log";
        ldcache = "/tmp/ld.so.cache";
        load-kmods = true;
        no-cgroups = true;
        #user = "root:video";
        ldconfig = "@@glibcbin@/bin/ldconfig";
      };
    };
    nvidia-container-toolkit-podman = self.nvidia-container-toolkit-docker.override {
      containerRuntimePath = lib.getExe runc;

      configTemplate = self.podmanConfig;
    };

    nvidia-docker = symlinkJoin {
      name = "nvidia-docker";
      paths = [
        libnvidia-container
        self.nvidia-docker-unwrapped
        self.nvidia-container-toolkit-docker
      ];
@@ -67,14 +40,5 @@ lib.makeScope newScope (
    };
    nvidia-docker-unwrapped =
      self.callPackage ./nvidia-docker.nix { };

    nvidia-podman = symlinkJoin {
      name = "nvidia-podman";
      paths = [
        libnvidia-container
        self.nvidia-container-toolkit-podman
      ];
      inherit (self.nvidia-container-toolkit-podman) meta;
    };
  }
)
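A note on the @@glibcbin@ marker that appears in these config templates: package.nix renders the template with (formats.toml { }).generate and then runs substituteInPlace --subst-var-by glibcbin on the generated config.toml, as shown in the previous file. A small sketch of that substitution, with illustrative values that are not part of this diff:

    let
      glibcbin = "/nix/store/<hash>-glibc-bin"; # stand-in for ${lib.getBin glibc}
      templateLine = ''ldconfig = "@@glibcbin@/bin/ldconfig"'';
    in
    # substituteInPlace consumes one @glibcbin@ marker; the leading "@" that is
    # left over is deliberate: nvidia-container-cli treats an "@"-prefixed
    # ldconfig path as one to resolve on the host.
    builtins.replaceStrings [ "@glibcbin@" ] [ glibcbin ] templateLine
    # => ldconfig = "@/nix/store/<hash>-glibc-bin/bin/ldconfig"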
+1 −0
@@ -1060,6 +1060,7 @@ mapAliases ({
  noto-fonts-extra = noto-fonts; # Added 2023-04-08
  NSPlist = nsplist; # Added 2024-01-05
  nushellFull = lib.warn "`nushellFull` has been replaced by `nushell` as its features no longer exist" nushell; # Added 2024-05-30
  nvidia-podman = throw "podman should use the Container Device Interface (CDI) instead. See https://web.archive.org/web/20240729183805/https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/latest/install-guide.html#configuring-podman"; # Added 2024-08-02
  nvidia-thrust = throw "nvidia-thrust has been removed because the project was deprecated; use cudaPackages.cuda_cccl";
  nvtop = lib.warn "nvtop has been renamed to nvtopPackages.full" nvtopPackages.full; # Added 2024-02-25
  nvtop-amd = lib.warn "nvtop-amd has been renamed to nvtopPackages.amd" nvtopPackages.amd; # Added 2024-02-25