Unverified Commit 7953a786 authored by Someone's avatar Someone Committed by GitHub
Browse files

Merge pull request #280076 from SomeoneSerge/fix/apptainer-nv-cherry

apptainer: unbreak --nv (userns+nvliblist.conf variant)
parents 2f7f9e4a 35de6f1c
Loading
Loading
Loading
Loading
+6 −1
Original line number Diff line number Diff line
@@ -61,7 +61,12 @@ in
    };
    enableSuid = mkOption {
      type = types.bool;
      default = true;
      # SingularityCE requires SETUID for most things. Apptainer prefers user
      # namespaces, e.g. `apptainer exec --nv` would fail if built
      # `--with-suid`:
      # > `FATAL: nvidia-container-cli not allowed in setuid mode`
      default = cfg.package.projectName != "apptainer";
      defaultText = literalExpression ''config.services.singularity.package.projectName != "apptainer"'';
      example = false;
      description = mdDoc ''
        Whether to enable the SUID support of Singularity/Apptainer.
+84 −0
Original line number Diff line number Diff line
From 783ec26c0d83013baf04579a6a415d7f8776ac93 Mon Sep 17 00:00:00 2001
From: Someone Serge <sergei.kozlukov@aalto.fi>
Date: Sun, 7 Jan 2024 11:48:24 +0000
Subject: [PATCH] ldCache(): patch for @driverLink@

---
 internal/pkg/util/paths/resolve.go | 41 +++++++++++++++++++++++++++---
 1 file changed, 38 insertions(+), 3 deletions(-)

diff --git a/internal/pkg/util/paths/resolve.go b/internal/pkg/util/paths/resolve.go
index db45d9db1..9d0110b6b 100644
--- a/internal/pkg/util/paths/resolve.go
+++ b/internal/pkg/util/paths/resolve.go
@@ -14,6 +14,7 @@ import (
 	"fmt"
 	"os"
 	"os/exec"
+	"path"
 	"path/filepath"
 	"regexp"
 	"strings"
@@ -154,14 +155,49 @@ func Resolve(fileList []string) ([]string, []string, error) {
 // lists three variants of libEGL.so.1 that are in different locations, we only
 // report the first, highest priority, variant.
 func ldCache() (map[string]string, error) {
+    driverDirs := strings.Split("@driverLink@/lib", ":")
+    if machine, err := elfMachine(); err == nil && machine == elf.EM_386 {
+        driverDirs = strings.Split("@driverLink@-32/lib", ":")
+    }
+
+    soPattern, err := regexp.Compile(`[^\s]+\.so(\.\d+(\.\d+(\.\d+)?)?)?$`)
+    if err != nil {
+		return nil, fmt.Errorf("could not compile ldconfig regexp: %v", err)
+    }
+
+	ldCache := make(map[string]string)
+    for _, dirPath := range driverDirs {
+        dir, err := os.Open(dirPath)
+        if err != nil {
+            /* Maybe we're not running under NixOS */
+            continue
+        }
+        files, err := dir.ReadDir(-1)
+        if err != nil {
+            continue
+        }
+        for _, f := range files {
+            if !soPattern.MatchString(f.Name()) {
+                continue
+            }
+            libName := f.Name()
+            libPath := path.Join(dirPath, f.Name())
+			if _, ok := ldCache[libName]; !ok {
+				ldCache[libName] = libPath
+			}
+        }
+    }
+
 	// walk through the ldconfig output and add entries which contain the filenames
 	// returned by nvidia-container-cli OR the nvliblist.conf file contents
 	ldconfig, err := bin.FindBin("ldconfig")
-	if err != nil {
+	if err != nil && len(ldCache) == 0 {
+        // Note that missing ldconfig is only an "error" as long
+        // as there's no driverLink
 		return nil, err
 	}
 	out, err := exec.Command(ldconfig, "-p").Output()
-	if err != nil {
+	if err != nil && len(ldCache) == 0 {
 		return nil, fmt.Errorf("could not execute ldconfig: %v", err)
 	}
 
@@ -173,7 +209,6 @@ func ldCache() (map[string]string, error) {
 	}
 
 	// store library name with associated path
-	ldCache := make(map[string]string)
 	for _, match := range r.FindAllSubmatch(out, -1) {
 		if match != nil {
 			// libName is the "libnvidia-ml.so.1" (from the above example)
-- 
2.42.0
+51 −1
Original line number Diff line number Diff line
@@ -27,12 +27,14 @@ in
, buildGoModule
, runCommandLocal
  # Native build inputs
, addDriverRunpath
, makeWrapper
, pkg-config
, util-linux
, which
  # Build inputs
, bash
, callPackage
, conmon
, coreutils
, cryptsetup
@@ -54,6 +56,9 @@ in
, hello
  # Overridable configurations
, enableNvidiaContainerCli ? true
  # --nvccli currently requires extra privileges:
  # https://github.com/apptainer/apptainer/issues/1893#issuecomment-1881240800
, forceNvcCli ? false
  # Compile with seccomp support
  # SingularityCE 3.10.0 and above requires explicit --without-seccomp when libseccomp is not available.
, enableSeccomp ? true
@@ -65,6 +70,7 @@ in
  # Whether to compile with SUID support
, enableSuid ? false
, starterSuidPath ? null
, substituteAll
  # newuidmapPath and newgidmapPath are to support --fakeroot
  # where those SUID-ed executables are unavailable from the FHS system PATH.
  # Path to SUID-ed newuidmap executable
@@ -94,6 +100,10 @@ in
(buildGoModule {
  inherit pname version src;

  patches = lib.optionals (projectName == "apptainer") [
    (substituteAll { src = ./apptainer/0001-ldCache-patch-for-driverLink.patch; inherit (addDriverRunpath) driverLink; })
  ];

  # Override vendorHash with the output got from
  # nix-prefetch -E "{ sha256 }: ((import ./. { }).apptainer.override { vendorHash = sha256; }).goModules"
  # or with `null` when using vendored source tarball.
@@ -175,11 +185,18 @@ in
    if [[ ! -e .git || ! -e VERSION ]]; then
      echo "${version}" > VERSION
    fi

    # Patch shebangs for script run during build
    patchShebangs --build "$configureScript" makeit e2e scripts mlocal/scripts

    # Patching the hard-coded defaultPath by prefixing the packages in defaultPathInputs
    substituteInPlace cmd/internal/cli/actions.go \
      --replace "defaultPath = \"${defaultPathOriginal}\"" "defaultPath = \"''${defaultPathInputs// /\/bin:}''${defaultPathInputs:+/bin:}${defaultPathOriginal}\""

    substituteInPlace internal/pkg/util/gpu/nvidia.go \
      --replace \
        'return fmt.Errorf("/usr/bin not writable in the container")' \
        ""
  '';

  postConfigure = ''
@@ -212,7 +229,7 @@ in
    wrapProgram "$out/bin/${projectName}" \
      --prefix PATH : "''${defaultPathInputs// /\/bin:}''${defaultPathInputs:+/bin:}"
    # Make changes in the config file
    ${lib.optionalString enableNvidiaContainerCli ''
    ${lib.optionalString forceNvcCli ''
      substituteInPlace "$out/etc/${projectName}/${projectName}.conf" \
        --replace "use nvidia-container-cli = no" "use nvidia-container-cli = yes"
    ''}
@@ -264,5 +281,38 @@ in
        singularity = finalAttrs.finalPackage;
      };
    };
    # GPU-dependent checks, defined only for Apptainer.
    # These should be in `tests`, but Ofborg would then skip
    # image-hello-cowsay because saxpy is unfree.
    gpuChecks = lib.optionalAttrs (projectName == "apptainer") {
      # Container image whose contents are the CUDA `saxpy` program, built
      # with the package under test.
      image-saxpy = callPackage
        ({ singularity-tools, cudaPackages }:
          singularity-tools.buildImage {
            name = "saxpy";
            contents = [ cudaPackages.saxpy ];
            memSize = 2048;
            diskSize = 2048;
            singularity = finalAttrs.finalPackage;
          })
        { };
      # Runs `saxpy` from the image above through `exec --nv`.
      saxpy =
        callPackage
          ({ runCommand, writeShellScriptBin }:
            let
              # Stand-alone launcher, also exposed as `passthru.unwrapped` so
              # it can be used outside of the derivation below. Extra
              # command-line arguments are forwarded verbatim (hence the
              # quoted "$@"). The image is looked up under `gpuChecks`, which
              # is where `image-saxpy` is actually defined.
              unwrapped = writeShellScriptBin "apptainer-cuda-saxpy"
                ''
                  ${lib.getExe finalAttrs.finalPackage} exec --nv "$@" ${finalAttrs.passthru.gpuChecks.image-saxpy} saxpy
                '';
            in
            runCommand "run-apptainer-cuda-saxpy"
              {
                # Restricts the build to builders that advertise the "cuda"
                # system feature.
                requiredSystemFeatures = [ "cuda" ];
                nativeBuildInputs = [ unwrapped ];
                passthru = { inherit unwrapped; };
              }
              # A derivation must produce its output path, so create an
              # empty `$out` once the check has run.
              ''
                apptainer-cuda-saxpy
                touch "$out"
              '')
          { };
    };
  };
})
+3 −1
Original line number Diff line number Diff line
@@ -36,7 +36,9 @@ backendStdenv.mkDerivation {
  buildInputs =
    lib.optionals (lib.versionOlder cudaVersion "11.4") [cudatoolkit]
    ++ lib.optionals (lib.versionAtLeast cudaVersion "11.4") [
      libcublas
      libcublas.dev
      libcublas.lib
      libcublas.static
      cuda_cudart
    ]
    ++ lib.optionals (lib.versionAtLeast cudaVersion "12.0") [cuda_cccl];