Unverified Commit 83921582 authored by Jason Miller's avatar Jason Miller Committed by GitHub
Browse files

cudaPackages: add cudaFlags (#205351)

* cudaPackages: add cudaFlags

* cudaNames -> cudaMicroarchitectureNames

* update documentation, remove config for static library removal

* doc link added to flags

* fix whitespace in assignment
parent 08b5fc6d
Loading
Loading
Loading
Loading
+19 −0
Original line number Diff line number Diff line
@@ -32,3 +32,22 @@ mypkg = let
  }});
in callPackage { inherit cudaPackages; };
```

The CUDA NVCC compiler requires flags to determine which hardware you
want to target, in terms of SASS (real hardware) or PTX (JIT kernels).

Nixpkgs targets a default set of real architectures based on the
CUDA toolkit version, with PTX support for future hardware.  Experienced
users may optimize this configuration for a variety of reasons such as
reducing binary size and compile time, supporting legacy hardware, or
optimizing for specific hardware.

You may provide capabilities to add support or reduce binary size through
`config` using `cudaCapabilities = [ "6.0" "7.0" ];`, and set
`cudaForwardCompat = true;` if you want PTX support for future hardware.

Please consult [GPUs supported](https://en.wikipedia.org/wiki/CUDA#GPUs_supported)
for your specific card(s).

Library maintainers should consult [NVCC Docs](https://docs.nvidia.com/cuda/cuda-compiler-driver-nvcc/)
and release notes for their software package.
+2 −3
Original line number Diff line number Diff line
@@ -2,11 +2,10 @@
, opencv3, gtest, blas, gomp, llvmPackages, perl
, cudaSupport ? config.cudaSupport or false, cudaPackages ? {}, nvidia_x11
, cudnnSupport ? cudaSupport
, cudaCapabilities ? [ "3.7" "5.0" "6.0" "7.0" "7.5" "8.0" "8.6" ]
}:

let
  inherit (cudaPackages) cudatoolkit cudnn;
  inherit (cudaPackages) cudatoolkit cudaFlags cudnn;
in

assert cudnnSupport -> cudaSupport;
@@ -51,7 +50,7 @@ stdenv.mkDerivation rec {
      "-DUSE_OLDCMAKECUDA=ON"  # see https://github.com/apache/incubator-mxnet/issues/10743
      "-DCUDA_ARCH_NAME=All"
      "-DCUDA_HOST_COMPILER=${cudatoolkit.cc}/bin/cc"
      "-DMXNET_CUDA_ARCH=${lib.concatStringsSep ";" cudaCapabilities}"
      "-DMXNET_CUDA_ARCH=${cudaFlags.cudaCapabilitiesSemiColonString}"
    ] else [ "-DUSE_CUDA=OFF" ])
    ++ lib.optional (!cudnnSupport) "-DUSE_CUDNN=OFF";

+3 −1
Original line number Diff line number Diff line
@@ -10,6 +10,8 @@ final: prev: let
  ### Add classic cudatoolkit package
  cudatoolkit = buildCudaToolkitPackage ((attrs: attrs // { gcc = prev.pkgs.${attrs.gcc}; }) cudatoolkitVersions.${final.cudaVersion});

  cudaFlags = final.callPackage ./flags.nix {};

in {
  inherit cudatoolkit;
  inherit cudatoolkit cudaFlags;
}
+78 −0
Original line number Diff line number Diff line
# Derives the CUDA capability / architecture flag sets shared by CUDA-enabled
# packages.
#
# Arguments:
#   config      - nixpkgs config; `cudaCapabilities` and `cudaForwardCompat`
#                 are read from it when present
#   lib         - nixpkgs lib
#   cudatoolkit - CUDA toolkit package; only its `version` is used, to select
#                 the default capability list
#
# Returns an attribute set of lists and pre-joined strings (see the `inherit`
# at the bottom of the file).
{ config
, lib
, cudatoolkit
}:
let

  # Flags are determined based on your CUDA toolkit by default.  You may benefit
  # from improved performance, reduced file size, or greater hardware support by
  # passing a configuration based on your specific GPU environment.
  #
  # config.cudaCapabilities: list of hardware generations to support (e.g., "8.0")
  # config.cudaForwardCompat: bool for compatibility with future GPU generations
  #
  # Please see the accompanying documentation or https://github.com/NixOS/nixpkgs/pull/205351

  # Default capability lists, keyed by "cuda<major toolkit version>".
  defaultCudaCapabilities = rec {
    cuda9 = [
      "3.0"
      "3.5"
      "5.0"
      "5.2"
      "6.0"
      "6.1"
      "7.0"
    ];

    cuda10 = cuda9 ++ [
      "7.5"
    ];

    cuda11 = [
      "3.5"
      "5.0"
      "5.2"
      "6.0"
      "6.1"
      "7.0"
      "7.5"
      "8.0"
      "8.6"
    ];

  };

  # Microarchitecture name by major compute capability.
  cudaMicroarchitectureNames = {
    "3" = "Kepler";
    "5" = "Maxwell";
    "6" = "Pascal";
    "7" = "Volta";
    "8" = "Ampere";
    "9" = "Hopper";
  };

  # Capabilities whose microarchitecture name differs from the one shared by
  # the rest of their major version (7.0 is Volta, 7.5 is Turing).
  cudaMicroarchitectureNameOverrides = {
    "7.5" = "Turing";
  };

  # Name for a single capability: exact-capability override first, then the
  # per-major-version table.
  microarchitectureName = ver:
    cudaMicroarchitectureNameOverrides.${ver}
      or cudaMicroarchitectureNames.${lib.versions.major ver};

  # Evaluated once so every derived value agrees on whether PTX for the newest
  # capability is included.  Defaults to true when unset.
  enableForwardCompat = config.cudaForwardCompat or true;

  defaultCudaArchList = defaultCudaCapabilities."cuda${lib.versions.major cudatoolkit.version}";
  cudaRealCapabilities = config.cudaCapabilities or defaultCudaArchList;

  # The last listed capability is the one PTX is emitted for.
  lastRealCapability = lib.last cudaRealCapabilities;
  capabilitiesForward = "${lastRealCapability}+PTX";

  # "8.6" -> "86"
  dropDot = ver: builtins.replaceStrings ["."] [""] ver;

  # archMapper "sm" [ "8.6" ] -> [ "sm_86" ]
  archMapper = feat: map (ver: "${feat}_${dropDot ver}");
  # gencodeMapper "sm" [ "8.6" ] -> [ "-gencode=arch=compute_86,code=sm_86" ]
  # Expects plain capabilities ("8.6"), NOT already-prefixed arch names.
  gencodeMapper = feat: map (ver: "-gencode=arch=compute_${dropDot ver},code=${feat}_${dropDot ver}");
  cudaRealArchs = archMapper "sm" cudaRealCapabilities;
  cudaPTXArchs = archMapper "compute" cudaRealCapabilities;
  # The PTX arch is only appended when forward compatibility is requested,
  # matching cudaCapabilities and cudaGencode below.
  cudaArchs = cudaRealArchs ++ lib.optional enableForwardCompat (lib.last cudaPTXArchs);

  cudaArchNames = lib.unique (map microarchitectureName cudaRealCapabilities);
  cudaCapabilities = cudaRealCapabilities ++ lib.optional enableForwardCompat capabilitiesForward;
  # The forward-compat entry is built from the raw capability; feeding an
  # element of cudaPTXArchs here would yield "compute_compute_XY".
  cudaGencode = gencodeMapper "sm" cudaRealCapabilities
    ++ lib.optionals enableForwardCompat (gencodeMapper "compute" [ lastRealCapability ]);

  cudaCapabilitiesCommaString = lib.strings.concatStringsSep "," cudaCapabilities;
  cudaCapabilitiesSemiColonString = lib.strings.concatStringsSep ";" cudaCapabilities;
  cudaRealCapabilitiesCommaString = lib.strings.concatStringsSep "," cudaRealCapabilities;

in
{
  inherit cudaArchs cudaArchNames cudaCapabilities cudaCapabilitiesCommaString cudaCapabilitiesSemiColonString
    cudaRealCapabilities cudaRealCapabilitiesCommaString cudaGencode cudaRealArchs cudaPTXArchs;
}
+4 −32
Original line number Diff line number Diff line
{ lib, stdenv, fetchurl, cmake, gfortran, ninja, cudaPackages, libpthreadstubs, lapack, blas }:

let
  inherit (cudaPackages) cudatoolkit;
  inherit (cudaPackages) cudatoolkit cudaFlags;
in

assert let majorIs = lib.versions.major cudatoolkit.version;
@@ -10,36 +10,6 @@ assert let majorIs = lib.versions.major cudatoolkit.version;
let
  version = "2.6.2";

  # We define a specific set of CUDA compute capabilities here,
  # because CUDA 11 does not support compute capability 3.0. Also,
  # we use it to enable newer capabilities that are not enabled
  # by magma by default. The list of supported architectures
  # can be found in magma's top-level CMakeLists.txt.
  cudaCapabilities = rec {
    cuda9 = [
      "Kepler"  # 3.0, 3.5
      "Maxwell" # 5.0
      "Pascal"  # 6.0
      "Volta"   # 7.0
    ];

    cuda10 = [
      "Turing"  # 7.5
    ] ++ cuda9;

    cuda11 = [
      "sm_35"   # sm_30 is not supported by CUDA 11
      "Maxwell" # 5.0
      "Pascal"  # 6.0
      "Volta"   # 7.0
      "Turing"  # 7.5
      "Ampere"  # 8.0
    ];
  };

  capabilityString = lib.strings.concatStringsSep ","
    cudaCapabilities."cuda${lib.versions.major cudatoolkit.version}";

in stdenv.mkDerivation {
  pname = "magma";
  inherit version;
@@ -53,7 +23,9 @@ in stdenv.mkDerivation {

  buildInputs = [ cudatoolkit libpthreadstubs lapack blas ];

  cmakeFlags = [ "-DGPU_TARGET=${capabilityString}" ];
  cmakeFlags = [
    "-DGPU_TARGET=${builtins.concatStringsSep "," cudaFlags.cudaRealArchs}"
  ];

  doCheck = false;

Loading