Commit dddc103f authored by Connor Baker's avatar Connor Baker Committed by Connor Baker
Browse files

cudaFlags: rewrite to capture all architectures and fix #215436

parent 7688c980
Loading
Loading
Loading
Loading
+1 −1
Original line number Diff line number Diff line
@@ -50,7 +50,7 @@ stdenv.mkDerivation rec {
      "-DUSE_OLDCMAKECUDA=ON"  # see https://github.com/apache/incubator-mxnet/issues/10743
      "-DCUDA_ARCH_NAME=All"
      "-DCUDA_HOST_COMPILER=${cudatoolkit.cc}/bin/cc"
      "-DMXNET_CUDA_ARCH=${cudaFlags.cudaCapabilitiesSemiColonString}"
      "-DMXNET_CUDA_ARCH=${builtins.concatStringsSep ";" cudaFlags.cudaRealArches}"
    ] else [ "-DUSE_CUDA=OFF" ])
    ++ lib.optional (!cudnnSupport) "-DUSE_CUDNN=OFF";

+120 −59
Original line number Diff line number Diff line
@@ -2,7 +2,18 @@
, lib
, cudatoolkit
}:

# Type aliases
# Gpu = {
#   archName: String, # e.g., "Hopper"
#   computeCapability: String, # e.g., "9.0"
#   minCudaVersion: String, # e.g., "11.8"
#   maxCudaVersion: String, # e.g., "12.0"
# }

let
  inherit (lib) attrsets lists strings trivial versions;
  cudaVersion = cudatoolkit.version;

  # Flags are determined based on your CUDA toolkit by default.  You may benefit
  # from improved performance, reduced file size, or greater hardware support by
@@ -13,66 +24,116 @@ let
  #
  # Please see the accompanying documentation or https://github.com/NixOS/nixpkgs/pull/205351

  defaultCudaCapabilities = rec {
    cuda9 = [
      "3.0"
      "3.5"
      "5.0"
      "5.2"
      "6.0"
      "6.1"
      "7.0"
    ];

    cuda10 = cuda9 ++ [
      "7.5"
    ];

    cuda11 = [
      "3.5"
      "5.0"
      "5.2"
      "6.0"
      "6.1"
      "7.0"
      "7.5"
      "8.0"
      "8.6"
    ];

  };

  cudaMicroarchitectureNames = {
    "3" = "Kepler";
    "5" = "Maxwell";
    "6" = "Pascal";
    "7" = "Volta";
    "8" = "Ampere";
    "9" = "Hopper";
  };

  defaultCudaArchList = defaultCudaCapabilities."cuda${lib.versions.major cudatoolkit.version}";
  cudaRealCapabilities = config.cudaCapabilities or defaultCudaArchList;
  capabilitiesForward = "${lib.last cudaRealCapabilities}+PTX";
  # gpus :: List Gpu
  # Every known GPU description, loaded from the sibling file gpus.nix.
  gpus = import ./gpus.nix;

  # isSupported :: Gpu -> Bool
  # True when cudaVersion lies within the GPU's inclusive
  # [minCudaVersion, maxCudaVersion] range.
  isSupported = gpu:
    let
      # cudaVersion >= minCudaVersion
      atLeastMin = strings.versionAtLeast cudaVersion gpu.minCudaVersion;
      # maxCudaVersion >= cudaVersion, i.e. maxCudaVersion is not older than cudaVersion
      atMostMax = strings.versionAtLeast gpu.maxCudaVersion cudaVersion;
    in
    atLeastMin && atMostMax;

  # supportedGpus :: List Gpu
  # The elements of gpus for which isSupported holds, i.e. the GPUs which are
  # supported by the provided CUDA version. The relative order of gpus is kept.
  supportedGpus = builtins.filter isSupported gpus;

  # cudaArchNameToVersions :: AttrSet String (List String)
  # Groups the supported GPUs by architecture name and collects, per name, the
  # compute capabilities of that architecture in the order they are encountered.
  # For example, "Ampere" may map to [ "8.0" "8.6" "8.7" ].
  cudaArchNameToVersions =
    lists.groupBy'
      # Fold step: append this GPU's compute capability to its group's list.
      (capabilities: gpu: capabilities ++ [ gpu.computeCapability ])
      # Every group starts out empty.
      [ ]
      # Grouping key: the architecture name.
      (gpu: gpu.archName)
      supportedGpus;

  # cudaArchNames :: List String
  # Architecture names of the supported GPUs, de-duplicated, in the order in
  # which they occur in supportedGpus (the order of the list imported from ./gpus.nix).
  # NOTE: builtins.attrNames cudaArchNameToVersions is deliberately not used here,
  #   because it would return the names sorted alphabetically.
  cudaArchNames = lists.unique (builtins.catAttrs "archName" supportedGpus);

  # cudaComputeCapabilityToName :: AttrSet String String
  # Lookup table from a compute capability to the name of its architecture,
  # built from the supported GPUs. For example, "8.0" maps to "Ampere".
  cudaComputeCapabilityToName =
    let
      toPair = gpu: attrsets.nameValuePair gpu.computeCapability gpu.archName;
    in
    builtins.listToAttrs (lists.map toPair supportedGpus);

  # cudaComputeCapabilities :: List String
  # The user-provided config.cudaCapabilities if it is set; otherwise the compute
  # capability of every supported GPU.
  # NOTE: It's important that we don't rely on builtins.attrNames cudaComputeCapabilityToName here;
  #   otherwise, we'll get the versions sorted in alphabetical order. The list imported from
  #   ./gpus.nix is already sorted, so we'll preserve that order here.
  cudaComputeCapabilities = config.cudaCapabilities
    or (lists.map (gpu: gpu.computeCapability) supportedGpus);

  # cudaForwardComputeCapability :: String
  # The last entry of cudaComputeCapabilities with a "+PTX" suffix appended.
  cudaForwardComputeCapability = "${lists.last cudaComputeCapabilities}+PTX";

  # cudaComputeCapabilitiesAndForward :: List String
  # cudaComputeCapabilities followed by the forward compatibility entry.
  # When config.cudaForwardCompat is false nothing is appended, so the result
  # equals cudaComputeCapabilities.
  cudaComputeCapabilitiesAndForward =
    let
      # Forward compatibility is on unless the user explicitly disables it.
      forwardCompatEnabled = config.cudaForwardCompat or true;
    in
    cudaComputeCapabilities ++ lists.optional forwardCompatEnabled cudaForwardComputeCapability;

  # dropDot :: String -> String
  # Removes every "." from the given string, e.g. "8.6" becomes "86".
  dropDot = builtins.replaceStrings [ "." ] [ "" ];

  archMapper = feat: map (ver: "${feat}_${dropDot ver}");
  gencodeMapper = feat: map (ver: "-gencode=arch=compute_${dropDot ver},code=${feat}_${dropDot ver}");
  cudaRealArchs = archMapper "sm" cudaRealCapabilities;
  cudaPTXArchs = archMapper "compute" cudaRealCapabilities;
  cudaArchs = cudaRealArchs ++ [ (lib.last cudaPTXArchs) ];
  # archMapper :: String -> List String -> List String
  # Prefixes each compute capability (with its dot removed) by "<feat>_".
  # For example, "sm" and [ "8.0" "8.6" "8.7" ] produces [ "sm_80" "sm_86" "sm_87" ].
  archMapper = feat: computeCapabilities:
    lists.map (cc: "${feat}_${dropDot cc}") computeCapabilities;

  # gencodeMapper :: String -> List String -> List String
  # Builds one "-gencode" argument per compute capability; arch is always the
  # compute_XY form, while code uses the given feature as its prefix.
  # For example, "sm" and [ "8.0" "8.6" ] produces
  # [ "-gencode=arch=compute_80,code=sm_80" "-gencode=arch=compute_86,code=sm_86" ].
  gencodeMapper = feat: lists.map (
    computeCapability:
    let
      # Dotless form shared by the arch and code parts.
      suffix = dropDot computeCapability;
    in
    "-gencode=arch=compute_${suffix},code=${feat}_${suffix}"
  );

  cudaArchNames = lib.unique (map (v: cudaMicroarchitectureNames.${lib.versions.major v}) cudaRealCapabilities);
  cudaCapabilities = cudaRealCapabilities ++ lib.optional (config.cudaForwardCompat or true) capabilitiesForward;
  cudaGencode = gencodeMapper "sm" cudaRealCapabilities ++ lib.optionals (config.cudaForwardCompat or true) (gencodeMapper "compute" [ (lib.last cudaPTXArchs) ]);
  # cudaRealArches :: List String
  # The real architectures are physical architectures supported by the CUDA version.
  # One "sm_XY" entry is produced per element of cudaComputeCapabilities.
  # For example, "sm_80".
  cudaRealArches = archMapper "sm" cudaComputeCapabilities;

  cudaCapabilitiesCommaString = lib.strings.concatStringsSep "," cudaCapabilities;
  cudaCapabilitiesSemiColonString = lib.strings.concatStringsSep ";" cudaCapabilities;
  cudaRealCapabilitiesCommaString = lib.strings.concatStringsSep "," cudaRealCapabilities;
  # cudaVirtualArches :: List String
  # The virtual architectures are typically used for forward compatibility, when trying to support
  # an architecture newer than the CUDA version allows.
  # One "compute_XY" entry is produced per element of cudaComputeCapabilities.
  # For example, "compute_80".
  cudaVirtualArches = archMapper "compute" cudaComputeCapabilities;

  # cudaArches :: List String
  # All real architectures, followed (unless config.cudaForwardCompat is false)
  # by the last virtual architecture for forward compatibility.
  cudaArches =
    let
      newestVirtualArch = lists.last cudaVirtualArches;
    in
    cudaRealArches ++ lists.optional (config.cudaForwardCompat or true) newestVirtualArch;

  # cudaGencode :: List String
  # The gencode arguments to pass to NVCC: one "sm" entry per compute capability,
  # plus (unless config.cudaForwardCompat is false) a "compute" entry for the
  # last compute capability.
  cudaGencode =
    gencodeMapper "sm" cudaComputeCapabilities
    ++ lists.optionals (config.cudaForwardCompat or true)
      (gencodeMapper "compute" [ (lists.last cudaComputeCapabilities) ]);

in
# The attribute set this file evaluates to.
{
   inherit cudaArchs cudaArchNames cudaCapabilities cudaCapabilitiesCommaString cudaCapabilitiesSemiColonString
     cudaRealCapabilities cudaRealCapabilitiesCommaString cudaGencode cudaRealArchs cudaPTXArchs;
  inherit
    cudaArchNames
    cudaArchNameToVersions cudaComputeCapabilityToName
    cudaRealArches cudaVirtualArches cudaArches
    cudaGencode;
  # Exposed under the name cudaCapabilities: the compute capabilities plus the
  # optional "+PTX" forward compatibility entry.
  cudaCapabilities = cudaComputeCapabilitiesAndForward;
}
+110 −0
Original line number Diff line number Diff line
# List of GPU descriptions, one attribute set per compute capability, ordered
# by ascending computeCapability. Each entry has the attributes:
#   archName:          name of the GPU architecture, e.g. "Ampere"
#   computeCapability: compute capability version, e.g. "8.6"
#   minCudaVersion:    lower CUDA version bound for this capability, e.g. "11.2"
#   maxCudaVersion:    upper CUDA version bound for this capability, e.g. "12.0"
[
  {
    archName = "Kepler";
    computeCapability = "3.0";
    minCudaVersion = "10.0";
    maxCudaVersion = "10.2";
  }
  {
    archName = "Kepler";
    computeCapability = "3.2";
    minCudaVersion = "10.0";
    maxCudaVersion = "10.2";
  }
  {
    archName = "Kepler";
    computeCapability = "3.5";
    minCudaVersion = "10.0";
    maxCudaVersion = "11.8";
  }
  {
    archName = "Kepler";
    computeCapability = "3.7";
    minCudaVersion = "10.0";
    maxCudaVersion = "11.8";
  }
  {
    archName = "Maxwell";
    computeCapability = "5.0";
    minCudaVersion = "10.0";
    maxCudaVersion = "12.0";
  }
  {
    archName = "Maxwell";
    computeCapability = "5.2";
    minCudaVersion = "10.0";
    maxCudaVersion = "12.0";
  }
  {
    archName = "Maxwell";
    computeCapability = "5.3";
    minCudaVersion = "10.0";
    maxCudaVersion = "12.0";
  }
  {
    archName = "Pascal";
    computeCapability = "6.0";
    minCudaVersion = "10.0";
    maxCudaVersion = "12.0";
  }
  {
    archName = "Pascal";
    computeCapability = "6.1";
    minCudaVersion = "10.0";
    maxCudaVersion = "12.0";
  }
  {
    archName = "Pascal";
    computeCapability = "6.2";
    minCudaVersion = "10.0";
    maxCudaVersion = "12.0";
  }
  {
    archName = "Volta";
    computeCapability = "7.0";
    minCudaVersion = "10.0";
    maxCudaVersion = "12.0";
  }
  {
    archName = "Volta";
    computeCapability = "7.2";
    minCudaVersion = "10.0";
    maxCudaVersion = "12.0";
  }
  {
    archName = "Turing";
    computeCapability = "7.5";
    minCudaVersion = "10.0";
    maxCudaVersion = "12.0";
  }
  {
    archName = "Ampere";
    computeCapability = "8.0";
    minCudaVersion = "11.2";
    maxCudaVersion = "12.0";
  }
  {
    archName = "Ampere";
    computeCapability = "8.6";
    minCudaVersion = "11.2";
    maxCudaVersion = "12.0";
  }
  {
    archName = "Ampere";
    computeCapability = "8.7";
    minCudaVersion = "11.5";
    maxCudaVersion = "12.0";
  }
  {
    archName = "Ada";
    computeCapability = "8.9";
    minCudaVersion = "11.8";
    maxCudaVersion = "12.0";
  }
  {
    archName = "Hopper";
    computeCapability = "9.0";
    minCudaVersion = "11.8";
    maxCudaVersion = "12.0";
  }
]
+1 −1
Original line number Diff line number Diff line
@@ -52,7 +52,7 @@ in stdenv.mkDerivation (finalAttrs: {
    "-DCMAKE_C_COMPILER=${cudatoolkit.cc}/bin/gcc"
    "-DCMAKE_CXX_COMPILER=${cudatoolkit.cc}/bin/g++"
    "-DMAGMA_ENABLE_CUDA=ON"
    "-DGPU_TARGET=${builtins.concatStringsSep "," cudaFlags.cudaRealArchs}"
    "-DGPU_TARGET=${builtins.concatStringsSep "," cudaFlags.cudaRealArches}"
  ] ++ lib.optionals useROCM [
    "-DCMAKE_C_COMPILER=${hip}/bin/hipcc"
    "-DCMAKE_CXX_COMPILER=${hip}/bin/hipcc"
+1 −1
Original line number Diff line number Diff line
@@ -164,7 +164,7 @@ let
      build --action_env TF_CUDA_PATHS="${cudatoolkit_joined},${cudnn},${nccl}"
      build --action_env TF_CUDA_VERSION="${lib.versions.majorMinor cudatoolkit.version}"
      build --action_env TF_CUDNN_VERSION="${lib.versions.major cudnn.version}"
      build:cuda --action_env TF_CUDA_COMPUTE_CAPABILITIES="${cudaFlags.cudaRealCapabilitiesCommaString}"
      build:cuda --action_env TF_CUDA_COMPUTE_CAPABILITIES="${builtins.concatStringsSep "," cudaFlags.cudaRealArches}"
    '' + ''
      CFG
    '';
Loading