Unverified Commit 540f031c authored by Connor Baker's avatar Connor Baker Committed by GitHub
Browse files

cudaPackages.nccl: 2.27.6-1 -> 2.28.7-1; support earlier CUDA releases (#456520)

parents 7b72ced7 69cdfaf6
Loading
Loading
Loading
Loading
+43 −13
Original line number Diff line number Diff line
# NOTE: Though NCCL is called within the cudaPackages package set, we avoid passing in
# the names of dependencies from that package set directly to avoid evaluation errors
# in the case redistributable packages are not available.
{
  _cuda,
  backendStdenv,
  cuda_cccl,
  cuda_cudart,
  cuda_nvcc,
  cudaAtLeast,
  cudaNamePrefix,
  fetchFromGitHub,
  flags,
@@ -19,11 +17,19 @@
let
  inherit (_cuda.lib) _mkMetaBadPlatforms;
  inherit (backendStdenv) hasJetsonCudaCapability requestedJetsonCudaCapabilities;
  inherit (lib) licenses maintainers teams;
  inherit (lib.attrsets)
  inherit (lib)
    all
    flip
    getAttr
    getBin
    getInclude
    getLib
    getOutput
    licenses
    maintainers
    optionalString
    teams
    versionAtLeast
    versionOlder
    ;
in
backendStdenv.mkDerivation (finalAttrs: {
@@ -33,13 +39,28 @@ backendStdenv.mkDerivation (finalAttrs: {
  # NOTE: Depends on the CUDA package set, so use cudaNamePrefix.
  name = "${cudaNamePrefix}-${finalAttrs.pname}-${finalAttrs.version}";
  pname = "nccl";
  version = "2.27.6-1";

  # NOTE:
  #   Compilation errors resulting from newer versions of NCCL on older releases of CUDA seem to be caused (mostly)
  #   by differences in assumed version of CCCL: using a newer CCCL with an older release of CUDA can (sometimes) allow
  #   newer versions of NCCL than what we provide here.
  version =
    if cudaAtLeast "11.7" then
      "2.28.7-1"
    else if cudaAtLeast "11.6" then
      "2.26.6-1"
    else
      "2.25.1-1";

  src = fetchFromGitHub {
    owner = "NVIDIA";
    repo = "nccl";
    tag = "v${finalAttrs.version}";
    hash = "sha256-/BiLSZaBbVIqOfd8nQlgUJub0YR3SR4B93x2vZpkeiU=";
    hash = getAttr finalAttrs.version {
      "2.28.7-1" = "sha256-NM19OiBBGmv3cGoVoRLKSh9Y59hiDoei9NIrRnTqWeA=";
      "2.26.6-1" = "sha256-vkWMGXCy+dIpYCecdafmOAGlnfRxIQ5Y2ZQuMjinraI=";
      "2.25.1-1" = "sha256-3snh0xdL9I5BYqdbqdl+noizJoI38mZRVOJChgEE1I8=";
    };
  };

  outputs = [
@@ -55,7 +76,7 @@ backendStdenv.mkDerivation (finalAttrs: {
  ];

  buildInputs = [
    (getOutput "include" cuda_nvcc)
    (getInclude cuda_nvcc)
    cuda_cccl
    cuda_cudart
  ];
@@ -71,12 +92,21 @@ backendStdenv.mkDerivation (finalAttrs: {
      --replace-fail \
        '-ccbin $(CXX)' \
        ""
  ''
  # 2.27.3-1 was the first to introuce CXXSTD
  + optionalString (versionOlder finalAttrs.version "2.27.3-1") ''
    nixLog "patching $PWD/makefiles/common.mk to remove NVIDIA's std hardcoding"
    substituteInPlace ./makefiles/common.mk \
      --replace-fail \
        '-std=c++11' \
        '$(CXXSTD)'
  '';

  # TODO: This would likely break under cross; need to delineate between build and host packages.
  makeFlags = [
    "CXXSTD=-std=c++17"
    "CUDA_HOME=${getBin cuda_nvcc}"
    "CUDA_INC=${getOutput "include" cuda_cudart}/include"
    "CUDA_INC=${getInclude cuda_cudart}/include"
    "CUDA_LIB=${getLib cuda_cudart}/lib"
    "NVCC_GENCODE=${flags.gencodeString}"
    "PREFIX=$(out)"
@@ -85,7 +115,8 @@ backendStdenv.mkDerivation (finalAttrs: {
  enableParallelBuilding = true;

  postFixup = ''
    moveToOutput lib/libnccl_static.a "$static"
    _overrideFirst outputStatic "static" "lib" "out"
    moveToOutput lib/libnccl_static.a "''${!outputStatic:?}"
  '';

  passthru = {
@@ -93,8 +124,7 @@ backendStdenv.mkDerivation (finalAttrs: {
      {
        message = "Pre-Thor Jetson devices (CUDA capabilities < 10.1) are not supported by NCCL";
        assertion =
          !hasJetsonCudaCapability
          || lib.all (lib.flip lib.versionAtLeast "10.1") requestedJetsonCudaCapabilities;
          !hasJetsonCudaCapability || all (flip versionAtLeast "10.1") requestedJetsonCudaCapabilities;
      }
    ];