Loading pkgs/development/python-modules/torch/default.nix +68 −35 Original line number Diff line number Diff line { stdenv, lib, fetchFromGitHub, buildPythonPackage, python, { stdenv, lib, fetchFromGitHub, fetchpatch, buildPythonPackage, python, config, cudaSupport ? config.cudaSupport, cudaPackages, magma, useSystemNccl ? true, MPISupport ? false, mpi, Loading Loading @@ -52,17 +52,8 @@ let inherit (lib) lists strings trivial; inherit (cudaPackages) cudatoolkit cudaFlags cudnn nccl; in assert cudaSupport -> stdenv.isLinux; assert cudaSupport -> (cudaPackages.cudaMajorVersion == "11"); # confirm that cudatoolkits are sync'd across dependencies assert !(MPISupport && cudaSupport) || mpi.cudatoolkit == cudatoolkit; assert !cudaSupport || magma.cudaPackages.cudatoolkit == cudatoolkit; inherit (cudaPackages) cudaFlags cudnn nccl; let setBool = v: if v then "1" else "0"; # https://github.com/pytorch/pytorch/blob/v2.0.1/torch/utils/cpp_extension.py#L1744 Loading Loading @@ -103,23 +94,6 @@ let throw "No GPU targets specified" ); cudatoolkit_joined = symlinkJoin { name = "${cudatoolkit.name}-unsplit"; # nccl is here purely for semantic grouping it could be moved to nativeBuildInputs paths = [ cudatoolkit.out cudatoolkit.lib nccl.dev nccl.out ]; }; # Normally libcuda.so.1 is provided at runtime by nvidia-x11 via # LD_LIBRARY_PATH=/run/opengl-driver/lib. We only use the stub # libcuda.so from cudatoolkit for running tests, so that we don’t have # to recompile pytorch on every update to nvidia-x11 or the kernel. cudaStub = linkFarm "cuda-stub" [{ name = "libcuda.so.1"; path = "${cudatoolkit}/lib/stubs/libcuda.so"; }]; cudaStubEnv = lib.optionalString cudaSupport "LD_LIBRARY_PATH=${cudaStub}\${LD_LIBRARY_PATH:+:}$LD_LIBRARY_PATH "; rocmtoolkit_joined = symlinkJoin { name = "rocm-merged"; Loading Loading @@ -160,6 +134,12 @@ in buildPythonPackage rec { # base is 10.12. Until we upgrade, we can fall back on the older # pthread support. ./pthreadpool-disable-gcd.diff ] ++ lib.optionals stdenv.isLinux [ # Propagate CUPTI to Kineto by overriding the search path with environment variables. (fetchpatch { url = "https://github.com/pytorch/pytorch/pull/108847/commits/7ae4d7c0e2dec358b4fe81538efe9da5eb580ec9.patch"; hash = "sha256-skFaDg98xcJqJfzxWk+qhUxPLHDStqvd0mec3PgksIg="; }) ]; postPatch = lib.optionalString rocmSupport '' Loading @@ -184,6 +164,13 @@ in buildPythonPackage rec { --replace "set(ROCM_PATH \$ENV{ROCM_PATH})" \ "set(ROCM_PATH \$ENV{ROCM_PATH})''\nset(ROCM_VERSION ${lib.concatStrings (lib.intersperse "0" (lib.splitString "." hip.version))})" '' # Detection of NCCL version doesn't work particularly well when using the static binary. + lib.optionalString cudaSupport '' substituteInPlace cmake/Modules/FindNCCL.cmake \ --replace \ 'message(FATAL_ERROR "Found NCCL header version and library version' \ 'message(WARNING "Found NCCL header version and library version' '' # error: no member named 'aligned_alloc' in the global namespace; did you mean simply 'aligned_alloc' # This lib overrided aligned_alloc hence the error message. Tltr: his function is linkable but not in header. + lib.optionalString (stdenv.isDarwin && lib.versionOlder stdenv.targetPlatform.darwinSdkVersion "11.0") '' Loading @@ -192,12 +179,16 @@ in buildPythonPackage rec { inline void *aligned_alloc(size_t align, size_t size)' ''; # NOTE(@connorbaker): Though we do not disable Gloo or MPI when building with CUDA support, caution should be taken # when using the different backends. Gloo's GPU support isn't great, and MPI and CUDA can't be used at the same time # without extreme care to ensure they don't lock each other out of shared resources. # For more, see https://github.com/open-mpi/ompi/issues/7733#issuecomment-629806195. preConfigure = lib.optionalString cudaSupport '' export TORCH_CUDA_ARCH_LIST="${gpuTargetString}" export CC=${cudatoolkit.cc}/bin/gcc CXX=${cudatoolkit.cc}/bin/g++ '' + lib.optionalString (cudaSupport && cudnn != null) '' export CUDNN_INCLUDE_DIR=${cudnn.dev}/include export CUDNN_LIB_DIR=${cudnn.lib}/lib export CUPTI_INCLUDE_DIR=${cudaPackages.cuda_cupti.dev}/include export CUPTI_LIBRARY_DIR=${cudaPackages.cuda_cupti.lib}/lib '' + lib.optionalString rocmSupport '' export ROCM_PATH=${rocmtoolkit_joined} export ROCM_SOURCE_DIR=${rocmtoolkit_joined} Loading Loading @@ -256,6 +247,7 @@ in buildPythonPackage rec { PYTORCH_BUILD_NUMBER = 0; USE_SYSTEM_NCCL = setBool useSystemNccl; # don't build pytorch's third_party NCCL USE_STATIC_NCCL = setBool useSystemNccl; # Suppress a weird warning in mkl-dnn, part of ideep in pytorch # (upstream seems to have fixed this in the wrong place?) Loading Loading @@ -286,12 +278,43 @@ in buildPythonPackage rec { pybind11 pythonRelaxDepsHook removeReferencesTo ] ++ lib.optionals cudaSupport [ cudatoolkit_joined ] ] ++ lib.optionals cudaSupport (with cudaPackages; [ autoAddOpenGLRunpathHook cuda_nvcc ]) ++ lib.optionals rocmSupport [ rocmtoolkit_joined ]; buildInputs = [ blas blas.provider pybind11 ] ++ lib.optionals stdenv.isLinux [ linuxHeaders_5_19 ] # TMP: avoid "flexible array member" errors for now ++ lib.optionals cudaSupport [ cudnn.dev cudnn.lib nccl ] ++ lib.optionals cudaSupport (with cudaPackages; [ cuda_cccl.dev # <thrust/*> cuda_cudart # cuda_runtime.h and libraries cuda_cupti.dev # For kineto cuda_cupti.lib # For kineto cuda_nvcc.dev # crt/host_config.h; even though we include this in nativeBuildinputs, it's needed here too cuda_nvml_dev.dev # <nvml.h> cuda_nvrtc.dev cuda_nvrtc.lib cuda_nvtx.dev cuda_nvtx.lib # -llibNVToolsExt cudnn.dev cudnn.lib libcublas.dev libcublas.lib libcufft.dev libcufft.lib libcurand.dev libcurand.lib libcusolver.dev libcusolver.lib libcusparse.dev libcusparse.lib nccl.dev # Provides nccl.h AND a static copy of NCCL! ] ++ lists.optionals (strings.versionOlder cudaVersion "11.8") [ cuda_nvprof.dev # <cuda_profiler_api.h> ] ++ lists.optionals (strings.versionAtLeast cudaVersion "11.8") [ cuda_profiler_api.dev # <cuda_profiler_api.h> ]) ++ lib.optionals rocmSupport [ openmp ] ++ lib.optionals (cudaSupport || rocmSupport) [ magma ] ++ lib.optionals stdenv.isLinux [ numactl ] Loading Loading @@ -335,7 +358,6 @@ in buildPythonPackage rec { checkPhase = with lib.versions; with lib.strings; concatStringsSep " " [ "runHook preCheck" cudaStubEnv "${python.interpreter} test/run_test.py" "--exclude" (concatStringsSep " " [ Loading Loading @@ -419,6 +441,17 @@ in buildPythonPackage rec { license = licenses.bsd3; maintainers = with maintainers; [ teh thoughtpolice tscholak ]; # tscholak esp. for darwin-related builds platforms = with platforms; linux ++ lib.optionals (!cudaSupport && !rocmSupport) darwin; broken = rocmSupport && cudaSupport; # CUDA and ROCm are mutually exclusive broken = builtins.any trivial.id [ # CUDA and ROCm are mutually exclusive (cudaSupport && rocmSupport) # CUDA is only supported on Linux (cudaSupport && !stdenv.isLinux) # Only CUDA 11 is currently supported (cudaSupport && (cudaPackages.cudaMajorVersion != "11")) # MPI cudatoolkit does not match cudaPackages.cudatoolkit (MPISupport && cudaSupport && (mpi.cudatoolkit != cudaPackages.cudatoolkit)) # Magma cudaPackages does not match cudaPackages (cudaSupport && (magma.cudaPackages != cudaPackages)) ]; }; } Loading
pkgs/development/python-modules/torch/default.nix +68 −35 Original line number Diff line number Diff line { stdenv, lib, fetchFromGitHub, buildPythonPackage, python, { stdenv, lib, fetchFromGitHub, fetchpatch, buildPythonPackage, python, config, cudaSupport ? config.cudaSupport, cudaPackages, magma, useSystemNccl ? true, MPISupport ? false, mpi, Loading Loading @@ -52,17 +52,8 @@ let inherit (lib) lists strings trivial; inherit (cudaPackages) cudatoolkit cudaFlags cudnn nccl; in assert cudaSupport -> stdenv.isLinux; assert cudaSupport -> (cudaPackages.cudaMajorVersion == "11"); # confirm that cudatoolkits are sync'd across dependencies assert !(MPISupport && cudaSupport) || mpi.cudatoolkit == cudatoolkit; assert !cudaSupport || magma.cudaPackages.cudatoolkit == cudatoolkit; inherit (cudaPackages) cudaFlags cudnn nccl; let setBool = v: if v then "1" else "0"; # https://github.com/pytorch/pytorch/blob/v2.0.1/torch/utils/cpp_extension.py#L1744 Loading Loading @@ -103,23 +94,6 @@ let throw "No GPU targets specified" ); cudatoolkit_joined = symlinkJoin { name = "${cudatoolkit.name}-unsplit"; # nccl is here purely for semantic grouping it could be moved to nativeBuildInputs paths = [ cudatoolkit.out cudatoolkit.lib nccl.dev nccl.out ]; }; # Normally libcuda.so.1 is provided at runtime by nvidia-x11 via # LD_LIBRARY_PATH=/run/opengl-driver/lib. We only use the stub # libcuda.so from cudatoolkit for running tests, so that we don’t have # to recompile pytorch on every update to nvidia-x11 or the kernel. cudaStub = linkFarm "cuda-stub" [{ name = "libcuda.so.1"; path = "${cudatoolkit}/lib/stubs/libcuda.so"; }]; cudaStubEnv = lib.optionalString cudaSupport "LD_LIBRARY_PATH=${cudaStub}\${LD_LIBRARY_PATH:+:}$LD_LIBRARY_PATH "; rocmtoolkit_joined = symlinkJoin { name = "rocm-merged"; Loading Loading @@ -160,6 +134,12 @@ in buildPythonPackage rec { # base is 10.12. Until we upgrade, we can fall back on the older # pthread support. ./pthreadpool-disable-gcd.diff ] ++ lib.optionals stdenv.isLinux [ # Propagate CUPTI to Kineto by overriding the search path with environment variables. (fetchpatch { url = "https://github.com/pytorch/pytorch/pull/108847/commits/7ae4d7c0e2dec358b4fe81538efe9da5eb580ec9.patch"; hash = "sha256-skFaDg98xcJqJfzxWk+qhUxPLHDStqvd0mec3PgksIg="; }) ]; postPatch = lib.optionalString rocmSupport '' Loading @@ -184,6 +164,13 @@ in buildPythonPackage rec { --replace "set(ROCM_PATH \$ENV{ROCM_PATH})" \ "set(ROCM_PATH \$ENV{ROCM_PATH})''\nset(ROCM_VERSION ${lib.concatStrings (lib.intersperse "0" (lib.splitString "." hip.version))})" '' # Detection of NCCL version doesn't work particularly well when using the static binary. + lib.optionalString cudaSupport '' substituteInPlace cmake/Modules/FindNCCL.cmake \ --replace \ 'message(FATAL_ERROR "Found NCCL header version and library version' \ 'message(WARNING "Found NCCL header version and library version' '' # error: no member named 'aligned_alloc' in the global namespace; did you mean simply 'aligned_alloc' # This lib overrided aligned_alloc hence the error message. Tltr: his function is linkable but not in header. + lib.optionalString (stdenv.isDarwin && lib.versionOlder stdenv.targetPlatform.darwinSdkVersion "11.0") '' Loading @@ -192,12 +179,16 @@ in buildPythonPackage rec { inline void *aligned_alloc(size_t align, size_t size)' ''; # NOTE(@connorbaker): Though we do not disable Gloo or MPI when building with CUDA support, caution should be taken # when using the different backends. Gloo's GPU support isn't great, and MPI and CUDA can't be used at the same time # without extreme care to ensure they don't lock each other out of shared resources. # For more, see https://github.com/open-mpi/ompi/issues/7733#issuecomment-629806195. preConfigure = lib.optionalString cudaSupport '' export TORCH_CUDA_ARCH_LIST="${gpuTargetString}" export CC=${cudatoolkit.cc}/bin/gcc CXX=${cudatoolkit.cc}/bin/g++ '' + lib.optionalString (cudaSupport && cudnn != null) '' export CUDNN_INCLUDE_DIR=${cudnn.dev}/include export CUDNN_LIB_DIR=${cudnn.lib}/lib export CUPTI_INCLUDE_DIR=${cudaPackages.cuda_cupti.dev}/include export CUPTI_LIBRARY_DIR=${cudaPackages.cuda_cupti.lib}/lib '' + lib.optionalString rocmSupport '' export ROCM_PATH=${rocmtoolkit_joined} export ROCM_SOURCE_DIR=${rocmtoolkit_joined} Loading Loading @@ -256,6 +247,7 @@ in buildPythonPackage rec { PYTORCH_BUILD_NUMBER = 0; USE_SYSTEM_NCCL = setBool useSystemNccl; # don't build pytorch's third_party NCCL USE_STATIC_NCCL = setBool useSystemNccl; # Suppress a weird warning in mkl-dnn, part of ideep in pytorch # (upstream seems to have fixed this in the wrong place?) Loading Loading @@ -286,12 +278,43 @@ in buildPythonPackage rec { pybind11 pythonRelaxDepsHook removeReferencesTo ] ++ lib.optionals cudaSupport [ cudatoolkit_joined ] ] ++ lib.optionals cudaSupport (with cudaPackages; [ autoAddOpenGLRunpathHook cuda_nvcc ]) ++ lib.optionals rocmSupport [ rocmtoolkit_joined ]; buildInputs = [ blas blas.provider pybind11 ] ++ lib.optionals stdenv.isLinux [ linuxHeaders_5_19 ] # TMP: avoid "flexible array member" errors for now ++ lib.optionals cudaSupport [ cudnn.dev cudnn.lib nccl ] ++ lib.optionals cudaSupport (with cudaPackages; [ cuda_cccl.dev # <thrust/*> cuda_cudart # cuda_runtime.h and libraries cuda_cupti.dev # For kineto cuda_cupti.lib # For kineto cuda_nvcc.dev # crt/host_config.h; even though we include this in nativeBuildinputs, it's needed here too cuda_nvml_dev.dev # <nvml.h> cuda_nvrtc.dev cuda_nvrtc.lib cuda_nvtx.dev cuda_nvtx.lib # -llibNVToolsExt cudnn.dev cudnn.lib libcublas.dev libcublas.lib libcufft.dev libcufft.lib libcurand.dev libcurand.lib libcusolver.dev libcusolver.lib libcusparse.dev libcusparse.lib nccl.dev # Provides nccl.h AND a static copy of NCCL! ] ++ lists.optionals (strings.versionOlder cudaVersion "11.8") [ cuda_nvprof.dev # <cuda_profiler_api.h> ] ++ lists.optionals (strings.versionAtLeast cudaVersion "11.8") [ cuda_profiler_api.dev # <cuda_profiler_api.h> ]) ++ lib.optionals rocmSupport [ openmp ] ++ lib.optionals (cudaSupport || rocmSupport) [ magma ] ++ lib.optionals stdenv.isLinux [ numactl ] Loading Loading @@ -335,7 +358,6 @@ in buildPythonPackage rec { checkPhase = with lib.versions; with lib.strings; concatStringsSep " " [ "runHook preCheck" cudaStubEnv "${python.interpreter} test/run_test.py" "--exclude" (concatStringsSep " " [ Loading Loading @@ -419,6 +441,17 @@ in buildPythonPackage rec { license = licenses.bsd3; maintainers = with maintainers; [ teh thoughtpolice tscholak ]; # tscholak esp. for darwin-related builds platforms = with platforms; linux ++ lib.optionals (!cudaSupport && !rocmSupport) darwin; broken = rocmSupport && cudaSupport; # CUDA and ROCm are mutually exclusive broken = builtins.any trivial.id [ # CUDA and ROCm are mutually exclusive (cudaSupport && rocmSupport) # CUDA is only supported on Linux (cudaSupport && !stdenv.isLinux) # Only CUDA 11 is currently supported (cudaSupport && (cudaPackages.cudaMajorVersion != "11")) # MPI cudatoolkit does not match cudaPackages.cudatoolkit (MPISupport && cudaSupport && (mpi.cudatoolkit != cudaPackages.cudatoolkit)) # Magma cudaPackages does not match cudaPackages (cudaSupport && (magma.cudaPackages != cudaPackages)) ]; }; }