Unverified Commit 039569ef authored by kirillrdy's avatar kirillrdy Committed by GitHub
Browse files

python3Packages.torch: 2.8.0 -> 2.9.0 (#456704)

parents 62006777 19f26684
Loading
Loading
Loading
Loading
+2 −15
Original line number Diff line number Diff line
@@ -282,7 +282,7 @@ in
buildPythonPackage.override { inherit stdenv; } rec {
  pname = "torch";
  # Don't forget to update torch-bin to the same version.
  version = "2.8.0";
  version = "2.9.0";
  pyproject = true;

  outputs = [
@@ -304,19 +304,6 @@ buildPythonPackage.override { inherit stdenv; } rec {

  patches = [
    ./clang19-template-warning.patch

    # Do not override PYTHONPATH, otherwise, the build fails with:
    # ModuleNotFoundError: No module named 'typing_extensions'
    (fetchpatch {
      name = "cmake-build-preserve-PYTHONPATH";
      url = "https://github.com/pytorch/pytorch/commit/231c72240d80091f099c95e326d3600cba866eee.patch";
      hash = "sha256-BBCjxzz2TUkx4nXRyRILA82kMwyb/4+C3eOtYqf5dhk=";
    })

    # Fixes GCC-14 compatibility on ARM
    # Adapted from https://github.com/pytorch/pytorch/pull/157867
    # TODO: remove at the next release
    ./gcc-14-arm-compat.path
  ]
  ++ lib.optionals cudaSupport [
    ./fix-cmake-cuda-toolkit.patch
@@ -336,7 +323,7 @@ buildPythonPackage.override { inherit stdenv; } rec {

  postPatch = ''
    substituteInPlace pyproject.toml \
      --replace-fail "setuptools>=62.3.0,<80.0" "setuptools"
      --replace-fail "setuptools>=70.1.0,<80.0" "setuptools"
  ''
  # Provide path to openssl binary for inductor code cache hash
  # InductorError: FileNotFoundError: [Errno 2] No such file or directory: 'openssl'
+0 −49
Original line number Diff line number Diff line
diff --git a/aten/src/ATen/cpu/vec/sve/vec_bfloat16.h b/aten/src/ATen/cpu/vec/sve/vec_bfloat16.h
index 7f05c2ad166..1632b595c4c 100644
--- a/aten/src/ATen/cpu/vec/sve/vec_bfloat16.h
+++ b/aten/src/ATen/cpu/vec/sve/vec_bfloat16.h
@@ -220,8 +220,12 @@ class Vectorized<BFloat16> {
   Vectorized<BFloat16> le(const Vectorized<BFloat16>& other) const;
 };

-inline std::tuple<Vectorized<float>, Vectorized<float>> convert_bfloat16_float(
-    const Vectorized<c10::BFloat16>& a) {
+#if defined(__GNUC__) && __GNUC__ == 14
+// Workaround for gcc-14.2.0 ICE during RTL pass: vregs when compiling for SVE
+__attribute__((optimize("no-tree-vectorize")))
+#endif
+inline std::tuple<Vectorized<float>, Vectorized<float>>
+convert_bfloat16_float(const Vectorized<c10::BFloat16>& a) {
   static_assert(
       Vectorized<c10::BFloat16>::size() == 2 * Vectorized<float>::size());
   auto zero = svreinterpret_bf16_f32(svdup_n_f32(0.0f));
diff --git a/aten/src/ATen/native/cpu/Activation.cpp b/aten/src/ATen/native/cpu/Activation.cpp
index 52d5383e60f..00c9f4eb253 100644
--- a/aten/src/ATen/native/cpu/Activation.cpp
+++ b/aten/src/ATen/native/cpu/Activation.cpp
@@ -26,6 +26,10 @@ namespace at::native {

 namespace {

+#if defined(__GNUC__) && __GNUC__ == 14 && defined(__aarch64__) && !defined(__ARM_FEATURE_SVE)
+// Workaround for gcc-14.2.0 ICE during RTL pass: expand when compiling for NEON
+__attribute__((optimize("no-tree-vectorize")))
+#endif
 static void log_sigmoid_cpu_kernel(TensorBase &output, TensorBase &buffer, const TensorBase &input) {
   if (at::isReducedFloatingType(input.scalar_type())) {
     AT_DISPATCH_REDUCED_FLOATING_TYPES(input.scalar_type(), "log_sigmoid_cpu", [&]() {
diff --git a/aten/src/ATen/native/cpu/Unfold2d.cpp b/aten/src/ATen/native/cpu/Unfold2d.cpp
index 8ef0741e77a..8c94decfff0 100644
--- a/aten/src/ATen/native/cpu/Unfold2d.cpp
+++ b/aten/src/ATen/native/cpu/Unfold2d.cpp
@@ -169,6 +169,10 @@ static void unfolded2d_acc_channels_last(

 /* note: due to write issues, this one cannot be parallelized as well as
  * unfolded2d_copy */
+#if defined(__GNUC__) && __GNUC__ == 14 && defined(__ARM_FEATURE_SVE) && !defined(__ARM_FEATURE_BF16)
+// Workaround for gcc-14.2.0 ICE during RTL pass: vregs when compiling for SVE without BF16
+__attribute__((optimize("no-tree-vectorize")))
+#endif
 void unfolded2d_acc_kernel(
     ScalarType dtype,
     void *finput_data,
+13 −16
Original line number Diff line number Diff line
diff --git a/cmake/Dependencies.cmake b/cmake/Dependencies.cmake
index a93386c27f8..7c6b98006bf 100644
index ef5c2fd4e97..6591296b704 100644
--- a/cmake/Dependencies.cmake
+++ b/cmake/Dependencies.cmake
@@ -967,7 +967,7 @@ endif()
@@ -952,15 +952,16 @@ endif()
 
 # ---[ nvtx
 if(USE_SYSTEM_NVTX)
-  find_path(nvtx3_dir NAMES nvtx3 PATHS ${CUDA_INCLUDE_DIRS})
+  find_path(nvtx3_dir NAMES nvtx3/nvtx3.hpp PATHS ${CUDA_INCLUDE_DIRS})
   find_package_handle_standard_args(nvtx3 DEFAULT_MSG nvtx3_dir)
   if(NOT nvtx3_FOUND)
     message(WARNING "Cannot find system NVTX3, find shipped NVTX3 instead")
@@ -977,9 +977,10 @@ if(NOT TARGET CUDA::nvtx3)
   add_library(CUDA::nvtx3 INTERFACE IMPORTED)
 endif()
 if(NOT nvtx3_dir)
 else()
-  find_path(nvtx3_dir NAMES nvtx3 PATHS "${PROJECT_SOURCE_DIR}/third_party/NVTX/c/include" NO_DEFAULT_PATH)
-  target_include_directories(CUDA::nvtx3 INTERFACE "${nvtx3_dir}")
+  find_path(nvtx3_dir NAMES nvtx3/nvtx3.hpp PATHS "${PROJECT_SOURCE_DIR}/third_party/NVTX/c/include" NO_DEFAULT_PATH)
 endif()
+target_include_directories(CUDA::nvtx3 INTERFACE "${nvtx3_dir}")
 find_package_handle_standard_args(nvtx3 DEFAULT_MSG nvtx3_dir)
 if(nvtx3_FOUND)
   add_library(torch::nvtx3 INTERFACE IMPORTED)
   target_include_directories(torch::nvtx3 INTERFACE "${nvtx3_dir}")
   target_compile_definitions(torch::nvtx3 INTERFACE TORCH_CUDA_USE_NVTX3)
+  message(STATUS "Using NVTX3 include directory: ${nvtx3_dir}")


 # ---[ HIP
 else()
   message(WARNING "Cannot find NVTX3, find old NVTX instead")
   add_library(torch::nvtoolsext INTERFACE IMPORTED)
+58 −38
Original line number Diff line number Diff line
@@ -4,13 +4,19 @@
  fetchFromGitHub,
  runCommand,
}:
assert version == "2.8.0";
assert version == "2.9.0";
rec {
  # Source of the ROCm/aiter repository, pinned to a fixed revision and
  # content hash. This is the bare checkout; src_aiter_recursive layers the
  # 3rdparty/composable_kernel contents on top of it.
  src_aiter = fetchFromGitHub {
    owner = "ROCm";
    repo = "aiter";
    rev = "01aae101b9e5e94d6c16a9514c9fb8df99c93150";
    hash = "sha256-rJwKTzUi066ZRroK7eFsOPmNRRRq7VMQz0Xw7qcgtNo=";
  };
  src_asmjit = fetchFromGitHub {
    owner = "asmjit";
    repo = "asmjit";
    rev = "e5d7c0bd5d9aec44d68830187138149e6a8c4e32";
    hash = "sha256-sI0/9szBMvopQAmVcZSU4D/oaZYdb08AHDSZKy/Qz1g=";
    rev = "a3199e8857792cd10b7589ff5d58343d2c9008ea";
    hash = "sha256-qb0lM1N1FIvoADNsZZdlg8HAheePv/LvSDvRhOAqZc0=";
  };
  src_benchmark = fetchFromGitHub {
    owner = "google";
@@ -45,14 +51,20 @@ rec {
  src_composable_kernel = fetchFromGitHub {
    owner = "ROCm";
    repo = "composable_kernel";
    rev = "8086bbe3a78d931eb96fe12fdc014082e18d18d3";
    hash = "sha256-fyL1SzRs5CXW5CWy6kCN1y1xX6cG+ur7iQlbKX2zbCM=";
    rev = "7fe50dc3da2069d6645d9deb8c017a876472a977";
    hash = "sha256-OxA0ekcaRxAmBFlXkvS7XAX40kcWCwyytHWV6vROWjo=";
  };
  # ROCm/composable_kernel pinned at the revision used for aiter. It is kept
  # separate from src_composable_kernel, which pins a different revision of
  # the same repository. Reaches aiter's 3rdparty/composable_kernel through
  # src_composable_kernel_aiter_recursive.
  src_composable_kernel_aiter = fetchFromGitHub {
    owner = "ROCm";
    repo = "composable_kernel";
    rev = "cffe8fa2a442ac8e80dd236a1a5d24fe3d7e0cbf";
    hash = "sha256-KDmSs9NDFYbyE4wzBedVDBZGhI1BAoJHWOStFkuEk9U=";
  };
  src_composable_kernel_fbgemm = fetchFromGitHub {
    owner = "jwfromm";
    repo = "composable_kernel";
    rev = "4a61bdd4bd4ed730e078aebc7c0fcf046ff29406";
    hash = "sha256-CxcpvW4QxkUDB2zMz7NB6Rt9jXjJeGVExfrYbn9ef5I=";
    rev = "b1281b8b08d973a7064f864f47eeb30f3e2596e9";
    hash = "sha256-ZWfTZ9UxnIpkoRnTmB5e3H/LY5a2HixkCCIMcnpc+Gw=";
  };
  src_composable_kernel_flash-attention = fetchFromGitHub {
    owner = "ROCm";
@@ -63,8 +75,8 @@ rec {
  src_cpp-httplib = fetchFromGitHub {
    owner = "yhirose";
    repo = "cpp-httplib";
    rev = "3af7f2c16147f3fbc6e4d717032daf505dc1652c";
    hash = "sha256-t/ddZjKelnXQdXQvZgv9pQcJt3M2rwgbtTQNW/T6Gpk=";
    rev = "89c932f313c6437c38f2982869beacc89c2f2246";
    hash = "sha256-+VPebnFMGNyChM20q4Z+kVOyI/qDLQjRsaGS0vo8kDM=";
  };
  src_cpr = fetchFromGitHub {
    owner = "libcpr";
@@ -87,20 +99,20 @@ rec {
  src_cudnn-frontend = fetchFromGitHub {
    owner = "NVIDIA";
    repo = "cudnn-frontend";
    rev = "666996fe3960f27170d1527e5579ba24c8d3380a";
    hash = "sha256-/ae5dNjqkn5dGciGSO+Pn9pzJg+hHWqqQCPcqggWezo=";
    rev = "f937055efc6d414d11f4c6577e3977fe74f35fb6";
    hash = "sha256-LiTajW2hrDth8wEC4Vp2lZO+CeMqK+tEKPLok7gXB/s=";
  };
  src_cutlass = fetchFromGitHub {
    owner = "NVIDIA";
    repo = "cutlass";
    rev = "ad7b2f5e84fcfa124cb02b91d5bd26d238c0459e";
    hash = "sha256-teziPNA9csYvhkG5t2ht8W8x5+1YGGbHm8VKx4JoxgI=";
    rev = "e51efbfe18fe4f4cbb66ab814c55bf4aa0185491";
    hash = "sha256-ZY+6Tg/CC6fqvU764k6QNudYDpY+s8OQklG+1aXQuns=";
  };
  src_cutlass_fbgemm = fetchFromGitHub {
    owner = "jwfromm";
    repo = "cutlass";
    rev = "3ed8d2ec4ba35ef5d9d8353826209b6f868f63d3";
    hash = "sha256-NntohGvqs6fbWusi2Qv5uzCJhMAfBv8qYoFi38D+mzk=";
    rev = "311f3c8e51dc0eb56310cfc6980bf63d0fbd7917";
    hash = "sha256-JSr48FkrYE9mvm1+ikrqUxrYuV4Bok2EOdcyeTsMdiA=";
  };
  src_cutlass_flash-attention = fetchFromGitHub {
    owner = "NVIDIA";
@@ -123,8 +135,8 @@ rec {
  src_fbgemm = fetchFromGitHub {
    owner = "pytorch";
    repo = "fbgemm";
    rev = "157e88b750c452bef2ab4653fe9d1eeb151ce4c3";
    hash = "sha256-Ka8/4gBsbtKNhKM/cWg1NmlKjVeBZvS+yS9SQQxb34A=";
    rev = "4b39c551efe15e6bbade20565b0ceb2d8ce3352d";
    hash = "sha256-a7oNR2RMQWiaX9jLAy5Y4aniByqj7f9g65snOjPPwK0=";
  };
  src_fbjni = fetchFromGitHub {
    owner = "facebookincubator";
@@ -201,8 +213,8 @@ rec {
  src_gloo = fetchFromGitHub {
    owner = "pytorch";
    repo = "gloo";
    rev = "c7b7b022c124d9643957d9bd55f57ac59fce8fa2";
    hash = "sha256-pZ08gs6wQTZNVDX9uuaQZvw5JKCps8EALegNF0UVV3c=";
    rev = "54cbae0d3a67fa890b4c3d9ee162b7860315e341";
    hash = "sha256-4g/AffVyU7iEHJI0KRkvJqeHSBMRB89V4/cfVf3yPf4=";
  };
  src_googletest = fetchFromGitHub {
    owner = "google";
@@ -216,12 +228,6 @@ rec {
    rev = "58d77fa8070e8cec2dc1ed015d66b454c8d78850";
    hash = "sha256-W+OxRTVtemt2esw4P7IyGWXOonUN5ZuscjvzqkYvZbM=";
  };
  src_googletest_fbgemm = fetchFromGitHub {
    owner = "google";
    repo = "googletest";
    rev = "f8d7d77c06936315286eb55f8de22cd23c188571";
    hash = "sha256-t0RchAHTJbuI5YW4uyBPykTvcjy90JW9AOPNjIhwh6U=";
  };
  src_googletest_kineto = fetchFromGitHub {
    owner = "google";
    repo = "googletest";
@@ -261,8 +267,8 @@ rec {
  src_hipify_torch = fetchFromGitHub {
    owner = "ROCmSoftwarePlatform";
    repo = "hipify_torch";
    rev = "a4337c69fe0e2552a7b7b0669178926beeed828c";
    hash = "sha256-B0+tDjSlZ9C5IAAgteRIgwaJNnptpp1jOP3hTF5AdOw=";
    rev = "63b6a7b541fa7f08f8475ca7d74054db36ff2691";
    hash = "sha256-TH9fyprP21sRsxGs4VrahhFSIXDhnLvV09c+ZCE27u0=";
  };
  src_ideep = fetchFromGitHub {
    owner = "intel";
@@ -321,14 +327,14 @@ rec {
  src_libuv = fetchFromGitHub {
    owner = "libuv";
    repo = "libuv";
    rev = "1dff88e5161cba5c59276d2070d2e304e4dcb242";
    hash = "sha256-i6AYD1Ony0L2+3yWK6bxOfwoZEvd9qCg33QSqA7bRXI=";
    rev = "5152db2cbfeb5582e9c27c5ea1dba2cd9e10759b";
    hash = "sha256-ayTk3qkeeAjrGj5ab7wF7vpWI8XWS1EeKKUqzaD/LY0=";
  };
  src_mimalloc = fetchFromGitHub {
    owner = "microsoft";
    repo = "mimalloc";
    rev = "94036de6fe20bfd8a73d4a6d142fcf532ea604d9";
    hash = "sha256-B0gngv16WFLBtrtG5NqA2m5e95bYVcQraeITcOX9A74=";
    rev = "fbd8b99c2b828428947d70fdc046bb55609be93e";
    hash = "sha256-+8xZT+mVEqlqabQc+1buVH/X6FZxvCd0rWMyjPu9i4o=";
  };
  src_mkl-dnn = fetchFromGitHub {
    owner = "intel";
@@ -415,6 +421,12 @@ rec {
    hash = "sha256-R4YmNzWEELSkAws/ejmNVxqXDTJwcqjLU/o/HvgRn2E=";
  };
  src_pybind11 = fetchFromGitHub {
    owner = "pybind";
    repo = "pybind11";
    rev = "f5fbe867d2d26e4a0a9177a51f6e568868ad3dc8";
    hash = "sha256-ZiwNGsE1FOkhnWv/1ib1akhQ4FZvrXRCDnnBZoPp6r4=";
  };
  src_pybind11_onnx = fetchFromGitHub {
    owner = "pybind";
    repo = "pybind11";
    rev = "a2e59f0e7065404b44dfe92a28aca47ba1378dc4";
@@ -429,8 +441,8 @@ rec {
  src_pytorch = fetchFromGitHub {
    owner = "pytorch";
    repo = "pytorch";
    rev = "v2.8.0";
    hash = "sha256-tFEpcgj0HiJcyBiZMtIrBrnmiCJApfTC1BgOXEGvqCo=";
    rev = "v2.9.0";
    hash = "sha256-0NdREKn9h3FtHKVe1Z3QtSOVdEcfgLlWXG/OiI+QrwA=";
  };
  src_sleef = fetchFromGitHub {
    owner = "shibatch";
@@ -441,8 +453,8 @@ rec {
  src_tensorpipe = fetchFromGitHub {
    owner = "pytorch";
    repo = "tensorpipe";
    rev = "52791a2fd214b2a9dc5759d36725909c1daa7f2e";
    hash = "sha256-i+CtjNFPDUzFCPxP0//jMLJDrQoorg0On9NfoVaMUxI=";
    rev = "af0118d13e52f5a08841464a768e01a0bf3e3075";
    hash = "sha256-X2YfYfDKxG0i2K8Uf7gpSb+LU2y0d4VBZM3lTa/ff1w=";
  };
  src_vcpkg = fetchFromGitHub {
    owner = "Microsoft";
@@ -462,6 +474,11 @@ rec {
    rev = "51a0103656eff6fc9bfd39a4597923c4b542c883";
    hash = "sha256-nhowllqv/hBs7xHdTwbWtiKJ1mvAYsVIyIZ35ZGsmkg=";
  };
  # aiter source tree with its nested composable_kernel directory populated:
  # copies src_aiter to $out, makes 3rdparty/composable_kernel writable (the
  # copy from the store is read-only), then copies the contents of
  # src_composable_kernel_aiter_recursive into it.
  src_aiter_recursive = runCommand "aiter" { } ''
    cp -r ${src_aiter} $out
    chmod u+w $out/3rdparty/composable_kernel
    cp -r ${src_composable_kernel_aiter_recursive}/* $out/3rdparty/composable_kernel
  '';
  src_asmjit_recursive = src_asmjit;
  src_benchmark_recursive = src_benchmark;
  src_benchmark_opentelemetry-cpp_recursive = src_benchmark_opentelemetry-cpp;
@@ -469,6 +486,7 @@ rec {
  src_civetweb_recursive = src_civetweb;
  src_clang-cindex-python3_recursive = src_clang-cindex-python3;
  src_composable_kernel_recursive = src_composable_kernel;
  src_composable_kernel_aiter_recursive = src_composable_kernel_aiter;
  src_composable_kernel_fbgemm_recursive = src_composable_kernel_fbgemm;
  src_composable_kernel_flash-attention_recursive = src_composable_kernel_flash-attention;
  src_cpp-httplib_recursive = src_cpp-httplib;
@@ -510,7 +528,7 @@ rec {
    chmod u+w $out/external/cutlass
    cp -r ${src_cutlass_fbgemm_recursive}/* $out/external/cutlass
    chmod u+w $out/external/googletest
    cp -r ${src_googletest_fbgemm_recursive}/* $out/external/googletest
    cp -r ${src_googletest_recursive}/* $out/external/googletest
    chmod u+w $out/external/hipify_torch
    cp -r ${src_hipify_torch_recursive}/* $out/external/hipify_torch
    chmod u+w $out/external/json
@@ -541,7 +559,6 @@ rec {
  src_gloo_recursive = src_gloo;
  src_googletest_recursive = src_googletest;
  src_googletest_dynolog_recursive = src_googletest_dynolog;
  src_googletest_fbgemm_recursive = src_googletest_fbgemm;
  src_googletest_kineto_recursive = src_googletest_kineto;
  src_googletest_opentelemetry-cpp_recursive = src_googletest_opentelemetry-cpp;
  src_googletest_prometheus-cpp_recursive = src_googletest_prometheus-cpp;
@@ -578,7 +595,7 @@ rec {
  src_onnx_recursive = runCommand "onnx" { } ''
    cp -r ${src_onnx} $out
    chmod u+w $out/third_party/pybind11
    cp -r ${src_pybind11_recursive}/* $out/third_party/pybind11
    cp -r ${src_pybind11_onnx_recursive}/* $out/third_party/pybind11
  '';
  src_opentelemetry-cpp_recursive = runCommand "opentelemetry-cpp" { } ''
    cp -r ${src_opentelemetry-cpp} $out
@@ -621,6 +638,7 @@ rec {
  src_psimd_recursive = src_psimd;
  src_pthreadpool_recursive = src_pthreadpool;
  src_pybind11_recursive = src_pybind11;
  src_pybind11_onnx_recursive = src_pybind11_onnx;
  src_pybind11_tensorpipe_recursive = runCommand "pybind11_tensorpipe" { } ''
    cp -r ${src_pybind11_tensorpipe} $out
    chmod u+w $out/tools/clang
@@ -630,6 +648,8 @@ rec {
    cp -r ${src_pytorch} $out
    chmod u+w $out/android/libs/fbjni
    cp -r ${src_fbjni_recursive}/* $out/android/libs/fbjni
    chmod u+w $out/third_party/aiter
    cp -r ${src_aiter_recursive}/* $out/third_party/aiter
    chmod u+w $out/third_party/benchmark
    cp -r ${src_benchmark_recursive}/* $out/third_party/benchmark
    chmod u+w $out/third_party/composable_kernel
+2 −2
Original line number Diff line number Diff line
@@ -77,7 +77,7 @@ let
in
buildPythonPackage rec {
  pname = "torchaudio";
  version = "2.8.0";
  version = "2.9.0";
  pyproject = true;

  stdenv = torch.stdenv;
@@ -86,7 +86,7 @@ buildPythonPackage rec {
    owner = "pytorch";
    repo = "audio";
    tag = "v${version}";
    hash = "sha256-SPa6ZWA2AWawfL4Z4mb1nddGaAsGEl/0dwweBpex2Wo=";
    hash = "sha256-oZTe0LWqOJ0NUxmmUKZN3GhMgloOMCYMicbYoaW2pTw=";
  };

  patches = [
Loading