Unverified Commit cd711ad7 authored by Ulrik Strid's avatar Ulrik Strid Committed by GitHub
Browse files

Merge pull request #298388 from GZGavinZhao/rocm-gfx-compat

rocmPackages: extend ISA compatibility
parents 8596068d c3848d59
Loading
Loading
Loading
Loading
+14 −0
Original line number Diff line number Diff line
@@ -111,6 +111,16 @@ in stdenv.mkDerivation (finalAttrs: {
      url = "https://github.com/ROCm/clr/commit/77c581a3ebd47b5e2908973b70adea66891159ee.patch";
      hash = "sha256-auBedbd7rghlKav7A9V6l64J7VmtE9GizIdi5gWj+fs=";
    })
    (fetchpatch {
      name = "extend-hip-isa-compatibility-check.patch";
      url = "https://salsa.debian.org/rocm-team/rocm-hipamd/-/raw/d6d20142c37e1dff820950b16ff8f0523241d935/debian/patches/0026-extend-hip-isa-compatibility-check.patch";
      hash = "sha256-eG0ALZZQLRzD7zJueJFhi2emontmYy6xx8Rsm346nQI=";
    })
    (fetchpatch {
      name = "improve-rocclr-isa-compatibility-check.patch";
      url = "https://salsa.debian.org/rocm-team/rocm-hipamd/-/raw/d6d20142c37e1dff820950b16ff8f0523241d935/debian/patches/0025-improve-rocclr-isa-compatibility-check.patch";
      hash = "sha256-8eowuRiOAdd9ucKv4Eg9FPU7c6367H3eP3fRAGfXc6Y=";
    })
  ];

  postPatch = ''
@@ -124,6 +134,10 @@ in stdenv.mkDerivation (finalAttrs: {

    substituteInPlace hipamd/src/hip_embed_pch.sh \
      --replace "\''$LLVM_DIR/bin/clang" "${clang}/bin/clang"

    # https://lists.debian.org/debian-ai/2024/02/msg00178.html
    substituteInPlace rocclr/utils/flags.hpp \
      --replace-fail "HIP_USE_RUNTIME_UNBUNDLER, false" "HIP_USE_RUNTIME_UNBUNDLER, true"
  '';

  postInstall = ''
+1 −1
Original line number Diff line number Diff line
@@ -194,7 +194,7 @@ in rec {
  };

  rocblas = callPackage ./rocblas {
    inherit rocblas rocmUpdateScript rocm-cmake clr tensile;
    inherit rocmUpdateScript rocm-cmake clr tensile;
    inherit (llvm) openmp;
    stdenv = llvm.rocmClangStdenv;
  };
+5 −0
Original line number Diff line number Diff line
@@ -116,6 +116,11 @@ in stdenv.mkDerivation (finalAttrs: {
      url = "https://github.com/ROCm/MIOpen/commit/3413d2daaeb44b7d6eadcc03033a5954a118491e.patch";
      hash = "sha256-ST4snUcTmmSI1Ogx815KEX9GdMnmubsavDzXCGJkiKs=";
    })
    (fetchpatch {
      name = "Extend-MIOpen-ISA-compatibility.patch";
      url = "https://github.com/GZGavinZhao/MIOpen/commit/416088b534618bd669a765afce59cfc7197064c1.patch";
      hash = "sha256-OwONCA68y8s2GqtQj+OtotXwUXQ5jM8tpeM92iaD4MU=";
    })
  ];

  outputs = [
+3 −1
Original line number Diff line number Diff line
@@ -65,7 +65,9 @@ stdenv.mkDerivation (finalAttrs: {

    # Really strange behavior, `#!/usr/bin/env perl` should work...
    substituteInPlace CMakeLists.txt \
      --replace "\''$ \''${hipify-perl_executable}" "${perl}/bin/perl ${hipify}/bin/hipify-perl"
      --replace "\''$ \''${hipify-perl_executable}" "${perl}/bin/perl ${hipify}/bin/hipify-perl" \
      --replace-warn "-parallel-jobs=12" "-parallel-jobs=1" \
      --replace-warn "-parallel-jobs=16" "-parallel-jobs=1"
  '';

  postInstall = lib.optionalString buildTests ''
+51 −119
Original line number Diff line number Diff line
{ rocblas
, lib
{ lib
, stdenv
, fetchFromGitHub
, fetchpatch
, rocmUpdateScript
, runCommand
, cmake
@@ -21,57 +21,26 @@
, buildBenchmarks ? false
, tensileLogic ? "asm_full"
, tensileCOVersion ? "default"
, tensileSepArch ? true
, tensileLazyLib ? true
# https://github.com/ROCm/Tensile/issues/1757
# Keeping the two options below disabled lets gfx101* users use rocBLAS
# normally. Set both back to `true` once the fix has been cherry-picked into
# a release; backporting that single fix alone is not enough because it
# depends on some previous commits.
, tensileSepArch ? false
, tensileLazyLib ? false
, tensileLibFormat ? "msgpack"
, gpuTargets ? [ "all" ]
# `gfx940` and `gfx941` are not present in this list because they are early
# engineering samples; all final MI300 hardware is `gfx942`:
# https://github.com/NixOS/nixpkgs/pull/298388#issuecomment-2032791130
#
# `gfx1012` is not present in this list because the ISA compatibility patches
# force all `gfx101*` GPUs to run as `gfx1010`. Those GPUs therefore always
# try to use `gfx1010` code objects, so building for `gfx1012` is useless:
# https://github.com/NixOS/nixpkgs/pull/298388#issuecomment-2076327152
, gpuTargets ? [ "gfx900;gfx906:xnack-;gfx908:xnack-;gfx90a:xnack+;gfx90a:xnack-;gfx942;gfx1010;gfx1030;gfx1100;gfx1101;gfx1102" ]
}:

let
  # NOTE: Update the default GPU targets on every update
  gfx80 = (rocblas.override {
    gpuTargets = [
      "gfx803"
    ];
  }).overrideAttrs { pname = "rocblas-tensile-gfx80"; };

  gfx90 = (rocblas.override {
    gpuTargets = [
      "gfx900"
      "gfx906:xnack-"
      "gfx908:xnack-"
      "gfx90a:xnack+"
      "gfx90a:xnack-"
    ];
  }).overrideAttrs { pname = "rocblas-tensile-gfx90"; };

  gfx94 = (rocblas.override {
    gpuTargets = [
      "gfx940"
      "gfx941"
      "gfx942"
    ];
  }).overrideAttrs { pname = "rocblas-tensile-gfx94"; };

  gfx10 = (rocblas.override {
    gpuTargets = [
      "gfx1010"
      "gfx1012"
      "gfx1030"
    ];
  }).overrideAttrs { pname = "rocblas-tensile-gfx10"; };

  gfx11 = (rocblas.override {
    gpuTargets = [
      "gfx1100"
      "gfx1101"
      "gfx1102"
    ];
  }).overrideAttrs { pname = "rocblas-tensile-gfx11"; };

  # Unfortunately, we have to do two full builds, otherwise we get overlapping _fallback.dat files
  fallbacks = rocblas.overrideAttrs { pname = "rocblas-tensile-fallbacks"; };
in stdenv.mkDerivation (finalAttrs: {
stdenv.mkDerivation (finalAttrs: {
  pname = "rocblas";
  version = "6.0.2";

@@ -94,6 +63,8 @@ in stdenv.mkDerivation (finalAttrs: {
    cmake
    rocm-cmake
    clr
  ] ++ lib.optionals buildTensile [
    tensile
  ];

  buildInputs = [
@@ -114,80 +85,41 @@ in stdenv.mkDerivation (finalAttrs: {
  ];

  cmakeFlags = [
    "-DCMAKE_C_COMPILER=hipcc"
    "-DCMAKE_CXX_COMPILER=hipcc"
    "-Dpython=python3"
    "-DAMDGPU_TARGETS=${lib.concatStringsSep ";" gpuTargets}"
    "-DBUILD_WITH_TENSILE=${if buildTensile then "ON" else "OFF"}"
    # Manually define CMAKE_INSTALL_<DIR>
    # See: https://github.com/NixOS/nixpkgs/pull/197838
    "-DCMAKE_INSTALL_BINDIR=bin"
    "-DCMAKE_INSTALL_LIBDIR=lib"
    "-DCMAKE_INSTALL_INCLUDEDIR=include"
    (lib.cmakeFeature "CMAKE_C_COMPILER" "hipcc")
    (lib.cmakeFeature "CMAKE_CXX_COMPILER" "hipcc")
    (lib.cmakeFeature "python" "python3")
    (lib.cmakeFeature "AMDGPU_TARGETS" (lib.concatStringsSep ";" gpuTargets))
    (lib.cmakeBool "BUILD_WITH_TENSILE" buildTensile)
    (lib.cmakeBool "ROCM_SYMLINK_LIBS" false)
    (lib.cmakeFeature "ROCBLAS_TENSILE_LIBRARY_DIR" "lib/rocblas")
    (lib.cmakeBool "BUILD_CLIENTS_TESTS" buildTests)
    (lib.cmakeBool "BUILD_CLIENTS_BENCHMARKS" buildBenchmarks)
    # rocblas header files are not installed unless we set this
    (lib.cmakeFeature "CMAKE_INSTALL_INCLUDEDIR" "include")
  ] ++ lib.optionals buildTensile [
    "-DVIRTUALENV_HOME_DIR=/build/source/tensile"
    "-DTensile_TEST_LOCAL_PATH=/build/source/tensile"
    "-DTensile_ROOT=/build/source/tensile/${python3.sitePackages}/Tensile"
    "-DTensile_LOGIC=${tensileLogic}"
    "-DTensile_CODE_OBJECT_VERSION=${tensileCOVersion}"
    "-DTensile_SEPARATE_ARCHITECTURES=${if tensileSepArch then "ON" else "OFF"}"
    "-DTensile_LAZY_LIBRARY_LOADING=${if tensileLazyLib then "ON" else "OFF"}"
    "-DTensile_LIBRARY_FORMAT=${tensileLibFormat}"
  ] ++ lib.optionals buildTests [
    "-DBUILD_CLIENTS_TESTS=ON"
  ] ++ lib.optionals buildBenchmarks [
    "-DBUILD_CLIENTS_BENCHMARKS=ON"
    (lib.cmakeBool "BUILD_WITH_PIP" false)
    (lib.cmakeFeature "Tensile_LOGIC" tensileLogic)
    (lib.cmakeFeature "Tensile_CODE_OBJECT_VERSION" tensileCOVersion)
    (lib.cmakeBool "Tensile_SEPARATE_ARCHITECTURES" tensileSepArch)
    (lib.cmakeBool "Tensile_LAZY_LIBRARY_LOADING" tensileLazyLib)
    (lib.cmakeFeature "Tensile_LIBRARY_FORMAT" tensileLibFormat)
    (lib.cmakeBool "Tensile_PRINT_DEBUG" true)
  ] ++ lib.optionals (buildTests || buildBenchmarks) [
    "-DCMAKE_CXX_FLAGS=-I${amd-blis}/include/blis"
    (lib.cmakeFeature "CMAKE_CXX_FLAGS" "-I${amd-blis}/include/blis")
  ];

  postPatch = lib.optionalString (finalAttrs.pname != "rocblas") ''
    # Return early and install tensile files manually
    substituteInPlace library/src/CMakeLists.txt \
      --replace "set_target_properties( TensileHost PROPERTIES OUTPUT_NAME" "return()''\nset_target_properties( TensileHost PROPERTIES OUTPUT_NAME"
  '' + lib.optionalString (buildTensile && finalAttrs.pname == "rocblas") ''
    # Link the prebuilt Tensile files
    mkdir -p build/Tensile/library

    for path in ${gfx80} ${gfx90} ${gfx94} ${gfx10} ${gfx11} ${fallbacks}; do
      ln -s $path/lib/rocblas/library/* build/Tensile/library
    done

    unlink build/Tensile/library/TensileManifest.txt
  '' + lib.optionalString buildTensile ''
    # Tensile REALLY wants to write to the nix directory if we include it normally
    cp -a ${tensile} tensile
    chmod +w -R tensile

    # Rewrap Tensile
    substituteInPlace tensile/bin/{.t*,.T*,*} \
      --replace "${tensile}" "/build/source/tensile"

    substituteInPlace CMakeLists.txt \
      --replace "include(virtualenv)" "" \
      --replace "virtualenv_install(\''${Tensile_TEST_LOCAL_PATH})" ""
  '';
  patches = [
    (fetchpatch {
      name = "Extend-rocBLAS-HIP-ISA-compatibility.patch";
      url = "https://github.com/GZGavinZhao/rocBLAS/commit/89b75ff9cc731f71f370fad90517395e117b03bb.patch";
      hash = "sha256-W/ohOOyNCcYYLOiQlPzsrTlNtCBdJpKVxO8s+4G7sjo=";
    })
  ];

  postInstall = lib.optionalString (finalAttrs.pname == "rocblas") ''
    ln -sf ${fallbacks}/lib/rocblas/library/TensileManifest.txt $out/lib/rocblas/library
  '' + lib.optionalString (finalAttrs.pname != "rocblas") ''
    mkdir -p $out/lib/rocblas/library
    rm -rf $out/share
  '' + lib.optionalString (finalAttrs.pname != "rocblas" && finalAttrs.pname != "rocblas-tensile-fallbacks") ''
    rm Tensile/library/{TensileManifest.txt,*_fallback.dat}
    mv Tensile/library/* $out/lib/rocblas/library
  '' + lib.optionalString (finalAttrs.pname == "rocblas-tensile-fallbacks") ''
    mv Tensile/library/{TensileManifest.txt,*_fallback.dat} $out/lib/rocblas/library
  '' + lib.optionalString buildTests ''
    mkdir -p $test/bin
    cp -a $out/bin/* $test/bin
    rm $test/bin/*-bench || true
  '' + lib.optionalString buildBenchmarks ''
    mkdir -p $benchmark/bin
    cp -a $out/bin/* $benchmark/bin
    rm $benchmark/bin/*-test || true
  '' + lib.optionalString (buildTests || buildBenchmarks ) ''
    rm -rf $out/bin
  # Pass $NIX_BUILD_CORES to Tensile
  postPatch = ''
    substituteInPlace cmake/build-options.cmake \
      --replace-fail 'Tensile_CPU_THREADS ""' 'Tensile_CPU_THREADS "$ENV{NIX_BUILD_CORES}"'
  '';

  passthru.updateScript = rocmUpdateScript {
Loading