Unverified Commit 4774c536 authored by Weijia Wang's avatar Weijia Wang Committed by GitHub
Browse files

Merge pull request #267209 from Madouura/pr/triton-llvm

openai-triton-llvm: fix aarch64 and cross-compilation
parents 9415631d e3d4beac
Loading
Loading
Loading
Loading
+89 −38
Original line number Diff line number Diff line
{ config
, lib
{ lib
, stdenv
, fetchFromGitHub
, pkgsBuildBuild
, pkg-config
, cmake
, ninja
, git
, doxygen
, sphinx
, libxml2
, libxcrypt
, libedit
, libffi
, libpfm
, mpfr
, zlib
, ncurses
, doxygen
, sphinx
, which
, sysctl
, python3Packages
, buildDocs ? true
, buildMan ? true
, buildTests ? true
, llvmTargetsToBuild ? [ "NATIVE" ] # "NATIVE" resolves into x86 or aarch64 depending on stdenv
, llvmProjectsToBuild ? [ "llvm" "mlir" ]
}:

stdenv.mkDerivation (finalAttrs: {
let
  # Name of the LLVM backend matching the host platform: "X86" for x86_64,
  # "AArch64" for aarch64. For any other host platform, evaluating this value
  # throws an error naming the platform's config string.
  llvmNativeTarget =
    if stdenv.hostPlatform.isx86_64 then "X86"
    else if stdenv.hostPlatform.isAarch64 then "AArch64"
    else throw "Currently unsupported LLVM platform '${stdenv.hostPlatform.config}'";

  # Map the "NATIVE" placeholder to the host backend name; any other target
  # name is passed through unchanged.
  inferNativeTarget = t: if t == "NATIVE" then llvmNativeTarget else t;
  # "AMDGPU" and "NVPTX" are always included, followed by the caller-supplied
  # targets with "NATIVE" resolved.
  llvmTargetsToBuild' = [ "AMDGPU" "NVPTX" ] ++ builtins.map inferNativeTarget llvmTargetsToBuild;

  # This LLVM version can't seem to find pygments/pyyaml,
  # but a later update will likely fix this (openai-triton-2.1.0)
  #
  # When tests are built, use an interpreter wrapped with psutil, pygments and
  # pyyaml; otherwise use the plain interpreter from python3Packages.
  python =
    if buildTests
    then python3Packages.python.withPackages (p: with p; [ psutil pygments pyyaml ])
    else python3Packages.python;

  # True when the host platform equals the build platform (not a cross build);
  # the cross-only CMake flags and the llvm-config-native install step are
  # gated on its negation.
  isNative = stdenv.hostPlatform == stdenv.buildPlatform;
in stdenv.mkDerivation (finalAttrs: {
  pname = "openai-triton-llvm";
  version = "14.0.6-f28c006a5895";

@@ -33,7 +55,8 @@ stdenv.mkDerivation (finalAttrs: {
    "man"
  ];

  # See https://github.com/openai/triton/blob/main/python/setup.py and https://github.com/ptillet/triton-llvm-releases/releases
  # See https://github.com/openai/triton/blob/main/python/setup.py
  # and https://github.com/ptillet/triton-llvm-releases/releases
  src = fetchFromGitHub {
    owner = "llvm";
    repo = "llvm-project";
@@ -46,7 +69,7 @@ stdenv.mkDerivation (finalAttrs: {
    cmake
    ninja
    git
    python3Packages.python
    python
  ] ++ lib.optionals (buildDocs || buildMan) [
    doxygen
    sphinx
@@ -58,6 +81,7 @@ stdenv.mkDerivation (finalAttrs: {
    libxcrypt
    libedit
    libffi
    libpfm
    mpfr
  ];

@@ -69,37 +93,55 @@ stdenv.mkDerivation (finalAttrs: {
  sourceRoot = "${finalAttrs.src.name}/llvm";

  cmakeFlags = [
    "-DLLVM_TARGETS_TO_BUILD=${
      let
        # Targets can be found in
        # https://github.com/llvm/llvm-project/tree/f28c006a5895fc0e329fe15fead81e37457cb1d1/clang/lib/Basic/Targets
        # NOTE: Unsure of how "host" would function, especially given that we might be cross-compiling.
        llvmTargets = [ "AMDGPU" "NVPTX" ]
        ++ lib.optionals stdenv.isAarch64 [ "AArch64" ]
        ++ lib.optionals stdenv.isx86_64 [ "X86" ];
      in
      lib.concatStringsSep ";" llvmTargets
    }"
    "-DLLVM_ENABLE_PROJECTS=llvm;mlir"
    "-DLLVM_INSTALL_UTILS=ON"
  ] ++ lib.optionals (buildDocs || buildMan) [
    "-DLLVM_INCLUDE_DOCS=ON"
    "-DMLIR_INCLUDE_DOCS=ON"
    "-DLLVM_BUILD_DOCS=ON"
    # "-DLLVM_ENABLE_DOXYGEN=ON" Way too slow, only uses one core
    "-DLLVM_ENABLE_SPHINX=ON"
    "-DSPHINX_OUTPUT_HTML=ON"
    "-DSPHINX_OUTPUT_MAN=ON"
    "-DSPHINX_WARNINGS_AS_ERRORS=OFF"
  ] ++ lib.optionals buildTests [
    "-DLLVM_INCLUDE_TESTS=ON"
    "-DMLIR_INCLUDE_TESTS=ON"
    "-DLLVM_BUILD_TESTS=ON"
    (lib.cmakeFeature "LLVM_TARGETS_TO_BUILD" (lib.concatStringsSep ";" llvmTargetsToBuild'))
    (lib.cmakeFeature "LLVM_ENABLE_PROJECTS" (lib.concatStringsSep ";" llvmProjectsToBuild))
    (lib.cmakeFeature "LLVM_HOST_TRIPLE" stdenv.hostPlatform.config)
    (lib.cmakeFeature "LLVM_DEFAULT_TARGET_TRIPLE" stdenv.hostPlatform.config)
    (lib.cmakeBool "LLVM_INSTALL_UTILS" true)
    (lib.cmakeBool "LLVM_INCLUDE_DOCS" (buildDocs || buildMan))
    (lib.cmakeBool "MLIR_INCLUDE_DOCS" (buildDocs || buildMan))
    (lib.cmakeBool "LLVM_BUILD_DOCS" (buildDocs || buildMan))
    # Way too slow, only uses one core
    # (lib.cmakeBool "LLVM_ENABLE_DOXYGEN" (buildDocs || buildMan))
    (lib.cmakeBool "LLVM_ENABLE_SPHINX" (buildDocs || buildMan))
    (lib.cmakeBool "SPHINX_OUTPUT_HTML" buildDocs)
    (lib.cmakeBool "SPHINX_OUTPUT_MAN" buildMan)
    (lib.cmakeBool "SPHINX_WARNINGS_AS_ERRORS" false)
    (lib.cmakeBool "LLVM_INCLUDE_TESTS" buildTests)
    (lib.cmakeBool "MLIR_INCLUDE_TESTS" buildTests)
    (lib.cmakeBool "LLVM_BUILD_TESTS" buildTests)
  # Cross compilation code taken/modified from LLVM 16 derivation
  ] ++ lib.optionals (!isNative) (let
    # CMake variables selecting the C/C++ compilers and the ar/strip/ranlib
    # tools from the pkgsBuildBuild toolchain. They are joined into
    # CROSS_TOOLCHAIN_FLAGS_NATIVE further down.
    # NOTE(review): presumably consumed by LLVM's nested NATIVE build — confirm
    # against llvm/cmake/modules/CrossCompile.cmake.
    nativeToolchainFlags = let
      nativeCC = pkgsBuildBuild.targetPackages.stdenv.cc;
      nativeBintools = nativeCC.bintools.bintools;
    in [
      (lib.cmakeFeature "CMAKE_C_COMPILER" "${nativeCC}/bin/${nativeCC.targetPrefix}cc")
      (lib.cmakeFeature "CMAKE_CXX_COMPILER" "${nativeCC}/bin/${nativeCC.targetPrefix}c++")
      (lib.cmakeFeature "CMAKE_AR" "${nativeBintools}/bin/${nativeBintools.targetPrefix}ar")
      (lib.cmakeFeature "CMAKE_STRIP" "${nativeBintools}/bin/${nativeBintools.targetPrefix}strip")
      (lib.cmakeFeature "CMAKE_RANLIB" "${nativeBintools}/bin/${nativeBintools.targetPrefix}ranlib")
    ];

    # We need to repass the custom GNUInstallDirs values, otherwise CMake
    # will choose them for us, leading to wrong results in llvm-config-native.
    # Every directory is pinned under the `out` output via `placeholder "out"`.
    nativeInstallFlags = [
      (lib.cmakeFeature "CMAKE_INSTALL_PREFIX" (placeholder "out"))
      (lib.cmakeFeature "CMAKE_INSTALL_BINDIR" "${placeholder "out"}/bin")
      (lib.cmakeFeature "CMAKE_INSTALL_INCLUDEDIR" "${placeholder "out"}/include")
      (lib.cmakeFeature "CMAKE_INSTALL_LIBDIR" "${placeholder "out"}/lib")
      (lib.cmakeFeature "CMAKE_INSTALL_LIBEXECDIR" "${placeholder "out"}/libexec")
    ];
  in [
    (lib.cmakeBool "CMAKE_CROSSCOMPILING" true)
    (lib.cmakeFeature "CROSS_TOOLCHAIN_FLAGS_NATIVE" (lib.concatStringsSep ";"
      (lib.concatLists [ nativeToolchainFlags nativeInstallFlags ])))
  ]);

  postPatch = ''
    # `CMake Error: cannot write to file "/build/source/llvm/build/lib/cmake/mlir/MLIRTargets.cmake": Permission denied`
    chmod +w -R ../mlir
    patchShebangs ../mlir/test/mlir-reduce

    # FileSystem permissions tests fail with various special bits
    rm test/tools/llvm-objcopy/ELF/mirror-permissions-unix.test
@@ -107,9 +149,21 @@ stdenv.mkDerivation (finalAttrs: {

    substituteInPlace unittests/Support/CMakeLists.txt \
      --replace "Path.cpp" ""
  '' + lib.optionalString stdenv.isAarch64 ''
    # Not sure why this fails
    rm test/tools/llvm-exegesis/AArch64/latency-by-opcode-name.s
  '';

  # Cross builds only: copy the llvm-config binary found under NATIVE/bin in
  # the build directory into $out/bin as llvm-config-native. On native builds
  # this hook is the empty string.
  postInstall = lib.optionalString (!isNative) ''
    cp -a NATIVE/bin/llvm-config $out/bin/llvm-config-native
  '';

  doCheck = buildTests;

  nativeCheckInputs = [ which ]
    ++ lib.optionals stdenv.isDarwin [ sysctl ];

  checkTarget = "check-all";
  requiredSystemFeatures = [ "big-parallel" ];

  meta = with lib; {
@@ -117,9 +171,6 @@ stdenv.mkDerivation (finalAttrs: {
    homepage = "https://github.com/llvm/llvm-project";
    license = with licenses; [ ncsa ];
    maintainers = with maintainers; [ SomeoneSerge Madouura ];
    platforms = platforms.linux;
    # Consider the derivation broken if we're not building for CUDA or ROCm, or if we're building for aarch64
    # and ROCm is enabled. See https://github.com/RadeonOpenCompute/ROCm/issues/1831#issuecomment-1278205344.
    broken = stdenv.isAarch64 && !config.cudaSupport;
    platforms = with platforms; aarch64 ++ x86;
  };
})
+6 −6
Original line number Diff line number Diff line
@@ -16,9 +16,10 @@
  filelock,
  jinja2,
  networkx,
  openai-triton,
  sympy,
  numpy, pyyaml, cffi, click, typing-extensions,
  # The ROCm build and `torch.compile` both require `openai-triton`
  tritonSupport ? (!stdenv.isDarwin), openai-triton,

  # Unit tests
  hypothesis, psutil,
@@ -303,12 +304,13 @@ in buildPythonPackage rec {
    "-Wno-pass-failed"
  ] ++ [
    "-Wno-unused-command-line-argument"
    "-Wno-maybe-uninitialized"
    "-Wno-uninitialized"
    "-Wno-array-bounds"
    "-Wno-stringop-overflow"
    "-Wno-free-nonheap-object"
    "-Wno-unused-result"
  ] ++ lib.optionals stdenv.cc.isGNU [
    "-Wno-maybe-uninitialized"
    "-Wno-stringop-overflow"
  ]));

  nativeBuildInputs = [
@@ -377,12 +379,10 @@ in buildPythonPackage rec {
    # the following are required for tensorboard support
    pillow six future tensorboard protobuf

    # ROCm build and `torch.compile` requires openai-triton
    openai-triton

    # torch/csrc requires `pybind11` at runtime
    pybind11
  ]
  ++ lib.optionals tritonSupport [ openai-triton ]
  ++ lib.optionals MPISupport [ mpi ]
  ++ lib.optionals rocmSupport [ rocmtoolkit_joined ];