Unverified Commit e8581604 authored by Luna Nova's avatar Luna Nova Committed by GitHub
Browse files

rocmPackages.composable_kernel: fix gfx906 only failure (#495648)

parents a83174ab 0922118c
Loading
Loading
Loading
Loading
+6 −2
Original line number Diff line number Diff line
@@ -5,7 +5,6 @@
  rocmUpdateScript,
  cmake,
  rocm-cmake,
  llvm,
  clr,
  rocminfo,
  python3,
@@ -124,7 +123,12 @@ stdenv.mkDerivation (finalAttrs: {
    "-DGOOGLETEST_DIR=${gtest.src}" # Custom linker names
  ];

  # No flags to build selectively it seems...
  patches = [
    # Hacky fix for the build failure that occurs when every selected GPU target
    # is filtered out for a kernel that is nonetheless required at link time
    ./fix-empty-offload-targets.diff
  ];

  postPatch =
    # Reduce configure time by preventing thousands of clang-tidy targets being added
    # We will never call them
+23 −0
Original line number Diff line number Diff line
diff --git a/library/src/tensor_operation_instance/gpu/CMakeLists.txt b/library/src/tensor_operation_instance/gpu/CMakeLists.txt
index 172f6681b8..d3ddbb2f15 100644
--- a/library/src/tensor_operation_instance/gpu/CMakeLists.txt
+++ b/library/src/tensor_operation_instance/gpu/CMakeLists.txt
@@ -154,9 +154,15 @@ function(add_instance_library INSTANCE_NAME)
                 list(FILTER INST_TARGETS INCLUDE REGEX "gfx12")
             endif()
             set(offload_targets)
-            foreach(target IN LISTS INST_TARGETS)
-                string(APPEND offload_targets "--offload-arch=${target} ")
-            endforeach()
+            if(NOT INST_TARGETS)
+                # No valid GPU targets remain for this source; compile for gfx90a as a
+                # fallback so the add_device…instances and hip_fatbin… symbols still exist
+                set(offload_targets "--offload-arch=gfx90a")
+            else()
+                foreach(target IN LISTS INST_TARGETS)
+                    string(APPEND offload_targets "--offload-arch=${target} ")
+                endforeach()
+            endif()
             set_source_files_properties(${source} PROPERTIES COMPILE_FLAGS ${offload_targets})
             list(APPEND INST_OBJ ${source})
         endforeach()