Unverified commit c12ce421, authored by Łukasz Plewa and committed by GitHub
Browse files

[offload] Add floating-point support detection queries (#193233)

Add device info queries to detect support for half-, single-, and
double-precision floating-point formats.

For the AMDGPU, CUDA, and Host plugins, add the new queries alongside
the existing capability reporting without changing current behavior.

For the Level Zero plugin, implement floating-point support detection
and capability querying.
parent 8d060c02
Loading
Loading
Loading
Loading
+6 −1
Original line number Diff line number Diff line
@@ -47,7 +47,12 @@ def ol_device_info_t : Enum {
  ];
  list<TaggedEtor> fp_configs = !foreach(type, ["Single", "Double", "Half"], TaggedEtor<type # "_FP_CONFIG", "ol_device_fp_capability_flags_t", type # " precision floating point capability">);
  list<TaggedEtor> native_vec_widths = !foreach(type, ["char","short","int","long","float","double","half"], TaggedEtor<"NATIVE_VECTOR_WIDTH_" # type, "uint32_t", "Native vector width for " # type>);
  let etors = !listconcat(basic_etors, fp_configs, native_vec_widths);
  list<TaggedEtor> fp_support =
      !foreach(type, ["Single", "Double", "Half"],
               TaggedEtor<type#"_FP_SUPPORT", "bool",
                          type#" precision floating point support">);
  let etors =
      !listconcat(basic_etors, fp_configs, native_vec_widths, fp_support);
}

def ol_device_fp_capability_flag_t : Enum {
+13 −16
Original line number Diff line number Diff line
@@ -429,22 +429,6 @@ Error olGetDeviceInfoImplDetail(ol_device_handle_t Device,
    else
      return Info.write<ol_device_type_t>(OL_DEVICE_TYPE_GPU);

  case OL_DEVICE_INFO_SINGLE_FP_CONFIG:
  case OL_DEVICE_INFO_DOUBLE_FP_CONFIG: {
    ol_device_fp_capability_flags_t flags{0};
    flags |= OL_DEVICE_FP_CAPABILITY_FLAG_CORRECTLY_ROUNDED_DIVIDE_SQRT |
             OL_DEVICE_FP_CAPABILITY_FLAG_ROUND_TO_NEAREST |
             OL_DEVICE_FP_CAPABILITY_FLAG_ROUND_TO_ZERO |
             OL_DEVICE_FP_CAPABILITY_FLAG_ROUND_TO_INF |
             OL_DEVICE_FP_CAPABILITY_FLAG_INF_NAN |
             OL_DEVICE_FP_CAPABILITY_FLAG_DENORM |
             OL_DEVICE_FP_CAPABILITY_FLAG_FMA;
    return Info.write(flags);
  }

  case OL_DEVICE_INFO_HALF_FP_CONFIG:
    return Info.write<ol_device_fp_capability_flags_t>(0);

  case OL_DEVICE_INFO_NATIVE_VECTOR_WIDTH_CHAR:
  case OL_DEVICE_INFO_NATIVE_VECTOR_WIDTH_SHORT:
  case OL_DEVICE_INFO_NATIVE_VECTOR_WIDTH_INT:
@@ -503,6 +487,9 @@ Error olGetDeviceInfoImplDetail(ol_device_handle_t Device,
  case OL_DEVICE_INFO_NUM_COMPUTE_UNITS:
  case OL_DEVICE_INFO_ADDRESS_BITS:
  case OL_DEVICE_INFO_MAX_CLOCK_FREQUENCY:
  case OL_DEVICE_INFO_SINGLE_FP_CONFIG:
  case OL_DEVICE_INFO_DOUBLE_FP_CONFIG:
  case OL_DEVICE_INFO_HALF_FP_CONFIG:
  case OL_DEVICE_INFO_MEMORY_CLOCK_RATE: {
    // Uint32 values
    if (!std::holds_alternative<uint64_t>(Entry->Value))
@@ -522,6 +509,16 @@ Error olGetDeviceInfoImplDetail(ol_device_handle_t Device,
    return Info.write(std::get<uint64_t>(Entry->Value));
  }

  case OL_DEVICE_INFO_SINGLE_FP_SUPPORT:
  case OL_DEVICE_INFO_DOUBLE_FP_SUPPORT:
  case OL_DEVICE_INFO_HALF_FP_SUPPORT: {
    // Boolean values
    if (!std::holds_alternative<bool>(Entry->Value))
      return makeError(ErrorCode::BACKEND_FAILURE,
                       "plugin returned incorrect type");
    return Info.write<bool>(std::get<bool>(Entry->Value));
  }

  case OL_DEVICE_INFO_MAX_WORK_SIZE_PER_DIMENSION:
  case OL_DEVICE_INFO_MAX_WORK_GROUP_SIZE_PER_DIMENSION: {
    // {x, y, z} triples
+23 −0
Original line number Diff line number Diff line
@@ -31,6 +31,7 @@
#include "Utils/ELF.h"

#include "GlobalHandler.h"
#include "OffloadAPI.h"
#include "OpenMP/OMPT/Callback.h"
#include "PluginInterface.h"
#include "UtilitiesRTL.h"
@@ -3321,6 +3322,28 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
    if (Err)
      consumeError(std::move(Err));

    ol_device_fp_capability_flags_t FPFlags =
        OL_DEVICE_FP_CAPABILITY_FLAG_CORRECTLY_ROUNDED_DIVIDE_SQRT |
        OL_DEVICE_FP_CAPABILITY_FLAG_ROUND_TO_NEAREST |
        OL_DEVICE_FP_CAPABILITY_FLAG_ROUND_TO_ZERO |
        OL_DEVICE_FP_CAPABILITY_FLAG_ROUND_TO_INF |
        OL_DEVICE_FP_CAPABILITY_FLAG_INF_NAN |
        OL_DEVICE_FP_CAPABILITY_FLAG_DENORM | OL_DEVICE_FP_CAPABILITY_FLAG_FMA;

    Info.add("Single FP Support", true, "", DeviceInfo::SINGLE_FP_SUPPORT);
    Info.add("Single FP Capabilities", FPFlags, "",
             DeviceInfo::SINGLE_FP_CONFIG);

    Info.add("Double FP Support", true, "", DeviceInfo::DOUBLE_FP_SUPPORT);
    Info.add("Double FP Capabilities", FPFlags, "",
             DeviceInfo::DOUBLE_FP_CONFIG);

    // TODO: Use HSA_AGENT_INFO_FAST_F16_OPERATION to detect FP16 support.
    Info.add("Half FP Support", ol_bool_t(false), "",
             DeviceInfo::HALF_FP_SUPPORT);
    Info.add("Half FP Capabilities", ol_device_fp_capability_flags_t{0}, "",
             DeviceInfo::HALF_FP_CONFIG);

    return Info;
  }

+1 −0
Original line number Diff line number Diff line
@@ -53,6 +53,7 @@ target_link_options(PluginCommon PUBLIC ${offload_link_flags})
target_include_directories(PluginCommon PUBLIC
  ${CMAKE_CURRENT_SOURCE_DIR}/include
  ${CMAKE_CURRENT_BINARY_DIR}/include
  ${CMAKE_CURRENT_BINARY_DIR}/../../liboffload/API
  ${LIBOMPTARGET_LLVM_INCLUDE_DIRS}
  ${LIBOMPTARGET_BINARY_INCLUDE_DIR}
  ${LIBOMPTARGET_INCLUDE_DIR}
+21 −0
Original line number Diff line number Diff line
@@ -21,6 +21,7 @@
#include "Shared/Environment.h"

#include "GlobalHandler.h"
#include "OffloadAPI.h"
#include "OpenMP/OMPT/Callback.h"
#include "PluginInterface.h"
#include "Utils/ELF.h"
@@ -1241,6 +1242,26 @@ struct CUDADeviceTy : public GenericDeviceTy {

    Info.add("Compute Capabilities", ComputeCapability.str());

    ol_device_fp_capability_flags_t FPFlags =
        OL_DEVICE_FP_CAPABILITY_FLAG_CORRECTLY_ROUNDED_DIVIDE_SQRT |
        OL_DEVICE_FP_CAPABILITY_FLAG_ROUND_TO_NEAREST |
        OL_DEVICE_FP_CAPABILITY_FLAG_ROUND_TO_ZERO |
        OL_DEVICE_FP_CAPABILITY_FLAG_ROUND_TO_INF |
        OL_DEVICE_FP_CAPABILITY_FLAG_INF_NAN |
        OL_DEVICE_FP_CAPABILITY_FLAG_DENORM | OL_DEVICE_FP_CAPABILITY_FLAG_FMA;

    Info.add("Single FP Support", true, "", DeviceInfo::SINGLE_FP_SUPPORT);
    Info.add("Single FP Capabilities", FPFlags, "",
             DeviceInfo::SINGLE_FP_CONFIG);

    Info.add("Double FP Support", true, "", DeviceInfo::DOUBLE_FP_SUPPORT);
    Info.add("Double FP Capabilities", FPFlags, "",
             DeviceInfo::DOUBLE_FP_CONFIG);

    Info.add("Half FP Support", false, "", DeviceInfo::HALF_FP_SUPPORT);
    Info.add("Half FP Capabilities", ol_device_fp_capability_flags_t{0}, "",
             DeviceInfo::HALF_FP_CONFIG);

    return Info;
  }

Loading