Commit 02b9c5d9 authored by Alexey Bataev's avatar Alexey Bataev
Browse files

Revert "[libomptarget] Build a minimal deviceRTL for amdgcn"

This reverts commit 877ffa71 because it
breaks the build.
parent abe8de29
Loading
Loading
Loading
Loading
+1 −2
Original line number Diff line number Diff line
@@ -6,9 +6,8 @@
#
# ##===----------------------------------------------------------------------===##
#
# Build a device RTL for each available machine.
# Build a device RTL for each available machine available.
#
##===----------------------------------------------------------------------===##

add_subdirectory(amdgcn)
add_subdirectory(nvptx)
+0 −136
Original line number Diff line number Diff line
##===----------------------------------------------------------------------===##
#
# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
# See https://llvm.org/LICENSE.txt for license information.
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
#
##===----------------------------------------------------------------------===##
#
# Build the AMDGCN Device RTL if the ROCM tools are available
#
##===----------------------------------------------------------------------===##

find_package(LLVM QUIET CONFIG
  PATHS
  $ENV{AOMP}
  $ENV{HOME}/rocm/aomp
  /opt/rocm/aomp
  /usr/lib/rocm/aomp
  ${LIBOMPTARGET_NVPTX_CUDA_COMPILER_DIR}
  ${LIBOMPTARGET_NVPTX_CUDA_LINKER_DIR}
  ${CMAKE_CXX_COMPILER_DIR}
  NO_DEFAULT_PATH)

if (LLVM_DIR)
  libomptarget_say("Found LLVM ${LLVM_PACKAGE_VERSION}. Configure: ${LLVM_DIR}/LLVMConfig.cmake")
else()
  libomptarget_say("Not building AMDGCN device RTL: AOMP not found")
  return()
endif()

set(AOMP_INSTALL_PREFIX ${LLVM_INSTALL_PREFIX})

if (AOMP_INSTALL_PREFIX)
  set(AOMP_BINDIR ${AOMP_INSTALL_PREFIX}/bin)
else()
  set(AOMP_BINDIR ${LLVM_BUILD_BINARY_DIR}/bin)
endif()

libomptarget_say("Building AMDGCN device RTL. LLVM_COMPILER_PATH=${AOMP_BINDIR}")

project(omptarget-amdgcn)

add_custom_target(omptarget-amdgcn ALL)

#optimization level
set(optimization_level 2)

# Activate RTL message dumps if requested by the user.
if(LIBOMPTARGET_NVPTX_DEBUG)
  set(CUDA_DEBUG -DOMPTARGET_NVPTX_DEBUG=-1)
endif()

get_filename_component(devicertl_base_directory
  ${CMAKE_CURRENT_SOURCE_DIR}
  DIRECTORY)

set(cuda_sources
  ${devicertl_base_directory}/common/src/cancel.cu
  ${devicertl_base_directory}/common/src/critical.cu)

set(h_files
  ${CMAKE_CURRENT_SOURCE_DIR}/src/amdgcn_interface.h
  ${CMAKE_CURRENT_SOURCE_DIR}/src/device_environment.h
  ${CMAKE_CURRENT_SOURCE_DIR}/src/target_impl.h
  ${devicertl_base_directory}/common/debug.h
  ${devicertl_base_directory}/common/state-queue.h
  ${devicertl_base_directory}/common/state-queuei.h
  ${devicertl_base_directory}/common/support.h)

# for both in-tree and out-of-tree build
if (NOT CMAKE_ARCHIVE_OUTPUT_DIRECTORY)
  set(OUTPUTDIR ${CMAKE_CURRENT_BINARY_DIR})
else()
  set(OUTPUTDIR ${CMAKE_ARCHIVE_OUTPUT_DIRECTORY})
endif()

# create libraries
set(mcpus gfx700 gfx701 gfx801 gfx803 gfx900)
if (DEFINED LIBOMPTARGET_AMDGCN_GFXLIST)
  set(mcpus ${LIBOMPTARGET_AMDGCN_GFXLIST})
endif()

macro(add_cuda_bc_library)
  set(cu_cmd ${AOMP_BINDIR}/clang++
    -std=c++11
    -fcuda-rdc
    -fvisibility=default
    --cuda-device-only
    -Wno-unused-value
    -x hip
    -O${optimization_level}
    --cuda-gpu-arch=${mcpu}
    ${CUDA_DEBUG}
    -I${CMAKE_CURRENT_SOURCE_DIR}/src
    -I${devicertl_base_directory})

  set(bc1_files)

  foreach(file ${ARGN})
    get_filename_component(fname ${file} NAME_WE)
    set(bc1_filename ${fname}.${mcpu}.bc)

    add_custom_command(
      OUTPUT ${bc1_filename}
      COMMAND ${cu_cmd} ${file} -o ${bc1_filename}
      DEPENDS ${file} ${h_files})

    list(APPEND bc1_files ${bc1_filename})
  endforeach()

  add_custom_command(
    OUTPUT linkout.cuda.${mcpu}.bc
    COMMAND ${AOMP_BINDIR}/llvm-link ${bc1_files} -o linkout.cuda.${mcpu}.bc
    DEPENDS ${bc1_files})

  list(APPEND bc_files linkout.cuda.${mcpu}.bc)
endmacro()

set(libname "omptarget-amdgcn")

foreach(mcpu ${mcpus})
  set(bc_files)
  add_cuda_bc_library(${cuda_sources})

  set(bc_libname lib${libname}-${mcpu}.bc)
  add_custom_command(
    OUTPUT ${bc_libname}
    COMMAND ${AOMP_BINDIR}/llvm-link ${bc_files} | ${AOMP_BINDIR}/opt --always-inline -o ${OUTPUTDIR}/${bc_libname}
    DEPENDS ${bc_files})

  add_custom_target(lib${libname}-${mcpu} ALL DEPENDS ${bc_libname})

  install(FILES ${OUTPUTDIR}/${bc_libname}
     DESTINATION "${OPENMP_INSTALL_LIBDIR}/libdevice"
  )
endforeach()
+0 −27
Original line number Diff line number Diff line
//===---- device_environment.h - OpenMP GPU device environment --- CUDA -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// Global device environment
//
//===----------------------------------------------------------------------===//

#ifndef _OMPTARGET_DEVICE_ENVIRONMENT_H_
#define _OMPTARGET_DEVICE_ENVIRONMENT_H_

#include "target_impl.h"

struct omptarget_device_environmentTy {
  int32_t debug_level;   // gets value of envvar LIBOMPTARGET_DEVICE_RTL_DEBUG
                         // only useful for Debug build of deviceRTLs 
  int32_t num_devices;   // gets number of active offload devices 
  int32_t device_num;    // gets a value 0 to num_devices-1
};

extern DEVICE omptarget_device_environmentTy omptarget_device_environment;

#endif
+13 −18
Original line number Diff line number Diff line
@@ -72,6 +72,8 @@ EXTERN uint64_t __lanemask_lt();
// thread's lane number in the warp
EXTERN uint64_t __lanemask_gt();

EXTERN void llvm_amdgcn_s_barrier();

// CU id
EXTERN unsigned __smid();

@@ -99,21 +101,25 @@ INLINE uint32_t __kmpc_impl_smid() {
  return __smid();
}

INLINE uint64_t __kmpc_impl_ffs(uint64_t x) { return __builtin_ffsl(x); }
INLINE uint64_t __kmpc_impl_ffs(uint64_t x) { return __ffsll(x); }

INLINE uint64_t __kmpc_impl_popc(uint64_t x) { return __builtin_popcountl(x); }
INLINE uint64_t __kmpc_impl_popc(uint64_t x) { return __popcll(x); }

INLINE __kmpc_impl_lanemask_t __kmpc_impl_activemask() {
  return __ballot64(1);
}

EXTERN int32_t __kmpc_impl_shfl_sync(__kmpc_impl_lanemask_t, int32_t Var,
                                     int32_t SrcLane);
INLINE int32_t __kmpc_impl_shfl_sync(__kmpc_impl_lanemask_t, int32_t Var,
                                     int32_t SrcLane) {
  return __shfl(Var, SrcLane, WARPSIZE);
}

EXTERN int32_t __kmpc_impl_shfl_down_sync(__kmpc_impl_lanemask_t, int32_t Var,
                                          uint32_t Delta, int32_t Width);
INLINE int32_t __kmpc_impl_shfl_down_sync(__kmpc_impl_lanemask_t, int32_t Var,
                                          uint32_t Delta, int32_t Width) {
  return __shfl_down(Var, Delta, Width);
}

INLINE void __kmpc_impl_syncthreads() { __builtin_amdgcn_s_barrier(); }
INLINE void __kmpc_impl_syncthreads() { llvm_amdgcn_s_barrier(); }

INLINE void __kmpc_impl_named_sync(int barrier, uint32_t num_threads) {
  // we have protected the master warp from releasing from its barrier
@@ -122,15 +128,4 @@ INLINE void __kmpc_impl_named_sync(int barrier, uint32_t num_threads) {
  __builtin_amdgcn_s_barrier();
}

// DEVICE versions of part of libc
extern "C" {
DEVICE __attribute__((noreturn)) void
__assertfail(const char *, const char *, unsigned, const char *, size_t);
INLINE static void __assert_fail(const char *__message, const char *__file,
                                 unsigned int __line, const char *__function) {
  __assertfail(__message, __file, __line, __function, sizeof(char));
}
DEVICE int printf(const char *, ...);
}

#endif