Commit 2c2b6c84 authored by David M. Rogers's avatar David M. Rogers
Browse files

Added initial cuda/hip support.

parent 7d025e77
Loading
Loading
Loading
Loading
+24 −2
Original line number Diff line number Diff line
@@ -2,7 +2,7 @@
#  - an mpiwrap library and header file
#  - an mpitest executable that uses the library
#
cmake_minimum_required(VERSION 3.17)
cmake_minimum_required(VERSION 3.8)

project(mpitest VERSION 1.0 LANGUAGES CXX)
list(APPEND CMAKE_MODULE_PATH ${CMAKE_CURRENT_LIST_DIR}/cmake)
@@ -12,6 +12,23 @@ include(install)
#implicit option(BUILD_TESTING "Build tests accompanying this project" ON)
option(BUILD_SHARED_LIBS "Build using shared libraries" ON)

# Maintainer-only switch: set it to "ON" to turn on every separable
# compilation flag for cuda/hip kernels.  Doing so will likely lower
# performance.  Whether it is needed depends on how the code is
# structured, so it is a plain variable rather than a user-facing option.
set(GPU_SEPARABLE_COMPILATION OFF)

# Choose the GPU backend from the architecture list given at configure
# time.  CUDA is checked first, so it wins if both lists are set.
if(NOT CMAKE_CUDA_ARCHITECTURES AND NOT CMAKE_HIP_ARCHITECTURES)
    # no architectures requested: CPU-only build
    set(ENABLE_GPU FALSE)
    set(GPU_LIBRARIES "")
else()
    set(ENABLE_GPU TRUE)
    if(CMAKE_CUDA_ARCHITECTURES)
        include(setup_cuda)
    else()
        include(setup_hip)
    endif()
endif()

find_package(MPI REQUIRED)

##########  TARGETS  ################################################
@@ -19,12 +36,17 @@ add_library(mpiwrap src/wrapper.cc)
add_executable(mpitest src/bcast.cc)
#####################################################################

target_compile_features(mpiwrap PUBLIC cxx_std_14)
# Produce include/config.hh in the build tree from its template; the
# header stashes the configured options for the sources.
configure_file(include/config.hh.in include/config.hh)

# Usage requirements of mpiwrap.  Everything is PUBLIC, so it also
# applies to targets that link against mpiwrap.
target_include_directories(mpiwrap
    PUBLIC
        $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/include>
        $<BUILD_INTERFACE:${PROJECT_BINARY_DIR}/include>
        $<INSTALL_INTERFACE:include>
)
target_link_libraries(mpiwrap PUBLIC MPI::MPI_CXX)
target_compile_features(mpiwrap PUBLIC cxx_std_17)

# lib needs PIC when BUILD_SHARED_LIBS=ON
set_target_properties(mpiwrap PROPERTIES
                      POSITION_INDEPENDENT_CODE ${BUILD_SHARED_LIBS}
+4 −2
Original line number Diff line number Diff line
@@ -18,8 +18,10 @@ to "out of the box".

## CUDA/HIP Kernel Calling Layer

Configuring with ``-DENABLE_GPU=ON`` adds CUDA/HIP kernel
calls using the simple cuda2hip.h header from
Configuring with ``-DCMAKE_CUDA_ARCHITECTURES=70`` enables CUDA,
and configuring with ``-DCMAKE_HIP_ARCHITECTURES=gfx908`` enables HIP.
The source code is identical for both, because the HIP
interface uses the simple cuda2hip.h header from
the [Quip project](https://github.com/twhite-cray/quip).


cmake/setup_cuda.cmake

0 → 100644
+32 −0
Original line number Diff line number Diff line
# CUDA backend setup.  The top-level CMakeLists.txt includes this file
# when CMAKE_CUDA_ARCHITECTURES is set.
#
# The version guard runs first so that an outdated CMake stops with a
# clear message before any CUDA compiler detection is attempted
# (find_package(CUDAToolkit) below needs CMake 3.17).
if(CMAKE_VERSION VERSION_LESS "3.17")
    message(FATAL_ERROR "Compilation for CUDA requires CMake 3.17 or later.")
endif()

message(STATUS "Setting up CUDA")

enable_language(CUDA)

# Provides the CUDA::cudart and CUDA::cublas imported targets that the
# gpu_*_link() helpers in this file link against.
find_package(CUDAToolkit REQUIRED)

# Note: Technically, C++17 is only supported by cmake 3.18+
set(CMAKE_CUDA_STANDARD 17)

# gpu_runtime_link(<target>)
#   Link <target> against the CUDA runtime library (CUDA::cudart).
#   The dependency is PUBLIC, so consumers of <target> inherit it.
function(gpu_runtime_link target_name)
    target_link_libraries(
        ${target_name}
        PUBLIC
            CUDA::cudart
    )
endfunction()

# gpu_blas_link(<target>)
#   Link <target> against the GPU BLAS library, which is CUDA::cublas
#   for the CUDA backend.  The dependency is PUBLIC, so consumers of
#   <target> inherit it.
function(gpu_blas_link target_name)
    target_link_libraries(
        ${target_name}
        PUBLIC
            CUDA::cublas
    )
endfunction()
	
# gpu_device_link(<target>)
#   Apply the device-link settings to <target>.  Nothing happens unless
#   GPU_SEPARABLE_COMPILATION is true; in that case the target properties
#   CUDA_SEPARABLE_COMPILATION and CUDA_RESOLVE_DEVICE_SYMBOLS are set ON.
function(gpu_device_link target_name)
    if(NOT GPU_SEPARABLE_COMPILATION)
        return()
    endif()

    set_target_properties(${target_name} PROPERTIES
        CUDA_SEPARABLE_COMPILATION ON
        CUDA_RESOLVE_DEVICE_SYMBOLS ON
    )
endfunction()

cmake/setup_hip.cmake

0 → 100644
+58 −0
Original line number Diff line number Diff line
# HIP backend setup.  The top-level CMakeLists.txt includes this file
# when CMAKE_HIP_ARCHITECTURES is set and CMAKE_CUDA_ARCHITECTURES is not.
#
# The version guard must come before enable_language(HIP): HIP only
# became a CMake language in 3.21, so on an older CMake the
# enable_language() call itself would abort before this message could
# ever be printed.
if(CMAKE_VERSION VERSION_LESS "3.21.3")
    message(FATAL_ERROR "Compilation for HIP requires CMake 3.21.3 or later.")
endif()

message(STATUS "Setting up HIP using ROCM_ROOT = ${ROCM_ROOT}")

enable_language(HIP)

# Search the cmake modules under ROCM_ROOT ahead of everything else.
list(PREPEND CMAKE_MODULE_PATH "${ROCM_ROOT}/hip/cmake")
find_package(HIP REQUIRED)
find_package(hipblas REQUIRED)

set(CMAKE_HIP_STANDARD 17)

# Indirect call to a device function defined in a source file different
# than the calling function/kernel is only supported when compiling the
# entire program with -fgpu-rdc.
#
# CMAKE_HIP_FLAGS is one space-separated string, not a list, so the flag
# is added with string(APPEND).  list(APPEND) would put a ';' between the
# existing flags and the new one and corrupt the compiler command line.
if(GPU_SEPARABLE_COMPILATION)
    string(APPEND CMAKE_HIP_FLAGS " -fgpu-rdc")
endif()

# gpu_runtime_link(<target>)
#   Link <target> against the hip::host imported target (PUBLIC, so it is
#   inherited by consumers) and add the project's include/cuda2hip
#   directory to the target's private include path.
function(gpu_runtime_link target_name)
    target_link_libraries(${target_name}
        PUBLIC
            hip::host
    )
    target_include_directories(${target_name}
        PRIVATE
            ${CMAKE_SOURCE_DIR}/include/cuda2hip
    )
endfunction()

# gpu_blas_link(<target>)
#   Link <target> against the GPU BLAS library, which is roc::hipblas for
#   the HIP backend (PUBLIC, so it is inherited by consumers), and add
#   the project's include/cuda2hip directory to the target's private
#   include path.
function(gpu_blas_link target_name)
    target_link_libraries(${target_name}
        PUBLIC
            roc::hipblas
    )
    target_include_directories(${target_name}
        PRIVATE
            ${CMAKE_SOURCE_DIR}/include/cuda2hip
    )
endfunction()

# gpu_device_link(<target>)
#   Prepare <target> for building HIP device code:
#     - add include/cuda2hip to its private include path,
#     - set its LINKER_LANGUAGE to HIP and link hip::device privately,
#     - request separable compilation when GPU_SEPARABLE_COMPILATION is on,
#     - set LANGUAGE HIP on every source of the target whose name ends
#       in ".cu".
function(gpu_device_link target_name)
    target_include_directories(${target_name} PRIVATE
                               ${CMAKE_SOURCE_DIR}/include/cuda2hip)
    set_target_properties(${target_name} PROPERTIES
                          LINKER_LANGUAGE "HIP")
    # still non-functional in cmake 3.21 (use CMAKE_HIP_FLAGS)
    if(GPU_SEPARABLE_COMPILATION)
        set_target_properties(${target_name} PROPERTIES
                              HIP_SEPARABLE_COMPILATION ON
                              HIP_RESOLVE_DEVICE_SYMBOLS ON)
    endif()
    target_link_libraries(${target_name} PRIVATE hip::device)

    # Mark all cu source files as HIP code.
    # The backslash is doubled because CMake consumes one level of
    # escaping inside a quoted argument: a single "\." reaches the regex
    # engine as a bare "." and would match any character before "cu"
    # (for example "kernel.xcu").
    get_target_property(_srcs ${target_name} SOURCES)
    foreach(_src IN LISTS _srcs)
        if(_src MATCHES "\\.cu$")
            set_source_files_properties(${_src} PROPERTIES LANGUAGE HIP)
        endif()
    endforeach()
endfunction()

include/config.hh.in

0 → 100644
+13 −0
Original line number Diff line number Diff line
/* This file is intended to be included by mpiwrap.hh */

/* Include guard.  The name has no leading underscore because identifiers
 * that begin with an underscore followed by an upper-case letter are
 * reserved for the implementation in C and C++. */
#ifndef MPITEST_CONFIG_H
#define MPITEST_CONFIG_H

/* the configured options and settings */
/* ENABLE_GPU is defined or left undefined by CMake when this template is
 * configured, according to the value of the ENABLE_GPU CMake variable. */
#cmakedefine ENABLE_GPU
/* Version numbers are substituted from the CMake project version. */
#define MPITEST_VERSION_MAJOR @mpitest_VERSION_MAJOR@
#define MPITEST_VERSION_MINOR @mpitest_VERSION_MINOR@

int zero();
#endif /* MPITEST_CONFIG_H */
Loading