Loading CMakeLists.txt +1 −1 Original line number Diff line number Diff line Loading @@ -33,7 +33,7 @@ find_package(MPI REQUIRED) ########## TARGETS ################################################ add_library(mpiwrap src/wrapper.cc) add_executable(mpitest src/bcast.cc) add_executable(mpitest src/allreduce.cc) ##################################################################### # configured header file stashing options Loading README.md +2 −2 Original line number Diff line number Diff line Loading @@ -18,8 +18,8 @@ to "out of the box". ## CUDA/HIP Kernel Calling Layer Configuring with ``-DCMAKE_CUDA_ARCHITECTURE=70`` enables CUDA, and configuring with ``-DCMAKE_HIP_ARCHITECTURE=gfx908`` enables HIP. Configuring with ``-DCMAKE_CUDA_ARCHITECTURES=70`` enables CUDA, and configuring with ``-DCMAKE_HIP_ARCHITECTURES=gfx908`` enables HIP. The source code is identical for both, because the HIP interface uses the simple cuda2hip.h header from the [Quip project](https://github.com/twhite-cray/quip). 
Loading cmake/setup_cuda.cmake +1 −1 Original line number Diff line number Diff line enable_language(CUDA) #cmake_minimum_required(VERSION 3.17) if(${CMAKE_VERSION} VERSION_LESS "3.17") message(FATAL_ERROR "Compilation for CUDA requires CMake 3.17 or later.") endif() enable_language(CUDA) message(STATUS "Setting up CUDA") Loading cmake/setup_hip.cmake +1 −1 Original line number Diff line number Diff line enable_language(HIP) #cmake_minimum_required(VERSION 3.21.3) if(${CMAKE_VERSION} VERSION_LESS "3.21.3") message(FATAL_ERROR "Compilation for HIP requires CMake 3.21.3 or later.") endif() enable_language(HIP) message(STATUS "Setting up HIP using ROCM_ROOT = ${ROCM_ROOT}") Loading src/allreduce.cc 0 → 100644 +48 −0 Original line number Diff line number Diff line #include <mpiwrap.hh> #include <stdio.h> #ifdef ENABLE_GPU #include <cuda_runtime.h> #define CUDACHECK(cmd) do { \ cudaError_t e = cmd; \ if( e != cudaSuccess ) { \ printf("Failed: Cuda error %s:%d '%s'\n", \ __FILE__,__LINE__,cudaGetErrorString(e)); \ exit(EXIT_FAILURE); \ } \ } while(0) #define devMalloc(ptr,size) CUDACHECK( cudaMalloc(&(void *)ptr, size) ) #define devFree(ptr) CUDACHECK( cudaFree(ptr) ) #else #define ERRCHECK(cmd) do { \ int e = cmd; \ if( e != 0 ) { \ printf("Failed: error %s:%d '%d'\n", \ __FILE__,__LINE__,e); \ exit(EXIT_FAILURE); \ } \ } while(0) #define devMalloc(ptr,size) ERRCHECK( (ptr = reinterpret_cast<decltype(ptr)>(malloc(size))) == nullptr ) #define devFree(ptr) free( ptr ) #endif int main(int argc, char *argv[]) { MPIH mpi(&argc, &argv); double *src, *dst; int count = 1024; devMalloc(src, count*sizeof(double)); devMalloc(dst, count*sizeof(double)); if(mpi.rank == 0) printf("Broadcasting %d doubles from root.\n", count); //MPI_Bcast(src, count, MPI_DOUBLE, 0, mpi.comm); MPI_Allreduce(src, dst, count, MPI_DOUBLE, MPI_SUM, mpi.comm); devFree(src); devFree(dst); return 0; } Loading
# CMakeLists.txt -- target definitions (excerpt following the MPI lookup).
find_package(MPI REQUIRED)

########## TARGETS ################################################
# Library built from src/wrapper.cc.
add_library(mpiwrap src/wrapper.cc)

# Test executable. A target name may be defined only once, so mpitest is
# built from src/allreduce.cc alone (this replaces the former src/bcast.cc
# definition rather than sitting next to it).
add_executable(mpitest src/allreduce.cc)

#####################################################################
# configured header file stashing options
README.md (+2 −2), hunk `@@ -18,8 +18,8 @@`:

```diff
 to "out of the box".

 ## CUDA/HIP Kernel Calling Layer

-Configuring with ``-DCMAKE_CUDA_ARCHITECTURE=70`` enables CUDA, and configuring
-with ``-DCMAKE_HIP_ARCHITECTURE=gfx908`` enables HIP.
+Configuring with ``-DCMAKE_CUDA_ARCHITECTURES=70`` enables CUDA, and configuring
+with ``-DCMAKE_HIP_ARCHITECTURES=gfx908`` enables HIP.
 The source code is identical for both, because the HIP interface uses the simple
 cuda2hip.h header from the [Quip project](https://github.com/twhite-cray/quip).
```
# cmake/setup_cuda.cmake
#
# Enables the CUDA language for the including project.
#
# The CMake version is checked first so that an older CMake stops with the
# explicit message below instead of whatever error enable_language(CUDA)
# would produce on its own.
if(CMAKE_VERSION VERSION_LESS "3.17")
  message(FATAL_ERROR "Compilation for CUDA requires CMake 3.17 or later.")
endif()

enable_language(CUDA)

message(STATUS "Setting up CUDA")
# cmake/setup_hip.cmake
#
# Enables the HIP language for the including project.
#
# The CMake version is checked first so that an older CMake stops with the
# explicit message below instead of whatever error enable_language(HIP)
# would produce on its own.
if(CMAKE_VERSION VERSION_LESS "3.21.3")
  message(FATAL_ERROR "Compilation for HIP requires CMake 3.21.3 or later.")
endif()

enable_language(HIP)

# ROCM_ROOT is only reported here; it is expected to be set by the caller
# or the environment of the configure step -- TODO confirm where it is defined.
message(STATUS "Setting up HIP using ROCM_ROOT = ${ROCM_ROOT}")
src/allreduce.cc 0 → 100644 +48 −0 Original line number Diff line number Diff line #include <mpiwrap.hh> #include <stdio.h> #ifdef ENABLE_GPU #include <cuda_runtime.h> #define CUDACHECK(cmd) do { \ cudaError_t e = cmd; \ if( e != cudaSuccess ) { \ printf("Failed: Cuda error %s:%d '%s'\n", \ __FILE__,__LINE__,cudaGetErrorString(e)); \ exit(EXIT_FAILURE); \ } \ } while(0) #define devMalloc(ptr,size) CUDACHECK( cudaMalloc(&(void *)ptr, size) ) #define devFree(ptr) CUDACHECK( cudaFree(ptr) ) #else #define ERRCHECK(cmd) do { \ int e = cmd; \ if( e != 0 ) { \ printf("Failed: error %s:%d '%d'\n", \ __FILE__,__LINE__,e); \ exit(EXIT_FAILURE); \ } \ } while(0) #define devMalloc(ptr,size) ERRCHECK( (ptr = reinterpret_cast<decltype(ptr)>(malloc(size))) == nullptr ) #define devFree(ptr) free( ptr ) #endif int main(int argc, char *argv[]) { MPIH mpi(&argc, &argv); double *src, *dst; int count = 1024; devMalloc(src, count*sizeof(double)); devMalloc(dst, count*sizeof(double)); if(mpi.rank == 0) printf("Broadcasting %d doubles from root.\n", count); //MPI_Bcast(src, count, MPI_DOUBLE, 0, mpi.comm); MPI_Allreduce(src, dst, count, MPI_DOUBLE, MPI_SUM, mpi.comm); devFree(src); devFree(dst); return 0; }