Commit 38c8c2ef authored by David M. Rogers's avatar David M. Rogers
Browse files

Added gpu functionality to source.

parent 2c2b6c84
Loading
Loading
Loading
Loading
+1 −1
Original line number Diff line number Diff line
@@ -33,7 +33,7 @@ find_package(MPI REQUIRED)

##########  TARGETS  ################################################
# Library target mpiwrap, built from src/wrapper.cc.
add_library(mpiwrap src/wrapper.cc)
# NOTE(review): this hunk is a +1/-1 change, so the two mpitest lines below
# look like its before/after sides (bcast.cc, then allreduce.cc) -- confirm
# that only one of them exists in the real CMakeLists.txt.
add_executable(mpitest src/bcast.cc)
add_executable(mpitest src/allreduce.cc)
#####################################################################

# configured header file stashing options
+2 −2
Original line number Diff line number Diff line
@@ -18,8 +18,8 @@ to "out of the box".

## CUDA/HIP Kernel Calling Layer

Configuring with ``-DCMAKE_CUDA_ARCHITECTURE=70`` enables CUDA,
and configuring with ``-DCMAKE_HIP_ARCHITECTURE=gfx908`` enables HIP.
Configuring with ``-DCMAKE_CUDA_ARCHITECTURES=70`` enables CUDA,
and configuring with ``-DCMAKE_HIP_ARCHITECTURES=gfx908`` enables HIP.
The source code is identical for both, because the HIP
interface uses the simple cuda2hip.h header from
the [Quip project](https://github.com/twhite-cray/quip).
+1 −1
Original line number Diff line number Diff line
# NOTE(review): enable_language(CUDA) appears both before and after the
# version guard in this +1/-1 hunk; presumably one is the removed line and
# the other the added line -- confirm against the real file.
enable_language(CUDA)
#cmake_minimum_required(VERSION 3.17)
# Abort configuration with a fatal error when the running CMake is older
# than 3.17.
if(${CMAKE_VERSION} VERSION_LESS "3.17")
    message(FATAL_ERROR "Compilation for CUDA requires CMake 3.17 or later.")
endif()
enable_language(CUDA)

# Progress note printed during configuration.
message(STATUS "Setting up CUDA")

+1 −1
Original line number Diff line number Diff line
# NOTE(review): enable_language(HIP) appears both before and after the
# version guard in this +1/-1 hunk; presumably one is the removed line and
# the other the added line -- confirm against the real file.
enable_language(HIP)
#cmake_minimum_required(VERSION 3.21.3)
# Abort configuration with a fatal error when the running CMake is older
# than 3.21.3.
if(${CMAKE_VERSION} VERSION_LESS "3.21.3")
    message(FATAL_ERROR "Compilation for HIP requires CMake 3.21.3 or later.")
endif()
enable_language(HIP)

# Progress note printed during configuration; echoes the current ROCM_ROOT value.
message(STATUS "Setting up HIP using ROCM_ROOT = ${ROCM_ROOT}")

src/allreduce.cc

0 → 100644
+48 −0
Original line number Diff line number Diff line
#include <mpiwrap.hh>
#include <stdio.h>
#include <stdlib.h>

#ifdef ENABLE_GPU
#include <cuda_runtime.h>

// Evaluate a CUDA runtime call once; if it does not return cudaSuccess,
// print the file, line and CUDA error string, then exit with EXIT_FAILURE.
#define CUDACHECK(cmd) do {                         \
  cudaError_t e = (cmd);                            \
  if( e != cudaSuccess ) {                          \
    printf("Failed: Cuda error %s:%d '%s'\n",       \
        __FILE__,__LINE__,cudaGetErrorString(e));   \
    exit(EXIT_FAILURE);                             \
  }                                                 \
} while(0)
// Allocate `size` bytes with cudaMalloc and store the result in `ptr`.
// `ptr` must be a pointer lvalue: cudaMalloc writes through a void**, so the
// ADDRESS of the pointer is cast (taking the address of a cast result, as in
// `&(void *)ptr`, is not valid C++).
#define devMalloc(ptr,size) CUDACHECK( cudaMalloc((void **)&(ptr), (size)) )
// Release memory obtained from devMalloc, checking the CUDA status.
#define devFree(ptr) CUDACHECK( cudaFree(ptr) )
#else

// Evaluate an expression yielding an int status once; if it is non-zero,
// print the file, line and status value, then exit with EXIT_FAILURE.
#define ERRCHECK(cmd) do {                          \
  int e = (cmd);                                    \
  if( e != 0 ) {                                    \
    printf("Failed: error %s:%d '%d'\n",            \
        __FILE__,__LINE__,e);                       \
    exit(EXIT_FAILURE);                             \
  }                                                 \
} while(0)
// Host fallback: allocate `size` bytes with malloc and store the result in
// `ptr`. The comparison against nullptr converts to status 1 on allocation
// failure, which ERRCHECK reports.
#define devMalloc(ptr,size) ERRCHECK( ((ptr) = static_cast<decltype(ptr)>(malloc(size))) == nullptr )
// Release memory obtained from the host devMalloc.
#define devFree(ptr) free( ptr )
#endif

// Test driver for MPI_Allreduce.
//
// Allocates two buffers of `count` doubles through devMalloc, sums `src`
// element-wise into `dst` over mpi.comm with MPI_Allreduce(MPI_SUM), then
// frees both buffers. Returns 0 when every step completes; allocation
// failures terminate the process inside devMalloc.
int main(int argc, char *argv[]) {
    // MPIH comes from mpiwrap.hh; this file only relies on its `rank` and
    // `comm` members. Presumably it sets up and tears down MPI -- confirm
    // against mpiwrap.hh.
    MPIH mpi(&argc, &argv);
    double *src = nullptr, *dst = nullptr;
    int count = 1024;

    devMalloc(src, count*sizeof(double));
    devMalloc(dst, count*sizeof(double));

    // NOTE(review): src is never written before the reduction, so the summed
    // values are arbitrary; this program only exercises the call itself.
    if(mpi.rank == 0)
        printf("Allreducing (sum) %d doubles over the communicator.\n", count);
    MPI_Allreduce(src, dst, count, MPI_DOUBLE, MPI_SUM, mpi.comm);

    devFree(src);
    devFree(dst);

    return 0;
}
Loading