Commit 883d0b92 authored by Berrill, Mark's avatar Berrill, Mark
Browse files

More work on HIP

parent 7988d4eb
Loading
Loading
Loading
Loading
+11 −14
Original line number Diff line number Diff line
@@ -13,6 +13,9 @@
#ifdef USE_CUDA
#include <cuda_runtime_api.h>
#endif
#ifdef USE_HIP
#include "hip/hip_runtime.h"
#endif

// Detect the OS
// clang-format off
@@ -249,18 +252,6 @@ std::string hostname()
    gethostname( hostname, sizeof( hostname ) );
    return std::string( hostname );
}
int getDeviceID()
{
#ifdef USE_CUDA
    int device;
    struct cudaDeviceProp prop;
    cudaGetDevice( &device );
    cudaGetDeviceProperties( &prop, device );
    return prop.pciDeviceID;
#else
    return -1;
#endif
}
void printHardware()
{
    // Get number of threads
@@ -269,6 +260,8 @@ void printHardware()
    int N_gpu = 0;
#ifdef USE_CUDA
    cudaGetDeviceCount( &N_gpu );
#elif defined( USE_HIP )
    hipGetDeviceCount( &N_gpu );
#endif
    // Get system memory
#if defined( USE_LINUX )
@@ -313,6 +306,10 @@ static inline std::vector<std::string> allMethods()
    methods.push_back( "Cuda" );
    methods.push_back( "Cuda-MultiGPU" );
#endif
#ifdef USE_HIP
    methods.push_back( "HIP" );
    methods.push_back( "HIP-MultiGPU" );
#endif
#ifdef USE_OPENACC
    methods.push_back( "OpenAcc" );
#endif
@@ -344,8 +341,8 @@ Options::Options( int argc, char *argv[] ) : iterations( 1 ), scale( 1.0 )
        "  CreateImage <args> file.dat\n"
        "Optional arguments:\n"
        "  -methods=METHODS  Comma seperated list of methods to test\n"
        "                    cpu, threads, OpenMP, Cuda, Cuda-MultiGPU, HIP, OpenAcc,"
        "                    Kokkos-Serial, Kokkos-Thread, Kokkos-OpenMP, Kokkos-Cuda\n"
        "                    cpu, threads, OpenMP, Cuda, Cuda-MultiGPU, HIP, HIP-MultiGPU,\n"
        "                    OpenAcc, Kokkos-Serial, Kokkos-Thread, Kokkos-OpenMP, Kokkos-Cuda\n"
        "                    all - run all availible tests (default)\n"
        "                    parallel - run all availible parallel tests\n"
        "  -iterations=N     Number of iterations to run.\n"
+10 −0
Original line number Diff line number Diff line
@@ -373,6 +373,16 @@ void RayTrace::create_image( create_image_struct *info, std::string compute_meth
            scale, image, I_ang, failure_code, failed_rays );
#else
        RAY_ERROR( "HIP is not availible" );
#endif
    } else if ( compute_method == "hip-multigpu" ) {
#if defined( ENABLE_HIP )
        int N_gpu;
        hipGetDeviceCount( &N_gpu );
        RayTraceImageThreadLoop( N_gpu, RayTraceImageHIPLoop, setGPU, N,
            std::ref( *info->euv_beam ), info->gain, info->seed, method, rays, scale, image, I_ang,
            failure_code, failed_rays );
#else
        RAY_ERROR( "Cuda-MultiGPU is not availible" );
#endif
    } else if ( compute_method == "cpu" ) {
        RayTraceImageCPULoop( N, std::ref( *info->euv_beam ), info->gain, info->seed, method, rays,
+2 −49
Original line number Diff line number Diff line
@@ -4,46 +4,10 @@
#define HCC_COMPILE
#include "RayTrace/common/RayTraceImageHelper.h"

#include "cuda.h"
#include "hip/hip_runtime.h"


// Check for errors
#define CUDA_CHECK()                                          \
    do {                                                      \
        if ( cudaPeekAtLastError() != cudaSuccess ) {         \
            cudaError_t error = cudaGetLastError();           \
            printf( "cuda error: %i\n", error );              \
            printf( "   %s\n", cudaGetErrorString( error ) ); \
            printf( "   line: %i\n", (int) __LINE__ );        \
            printf( "   file: %s\n", __FILE__ );              \
            exit( -1 );                                       \
        }                                                     \
    } while ( 0 )
#define HIP_CHECK() \
    do {            \
    } while ( 0 )


#define CUDA_PRINT_FUNCTION( fun )                                \
    do {                                                          \
        hipFuncAttributes attr;                                   \
        hipFuncGetAttributes( &attr, fun );                       \
        printf( "%s:\n", #fun );                                  \
        printf( "  version = %i\n", attr.binaryVersion );         \
        printf( "  ptx = %i\n", attr.ptxVersion );                \
        printf( "  constSize = %i\n", attr.constSizeBytes );      \
        printf( "  localSize = %i\n", attr.localSizeBytes );      \
        printf( "  sharedSize = %i\n", attr.sharedSizeBytes );    \
        printf( "  maxThreads = %i\n", attr.maxThreadsPerBlock ); \
        printf( "  numRegs = %i\n", attr.numRegs );               \
    } while ( 0 )


// Atomic add operation for double
#if defined( __CUDA_ARCH__ ) && __CUDA_ARCH__ >= 600
#define atomicAdd2 atomicAdd
#else
__device__ double atomicAdd2( double* address, double val )
{
    unsigned long long int* address_as_ull = (unsigned long long int*) address;
@@ -55,7 +19,6 @@ __device__ double atomicAdd2( double* address, double val )
    } while ( assumed != old );
    return __longlong_as_double( old );
}
#endif


// Get the globally unique thread id
@@ -79,7 +42,7 @@ __device__ inline int getIndex( int n, const double x_range[2], double dx, doubl
}


// Kernel that executes on the CUDA device
// Kernel that executes on the device
__global__ __launch_bounds__( 128, 8 ) // Set bounds to limit the number of registers
    void RayTraceImageHIPKernel( int N, int nx, int ny, int na, int nb, int nv, double x0,
        double x1, double y0, double y1, double a0, double a1, double b0, double b1, double dx,
@@ -151,7 +114,7 @@ inline dim3 calcBlockSize( size_t N_blocks )
}


// Create the image and call the cuda kernel
// Create the image and call the kernel
void RayTraceImageHIPLoop( int N, const RayTrace::EUV_beam_struct& beam,
    const RayTrace::ray_gain_struct* gain_in, const RayTrace::ray_seed_struct* seed_in, int method,
    const std::vector<ray_struct>& rays, double scale, double* image, double* I_ang,
@@ -160,16 +123,6 @@ void RayTraceImageHIPLoop( int N, const RayTrace::EUV_beam_struct& beam,
    failure_code = 0; // Need to track failures on GPU
    // Get device properties
    static int maxThreadsPerBlock = 128;
    /*if ( maxThreadsPerBlock == 0 ) {
        hipFuncAttributes attr;
        hipFuncGetAttributes(&attr,RayTraceImageHIPKernel);
        maxThreadsPerBlock = attr.maxThreadsPerBlock;
        if ( maxThreadsPerBlock == 0 ) {
            //CUDA_PRINT_FUNCTION(RayTraceImageCudaKernel);
            RAY_WARNING("maxThreadsPerBlock=0 - Setting to default 128");
            maxThreadsPerBlock = 128;
        }
    }*/
    // place the ray gain and seed structures on the device
    const int nx = beam.nx;
    const int ny = beam.ny;
+1 −1
Original line number Diff line number Diff line
@@ -26,7 +26,7 @@
#define HOST_DEVICE
#endif

#if !defined( USE_OPENACC ) && !defined( USE_KOKKOS ) && !defined( USE_CUDA )
#if !defined( USE_OPENACC ) && !defined( USE_KOKKOS ) && !defined( USE_CUDA ) && !defined( USE_HIP )
#define RAY_DEBUG
#endif