More work on HIP (883d0b92) · Commits · Berrill, Mark A / RayTrace-miniapp

src/CreateImageHelpers.cpp

+11 −14

Original line number	Diff line number	Diff line
		@@ -13,6 +13,9 @@
		#ifdef USE_CUDA
		#include <cuda_runtime_api.h>
		#endif
		#ifdef USE_HIP
		#include "hip/hip_runtime.h"
		#endif

		// Detect the OS
		// clang-format off
		@@ -249,18 +252,6 @@ std::string hostname()
		gethostname( hostname, sizeof( hostname ) );
		return std::string( hostname );
		}
		int getDeviceID()
		{
		#ifdef USE_CUDA
		int device;
		struct cudaDeviceProp prop;
		cudaGetDevice( &device );
		cudaGetDeviceProperties( &prop, device );
		return prop.pciDeviceID;
		#else
		return -1;
		#endif
		}
		void printHardware()
		{
		// Get number of threads
		@@ -269,6 +260,8 @@ void printHardware()
		int N_gpu = 0;
		#ifdef USE_CUDA
		cudaGetDeviceCount( &N_gpu );
		#elif defined( USE_HIP )
		hipGetDeviceCount( &N_gpu );
		#endif
		// Get system memory
		#if defined( USE_LINUX )
		@@ -313,6 +306,10 @@ static inline std::vector<std::string> allMethods()
		methods.push_back( "Cuda" );
		methods.push_back( "Cuda-MultiGPU" );
		#endif
		#ifdef USE_HIP
		methods.push_back( "HIP" );
		methods.push_back( "HIP-MultiGPU" );
		#endif
		#ifdef USE_OPENACC
		methods.push_back( "OpenAcc" );
		#endif
		@@ -344,8 +341,8 @@ Options::Options( int argc, char *argv[] ) : iterations( 1 ), scale( 1.0 )
		" CreateImage <args> file.dat\n"
		"Optional arguments:\n"
		" -methods=METHODS Comma seperated list of methods to test\n"
		" cpu, threads, OpenMP, Cuda, Cuda-MultiGPU, HIP, OpenAcc,"
		" Kokkos-Serial, Kokkos-Thread, Kokkos-OpenMP, Kokkos-Cuda\n"
		" cpu, threads, OpenMP, Cuda, Cuda-MultiGPU, HIP, HIP-MultiGPU,\n"
		" OpenAcc, Kokkos-Serial, Kokkos-Thread, Kokkos-OpenMP, Kokkos-Cuda\n"
		" all - run all availible tests (default)\n"
		" parallel - run all availible parallel tests\n"
		" -iterations=N Number of iterations to run.\n"

src/RayTraceImage.cpp

+10 −0

Original line number	Diff line number	Diff line
		@@ -373,6 +373,16 @@ void RayTrace::create_image( create_image_struct *info, std::string compute_meth
		scale, image, I_ang, failure_code, failed_rays );
		#else
		RAY_ERROR( "HIP is not availible" );
		#endif
		} else if ( compute_method == "hip-multigpu" ) {
		#if defined( ENABLE_HIP )
		int N_gpu;
		hipGetDeviceCount( &N_gpu );
		RayTraceImageThreadLoop( N_gpu, RayTraceImageHIPLoop, setGPU, N,
		std::ref( *info->euv_beam ), info->gain, info->seed, method, rays, scale, image, I_ang,
		failure_code, failed_rays );
		#else
		RAY_ERROR( "Cuda-MultiGPU is not availible" );
		#endif
		} else if ( compute_method == "cpu" ) {
		RayTraceImageCPULoop( N, std::ref( *info->euv_beam ), info->gain, info->seed, method, rays,

src/RayTraceImageHIP.cu

+2 −49

Original line number	Diff line number	Diff line
		@@ -4,46 +4,10 @@
		#define HCC_COMPILE
		#include "RayTrace/common/RayTraceImageHelper.h"

		#include "cuda.h"
		#include "hip/hip_runtime.h"


		// Check for errors
		#define CUDA_CHECK() \
		do { \
		if ( cudaPeekAtLastError() != cudaSuccess ) { \
		cudaError_t error = cudaGetLastError(); \
		printf( "cuda error: %i\n", error ); \
		printf( " %s\n", cudaGetErrorString( error ) ); \
		printf( " line: %i\n", (int) __LINE__ ); \
		printf( " file: %s\n", __FILE__ ); \
		exit( -1 ); \
		} \
		} while ( 0 )
		#define HIP_CHECK() \
		do { \
		} while ( 0 )


		#define CUDA_PRINT_FUNCTION( fun ) \
		do { \
		hipFuncAttributes attr; \
		hipFuncGetAttributes( &attr, fun ); \
		printf( "%s:\n", #fun ); \
		printf( " version = %i\n", attr.binaryVersion ); \
		printf( " ptx = %i\n", attr.ptxVersion ); \
		printf( " constSize = %i\n", attr.constSizeBytes ); \
		printf( " localSize = %i\n", attr.localSizeBytes ); \
		printf( " sharedSize = %i\n", attr.sharedSizeBytes ); \
		printf( " maxThreads = %i\n", attr.maxThreadsPerBlock ); \
		printf( " numRegs = %i\n", attr.numRegs ); \
		} while ( 0 )


		// Atomic add operation for double
		#if defined( __CUDA_ARCH__ ) && __CUDA_ARCH__ >= 600
		#define atomicAdd2 atomicAdd
		#else
		__device__ double atomicAdd2( double* address, double val )
		{
		unsigned long long int* address_as_ull = (unsigned long long int*) address;
		@@ -55,7 +19,6 @@ __device__ double atomicAdd2( double* address, double val )
		} while ( assumed != old );
		return __longlong_as_double( old );
		}
		#endif


		// Get the globally unique thread id
		@@ -79,7 +42,7 @@ __device__ inline int getIndex( int n, const double x_range[2], double dx, doubl
		}


		// Kernel that executes on the CUDA device
		// Kernel that executes on the device
		__global__ __launch_bounds__( 128, 8 ) // Set bounds to limit the number of registers
		void RayTraceImageHIPKernel( int N, int nx, int ny, int na, int nb, int nv, double x0,
		double x1, double y0, double y1, double a0, double a1, double b0, double b1, double dx,
		@@ -151,7 +114,7 @@ inline dim3 calcBlockSize( size_t N_blocks )
		}


		// Create the image and call the cuda kernel
		// Create the image and call the kernel
		void RayTraceImageHIPLoop( int N, const RayTrace::EUV_beam_struct& beam,
		const RayTrace::ray_gain_struct* gain_in, const RayTrace::ray_seed_struct* seed_in, int method,
		const std::vector<ray_struct>& rays, double scale, double* image, double* I_ang,
		@@ -160,16 +123,6 @@ void RayTraceImageHIPLoop( int N, const RayTrace::EUV_beam_struct& beam,
		failure_code = 0; // Need to track failures on GPU
		// Get device properties
		static int maxThreadsPerBlock = 128;
		/*if ( maxThreadsPerBlock == 0 ) {
		hipFuncAttributes attr;
		hipFuncGetAttributes(&attr,RayTraceImageHIPKernel);
		maxThreadsPerBlock = attr.maxThreadsPerBlock;
		if ( maxThreadsPerBlock == 0 ) {
		//CUDA_PRINT_FUNCTION(RayTraceImageCudaKernel);
		RAY_WARNING("maxThreadsPerBlock=0 - Setting to default 128");
		maxThreadsPerBlock = 128;
		}
		}*/
		// place the ray gain and seed structures on the device
		const int nx = beam.nx;
		const int ny = beam.ny;

src/common/RayTraceImageHelper.h

+1 −1

Original line number	Diff line number	Diff line
		@@ -26,7 +26,7 @@
		#define HOST_DEVICE
		#endif

		#if !defined( USE_OPENACC ) && !defined( USE_KOKKOS ) && !defined( USE_CUDA )
		#if !defined( USE_OPENACC ) && !defined( USE_KOKKOS ) && !defined( USE_CUDA ) && !defined( USE_HIP )
		#define RAY_DEBUG
		#endif