Updating RayTraceImageHIP for changes to data structures (0230b135) · Commits · Berrill, Mark A / RayTrace-miniapp

.clang-format

0 → 100644

+107 −0

Original line number	Diff line number	Diff line
		# To run clang tools:
		# cd to root directory
		# To update format only:
		# find src -name ".cpp" -or -name ".cc" -or -name ".h" -or -name ".hpp" -or -name "*.I" \| xargs -I{} clang-format -i {}
		# git status -s . \| sed s/^...// \| grep -E "(\.cpp\|\.h\|\.cc\|\.hpp\|\.I)" \| xargs -I{} clang-format -i {}
		# To run modernize
		# export CLANG_PATH=/packages/llvm/install/clang+llvm-5.0.0-linux-x86_64-ubuntu14.04
		# export PATH=${CLANG_PATH}/bin:${CLANG_PATH}/share/clang:$PATH
		# find source -name ".cpp" -or -name ".cc" \| xargs -I{} clang-tidy -checks=modernize* -p=/projects/RayTrace/build/debug -fix {}
		# find source -name ".cpp" -or -name ".cc" -or -name ".h" -or -name ".hpp" -or -name "*.I" \| xargs -I{} clang-format -i {}





		---
		Language: Cpp
		# BasedOnStyle: LLVM
		AccessModifierOffset: -4
		AlignAfterOpenBracket: DontAlign
		AlignConsecutiveAssignments: true
		AlignConsecutiveDeclarations: false
		AlignEscapedNewlinesLeft: true
		AlignOperands: true
		AlignTrailingComments: true
		AllowAllParametersOfDeclarationOnNextLine: true
		AllowShortBlocksOnASingleLine: true
		AllowShortCaseLabelsOnASingleLine: false
		AllowShortFunctionsOnASingleLine: All
		AllowShortIfStatementsOnASingleLine: false
		AllowShortLoopsOnASingleLine: false
		AlwaysBreakAfterDefinitionReturnType: None
		AlwaysBreakBeforeMultilineStrings: false
		AlwaysBreakTemplateDeclarations: true
		BinPackArguments: true
		BinPackParameters: true
		BraceWrapping:
		AfterClass: true
		AfterControlStatement: false
		AfterEnum: false
		AfterFunction: true
		AfterNamespace: false
		AfterObjCDeclaration: true
		AfterStruct: false
		AfterUnion: false
		BeforeCatch: false
		BeforeElse: false
		IndentBraces: false
		BreakBeforeBinaryOperators: None
		#BreakBeforeBraces: Stroustrup
		BreakBeforeBraces: Custom
		BreakBeforeTernaryOperators: false
		BreakConstructorInitializersBeforeComma: false
		ColumnLimit: 100
		CommentPragmas: '^ IWYU pragma:'
		ConstructorInitializerAllOnOneLineOrOnePerLine: true
		ConstructorInitializerIndentWidth: 4
		ContinuationIndentWidth: 4
		Cpp11BracedListStyle: false
		DerivePointerAlignment: true
		DisableFormat: false
		ExperimentalAutoDetectBinPacking: false
		ForEachMacros: [ foreach, Q_FOREACH, BOOST_FOREACH ]
		IncludeCategories:
		- Regex: '^"(llvm\|llvm-c\|clang\|clang-c)/'
		Priority: 2
		- Regex: '^(<\|"(gtest\|isl\|json)/)'
		Priority: 3
		- Regex: '.*'
		Priority: 1
		IndentCaseLabels: false
		IndentWidth: 4
		IndentWrappedFunctionNames: false
		KeepEmptyLinesAtTheStartOfBlocks: true
		MacroBlockBegin: ''
		MacroBlockEnd: ''
		MaxEmptyLinesToKeep: 2
		NamespaceIndentation: None
		ObjCBlockIndentWidth: 4
		ObjCSpaceAfterProperty: false
		ObjCSpaceBeforeProtocolList: true
		PenaltyBreakBeforeFirstCallParameter: 19
		PenaltyBreakComment: 300
		PenaltyBreakFirstLessLess: 120
		PenaltyBreakString: 1000
		PenaltyExcessCharacter: 10000
		PenaltyReturnTypeOnItsOwnLine: 60
		PointerAlignment: Right
		ReflowComments: true
		SortIncludes: true
		SortUsingDeclarations: true
		SpaceAfterCStyleCast: true
		SpaceAfterTemplateKeyword: false
		SpaceBeforeAssignmentOperators: true
		SpaceBeforeParens: ControlStatements
		SpaceInEmptyParentheses: false
		SpacesBeforeTrailingComments: 1
		SpacesInAngles: false
		SpacesInContainerLiterals: true
		SpacesInCStyleCastParentheses: false
		SpacesInParentheses: true
		SpacesInSquareBrackets: false
		Standard: Cpp11
		TabWidth: 4
		UseTab: Never
		...

src/RayTraceImageHIP.hip

+91 −101

Original line number	Diff line number	Diff line
		@@ -4,8 +4,8 @@
		#define HCC_COMPILE
		#include "RayTrace/common/RayTraceImageHelper.h"

		#include "hip/hip_runtime.h"
		#include "cuda.h"
		#include "hip/hip_runtime.h"


		// Check for errors
		@@ -51,50 +51,48 @@ __device__ double atomicAdd2(double* address, double val)
		do {
		assumed = old;
		old = atomicCAS( address_as_ull, assumed,
		__double_as_longlong(val +
		__longlong_as_double(assumed)));
		__double_as_longlong( val + __longlong_as_double( assumed ) ) );
		} while ( assumed != old );
		return __longlong_as_double( old );
		}
		#endif



		// Get the globally unique thread id
		__device__ int getGlobalIdx3D()
		{
		int blockId = blockIdx.x
		+ blockIdx.y * gridDim.x
		+ gridDim.x * gridDim.y * blockIdx.z;
		int threadId = blockId * (blockDim.x * blockDim.y * blockDim.z)
		+ (threadIdx.z * (blockDim.x * blockDim.y))
		+ (threadIdx.y * blockDim.x)
		+ threadIdx.x;
		int blockId = blockIdx.x + blockIdx.y * gridDim.x + gridDim.x * gridDim.y * blockIdx.z;
		int threadId = blockId * ( blockDim.x * blockDim.y * blockDim.z ) +
		( threadIdx.z * ( blockDim.x * blockDim.y ) ) + ( threadIdx.y * blockDim.x ) +
		threadIdx.x;
		return threadId;
		}


		// Get the index
		__device__ inline int getIndex( int n, const double *x, double dx, double y )
		__device__ inline int getIndex( int n, const double x_range[2], double dx, double y )
		{
		if ( y < x[0] - 0.5 * dx \|\| y > x[n - 1] + 0.5 * dx )
		if ( y < x_range[0] \|\| y > x_range[1] )
		return -1;
		return findfirstsingle( x, n, y - 0.5 * dx );
		double i = ( y - x_range[0] ) / dx;
		return i < n ? i : n - 1;
		}


		// Kernel that executes on the CUDA device
		__global__
		__launch_bounds__(128,8) // Set bounds to limit the number of registers
		void RayTraceImageHIPKernel( int N, int nx, int ny, int na, int nb, int nv,
		const double x, const double y, const double a, const double b,
		double dx, double dy, double dz, double da, double db, const double *dv,
		const RayTrace::ray_gain_struct* gain, const RayTrace::ray_seed_struct* seed,
		int method, int N_rays, const ray_struct *rays, double scale,
		double image, double I_ang )
		__global__ __launch_bounds__( 128, 8 ) // Set bounds to limit the number of registers
		void RayTraceImageHIPKernel( int N, int nx, int ny, int na, int nb, int nv, double x0,
		double x1, double y0, double y1, double a0, double a1, double b0, double b1, double dx,
		double dy, double dz, double da, double db, const double* dv,
		const RayTrace::ray_gain_struct* gain, const RayTrace::ray_seed_struct* seed, int method,
		int N_rays, const ray_struct* rays, double scale, double* image, double* I_ang )
		{
		int idx = getGlobalIdx3D();
		if ( idx < N_rays ) {
		double x_range[2] = { x0, x1 };
		double y_range[2] = { y0, y1 };
		double a_range[2] = { a0, a1 };
		double b_range[2] = { b0, b1 };
		const ray_struct ray = rays[idx];
		double Iv[K_MAX];
		ray_struct ray2;
		@@ -117,10 +115,10 @@ void RayTraceImageHIPKernel( int N, int nx, int ny, int na, int nb, int nv,
		}
		}
		// Get the indicies to the cells in image and I_ang
		int i1 = getIndex( nx, x, dx, ray2.x );
		int i2 = getIndex( ny, y, dy, ray2.y );
		int i3 = getIndex( na, a, da, ray2.a );
		int i4 = getIndex( nb, b, db, ray2.b );
		int i1 = getIndex( nx, x_range, dx, ray2.x );
		int i2 = getIndex( ny, y_range, dy, ray2.y );
		int i3 = getIndex( na, a_range, da, ray2.a );
		int i4 = getIndex( nb, b_range, db, ray2.b );
		// Copy I_out into image
		if ( i1 >= 0 && i2 >= 0 ) {
		double* Iv2 = &image[nv * ( i1 + i2 * nx )];
		@@ -155,9 +153,8 @@ inline dim3 calcBlockSize( size_t N_blocks )

		// Create the image and call the cuda kernel
		void RayTraceImageHIPLoop( int N, const RayTrace::EUV_beam_struct& beam,
		const RayTrace::ray_gain_struct* gain_in, const RayTrace::ray_seed_struct* seed_in,
		int method, const std::vector<ray_struct>& rays, double scale,
		double image, double I_ang,
		const RayTrace::ray_gain_struct* gain_in, const RayTrace::ray_seed_struct* seed_in, int method,
		const std::vector<ray_struct>& rays, double scale, double* image, double* I_ang,
		unsigned int& failure_code, std::vector<ray_struct>& failed_rays )
		{
		failure_code = 0; // Need to track failures on GPU
		@@ -167,9 +164,12 @@ void RayTraceImageHIPLoop( int N, const RayTrace::EUV_beam_struct& beam,
		hipFuncAttributes attr;
		hipFuncGetAttributes(&attr,RayTraceImageHIPKernel);
		maxThreadsPerBlock = attr.maxThreadsPerBlock;
		if ( maxThreadsPerBlock == 0 ) {
		//CUDA_PRINT_FUNCTION(RayTraceImageCudaKernel);
		RAY_ASSERT(maxThreadsPerBlock>0);
		}*/
		RAY_WARNING("maxThreadsPerBlock=0 - Setting to default 128");
		maxThreadsPerBlock = 128;
		}
		}
		// place the ray gain and seed structures on the device
		const int nx = beam.nx;
		const int ny = beam.ny;
		@@ -182,25 +182,17 @@ void RayTraceImageHIPLoop( int N, const RayTrace::EUV_beam_struct& beam,
		const double da = beam.da;
		const double db = beam.db;
		const RayTrace::ray_gain_struct* gain = RayTrace::ray_gain_struct::copy_device( N, gain_in );
		const RayTrace::ray_seed_struct* seed = NULL;
		const RayTrace::ray_seed_struct* seed = nullptr;
		if ( seed_in!=NULL )
		seed = seed_in->copy_device();
		// Allocate device memory
		size_t N_rays = rays.size();
		double x2, y2, a2, b2, dv2, image2, *I_ang2;
		double dv2, image2, *I_ang2;
		ray_struct *rays2;
		hipMalloc(&x2,nx*sizeof(double));
		hipMalloc(&y2,ny*sizeof(double));
		hipMalloc(&a2,na*sizeof(double));
		hipMalloc(&b2,nb*sizeof(double));
		hipMalloc(&dv2,nv*sizeof(double));
		hipMalloc(&image2,nxnynv*sizeof(double));
		hipMalloc(&I_ang2,nanbsizeof(double));
		hipMalloc(&rays2,N_rays*sizeof(ray_struct));
		hipMemcpy(x2,beam.x,nx*sizeof(double),hipMemcpyHostToDevice);
		hipMemcpy(y2,beam.y,ny*sizeof(double),hipMemcpyHostToDevice);
		hipMemcpy(a2,beam.a,na*sizeof(double),hipMemcpyHostToDevice);
		hipMemcpy(b2,beam.b,nb*sizeof(double),hipMemcpyHostToDevice);
		hipMemcpy(dv2,beam.dv,nv*sizeof(double),hipMemcpyHostToDevice);
		hipMemcpy(rays2,&rays[0],N_rays*sizeof(ray_struct),hipMemcpyHostToDevice);
		hipMemset(image2,0,nxnynv*sizeof(double));
		@@ -212,7 +204,9 @@ void RayTraceImageHIPLoop( int N, const RayTrace::EUV_beam_struct& beam,
		dim3 block_size = calcBlockSize(N_blocks);
		block_size.x = N_rays/threads + (N_rays%threads == 0 ? 0:1);
		hipLaunchKernelGGL( RayTraceImageHIPKernel, dim3(block_size), dim3(threads), 0, 0,
		N,nx,ny,na,nb,nv,x2,y2,a2,b2,
		N,nx,ny,na,nb,nv,
		beam.x_range[0],beam.x_range[1],beam.y_range[0],beam.y_range[1],
		beam.a_range[0],beam.a_range[1],beam.b_range[0],beam.b_range[1],
		dx,dy,dz,da,db,dv2,gain,seed,method,N_rays,rays2,scale,image2,I_ang2);
		HIP_CHECK();
		// Retrieve result from device and store it in host array
		@@ -220,10 +214,6 @@ void RayTraceImageHIPLoop( int N, const RayTrace::EUV_beam_struct& beam,
		hipMemcpy(I_ang,I_ang2,nanbsizeof(double),hipMemcpyDeviceToHost);
		HIP_CHECK();
		// Cleanup
		hipFree(x2);
		hipFree(y2);
		hipFree(a2);
		hipFree(b2);
		hipFree(dv2);
		hipFree(rays2);
		hipFree(image2);