Commit 0230b135 authored by Berrill, Mark's avatar Berrill, Mark
Browse files

Updating RayTraceImageHIP for changes to data structures

parent 89da37ec
Loading
Loading
Loading
Loading

.clang-format

0 → 100644
+107 −0
Original line number Diff line number Diff line
# To run clang tools:
#   cd to root directory
# To update format only:
#   find src -name "*.cpp" -or -name "*.cc" -or -name "*.h" -or -name "*.hpp" -or -name "*.I" | xargs -I{} clang-format -i {}
#   git status -s . | sed s/^...// | grep -E "(\.cpp|\.h|\.cc|\.hpp|\.I)" | xargs -I{} clang-format -i {}
# To run modernize
#   export CLANG_PATH=/packages/llvm/install/clang+llvm-5.0.0-linux-x86_64-ubuntu14.04
#   export PATH=${CLANG_PATH}/bin:${CLANG_PATH}/share/clang:$PATH
#   find source -name "*.cpp" -or -name "*.cc" | xargs -I{} clang-tidy -checks=modernize* -p=/projects/RayTrace/build/debug -fix {}
#   find source -name "*.cpp" -or -name "*.cc" -or -name "*.h" -or -name "*.hpp" -or -name "*.I" | xargs -I{} clang-format -i {}





---
Language:        Cpp
# BasedOnStyle:  LLVM
AccessModifierOffset: -4
AlignAfterOpenBracket: DontAlign
AlignConsecutiveAssignments: true
AlignConsecutiveDeclarations: false
AlignEscapedNewlinesLeft: true
AlignOperands:   true
AlignTrailingComments: true
AllowAllParametersOfDeclarationOnNextLine: true
AllowShortBlocksOnASingleLine: true
AllowShortCaseLabelsOnASingleLine: false
AllowShortFunctionsOnASingleLine: All
AllowShortIfStatementsOnASingleLine: false
AllowShortLoopsOnASingleLine: false
AlwaysBreakAfterDefinitionReturnType: None
AlwaysBreakBeforeMultilineStrings: false
AlwaysBreakTemplateDeclarations: true
BinPackArguments: true
BinPackParameters: true
BraceWrapping:   
  AfterClass:      true
  AfterControlStatement: false
  AfterEnum:       false
  AfterFunction:   true
  AfterNamespace:  false
  AfterObjCDeclaration: true
  AfterStruct:     false
  AfterUnion:      false
  BeforeCatch:     false
  BeforeElse:      false
  IndentBraces:    false
BreakBeforeBinaryOperators: None
#BreakBeforeBraces: Stroustrup
BreakBeforeBraces: Custom
BreakBeforeTernaryOperators: false
BreakConstructorInitializersBeforeComma: false
ColumnLimit:     100
CommentPragmas:  '^ IWYU pragma:'
ConstructorInitializerAllOnOneLineOrOnePerLine: true
ConstructorInitializerIndentWidth: 4
ContinuationIndentWidth: 4
Cpp11BracedListStyle: false
DerivePointerAlignment: true
DisableFormat:   false
ExperimentalAutoDetectBinPacking: false
ForEachMacros:   [ foreach, Q_FOREACH, BOOST_FOREACH ]
IncludeCategories: 
  - Regex:           '^"(llvm|llvm-c|clang|clang-c)/'
    Priority:        2
  - Regex:           '^(<|"(gtest|isl|json)/)'
    Priority:        3
  - Regex:           '.*'
    Priority:        1
IndentCaseLabels: false
IndentWidth:     4
IndentWrappedFunctionNames: false
KeepEmptyLinesAtTheStartOfBlocks: true
MacroBlockBegin: ''
MacroBlockEnd:   ''
MaxEmptyLinesToKeep: 2
NamespaceIndentation: None
ObjCBlockIndentWidth: 4
ObjCSpaceAfterProperty: false
ObjCSpaceBeforeProtocolList: true
PenaltyBreakBeforeFirstCallParameter: 19
PenaltyBreakComment: 300
PenaltyBreakFirstLessLess: 120
PenaltyBreakString: 1000
PenaltyExcessCharacter: 10000
PenaltyReturnTypeOnItsOwnLine: 60
PointerAlignment: Right
ReflowComments:  true
SortIncludes:    true
SortUsingDeclarations: true
SpaceAfterCStyleCast: true
SpaceAfterTemplateKeyword: false
SpaceBeforeAssignmentOperators: true
SpaceBeforeParens: ControlStatements
SpaceInEmptyParentheses: false
SpacesBeforeTrailingComments: 1
SpacesInAngles:  false
SpacesInContainerLiterals: true
SpacesInCStyleCastParentheses: false
SpacesInParentheses: true
SpacesInSquareBrackets: false
Standard:        Cpp11
TabWidth:        4
UseTab:          Never
...
+91 −101
Original line number Diff line number Diff line
@@ -4,8 +4,8 @@
#define HCC_COMPILE
#include "RayTrace/common/RayTraceImageHelper.h"

#include "hip/hip_runtime.h"
#include "cuda.h"
#include "hip/hip_runtime.h"


// Check for errors
@@ -51,50 +51,48 @@ __device__ double atomicAdd2(double* address, double val)
    do {
        assumed = old;
        old     = atomicCAS( address_as_ull, assumed,
                        __double_as_longlong(val + 
                        __longlong_as_double(assumed)));
            __double_as_longlong( val + __longlong_as_double( assumed ) ) );
    } while ( assumed != old );
    return __longlong_as_double( old );
}
#endif



// Get the globally unique thread id
__device__ int getGlobalIdx3D()
{
	int blockId = blockIdx.x 
			 + blockIdx.y * gridDim.x 
			 + gridDim.x * gridDim.y * blockIdx.z; 
	int threadId = blockId * (blockDim.x * blockDim.y * blockDim.z)
			  + (threadIdx.z * (blockDim.x * blockDim.y))
			  + (threadIdx.y * blockDim.x)
			  + threadIdx.x;
    int blockId  = blockIdx.x + blockIdx.y * gridDim.x + gridDim.x * gridDim.y * blockIdx.z;
    int threadId = blockId * ( blockDim.x * blockDim.y * blockDim.z ) +
                   ( threadIdx.z * ( blockDim.x * blockDim.y ) ) + ( threadIdx.y * blockDim.x ) +
                   threadIdx.x;
    return threadId;
}


// Get the index
__device__ inline int getIndex( int n, const double *x, double dx, double y )
__device__ inline int getIndex( int n, const double x_range[2], double dx, double y )
{
    if ( y < x[0] - 0.5 * dx || y > x[n - 1] + 0.5 * dx )
    if ( y < x_range[0] || y > x_range[1] )
        return -1;
    return findfirstsingle( x, n, y - 0.5 * dx );
    double i = ( y - x_range[0] ) / dx;
    return i < n ? i : n - 1;
}


// Kernel that executes on the CUDA device
__global__
__launch_bounds__(128,8)    // Set bounds to limit the number of registers
void RayTraceImageHIPKernel( int N, int nx, int ny, int na, int nb, int nv, 
    const double *x, const double *y, const double *a, const double *b,
    double dx, double dy, double dz, double da, double db, const double *dv,
    const RayTrace::ray_gain_struct* gain, const RayTrace::ray_seed_struct* seed,
    int method, int N_rays, const ray_struct *rays, double scale,
    double *image, double *I_ang ) 
__global__ __launch_bounds__( 128, 8 ) // Set bounds to limit the number of registers
    void RayTraceImageHIPKernel( int N, int nx, int ny, int na, int nb, int nv, double x0,
        double x1, double y0, double y1, double a0, double a1, double b0, double b1, double dx,
        double dy, double dz, double da, double db, const double* dv,
        const RayTrace::ray_gain_struct* gain, const RayTrace::ray_seed_struct* seed, int method,
        int N_rays, const ray_struct* rays, double scale, double* image, double* I_ang )
{
    int idx = getGlobalIdx3D();
    if ( idx < N_rays ) {
        double x_range[2]    = { x0, x1 };
        double y_range[2]    = { y0, y1 };
        double a_range[2]    = { a0, a1 };
        double b_range[2]    = { b0, b1 };
        const ray_struct ray = rays[idx];
        double Iv[K_MAX];
        ray_struct ray2;
@@ -117,10 +115,10 @@ void RayTraceImageHIPKernel( int N, int nx, int ny, int na, int nb, int nv,
                }
            }
            // Get the indicies to the cells in image and I_ang
            int i1 = getIndex( nx, x, dx, ray2.x );
            int i2 = getIndex( ny, y, dy, ray2.y );
            int i3 = getIndex( na, a, da, ray2.a );
            int i4 = getIndex( nb, b, db, ray2.b );
            int i1 = getIndex( nx, x_range, dx, ray2.x );
            int i2 = getIndex( ny, y_range, dy, ray2.y );
            int i3 = getIndex( na, a_range, da, ray2.a );
            int i4 = getIndex( nb, b_range, db, ray2.b );
            // Copy I_out into image
            if ( i1 >= 0 && i2 >= 0 ) {
                double* Iv2 = &image[nv * ( i1 + i2 * nx )];
@@ -155,9 +153,8 @@ inline dim3 calcBlockSize( size_t N_blocks )

// Create the image and call the cuda kernel
void RayTraceImageHIPLoop( int N, const RayTrace::EUV_beam_struct& beam,
    const RayTrace::ray_gain_struct* gain_in, const RayTrace::ray_seed_struct* seed_in,
    int method, const std::vector<ray_struct>& rays, double scale,
    double *image, double *I_ang, 
    const RayTrace::ray_gain_struct* gain_in, const RayTrace::ray_seed_struct* seed_in, int method,
    const std::vector<ray_struct>& rays, double scale, double* image, double* I_ang,
    unsigned int& failure_code, std::vector<ray_struct>& failed_rays )
{
    failure_code = 0; // Need to track failures on GPU
@@ -167,9 +164,12 @@ void RayTraceImageHIPLoop( int N, const RayTrace::EUV_beam_struct& beam,
        hipFuncAttributes attr;
        hipFuncGetAttributes(&attr,RayTraceImageHIPKernel);
        maxThreadsPerBlock = attr.maxThreadsPerBlock;
        if ( maxThreadsPerBlock == 0 ) {
            //CUDA_PRINT_FUNCTION(RayTraceImageCudaKernel);
        RAY_ASSERT(maxThreadsPerBlock>0);
    }*/
            RAY_WARNING("maxThreadsPerBlock=0 - Setting to default 128");
            maxThreadsPerBlock = 128;
        }
    }
    // place the ray gain and seed structures on the device
    const int nx = beam.nx;
    const int ny = beam.ny;
@@ -182,25 +182,17 @@ void RayTraceImageHIPLoop( int N, const RayTrace::EUV_beam_struct& beam,
    const double da = beam.da;
    const double db = beam.db;
    const RayTrace::ray_gain_struct* gain = RayTrace::ray_gain_struct::copy_device( N, gain_in );
    const RayTrace::ray_seed_struct* seed = NULL;
    const RayTrace::ray_seed_struct* seed = nullptr;
    if ( seed_in!=NULL )
        seed = seed_in->copy_device();
    // Allocate device memory
    size_t N_rays = rays.size();
    double *x2, *y2, *a2, *b2, *dv2, *image2, *I_ang2;
    double *dv2, *image2, *I_ang2;
    ray_struct *rays2;
    hipMalloc(&x2,nx*sizeof(double));
    hipMalloc(&y2,ny*sizeof(double));
    hipMalloc(&a2,na*sizeof(double));
    hipMalloc(&b2,nb*sizeof(double));
    hipMalloc(&dv2,nv*sizeof(double));
    hipMalloc(&image2,nx*ny*nv*sizeof(double));
    hipMalloc(&I_ang2,na*nb*sizeof(double));
    hipMalloc(&rays2,N_rays*sizeof(ray_struct));
    hipMemcpy(x2,beam.x,nx*sizeof(double),hipMemcpyHostToDevice);
    hipMemcpy(y2,beam.y,ny*sizeof(double),hipMemcpyHostToDevice);
    hipMemcpy(a2,beam.a,na*sizeof(double),hipMemcpyHostToDevice);
    hipMemcpy(b2,beam.b,nb*sizeof(double),hipMemcpyHostToDevice);
    hipMemcpy(dv2,beam.dv,nv*sizeof(double),hipMemcpyHostToDevice);
    hipMemcpy(rays2,&rays[0],N_rays*sizeof(ray_struct),hipMemcpyHostToDevice);
    hipMemset(image2,0,nx*ny*nv*sizeof(double));
@@ -212,7 +204,9 @@ void RayTraceImageHIPLoop( int N, const RayTrace::EUV_beam_struct& beam,
    dim3 block_size = calcBlockSize(N_blocks);
    block_size.x = N_rays/threads + (N_rays%threads == 0 ? 0:1);
    hipLaunchKernelGGL( RayTraceImageHIPKernel, dim3(block_size), dim3(threads), 0, 0,
        N,nx,ny,na,nb,nv,x2,y2,a2,b2,
        N,nx,ny,na,nb,nv,
        beam.x_range[0],beam.x_range[1],beam.y_range[0],beam.y_range[1],
        beam.a_range[0],beam.a_range[1],beam.b_range[0],beam.b_range[1],
        dx,dy,dz,da,db,dv2,gain,seed,method,N_rays,rays2,scale,image2,I_ang2);
    HIP_CHECK();
    // Retrieve result from device and store it in host array
@@ -220,10 +214,6 @@ void RayTraceImageHIPLoop( int N, const RayTrace::EUV_beam_struct& beam,
    hipMemcpy(I_ang,I_ang2,na*nb*sizeof(double),hipMemcpyDeviceToHost);
    HIP_CHECK();
    // Cleanup
    hipFree(x2);
    hipFree(y2);
    hipFree(a2);
    hipFree(b2);
    hipFree(dv2);
    hipFree(rays2);
    hipFree(image2);