Commit 5ad9b569 authored by Berrill, Mark's avatar Berrill, Mark
Browse files

Creating SYCL port. Note that this required incompatible changes so it is in...

Creating SYCL port.  Note that this required incompatible changes so it is in a seperate branch until I can resolve this
parent 0aa84331
......@@ -7,7 +7,7 @@ rm -rf CMake*
cmake \
-D CMAKE_BUILD_TYPE=Release \
-D CMAKE_CXX_COMPILER=mpic++ \
-D CMAKE_CXX_STANDARD=11 \
-D CMAKE_CXX_STANDARD=14 \
-D USE_OPENACC=0 \
-D USE_OPENMP=1 \
-D USE_KOKKOS=0 \
......@@ -15,5 +15,8 @@ cmake \
-D KOKKOS_WRAPPER=${KOKKOS_DIR}/nvcc_wrapper \
-D USE_CUDA=1 \
-D CUDA_FLAGS="-arch sm_30" \
-D USE_COMPUTE_CPP=1 \
-D ComputeCpp_DIR=/packages/ComputeCpp/ComputeCpp-CE-1.1.4-Ubuntu-16.04-x86_64 \
-D COMPUTECPP_BITCODE=ptx64 \
../src
......@@ -138,6 +138,19 @@ IF ( USE_CUDA )
ENDIF()
# Enable ComputeCpp
CHECK_ENABLE_FLAG( USE_COMPUTE_CPP 0 )
IF ( USE_COMPUTE_CPP )
LIST( APPEND CMAKE_MODULE_PATH ${CMAKE_SOURCE_DIR}/cmake/Modules )
FIND_PACKAGE( ComputeCpp REQUIRED )
SET( USE_SYCL TRUE )
ADD_DEFINITIONS( -DUSE_SYCL )
INCLUDE_DIRECTORIES( ${ComputeCpp_INCLUDE_DIRS} ${OpenCL_INCLUDE_DIRS} )
SET( CMAKE_INSTALL_RPATH ${CMAKE_INSTALL_RPATH} "${ComputeCpp_DIR}/lib" )
ENDIF()
# Include some paths
INCLUDE_DIRECTORIES( "${RAYTRACE_INSTALL_DIR}/include" )
INCLUDE_DIRECTORIES( "${RAYTRACE_SOURCE_DIR}" )
......@@ -181,8 +194,13 @@ ADD_DEPENDENCIES( RayTrace copy-include )
# Add the applications
ADD_EXECUTABLE( CreateImage CreateImage.cpp )
TARGET_LINK_LIBRARIES( CreateImage RayTrace ${KOKKOS_LIB} ${TIMER_LIBS} ${LDFLAGS} ${LDLIBS} )
IF ( USE_SYCL )
CMAKE_PARSE_ARGUMENTS( SDK_ADD_SAMPLE "NO_TEST" "TARGET" "SOURCES" TARGET CreateImage SOURCES SYCL/RayTraceImageSYCL.cpp )
ADD_EXECUTABLE( CreateImage CreateImage.cpp ${SDK_ADD_SAMPLE_SOURCES} )
ADD_SYCL_TO_TARGET( TARGET CreateImage SOURCES ${SDK_ADD_SAMPLE_SOURCES} )
ELSE()
ADD_EXECUTABLE( CreateImage CreateImage.cpp )
ENDIF()
TARGET_LINK_LIBRARIES( CreateImage RayTrace ${KOKKOS_LIB} ${COMPUTECPP_RUNTIME_LIBRARY} ${OpenCL_LIBRARIES} ${TIMER_LIBS} ${LDFLAGS} ${LDLIBS} )
INSTALL( TARGETS CreateImage DESTINATION ${RAYTRACE_INSTALL_DIR}/bin )
......@@ -337,6 +337,9 @@ static inline std::vector<std::string> allMethods()
methods.push_back( "Cuda" );
methods.push_back( "Cuda-MultiGPU" );
#endif
#ifdef USE_SYCL
methods.push_back( "SYCL" );
#endif
#ifdef USE_OPENACC
methods.push_back( "OpenAcc" );
#endif
......@@ -354,6 +357,15 @@ static inline std::vector<std::string> allMethods()
#endif
return methods;
}
static std::string getMethodList()
{
auto methods = allMethods();
std::string methodList;
for ( const auto& method : methods )
methodList += method + ", ";
methodList.erase( methodList.size() - 2 );
return methodList;
}
static inline void erase( std::vector<std::string> &x, const std::string &v )
{
auto it = std::find( x.begin(), x.end(), v );
......@@ -363,13 +375,12 @@ static inline void erase( std::vector<std::string> &x, const std::string &v )
Options::Options() : benchmark( false ), iterations( 1 ), scale( 1.0 ) {}
Options::Options( int argc, char *argv[] ) : iterations( 1 ), scale( 1.0 )
{
const char *err_msg =
std::string err_msg =
"CreateImage called with the wrong number of arguments:\n"
" CreateImage <args> file.dat\n"
"Optional arguments:\n"
" -methods=METHODS Comma seperated list of methods to test\n"
" cpu, threads, OpenMP, Cuda, Cuda-MultiGPU, OpenAcc, Kokkos-Serial, "
"Kokkos-Thread, Kokkos-OpenMP, Kokkos-Cuda\n"
" -methods=METHODS Comma seperated list of methods to test:\n"
" " + getMethodList() + "\n"
" all - run all availible tests (default)\n"
" parallel - run all availible parallel tests\n"
" -iterations=N Number of iterations to run.\n"
......
......@@ -35,6 +35,9 @@
#ifdef ENABLE_CUDA
#include <cuda_runtime_api.h>
#endif
#ifdef USE_SYCL
#define ENABLE_SYCL
#endif
#include "RayTrace/common/RayTraceDefinitions.h"
#include "RayTrace/common/RayTraceImageHelper.h"
#include "RayTrace/utilities/RayUtilityMacros.h"
......@@ -66,6 +69,12 @@ extern void RayTraceImageCudaLoop( int N, const RayTrace::EUV_beam_struct &euv_b
const std::vector<ray_struct> &rays, double scale, double *image, double *I_ang,
unsigned int &failure_code, std::vector<ray_struct> &failed_rays );
#endif
#if defined( ENABLE_SYCL )
extern void RayTraceImageSYCLLoop( int N, const RayTrace::EUV_beam_struct &euv_beam,
const RayTrace::ray_gain_struct *gain, const RayTrace::ray_seed_struct *seed, int method,
const std::vector<ray_struct> &rays, double scale, double *image, double *I_ang,
unsigned int &failure_code, std::vector<ray_struct> &failed_rays );
#endif
/**********************************************************************
......@@ -365,6 +374,13 @@ void RayTrace::create_image( create_image_struct *info, std::string compute_meth
failure_code, failed_rays );
#else
RAY_ERROR( "Cuda-MultiGPU is not availible" );
#endif
} else if ( compute_method == "sycl" ) {
#if defined( ENABLE_SYCL )
RayTraceImageSYCLLoop( N, std::ref( *info->euv_beam ), info->gain, info->seed, method, rays,
scale, image, I_ang, failure_code, failed_rays );
#else
RAY_ERROR( "SYCL is not availible" );
#endif
} else if ( compute_method == "cpu" ) {
RayTraceImageCPULoop( N, std::ref( *info->euv_beam ), info->gain, info->seed, method, rays,
......
......@@ -115,8 +115,9 @@ void RayTraceImageOpenMPLoop( int N, const RayTrace::EUV_beam_struct &beam,
if ( i1 >= 0 && i2 >= 0 ) {
double *Iv2 = &image[beam.nv * ( i1 + i2 * beam.nx )];
for ( int iv = 0; iv < beam.nv; iv++ ) {
int k = (iv+i)%beam.nv; // Offset index to reduce collisions
#pragma omp atomic
Iv2[iv] += Iv[iv] * scale;
Iv2[k] += Iv[k] * scale;
}
}
// Copy I_out into I_ang
......
......@@ -117,8 +117,10 @@ void RayTraceImageCudaKernel( int N, int nx, int ny, int na, int nb, int nv,
// Copy I_out into image
if (i1>=0 && i2>=0){
double *Iv2 = &image[nv*(i1+i2*nx)];
for (int iv=0; iv<nv; iv++)
atomicAdd2(&Iv2[iv],Iv[iv]*scale);
for (int iv=0; iv<nv; iv++) {
int k = (iv+idx)%nv; // Offset index to reduce collisions
atomicAdd2(&Iv2[k],Iv[k]*scale);
}
}
// Copy I_out into I_ang
if (i3>=0 && i4>=0) {
......
......@@ -1895,7 +1895,6 @@ void RayTrace::intensity_struct::swap( RayTrace::intensity_struct &rhs )
/**********************************************************************
* Constructors/Destructors for ray_gain_struct *
**********************************************************************/
RayTrace::ray_gain_struct::ray_gain_struct() { memset( this, 0, sizeof( ray_gain_struct ) ); }
void RayTrace::ray_gain_struct::initialize( int Nx_in, int Ny_in, int Nv_in, bool use_emiss )
{
Nx = Nx_in;
......@@ -1918,7 +1917,7 @@ void RayTrace::ray_gain_struct::initialize( int Nx_in, int Ny_in, int Nv_in, boo
memset( E0, 0, Nx * Ny * sizeof( float ) );
}
}
RayTrace::ray_gain_struct::~ray_gain_struct()
/*RayTrace::ray_gain_struct::~ray_gain_struct()
{
delete[] x;
x = nullptr;
......@@ -1934,7 +1933,7 @@ RayTrace::ray_gain_struct::~ray_gain_struct()
gv = nullptr;
delete[] gv0;
gv0 = nullptr;
}
}*/
static inline void writeVec(
FILE *fid, const char *prefix, const char *name, size_t N, const double *x )
{
......
......@@ -218,8 +218,8 @@ struct ray_gain_struct {
float *gv; //!< Normalized lineshape function ( K x Nx x Ny )
float *gv0; //!< Lineshape function at line-center ( Nx x Ny )
// Constructors/destructor
ray_gain_struct();
~ray_gain_struct();
ray_gain_struct(): Nx(0), Ny(0), Nv(0), x(nullptr), y(nullptr), n(nullptr), g0(nullptr), E0(nullptr), gv(nullptr), gv0(nullptr) {}
~ray_gain_struct() {}
ray_gain_struct( const ray_gain_struct & ) = delete;
ray_gain_struct &operator=( const ray_gain_struct & ) = delete;
//! Function to initialize the data
......
#include <CL/sycl.hpp>
#define USING_SYCL
#include "RayTrace/RayTraceStructures.h"
#include "RayTrace/common/RayTraceImageHelper.h"
#include "RayTrace/utilities/RayUtilities.h"
#include <chrono>
#include <thread>
class run_rays;
using namespace cl;
// Get the index
inline int getIndex( int n, const double *x, double dx, double y )
{
if ( y < x[0] - 0.5 * dx || y > x[n - 1] + 0.5 * dx )
return -1;
return findfirstsingle( x, n, y - 0.5 * dx );
}
// Helper MACROS to copy data
#define GET_GAIN_BUFFER(X) \
auto gain_x_##X = sycl::buffer<double>(&nulld,one); \
auto gain_y_##X = sycl::buffer<double>(&nulld,one); \
auto gain_n_##X = sycl::buffer<double>(&nulld,one); \
auto gain_g0_##X = sycl::buffer<float>(&nullf,one); \
auto gain_E0_##X = sycl::buffer<float>(&nullf,one); \
auto gain_gv_##X = sycl::buffer<float>(&nullf,one); \
auto gain_gv0_##X = sycl::buffer<float>(&nullf,one); \
if ( X < N ) { \
gnx[X] = gain_in[X].Nx; \
gny[X] = gain_in[X].Ny; \
gnv[X] = gain_in[X].Nv; \
gain_x_##X = sycl::buffer<double>(gain_in[X].x, sycl::range<1>(gnx[X])); \
gain_y_##X = sycl::buffer<double>(gain_in[X].y, sycl::range<1>(gny[X])); \
gain_n_##X = sycl::buffer<double>(gain_in[X].n, sycl::range<1>(gnx[X]*gny[X])); \
gain_g0_##X = sycl::buffer<float>(gain_in[X].g0, sycl::range<1>(gnx[X]*gny[X])); \
gain_E0_##X = sycl::buffer<float>(gain_in[X].E0, sycl::range<1>(gnx[X]*gny[X])); \
gain_gv_##X = sycl::buffer<float>(gain_in[X].gv, sycl::range<1>(gnx[X]*gny[X]*gnv[X])); \
gain_gv0_##X = sycl::buffer<float>(gain_in[X].gv0, sycl::range<1>(gnx[X]*gny[X])); \
}
#define GET_GAIN_ACCESS(X) \
auto gain2_x_##X = gain_x_##X.get_access<sycl::access::mode::read>(cgh); \
auto gain2_y_##X = gain_y_##X.get_access<sycl::access::mode::read>(cgh); \
auto gain2_n_##X = gain_n_##X.get_access<sycl::access::mode::read>(cgh); \
auto gain2_g0_##X = gain_g0_##X.get_access<sycl::access::mode::read>(cgh); \
auto gain2_E0_##X = gain_E0_##X.get_access<sycl::access::mode::read>(cgh); \
auto gain2_gv_##X = gain_gv_##X.get_access<sycl::access::mode::read>(cgh); \
auto gain2_gv0_##X = gain_gv0_##X.get_access<sycl::access::mode::read>(cgh)
#define GET_GAIN_POINTERS(X) \
gain2[X].Nx = gnx[X]; \
gain2[X].Ny = gny[X]; \
gain2[X].Nv = gnv[X]; \
gain2[X].x = gain2_x_##X.get_pointer(); \
gain2[X].y = gain2_y_##X.get_pointer(); \
gain2[X].n = gain2_n_##X.get_pointer(); \
gain2[X].g0 = gain2_g0_##X.get_pointer(); \
gain2[X].E0 = gain2_E0_##X.get_pointer(); \
gain2[X].gv = gain2_gv_##X.get_pointer(); \
gain2[X].gv0 = gain2_gv0_##X.get_pointer()
// Create the image and call the cuda kernel
void RayTraceImageSYCLLoop( int N, const RayTrace::EUV_beam_struct& beam,
const RayTrace::ray_gain_struct* gain_in, const RayTrace::ray_seed_struct* seed_in,
int method, const std::vector<ray_struct>& rays, double scale,
double *image_out, double *I_ang_out,
unsigned int& failure_code, std::vector<ray_struct>& failed_rays )
{
sycl::queue queue([&](sycl::exception_list eL) {
try {
for (auto& e : eL) {
std::rethrow_exception(e);
}
} catch (sycl::exception e) {
std::cout << " An exception has been thrown: " << e.what() << std::endl;
}
});
auto device = queue.get_device();
auto numGroups = device.get_info<sycl::info::device::max_compute_units>();
auto groupSize = device.get_info<sycl::info::device::max_work_group_size>();
auto numThreads = numGroups * groupSize;
failure_code = 0; // Need to track failures on GPU
// Copy basic parameters
const int nx = beam.nx;
const int ny = beam.ny;
const int na = beam.na;
const int nb = beam.nb;
const int nv = beam.nv;
const double dx = beam.dx;
const double dy = beam.dy;
const double dz = beam.dz;
const double da = beam.da;
const double db = beam.db;
// place the ray gain and seed structures on the device
//const RayTrace::ray_gain_struct* gain = RayTrace::ray_gain_struct::copy_device( N, gain_in );
//const RayTrace::ray_seed_struct* seed = seed_in ? seed_in->copy_device() : nullptr;
sycl::range<1> one(1);
double nulld = 1;
float nullf = 1;
RAY_ASSERT( N <= 9 );
int gnx[9], gny[9], gnv[9];
GET_GAIN_BUFFER(0);
GET_GAIN_BUFFER(1);
GET_GAIN_BUFFER(2);
GET_GAIN_BUFFER(3);
GET_GAIN_BUFFER(4);
GET_GAIN_BUFFER(5);
GET_GAIN_BUFFER(6);
GET_GAIN_BUFFER(7);
GET_GAIN_BUFFER(8);
// Allocate device memory
size_t N_rays = rays.size();
auto x2 = sycl::buffer<double>(beam.x, sycl::range<1>(nx));
auto y2 = sycl::buffer<double>(beam.y, sycl::range<1>(ny));
auto a2 = sycl::buffer<double>(beam.a, sycl::range<1>(na));
auto b2 = sycl::buffer<double>(beam.b, sycl::range<1>(nb));
auto dv2 = sycl::buffer<double>(beam.dv, sycl::range<1>(nv));
auto rays2 = sycl::buffer<ray_struct>(rays.data(), sycl::range<1>(rays.size()));
auto image2 = sycl::buffer<double>(image_out, sycl::range<1>(nx*ny*nv));
auto I_ang2 = sycl::buffer<double>(I_ang_out, sycl::range<1>(na*nb));
// Do calculation on device:
//auto gain_ptr = (size_t) gain;
//auto seed_ptr = (size_t) seed;
//RAY_ASSERT( (const RayTrace::ray_gain_struct*) gain_ptr == gain );
queue.submit([&](sycl::handler &cgh) {
// getting read access inside the device kernel
auto x = x2.get_access<sycl::access::mode::read>(cgh);
auto y = y2.get_access<sycl::access::mode::read>(cgh);
auto a = a2.get_access<sycl::access::mode::read>(cgh);
auto b = b2.get_access<sycl::access::mode::read>(cgh);
auto dv = dv2.get_access<sycl::access::mode::read>(cgh);
auto rays = rays2.get_access<sycl::access::mode::read>(cgh);
// Get write access
auto image = image2.get_access<sycl::access::mode::read_write>(cgh);
auto I_ang = I_ang2.get_access<sycl::access::mode::read_write>(cgh);
// Get ray data
GET_GAIN_ACCESS(0);
GET_GAIN_ACCESS(1);
GET_GAIN_ACCESS(2);
GET_GAIN_ACCESS(3);
GET_GAIN_ACCESS(4);
GET_GAIN_ACCESS(5);
GET_GAIN_ACCESS(6);
GET_GAIN_ACCESS(7);
GET_GAIN_ACCESS(8);
// Run the loop
cgh.parallel_for<class run_rays>(
sycl::range<1>{numThreads}, [=](sycl::item<1> itemId) {
size_t id = itemId.get_id()[0];
//auto gain2 = (const RayTrace::ray_gain_struct*) gain_ptr;
RayTrace::ray_seed_struct* seed2 = nullptr;
RayTrace::ray_gain_struct gain2[9];
GET_GAIN_POINTERS(0);
GET_GAIN_POINTERS(1);
GET_GAIN_POINTERS(2);
GET_GAIN_POINTERS(3);
GET_GAIN_POINTERS(4);
GET_GAIN_POINTERS(5);
GET_GAIN_POINTERS(6);
GET_GAIN_POINTERS(7);
GET_GAIN_POINTERS(8);
for (auto i = id; i < rays.get_count(); i += itemId.get_range()[0]) {
const ray_struct ray = rays[i];
double Iv[K_MAX];
ray_struct ray2;
int error = RayTrace_calc_ray( ray, N, dz, gain2, seed2, nv, method, Iv, ray2 );
if ( error!=0 ) {
//failed_rays.push_back(ray);
//set_bit(-error,failure_code);
} else {
if ( method == 1 ) {
// We are propagating backward, use ray for the cell updates
ray2 = ray;
} else {
// We are propagating forward, use ray2 for the cell updates
// Note: The sign of the angle is reversed with respect to the euv_beam
ray2.a = -ray2.a;
ray2.b = -ray2.b;
if ( ray2.y<0.0 && y[0]>=0.0 ) {
// We need to change the sign of y
ray2.y = -ray2.y;
}
}
// Get the indicies to the cells in image and I_ang
int i1 = getIndex( nx, x.get_pointer(), dx, ray2.x );
int i2 = getIndex( ny, y.get_pointer(), dy, ray2.y );
int i3 = getIndex( na, a.get_pointer(), da, ray2.a );
int i4 = getIndex( nb, b.get_pointer(), db, ray2.b );
// Copy I_out into image
if (i1>=0 && i2>=0){
for (int iv=0; iv<nv; iv++) {
int k = (iv+id)%nv; // Offset index to reduce collisions
image[k+nv*(i1+i2*nx)] += Iv[k]*scale;
//atomicAdd2(&Iv2[iv],Iv[iv]*scale);
}
}
// Copy I_out into I_ang
if (i3>=0 && i4>=0) {
double tmp = 0.0;
for (int iv=0; iv<nv; iv++)
tmp += 2.0*dv[iv]*Iv[iv];
//atomicAdd2(&I_ang[i3+i4*na],tmp);
I_ang[i3+i4*na] += tmp;
}
}
}
});
});
queue.wait_and_throw();
// Cleanup
//RayTrace::ray_gain_struct::free_device( N, gain_in, gain );
//RayTrace::ray_seed_struct::free_device( seed_in, seed );
}
#include <CL/sycl.hpp>
#define USING_SYCL
#include "RayTrace/RayTraceStructures.h"
#include "RayTrace/common/RayTraceImageHelper.h"
#include "RayTrace/utilities/RayUtilities.h"
class run_rays;
using namespace cl;
// Get the index
inline int getIndex( int n, const double *x, double dx, double y )
{
if ( y < x[0] - 0.5 * dx || y > x[n - 1] + 0.5 * dx )
return -1;
return findfirstsingle( x, n, y - 0.5 * dx );
}
struct ray_gain_struct2 {
ray_gain_struct2():
x( (double*) nullptr, sycl::range<1>(0) ),
y( (double*) nullptr, sycl::range<1>(0) ),
n( (double*) nullptr, sycl::range<1>(0) ),
g0( (float*) nullptr, sycl::range<1>(0) ),
E0( (float*) nullptr, sycl::range<1>(0) ),
gv( (float*) nullptr, sycl::range<1>(0) ),
gv0( (float*) nullptr, sycl::range<1>(0) )
{
}
ray_gain_struct2( const RayTrace::ray_gain_struct& rhs ):
Nx( rhs.Nx ),
Ny( rhs.Ny ),
Nv( rhs.Nv ),
x( rhs.x, sycl::range<1>(rhs.Nx) ),
y( rhs.y, sycl::range<1>(rhs.Ny) ),
n( rhs.n, sycl::range<1>(rhs.Nx*rhs.Ny) ),
g0( rhs.g0, sycl::range<1>(rhs.Nx*rhs.Ny) ),
E0( rhs.E0, sycl::range<1>(rhs.Nx*rhs.Ny) ),
gv( rhs.gv, sycl::range<1>(rhs.Nx*rhs.Ny*rhs.Nv) ),
gv0( rhs.gv0, sycl::range<1>(rhs.Nx*rhs.Ny) )
{
}
int Nx, Ny, Nv;
sycl::buffer<double> x, y, n;
sycl::buffer<float> g0, E0, gv, gv0;
};
// Remove const
template<class TYPE>
TYPE& removeConst( const TYPE& x ) { return const_cast<TYPE&>( x ); }
// Create the image and call the cuda kernel
void RayTraceImageSYCLLoop( int N, const RayTrace::EUV_beam_struct& beam,
const RayTrace::ray_gain_struct* gain_in, const RayTrace::ray_seed_struct* seed_in,
int method, const std::vector<ray_struct>& rays, double scale,
double *image_out, double *I_ang_out,
unsigned int& failure_code, std::vector<ray_struct>& failed_rays )
{
sycl::queue queue([&](sycl::exception_list eL) {
try {
for (auto& e : eL) {
std::rethrow_exception(e);
}
} catch (sycl::exception e) {
std::cout << " An exception has been thrown: " << e.what() << std::endl;
}
});
auto device = queue.get_device();
auto numGroups = device.get_info<sycl::info::device::max_compute_units>();
auto groupSize = device.get_info<sycl::info::device::max_work_group_size>();
auto numThreads = numGroups * groupSize;
failure_code = 0; // Need to track failures on GPU
// Copy basic parameters
const int nx = beam.nx;
const int ny = beam.ny;
const int na = beam.na;
const int nb = beam.nb;
const int nv = beam.nv;
const double dx = beam.dx;
const double dy = beam.dy;
const double dz = beam.dz;
const double da = beam.da;
const double db = beam.db;
// place the ray gain and seed structures on the device
constexpr int NN = 10;
RAY_ASSERT( N <= NN );
ray_gain_struct2 gain2[NN];
for ( int i=0; i<N; i++)
gain2[i] = ray_gain_struct2(gain_in[i]);
// Allocate device memory
size_t N_rays = rays.size();
auto x2 = sycl::buffer<double>(beam.x, sycl::range<1>(nx));
auto y2 = sycl::buffer<double>(beam.y, sycl::range<1>(ny));
auto a2 = sycl::buffer<double>(beam.a, sycl::range<1>(na));
auto b2 = sycl::buffer<double>(beam.b, sycl::range<1>(nb));
auto dv2 = sycl::buffer<double>(beam.dv, sycl::range<1>(nv));
auto rays2 = sycl::buffer<ray_struct>(rays.data(), sycl::range<1>(rays.size()));
auto image2 = sycl::buffer<double>(image_out, sycl::range<1>(nx*ny*nv));
auto I_ang2 = sycl::buffer<double>(I_ang_out, sycl::range<1>(na*nb));
// Do calculation on device:
queue.submit([&](sycl::handler &cgh) {
// getting read access inside the device kernel
auto x = x2.get_access<sycl::access::mode::read>(cgh);
auto y = y2.get_access<sycl::access::mode::read>(cgh);
auto a = a2.get_access<sycl::access::mode::read>(cgh);
auto b = b2.get_access<sycl::access::mode::read>(cgh);
auto dv = dv2.get_access<sycl::access::mode::read>(cgh);
auto rays = rays2.get_access<sycl::access::mode::read>(cgh);
// Get the ray gain and seed structures
RayTrace::ray_gain_struct gain[NN];
RayTrace::ray_seed_struct* seed = nullptr;
for (int i=0; i<N; i++) {
gain[i].Nx = gain2[i].Nx;
gain[i].Ny = gain2[i].Ny;
gain[i].Nv = gain2[i].Nv;
gain[i].x = gain2[i].x.get_access<sycl::access::mode::read>(cgh).get_pointer();
gain[i].y = gain2[i].y.get_access<sycl::access::mode::read>(cgh).get_pointer();
gain[i].n = gain2[i].n.get_access<sycl::access::mode::read>(cgh).get_pointer();
gain[i].g0 = gain2[i].g0.get_access<sycl::access::mode::read>(cgh).get_pointer();
gain[i].E0 = gain2[i].E0.get_access<sycl::access::mode::read>(cgh).get_pointer();
gain[i].gv = gain2[i].gv.get_access<sycl::access::mode::read>(cgh).get_pointer();
gain[i].gv0 = gain2[i].gv0.get_access<sycl::access::mode::read>(cgh).get_pointer();
}
// Get write access
auto image = image2.get_access<sycl::access::mode::read_write>(cgh);
auto I_ang = I_ang2.get_access<sycl::access::mode::read_write>(cgh);
// Run the loop
cgh.parallel_for<class run_rays>(
sycl::range<1>{numThreads}, [=](sycl::item<1> itemId) {
auto id = itemId.get_id(0);
// Loop through the rays
for (auto i = id; i < rays.get_count(); i += itemId.get_range()[0]) {
const ray_struct ray = rays[i];
double Iv[K_MAX];
ray_struct ray2;
int error = RayTrace_calc_ray( ray, 0, dz, gain, seed, nv, method, Iv, ray2 );
if ( error!=0 ) {
//failed_rays.push_back(ray);
//set_bit(-error,failure_code);
} else {
if ( method == 1 ) {
// We are propagating backward, use ray for the cell updates
ray2 = ray;
} else {
// We are propagating forward, use ray2 for the cell updates
// Note: The sign of the angle is reversed with respect to the euv_beam
ray2.a = -ray2.a;
ray2.b = -ray2.b;
if ( ray2.y<0.0 && y[0]>=0.0 ) {
// We need to change the sign of y
ray2.y = -ray2.y;
}
}
// Get the indicies to the cells in image and I_ang
int i1 = getIndex( nx, x.get_pointer(), dx, ray2.x );
int i2 = getIndex( ny, y.get_pointer(), dy, ray2.y );
int i3 = getIndex( na, a.get_pointer(), da, ray2.a );
int i4 = getIndex( nb, b.get_pointer(), db, ray2.b );
// Copy I_out into image
if (i1>=0 && i2>=0){
double *Iv2 = &image[nv*(i1+i2*nx)];
for (int iv=0; iv<nv; iv++)
Iv2[iv] += Iv[iv]*scale;
//atomicAdd2(&Iv2[iv],Iv[iv]*scale);
}
// Copy I_out into I_ang
if (i3>=0 && i4>=0) {
double tmp = 0.0;
for (int iv=0; iv<nv; iv++)
tmp += 2.0*dv[iv]*Iv[iv];
//atomicAdd2(&I_ang[i3+i4*na],tmp);
I_ang[i3+i4*na] += tmp;
}
}
}
});
// Cleanup
for (int i=0; i<N; i++) {
gain[i].x = nullptr;
gain[i].y = nullptr;
gain[i].n = nullptr;