Loading src/AtomicModel/interp.cpp +23 −8 Original line number Diff line number Diff line Loading @@ -207,11 +207,22 @@ double interp::bisection_coeff( { if ( N < 2 ) throw std::logic_error( "Error: N<2" ); // Copy and sort x,r by r // Copy x and r double *x = new double[N]; double *r = new double[N]; memcpy( x, x_in, N * sizeof( double ) ); memcpy( r, r_in, N * sizeof( double ) ); // Change the sign or r such that min(x) will be at the beginning auto tmp = std::make_pair( x[0], r[0] ); for (size_t i=0; i<N; i++) { if ( x[i] < tmp.first ) tmp = std::make_pair( x[i], r[i] ); } if ( tmp.second > 0 ) { for (size_t i=0; i<N; i++) r[i] = -r[i]; } // Sort x and r quicksort( N, r, x ); // Check that we have two signs for r if ( r[0] > 0.0 || r[N - 1] < 0.0 ) { Loading @@ -219,17 +230,21 @@ double interp::bisection_coeff( delete[] r; throw std::logic_error( "r does not have two different signs" ); } // Find r > 0 size_t i = findfirstsingle( r, N, 0.0 ); double range[2] = { std::min( x[i - 1], x[i] ), std::max( x[i - 1], x[i] ) }; // Find r >= 0 size_t index = findfirstsingle( r, N, 0.0 ); double range[2] = { x[index-1], x[index] }; for (size_t i=0; i<index; i++) range[0] = std::max( range[0], x[i] ); for (size_t i=index; i<N; i++) range[1] = std::min( range[1], x[i] ); double y = 0; if ( N < 5 ) { // Use simple bisection for the first couple of iterations y = 0.5 * ( range[0] + range[1] ); } else if ( i == 1 || i == N - 1 ) { } else if ( index == 1 || index == N - 1 ) { // We are at the boundary which reduces the gradient that we can use // Use an uneven bisection to try an ensure we have 2 points on each side if ( i == 1 ) { if ( index == 1 ) { y = 0.8 * x[0] + 0.2 * x[1]; } else { y = 0.2 * x[N - 2] + 0.8 * x[N - 1]; Loading @@ -245,7 +260,7 @@ double interp::bisection_coeff( // Finished delete[] x; delete[] r; if ( range_out != NULL ) { if ( range_out != nullptr ) { range_out[0] = range[0]; range_out[1] = range[1]; } Loading src/AtomicModel/interp.h +3 −3 Original line number Diff line number Diff line Loading @@ -428,7 +428,7 @@ HOST_DEVICE inline T1 integrate_simpson( * using an adaptive Simpson's rule. This requires N+1 function evaluations. * @param[in] fun The function to integrate of the form y = f(x) * @param[in] range The range to integrate * @param[in] tol Number of sub-intervals to use * @param[in] tol Absolute tolerance to use * @param[in] N_eval Total number of evalutations * @param[in] norm Function to compute the norm to use for the error */ Loading @@ -446,7 +446,7 @@ HOST_DEVICE inline T1 integrate( const std::function<T1( T2 )> &fun, const std:: * N=1). * @param[in] fun The function to integrate of the form y = f(x) * @param[in] range The range to integrate * @param[in] tol Number of sub-intervals to use * @param[in] tol Absolute tolerance to use * @param[in] N_eval Total number of evalutations * @param[in] norm Function to compute the norm to use for the error */ Loading @@ -463,7 +463,7 @@ HOST_DEVICE inline T1 integrate( const std::function<T1( T2, T2 )> &fun, * using an adaptive Simpson's rule. * @param[in] fun The function to integrate of the form y = f(x) * @param[in] range The range to integrate * @param[in] tol Number of sub-intervals to use * @param[in] tol Absolute tolerance to use * @param[in] N_eval Total number of evalutations * @param[in] norm Function to compute the norm to use for the error */ Loading src/CreateImage.cpp +1 −0 Original line number Diff line number Diff line Loading @@ -68,6 +68,7 @@ int run_tests( const std::string& filename, const Options& options ) #endif #ifdef USE_CUDA methods.push_back( "Cuda" ); methods.push_back( "Cuda-MultiGPU" ); #endif #ifdef USE_OPENACC methods.push_back( "OpenAcc" ); Loading src/CreateImageHelpers.h +1 −1 Original line number Diff line number Diff line Loading @@ -59,7 +59,7 @@ public: " CreateImage <args> file.dat\n" "Optional arguments:\n" " -methods=METHODS Comma seperated list of methods to test. Default is all availible methods\n" " cpu, threads, OpenMP, Cuda, OpenAcc, Kokkos-Serial, " " cpu, threads, OpenMP, Cuda, Cuda-MultiGPU, OpenAcc, Kokkos-Serial, " "Kokkos-Thread, Kokkos-OpenMP, Kokkos-Cuda\n" " -iterations=N Number of iterations to run. Time returned will be " "the average time/iteration.\n" Loading src/RayTraceImage.cpp +116 −71 Original line number Diff line number Diff line Loading @@ -6,6 +6,10 @@ #include <math.h> #include <stdint.h> #if CXX_STD == 11 || CXX_STD == 14 #include <thread> #endif // Timer include #include "ProfilerApp.h" Loading Loading @@ -35,61 +39,106 @@ #define ENABLE_OPENMP #undef USE_OPENMP #endif #ifdef ENABLE_CUDA #include <cuda_runtime_api.h> #endif #include "common/RayTraceImageHelper.h" #include "utilities/RayUtilityMacros.h" // Define the external loop interfaces extern void RayTraceImageCPULoop( int N, int nx, int ny, int na, int nb, int nv, const double *x, const double *y, const double *a, const double *b, double dx, double dy, double dz, double da, double db, const double *dv, const RayTrace::ray_gain_struct *gain, extern void RayTraceImageCPULoop( int N, const RayTrace::EUV_beam_struct& euv_beam, const RayTrace::ray_gain_struct *gain, const RayTrace::ray_seed_struct *seed, int method, const std::vector<ray_struct> &rays, double scale, double *image, double *I_ang, unsigned int &failure_code, std::vector<ray_struct> &failed_rays ); #if defined( ENABLE_OPENMP ) extern void RayTraceImageOpenMPLoop( int N, int nx, int ny, int na, int nb, int nv, const double *x, const double *y, const double *a, const double *b, double dx, double dy, double dz, double da, double db, const double *dv, const RayTrace::ray_gain_struct *gain, const RayTrace::ray_seed_struct *seed, int method, const std::vector<ray_struct> &rays, double scale, double *image, double *I_ang, unsigned int &failure_code, std::vector<ray_struct> &failed_rays ); #endif #if CXX_STD == 11 || CXX_STD == 14 extern void RayTraceImageThreadLoop( int N, int nx, int ny, int na, int nb, int nv, const double *x, const double *y, const double *a, const double *b, double dx, double dy, double dz, double da, double db, const double *dv, const RayTrace::ray_gain_struct *gain, extern void RayTraceImageOpenMPLoop( int N, const RayTrace::EUV_beam_struct& euv_beam, const RayTrace::ray_gain_struct *gain, const RayTrace::ray_seed_struct *seed, int method, const std::vector<ray_struct> &rays, double scale, double *image, double *I_ang, unsigned int &failure_code, std::vector<ray_struct> &failed_rays ); #endif #if defined( ENABLE_OPENACC ) extern void RayTraceImageOpenAccLoop( int N, int nx, int ny, int na, int nb, int nv, const double *x, const double *y, const double *a, const double *b, double dx, double dy, double dz, double da, double db, const double *dv, const RayTrace::ray_gain_struct *gain, extern void RayTraceImageOpenAccLoop( int N, const RayTrace::EUV_beam_struct& euv_beam, const RayTrace::ray_gain_struct *gain, const RayTrace::ray_seed_struct *seed, int method, const std::vector<ray_struct> &rays, double scale, double *image, double *I_ang, unsigned int &failure_code, std::vector<ray_struct> &failed_rays ); #endif #if defined( ENABLE_KOKKOS ) #include "kokkos/RayTraceImageKokkos.hpp" /*extern void RayTraceImageKokkosLoop( int N, int nx, int ny, int na, int nb, int nv, const double *x, const double *y, const double *a, const double *b, double dx, double dy, double dz, double da, double db, const double *dv, const RayTrace::ray_gain_struct* gain, const RayTrace::ray_seed_struct* seed, int method, const std::vector<ray_struct>& rays, double scale, double *image, double *I_ang, unsigned int& failure_code, std::vector<ray_struct>& failed_rays );*/ /*extern void RayTraceImageKokkosLoop( int N, const RayTrace::EUV_beam_struct& euv_beam, const RayTrace::ray_gain_struct *gain, const RayTrace::ray_seed_struct *seed, int method, const std::vector<ray_struct> &rays, double scale, double *image, double *I_ang, unsigned int &failure_code, std::vector<ray_struct> &failed_rays );*/ #endif #if defined( ENABLE_CUDA ) extern void RayTraceImageCudaLoop( int N, int nx, int ny, int na, int nb, int nv, const double *x, const double *y, const double *a, const double *b, double dx, double dy, double dz, double da, double db, const double *dv, const RayTrace::ray_gain_struct *gain, extern void RayTraceImageCudaLoop( int N, const RayTrace::EUV_beam_struct& euv_beam, const RayTrace::ray_gain_struct *gain, const RayTrace::ray_seed_struct *seed, int method, const std::vector<ray_struct> &rays, double scale, double *image, double *I_ang, unsigned int &failure_code, std::vector<ray_struct> &failed_rays ); #endif /********************************************************************** * Call RayTraceImage function from a thread loop * **********************************************************************/ void setGPU( int id ) { NULL_USE(id); #if defined( ENABLE_CUDA ) cudaSetDevice(id); #endif } #if CXX_STD == 11 || CXX_STD == 14 void RayTraceImageThreadLoop( size_t N_threads, std::function<void( int, const RayTrace::EUV_beam_struct&, const RayTrace::ray_gain_struct*, const RayTrace::ray_seed_struct*, int, const std::vector<ray_struct>&, double, double*, double*, unsigned int&, std::vector<ray_struct>& )> function, std::function<void( int )> setID, int N, const RayTrace::EUV_beam_struct& beam, const RayTrace::ray_gain_struct *gain, const RayTrace::ray_seed_struct *seed, int method, const std::vector<ray_struct> &rays, double scale, double *image, double *I_ang, unsigned int &failure_code, std::vector<ray_struct> &failed_rays ) { std::vector<std::vector<ray_struct>> rays2( N_threads ); std::vector<std::vector<ray_struct>> failed_rays2( N_threads ); std::vector<unsigned int> failure_code2( N_threads, 0 ); std::vector<double *> image2( N_threads, NULL ); std::vector<double *> I_ang2( N_threads, NULL ); std::vector<std::thread> threads; for ( size_t i = 0, j = 0; i < N_threads; i++ ) { size_t N2 = rays.size() / N_threads + 1; if ( N2 == 0 ) { N2 = 8; } rays2.reserve( N2 ); for ( size_t k = 0; k < N2 && j < rays.size(); k++, j++ ) rays2[i].push_back( rays[j] ); image2[i] = (double *) calloc( beam.nx * beam.ny * beam.nv, sizeof( double ) ); I_ang2[i] = (double *) calloc( beam.na * beam.nb, sizeof( double ) ); setID( i ); threads.push_back( std::thread( function, N, std::ref(beam), gain, seed, method, std::ref( rays2[i] ), scale, image2[i], I_ang2[i], std::ref( failure_code2[i] ), std::ref( failed_rays2[i] ) ) ); } for ( size_t i = 0; i < N_threads; i++ ) { threads[i].join(); for ( int j = 0; j < beam.nx * beam.ny * beam.nv; j++ ) image[j] += image2[i][j]; for ( int j = 0; j < beam.na * beam.nb; j++ ) I_ang[j] += I_ang2[i][j]; failure_code = failure_code | failure_code2[i]; for ( size_t j = 0; j < failed_rays2[i].size(); j++ ) failed_rays.push_back( failed_rays2[i][j] ); free( image2[i] ); free( I_ang2[i] ); rays2[i].clear(); } } #endif /********************************************************************** * Write the failed rays to a file * **********************************************************************/ Loading Loading @@ -195,27 +244,15 @@ void RayTrace::create_image( create_image_struct *info, std::string compute_meth const int na = info->euv_beam->na; const int nb = info->euv_beam->nb; const int nv = info->euv_beam->nv; const double *x = info->euv_beam->x; const double *y = info->euv_beam->y; const double *a = info->euv_beam->a; const double *b = info->euv_beam->b; const double dx = info->euv_beam->dx; const double dy = info->euv_beam->dy; const double dz = info->dz; const double da = info->euv_beam->da; const double db = info->euv_beam->db; const double *dv = info->euv_beam->dv; // Check the euv_beam grid bool grid_error = false; grid_error = grid_error || check_grid( nx, dx, x ); grid_error = grid_error || check_grid( ny, dy, y ); grid_error = grid_error || check_grid( na, da, a ); grid_error = grid_error || check_grid( nb, db, b ); grid_error = grid_error || check_grid( info->euv_beam->nx, info->euv_beam->dx, info->euv_beam->x ); grid_error = grid_error || check_grid( info->euv_beam->ny, info->euv_beam->dy, info->euv_beam->y ); grid_error = grid_error || check_grid( info->euv_beam->na, info->euv_beam->da, info->euv_beam->a ); grid_error = grid_error || check_grid( info->euv_beam->nb, info->euv_beam->db, info->euv_beam->b ); if ( grid_error ) RAY_ERROR( "Only uniform grid spacings are currently supported (euv_beam)" ); if ( dz != info->euv_beam->dz ) RAY_ERROR( "info.dz != euv_beam.dz" ); // Check the seed_beam grid if ( info->seed_beam != NULL ) { Loading Loading @@ -258,7 +295,7 @@ void RayTrace::create_image( create_image_struct *info, std::string compute_meth double seed_dy = info->seed_beam->dy; double seed_da = info->seed_beam->da; double seed_db = info->seed_beam->db; scale = ( seed_dx * seed_dy * seed_da * seed_db ) / ( dx * dy ); scale = ( seed_dx * seed_dy * seed_da * seed_db ) / ( info->euv_beam->dx * info->euv_beam->dy ); timer_name = "propagate_seed"; } else { method = 1; Loading @@ -282,10 +319,10 @@ void RayTrace::create_image( create_image_struct *info, std::string compute_meth // Create ray information ray_struct ray; if ( info->seed_beam == NULL ) { ray.x = (float) x[i]; ray.y = (float) y[j]; ray.a = (float) a[k]; ray.b = (float) b[m]; ray.x = (float) info->euv_beam->x[i]; ray.y = (float) info->euv_beam->y[j]; ray.a = (float) info->euv_beam->a[k]; ray.b = (float) info->euv_beam->b[m]; } else { ray.x = (float) info->seed_beam->x[i]; ray.y = (float) info->seed_beam->y[j]; Loading Loading @@ -317,38 +354,34 @@ void RayTrace::create_image( create_image_struct *info, std::string compute_meth PROFILE_START( timer_name ); if ( compute_method == "openacc" ) { #if defined( ENABLE_OPENACC ) RayTraceImageOpenAccLoop( N, nx, ny, na, nb, nv, x, y, a, b, dx, dy, dz, da, db, dv, info->gain, info->seed, method, rays, scale, image, I_ang, failure_code, failed_rays ); RayTraceImageOpenAccLoop( N, std::ref(*info->euv_beam), info->gain, info->seed, method, rays, scale, image, I_ang, failure_code, failed_rays ); #else RAY_ERROR( "OpenAcc is not availible" ); #endif } else if ( compute_method.substr( 0, 7 ) == "kokkos-" ) { #if defined( ENABLE_KOKKOS ) if ( compute_method == "kokkos-serial" ) { RayTraceImageKokkosLoop<Kokkos::Serial>( N, nx, ny, na, nb, nv, x, y, a, b, dx, dy, dz, da, db, dv, info->gain, info->seed, method, rays, scale, image, I_ang, failure_code, failed_rays ); RayTraceImageKokkosLoop<Kokkos::Serial>( N, std::ref(*info->euv_beam), info->gain, info->seed, method, rays, scale, image, I_ang, failure_code, failed_rays ); } else if ( compute_method == "kokkos-openmp" ) { #ifdef KOKKOS_HAVE_OPENMP RayTraceImageKokkosLoop<Kokkos::OpenMP>( N, nx, ny, na, nb, nv, x, y, a, b, dx, dy, dz, da, db, dv, info->gain, info->seed, method, rays, scale, image, I_ang, failure_code, failed_rays ); RayTraceImageKokkosLoop<Kokkos::OpenMP>( N, std::ref(*info->euv_beam), info->gain, info->seed, method, rays, scale, image, I_ang, failure_code, failed_rays ); #else RAY_ERROR( "Kokkos compiled without OpenMP" ); #endif } else if ( compute_method == "kokkos-thread" ) { #ifdef KOKKOS_HAVE_PTHREAD RayTraceImageKokkosLoop<Kokkos::Threads>( N, nx, ny, na, nb, nv, x, y, a, b, dx, dy, dz, da, db, dv, info->gain, info->seed, method, rays, scale, image, I_ang, failure_code, failed_rays ); RayTraceImageKokkosLoop<Kokkos::Threads>( N, std::ref(*info->euv_beam), info->gain, info->seed, method, rays, scale, image, I_ang, failure_code, failed_rays ); #else RAY_ERROR( "Kokkos compiled without pthreads" ); #endif } else if ( compute_method == "kokkos-cuda" ) { #ifdef KOKKOS_HAVE_CUDA RayTraceImageKokkosLoop<Kokkos::Cuda>( N, nx, ny, na, nb, nv, x, y, a, b, dx, dy, dz, da, db, dv, info->gain, info->seed, method, rays, scale, image, I_ang, failure_code, failed_rays ); RayTraceImageKokkosLoop<Kokkos::Cuda>( N, std::ref(*info->euv_beam), info->gain, info->seed, method, rays, scale, image, I_ang, failure_code, failed_rays ); #else RAY_ERROR( "Kokkos compiled without cuda" ); #endif Loading @@ -360,25 +393,37 @@ void RayTrace::create_image( create_image_struct *info, std::string compute_meth #endif } else if ( compute_method == "cuda" ) { #if defined( ENABLE_CUDA ) RayTraceImageCudaLoop( N, nx, ny, na, nb, nv, x, y, a, b, dx, dy, dz, da, db, dv, info->gain, info->seed, method, rays, scale, image, I_ang, failure_code, failed_rays ); RayTraceImageCudaLoop( N, std::ref(*info->euv_beam), info->gain, info->seed, method, rays, scale, image, I_ang, failure_code, failed_rays ); #else RAY_ERROR( "Cuda is not availible" ); #endif } else if ( compute_method == "cuda-multigpu" ) { #if defined( ENABLE_CUDA ) int N_gpu; cudaGetDeviceCount( &N_gpu ); RayTraceImageThreadLoop( N_gpu, RayTraceImageCudaLoop, setGPU, N, std::ref(*info->euv_beam), info->gain, info->seed, method, rays, scale, image, I_ang, failure_code, failed_rays ); #else RAY_ERROR( "Cuda-MultiGPU is not availible" ); #endif } else if ( compute_method == "cpu" ) { RayTraceImageCPULoop( N, nx, ny, na, nb, nv, x, y, a, b, dx, dy, dz, da, db, dv, info->gain, info->seed, method, rays, scale, image, I_ang, failure_code, failed_rays ); RayTraceImageCPULoop( N, std::ref(*info->euv_beam), info->gain, info->seed, method, rays, scale, image, I_ang, failure_code, failed_rays ); } else if ( compute_method == "threads" ) { #if CXX_STD == 11 || CXX_STD == 14 RayTraceImageThreadLoop( N, nx, ny, na, nb, nv, x, y, a, b, dx, dy, dz, da, db, dv, info->gain, info->seed, method, rays, scale, image, I_ang, failure_code, failed_rays ); size_t N_threads = std::thread::hardware_concurrency(); RayTraceImageThreadLoop( N_threads, RayTraceImageCPULoop, [](int) {}, N, std::ref(*info->euv_beam), info->gain, info->seed, method, rays, scale, image, I_ang, failure_code, failed_rays ); #else RAY_ERROR( "Threaded version requires C++11" ); #endif } else if ( compute_method == "openmp" ) { #if defined( ENABLE_OPENMP ) RayTraceImageOpenMPLoop( N, nx, ny, na, nb, nv, x, y, a, b, dx, dy, dz, da, db, dv, info->gain, info->seed, method, rays, scale, image, I_ang, failure_code, failed_rays ); RayTraceImageOpenMPLoop( N, std::ref(*info->euv_beam), info->gain, info->seed, method, rays, scale, image, I_ang, failure_code, failed_rays ); #else RAY_ERROR( "OpenMP is not availible" ); #endif Loading @@ -389,7 +434,7 @@ void RayTrace::create_image( create_image_struct *info, std::string compute_meth // Save failed rays if ( failure_code != 0 ) { write_failures( failure_code, failed_rays, method, N, dz, info->gain ); write_failures( failure_code, failed_rays, method, N, info->euv_beam->dz, info->gain ); RAY_ERROR( "Some rays failed" ); } Loading Loading
src/AtomicModel/interp.cpp +23 −8 Original line number Diff line number Diff line Loading @@ -207,11 +207,22 @@ double interp::bisection_coeff( { if ( N < 2 ) throw std::logic_error( "Error: N<2" ); // Copy and sort x,r by r // Copy x and r double *x = new double[N]; double *r = new double[N]; memcpy( x, x_in, N * sizeof( double ) ); memcpy( r, r_in, N * sizeof( double ) ); // Change the sign or r such that min(x) will be at the beginning auto tmp = std::make_pair( x[0], r[0] ); for (size_t i=0; i<N; i++) { if ( x[i] < tmp.first ) tmp = std::make_pair( x[i], r[i] ); } if ( tmp.second > 0 ) { for (size_t i=0; i<N; i++) r[i] = -r[i]; } // Sort x and r quicksort( N, r, x ); // Check that we have two signs for r if ( r[0] > 0.0 || r[N - 1] < 0.0 ) { Loading @@ -219,17 +230,21 @@ double interp::bisection_coeff( delete[] r; throw std::logic_error( "r does not have two different signs" ); } // Find r > 0 size_t i = findfirstsingle( r, N, 0.0 ); double range[2] = { std::min( x[i - 1], x[i] ), std::max( x[i - 1], x[i] ) }; // Find r >= 0 size_t index = findfirstsingle( r, N, 0.0 ); double range[2] = { x[index-1], x[index] }; for (size_t i=0; i<index; i++) range[0] = std::max( range[0], x[i] ); for (size_t i=index; i<N; i++) range[1] = std::min( range[1], x[i] ); double y = 0; if ( N < 5 ) { // Use simple bisection for the first couple of iterations y = 0.5 * ( range[0] + range[1] ); } else if ( i == 1 || i == N - 1 ) { } else if ( index == 1 || index == N - 1 ) { // We are at the boundary which reduces the gradient that we can use // Use an uneven bisection to try an ensure we have 2 points on each side if ( i == 1 ) { if ( index == 1 ) { y = 0.8 * x[0] + 0.2 * x[1]; } else { y = 0.2 * x[N - 2] + 0.8 * x[N - 1]; Loading @@ -245,7 +260,7 @@ double interp::bisection_coeff( // Finished delete[] x; delete[] r; if ( range_out != NULL ) { if ( range_out != nullptr ) { range_out[0] = range[0]; range_out[1] = range[1]; } Loading
src/AtomicModel/interp.h +3 −3 Original line number Diff line number Diff line Loading @@ -428,7 +428,7 @@ HOST_DEVICE inline T1 integrate_simpson( * using an adaptive Simpson's rule. This requires N+1 function evaluations. * @param[in] fun The function to integrate of the form y = f(x) * @param[in] range The range to integrate * @param[in] tol Number of sub-intervals to use * @param[in] tol Absolute tolerance to use * @param[in] N_eval Total number of evalutations * @param[in] norm Function to compute the norm to use for the error */ Loading @@ -446,7 +446,7 @@ HOST_DEVICE inline T1 integrate( const std::function<T1( T2 )> &fun, const std:: * N=1). * @param[in] fun The function to integrate of the form y = f(x) * @param[in] range The range to integrate * @param[in] tol Number of sub-intervals to use * @param[in] tol Absolute tolerance to use * @param[in] N_eval Total number of evalutations * @param[in] norm Function to compute the norm to use for the error */ Loading @@ -463,7 +463,7 @@ HOST_DEVICE inline T1 integrate( const std::function<T1( T2, T2 )> &fun, * using an adaptive Simpson's rule. * @param[in] fun The function to integrate of the form y = f(x) * @param[in] range The range to integrate * @param[in] tol Number of sub-intervals to use * @param[in] tol Absolute tolerance to use * @param[in] N_eval Total number of evalutations * @param[in] norm Function to compute the norm to use for the error */ Loading
src/CreateImage.cpp +1 −0 Original line number Diff line number Diff line Loading @@ -68,6 +68,7 @@ int run_tests( const std::string& filename, const Options& options ) #endif #ifdef USE_CUDA methods.push_back( "Cuda" ); methods.push_back( "Cuda-MultiGPU" ); #endif #ifdef USE_OPENACC methods.push_back( "OpenAcc" ); Loading
src/CreateImageHelpers.h +1 −1 Original line number Diff line number Diff line Loading @@ -59,7 +59,7 @@ public: " CreateImage <args> file.dat\n" "Optional arguments:\n" " -methods=METHODS Comma seperated list of methods to test. Default is all availible methods\n" " cpu, threads, OpenMP, Cuda, OpenAcc, Kokkos-Serial, " " cpu, threads, OpenMP, Cuda, Cuda-MultiGPU, OpenAcc, Kokkos-Serial, " "Kokkos-Thread, Kokkos-OpenMP, Kokkos-Cuda\n" " -iterations=N Number of iterations to run. Time returned will be " "the average time/iteration.\n" Loading
src/RayTraceImage.cpp +116 −71 Original line number Diff line number Diff line Loading @@ -6,6 +6,10 @@ #include <math.h> #include <stdint.h> #if CXX_STD == 11 || CXX_STD == 14 #include <thread> #endif // Timer include #include "ProfilerApp.h" Loading Loading @@ -35,61 +39,106 @@ #define ENABLE_OPENMP #undef USE_OPENMP #endif #ifdef ENABLE_CUDA #include <cuda_runtime_api.h> #endif #include "common/RayTraceImageHelper.h" #include "utilities/RayUtilityMacros.h" // Define the external loop interfaces extern void RayTraceImageCPULoop( int N, int nx, int ny, int na, int nb, int nv, const double *x, const double *y, const double *a, const double *b, double dx, double dy, double dz, double da, double db, const double *dv, const RayTrace::ray_gain_struct *gain, extern void RayTraceImageCPULoop( int N, const RayTrace::EUV_beam_struct& euv_beam, const RayTrace::ray_gain_struct *gain, const RayTrace::ray_seed_struct *seed, int method, const std::vector<ray_struct> &rays, double scale, double *image, double *I_ang, unsigned int &failure_code, std::vector<ray_struct> &failed_rays ); #if defined( ENABLE_OPENMP ) extern void RayTraceImageOpenMPLoop( int N, int nx, int ny, int na, int nb, int nv, const double *x, const double *y, const double *a, const double *b, double dx, double dy, double dz, double da, double db, const double *dv, const RayTrace::ray_gain_struct *gain, const RayTrace::ray_seed_struct *seed, int method, const std::vector<ray_struct> &rays, double scale, double *image, double *I_ang, unsigned int &failure_code, std::vector<ray_struct> &failed_rays ); #endif #if CXX_STD == 11 || CXX_STD == 14 extern void RayTraceImageThreadLoop( int N, int nx, int ny, int na, int nb, int nv, const double *x, const double *y, const double *a, const double *b, double dx, double dy, double dz, double da, double db, const double *dv, const RayTrace::ray_gain_struct *gain, extern void RayTraceImageOpenMPLoop( int N, const RayTrace::EUV_beam_struct& euv_beam, const RayTrace::ray_gain_struct *gain, const RayTrace::ray_seed_struct *seed, int method, const std::vector<ray_struct> &rays, double scale, double *image, double *I_ang, unsigned int &failure_code, std::vector<ray_struct> &failed_rays ); #endif #if defined( ENABLE_OPENACC ) extern void RayTraceImageOpenAccLoop( int N, int nx, int ny, int na, int nb, int nv, const double *x, const double *y, const double *a, const double *b, double dx, double dy, double dz, double da, double db, const double *dv, const RayTrace::ray_gain_struct *gain, extern void RayTraceImageOpenAccLoop( int N, const RayTrace::EUV_beam_struct& euv_beam, const RayTrace::ray_gain_struct *gain, const RayTrace::ray_seed_struct *seed, int method, const std::vector<ray_struct> &rays, double scale, double *image, double *I_ang, unsigned int &failure_code, std::vector<ray_struct> &failed_rays ); #endif #if defined( ENABLE_KOKKOS ) #include "kokkos/RayTraceImageKokkos.hpp" /*extern void RayTraceImageKokkosLoop( int N, int nx, int ny, int na, int nb, int nv, const double *x, const double *y, const double *a, const double *b, double dx, double dy, double dz, double da, double db, const double *dv, const RayTrace::ray_gain_struct* gain, const RayTrace::ray_seed_struct* seed, int method, const std::vector<ray_struct>& rays, double scale, double *image, double *I_ang, unsigned int& failure_code, std::vector<ray_struct>& failed_rays );*/ /*extern void RayTraceImageKokkosLoop( int N, const RayTrace::EUV_beam_struct& euv_beam, const RayTrace::ray_gain_struct *gain, const RayTrace::ray_seed_struct *seed, int method, const std::vector<ray_struct> &rays, double scale, double *image, double *I_ang, unsigned int &failure_code, std::vector<ray_struct> &failed_rays );*/ #endif #if defined( ENABLE_CUDA ) extern void RayTraceImageCudaLoop( int N, int nx, int ny, int na, int nb, int nv, const double *x, const double *y, const double *a, const double *b, double dx, double dy, double dz, double da, double db, const double *dv, const RayTrace::ray_gain_struct *gain, extern void RayTraceImageCudaLoop( int N, const RayTrace::EUV_beam_struct& euv_beam, const RayTrace::ray_gain_struct *gain, const RayTrace::ray_seed_struct *seed, int method, const std::vector<ray_struct> &rays, double scale, double *image, double *I_ang, unsigned int &failure_code, std::vector<ray_struct> &failed_rays ); #endif /********************************************************************** * Call RayTraceImage function from a thread loop * **********************************************************************/ void setGPU( int id ) { NULL_USE(id); #if defined( ENABLE_CUDA ) cudaSetDevice(id); #endif } #if CXX_STD == 11 || CXX_STD == 14 void RayTraceImageThreadLoop( size_t N_threads, std::function<void( int, const RayTrace::EUV_beam_struct&, const RayTrace::ray_gain_struct*, const RayTrace::ray_seed_struct*, int, const std::vector<ray_struct>&, double, double*, double*, unsigned int&, std::vector<ray_struct>& )> function, std::function<void( int )> setID, int N, const RayTrace::EUV_beam_struct& beam, const RayTrace::ray_gain_struct *gain, const RayTrace::ray_seed_struct *seed, int method, const std::vector<ray_struct> &rays, double scale, double *image, double *I_ang, unsigned int &failure_code, std::vector<ray_struct> &failed_rays ) { std::vector<std::vector<ray_struct>> rays2( N_threads ); std::vector<std::vector<ray_struct>> failed_rays2( N_threads ); std::vector<unsigned int> failure_code2( N_threads, 0 ); std::vector<double *> image2( N_threads, NULL ); std::vector<double *> I_ang2( N_threads, NULL ); std::vector<std::thread> threads; for ( size_t i = 0, j = 0; i < N_threads; i++ ) { size_t N2 = rays.size() / N_threads + 1; if ( N2 == 0 ) { N2 = 8; } rays2.reserve( N2 ); for ( size_t k = 0; k < N2 && j < rays.size(); k++, j++ ) rays2[i].push_back( rays[j] ); image2[i] = (double *) calloc( beam.nx * beam.ny * beam.nv, sizeof( double ) ); I_ang2[i] = (double *) calloc( beam.na * beam.nb, sizeof( double ) ); setID( i ); threads.push_back( std::thread( function, N, std::ref(beam), gain, seed, method, std::ref( rays2[i] ), scale, image2[i], I_ang2[i], std::ref( failure_code2[i] ), std::ref( failed_rays2[i] ) ) ); } for ( size_t i = 0; i < N_threads; i++ ) { threads[i].join(); for ( int j = 0; j < beam.nx * beam.ny * beam.nv; j++ ) image[j] += image2[i][j]; for ( int j = 0; j < beam.na * beam.nb; j++ ) I_ang[j] += I_ang2[i][j]; failure_code = failure_code | failure_code2[i]; for ( size_t j = 0; j < failed_rays2[i].size(); j++ ) failed_rays.push_back( failed_rays2[i][j] ); free( image2[i] ); free( I_ang2[i] ); rays2[i].clear(); } } #endif /********************************************************************** * Write the failed rays to a file * **********************************************************************/ Loading Loading @@ -195,27 +244,15 @@ void RayTrace::create_image( create_image_struct *info, std::string compute_meth const int na = info->euv_beam->na; const int nb = info->euv_beam->nb; const int nv = info->euv_beam->nv; const double *x = info->euv_beam->x; const double *y = info->euv_beam->y; const double *a = info->euv_beam->a; const double *b = info->euv_beam->b; const double dx = info->euv_beam->dx; const double dy = info->euv_beam->dy; const double dz = info->dz; const double da = info->euv_beam->da; const double db = info->euv_beam->db; const double *dv = info->euv_beam->dv; // Check the euv_beam grid bool grid_error = false; grid_error = grid_error || check_grid( nx, dx, x ); grid_error = grid_error || check_grid( ny, dy, y ); grid_error = grid_error || check_grid( na, da, a ); grid_error = grid_error || check_grid( nb, db, b ); grid_error = grid_error || check_grid( info->euv_beam->nx, info->euv_beam->dx, info->euv_beam->x ); grid_error = grid_error || check_grid( info->euv_beam->ny, info->euv_beam->dy, info->euv_beam->y ); grid_error = grid_error || check_grid( info->euv_beam->na, info->euv_beam->da, info->euv_beam->a ); grid_error = grid_error || check_grid( info->euv_beam->nb, info->euv_beam->db, info->euv_beam->b ); if ( grid_error ) RAY_ERROR( "Only uniform grid spacings are currently supported (euv_beam)" ); if ( dz != info->euv_beam->dz ) RAY_ERROR( "info.dz != euv_beam.dz" ); // Check the seed_beam grid if ( info->seed_beam != NULL ) { Loading Loading @@ -258,7 +295,7 @@ void RayTrace::create_image( create_image_struct *info, std::string compute_meth double seed_dy = info->seed_beam->dy; double seed_da = info->seed_beam->da; double seed_db = info->seed_beam->db; scale = ( seed_dx * seed_dy * seed_da * seed_db ) / ( dx * dy ); scale = ( seed_dx * seed_dy * seed_da * seed_db ) / ( info->euv_beam->dx * info->euv_beam->dy ); timer_name = "propagate_seed"; } else { method = 1; Loading @@ -282,10 +319,10 @@ void RayTrace::create_image( create_image_struct *info, std::string compute_meth // Create ray information ray_struct ray; if ( info->seed_beam == NULL ) { ray.x = (float) x[i]; ray.y = (float) y[j]; ray.a = (float) a[k]; ray.b = (float) b[m]; ray.x = (float) info->euv_beam->x[i]; ray.y = (float) info->euv_beam->y[j]; ray.a = (float) info->euv_beam->a[k]; ray.b = (float) info->euv_beam->b[m]; } else { ray.x = (float) info->seed_beam->x[i]; ray.y = (float) info->seed_beam->y[j]; Loading Loading @@ -317,38 +354,34 @@ void RayTrace::create_image( create_image_struct *info, std::string compute_meth PROFILE_START( timer_name ); if ( compute_method == "openacc" ) { #if defined( ENABLE_OPENACC ) RayTraceImageOpenAccLoop( N, nx, ny, na, nb, nv, x, y, a, b, dx, dy, dz, da, db, dv, info->gain, info->seed, method, rays, scale, image, I_ang, failure_code, failed_rays ); RayTraceImageOpenAccLoop( N, std::ref(*info->euv_beam), info->gain, info->seed, method, rays, scale, image, I_ang, failure_code, failed_rays ); #else RAY_ERROR( "OpenAcc is not availible" ); #endif } else if ( compute_method.substr( 0, 7 ) == "kokkos-" ) { #if defined( ENABLE_KOKKOS ) if ( compute_method == "kokkos-serial" ) { RayTraceImageKokkosLoop<Kokkos::Serial>( N, nx, ny, na, nb, nv, x, y, a, b, dx, dy, dz, da, db, dv, info->gain, info->seed, method, rays, scale, image, I_ang, failure_code, failed_rays ); RayTraceImageKokkosLoop<Kokkos::Serial>( N, std::ref(*info->euv_beam), info->gain, info->seed, method, rays, scale, image, I_ang, failure_code, failed_rays ); } else if ( compute_method == "kokkos-openmp" ) { #ifdef KOKKOS_HAVE_OPENMP RayTraceImageKokkosLoop<Kokkos::OpenMP>( N, nx, ny, na, nb, nv, x, y, a, b, dx, dy, dz, da, db, dv, info->gain, info->seed, method, rays, scale, image, I_ang, failure_code, failed_rays ); RayTraceImageKokkosLoop<Kokkos::OpenMP>( N, std::ref(*info->euv_beam), info->gain, info->seed, method, rays, scale, image, I_ang, failure_code, failed_rays ); #else RAY_ERROR( "Kokkos compiled without OpenMP" ); #endif } else if ( compute_method == "kokkos-thread" ) { #ifdef KOKKOS_HAVE_PTHREAD RayTraceImageKokkosLoop<Kokkos::Threads>( N, nx, ny, na, nb, nv, x, y, a, b, dx, dy, dz, da, db, dv, info->gain, info->seed, method, rays, scale, image, I_ang, failure_code, failed_rays ); RayTraceImageKokkosLoop<Kokkos::Threads>( N, std::ref(*info->euv_beam), info->gain, info->seed, method, rays, scale, image, I_ang, failure_code, failed_rays ); #else RAY_ERROR( "Kokkos compiled without pthreads" ); #endif } else if ( compute_method == "kokkos-cuda" ) { #ifdef KOKKOS_HAVE_CUDA RayTraceImageKokkosLoop<Kokkos::Cuda>( N, nx, ny, na, nb, nv, x, y, a, b, dx, dy, dz, da, db, dv, info->gain, info->seed, method, rays, scale, image, I_ang, failure_code, failed_rays ); RayTraceImageKokkosLoop<Kokkos::Cuda>( N, std::ref(*info->euv_beam), info->gain, info->seed, method, rays, scale, image, I_ang, failure_code, failed_rays ); #else RAY_ERROR( "Kokkos compiled without cuda" ); #endif Loading @@ -360,25 +393,37 @@ void RayTrace::create_image( create_image_struct *info, std::string compute_meth #endif } else if ( compute_method == "cuda" ) { #if defined( ENABLE_CUDA ) RayTraceImageCudaLoop( N, nx, ny, na, nb, nv, x, y, a, b, dx, dy, dz, da, db, dv, info->gain, info->seed, method, rays, scale, image, I_ang, failure_code, failed_rays ); RayTraceImageCudaLoop( N, std::ref(*info->euv_beam), info->gain, info->seed, method, rays, scale, image, I_ang, failure_code, failed_rays ); #else RAY_ERROR( "Cuda is not availible" ); #endif } else if ( compute_method == "cuda-multigpu" ) { #if defined( ENABLE_CUDA ) int N_gpu; cudaGetDeviceCount( &N_gpu ); RayTraceImageThreadLoop( N_gpu, RayTraceImageCudaLoop, setGPU, N, std::ref(*info->euv_beam), info->gain, info->seed, method, rays, scale, image, I_ang, failure_code, failed_rays ); #else RAY_ERROR( "Cuda-MultiGPU is not availible" ); #endif } else if ( compute_method == "cpu" ) { RayTraceImageCPULoop( N, nx, ny, na, nb, nv, x, y, a, b, dx, dy, dz, da, db, dv, info->gain, info->seed, method, rays, scale, image, I_ang, failure_code, failed_rays ); RayTraceImageCPULoop( N, std::ref(*info->euv_beam), info->gain, info->seed, method, rays, scale, image, I_ang, failure_code, failed_rays ); } else if ( compute_method == "threads" ) { #if CXX_STD == 11 || CXX_STD == 14 RayTraceImageThreadLoop( N, nx, ny, na, nb, nv, x, y, a, b, dx, dy, dz, da, db, dv, info->gain, info->seed, method, rays, scale, image, I_ang, failure_code, failed_rays ); size_t N_threads = std::thread::hardware_concurrency(); RayTraceImageThreadLoop( N_threads, RayTraceImageCPULoop, [](int) {}, N, std::ref(*info->euv_beam), info->gain, info->seed, method, rays, scale, image, I_ang, failure_code, failed_rays ); #else RAY_ERROR( "Threaded version requires C++11" ); #endif } else if ( compute_method == "openmp" ) { #if defined( ENABLE_OPENMP ) RayTraceImageOpenMPLoop( N, nx, ny, na, nb, nv, x, y, a, b, dx, dy, dz, da, db, dv, info->gain, info->seed, method, rays, scale, image, I_ang, failure_code, failed_rays ); RayTraceImageOpenMPLoop( N, std::ref(*info->euv_beam), info->gain, info->seed, method, rays, scale, image, I_ang, failure_code, failed_rays ); #else RAY_ERROR( "OpenMP is not availible" ); #endif Loading @@ -389,7 +434,7 @@ void RayTrace::create_image( create_image_struct *info, std::string compute_meth // Save failed rays if ( failure_code != 0 ) { write_failures( failure_code, failed_rays, method, N, dz, info->gain ); write_failures( failure_code, failed_rays, method, N, info->euv_beam->dz, info->gain ); RAY_ERROR( "Some rays failed" ); } Loading