Loading .gitignore +1 −0 Original line number Diff line number Diff line Loading @@ -9,5 +9,6 @@ build/*install* build/*interp* build/*null_timer build/*lib* build/*compile_commands.json build/*core.* build/pgi/*.lst build/configure +1 −1 Original line number Diff line number Diff line Loading @@ -5,7 +5,7 @@ export KOKKOS_DIR=/packages/TPLs/install/opt/kokkos rm -rf CMake* cmake \ -D CMAKE_BUILD_TYPE=Debug \ -D CMAKE_BUILD_TYPE=Release \ -D CMAKE_CXX_COMPILER=g++ \ -D CXX_STD=11 \ -D USE_OPENACC=0 \ Loading src/CMakeLists.txt +1 −1 Original line number Diff line number Diff line Loading @@ -178,7 +178,7 @@ ADD_DISTCLEAN( libRayTrace.* null_timer CreateImage* ) # Create the library INCLUDE_DIRECTORIES( ${RAYTRACE_SOURCE_DIR} ) ADD_DEFINITIONS( -DDISABLE_WRITE_FAILED_RAYS ) SET( SOURCES RayTrace RayTraceImage.cpp RayTraceStructures.cpp utilities/RayUtilities.cpp AtomicModel/interp.cpp RayTraceImageCPU.cpp CreateImageHelpers.cpp ) SET( SOURCES RayTrace RayTraceImage.cpp RayTraceStructures.cpp utilities/RayUtilities.cpp AtomicModel/interp.cpp RayTraceImageCPU.cpp CreateImageHelpers.cpp MPI_helpers.cpp ) IF ( USE_OPENACC ) SET( SOURCES ${SOURCES} RayTraceImageOpenACC.cpp ) ENDIF() Loading src/CreateImage.cpp +54 −33 Original line number Diff line number Diff line Loading @@ -27,9 +27,9 @@ static RayTrace::create_image_struct *loadInput( { // Load the input file FILE *fid = fopen( filename.c_str(), "rb" ); if ( fid == NULL ) { if ( fid == nullptr ) { std::cerr << "Error opening file: " << filename << std::endl; return NULL; return nullptr; } uint64_t N_bytes = 0; fread2( &N_bytes, sizeof( uint64_t ), 1, fid ); Loading @@ -41,16 +41,16 @@ static RayTrace::create_image_struct *loadInput( auto info = new RayTrace::create_image_struct(); info->unpack( std::pair<char *, size_t>( data, N_bytes ) ); delete[] data; if ( image0 != NULL ) if ( image0 != nullptr ) *image0 = info->image; else delete[] info->image; info->image = NULL; if ( I_ang0 != NULL ) free( (void *) info->image ); info->image = nullptr; if ( I_ang0 != nullptr ) *I_ang0 = info->I_ang; else delete[] info->I_ang; info->I_ang = NULL; free( (void *) info->I_ang ); info->I_ang = nullptr; if ( scale != 1.0 ) scale_problem( *info, scale ); return info; Loading @@ -60,15 +60,15 @@ static RayTrace::create_image_struct *loadInput( // Free the structure static inline void free2( RayTrace::create_image_struct *info ) { if ( info == NULL ) if ( info == nullptr ) return; if ( info->image != NULL ) { if ( info->image != nullptr ) { free( (void *) info->image ); info->image = NULL; info->image = nullptr; } if ( info->I_ang != NULL ) { if ( info->I_ang != nullptr ) { free( (void *) info->I_ang ); info->I_ang = NULL; info->I_ang = nullptr; } delete info->euv_beam; delete info->seed_beam; Loading @@ -77,7 +77,6 @@ static inline void free2( RayTrace::create_image_struct *info ) delete info; } // Run the tests for a single file int run_tests( const std::string &filename, const Options &options ) { Loading Loading @@ -113,6 +112,10 @@ int run_tests( const std::string &filename, const Options &options ) double start = getTime(); for ( int it = 0; it < iterations; it++ ) { RayTrace::create_image( info, methods[i] ); if ( options.benchmark ) { // Mimic communication in full application communicate( *info ); } double stop = getTime(); time[i].push_back( stop - start ); start = stop; Loading @@ -130,6 +133,19 @@ int run_tests( const std::string &filename, const Options &options ) info->I_ang = NULL; } if ( rank() == 0 ) { if ( options.benchmark ) { double t = getAvg( time[0] ); double N = 0; if ( info->seed_beam == nullptr ) { auto &beam = *( info->euv_beam ); N = beam.nx * beam.ny * beam.na * beam.nb; } else { auto &beam = *( info->seed_beam ); N = beam.nx * beam.ny * beam.na * beam.nb; } N *= size(); printf( "\n%0.3e rays/s\n", N / t ); } else { printf( "\n METHOD Avg Min Max Std Dev\n" ); for ( size_t i = 0; i < methods.size(); i++ ) { double min = getMin( time[i] ); Loading @@ -148,6 +164,7 @@ int run_tests( const std::string &filename, const Options &options ) } std::cout << std::endl; } } // Free memory and return free( (void *) image0 ); Loading Loading @@ -204,6 +221,7 @@ int main( int argc, char *argv[] ) KokkosInitialize( argc, argv ); // Print hardware stats if ( rank() == 0 ) printHardware(); // Run the tests for all files Loading @@ -211,10 +229,13 @@ int main( int argc, char *argv[] ) for ( size_t i = 0; i < filenames.size(); i++ ) N_errors += run_tests( filenames[i], options ); // Finished if ( rank() == 0 ) { if ( N_errors == 0 ) std::cout << "\nAll tests passed\n"; else std::cout << "\nSome tests failed\n"; } KokkosFinalize(); shutdown(); return N_errors; Loading src/CreateImageHelpers.cpp +83 −22 Original line number Diff line number Diff line #include "CreateImageHelpers.h" #include "utilities/RayUtilityMacros.h" #include <algorithm> #include <math.h> Loading Loading @@ -111,7 +112,56 @@ double getTime() } // Check the answer /********************************************************************** * Perform the communications * **********************************************************************/ void communicate( const RayTrace::create_image_struct &info ) { #if USE_MPI int nx = info.euv_beam->nx; int ny = info.euv_beam->ny; int na = info.euv_beam->na; int nb = info.euv_beam->nb; int nv = info.euv_beam->nv; // Allocate memory to use as a temporary buffer int N_seed = info.seed_beam == nullptr ? 0 : 1; int Nm = ( nv + 2 * nx * ny + na * nb ) + ( nv + nx * ny + na * nb ) * N_seed; auto *mem1 = new double[Nm]; auto *mem2 = new double[Nm]; // Copy the variables to the buffer memset( mem1, 0, Nm * sizeof( double ) ); double *E_v = &mem1[0]; double *image = &mem1[nv]; double *W = &mem1[nv + nx * ny]; double *E_ang = &mem1[nv + 2 * nx * ny]; for ( int i = 0; i < nx * ny; i++ ) { for ( int j = 0; j < nv; j++ ) { E_v[j] += info.image[j + i * nv]; image[i] += info.image[j + i * nv]; } W[i] = 0.1 * image[i]; } for ( int i = 0; i < na * nb; i++ ) E_ang[i] += info.I_ang[i]; for ( int i = 0; i < N_seed; i++ ) { size_t k = ( nv + 2 * nx * ny + na * nb ) + ( nv + nx * ny + na * nb ) * i; memcpy( &mem1[k], mem1, ( nv + nx * ny + na * nb ) * sizeof( double ) ); } // Perform the communication MPI_Barrier( MPI_COMM_WORLD ); MPI_Allreduce( mem1, mem2, Nm, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD ); // Free the memory delete[] mem1; delete[] mem2; #else NULL_USE( info ); #endif } /********************************************************************** * Check the answer * **********************************************************************/ bool check_ans( const double *image0, const double *I_ang0, const RayTrace::create_image_struct &data ) { Loading Loading @@ -151,7 +201,9 @@ bool check_ans( } // Scale the input problem /********************************************************************** * Scale the input problem * **********************************************************************/ template<class TYPE> void scale_beam( TYPE &beam, double scale ) { Loading Loading @@ -195,13 +247,15 @@ void scale_beam( TYPE &beam, double scale ) void scale_problem( RayTrace::create_image_struct &info, double scale ) { scale_beam( *const_cast<RayTrace::EUV_beam_struct *>( info.euv_beam ), pow( scale, 0.25 ) ); if ( info.seed_beam != NULL ) if ( info.seed_beam != nullptr ) scale_beam( *const_cast<RayTrace::seed_beam_struct *>( info.seed_beam ), pow( scale, 0.25 ) ); } // Get the minimum value /********************************************************************** * Basic math operations * **********************************************************************/ double getMin( const std::vector<double> &x ) { double y = x[0]; Loading @@ -209,9 +263,6 @@ double getMin( const std::vector<double> &x ) y = std::min( y, x[i] ); return y; } // Get the maximum value double getMax( const std::vector<double> &x ) { double y = x[0]; Loading @@ -219,9 +270,6 @@ double getMax( const std::vector<double> &x ) y = std::max( y, x[i] ); return y; } // Get the average value double getAvg( const std::vector<double> &x ) { double y = 0; Loading @@ -229,9 +277,6 @@ double getAvg( const std::vector<double> &x ) y += x[i]; return y / x.size(); } // Get the standard deviation double getDev( const std::vector<double> &x ) { double avg = getAvg( x ); Loading @@ -242,7 +287,10 @@ double getDev( const std::vector<double> &x ) return y; } // Print info about the hardware /********************************************************************** * Print info about the hardware * **********************************************************************/ void printHardware() { // Get number of threads Loading Loading @@ -321,7 +369,7 @@ static inline void erase( std::vector<std::string> &x, const std::string &v ) if ( it != x.end() ) x.erase( it ); } Options::Options() : iterations( 1 ), scale( 1.0 ) {} Options::Options() : benchmark( false ), iterations( 1 ), scale( 1.0 ) {} Options::Options( int argc, char *argv[] ) : iterations( 1 ), scale( 1.0 ) { const char *err_msg = Loading @@ -331,14 +379,18 @@ Options::Options( int argc, char *argv[] ) : iterations( 1 ), scale( 1.0 ) " -methods=METHODS Comma seperated list of methods to test\n" " cpu, threads, OpenMP, Cuda, Cuda-MultiGPU, OpenAcc, Kokkos-Serial, " "Kokkos-Thread, Kokkos-OpenMP, Kokkos-Cuda\n" " all - run all availible tests (default)" " parallel - run all availible parallel tests" " -iterations=N Number of iterations to run. Time returned will be " "the average time/iteration.\n" " -scale=factor Increate the size of the problem by ~ this factor. " "(2.0 - twice as expensive)\n" " all - run all availible tests (default)\n" " parallel - run all availible parallel tests\n" " -iterations=N Number of iterations to run.\n" " Time returned will be the average time/iteration.\n" " -scale=factor Increate the cost of the problem by ~ this factor.\n" " 2.0 - twice as expensive\n" " Note: this will disable checking the answer.\n" " Note: the scale factor is only approximate.\n"; " Note: the scale factor is only approximate.\n" " -benchmark Run in benchmark mode.\n" " Benchmark mode will run the calculation across all nodes,\n" " returning the average rays/second processed\n" " Note: a single method must be specified\n"; std::vector<std::string> exclude; // Process the input arguments for ( int i = 1; i < argc; i++ ) { Loading @@ -358,6 +410,8 @@ Options::Options( int argc, char *argv[] ) : iterations( 1 ), scale( 1.0 ) iterations = atoi( &argv[i][12] ); } else if ( strncmp( argv[i], "-scale=", 7 ) == 0 ) { scale = atof( &argv[i][7] ); } else if ( strncmp( argv[i], "-benchmark", 10 ) == 0 ) { benchmark = true; } else { std::cerr << "Unknown option: " << argv[i] << std::endl; exit( 1 ); Loading Loading @@ -396,4 +450,11 @@ Options::Options( int argc, char *argv[] ) : iterations( 1 ), scale( 1.0 ) else ++it; } // Check benchmark options if ( benchmark ) { if ( methods.size() != 1 ) { std::cerr << "Only one method must be speficied in benchmark mode (-methods)\n"; exit( 1 ); } } } Loading
.gitignore +1 −0 Original line number Diff line number Diff line Loading @@ -9,5 +9,6 @@ build/*install* build/*interp* build/*null_timer build/*lib* build/*compile_commands.json build/*core.* build/pgi/*.lst
build/configure +1 −1 Original line number Diff line number Diff line Loading @@ -5,7 +5,7 @@ export KOKKOS_DIR=/packages/TPLs/install/opt/kokkos rm -rf CMake* cmake \ -D CMAKE_BUILD_TYPE=Debug \ -D CMAKE_BUILD_TYPE=Release \ -D CMAKE_CXX_COMPILER=g++ \ -D CXX_STD=11 \ -D USE_OPENACC=0 \ Loading
src/CMakeLists.txt +1 −1 Original line number Diff line number Diff line Loading @@ -178,7 +178,7 @@ ADD_DISTCLEAN( libRayTrace.* null_timer CreateImage* ) # Create the library INCLUDE_DIRECTORIES( ${RAYTRACE_SOURCE_DIR} ) ADD_DEFINITIONS( -DDISABLE_WRITE_FAILED_RAYS ) SET( SOURCES RayTrace RayTraceImage.cpp RayTraceStructures.cpp utilities/RayUtilities.cpp AtomicModel/interp.cpp RayTraceImageCPU.cpp CreateImageHelpers.cpp ) SET( SOURCES RayTrace RayTraceImage.cpp RayTraceStructures.cpp utilities/RayUtilities.cpp AtomicModel/interp.cpp RayTraceImageCPU.cpp CreateImageHelpers.cpp MPI_helpers.cpp ) IF ( USE_OPENACC ) SET( SOURCES ${SOURCES} RayTraceImageOpenACC.cpp ) ENDIF() Loading
src/CreateImage.cpp +54 −33 Original line number Diff line number Diff line Loading @@ -27,9 +27,9 @@ static RayTrace::create_image_struct *loadInput( { // Load the input file FILE *fid = fopen( filename.c_str(), "rb" ); if ( fid == NULL ) { if ( fid == nullptr ) { std::cerr << "Error opening file: " << filename << std::endl; return NULL; return nullptr; } uint64_t N_bytes = 0; fread2( &N_bytes, sizeof( uint64_t ), 1, fid ); Loading @@ -41,16 +41,16 @@ static RayTrace::create_image_struct *loadInput( auto info = new RayTrace::create_image_struct(); info->unpack( std::pair<char *, size_t>( data, N_bytes ) ); delete[] data; if ( image0 != NULL ) if ( image0 != nullptr ) *image0 = info->image; else delete[] info->image; info->image = NULL; if ( I_ang0 != NULL ) free( (void *) info->image ); info->image = nullptr; if ( I_ang0 != nullptr ) *I_ang0 = info->I_ang; else delete[] info->I_ang; info->I_ang = NULL; free( (void *) info->I_ang ); info->I_ang = nullptr; if ( scale != 1.0 ) scale_problem( *info, scale ); return info; Loading @@ -60,15 +60,15 @@ static RayTrace::create_image_struct *loadInput( // Free the structure static inline void free2( RayTrace::create_image_struct *info ) { if ( info == NULL ) if ( info == nullptr ) return; if ( info->image != NULL ) { if ( info->image != nullptr ) { free( (void *) info->image ); info->image = NULL; info->image = nullptr; } if ( info->I_ang != NULL ) { if ( info->I_ang != nullptr ) { free( (void *) info->I_ang ); info->I_ang = NULL; info->I_ang = nullptr; } delete info->euv_beam; delete info->seed_beam; Loading @@ -77,7 +77,6 @@ static inline void free2( RayTrace::create_image_struct *info ) delete info; } // Run the tests for a single file int run_tests( const std::string &filename, const Options &options ) { Loading Loading @@ -113,6 +112,10 @@ int run_tests( const std::string &filename, const Options &options ) double start = getTime(); for ( int it = 0; it < iterations; it++ ) { RayTrace::create_image( info, methods[i] ); if ( options.benchmark ) { // Mimic communication in full application communicate( *info ); } double stop = getTime(); time[i].push_back( stop - start ); start = stop; Loading @@ -130,6 +133,19 @@ int run_tests( const std::string &filename, const Options &options ) info->I_ang = NULL; } if ( rank() == 0 ) { if ( options.benchmark ) { double t = getAvg( time[0] ); double N = 0; if ( info->seed_beam == nullptr ) { auto &beam = *( info->euv_beam ); N = beam.nx * beam.ny * beam.na * beam.nb; } else { auto &beam = *( info->seed_beam ); N = beam.nx * beam.ny * beam.na * beam.nb; } N *= size(); printf( "\n%0.3e rays/s\n", N / t ); } else { printf( "\n METHOD Avg Min Max Std Dev\n" ); for ( size_t i = 0; i < methods.size(); i++ ) { double min = getMin( time[i] ); Loading @@ -148,6 +164,7 @@ int run_tests( const std::string &filename, const Options &options ) } std::cout << std::endl; } } // Free memory and return free( (void *) image0 ); Loading Loading @@ -204,6 +221,7 @@ int main( int argc, char *argv[] ) KokkosInitialize( argc, argv ); // Print hardware stats if ( rank() == 0 ) printHardware(); // Run the tests for all files Loading @@ -211,10 +229,13 @@ int main( int argc, char *argv[] ) for ( size_t i = 0; i < filenames.size(); i++ ) N_errors += run_tests( filenames[i], options ); // Finished if ( rank() == 0 ) { if ( N_errors == 0 ) std::cout << "\nAll tests passed\n"; else std::cout << "\nSome tests failed\n"; } KokkosFinalize(); shutdown(); return N_errors; Loading
src/CreateImageHelpers.cpp +83 −22 Original line number Diff line number Diff line #include "CreateImageHelpers.h" #include "utilities/RayUtilityMacros.h" #include <algorithm> #include <math.h> Loading Loading @@ -111,7 +112,56 @@ double getTime() } // Check the answer /********************************************************************** * Perform the communications * **********************************************************************/ void communicate( const RayTrace::create_image_struct &info ) { #if USE_MPI int nx = info.euv_beam->nx; int ny = info.euv_beam->ny; int na = info.euv_beam->na; int nb = info.euv_beam->nb; int nv = info.euv_beam->nv; // Allocate memory to use as a temporary buffer int N_seed = info.seed_beam == nullptr ? 0 : 1; int Nm = ( nv + 2 * nx * ny + na * nb ) + ( nv + nx * ny + na * nb ) * N_seed; auto *mem1 = new double[Nm]; auto *mem2 = new double[Nm]; // Copy the variables to the buffer memset( mem1, 0, Nm * sizeof( double ) ); double *E_v = &mem1[0]; double *image = &mem1[nv]; double *W = &mem1[nv + nx * ny]; double *E_ang = &mem1[nv + 2 * nx * ny]; for ( int i = 0; i < nx * ny; i++ ) { for ( int j = 0; j < nv; j++ ) { E_v[j] += info.image[j + i * nv]; image[i] += info.image[j + i * nv]; } W[i] = 0.1 * image[i]; } for ( int i = 0; i < na * nb; i++ ) E_ang[i] += info.I_ang[i]; for ( int i = 0; i < N_seed; i++ ) { size_t k = ( nv + 2 * nx * ny + na * nb ) + ( nv + nx * ny + na * nb ) * i; memcpy( &mem1[k], mem1, ( nv + nx * ny + na * nb ) * sizeof( double ) ); } // Perform the communication MPI_Barrier( MPI_COMM_WORLD ); MPI_Allreduce( mem1, mem2, Nm, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD ); // Free the memory delete[] mem1; delete[] mem2; #else NULL_USE( info ); #endif } /********************************************************************** * Check the answer * **********************************************************************/ bool check_ans( const double *image0, const double *I_ang0, const RayTrace::create_image_struct &data ) { Loading Loading @@ -151,7 +201,9 @@ bool check_ans( } // Scale the input problem /********************************************************************** * Scale the input problem * **********************************************************************/ template<class TYPE> void scale_beam( TYPE &beam, double scale ) { Loading Loading @@ -195,13 +247,15 @@ void scale_beam( TYPE &beam, double scale ) void scale_problem( RayTrace::create_image_struct &info, double scale ) { scale_beam( *const_cast<RayTrace::EUV_beam_struct *>( info.euv_beam ), pow( scale, 0.25 ) ); if ( info.seed_beam != NULL ) if ( info.seed_beam != nullptr ) scale_beam( *const_cast<RayTrace::seed_beam_struct *>( info.seed_beam ), pow( scale, 0.25 ) ); } // Get the minimum value /********************************************************************** * Basic math operations * **********************************************************************/ double getMin( const std::vector<double> &x ) { double y = x[0]; Loading @@ -209,9 +263,6 @@ double getMin( const std::vector<double> &x ) y = std::min( y, x[i] ); return y; } // Get the maximum value double getMax( const std::vector<double> &x ) { double y = x[0]; Loading @@ -219,9 +270,6 @@ double getMax( const std::vector<double> &x ) y = std::max( y, x[i] ); return y; } // Get the average value double getAvg( const std::vector<double> &x ) { double y = 0; Loading @@ -229,9 +277,6 @@ double getAvg( const std::vector<double> &x ) y += x[i]; return y / x.size(); } // Get the standard deviation double getDev( const std::vector<double> &x ) { double avg = getAvg( x ); Loading @@ -242,7 +287,10 @@ double getDev( const std::vector<double> &x ) return y; } // Print info about the hardware /********************************************************************** * Print info about the hardware * **********************************************************************/ void printHardware() { // Get number of threads Loading Loading @@ -321,7 +369,7 @@ static inline void erase( std::vector<std::string> &x, const std::string &v ) if ( it != x.end() ) x.erase( it ); } Options::Options() : iterations( 1 ), scale( 1.0 ) {} Options::Options() : benchmark( false ), iterations( 1 ), scale( 1.0 ) {} Options::Options( int argc, char *argv[] ) : iterations( 1 ), scale( 1.0 ) { const char *err_msg = Loading @@ -331,14 +379,18 @@ Options::Options( int argc, char *argv[] ) : iterations( 1 ), scale( 1.0 ) " -methods=METHODS Comma seperated list of methods to test\n" " cpu, threads, OpenMP, Cuda, Cuda-MultiGPU, OpenAcc, Kokkos-Serial, " "Kokkos-Thread, Kokkos-OpenMP, Kokkos-Cuda\n" " all - run all availible tests (default)" " parallel - run all availible parallel tests" " -iterations=N Number of iterations to run. Time returned will be " "the average time/iteration.\n" " -scale=factor Increate the size of the problem by ~ this factor. " "(2.0 - twice as expensive)\n" " all - run all availible tests (default)\n" " parallel - run all availible parallel tests\n" " -iterations=N Number of iterations to run.\n" " Time returned will be the average time/iteration.\n" " -scale=factor Increate the cost of the problem by ~ this factor.\n" " 2.0 - twice as expensive\n" " Note: this will disable checking the answer.\n" " Note: the scale factor is only approximate.\n"; " Note: the scale factor is only approximate.\n" " -benchmark Run in benchmark mode.\n" " Benchmark mode will run the calculation across all nodes,\n" " returning the average rays/second processed\n" " Note: a single method must be specified\n"; std::vector<std::string> exclude; // Process the input arguments for ( int i = 1; i < argc; i++ ) { Loading @@ -358,6 +410,8 @@ Options::Options( int argc, char *argv[] ) : iterations( 1 ), scale( 1.0 ) iterations = atoi( &argv[i][12] ); } else if ( strncmp( argv[i], "-scale=", 7 ) == 0 ) { scale = atof( &argv[i][7] ); } else if ( strncmp( argv[i], "-benchmark", 10 ) == 0 ) { benchmark = true; } else { std::cerr << "Unknown option: " << argv[i] << std::endl; exit( 1 ); Loading Loading @@ -396,4 +450,11 @@ Options::Options( int argc, char *argv[] ) : iterations( 1 ), scale( 1.0 ) else ++it; } // Check benchmark options if ( benchmark ) { if ( methods.size() != 1 ) { std::cerr << "Only one method must be speficied in benchmark mode (-methods)\n"; exit( 1 ); } } }