Loading src/CMakeLists.txt +1 −0 Original line number Diff line number Diff line Loading @@ -133,6 +133,7 @@ ENDIF() CHECK_ENABLE_FLAG( USE_CUDA 0 ) IF ( USE_CUDA ) ADD_DEFINITIONS( -DUSE_CUDA ) SET( CMAKE_CUDA_STANDARD ${CMAKE_CXX_STANDARD} ) ENABLE_LANGUAGE( CUDA ) ENDIF() Loading src/CreateImage.cpp +20 −11 Original line number Diff line number Diff line Loading @@ -123,23 +123,25 @@ int run_tests( const std::string &filename, const Options &options ) std::vector<double> tmp2; while ( true ) { for ( int it = 0; it < iterations; it++ ) { auto t1 = std::chrono::steady_clock::now(); barrier(); // Run the calculation auto t1 = std::chrono::steady_clock::now(); RayTrace::create_image( info, methods[i] ); auto t2 = std::chrono::steady_clock::now(); // Mimic communication in full application communicate( *info ); barrier(); auto t3 = std::chrono::steady_clock::now(); communicate( *info ); auto t4 = std::chrono::steady_clock::now(); tmp1.push_back( 1e-9 * diff_ns( t2, t1 ) ); tmp2.push_back( 1e-9 * diff_ns( t3, t1 ) ); t1 = t2; tmp2.push_back( 1e-9 * diff_ns( t4, t3 ) ); } double time = 1e-9 * diff_ns( std::chrono::steady_clock::now(), t0 ); time = bcast( time ); if ( time > options.time ) break; } perf1[i] = Times( tmp2 ); perf1[i] = Times( tmp1 ); perf2[i] = Times( tmp2 ); // Check the results if ( options.scale == 1.0 ) { Loading @@ -155,11 +157,11 @@ int run_tests( const std::string &filename, const Options &options ) if ( rank == 0 ) { if ( perf1[i].std[1] > 0.25 * perf1[i].avg[1] ) { printf( " Standard deviation exceeded tolerance (25%%)\n" ); N_errors++; // N_errors++; } if ( perf1[i].max[1] > 2.0 * perf1[i].avg[1] ) { printf( " Maximum runtime exceeded average by more than 2X\n" ); N_errors++; // N_errors++; } } free( (void *) info->image ); Loading @@ -169,7 +171,7 @@ int run_tests( const std::string &filename, const Options &options ) } if ( rank == 0 ) { if ( options.benchmark ) { double t = perf2[0].avg[1]; double t = perf1[0].avg[1] + perf2[0].avg[1]; double N = 0; if ( info->seed_beam == nullptr ) { auto &beam = *( info->euv_beam ); Loading @@ -181,10 +183,17 @@ int run_tests( const std::string &filename, const Options &options ) N *= getSize(); printf( "\n%0.3e rays/s\n", N / t ); } else { printf( "\n METHOD Avg Min Max Std Dev\n" ); printf( "\n" ); printf( " Compute Communicate\n" ); printf( " METHOD Avg Min Max Std Dev" ); printf( " Avg Min Max Std Dev\n" ); for ( size_t i = 0; i < methods.size(); i++ ) { printf( "%14s %7.3f %7.3f %7.3f %7.3f\n", methods[i].c_str(), perf1[i].avg[1], perf1[i].min[1], perf1[i].max[1], perf1[i].std[1] ); auto method = methods[i].c_str(); const auto &t1 = perf1[i]; const auto &t2 = perf2[i]; printf( "%14s %7.3f %7.3f %7.3f %7.3f %7.3f %7.3f %7.3f %7.3f\n", method, t1.avg[1], t1.min[1], t1.max[1], t1.std[1], t2.avg[1], t2.min[1], t2.max[1], t2.std[1] ); } std::cout << std::endl; } Loading src/CreateImageHelpers.cpp +4 −2 Original line number Diff line number Diff line Loading @@ -257,8 +257,10 @@ Times::Times( const std::vector<double> &x ) double tmp = std[1]; MPI_Allreduce( &tmp, &std[1], 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD ); #endif std[0] = sqrt( std[0] / ( N[0] - 1 ) ); std[1] = sqrt( std[1] / ( N[1] - 1 ) ); int N1 = std::max<int>( N[0] - 1, 1 ); int N2 = std::max<int>( N[1] - 1, 1 ); std[0] = sqrt( std[0] / N1 ); std[1] = sqrt( std[1] / N2 ); } Loading Loading
src/CMakeLists.txt +1 −0 Original line number Diff line number Diff line Loading @@ -133,6 +133,7 @@ ENDIF() CHECK_ENABLE_FLAG( USE_CUDA 0 ) IF ( USE_CUDA ) ADD_DEFINITIONS( -DUSE_CUDA ) SET( CMAKE_CUDA_STANDARD ${CMAKE_CXX_STANDARD} ) ENABLE_LANGUAGE( CUDA ) ENDIF() Loading
src/CreateImage.cpp +20 −11 Original line number Diff line number Diff line Loading @@ -123,23 +123,25 @@ int run_tests( const std::string &filename, const Options &options ) std::vector<double> tmp2; while ( true ) { for ( int it = 0; it < iterations; it++ ) { auto t1 = std::chrono::steady_clock::now(); barrier(); // Run the calculation auto t1 = std::chrono::steady_clock::now(); RayTrace::create_image( info, methods[i] ); auto t2 = std::chrono::steady_clock::now(); // Mimic communication in full application communicate( *info ); barrier(); auto t3 = std::chrono::steady_clock::now(); communicate( *info ); auto t4 = std::chrono::steady_clock::now(); tmp1.push_back( 1e-9 * diff_ns( t2, t1 ) ); tmp2.push_back( 1e-9 * diff_ns( t3, t1 ) ); t1 = t2; tmp2.push_back( 1e-9 * diff_ns( t4, t3 ) ); } double time = 1e-9 * diff_ns( std::chrono::steady_clock::now(), t0 ); time = bcast( time ); if ( time > options.time ) break; } perf1[i] = Times( tmp2 ); perf1[i] = Times( tmp1 ); perf2[i] = Times( tmp2 ); // Check the results if ( options.scale == 1.0 ) { Loading @@ -155,11 +157,11 @@ int run_tests( const std::string &filename, const Options &options ) if ( rank == 0 ) { if ( perf1[i].std[1] > 0.25 * perf1[i].avg[1] ) { printf( " Standard deviation exceeded tolerance (25%%)\n" ); N_errors++; // N_errors++; } if ( perf1[i].max[1] > 2.0 * perf1[i].avg[1] ) { printf( " Maximum runtime exceeded average by more than 2X\n" ); N_errors++; // N_errors++; } } free( (void *) info->image ); Loading @@ -169,7 +171,7 @@ int run_tests( const std::string &filename, const Options &options ) } if ( rank == 0 ) { if ( options.benchmark ) { double t = perf2[0].avg[1]; double t = perf1[0].avg[1] + perf2[0].avg[1]; double N = 0; if ( info->seed_beam == nullptr ) { auto &beam = *( info->euv_beam ); Loading @@ -181,10 +183,17 @@ int run_tests( const std::string &filename, const Options &options ) N *= getSize(); printf( "\n%0.3e rays/s\n", N / t ); } else { printf( "\n METHOD Avg Min Max Std Dev\n" ); printf( "\n" ); printf( " Compute Communicate\n" ); printf( " METHOD Avg Min Max Std Dev" ); printf( " Avg Min Max Std Dev\n" ); for ( size_t i = 0; i < methods.size(); i++ ) { printf( "%14s %7.3f %7.3f %7.3f %7.3f\n", methods[i].c_str(), perf1[i].avg[1], perf1[i].min[1], perf1[i].max[1], perf1[i].std[1] ); auto method = methods[i].c_str(); const auto &t1 = perf1[i]; const auto &t2 = perf2[i]; printf( "%14s %7.3f %7.3f %7.3f %7.3f %7.3f %7.3f %7.3f %7.3f\n", method, t1.avg[1], t1.min[1], t1.max[1], t1.std[1], t2.avg[1], t2.min[1], t2.max[1], t2.std[1] ); } std::cout << std::endl; } Loading
src/CreateImageHelpers.cpp +4 −2 Original line number Diff line number Diff line Loading @@ -257,8 +257,10 @@ Times::Times( const std::vector<double> &x ) double tmp = std[1]; MPI_Allreduce( &tmp, &std[1], 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD ); #endif std[0] = sqrt( std[0] / ( N[0] - 1 ) ); std[1] = sqrt( std[1] / ( N[1] - 1 ) ); int N1 = std::max<int>( N[0] - 1, 1 ); int N2 = std::max<int>( N[1] - 1, 1 ); std[0] = sqrt( std[0] / N1 ); std[1] = sqrt( std[1] / N2 ); } Loading