Commit f3641551 authored by Berrill, Mark's avatar Berrill, Mark
Browse files

Merging changes to intialize Cuda before getting times

parent 963bacf5
Loading
Loading
Loading
Loading
+109 −47
Original line number Diff line number Diff line
@@ -4,6 +4,7 @@
#include "RayTrace.h"
#include "MPI_helpers.h"
#include "CreateImageHelpers.h"
#include "utilities/RayUtilities.h"

#include <cstring>
#include <iostream>
@@ -11,6 +12,7 @@
#include <sstream>
#include <stdint.h>
#include <string>
#include <algorithm>



@@ -19,18 +21,15 @@
#endif




// Run the tests for a single file
int run_tests( const std::string& filename, const Options& options )
// Load the input file
static RayTrace::create_image_struct* loadInput( const std::string& filename,
    double scale, double **image0=NULL, double **I_ang0=NULL )
{
    if ( rank() == 0 )
        printf( "\nRunning tests for %s\n\n", filename.c_str() );
    // load the input file
    // Load the input file
    FILE *fid = fopen( filename.c_str(), "rb" );
    if ( fid == NULL ) {
        std::cerr << "Error opening file: " << filename << std::endl;
        return -2;
        return NULL;
    }
    uint64_t N_bytes = 0;
    fread2( &N_bytes, sizeof( uint64_t ), 1, fid );
@@ -39,30 +38,58 @@ int run_tests( const std::string& filename, const Options& options )
    fclose( fid );

    // Create the image structure
    double scale = options.scale;
    RayTrace::create_image_struct info;
    info.unpack( std::pair<char *, size_t>( data, N_bytes ) );
    auto info = new RayTrace::create_image_struct();
    info->unpack( std::pair<char *, size_t>( data, N_bytes ) );
    delete[] data;
    data                 = NULL;
    const double *image0 = info.image;
    const double *I_ang0 = info.I_ang;
    info.image           = NULL;
    info.I_ang           = NULL;
    if ( scale != 1.0 ) {
        delete[] image0;
        image0 = NULL;
        delete[] I_ang0;
        I_ang0 = NULL;
        scale_problem( info, scale );
    if ( image0 != NULL )
        *image0 = info->image;
    else
        delete[] info->image;
    info->image = NULL;
    if ( I_ang0 != NULL )
        *I_ang0 = info->I_ang;
    else
        delete[] info->I_ang;
    info->I_ang = NULL;
    if ( scale != 1.0 )
        scale_problem( *info, scale );
    return info;
}


// Free the structure
static inline void free2( RayTrace::create_image_struct* info )
{
    if ( info == NULL )
        return;
    if ( info->image !=NULL ) {
        free( (void *) info->image );
        info->image = NULL;
    }        
    if ( info->I_ang !=NULL ) {
        free( (void *) info->I_ang );
        info->I_ang = NULL;
    }        
    delete info->euv_beam;
    delete info->seed_beam;
    delete[] info->gain;
    delete info->seed;
    delete info;
}



// Run the tests for a single file
int run_tests( const std::string& filename, const Options& options )
{
    if ( rank() == 0 )
        printf( "\nRunning tests for %s\n\n", filename.c_str() );

    // Get the list of methods to try
    std::vector<std::string> methods = options.methods;
    auto methods = options.methods;
    if ( methods.empty() ) {
        methods.push_back( "cpu" );
#if CXX_STD == 11 || CXX_STD == 14
        methods.push_back( "threads" );
#endif
#ifdef USE_OPENMP
        methods.push_back( "OpenMP" );
#endif
@@ -87,6 +114,28 @@ int run_tests( const std::string& filename, const Options& options )
#endif
    }

    // Call a dummy CUDA/OpenAcc method to initialize the GPU (for more accurate times)
    static bool cudaInitialized = false;
    if ( !cudaInitialized ) {
        auto index = std::find(methods.begin(),methods.end(),"Cuda-MultiGPU");
        if ( index == methods.end() )
            index = std::find(methods.begin(),methods.end(),"Cuda");
        if ( index == methods.end() )
            index = find(methods.begin(),methods.end(),"OpenAcc");
        if ( index != methods.end() ) {
            auto info = loadInput( filename, 0.1 );
            RayTrace::create_image( info, *index );
            free2( info );
        }
        cudaInitialized = true;
    }

    // Load the image structure
    double *image0=NULL, *I_ang0=NULL;
    auto info = loadInput( filename, options.scale, &image0, &I_ang0 );
    if ( info == NULL )
        return -2;

    // Call create_image for each method
    int N_errors = 0;
    sleep_ms( 50 );
@@ -96,22 +145,22 @@ int run_tests( const std::string& filename, const Options& options )
            printf( "Running %s\n", methods[i].c_str() );
        double start = getTime();
        for ( int it = 0; it < options.iterations; it++ ) {
            RayTrace::create_image( &info, methods[i] );
            RayTrace::create_image( info, methods[i] );
            double stop = getTime();
            time[i].push_back( stop - start );
            start = stop;
        }
        time[i] = gatherAll( time[i] );
        // Check the results
        if ( scale == 1.0 ) {
            bool pass = check_ans( image0, I_ang0, info );
        if ( options.scale == 1.0 ) {
            bool pass = check_ans( image0, I_ang0, *info );
            if ( !pass )
                N_errors++;
        }
        free( (void *) info.image );
        free( (void *) info.I_ang );
        info.image = NULL;
        info.I_ang = NULL;
        free( (void *) info->image );
        free( (void *) info->I_ang );
        info->image = NULL;
        info->I_ang = NULL;
    }
    if ( rank() == 0 ) {
        printf( "\n        METHOD    Avg     Min     Max   Std Dev\n" );
@@ -135,23 +184,18 @@ int run_tests( const std::string& filename, const Options& options )
    // Free memory and return
    free( (void *) image0 );
    free( (void *) I_ang0 );
    delete info.euv_beam;
    delete info.seed_beam;
    delete[] info.gain;
    delete info.seed;
    free2( info );
    return sumReduce(N_errors);
}


/******************************************************************
* The main program                                                *
* Initialize/finalize kokkos                                      *
******************************************************************/
int main( int argc, char *argv[] )
void KokkosInitialize( int argc, char *argv[] )
{
    // Start MPI (if used)
    startup( argc, argv );

// Initialize kokkos
    NULL_USE(argc);
    NULL_USE(argv);
#ifdef USE_KOKKOS
#ifdef KOKKOS_HAVE_PTHREAD
    int argc2             = 1;
@@ -166,6 +210,22 @@ int main( int argc, char *argv[] )
    Kokkos::initialize( argc, argv );
#endif
#endif
}
void KokkosFinalize()
{
#ifdef USE_KOKKOS
    Kokkos::finalize();
#endif
}


/******************************************************************
* The main program                                                *
******************************************************************/
int main( int argc, char *argv[] )
{
    // Start MPI (if used)
    startup( argc, argv );

    // Check the input arguments
    Options options;
@@ -173,6 +233,9 @@ int main( int argc, char *argv[] )
    if ( filenames.empty() )
        return -2;

    // Initialize kokkos
    KokkosInitialize( argc, argv );

    // Run the tests for all files
    int N_errors = 0;
    for (size_t i=0; i<filenames.size(); i++)
@@ -182,9 +245,8 @@ int main( int argc, char *argv[] )
        std::cout << "\nAll tests passed\n";
    else
        std::cout << "\nSome tests failed\n";
#ifdef USE_KOKKOS
    Kokkos::finalize();
#endif
    KokkosFinalize();
    shutdown();
    return N_errors;
}
+1 −10
Original line number Diff line number Diff line
@@ -5,10 +5,7 @@
#include <assert.h>
#include <math.h>
#include <stdint.h>

#if CXX_STD == 11 || CXX_STD == 14
#include <thread>
#endif

// Timer include
#include "ProfilerApp.h"
@@ -89,7 +86,6 @@ void setGPU( int id )
    cudaSetDevice(id);
#endif
}
#if CXX_STD == 11 || CXX_STD == 14
void RayTraceImageThreadLoop( size_t N_threads, 
    std::function<void( int, const RayTrace::EUV_beam_struct&,
        const RayTrace::ray_gain_struct*, const RayTrace::ray_seed_struct*,
@@ -136,7 +132,6 @@ void RayTraceImageThreadLoop( size_t N_threads,
        rays2[i].clear();
    }
}
#endif


/**********************************************************************
@@ -412,14 +407,10 @@ void RayTrace::create_image( create_image_struct *info, std::string compute_meth
        RayTraceImageCPULoop( N, std::ref(*info->euv_beam), info->gain, info->seed,
            method, rays, scale, image, I_ang, failure_code, failed_rays );
    } else if ( compute_method == "threads" ) {
#if CXX_STD == 11 || CXX_STD == 14
        size_t N_threads = std::thread::hardware_concurrency();
        RayTraceImageThreadLoop( N_threads, RayTraceImageCPULoop, [](int) {},
            N, std::ref(*info->euv_beam), info->gain, info->seed,
            method, rays, scale, image, I_ang, failure_code, failed_rays );
#else
        RAY_ERROR( "Threaded version requires C++11" );
#endif
    } else if ( compute_method == "openmp" ) {
#if defined( ENABLE_OPENMP )
        RayTraceImageOpenMPLoop( N, std::ref(*info->euv_beam), info->gain, info->seed,
+0 −2
Original line number Diff line number Diff line
@@ -4,10 +4,8 @@

#include "common/RayTraceImageHelper.h"

#if CXX_STD == 11 || CXX_STD == 14
#include <atomic>
#include <thread>
#endif


inline int getIndex( int n, const double *x, double dx, double y )