Commit 39e20785 authored by Cianciosa, Mark's avatar Cianciosa, Mark
Browse files

Merge branch 'jit' into 'main'

Enable JIT compilation of the graph.

See merge request !2
parents 959ca190 529fa3cc
Loading
Loading
Loading
Loading
+67 −0
Original line number Diff line number Diff line
@@ -13,6 +13,65 @@ set_property (CACHE CMAKE_BUILD_TYPE PROPERTY STRINGS

option (USE_REDUCE "Enable the graph reduction" ON)

#-------------------------------------------------------------------------------
#  GPU config
#-------------------------------------------------------------------------------

#  Exactly one GPU backend option is offered per platform: USE_METAL on Apple
#  systems and USE_CUDA everywhere else. Both default to OFF. Each enabled
#  backend is wrapped in an INTERFACE library (metal_lib or cuda_lib) carrying
#  the compile definitions, compile options and link dependencies it needs.
#
#  Variables are tested by name (if (APPLE)) rather than by expansion
#  (if (${APPLE})) so an unset variable does not collapse to an empty condition.
if (APPLE)
    option (USE_METAL "Enable the metal backend" OFF)

    if (USE_METAL)
#  Objective-C++ must be enabled before any source can be tagged with the OBJCXX
#  language.
        enable_language (OBJCXX)

        add_library (metal_lib INTERFACE)
        target_link_libraries (metal_lib
                               INTERFACE
                               "-framework Metal"
                               "-framework Foundation"
        )

        target_compile_definitions (metal_lib
                                    INTERFACE
                                    USE_METAL
        )
#  Compile consumers with Objective-C automatic reference counting.
        target_compile_options (metal_lib
                                INTERFACE
                                -fobjc-arc
        )
    endif ()
else ()
    option (USE_CUDA "Enable the cuda backend" OFF)

    if (USE_CUDA)
#  REQUIRED stops the configure step when the toolkit is missing, so the
#  imported CUDA:: targets below are guaranteed to exist past this point.
        find_package (CUDAToolkit REQUIRED)

        add_library (cuda_lib INTERFACE)

#  CUDA_INCLUDE hands the toolkit include directory to the sources as a quoted
#  string.
#  NOTE(review): CUDAToolkit_INCLUDE_DIRS is a list. If it ever holds more than
#  one directory the embedded semicolon would split this definition - confirm.
        target_compile_definitions (cuda_lib
                                    INTERFACE
                                    USE_CUDA
                                    CUDA_INCLUDE="${CUDAToolkit_INCLUDE_DIRS}"
        )
        target_link_libraries (cuda_lib
                               INTERFACE
                               CUDA::cuda_driver
                               CUDA::nvrtc
        )
    endif ()
endif ()

#  gpu_lib is the single target the rest of the build links against to pick up
#  whichever GPU backend was configured. It forwards to metal_lib or cuda_lib and
#  defines USE_GPU when a backend is present. With no backend it is an empty
#  INTERFACE library.
#
#  The backend targets are detected with if (TARGET ...) rather than the raw
#  USE_METAL / USE_CUDA variables. A stray -DUSE_CUDA=ON on Apple (or
#  -DUSE_METAL=ON elsewhere) would otherwise pass a non-existent target name to
#  the linker and define USE_GPU without any backend behind it.
add_library (gpu_lib INTERFACE)

foreach (gpu_backend IN ITEMS metal_lib cuda_lib)
    if (TARGET ${gpu_backend})
        target_link_libraries (gpu_lib
                               INTERFACE
                               ${gpu_backend}
        )
        target_compile_definitions (gpu_lib
                                    INTERFACE
                                    USE_GPU
        )
    endif ()
endforeach ()

#-------------------------------------------------------------------------------
#  Sanitizer options
#-------------------------------------------------------------------------------
@@ -59,6 +118,14 @@ macro (add_tool_target target)
                    PRIVATE
                    $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/${target}.cpp>
    )
    
    if (${USE_METAL})
        set_source_files_properties (${CMAKE_CURRENT_SOURCE_DIR}/${target}.cpp
                                     PROPERTIES
                                     LANGUAGE OBJCXX
        )
    endif ()
    
    target_link_libraries (${target}
                           PUBLIC
                           rays
+36 −69
Original line number Diff line number Diff line
@@ -10,6 +10,7 @@

#include "../graph_framework/cpu_backend.hpp"
#include "../graph_framework/solver.hpp"
#include "../graph_framework/timing.hpp"

void write_time(const std::string &name, const std::chrono::nanoseconds time);

@@ -30,21 +31,32 @@ static base solution(const base t) {
///  @param[in] argv Array of commandline arguments.
//------------------------------------------------------------------------------
int main(int argc, const char * argv[]) {
    START_GPU

    //typedef std::complex<double> base;
    typedef double base;
    //typedef float base;
    //typedef double base;
    typedef float base;
    //typedef std::complex<float> base;
    typedef backend::cpu<base> cpu;
    
    const std::chrono::high_resolution_clock::time_point start = std::chrono::high_resolution_clock::now();
    const timeing::measure_diagnostic total("Total Time");

    const size_t num_times = 10000;
    //const size_t num_rays = 1;
    const size_t num_rays = 10000;
    const size_t num_rays = 1000000;

    std::vector<std::thread> threads(std::max(std::min(std::thread::hardware_concurrency(),
    std::vector<std::thread> threads(0);
#if USE_GPU
    if constexpr (jit::can_jit<cpu> ()) {
        threads.resize(1);
    } else {
#endif
        threads.resize(std::max(std::min(std::thread::hardware_concurrency(),
                                         static_cast<unsigned int> (num_rays)),
                                static_cast<unsigned int> (1)));
#if USE_GPU
    }
#endif

    for (size_t i = 0, ie = threads.size(); i < ie; i++) {
        threads[i] = std::thread([num_times, num_rays] (const size_t thread_number,
@@ -53,7 +65,8 @@ int main(int argc, const char * argv[]) {
                                        + std::min(thread_number, num_rays%num_threads);

            std::mt19937_64 engine((thread_number + 1)*static_cast<uint64_t> (std::chrono::system_clock::to_time_t(std::chrono::system_clock::now())));
            std::uniform_real_distribution<double> real_dist(0.6, 1.0);
            std::uniform_real_distribution<base> real_dist(0.6, 1.0);
            std::normal_distribution<base> norm_dist(600.0, 10.0);
            std::uniform_int_distribution<size_t> int_dist(0, local_num_rays - 1);
            
            auto omega = graph::variable<cpu> (local_num_rays, "\\omega");
@@ -69,7 +82,7 @@ int main(int argc, const char * argv[]) {

//  Initial conditions.
            for (size_t j = 0; j < local_num_rays; j++) {
                omega->set(j, 500.0);
                omega->set(j, norm_dist(engine));
            }

            x->set(backend::base_cast<cpu> (0.0));
@@ -85,11 +98,12 @@ int main(int argc, const char * argv[]) {
            //solver::split_simplextic<dispersion::bohm_gross<cpu>>
            //solver::rk4<dispersion::bohm_gross<cpu>>
            //solver::rk4<dispersion::simple<cpu>>
            solver::rk4<dispersion::ordinary_wave<cpu>>
            //solver::rk4<dispersion::ordinary_wave<cpu>>
            //solver::rk4<dispersion::extra_ordinary_wave<cpu>>
            //solver::rk4<dispersion::cold_plasma<cpu>>
            solver::rk4<dispersion::cold_plasma<cpu>>
                solve(omega, kx, ky, kz, x, y, z, t, 60.0/num_times, eq);
            solve.init(kx);
            solve.compile(num_rays);
            if (thread_number == 0) {
                solve.print_dispersion();
                std::cout << std::endl;
@@ -106,43 +120,25 @@ int main(int argc, const char * argv[]) {
                solve.print_dzdt();
            }

            auto residule = solve.residule();

            const size_t sample = int_dist(engine);

            if (thread_number == 0) {
                std::cout << "Omega " << omega->evaluate().at(sample) << std::endl;
                std::cout << "t = " << 0.0 << " ";
                std::cout << solve.state.back().x.at(sample) << std::endl;
            }

            for (size_t j = 0; j < num_times; j++) {
                if (thread_number == 0) {
                    std::cout << "Time Step " << j << " Sample " << sample << " "
                              << solve.state.back().t.at(sample) << " "
                              << solve.state.back().x.at(sample) << " "
                              << solve.state.back().y.at(sample) << " "
                              << solve.state.back().z.at(sample) << " "
                              << solve.state.back().kx.at(sample) << " "
                              << solve.state.back().ky.at(sample) << " "
                              << solve.state.back().kz.at(sample) << " "
                              << residule->evaluate().at(sample)
                              << std::endl;
                    solve.print(sample);
                }
                solve.step();
            }

            if (thread_number == 0) {
                std::cout << "Time Step " << num_times << " Sample " << sample << " "
                          << solve.state.back().t.at(sample) << " "
                          << solve.state.back().x.at(sample) << " "
                          << solve.state.back().y.at(sample) << " "
                          << solve.state.back().z.at(sample) << " "
                          << solve.state.back().kx.at(sample) << " "
                          << solve.state.back().ky.at(sample) << " "
                          << solve.state.back().kz.at(sample) << " "
                          << residule->evaluate().at(sample)
                          << std::endl;
                solve.print(sample);
            } else {
                solve.sync();
            }

        }, i, threads.size());
    }

@@ -150,37 +146,8 @@ int main(int argc, const char * argv[]) {
        t.join();
    }

    const std::chrono::high_resolution_clock::time_point evaluate = std::chrono::high_resolution_clock::now();

    const auto total_time = evaluate - start;

    const std::chrono::nanoseconds total_time_ns = std::chrono::duration_cast<std::chrono::nanoseconds> (total_time);

    std::cout << std::endl << "Timing:" << std::endl;
    std::cout << std::endl;
    write_time("  Total time : ", total_time_ns);
    std::cout << std::endl;
}
    total.stop();

//------------------------------------------------------------------------------
///  @brief Print out timings.
///
///  @param[in] name Description of the times.
///  @param[in] time Elapsed time in nanoseconds.
//------------------------------------------------------------------------------
void write_time(const std::string &name, const std::chrono::nanoseconds time) {
    if (time.count() < 1000) {
        std::cout << name << time.count()               << " ns" << std::endl;
    } else if (time.count() < 1000000) {
        std::cout << name << time.count()/1000.0        << " μs" << std::endl;
    } else if (time.count() < 1000000000) {
        std::cout << name << time.count()/1000000.0     << " ms" << std::endl;
    } else if (time.count() < 60000000000) {
        std::cout << name << time.count()/1000000000.0  << " s" << std::endl;
    } else if (time.count() < 3600000000000) {
        std::cout << name << time.count()/60000000000.0 << " min" << std::endl;
    } else {
        std::cout << name << time.count()/3600000000000 << " h" << std::endl;
    END_GPU
}
}
+149 −10

File changed.

Preview size limit exceeded, changes collapsed.

+2 −2
Original line number Diff line number Diff line
<?xml version="1.0" encoding="UTF-8"?>
<Scheme
   LastUpgradeVersion = "1320"
   LastUpgradeVersion = "1410"
   version = "1.3">
   <BuildAction
      parallelizeBuildables = "YES"
@@ -40,7 +40,7 @@
      </Testables>
   </TestAction>
   <LaunchAction
      buildConfiguration = "Debug"
      buildConfiguration = "Release"
      selectedDebuggerIdentifier = "Xcode.DebuggerFoundation.Debugger.LLDB"
      selectedLauncherIdentifier = "Xcode.DebuggerFoundation.Launcher.LLDB"
      launchStyle = "0"
+1 −1
Original line number Diff line number Diff line
<?xml version="1.0" encoding="UTF-8"?>
<Scheme
   LastUpgradeVersion = "1340"
   LastUpgradeVersion = "1410"
   version = "1.3">
   <BuildAction
      parallelizeBuildables = "YES"
Loading