Commit f1679bea authored by Lopez Ortiz O E's avatar Lopez Ortiz O E
Browse files

Adding FO non-relativistic Boris integrator in dimensionless variables with E = 0

parent ef3afdd2
Loading
Loading
Loading
Loading
+214 −171
Original line number Diff line number Diff line
#include "../graph_framework/equilibrium.hpp"
#include "../graph_framework/timing.hpp"

//------------------------------------------------------------------------------
///  @brief Run Korc
///
///  @tparam T Base type.
//  xkorc.cpp  –  Full-orbit Boris integrator (dimensionless, E = 0)
//
//  Reads from  init_particles.nc
//      • x, y, z , ux, uy, uz                (dimensionless)
//      • dt               (dimensionless time-step)
//      • NSTEPS, DUMP_EVERY                  (run control)
//
//  Evolves {x,y,z,ux,uy,uz} with the Boris pusher.
//  NEW: one dedicated writer thread per worker to handle NetCDF output.
//------------------------------------------------------------------------------
template<jit::float_scalar T>
void run_korc() {
    const timeing::measure_diagnostic t_total("Total Time");

    const size_t num_particles = 1000000;
    std::cout << "Num particles " << num_particles << std::endl;
    std::vector<std::thread> threads(std::max(std::min(static_cast<unsigned int> (jit::context<T>::max_concurrency()),
                                                       static_cast<unsigned int> (num_particles)),
                                              static_cast<unsigned int> (1)));
#include <cmath>
#include <iostream>
#include <sstream>
#include <thread>
#include <vector>
#include <queue>
#include <mutex>
#include <condition_variable>
#include <algorithm>
#include <cassert>
#include <netcdf.h>

    const size_t batch = num_particles/threads.size();
    const size_t extra = num_particles%threads.size();
#include "../graph_framework/equilibrium.hpp"
#include "../graph_framework/timing.hpp"
#include "../graph_framework/output.hpp"

    for (size_t i = 0, ie = threads.size(); i < ie; i++) {
        threads[i] = std::thread([num_particles, batch, extra] (const size_t thread_number) -> void {
            const size_t local_num_particles = batch + (extra > thread_number ? 1 : 0);
static inline void check_nc(int status)
{
    if (status != NC_NOERR) {
        std::cerr << "netCDF error: " << nc_strerror(status) << '\n';
        std::exit(EXIT_FAILURE);
    }
}

template<jit::float_scalar T>
void run_korc()
{
    const timeing::measure_diagnostic t_total("Total Time");

            const timeing::measure_diagnostic t_setup("Setup Time");
    // ──────────────────────────────────────────────────────────────
    // 1.  READ INITIAL DATA & RUN CONTROL
    // ──────────────────────────────────────────────────────────────
    int ncid;
    check_nc( nc_open("init_particles.nc", NC_NOWRITE, &ncid) );

    int dim_id;
    check_nc( nc_inq_dimid(ncid, "num_particles", &dim_id) );
    size_t Np = 0;
    check_nc( nc_inq_dimlen(ncid, dim_id,          &Np   ) );
    std::cout << "Num particles: " << Np << '\n';

    std::vector<double> h_x (Np), h_y (Np), h_z (Np),
                        h_ux(Np), h_uy(Np), h_uz(Np);

    auto get_vec=[&](const char* n, std::vector<double>& v){
        int vid; check_nc( nc_inq_varid(ncid, n, &vid) );
        check_nc( nc_get_var_double(ncid, vid, v.data()) );
    };
    get_vec("x" ,h_x ); get_vec("y" ,h_y ); get_vec("z" ,h_z );
    get_vec("ux",h_ux); get_vec("uy",h_uy); get_vec("uz",h_uz);

    long long h_NSTEPS=0, h_DUMP=0;
    double    dt_val=0.0;

    int vid;
    check_nc( nc_inq_varid(ncid,"NSTEPS",     &vid) );
    check_nc( nc_get_var_longlong(ncid,vid,&h_NSTEPS) );
    check_nc( nc_inq_varid(ncid,"DUMP_EVERY", &vid) );
    check_nc( nc_get_var_longlong(ncid,vid,&h_DUMP)   );
    check_nc( nc_inq_varid(ncid,"dt",         &vid) );
    check_nc( nc_get_var_double  (ncid,vid,&dt_val)  );

    check_nc( nc_close(ncid) );

    const std::size_t NSTEPS     = static_cast<std::size_t>(h_NSTEPS);
    const std::size_t DUMP_EVERY = static_cast<std::size_t>(h_DUMP  );

    // ──────────────────────────────────────────────────────────────
    // 2.  CREATE GRAPH VARIABLES
    // ──────────────────────────────────────────────────────────────
    auto x  = graph::variable<T,false>(Np,"x");
    auto y  = graph::variable<T,false>(Np,"y");
    auto z  = graph::variable<T,false>(Np,"z");
    auto ux = graph::variable<T,false>(Np,"u_x");
    auto uy = graph::variable<T,false>(Np,"u_y");
    auto uz = graph::variable<T,false>(Np,"u_z");

    for(size_t i=0;i<Np;++i){
        x->set(i,h_x [i]); y->set(i,h_y [i]); z->set(i,h_z [i]);
        ux->set(i,h_ux[i]); uy->set(i,h_uy[i]); uz->set(i,h_uz[i]);
    }

    // ──────────────────────────────────────────────────────────────
    // 3.  THREAD LAYOUT
    // ──────────────────────────────────────────────────────────────
    unsigned n_thr = std::max(1u,
        std::min<unsigned>(jit::context<T>::max_concurrency(), static_cast<unsigned>(Np)));
    std::vector<std::thread> workers(n_thr);

    const size_t batch = Np / n_thr, extra = Np % n_thr;

    for(unsigned tid=0; tid<n_thr; ++tid)
    {
        workers[tid] = std::thread([=]() mutable
        {
            const size_t Nloc = batch + (extra>tid ? 1 : 0);
            const timeing::measure_diagnostic t_setup("Setup T"+std::to_string(tid));

            /* constants */
            auto dt   = graph::constant<T,false>(static_cast<T>(dt_val));
            auto half = graph::constant<T,false>(0.5);
            auto one  = graph::constant<T,false>(1.0);
            auto two  = graph::constant<T,false>(2.0);

            /* geometry */
            auto eq  = equilibrium::make_efit<T>(EFIT_FILE);
            //auto eq = equilibrium::make_slab_density<T> ();
            auto b0 = eq->get_characteristic_field(thread_number);
            const T q = 1.602176634E-19;
            const T me = 9.1093837139E-31;
            const T c = 299792458.0;
            
            auto gryo_period = me/(q*b0);
            std::cout << "gryo_period " << gryo_period->evaluate().at(0) << std::endl;
            auto larmor_radius = c*gryo_period;
            std::cout << "larmor_radius " << larmor_radius->evaluate().at(0) << std::endl;
            
            std::cout << "Local num particles " << local_num_particles << std::endl;
            
            auto ux = graph::variable<T> (local_num_particles, "u_{x}");
            auto uy = graph::variable<T> (local_num_particles, "u_{y}");
            auto uz = graph::variable<T> (local_num_particles, "u_{z}");
            
            ux->set(0.0);
            uy->set(0.99);
            uz->set(0.1);
            
            auto x = graph::variable<T> (local_num_particles, "x");
            auto y = graph::variable<T> (local_num_particles, "y");
            auto z = graph::variable<T> (local_num_particles, "z");
            auto pos = graph::vector(x, y, z);
            
            x->set(1.7);
            y->set(0.0);
            z->set(0.0);
            auto b0  = eq->get_characteristic_field(tid);
            std::cout<<"Thread "<<tid<<" B0: "<<b0->evaluate().at(0)<<'\n';

            /* vector wrappers */
            auto pos   = graph::vector(x ,y ,z );
            auto u_vec = graph::vector(ux,uy,uz);

            auto gamma = graph::variable<T> (local_num_particles, "\\gamma");
            
            auto dt = graph::constant<T> (0.5);
            
            auto gamma_init = 1.0/graph::sqrt(1.0 - u_vec->dot(u_vec));
            
            auto u_init = gamma_init*u_vec;
            
            auto b_vec = eq->get_magnetic_field(pos->get_x(),
                                                pos->get_y(),
                                                pos->get_z())/b0;
            
            workflow::manager<T> work(thread_number);
            work.add_preitem({
                graph::variable_cast(ux),
                graph::variable_cast(uy),
                graph::variable_cast(uz),
                graph::variable_cast(gamma)
            }, {}, {
                {u_init->get_x(), graph::variable_cast(ux)},
                {u_init->get_y(), graph::variable_cast(uy)},
                {u_init->get_z(), graph::variable_cast(uz)},
                {gamma_init, graph::variable_cast(gamma)}
            }, "initalize_gamma");
            
            auto u_prime = u_vec - dt*u_vec->cross(b_vec)/(2.0*gamma);
            
            auto tau = -0.5*dt*b_vec;
            auto tau_sq = tau->dot(tau);
            auto speed_sq = u_prime->dot(u_prime);
            auto sigma = 1.0 + speed_sq - tau_sq;
            auto ustar = u_prime->dot(tau);
            
            auto gamma_next = graph::sqrt(0.5*(sigma + graph::sqrt(sigma*sigma + 4.0*(tau_sq + ustar*ustar))));
            auto t = tau/gamma_next;
            
            auto s = 1.0 + t->dot(t);
            auto u_prime_dot_t = u_prime->dot(t);
            
            auto u_next = (u_prime + u_prime_dot_t*t + u_prime->cross(t))/s;
            
            auto pos_next = pos + larmor_radius*dt*u_next/gamma_next;
            
            work.add_item({
                graph::variable_cast(x),
                                                pos->get_z());

            /* Boris pusher algebra (symbolic) */
            auto t_vec   = dt * half * b_vec;
            auto t_sq    = t_vec->dot(t_vec);
            auto s_vec   = (two * t_vec) / (one + t_sq);
            auto u_prime = u_vec;
            auto u_tilde = u_prime + u_prime->cross(t_vec);
            auto u_next  = u_prime + u_tilde->cross(s_vec);
            auto pos_next= pos + dt * u_next;

            workflow::manager<T,false> w(tid);
            w.add_item({graph::variable_cast(x ),
                        graph::variable_cast(y ),
                        graph::variable_cast(z ),
                        graph::variable_cast(ux),
                        graph::variable_cast(uy),
                graph::variable_cast(uz),
                graph::variable_cast(gamma)
            }, {}, {
                        graph::variable_cast(uz)},
                       {},
                       {
                           {pos_next->get_x(), graph::variable_cast(x )},
                           {pos_next->get_y(), graph::variable_cast(y )},
                           {pos_next->get_z(), graph::variable_cast(z )},
                           {u_next->get_x(),   graph::variable_cast(ux)},
                           {u_next->get_y(),   graph::variable_cast(uy)},
                {u_next->get_z(), graph::variable_cast(uz)},
                {gamma_next, graph::variable_cast(gamma)}
            }, "step");
            
            work.compile();
            
            std::ostringstream stream;
            stream << "korc_" << thread_number << ".nc";
            
            output::result_file file(stream.str(), local_num_particles);
            output::data_set<T> dataset(file);
            
            dataset.create_variable(file, "x",     x,     work.get_context());
            dataset.create_variable(file, "y",     y,     work.get_context());
            dataset.create_variable(file, "z",     z,     work.get_context());
            dataset.create_variable(file, "ux",    ux,    work.get_context());
            dataset.create_variable(file, "uy",    uy,    work.get_context());
            dataset.create_variable(file, "uz",    uz,    work.get_context());
            dataset.create_variable(file, "gamma", gamma, work.get_context());
            
            file.end_define_mode();
            std::thread sync([]{});
                           {u_next->get_z(),   graph::variable_cast(uz)}
                       },
                       "boris");
            w.compile();

            /* set up output file */
            std::ostringstream fn; fn<<"korc_"<<tid<<".nc";
            output::result_file of(fn.str(), Nloc);
            output::data_set<T> ds(of);
            ds.template create_variable<false>(of,"x",  x,  w.get_context());
            ds.template create_variable<false>(of,"y",  y,  w.get_context());
            ds.template create_variable<false>(of,"z",  z,  w.get_context());
            ds.template create_variable<false>(of,"ux", ux, w.get_context());
            ds.template create_variable<false>(of,"uy", uy, w.get_context());
            ds.template create_variable<false>(of,"uz", uz, w.get_context());
            of.end_define_mode();

            /* ------------- one persistent writer thread ------------- */
            std::queue<size_t> q;
            std::mutex mtx;
            std::condition_variable cv;
            bool finish=false;

            std::thread writer([&](){
                std::unique_lock<std::mutex> lk(mtx);
                while(true){
                    cv.wait(lk,[&](){ return finish || !q.empty(); });
                    while(!q.empty()){
                        q.pop();
                        lk.unlock();
                        ds.write(of);      // I/O outside lock
                        lk.lock();
                    }
                    if(finish) break;
                }
            });

            t_setup.print();
            const timeing::measure_diagnostic t_run("Run T"+std::to_string(tid));
            w.pre_run();

            const timeing::measure_diagnostic t_run("Run Time");
            work.pre_run();
            for (size_t i = 0; i < 1000000; i++) {
/*                sync.join();
                work.wait();
                sync = std::thread([&file, &dataset] () -> void {
                    dataset.write(file);
                });*/
            // MAIN LOOP
            for(size_t s=0; s<NSTEPS; ++s)
            {
                w.wait();

                work.run();
                if(s % DUMP_EVERY == 0){
                    { std::lock_guard<std::mutex> lg(mtx); q.push(s); }
                    cv.notify_one();
                }
            work.wait();

            sync.join();
            dataset.write(file);
            work.wait();
            
            t_run.print();
        }, i);
                w.run();
            }
            w.wait();

    for (std::thread &t : threads) {
        t.join();
            // final dump + shutdown
            {
                std::lock_guard<std::mutex> lg(mtx);
                q.push(NSTEPS);
                finish=true;
            }
            cv.notify_one();
            writer.join();
        });
    }
    for(auto& th: workers) th.join();

    std::cout << std::endl << "Timing:" << std::endl;
    std::cout << "\nTiming:\n";
    t_total.print();
}

//------------------------------------------------------------------------------
///  @brief Main program of the driver.
///
///  @param[in] argc Number of commandline arguments.
///  @param[in] argv Array of commandline arguments.
//------------------------------------------------------------------------------
int main(int argc, const char * argv[]) {
int main(int argc, const char* argv[])
{
    START_GPU
    (void)argc;
    (void)argv;

    (void)argc; (void)argv;
    run_korc<double>();
//    run_korc<float> ();

    END_GPU
    return 0;
}