Commit 8e3924e0 authored by Cianciosa, Mark's avatar Cianciosa, Mark
Browse files

Refactor to remove the explicit cpu backend. This now takes the base types...

Refactor to remove the explicit cpu backend. This now takes the base types directly. Define common constants.
parent 39e20785
Loading
Loading
Loading
Loading
+26 −27
Original line number Diff line number Diff line
@@ -37,7 +37,6 @@ int main(int argc, const char * argv[]) {
    //typedef double base;
    typedef float base;
    //typedef std::complex<float> base;
    typedef backend::cpu<base> cpu;
    
    const timeing::measure_diagnostic total("Total Time");

@@ -47,7 +46,7 @@ int main(int argc, const char * argv[]) {

    std::vector<std::thread> threads(0);
#if USE_GPU
    if constexpr (jit::can_jit<cpu> ()) {
    if constexpr (jit::can_jit<base> ()) {
        threads.resize(1);
    } else {
#endif
@@ -69,38 +68,38 @@ int main(int argc, const char * argv[]) {
            std::normal_distribution<base> norm_dist(600.0, 10.0);
            std::uniform_int_distribution<size_t> int_dist(0, local_num_rays - 1);
            
            auto omega = graph::variable<cpu> (local_num_rays, "\\omega");
            auto kx = graph::variable<cpu> (local_num_rays, "k_{x}");
            auto ky = graph::variable<cpu> (local_num_rays, "k_{y}");
            auto kz = graph::variable<cpu> (local_num_rays, "k_{z}");
            auto x = graph::variable<cpu> (local_num_rays, "x");
            auto y = graph::variable<cpu> (local_num_rays, "y");
            auto z = graph::variable<cpu> (local_num_rays, "z");
            auto t = graph::variable<cpu> (local_num_rays, "t");
            auto omega = graph::variable<base> (local_num_rays, "\\omega");
            auto kx = graph::variable<base> (local_num_rays, "k_{x}");
            auto ky = graph::variable<base> (local_num_rays, "k_{y}");
            auto kz = graph::variable<base> (local_num_rays, "k_{z}");
            auto x = graph::variable<base> (local_num_rays, "x");
            auto y = graph::variable<base> (local_num_rays, "y");
            auto z = graph::variable<base> (local_num_rays, "z");
            auto t = graph::variable<base> (local_num_rays, "t");

            t->set(backend::base_cast<cpu> (0.0));
            t->set(static_cast<base> (0.0));

//  Initial conditions.
            for (size_t j = 0; j < local_num_rays; j++) {
                omega->set(j, norm_dist(engine));
            }

            x->set(backend::base_cast<cpu> (0.0));
            y->set(backend::base_cast<cpu> (0.0));
            z->set(backend::base_cast<cpu> (0.0));
            kx->set(backend::base_cast<cpu> (600.0));
            ky->set(backend::base_cast<cpu> (0.0));
            kz->set(backend::base_cast<cpu> (0.0));

            auto eq = equilibrium::make_slab_density<cpu> ();
            //auto eq = equilibrium::make_no_magnetic_field<cpu> ();

            //solver::split_simplextic<dispersion::bohm_gross<cpu>>
            //solver::rk4<dispersion::bohm_gross<cpu>>
            //solver::rk4<dispersion::simple<cpu>>
            //solver::rk4<dispersion::ordinary_wave<cpu>>
            //solver::rk4<dispersion::extra_ordinary_wave<cpu>>
            solver::rk4<dispersion::cold_plasma<cpu>>
            x->set(static_cast<base> (0.0));
            y->set(static_cast<base> (0.0));
            z->set(static_cast<base> (0.0));
            kx->set(static_cast<base> (600.0));
            ky->set(static_cast<base> (0.0));
            kz->set(static_cast<base> (0.0));

            auto eq = equilibrium::make_slab_density<base> ();
            //auto eq = equilibrium::make_no_magnetic_field<base> ();

            //solver::split_simplextic<dispersion::bohm_gross<base>>
            //solver::rk4<dispersion::bohm_gross<base>>
            //solver::rk4<dispersion::simple<base>>
            //solver::rk4<dispersion::ordinary_wave<base>>
            //solver::rk4<dispersion::extra_ordinary_wave<base>>
            solver::rk4<dispersion::cold_plasma<base>>
                solve(omega, kx, ky, kz, x, y, z, t, 60.0/num_times, eq);
            solve.init(kx);
            solve.compile(num_rays);
+118 −118

File changed.

Preview size limit exceeded, changes collapsed.

+0 −11
Original line number Diff line number Diff line
@@ -108,17 +108,6 @@ namespace backend {
///  Type def to retrieve the backend base type.
        typedef BASE base;
    };

//------------------------------------------------------------------------------
///  @brief Cast to a backend base type.
///
///  Convenience wrapper around static_cast that converts an arbitrary scalar
///  to the backend's underlying base type (BACKEND::base).
///
///  @tparam BACKEND Backend type providing a nested `base` typedef.
///  @tparam T       Source scalar type of the value to convert.
///
///  @param[in] arg Value to cast.
///  @returns The value converted to the backend base type.
//------------------------------------------------------------------------------
    template<typename BACKEND, typename T>
    typename BACKEND::base base_cast(const T arg) {
        return static_cast<typename BACKEND::base> (arg);
    }
}

#endif /* backend_protocall_h */
+15 −15
Original line number Diff line number Diff line
@@ -137,11 +137,11 @@ namespace gpu {
///  @param[in] add_reduction Optional argument to generate the reduction
///                           kernel.
//------------------------------------------------------------------------------
        template<class BACKEND>
        template<typename T>
        void create_pipeline(const std::string kernel_source,
                             const std::string kernel_name,
                             graph::input_nodes<BACKEND> inputs,
                             graph::output_nodes<BACKEND> outputs,
                             graph::input_nodes<T> inputs,
                             graph::output_nodes<T> outputs,
                             const size_t num_rays,
                             const bool add_reduction=false) {
            check_nvrtc_error(nvrtcCreateProgram(&kernel_program,
@@ -224,9 +224,9 @@ namespace gpu {

            buffers.resize(inputs.size() + outputs.size());

            const size_t buffer_element_size = sizeof(typename BACKEND::base);
            const size_t buffer_element_size = sizeof(T);
            for (size_t i = 0, ie = inputs.size(); i < ie; i++) {
                const BACKEND backend = inputs[i]->evaluate();
                const backend::cpu<T> backend = inputs[i]->evaluate();

                check_error(cuMemAllocManaged(&buffers[i], backend.size()*buffer_element_size,
                                              CU_MEM_ATTACH_GLOBAL),
@@ -236,7 +236,7 @@ namespace gpu {
                kernel_arguments.push_back(reinterpret_cast<void *> (&buffers[i]));
            }
            for (size_t i = inputs.size(), ie = buffers.size(), j = 0; i < ie; i++, j++) {
                const BACKEND backend = outputs[j]->evaluate();
                const backend::cpu<T> backend = outputs[j]->evaluate();

                check_error(cuMemAllocManaged(&buffers[i], backend.size()*buffer_element_size,
                                              CU_MEM_ATTACH_GLOBAL), 
@@ -259,7 +259,7 @@ namespace gpu {
//------------------------------------------------------------------------------
///  @brief Create a max compute pipeline.
//------------------------------------------------------------------------------
        template<class BACKEND>
        template<typename T>
        void create_max_pipeline() {
            const char *mangled_kernel_name;
            check_nvrtc_error(nvrtcGetLoweredName(kernel_program,
@@ -269,7 +269,7 @@ namespace gpu {

            std::cout << "  Mangled Kernel Name      : " << mangled_kernel_name << std::endl;

            check_error(cuMemAllocManaged(&result_buffer, sizeof(typename BACKEND::base),
            check_error(cuMemAllocManaged(&result_buffer, sizeof(T),
                                          CU_MEM_ATTACH_GLOBAL),
                        "cuMemAllocManaged");

@@ -298,8 +298,8 @@ namespace gpu {
///
///  @returns The maximum value from the input buffer.
//------------------------------------------------------------------------------
        template<class BACKEND>
        typename BACKEND::base max_reduction() {
        template<typename T>
        T max_reduction() {
            run();
            check_error_async(cuLaunchKernel(max_function, 1, 1, 1,
                                             threads_per_group, 1, 1, 0, stream,
@@ -307,7 +307,7 @@ namespace gpu {
                              "cuLaunchKernel");
            wait();

            return reinterpret_cast<typename BACKEND::base *> (result_buffer)[0];
            return reinterpret_cast<T *> (result_buffer)[0];
        }

//------------------------------------------------------------------------------
@@ -323,11 +323,11 @@ namespace gpu {
///
///  @param[in] index Number of times to record.
//------------------------------------------------------------------------------
        template<class BACKEND>
        template<typename T>
        void print_results(const size_t index) {
            wait();
            for (CUdeviceptr &buffer : buffers) {
                std::cout << reinterpret_cast<typename BACKEND::base *> (buffer)[index] << " ";
                std::cout << reinterpret_cast<T *> (buffer)[index] << " ";
            }
            std::cout << std::endl;
        }
@@ -338,9 +338,9 @@ namespace gpu {
///  @param[in]     source_index Index of the GPU buffer.
///  @param[in,out] destination  Host side buffer to copy to.
//------------------------------------------------------------------------------
        template<typename BASE>
        template<typename T>
        void copy_buffer(const size_t source_index,
			 BASE *destination) {
                         T *destination) {
	    size_t size;
	    check_error(cuMemGetAddressRange(NULL, &size, buffers[source_index]), "cuMemGetAddressRange");
            check_error_async(cuMemcpyDtoHAsync(destination, buffers[source_index], size, stream), "cuMemcpyDtoHAsync");
+252 −267

File changed.

Preview size limit exceeded, changes collapsed.

Loading