Commit 0a077ed7 authored by Cianciosa, Mark

Add the ability to run safe and unsafe math operations.

parent 9b96c5aa
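
Note: this commit threads a single compile-time switch through the expression-graph, dispersion, and solver templates. The driver defines constexpr bool use_safe_math and passes it as a non-type template parameter (for example graph::variable<base, use_safe_math>), while the GPU contexts below gain a matching SAFE_MATH parameter that defaults to false. The sketch below is not from this repository; it only illustrates how such a flag can select guarded or unchecked arithmetic at compile time (divide_node and its zero-denominator policy are hypothetical).

// Minimal sketch, assuming a non-type bool template parameter chooses the
// math policy. divide_node and the zero-denominator policy are hypothetical
// illustrations, not types from this repository.
#include <limits>

template<typename T, bool SAFE_MATH=false>
struct divide_node {
    static T evaluate(const T num, const T denom) {
        if constexpr (SAFE_MATH) {
//  Safe path: guard against division by zero.
            return denom == static_cast<T> (0.0) ? std::numeric_limits<T>::infinity()
                                                 : num/denom;
        } else {
//  Unsafe path: fast, unchecked division.
            return num/denom;
        }
    }
};

int main() {
    typedef double base;
    constexpr bool use_safe_math = true;   // Mirrors the driver's new flag.

//  The flag propagates exactly like graph::variable<base, use_safe_math>.
    const base guarded = divide_node<base, use_safe_math>::evaluate(1.0, 0.0);
    const base fast = divide_node<base>::evaluate(1.0, 2.0);
    (void) guarded;
    (void) fast;
    return 0;
}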
+26 −25
@@ -13,7 +13,7 @@

const bool print = true;
const bool write_step = false;
-const bool print_expressions = true;
+const bool print_expressions = false;

//------------------------------------------------------------------------------
///  @brief Main program of the driver.
@@ -30,6 +30,7 @@ int main(int argc, const char * argv[]) {
    //typedef double base;
    //typedef std::complex<float> base;
    typedef std::complex<double> base;
+    constexpr bool use_safe_math = true;

    const timeing::measure_diagnostic total("Total Time");

@@ -56,14 +57,14 @@ int main(int argc, const char * argv[]) {
            std::mt19937_64 engine((thread_number + 1)*static_cast<uint64_t> (std::chrono::system_clock::to_time_t(std::chrono::system_clock::now())));
            std::uniform_int_distribution<size_t> int_dist(0, local_num_rays - 1);

-            auto omega = graph::variable<base> (local_num_rays, "\\omega");
-            auto kx = graph::variable<base> (local_num_rays, "k_{x}");
-            auto ky = graph::variable<base> (local_num_rays, "k_{y}");
-            auto kz = graph::variable<base> (local_num_rays, "k_{z}");
-            auto x = graph::variable<base> (local_num_rays, "x");
-            auto y = graph::variable<base> (local_num_rays, "y");
-            auto z = graph::variable<base> (local_num_rays, "z");
-            auto t = graph::variable<base> (local_num_rays, "t");
+            auto omega = graph::variable<base, use_safe_math> (local_num_rays, "\\omega");
+            auto kx = graph::variable<base, use_safe_math> (local_num_rays, "k_{x}");
+            auto ky = graph::variable<base, use_safe_math> (local_num_rays, "k_{y}");
+            auto kz = graph::variable<base, use_safe_math> (local_num_rays, "k_{z}");
+            auto x = graph::variable<base, use_safe_math> (local_num_rays, "x");
+            auto y = graph::variable<base, use_safe_math> (local_num_rays, "y");
+            auto z = graph::variable<base, use_safe_math> (local_num_rays, "z");
+            auto t = graph::variable<base, use_safe_math> (local_num_rays, "t");

            t->set(static_cast<base> (0.0));

@@ -89,9 +90,8 @@ int main(int argc, const char * argv[]) {
            ky->set(static_cast<base> (0.0));
            kz->set(static_cast<base> (0.0));


-            //auto eq = equilibrium::make_efit<base> (NC_FILE, sync);
-            auto eq = equilibrium::make_slab_density<base> ();
+            auto eq = equilibrium::make_efit<base, use_safe_math> (NC_FILE, sync);
+            //auto eq = equilibrium::make_slab_density<base> ();
            //auto eq = equilibrium::make_no_magnetic_field<base> ();

            const base endtime = static_cast<base> (1.0);
@@ -103,15 +103,16 @@ int main(int argc, const char * argv[]) {
            std::ostringstream stream;
            stream << "result" << thread_number << ".nc";

-            //solver::split_simplextic<dispersion::bohm_gross<base>>
-            //solver::rk4<dispersion::bohm_gross<base>>
-            //solver::adaptive_rk4<dispersion::bohm_gross<base>>
-            //solver::rk4<dispersion::simple<base>>
-            //solver::rk4<dispersion::ordinary_wave<base>>
-            //solver::rk4<dispersion::extra_ordinary_wave<base>>
-            //solver::rk4<dispersion::cold_plasma<base>>
-            //solver::adaptive_rk4<dispersion::ordinary_wave<base>>
-            solver::rk4<dispersion::hot_plasma<base, dispersion::z_erfi<base>>>
+            //solver::split_simplextic<dispersion::bohm_gross<base, use_safe_math>>
+            //solver::rk4<dispersion::bohm_gross<base, use_safe_math>>
+            //solver::adaptive_rk4<dispersion::bohm_gross<base, use_safe_math>>
+            //solver::rk4<dispersion::simple<base, use_safe_math>>
+            //solver::rk4<dispersion::ordinary_wave<base, use_safe_math>>
+            //solver::rk4<dispersion::extra_ordinary_wave<base, use_safe_math>>
+            //solver::rk4<dispersion::cold_plasma<base, use_safe_math>>
+            //solver::adaptive_rk4<dispersion::ordinary_wave<base, use_safe_math>>
+            solver::rk4<dispersion::hot_plasma<base, dispersion::z_erfi<base, use_safe_math>, use_safe_math>>
+            //solver::rk4<dispersion::hot_plasma_expandion<base, dispersion::z_erfi<base, use_safe_math>, use_safe_math>>
                solve(omega, kx, ky, kz, x, y, z, t, dt, eq,
                      stream.str(), local_num_rays);
                //solve(omega, kx, ky, kz, x, y, z, t, dt_var, eq,
@@ -119,11 +120,11 @@ int main(int argc, const char * argv[]) {
            solve.init(kx);
            solve.compile();
            if (thread_number == 0 && print_expressions) {
-                //solve.print_dispersion();
-                //std::cout << std::endl;
+                solve.print_dispersion();
+                std::cout << std::endl;
                solve.print_dkxdt();
                std::cout << std::endl;
-                /*solve.print_dkydt();
+                solve.print_dkydt();
                std::cout << std::endl;
                solve.print_dkzdt();
                std::cout << std::endl;
@@ -146,7 +147,7 @@ int main(int argc, const char * argv[]) {
                solve.print_ky_next();
                std::cout << std::endl;
                solve.print_kz_next();
-                std::cout << std::endl;*/
+                std::cout << std::endl;
            }

            const size_t sample = int_dist(engine);
+290 −214
File changed; diff collapsed (preview size limit exceeded).

+16 −16
@@ -20,7 +20,7 @@ namespace gpu {
//------------------------------------------------------------------------------
///  @brief Class representing a cpu context.
//------------------------------------------------------------------------------
-    template<typename T>
+    template<typename T, bool SAFE_MATH=false>
    class cpu_context {
    private:
///  Library name.
@@ -28,9 +28,9 @@ namespace gpu {
///  Handle for the dynamic library.
        void *lib_handle;
///  Argument map.
-        std::map<graph::leaf_node<T> *, std::vector<T>> kernel_arguments;
+        std::map<graph::leaf_node<T, SAFE_MATH> *, std::vector<T>> kernel_arguments;
///  Argument index map.
-        std::map<graph::leaf_node<T> *, size_t> arg_index;
+        std::map<graph::leaf_node<T, SAFE_MATH> *, size_t> arg_index;

    public:
//------------------------------------------------------------------------------
@@ -131,8 +131,8 @@ namespace gpu {
///  @returns A lambda function to run the kernel.
//------------------------------------------------------------------------------
        std::function<void(void)>  create_kernel_call(const std::string kernel_name,
-                                                      graph::input_nodes<T> inputs,
-                                                      graph::output_nodes<T> outputs,
+                                                      graph::input_nodes<T, SAFE_MATH> inputs,
+                                                      graph::output_nodes<T, SAFE_MATH> outputs,
                                                      const size_t num_rays) {
            void *kernel = dlsym(lib_handle, kernel_name.c_str());
            if (!kernel) {
@@ -173,7 +173,7 @@ namespace gpu {
///  @params[in] argument Node to reduce.
///  @params[in] run      Function to run before reduction.
//------------------------------------------------------------------------------
-        std::function<T(void)> create_max_call(graph::shared_leaf<T> &argument,
+        std::function<T(void)> create_max_call(graph::shared_leaf<T, SAFE_MATH> &argument,
                                               std::function<void(void)> run) {
            auto begin = kernel_arguments[argument.get()].cbegin();
            auto end = kernel_arguments[argument.get()].cend();
@@ -203,7 +203,7 @@ namespace gpu {
///  @params[in] nodes Nodes to output.
//------------------------------------------------------------------------------
        void print_results(const size_t index,
-                           const graph::output_nodes<T> &nodes) {
+                           const graph::output_nodes<T, SAFE_MATH> &nodes) {
            for (auto &out : nodes) {
                const T temp = kernel_arguments[out.get()][index];
                if constexpr (jit::is_complex<T> ()) {
@@ -221,7 +221,7 @@ namespace gpu {
///  @params[in] node   Not to copy buffer to.
///  @params[in] source Host side buffer to copy from.
//------------------------------------------------------------------------------
-        void copy_to_device(graph::shared_leaf<T> node,
+        void copy_to_device(graph::shared_leaf<T, SAFE_MATH> node,
                            T *source) {
            memcpy(kernel_arguments[node.get()].data(),
                   source,
@@ -234,7 +234,7 @@ namespace gpu {
///  @params[in]     node        Node to copy buffer from.
///  @params[in,out] destination Host side buffer to copy to.
//------------------------------------------------------------------------------
-        void copy_to_host(const graph::shared_leaf<T> node,
+        void copy_to_host(const graph::shared_leaf<T, SAFE_MATH> node,
                          T *destination) {
            memcpy(destination,
                   kernel_arguments[node.get()].data(),
@@ -270,8 +270,8 @@ namespace gpu {
//------------------------------------------------------------------------------
        void create_kernel_prefix(std::ostringstream &source_buffer,
                                  const std::string name,
-                                  graph::input_nodes<T> &inputs,
-                                  graph::output_nodes<T> &outputs,
+                                  graph::input_nodes<T, SAFE_MATH> &inputs,
+                                  graph::output_nodes<T, SAFE_MATH> &outputs,
                                  const size_t size,
                                  jit::register_map &registers) {
            source_buffer << std::endl;
@@ -302,16 +302,16 @@ namespace gpu {
///  @params[in,out] registers     Map of used registers.
//------------------------------------------------------------------------------
        void create_kernel_postfix(std::ostringstream &source_buffer,
-                                   graph::output_nodes<T> &outputs,
-                                   graph::map_nodes<T> &setters,
+                                   graph::output_nodes<T, SAFE_MATH> &outputs,
+                                   graph::map_nodes<T, SAFE_MATH> &setters,
                                   jit::register_map &registers) {
            for (auto &[out, in] : setters) {
-                graph::shared_leaf<T> a = out->compile(source_buffer, registers);
+                graph::shared_leaf<T, SAFE_MATH> a = out->compile(source_buffer, registers);
                source_buffer << "        args[std::string(\"" << jit::to_string('v', in.get());
                source_buffer << "\")][i] = " << registers[a.get()] << ";" << std::endl;
            }
            for (auto &out : outputs) {
-                graph::shared_leaf<T> a = out->compile(source_buffer, registers);
+                graph::shared_leaf<T, SAFE_MATH> a = out->compile(source_buffer, registers);
                source_buffer << "        args[std::string(\"" << jit::to_string('o', out.get());
                source_buffer << "\")][i] = " << registers[a.get()] << ";" << std::endl;
            }
@@ -334,7 +334,7 @@ namespace gpu {
///
///  @params[in] node Node to get the buffer for.
//------------------------------------------------------------------------------
-        T *get_buffer(graph::shared_leaf<T> &node) {
+        T *get_buffer(graph::shared_leaf<T, SAFE_MATH> &node) {
            return kernel_arguments[node.get()].data();
        }
    };
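
The CUDA context below receives the identical treatment: every graph::leaf_node, shared_leaf, input_nodes, output_nodes, and map_nodes reference carries the new SAFE_MATH parameter, and the default of false keeps existing single-parameter instantiations compiling. A minimal sketch of that propagation, using hypothetical stand-in node types rather than the real graph classes:

// Sketch only: leaf_node/shared_leaf are hypothetical stand-ins that show how
// a context class forwards its SAFE_MATH parameter to the node types it stores,
// so safe-math and unsafe-math graphs cannot be mixed in one context.
#include <map>
#include <memory>
#include <vector>

template<typename T, bool SAFE_MATH=false>
struct leaf_node {};

template<typename T, bool SAFE_MATH=false>
using shared_leaf = std::shared_ptr<leaf_node<T, SAFE_MATH>>;

template<typename T, bool SAFE_MATH=false>
class cpu_context {
private:
///  Argument map keyed on the same SAFE_MATH instantiation as the graph.
    std::map<leaf_node<T, SAFE_MATH> *, std::vector<T>> kernel_arguments;

public:
    T *get_buffer(shared_leaf<T, SAFE_MATH> &node) {
        return kernel_arguments[node.get()].data();
    }
};

int main() {
    cpu_context<double, true> safe_context;   // Safe-math instantiation.
    cpu_context<double> unsafe_context;       // SAFE_MATH defaults to false.
    (void) safe_context;
    (void) unsafe_context;
    return 0;
}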
+13 −13
@@ -22,7 +22,7 @@ namespace gpu {
//------------------------------------------------------------------------------
///  @brief Class representing a cuda gpu context.
//------------------------------------------------------------------------------
-    template<typename T>
+    template<typename T, bool SAFE_MATH=false>
    class cuda_context {
    private:
///  The cuda device.
@@ -32,7 +32,7 @@ namespace gpu {
///  The cuda code library.
        CUmodule module;
///  Argument map.
-        std::map<graph::leaf_node<T> *, CUdeviceptr> kernel_arguments;
+        std::map<graph::leaf_node<T, SAFE_MATH> *, CUdeviceptr> kernel_arguments;
///  Result buffer.
        CUdeviceptr result_buffer;
///  Cuda stream.
@@ -217,8 +217,8 @@ namespace gpu {
///  @returns A lambda function to run the kernel.
//------------------------------------------------------------------------------
        std::function<void(void)> create_kernel_call(const std::string kernel_name,
-                                                     graph::input_nodes<T> inputs,
-                                                     graph::output_nodes<T> outputs,
+                                                     graph::input_nodes<T, SAFE_MATH> inputs,
+                                                     graph::output_nodes<T, SAFE_MATH> outputs,
                                                     const size_t num_rays) {
            CUfunction function;
            check_error(cuModuleGetFunction(&function, module, kernel_name.c_str()), "cuModuleGetFunction");
@@ -277,7 +277,7 @@ namespace gpu {
///  @params[in] run      Function to run before reduction.
///  @returns A lambda function to run the kernel.
//------------------------------------------------------------------------------
-        std::function<T(void)> create_max_call(graph::shared_leaf<T> &argument,
+        std::function<T(void)> create_max_call(graph::shared_leaf<T, SAFE_MATH> &argument,
                                               std::function<void(void)> run) {
            check_error(cuMemAllocManaged(&result_buffer, sizeof(T),
                                          CU_MEM_ATTACH_GLOBAL),
@@ -321,7 +321,7 @@ namespace gpu {
///  @params[in] nodes Nodes to output.
//------------------------------------------------------------------------------
        void print_results(const size_t index,
-                           const graph::output_nodes<T> &nodes) {
+                           const graph::output_nodes<T, SAFE_MATH> &nodes) {
            wait();
            for (auto &out : nodes) {
                const T temp = reinterpret_cast<T *> (kernel_arguments[out.get()])[index];
@@ -340,7 +340,7 @@ namespace gpu {
///  @params[in] node   Not to copy buffer to.
///  @params[in] source Host side buffer to copy from.
//------------------------------------------------------------------------------
-        void copy_to_device(graph::shared_leaf<T> node,
+        void copy_to_device(graph::shared_leaf<T, SAFE_MATH> node,
                            T *source) {
            size_t size;
            check_error(cuMemGetAddressRange(NULL, &size, kernel_arguments[node.get()]), "cuMemGetAddressRange");
@@ -353,7 +353,7 @@ namespace gpu {
///  @params[in]     node        Node to copy buffer from.
///  @params[in,out] destination Host side buffer to copy to.
//------------------------------------------------------------------------------
-        void copy_to_host(graph::shared_leaf<T> node,
+        void copy_to_host(graph::shared_leaf<T, SAFE_MATH> node,
                          T *destination) {
            size_t size;
            check_error(cuMemGetAddressRange(NULL, &size, kernel_arguments[node.get()]), "cuMemGetAddressRange");
@@ -381,8 +381,8 @@ namespace gpu {
//------------------------------------------------------------------------------
        void create_kernel_prefix(std::ostringstream &source_buffer,
                                  const std::string name,
-                                  graph::input_nodes<T> &inputs,
-                                  graph::output_nodes<T> &outputs,
+                                  graph::input_nodes<T, SAFE_MATH> &inputs,
+                                  graph::output_nodes<T, SAFE_MATH> &outputs,
                                  const size_t size,
                                  jit::register_map &registers) {
            source_buffer << std::endl;
@@ -431,8 +431,8 @@ namespace gpu {

//------------------------------------------------------------------------------
        void create_kernel_postfix(std::ostringstream &source_buffer,
-                                   graph::output_nodes<T> &outputs,
-                                   graph::map_nodes<T> &setters,
+                                   graph::output_nodes<T, SAFE_MATH> &outputs,
+                                   graph::map_nodes<T, SAFE_MATH> &setters,
                                   jit::register_map &registers) {
            for (auto &[out, in] : setters) {
                graph::shared_leaf<T> a = out->compile(source_buffer, registers);
@@ -546,7 +546,7 @@ namespace gpu {
///
///  @params[in] node Node to get the buffer for.
//------------------------------------------------------------------------------
-        T *get_buffer(graph::shared_leaf<T> &node) {
+        T *get_buffer(graph::shared_leaf<T, SAFE_MATH> &node) {
            return reinterpret_cast<T *> (kernel_arguments[node.get()]);
        }
    };
+459 −262
File changed; diff collapsed (preview size limit exceeded).