Commit bbfbafa8 authored by Cianciosa, Mark

Fix memory corruption error by using the workflow manager inside the solver.

parent af9434b4
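
In outline: the dispersion solver used to build its own jit::context, compile a loss kernel, and run the Newton iteration loop by hand; this commit hands all of that to a single workflow::manager that owns the kernel and its buffers, which is presumably what closes off the memory corruption. A minimal sketch of the new call sequence, assembled from the dispersion hunk below; the add_converge_item signature is inferred from that one call site, and base stands for the solver's scalar type:

    #include "workflow.hpp"

    // inputs, outputs, and setters are the same graph objects the old
    // jit::context received; "loss_kernel" is the generated kernel name.
    workflow::manager<base> work;
    work.add_converge_item(inputs, outputs, setters, "loss_kernel",
                           tolarance, max_iterations);
    work.compile();                      // JIT compile the converge kernel.
    work.run();                          // Iterate the kernel until converged.
    work.copy_to_host(x, x_var->data()); // Pull the solution back.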
+2 −2
@@ -98,7 +98,7 @@ int main(int argc, const char * argv[]) {
             solver::rk4<dispersion::cold_plasma<base>>
                 solve(omega, kx, ky, kz, x, y, z, t, 60.0/num_times, eq);
             solve.init(kx);
-            solve.compile(num_rays);
+            solve.compile();
             if (thread_number == 0) {
                 solve.print_dispersion();
                 std::cout << std::endl;
@@ -131,7 +131,7 @@ int main(int argc, const char * argv[]) {
             if (thread_number == 0) {
                 solve.print(sample);
             } else {
-                solve.sync();
+                solve.sync_host();
             }
 
         }, i, threads.size());
+23 −10
@@ -98,7 +98,7 @@ namespace gpu {
            std::cout << "  Command Line    : " << temp_stream.str() << std::endl;
            int error = system(temp_stream.str().c_str());
            if (error) {
                std::cout << "Failed to compile cpu kernel. Check source code in "
                std::cerr << "Failed to compile cpu kernel. Check source code in "
                          << filename << std::endl;
                exit(error);
            }
@@ -136,7 +136,7 @@ namespace gpu {
                                                       const size_t num_rays) {
             void *kernel = dlsym(lib_handle, kernel_name.c_str());
             if (!kernel) {
-                std::cout << "Failed to load function. " << kernel_name
+                std::cerr << "Failed to load function. " << kernel_name
                           << std::endl;
                 exit(1);
             }
@@ -162,8 +162,8 @@ namespace gpu {

            std::cout << "  Function pointer: " << reinterpret_cast<size_t> (kernel) << std::endl;

            return [kernel, buffers] {
                ((void (*)(const std::vector<T *> &))kernel)(buffers);
            return [kernel, buffers] () mutable {
                ((void (*)(std::vector<T *> &))kernel)(buffers);
            };
        }

@@ -178,7 +178,7 @@ namespace gpu {
             auto begin = kernel_arguments[argument.get()].cbegin();
             auto end = kernel_arguments[argument.get()].cend();
 
-            return [run, begin, end] {
+            return [run, begin, end] () mutable {
                 run();
                 if constexpr (jit::is_complex<T> ()) {
                     return *std::max_element(begin, end,
@@ -209,12 +209,25 @@ namespace gpu {
         }
 
 //------------------------------------------------------------------------------
-///  @brief Copy buffer contents.
+///  @brief Copy buffer contents to the device.
 ///
 ///  @params[in] node   Node to copy buffer to.
 ///  @params[in] source Host side buffer to copy from.
 //------------------------------------------------------------------------------
+        void copy_to_device(graph::shared_leaf<T> node,
+                            T *source) {
+            memcpy(kernel_arguments[node.get()].data(),
+                   source,
+                   sizeof(T)*kernel_arguments[node.get()].size());
+        }
+
+//------------------------------------------------------------------------------
+///  @brief Copy buffer contents to host.
+///
+///  @params[in]     node        Node to copy buffer from.
+///  @params[in,out] destination Host side buffer to copy to.
+//------------------------------------------------------------------------------
-        void copy_buffer(const graph::shared_leaf<T> node,
+        void copy_to_host(const graph::shared_leaf<T> node,
                           T *destination) {
             memcpy(destination,
                    kernel_arguments[node.get()].data(),
@@ -255,7 +268,7 @@ namespace gpu {
             source_buffer << std::endl;
             source_buffer << "extern \"C\" void " << name << "(" << std::endl;
 
-            source_buffer << "    const vector<";
+            source_buffer << "    vector<";
             jit::add_type<T> (source_buffer);
             source_buffer << " *> &args) {" << std::endl;
 
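The CPU context mirrors each graph node's buffer in kernel_arguments, so the new copy_to_device/copy_to_host pair above is just memcpy run in opposite directions. A self-contained toy of that mirroring technique; buffer_mirror and its void-pointer keys are illustrative stand-ins, not the project's types:

    #include <cstring>
    #include <map>
    #include <vector>

    // Toy buffer mirror: each node key owns one "device" buffer, and the
    // two copy directions are plain memcpys sized by the mirrored buffer.
    struct buffer_mirror {
        std::map<const void *, std::vector<double>> kernel_arguments;

        void copy_to_device(const void *node, const double *source) {
            auto &buffer = kernel_arguments[node];
            std::memcpy(buffer.data(), source, sizeof(double)*buffer.size());
        }

        void copy_to_host(const void *node, double *destination) {
            auto &buffer = kernel_arguments[node];
            std::memcpy(destination, buffer.data(), sizeof(double)*buffer.size());
        }
    };

Splitting the old copy_buffer into two explicitly named directions also makes call sites such as work.copy_to_host(x, x_var->data()) read unambiguously.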
+16 −3
@@ -323,12 +323,25 @@ namespace gpu {
         }
 
 //------------------------------------------------------------------------------
-///  @brief Copy buffer contents.
+///  @brief Copy buffer contents to the device.
 ///
 ///  @params[in] node   Node to copy buffer to.
 ///  @params[in] source Host side buffer to copy from.
 //------------------------------------------------------------------------------
+        void copy_to_device(graph::shared_leaf<T> node,
+                            T *source) {
+            size_t size;
+            check_error(cuMemGetAddressRange(NULL, &size, kernel_arguments[node.get()]), "cuMemGetAddressRange");
+            check_error_async(cuMemcpyHtoDAsync(kernel_arguments[node.get()], source, size, stream), "cuMemcpyHtoDAsync");
+        }
+
+//------------------------------------------------------------------------------
+///  @brief Copy buffer contents to host.
+///
+///  @params[in]     node        Node to copy buffer from.
+///  @params[in,out] destination Host side buffer to copy to.
+//------------------------------------------------------------------------------
-        void copy_buffer(graph::shared_leaf<T> node,
+        void copy_to_host(graph::shared_leaf<T> node,
                           T *destination) {
             size_t size;
             check_error(cuMemGetAddressRange(NULL, &size, kernel_arguments[node.get()]), "cuMemGetAddressRange");
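In the CUDA backend the same pair becomes driver-API calls: cuMemGetAddressRange recovers the allocation size from the device pointer, and cuMemcpyHtoDAsync takes the device destination first and the host source second, ordered on the context's stream. A standalone round-trip sketch using only documented driver-API entry points, with error checking elided; this is not project code:

    #include <cuda.h>
    #include <vector>

    int main() {
        cuInit(0);
        CUdevice device;
        cuDeviceGet(&device, 0);
        CUcontext context;
        cuCtxCreate(&context, 0, device);
        CUstream stream;
        cuStreamCreate(&stream, CU_STREAM_DEFAULT);

        std::vector<double> host(1024, 1.0);
        const size_t bytes = sizeof(double)*host.size();

        CUdeviceptr device_buffer;
        cuMemAlloc(&device_buffer, bytes);
        // Device destination first, host source second.
        cuMemcpyHtoDAsync(device_buffer, host.data(), bytes, stream);
        cuMemcpyDtoHAsync(host.data(), device_buffer, bytes, stream);
        // The host buffer is only safe to read after the stream drains,
        // which is presumably why the solver exposes sync_host().
        cuStreamSynchronize(stream);

        cuMemFree(device_buffer);
        cuStreamDestroy(stream);
        cuCtxDestroy(context);
        return 0;
    }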
+7 −36
@@ -13,7 +13,7 @@

#include "vector.hpp"
#include "equilibrium.hpp"
#include "jit.hpp"
#include "workflow.hpp"

namespace dispersion {
//******************************************************************************
@@ -133,12 +133,7 @@ namespace dispersion {
                         - loss/(loss->df(x) +
                                 graph::constant(static_cast<typename DISPERSION_FUNCTION::base> (tolarance)));
 
-            typename DISPERSION_FUNCTION::base max_residule;
-            size_t iterations = 0;
-            std::unique_ptr<jit::context<typename DISPERSION_FUNCTION::base>> source;
-
             auto x_var = graph::variable_cast(x);
             inputs.push_back(x_var);
 
             graph::output_nodes<typename DISPERSION_FUNCTION::base> outputs = {
                 loss
@@ -148,37 +143,13 @@ namespace dispersion {
                 {x_next, x_var}
             };
 
-            source = std::make_unique<jit::context<typename DISPERSION_FUNCTION::base>> ();
-            source->add_kernel("loss_kernel",
-                               inputs,
-                               outputs,
-                               setters);
-            source->add_max_reduction(x_var);
-
-            source->compile(true);
+            workflow::manager<typename DISPERSION_FUNCTION::base> work;
+            work.add_converge_item(inputs, outputs, setters, "loss_kernel",
+                                   tolarance, max_iterations);
+            work.compile();
+            work.run();
 
-            auto run = source->create_kernel_call("loss_kernel", inputs,
-                                                  outputs, x_var->size());
-
-            auto max = source->create_max_call(loss, run);
-            max_residule = max();
-            while (std::abs(max_residule) > std::abs(tolarance) &&
-                   iterations++ < max_iterations) {
-                   max_residule = max();
-            }
-
-            source->copy_buffer(x, x_var->data());
-
-//  In release mode asserts are disabled so write the error to standard err. Need
-//  to flip the comparison operator because we want the assert to trip if false.
-            assert(iterations < max_iterations &&
-                   "Newton solve failed to converge within given iterations.");
-            if (iterations > max_iterations) {
-                std::cerr << "Newton solve failed to converge within given iterations."
-                          << std::endl;
-                std::cerr << "Minimum residual reached: " << max_residule
-                          << std::endl;
-            }
+            work.copy_to_host(x, x_var->data());
 
             return loss;
         }
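The convergence loop that used to live here (run the kernel, reduce to the max residual, repeat until it drops below the tolerance or hits the iteration cap) is now the manager's responsibility. A hypothetical sketch of what a converge item does per iteration, modeled directly on the removed loop; the actual workflow::manager internals are not shown in this commit:

    #include <cassert>
    #include <cmath>
    #include <cstddef>
    #include <functional>

    // 'step' runs the loss kernel once and returns the max residual.
    // T is a real scalar type such as float or double.
    template<typename T>
    T converge(std::function<T()> step, const T tolerance,
               const size_t max_iterations) {
        size_t iterations = 0;
        T residual = step();
        while (std::abs(residual) > std::abs(tolerance) &&
               iterations++ < max_iterations) {
            residual = step();
        }
        assert(iterations < max_iterations &&
               "Newton solve failed to converge within given iterations.");
        return residual;
    }

Folding the loop, the reduction, and the final copy into one owner removes the local std::unique_ptr<jit::context> that every solve call previously managed by hand.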
+18 −8
@@ -61,7 +61,6 @@ namespace jit {
 //------------------------------------------------------------------------------
         context() {
             source_buffer << std::setprecision(jit::max_digits10<T> ());
-
             gpu_context.create_header(source_buffer);
         }
 
@@ -101,10 +100,10 @@ namespace jit {
 //------------------------------------------------------------------------------
 ///  @brief Add max reduction kernel.
 ///
-///  @params[in] input Graph node to reduce.
+///  @params[in] size Size of the input buffer.
 //------------------------------------------------------------------------------
-        void add_max_reduction(graph::shared_variable<T> input) {
-            gpu_context.create_reduction(source_buffer, input->size());
+        void add_max_reduction(const size_t size) {
+            gpu_context.create_reduction(source_buffer, size);
         }
 
 //------------------------------------------------------------------------------
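With the reduction generated from a plain size, add_max_reduction no longer depends on a graph variable, so the jit layer stays decoupled from graph node types. For reference, the host-side analogue of the generated max reduction is the max_element-by-magnitude pattern in the create_max_call lambda earlier in this commit; a small standalone version, illustrative only:

    #include <algorithm>
    #include <cmath>
    #include <complex>
    #include <vector>

    // Max residual by magnitude, matching the is_complex branch above.
    std::complex<double>
    max_residual(const std::vector<std::complex<double>> &buffer) {
        return *std::max_element(buffer.cbegin(), buffer.cend(),
            [](const std::complex<double> &a,
               const std::complex<double> &b) {
                return std::abs(a) < std::abs(b);
            });
    }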
@@ -172,14 +171,25 @@ namespace jit {
         }
 
 //------------------------------------------------------------------------------
-///  @brief Copy contents of buffer.
+///  @brief Copy contents of buffer to device.
 ///
 ///  @params[in] node   Node to copy buffer to.
 ///  @params[in] source Host side buffer to copy from.
 //------------------------------------------------------------------------------
+        void copy_to_device(graph::shared_leaf<T> &node,
+                            T *source) {
+            gpu_context.copy_to_device(node, source);
+        }
+
+//------------------------------------------------------------------------------
+///  @brief Copy contents of buffer to host.
+///
+///  @params[in]     node        Node to copy buffer from.
+///  @params[in,out] destination Host side buffer to copy to.
+//------------------------------------------------------------------------------
-        void copy_buffer(graph::shared_leaf<T> &node,
+        void copy_to_host(graph::shared_leaf<T> &node,
                           T *destination) {
-            gpu_context.copy_buffer(node, destination);
+            gpu_context.copy_to_host(node, destination);
         }
     };
 }
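
jit::context forwards both copy directions to its gpu_context, so callers see one interface whether the backend is the memcpy-based CPU context or the CUDA stream version. A minimal sketch of the round trip through this interface, assembled only from calls that appear elsewhere in this commit; the setup mirrors the code removed from the dispersion solver:

    jit::context<double> source;
    source.add_kernel("loss_kernel", inputs, outputs, setters);
    source.add_max_reduction(x_var->size()); // Now takes a size, not a node.
    source.compile(true);

    auto run = source.create_kernel_call("loss_kernel", inputs, outputs,
                                         x_var->size());
    source.copy_to_device(x, x_var->data()); // Push host values to the device.
    run();                                   // Evaluate the kernel.
    source.copy_to_host(x, x_var->data());   // Pull the results back.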