Loading graph_driver/xrays.cpp +2 −2 Original line number Diff line number Diff line Loading @@ -98,7 +98,7 @@ int main(int argc, const char * argv[]) { solver::rk4<dispersion::cold_plasma<base>> solve(omega, kx, ky, kz, x, y, z, t, 60.0/num_times, eq); solve.init(kx); solve.compile(num_rays); solve.compile(); if (thread_number == 0) { solve.print_dispersion(); std::cout << std::endl; Loading Loading @@ -131,7 +131,7 @@ int main(int argc, const char * argv[]) { if (thread_number == 0) { solve.print(sample); } else { solve.sync(); solve.sync_host(); } }, i, threads.size()); Loading graph_framework/cpu_context.hpp +23 −10 Original line number Diff line number Diff line Loading @@ -98,7 +98,7 @@ namespace gpu { std::cout << " Command Line : " << temp_stream.str() << std::endl; int error = system(temp_stream.str().c_str()); if (error) { std::cout << "Failed to compile cpu kernel. Check source code in " std::cerr << "Failed to compile cpu kernel. Check source code in " << filename << std::endl; exit(error); } Loading Loading @@ -136,7 +136,7 @@ namespace gpu { const size_t num_rays) { void *kernel = dlsym(lib_handle, kernel_name.c_str()); if (!kernel) { std::cout << "Failed to load function. " << kernel_name std::cerr << "Failed to load function. 
" << kernel_name << std::endl; exit(1); } Loading @@ -162,8 +162,8 @@ namespace gpu { std::cout << " Function pointer: " << reinterpret_cast<size_t> (kernel) << std::endl; return [kernel, buffers] { ((void (*)(const std::vector<T *> &))kernel)(buffers); return [kernel, buffers] () mutable { ((void (*)(std::vector<T *> &))kernel)(buffers); }; } Loading @@ -178,7 +178,7 @@ namespace gpu { auto begin = kernel_arguments[argument.get()].cbegin(); auto end = kernel_arguments[argument.get()].cend(); return [run, begin, end] { return [run, begin, end] () mutable { run(); if constexpr (jit::is_complex<T> ()) { return *std::max_element(begin, end, Loading Loading @@ -209,12 +209,25 @@ namespace gpu { } //------------------------------------------------------------------------------ /// @brief Copy buffer contents. /// @brief Copy buffer contents to the device. /// /// @params[in] node Not to copy buffer to. /// @params[in] source Host side buffer to copy from. //------------------------------------------------------------------------------ void copy_to_device(graph::shared_leaf<T> node, T *source) { memcpy(kernel_arguments[node.get()].data(), source, sizeof(T)*kernel_arguments[node.get()].size()); } //------------------------------------------------------------------------------ /// @brief Copy buffer contents to host. /// /// @params[in] node Node to copy buffer from. /// @params[in,out] destination Host side buffer to copy to. 
//------------------------------------------------------------------------------ void copy_buffer(const graph::shared_leaf<T> node, void copy_to_host(const graph::shared_leaf<T> node, T *destination) { memcpy(destination, kernel_arguments[node.get()].data(), Loading Loading @@ -255,7 +268,7 @@ namespace gpu { source_buffer << std::endl; source_buffer << "extern \"C\" void " << name << "(" << std::endl; source_buffer << " const vector<"; source_buffer << " vector<"; jit::add_type<T> (source_buffer); source_buffer << " *> &args) {" << std::endl; Loading graph_framework/cuda_context.hpp +16 −3 Original line number Diff line number Diff line Loading @@ -323,12 +323,25 @@ namespace gpu { } //------------------------------------------------------------------------------ /// @brief Copy buffer contents. /// @brief Copy buffer contents to the device. /// /// The copy is queued on stream. cuMemcpyHtoDAsync takes the device destination first and the host source second. /// /// @params[in] node Node to copy buffer to. /// @params[in] source Host side buffer to copy from. //------------------------------------------------------------------------------ void copy_to_device(graph::shared_leaf<T> node, T *source) { size_t size; check_error(cuMemGetAddressRange(NULL, &size, kernel_arguments[node.get()]), "cuMemGetAddressRange"); check_error_async(cuMemcpyHtoDAsync(kernel_arguments[node.get()], source, size, stream), "cuMemcpyHtoDAsync"); } //------------------------------------------------------------------------------ /// @brief Copy buffer contents to host. /// /// @params[in] node Node to copy buffer from. /// @params[in,out] destination Host side buffer to copy to.
//------------------------------------------------------------------------------ void copy_buffer(graph::shared_leaf<T> node, void copy_to_host(graph::shared_leaf<T> node, T *destination) { size_t size; check_error(cuMemGetAddressRange(NULL, &size, kernel_arguments[node.get()]), "cuMemGetAddressRange"); Loading graph_framework/dispersion.hpp +7 −36 Original line number Diff line number Diff line Loading @@ -13,7 +13,7 @@ #include "vector.hpp" #include "equilibrium.hpp" #include "jit.hpp" #include "workflow.hpp" namespace dispersion { //****************************************************************************** Loading Loading @@ -133,12 +133,7 @@ namespace dispersion { - loss/(loss->df(x) + graph::constant(static_cast<typename DISPERSION_FUNCTION::base> (tolarance))); typename DISPERSION_FUNCTION::base max_residule; size_t iterations = 0; std::unique_ptr<jit::context<typename DISPERSION_FUNCTION::base>> source; auto x_var = graph::variable_cast(x); inputs.push_back(x_var); graph::output_nodes<typename DISPERSION_FUNCTION::base> outputs = { loss Loading @@ -148,37 +143,13 @@ namespace dispersion { {x_next, x_var} }; source = std::make_unique<jit::context<typename DISPERSION_FUNCTION::base>> (); source->add_kernel("loss_kernel", inputs, outputs, setters); source->add_max_reduction(x_var); source->compile(true); workflow::manager<typename DISPERSION_FUNCTION::base> work; work.add_converge_item(inputs, outputs, setters, "loss_kernel", tolarance, max_iterations); work.compile(); work.run(); auto run = source->create_kernel_call("loss_kernel", inputs, outputs, x_var->size()); auto max = source->create_max_call(loss, run); max_residule = max(); while (std::abs(max_residule) > std::abs(tolarance) && iterations++ < max_iterations) { max_residule = max(); } source->copy_buffer(x, x_var->data()); // In release mode asserts are diaables so write error to standard err. Need to // flip the comparison operator because we want to assert to trip if false. 
assert(iterations < max_iterations && "Newton solve failed to converge with in given iterations."); if (iterations > max_iterations) { std::cerr << "Newton solve failed to converge with in given iterations." << std::endl; std::cerr << "Minimum residule reached: " << max_residule << std::endl; } work.copy_to_host(x, x_var->data()); return loss; } Loading graph_framework/jit.hpp +18 −8 Original line number Diff line number Diff line Loading @@ -61,7 +61,6 @@ namespace jit { //------------------------------------------------------------------------------ context() { source_buffer << std::setprecision(jit::max_digits10<T> ()); gpu_context.create_header(source_buffer); } Loading Loading @@ -101,10 +100,10 @@ namespace jit { //------------------------------------------------------------------------------ /// @brief Add max reduction kernel. /// /// @params[in] input Graph node to reduce. /// @params[in] size Size of the input buffer. //------------------------------------------------------------------------------ void add_max_reduction(graph::shared_variable<T> input) { gpu_context.create_reduction(source_buffer, input->size()); void add_max_reduction(const size_t size) { gpu_context.create_reduction(source_buffer, size); } //------------------------------------------------------------------------------ Loading Loading @@ -172,14 +171,25 @@ namespace jit { } //------------------------------------------------------------------------------ /// @brief Copy contexts of buffer. /// @brief Copy contents of buffer to device. /// /// Forwards to gpu_context.copy_to_device. /// /// @params[in] node Node to copy buffer to. /// @params[in] source Host side buffer to copy from. //------------------------------------------------------------------------------ void copy_to_device(graph::shared_leaf<T> &node, T *source) { gpu_context.copy_to_device(node, source); } //------------------------------------------------------------------------------ /// @brief Copy contents of buffer to host.
/// /// @params[in] node Node to copy buffer from. /// @params[in,out] destination Host side buffer to copy to. //------------------------------------------------------------------------------ void copy_buffer(graph::shared_leaf<T> &node, void copy_to_host(graph::shared_leaf<T> &node, T *destination) { gpu_context.copy_buffer(node, destination); gpu_context.copy_to_host(node, destination); } }; } Loading Loading
graph_driver/xrays.cpp +2 −2 Original line number Diff line number Diff line Loading @@ -98,7 +98,7 @@ int main(int argc, const char * argv[]) { solver::rk4<dispersion::cold_plasma<base>> solve(omega, kx, ky, kz, x, y, z, t, 60.0/num_times, eq); solve.init(kx); solve.compile(num_rays); solve.compile(); if (thread_number == 0) { solve.print_dispersion(); std::cout << std::endl; Loading Loading @@ -131,7 +131,7 @@ int main(int argc, const char * argv[]) { if (thread_number == 0) { solve.print(sample); } else { solve.sync(); solve.sync_host(); } }, i, threads.size()); Loading
graph_framework/cpu_context.hpp +23 −10 Original line number Diff line number Diff line Loading @@ -98,7 +98,7 @@ namespace gpu { std::cout << " Command Line : " << temp_stream.str() << std::endl; int error = system(temp_stream.str().c_str()); if (error) { std::cout << "Failed to compile cpu kernel. Check source code in " std::cerr << "Failed to compile cpu kernel. Check source code in " << filename << std::endl; exit(error); } Loading Loading @@ -136,7 +136,7 @@ namespace gpu { const size_t num_rays) { void *kernel = dlsym(lib_handle, kernel_name.c_str()); if (!kernel) { std::cout << "Failed to load function. " << kernel_name std::cerr << "Failed to load function. " << kernel_name << std::endl; exit(1); } Loading @@ -162,8 +162,8 @@ namespace gpu { std::cout << " Function pointer: " << reinterpret_cast<size_t> (kernel) << std::endl; return [kernel, buffers] { ((void (*)(const std::vector<T *> &))kernel)(buffers); return [kernel, buffers] () mutable { ((void (*)(std::vector<T *> &))kernel)(buffers); }; } Loading @@ -178,7 +178,7 @@ namespace gpu { auto begin = kernel_arguments[argument.get()].cbegin(); auto end = kernel_arguments[argument.get()].cend(); return [run, begin, end] { return [run, begin, end] () mutable { run(); if constexpr (jit::is_complex<T> ()) { return *std::max_element(begin, end, Loading Loading @@ -209,12 +209,25 @@ namespace gpu { } //------------------------------------------------------------------------------ /// @brief Copy buffer contents. /// @brief Copy buffer contents to the device. /// /// @params[in] node Not to copy buffer to. /// @params[in] source Host side buffer to copy from. 
//------------------------------------------------------------------------------ void copy_to_device(graph::shared_leaf<T> node, T *source) { memcpy(kernel_arguments[node.get()].data(), source, sizeof(T)*kernel_arguments[node.get()].size()); } //------------------------------------------------------------------------------ /// @brief Copy buffer contents to host. /// /// @params[in] node Node to copy buffer from. /// @params[in,out] destination Host side buffer to copy to. //------------------------------------------------------------------------------ void copy_buffer(const graph::shared_leaf<T> node, void copy_to_host(const graph::shared_leaf<T> node, T *destination) { memcpy(destination, kernel_arguments[node.get()].data(), Loading Loading @@ -255,7 +268,7 @@ namespace gpu { source_buffer << std::endl; source_buffer << "extern \"C\" void " << name << "(" << std::endl; source_buffer << " const vector<"; source_buffer << " vector<"; jit::add_type<T> (source_buffer); source_buffer << " *> &args) {" << std::endl; Loading
graph_framework/cuda_context.hpp +16 −3 Original line number Diff line number Diff line Loading @@ -323,12 +323,25 @@ namespace gpu { } //------------------------------------------------------------------------------ /// @brief Copy buffer contents. /// @brief Copy buffer contents to the device. /// /// The copy is queued on stream. cuMemcpyHtoDAsync takes the device destination first and the host source second. /// /// @params[in] node Node to copy buffer to. /// @params[in] source Host side buffer to copy from. //------------------------------------------------------------------------------ void copy_to_device(graph::shared_leaf<T> node, T *source) { size_t size; check_error(cuMemGetAddressRange(NULL, &size, kernel_arguments[node.get()]), "cuMemGetAddressRange"); check_error_async(cuMemcpyHtoDAsync(kernel_arguments[node.get()], source, size, stream), "cuMemcpyHtoDAsync"); } //------------------------------------------------------------------------------ /// @brief Copy buffer contents to host. /// /// @params[in] node Node to copy buffer from. /// @params[in,out] destination Host side buffer to copy to. //------------------------------------------------------------------------------ void copy_buffer(graph::shared_leaf<T> node, void copy_to_host(graph::shared_leaf<T> node, T *destination) { size_t size; check_error(cuMemGetAddressRange(NULL, &size, kernel_arguments[node.get()]), "cuMemGetAddressRange"); Loading
graph_framework/dispersion.hpp +7 −36 Original line number Diff line number Diff line Loading @@ -13,7 +13,7 @@ #include "vector.hpp" #include "equilibrium.hpp" #include "jit.hpp" #include "workflow.hpp" namespace dispersion { //****************************************************************************** Loading Loading @@ -133,12 +133,7 @@ namespace dispersion { - loss/(loss->df(x) + graph::constant(static_cast<typename DISPERSION_FUNCTION::base> (tolarance))); typename DISPERSION_FUNCTION::base max_residule; size_t iterations = 0; std::unique_ptr<jit::context<typename DISPERSION_FUNCTION::base>> source; auto x_var = graph::variable_cast(x); inputs.push_back(x_var); graph::output_nodes<typename DISPERSION_FUNCTION::base> outputs = { loss Loading @@ -148,37 +143,13 @@ namespace dispersion { {x_next, x_var} }; source = std::make_unique<jit::context<typename DISPERSION_FUNCTION::base>> (); source->add_kernel("loss_kernel", inputs, outputs, setters); source->add_max_reduction(x_var); source->compile(true); workflow::manager<typename DISPERSION_FUNCTION::base> work; work.add_converge_item(inputs, outputs, setters, "loss_kernel", tolarance, max_iterations); work.compile(); work.run(); auto run = source->create_kernel_call("loss_kernel", inputs, outputs, x_var->size()); auto max = source->create_max_call(loss, run); max_residule = max(); while (std::abs(max_residule) > std::abs(tolarance) && iterations++ < max_iterations) { max_residule = max(); } source->copy_buffer(x, x_var->data()); // In release mode asserts are diaables so write error to standard err. Need to // flip the comparison operator because we want to assert to trip if false. assert(iterations < max_iterations && "Newton solve failed to converge with in given iterations."); if (iterations > max_iterations) { std::cerr << "Newton solve failed to converge with in given iterations." 
<< std::endl; std::cerr << "Minimum residule reached: " << max_residule << std::endl; } work.copy_to_host(x, x_var->data()); return loss; } Loading
graph_framework/jit.hpp +18 −8 Original line number Diff line number Diff line Loading @@ -61,7 +61,6 @@ namespace jit { //------------------------------------------------------------------------------ context() { source_buffer << std::setprecision(jit::max_digits10<T> ()); gpu_context.create_header(source_buffer); } Loading Loading @@ -101,10 +100,10 @@ namespace jit { //------------------------------------------------------------------------------ /// @brief Add max reduction kernel. /// /// @params[in] input Graph node to reduce. /// @params[in] size Size of the input buffer. //------------------------------------------------------------------------------ void add_max_reduction(graph::shared_variable<T> input) { gpu_context.create_reduction(source_buffer, input->size()); void add_max_reduction(const size_t size) { gpu_context.create_reduction(source_buffer, size); } //------------------------------------------------------------------------------ Loading Loading @@ -172,14 +171,25 @@ namespace jit { } //------------------------------------------------------------------------------ /// @brief Copy contexts of buffer. /// @brief Copy contents of buffer to device. /// /// Forwards to gpu_context.copy_to_device. /// /// @params[in] node Node to copy buffer to. /// @params[in] source Host side buffer to copy from. //------------------------------------------------------------------------------ void copy_to_device(graph::shared_leaf<T> &node, T *source) { gpu_context.copy_to_device(node, source); } //------------------------------------------------------------------------------ /// @brief Copy contents of buffer to host. /// /// @params[in] node Node to copy buffer from. /// @params[in,out] destination Host side buffer to copy to.
//------------------------------------------------------------------------------ void copy_buffer(graph::shared_leaf<T> &node, void copy_to_host(graph::shared_leaf<T> &node, T *destination) { gpu_context.copy_buffer(node, destination); gpu_context.copy_to_host(node, destination); } }; } Loading