Fix memory corruption error by using the workflow manager inside the solver. (bbfbafa8) · Commits · Cianciosa, Mark / graph_framework

graph_driver/xrays.cpp

+2 −2

Original line number	Diff line number	Diff line
		@@ -98,7 +98,7 @@ int main(int argc, const char * argv[]) {
		solver::rk4<dispersion::cold_plasma<base>>
		solve(omega, kx, ky, kz, x, y, z, t, 60.0/num_times, eq);
		solve.init(kx);
		solve.compile(num_rays);
		solve.compile();
		if (thread_number == 0) {
		solve.print_dispersion();
		std::cout << std::endl;
		@@ -131,7 +131,7 @@ int main(int argc, const char * argv[]) {
		if (thread_number == 0) {
		solve.print(sample);
		} else {
		solve.sync();
		solve.sync_host();
		}

		}, i, threads.size());

graph_framework/cpu_context.hpp

+23 −10

Original line number	Diff line number	Diff line
		@@ -98,7 +98,7 @@ namespace gpu {
		std::cout << " Command Line : " << temp_stream.str() << std::endl;
		int error = system(temp_stream.str().c_str());
		if (error) {
		std::cout << "Failed to compile cpu kernel. Check source code in "
		std::cerr << "Failed to compile cpu kernel. Check source code in "
		<< filename << std::endl;
		exit(error);
		}
		@@ -136,7 +136,7 @@ namespace gpu {
		const size_t num_rays) {
		void *kernel = dlsym(lib_handle, kernel_name.c_str());
		if (!kernel) {
		std::cout << "Failed to load function. " << kernel_name
		std::cerr << "Failed to load function. " << kernel_name
		<< std::endl;
		exit(1);
		}
		@@ -162,8 +162,8 @@ namespace gpu {

		std::cout << " Function pointer: " << reinterpret_cast<size_t> (kernel) << std::endl;

		return [kernel, buffers] {
		((void ()(const std::vector<T > &))kernel)(buffers);
		return [kernel, buffers] () mutable {
		((void ()(std::vector<T > &))kernel)(buffers);
		};
		}

		@@ -178,7 +178,7 @@ namespace gpu {
		auto begin = kernel_arguments[argument.get()].cbegin();
		auto end = kernel_arguments[argument.get()].cend();

		return [run, begin, end] {
		return [run, begin, end] () mutable {
		run();
		if constexpr (jit::is_complex<T> ()) {
		return *std::max_element(begin, end,
		@@ -209,12 +209,25 @@ namespace gpu {
		}

		//------------------------------------------------------------------------------
		/// @brief Copy buffer contents.
		/// @brief Copy buffer contents to the device.
		///
		/// @params[in] node Node to copy buffer to.
		/// @params[in] source Host side buffer to copy from.
		//------------------------------------------------------------------------------
		void copy_to_device(graph::shared_leaf<T> node,
		                    T *source) {
		// Fetch the buffer registered for this node once and reuse the reference
		// for both the destination pointer and the element count.
		    auto &device_buffer = kernel_arguments[node.get()];
		    const size_t num_bytes = sizeof(T)*device_buffer.size();
		    memcpy(device_buffer.data(), source, num_bytes);
		}

		//------------------------------------------------------------------------------
		/// @brief Copy buffer contents to host.
		///
		/// @params[in] node Node to copy buffer from.
		/// @params[in,out] destination Host side buffer to copy to.
		//------------------------------------------------------------------------------
		void copy_buffer(const graph::shared_leaf<T> node,
		void copy_to_host(const graph::shared_leaf<T> node,
		T *destination) {
		memcpy(destination,
		kernel_arguments[node.get()].data(),
		@@ -255,7 +268,7 @@ namespace gpu {
		source_buffer << std::endl;
		source_buffer << "extern \"C\" void " << name << "(" << std::endl;

		source_buffer << " const vector<";
		source_buffer << " vector<";
		jit::add_type<T> (source_buffer);
		source_buffer << " *> &args) {" << std::endl;

graph_framework/cuda_context.hpp

+16 −3

Original line number	Diff line number	Diff line
		@@ -323,12 +323,25 @@ namespace gpu {
		}

		//------------------------------------------------------------------------------
		/// @brief Copy buffer contents.
		/// @brief Copy buffer contents to the device.
		///
		/// @params[in] node Node to copy buffer to.
		/// @params[in] source Host side buffer to copy from.
		//------------------------------------------------------------------------------
		void copy_to_device(graph::shared_leaf<T> node,
		                    T *source) {
		// Query the byte size of the device allocation that backs this node so the
		// whole buffer is transferred.
		    size_t size;
		    check_error(cuMemGetAddressRange(NULL, &size, kernel_arguments[node.get()]), "cuMemGetAddressRange");
		// cuMemcpyHtoDAsync expects (dstDevice, srcHost, ByteCount, hStream): the
		// device buffer is the destination and the host pointer is the source.
		    check_error_async(cuMemcpyHtoDAsync(kernel_arguments[node.get()], source, size, stream), "cuMemcpyHtoDAsync");
		}

		//------------------------------------------------------------------------------
		/// @brief Copy buffer contents to host.
		///
		/// @params[in] node Node to copy buffer from.
		/// @params[in,out] destination Host side buffer to copy to.
		//------------------------------------------------------------------------------
		void copy_buffer(graph::shared_leaf<T> node,
		void copy_to_host(graph::shared_leaf<T> node,
		T *destination) {
		size_t size;
		check_error(cuMemGetAddressRange(NULL, &size, kernel_arguments[node.get()]), "cuMemGetAddressRange");

graph_framework/dispersion.hpp

+7 −36

Original line number	Diff line number	Diff line
		@@ -13,7 +13,7 @@

		#include "vector.hpp"
		#include "equilibrium.hpp"
		#include "jit.hpp"
		#include "workflow.hpp"

		namespace dispersion {
		//******************************************************************************
		@@ -133,12 +133,7 @@ namespace dispersion {
		- loss/(loss->df(x) +
		graph::constant(static_cast<typename DISPERSION_FUNCTION::base> (tolarance)));

		typename DISPERSION_FUNCTION::base max_residule;
		size_t iterations = 0;
		std::unique_ptr<jit::context<typename DISPERSION_FUNCTION::base>> source;

		auto x_var = graph::variable_cast(x);
		inputs.push_back(x_var);

		graph::output_nodes<typename DISPERSION_FUNCTION::base> outputs = {
		loss
		@@ -148,37 +143,13 @@ namespace dispersion {
		{x_next, x_var}
		};

		source = std::make_unique<jit::context<typename DISPERSION_FUNCTION::base>> ();
		source->add_kernel("loss_kernel",
		inputs,
		outputs,
		setters);
		source->add_max_reduction(x_var);

		source->compile(true);
		workflow::manager<typename DISPERSION_FUNCTION::base> work;
		work.add_converge_item(inputs, outputs, setters, "loss_kernel",
		tolarance, max_iterations);
		work.compile();
		work.run();

		auto run = source->create_kernel_call("loss_kernel", inputs,
		outputs, x_var->size());

		auto max = source->create_max_call(loss, run);
		max_residule = max();
		while (std::abs(max_residule) > std::abs(tolarance) &&
		iterations++ < max_iterations) {
		max_residule = max();
		}

		source->copy_buffer(x, x_var->data());

		// In release mode asserts are disabled so write error to standard err. Need to
		// flip the comparison operator because we want the assert to trip if false.
		assert(iterations < max_iterations &&
		"Newton solve failed to converge with in given iterations.");
		if (iterations > max_iterations) {
		std::cerr << "Newton solve failed to converge with in given iterations."
		<< std::endl;
		std::cerr << "Minimum residule reached: " << max_residule
		<< std::endl;
		}
		work.copy_to_host(x, x_var->data());

		return loss;
		}

graph_framework/jit.hpp

+18 −8

Original line number	Diff line number	Diff line
		@@ -61,7 +61,6 @@ namespace jit {
		//------------------------------------------------------------------------------
		context() {
		source_buffer << std::setprecision(jit::max_digits10<T> ());

		gpu_context.create_header(source_buffer);
		}

		@@ -101,10 +100,10 @@ namespace jit {
		//------------------------------------------------------------------------------
		/// @brief Add max reduction kernel.
		///
		/// @params[in] input Graph node to reduce.
		/// @params[in] size Size of the input buffer.
		//------------------------------------------------------------------------------
		void add_max_reduction(graph::shared_variable<T> input) {
		gpu_context.create_reduction(source_buffer, input->size());
		void add_max_reduction(const size_t size) {
		gpu_context.create_reduction(source_buffer, size);
		}

		//------------------------------------------------------------------------------
		@@ -172,14 +171,25 @@ namespace jit {
		}

		//------------------------------------------------------------------------------
		/// @brief Copy contents of buffer.
		/// @brief Copy contents of buffer to device.
		///
		/// @params[in] node Node to copy buffer to.
		/// @params[in] source Host side buffer to copy from.
		//------------------------------------------------------------------------------
		void copy_to_device(graph::shared_leaf<T> &node,
		T *source) {
		// Forward the request unchanged to gpu_context, which performs the copy.
		gpu_context.copy_to_device(node, source);
		}

		//------------------------------------------------------------------------------
		/// @brief Copy contents of buffer to host.
		///
		/// @params[in] node Node to copy buffer from.
		/// @params[in,out] destination Host side buffer to copy to.
		//------------------------------------------------------------------------------
		void copy_buffer(graph::shared_leaf<T> &node,
		void copy_to_host(graph::shared_leaf<T> &node,
		T *destination) {
		gpu_context.copy_buffer(node, destination);
		gpu_context.copy_to_host(node, destination);
		}
		};
		}