Separate .cu implementation of G0Interpolation<GPU> methods similarly to other accelerated classes. (7a0f21e0) · Commits · NDIP / Tool Sources / Direct-Geometry Spectroscopy / DCA / DCA Main

include/dca/phys/dca_step/cluster_solver/ctint/walker/tools/device_interpolation_data.hpp

+1 −1

Original line number	Diff line number	Diff line
		@@ -60,7 +60,7 @@ protected:
		unsigned stride_;
		Real beta_, n_div_beta_;
		Real values_, g0_minus_;
		}; // namespace ctint
		};

		} // namespace ctint
		} // namespace solver

include/dca/phys/dca_step/cluster_solver/ctint/walker/tools/g0_interpolation_gpu.hpp

+5 −0

Original line number	Diff line number	Diff line
		@@ -109,6 +109,11 @@ G0Interpolation<linalg::GPU, Real>::G0Interpolation(const func::function<double,
		initialize(G0_pars_t);
		}

		// Host-callable evaluation of the interpolation at (tau, lindex).
		// The work is delegated to details::interpolateSlow, which is declared in
		// kernels_interface.hpp, so this header carries no device code of its own.
		template <typename Real>
		Real G0Interpolation<linalg::GPU, Real>::operator()(Real tau, int lindex) const {
		  // Convert *this to a DeviceInterpolationData<Real> value; the helper takes that
		  // type by const reference.
		  const DeviceInterpolationData<Real> device_view(*this);
		  return details::interpolateSlow(tau, lindex, device_view);
		}

		} // namespace ctint
		} // namespace solver
		} // namespace phys

include/dca/phys/dca_step/cluster_solver/ctint/walker/tools/kernels_interface.hpp

+3 −0

Original line number	Diff line number	Diff line
		@@ -31,6 +31,9 @@ void buildG0Matrix(linalg::MatrixView<Real, linalg::GPU> G0, const int n_init,
		const bool right_section, DeviceConfiguration config,
		DeviceInterpolationData<Real> g0_interp, cudaStream_t stream);

		// Evaluates g0(tau, linindex) on the device and returns the value to the host.
		// The implementation (g0_interpolation_gpu.cu) allocates a single device scalar,
		// launches a <<<1, 1>>> kernel that writes g0(tau, linindex) into it, and copies the
		// result back with cudaMemcpy.
		// NOTE(review): given the per-call allocation and the "Slow" suffix this is presumably
		// meant for tests and validation rather than production sampling; confirm with callers.
		// Only the float and double specializations are explicitly instantiated in the .cu file.
		template <typename Real>
		Real interpolateSlow(Real tau, int linindex, const DeviceInterpolationData<Real>& g0);

		} // namespace details
		} // namespace ctint
		} // namespace solver

src/phys/dca_step/cluster_solver/ctint/walker/tools/g0_interpolation_gpu.cu

+11 −8

Original line number	Diff line number	Diff line
		@@ -9,32 +9,34 @@
		//
		// This file implements the device methods of G0Interpolation<GPU>.

		#include "dca/phys/dca_step/cluster_solver/ctint/walker/tools/g0_interpolation_gpu.hpp"
		#include "dca/phys/dca_step/cluster_solver/ctint/walker/tools/kernels_interface.hpp"

		#include <cuda_runtime.h>

		#include "dca/linalg/util/error_cuda.hpp"
		#include "dca/util/cuda_blocks.hpp"
		#include "../../../../../../../include/dca/phys/dca_step/cluster_solver/ctint/walker/tools/device_interpolation_data.hpp"

		namespace dca {
		namespace phys {
		namespace solver {
		namespace ctint {
		// dca::phys::solver::ctint::
		namespace details {
		// dca::phys::solver::ctint::details

		template <typename Real>
		__global__ void g0InterpolationTestKernel(Real tau, const int lindex,
		DeviceInterpolationData<Real> g0, Real* result) {
		__global__ void interpolateSlowKernel(Real tau, const int lindex, DeviceInterpolationData<Real> g0,
		Real* result) {
		*result = g0(tau, lindex);
		}

		template <typename Real>
		Real G0Interpolation<linalg::GPU, Real>::operator()(Real tau, int lindex) const {
		Real interpolateSlow(Real tau, int lindex, const DeviceInterpolationData<Real>& g0) {
		Real* d_result;
		Real result;
		cudaMalloc((void**)&d_result, sizeof(Real));

		g0InterpolationTestKernel<<<1, 1>>>(tau, lindex, *this, d_result);
		interpolateSlowKernel<<<1, 1>>>(tau, lindex, g0, d_result);

		assert(cudaSuccess == cudaPeekAtLastError());
		cudaMemcpy(&result, d_result, sizeof(Real), cudaMemcpyDeviceToHost);
		@@ -42,9 +44,10 @@ Real G0Interpolation<linalg::GPU, Real>::operator()(Real tau, int lindex) const
		return result;
		}

		template class G0Interpolation<linalg::GPU, float>;
		template class G0Interpolation<linalg::GPU, double>;
		template float interpolateSlow(float, int, const DeviceInterpolationData<float>&);
		template double interpolateSlow(double, int, const DeviceInterpolationData<double>&);

		} // namespace details
		} // namespace ctint
		} // namespace solver
		} // namespace phys

test/unit/phys/dca_step/cluster_solver/ctint/walker/tools/CMakeLists.txt

+2 −2

Original line number	Diff line number	Diff line
		@@ -13,7 +13,7 @@ dca_add_gtest(ct_int_interpolation_gpu_test
		CUDA
		GTEST_MAIN
		INCLUDE_DIRS ${TEST_INCLUDES}
		LIBS ${TEST_LIBS};mc_kernels
		LIBS ${TEST_LIBS}
		)

		dca_add_gtest(d_matrix_builder_gpu_test
		@@ -21,5 +21,5 @@ dca_add_gtest(d_matrix_builder_gpu_test
		CUDA
		GTEST_MAIN
		INCLUDE_DIRS ${TEST_INCLUDES}
		LIBS ${TEST_LIBS};mc_kernels;ctint
		LIBS ${TEST_LIBS}
		)