Commit 7a0f21e0 authored by gbalduzz
Browse files

Separate the .cu implementation of the G0Interpolation<GPU> methods from the class, as is done for the other accelerated classes.

parent baaf9955
Loading
Loading
Loading
Loading
+1 −1
Original line number Diff line number Diff line
@@ -60,7 +60,7 @@ protected:
  unsigned stride_;
  Real beta_, n_div_beta_;
  Real *values_, *g0_minus_;
};  // namespace ctint
};

}  // namespace ctint
}  // namespace solver
+5 −0
Original line number Diff line number Diff line
@@ -109,6 +109,11 @@ G0Interpolation<linalg::GPU, Real>::G0Interpolation(const func::function<double,
  initialize(G0_pars_t);
}

// Evaluates the interpolation at (tau, lindex) and returns the result to the caller.
// The actual evaluation is delegated to details::interpolateSlow; only the
// DeviceInterpolationData part of this object is forwarded to it.
template <typename Real>
Real G0Interpolation<linalg::GPU, Real>::operator()(Real tau, int lindex) const {
  // Copy out the DeviceInterpolationData sub-object so that the helper receives a plain
  // DeviceInterpolationData<Real> rather than the full G0Interpolation instance.
  const DeviceInterpolationData<Real> device_data(*this);
  return details::interpolateSlow(tau, lindex, device_data);
}

}  // namespace ctint
}  // namespace solver
}  // namespace phys
+3 −0
Original line number Diff line number Diff line
@@ -31,6 +31,9 @@ void buildG0Matrix(linalg::MatrixView<Real, linalg::GPU> G0, const int n_init,
                   const bool right_section, DeviceConfiguration config,
                   DeviceInterpolationData<Real> g0_interp, cudaStream_t stream);

// Evaluates g0(tau, lindex) on the device and returns the value to the host.
// The definition in the accompanying .cu file launches a single-thread kernel and copies one
// scalar back for every call, so this is a convenience path (hence "Slow") and should not be
// used in performance-critical loops. NOTE(review): presumably intended for tests — confirm.
// Explicit instantiations are provided in the .cu file for Real = float and Real = double.
// In: tau, lindex: arguments forwarded unchanged to g0's call operator.
// In: g0: interpolation data; it is passed by value to the kernel.
// Returns: the value of g0(tau, lindex) as computed by the kernel.
template <typename Real>
Real interpolateSlow(Real tau, int lindex, const DeviceInterpolationData<Real>& g0);

}  // namespace details
}  // namespace ctint
}  // namespace solver
+11 −8
Original line number Diff line number Diff line
@@ -9,32 +9,34 @@
//
// This file implements the device kernels and host-side helpers used by G0Interpolation<GPU>.

#include "dca/phys/dca_step/cluster_solver/ctint/walker/tools/g0_interpolation_gpu.hpp"
#include "dca/phys/dca_step/cluster_solver/ctint/walker/tools/kernels_interface.hpp"

#include <cuda_runtime.h>

#include "dca/linalg/util/error_cuda.hpp"
#include "dca/util/cuda_blocks.hpp"
#include "../../../../../../../include/dca/phys/dca_step/cluster_solver/ctint/walker/tools/device_interpolation_data.hpp"

namespace dca {
namespace phys {
namespace solver {
namespace ctint {
// dca::phys::solver::ctint::
namespace details {
// dca::phys::solver::ctint::details

template <typename Real>
__global__ void g0InterpolationTestKernel(Real tau, const int lindex,
                                          DeviceInterpolationData<Real> g0, Real* result) {
__global__ void interpolateSlowKernel(Real tau, const int lindex, DeviceInterpolationData<Real> g0,
                                      Real* result) {
  *result = g0(tau, lindex);
}

template <typename Real>
Real G0Interpolation<linalg::GPU, Real>::operator()(Real tau, int lindex) const {
Real interpolateSlow(Real tau, int lindex, const DeviceInterpolationData<Real>& g0) {
  Real* d_result;
  Real result;
  cudaMalloc((void**)&d_result, sizeof(Real));

  g0InterpolationTestKernel<<<1, 1>>>(tau, lindex, *this, d_result);
  interpolateSlowKernel<<<1, 1>>>(tau, lindex, g0, d_result);

  assert(cudaSuccess == cudaPeekAtLastError());
  cudaMemcpy(&result, d_result, sizeof(Real), cudaMemcpyDeviceToHost);
@@ -42,9 +44,10 @@ Real G0Interpolation<linalg::GPU, Real>::operator()(Real tau, int lindex) const
  return result;
}

template class G0Interpolation<linalg::GPU, float>;
template class G0Interpolation<linalg::GPU, double>;
template float interpolateSlow(float, int, const DeviceInterpolationData<float>&);
template double interpolateSlow(double, int, const DeviceInterpolationData<double>&);

}  // namespace details
}  // namespace ctint
}  // namespace solver
}  // namespace phys
+2 −2
Original line number Diff line number Diff line
@@ -13,7 +13,7 @@ dca_add_gtest(ct_int_interpolation_gpu_test
    CUDA
    GTEST_MAIN
    INCLUDE_DIRS ${TEST_INCLUDES}
    LIBS     ${TEST_LIBS};mc_kernels
    LIBS     ${TEST_LIBS}
    )

dca_add_gtest(d_matrix_builder_gpu_test
@@ -21,5 +21,5 @@ dca_add_gtest(d_matrix_builder_gpu_test
    CUDA
    GTEST_MAIN
    INCLUDE_DIRS ${TEST_INCLUDES}
    LIBS     ${TEST_LIBS};mc_kernels;ctint
    LIBS     ${TEST_LIBS}
    )