Loading include/dca/phys/dca_step/cluster_solver/shared_tools/accumulation/time_correlator.hpp +1 −0 Original line number Diff line number Diff line Loading @@ -15,6 +15,7 @@ #include <mutex> #include "dca/math/statistics/autocorrelation.hpp" #include "dca/phys/dca_step/cluster_solver/shared_tools/solver_helper.cuh" #include "dca/phys/dca_step/cluster_solver/shared_tools/interpolation/g0_interpolation.hpp" #include "dca/phys/dca_step/cluster_solver/ctint/structs/ct_int_matrix_configuration.hpp" #include "dca/phys/domains/quantum/electron_band_domain.hpp" Loading include/dca/phys/dca_step/cluster_solver/shared_tools/solver_helper.cuh +22 −0 Original line number Diff line number Diff line Loading @@ -12,16 +12,19 @@ #ifndef DCA_PHYS_DCA_STEP_CLUSTER_SOLVER_SHARED_TOOLS_SOLVER_HELPER_CUH #define DCA_PHYS_DCA_STEP_CLUSTER_SOLVER_SHARED_TOOLS_SOLVER_HELPER_CUH #ifdef DCA_HAVE_CUDA #include <cuda.h> #include "dca/phys/dca_step/cluster_solver/shared_tools/cluster_helper.cuh" #include "dca/phys/domains/cluster/cluster_definitions.hpp" #endif namespace dca { namespace phys { namespace solver { // dca::phys::solver:: #ifdef DCA_HAVE_CUDA class SolverHelper { public: static void set(const int* sum_r, int lda, const int* sub_r, int lds, int nb, int nc, int r0); Loading @@ -29,10 +32,16 @@ public: template <class RDmn, class BDmn> static void set(); static bool initialized(){ return initialized_; } // Return the index of a single particle function of b1, b2, r1 - r2. __device__ std::size_t index(int b1, int b2, int r1, int r2) const; private: static bool initialized_; std::size_t subdm_step_[2]; }; Loading @@ -55,6 +64,19 @@ void SolverHelper::set() { sub_matrix.leadingDimension(), BDmn::dmn_size(), RDmn::dmn_size(), Cluster::origin_index()); } #else // !DCA_HAVE_CUDA // No-op version. 
class SolverHelper { public: template <class RDmn, class BDmn> static void set() {} constexpr static bool initialized(){ return false; } }; #endif // DCA_HAVE_CUDA } // namespace solver } // namespace phys } // namespace dca Loading src/phys/dca_step/cluster_solver/shared_tools/accumulation/time_correlator_kernels.cu +6 −6 Original line number Diff line number Diff line Loading @@ -37,9 +37,10 @@ __global__ void computeG0Kernel(linalg::MatrixView<Real, linalg::GPU> mat, } template <typename Real> void computeG0(linalg::MatrixView<Real, linalg::GPU>& g0_mat, const DeviceInterpolationData<Real> g0, const Real* t_l, const int* b_l, const int* r_l, const Real* t_r, const int* b_r, const int* r_r, cudaStream_t stream) { void computeG0(linalg::MatrixView<Real, linalg::GPU>& g0_mat, const DeviceInterpolationData<Real> g0, const Real* t_l, const int* b_l, const int* r_l, const Real* t_r, const int* b_r, const int* r_r, cudaStream_t stream) { assert(SolverHelper::initialized()); auto blocks = dca::util::get2DBlockSize(g0_mat.nrRows(), g0_mat.nrCols(), 32); computeG0Kernel<<<blocks[0], blocks[1], 0, stream>>>(g0_mat, g0, t_l, b_l, r_l, t_r, b_r, r_r); Loading @@ -47,9 +48,8 @@ void computeG0(linalg::MatrixView<Real, linalg::GPU>& g0_mat, // Instantiation. 
template void computeG0<double>(linalg::MatrixView<double, linalg::GPU>&, const DeviceInterpolationData<double>, const double*, const int*, const int*, const double*, const int*, const int*, cudaStream_t); const DeviceInterpolationData<double>, const double*, const int*, const int*, const double*, const int*, const int*, cudaStream_t); template void computeG0<float>(linalg::MatrixView<float, linalg::GPU>&, const DeviceInterpolationData<float>, const float*, const int*, const int*, const float*, const int*, const int*, cudaStream_t); Loading src/phys/dca_step/cluster_solver/shared_tools/solver_helper.cu +3 −0 Original line number Diff line number Diff line Loading @@ -24,6 +24,8 @@ namespace solver { // Global helper instance. __device__ __constant__ SolverHelper solver_helper; bool SolverHelper::initialized_ = false; void SolverHelper::set(const int* add_r, int lda, const int* sub_r, int lds, const int nb, const int nc, const int r0) { static std::once_flag flag; Loading @@ -36,6 +38,7 @@ void SolverHelper::set(const int* add_r, int lda, const int* sub_r, int lds, con host_helper.subdm_step_[1] = nb * nb; cudaMemcpyToSymbol(solver_helper, &host_helper, sizeof(SolverHelper)); initialized_ = true; }); } Loading Loading
include/dca/phys/dca_step/cluster_solver/shared_tools/accumulation/time_correlator.hpp +1 −0 Original line number Diff line number Diff line Loading @@ -15,6 +15,7 @@ #include <mutex> #include "dca/math/statistics/autocorrelation.hpp" #include "dca/phys/dca_step/cluster_solver/shared_tools/solver_helper.cuh" #include "dca/phys/dca_step/cluster_solver/shared_tools/interpolation/g0_interpolation.hpp" #include "dca/phys/dca_step/cluster_solver/ctint/structs/ct_int_matrix_configuration.hpp" #include "dca/phys/domains/quantum/electron_band_domain.hpp" Loading
include/dca/phys/dca_step/cluster_solver/shared_tools/solver_helper.cuh +22 −0 Original line number Diff line number Diff line Loading @@ -12,16 +12,19 @@ #ifndef DCA_PHYS_DCA_STEP_CLUSTER_SOLVER_SHARED_TOOLS_SOLVER_HELPER_CUH #define DCA_PHYS_DCA_STEP_CLUSTER_SOLVER_SHARED_TOOLS_SOLVER_HELPER_CUH #ifdef DCA_HAVE_CUDA #include <cuda.h> #include "dca/phys/dca_step/cluster_solver/shared_tools/cluster_helper.cuh" #include "dca/phys/domains/cluster/cluster_definitions.hpp" #endif namespace dca { namespace phys { namespace solver { // dca::phys::solver:: #ifdef DCA_HAVE_CUDA class SolverHelper { public: static void set(const int* sum_r, int lda, const int* sub_r, int lds, int nb, int nc, int r0); Loading @@ -29,10 +32,16 @@ public: template <class RDmn, class BDmn> static void set(); static bool initialized(){ return initialized_; } // Return the index of a single particle function of b1, b2, r1 - r2. __device__ std::size_t index(int b1, int b2, int r1, int r2) const; private: static bool initialized_; std::size_t subdm_step_[2]; }; Loading @@ -55,6 +64,19 @@ void SolverHelper::set() { sub_matrix.leadingDimension(), BDmn::dmn_size(), RDmn::dmn_size(), Cluster::origin_index()); } #else // !DCA_HAVE_CUDA // No-op version. class SolverHelper { public: template <class RDmn, class BDmn> static void set() {} constexpr static bool initialized(){ return false; } }; #endif // DCA_HAVE_CUDA } // namespace solver } // namespace phys } // namespace dca Loading
src/phys/dca_step/cluster_solver/shared_tools/accumulation/time_correlator_kernels.cu +6 −6 Original line number Diff line number Diff line Loading @@ -37,9 +37,10 @@ __global__ void computeG0Kernel(linalg::MatrixView<Real, linalg::GPU> mat, } template <typename Real> void computeG0(linalg::MatrixView<Real, linalg::GPU>& g0_mat, const DeviceInterpolationData<Real> g0, const Real* t_l, const int* b_l, const int* r_l, const Real* t_r, const int* b_r, const int* r_r, cudaStream_t stream) { void computeG0(linalg::MatrixView<Real, linalg::GPU>& g0_mat, const DeviceInterpolationData<Real> g0, const Real* t_l, const int* b_l, const int* r_l, const Real* t_r, const int* b_r, const int* r_r, cudaStream_t stream) { assert(SolverHelper::initialized()); auto blocks = dca::util::get2DBlockSize(g0_mat.nrRows(), g0_mat.nrCols(), 32); computeG0Kernel<<<blocks[0], blocks[1], 0, stream>>>(g0_mat, g0, t_l, b_l, r_l, t_r, b_r, r_r); Loading @@ -47,9 +48,8 @@ void computeG0(linalg::MatrixView<Real, linalg::GPU>& g0_mat, // Instantiation. template void computeG0<double>(linalg::MatrixView<double, linalg::GPU>&, const DeviceInterpolationData<double>, const double*, const int*, const int*, const double*, const int*, const int*, cudaStream_t); const DeviceInterpolationData<double>, const double*, const int*, const int*, const double*, const int*, const int*, cudaStream_t); template void computeG0<float>(linalg::MatrixView<float, linalg::GPU>&, const DeviceInterpolationData<float>, const float*, const int*, const int*, const float*, const int*, const int*, cudaStream_t); Loading
src/phys/dca_step/cluster_solver/shared_tools/solver_helper.cu +3 −0 Original line number Diff line number Diff line Loading @@ -24,6 +24,8 @@ namespace solver { // Global helper instance. __device__ __constant__ SolverHelper solver_helper; bool SolverHelper::initialized_ = false; void SolverHelper::set(const int* add_r, int lda, const int* sub_r, int lds, const int nb, const int nc, const int r0) { static std::once_flag flag; Loading @@ -36,6 +38,7 @@ void SolverHelper::set(const int* add_r, int lda, const int* sub_r, int lds, con host_helper.subdm_step_[1] = nb * nb; cudaMemcpyToSymbol(solver_helper, &host_helper, sizeof(SolverHelper)); initialized_ = true; }); } Loading