Loading include/dca/phys/dca_step/cluster_solver/shared_tools/accumulation/tp/tp_accumulator_gpu.hpp +1 −1 Original line number Diff line number Diff line Loading @@ -396,7 +396,7 @@ void TpAccumulator<Parameters, DT, linalg::GPU>::computeGSingleband(const int s) template <class Parameters, DistType DT> void TpAccumulator<Parameters, DT, linalg::GPU>::computeGMultiband(const int s) { std::cout << "WTpExtDmn::dmn_size(): " << WTpExtDmn::dmn_size() << '\n'; // std::cout << "WTpExtDmn::dmn_size(): " << WTpExtDmn::dmn_size() << '\n'; details::computeGMultiband(G_[s].ptr(), G_[s].leadingDimension(), get_G0()[s].ptr(), get_G0()[s].leadingDimension(), n_bands_, KDmn::dmn_size(), WTpExtDmn::dmn_size(), beta_, queues_[s]); Loading src/phys/dca_step/cluster_solver/shared_tools/accumulation/tp/tp_accumulator_kernels.cu +5 −3 Original line number Diff line number Diff line Loading @@ -37,6 +37,8 @@ namespace accumulator { namespace details { // dca::phys::solver::accumulator::details:: #undef DEBUG_G4_GPU using namespace linalg; using dca::util::ComplexAlias; using dca::util::castGPUType; Loading Loading @@ -206,9 +208,9 @@ void computeGMultiband(std::complex<Real>* G, int ldg, const std::complex<Real>* const auto blocks = getBlockSize(n_rows, n_rows, width); #ifndef NDEBUG std::cout << "computeGMultiband for tp gpu with block size " << n_rows << "," << n_rows << "," << width << '\n'; std::cout << "cuda block dims: " << toString(blocks) << '\n'; // std::cout << "computeGMultiband for tp gpu with block size " << n_rows << "," << n_rows << "," // << width << '\n'; // std::cout << "cuda block dims: " << toString(blocks) << '\n'; #endif computeGMultibandKernel<<<blocks[0], blocks[1], width * width * sizeof(std::complex<Real>), stream>>>( Loading Loading
include/dca/phys/dca_step/cluster_solver/shared_tools/accumulation/tp/tp_accumulator_gpu.hpp +1 −1 Original line number Diff line number Diff line Loading @@ -396,7 +396,7 @@ void TpAccumulator<Parameters, DT, linalg::GPU>::computeGSingleband(const int s) template <class Parameters, DistType DT> void TpAccumulator<Parameters, DT, linalg::GPU>::computeGMultiband(const int s) { std::cout << "WTpExtDmn::dmn_size(): " << WTpExtDmn::dmn_size() << '\n'; // std::cout << "WTpExtDmn::dmn_size(): " << WTpExtDmn::dmn_size() << '\n'; details::computeGMultiband(G_[s].ptr(), G_[s].leadingDimension(), get_G0()[s].ptr(), get_G0()[s].leadingDimension(), n_bands_, KDmn::dmn_size(), WTpExtDmn::dmn_size(), beta_, queues_[s]); Loading
src/phys/dca_step/cluster_solver/shared_tools/accumulation/tp/tp_accumulator_kernels.cu +5 −3 Original line number Diff line number Diff line Loading @@ -37,6 +37,8 @@ namespace accumulator { namespace details { // dca::phys::solver::accumulator::details:: #undef DEBUG_G4_GPU using namespace linalg; using dca::util::ComplexAlias; using dca::util::castGPUType; Loading Loading @@ -206,9 +208,9 @@ void computeGMultiband(std::complex<Real>* G, int ldg, const std::complex<Real>* const auto blocks = getBlockSize(n_rows, n_rows, width); #ifndef NDEBUG std::cout << "computeGMultiband for tp gpu with block size " << n_rows << "," << n_rows << "," << width << '\n'; std::cout << "cuda block dims: " << toString(blocks) << '\n'; // std::cout << "computeGMultiband for tp gpu with block size " << n_rows << "," << n_rows << "," // << width << '\n'; // std::cout << "cuda block dims: " << toString(blocks) << '\n'; #endif computeGMultibandKernel<<<blocks[0], blocks[1], width * width * sizeof(std::complex<Real>), stream>>>( Loading