Loading applications/dca/main_dca.cpp +16 −15 Original line number Diff line number Diff line Loading @@ -74,21 +74,22 @@ int main(int argc, char** argv) { dca::DistType distribution = parameters.get_g4_distribution(); switch (distribution) { case dca::DistType::MPI: { #ifdef DCA_HAVE_MPI case dca::DistType::MPI: { DCALoopDispatch<dca::DistType::MPI> dca_loop_dispatch; dca_loop_dispatch(parameters, dca_data, concurrency); } break; case dca::DistType::NONE: { } break; #else case dca::DistType::MPI: { throw std::runtime_error( "Input calls for function MPI distribution but DCA is not built with MPI."); } break; #endif case dca::DistType::NONE: { DCALoopDispatch<dca::DistType::NONE> dca_loop_dispatch; dca_loop_dispatch(parameters, dca_data, concurrency); } break; } break; } } catch (const std::exception& err) { std::cout << "Unhandled exception in main function:\n\t" << err.what(); Loading include/dca/function/function.hpp +7 −1 Original line number Diff line number Diff line Loading @@ -30,11 +30,14 @@ #include "dca/distribution/dist_types.hpp" #include "dca/function/scalar_cast.hpp" #include "dca/function/set_to_zero.hpp" #include "dca/util/ignore.hpp" #include "dca/util/pack_operations.hpp" #include "dca/util/type_utils.hpp" #include "dca/parallel/util/get_workload.hpp" #ifdef DCA_HAVE_MPI #include "mpi.h" #endif namespace dca { namespace func { Loading Loading @@ -302,12 +305,15 @@ function<scalartype, domain>::function(const std::string& name, DistType dist) size_sbdm(dmn.get_leaf_domain_sizes()), step_sbdm(dmn.get_leaf_domain_steps()), fnc_values(nullptr) { if (name.substr(0, 2) == "G4" && dist == DistType::MPI) { dca::util::ignoreUnused(dist); #ifdef DCA_HAVE_MPI if (dist == DistType::MPI) { int my_rank, mpi_size; MPI_Comm_size(MPI_COMM_WORLD, &mpi_size); MPI_Comm_rank(MPI_COMM_WORLD, &my_rank); nb_elements_ = dca::parallel::util::getWorkload(dmn.get_size(), mpi_size, my_rank); } #endif // DCA_HAVE_MPI fnc_values = new scalartype[nb_elements_]; for (int linind = 0; linind < nb_elements_; ++linind) setToZero(fnc_values[linind]); Loading include/dca/parallel/no_concurrency/serial_gather.hpp +0 −4 Original line number Diff line number Diff line Loading @@ -14,12 +14,8 @@ #include <vector> #include <mpi.h> #include "dca/function/domains.hpp" #include "dca/function/function.hpp" #include "dca/parallel/mpi_concurrency/mpi_gang.hpp" #include "dca/parallel/mpi_concurrency/mpi_type_map.hpp" #include "dca/util/integer_division.hpp" namespace dca { Loading include/dca/phys/dca_step/cluster_solver/ctaux/ctaux_accumulator.hpp +4 −3 Original line number Diff line number Diff line Loading @@ -43,17 +43,18 @@ #ifdef DCA_HAVE_CUDA #include "dca/phys/dca_step/cluster_solver/shared_tools/accumulation/sp/sp_accumulator_gpu.hpp" #include "dca/phys/dca_step/cluster_solver/shared_tools/accumulation/tp/tp_accumulator_gpu.hpp" #endif // DCA_HAVE_CUDA #ifdef DCA_HAVE_MPI #include "dca/phys/dca_step/cluster_solver/shared_tools/accumulation/tp/tp_accumulator_mpi_gpu.hpp" #endif // DCA_HAVE_MPI #endif // DCA_HAVE_CUDA namespace dca { namespace phys { namespace solver { namespace ctaux { // dca::phys::solver::ctaux:: template <dca::linalg::DeviceType device_t, class Parameters, class Data, DistType DIST, typename Real = double> template <dca::linalg::DeviceType device_t, class Parameters, class Data, DistType DIST = dca::DistType::NONE, typename Real = double> class CtauxAccumulator : public MC_accumulator_data { public: using this_type = CtauxAccumulator<device_t, Parameters, Data, DIST, Real>; Loading Loading @@ -162,7 +163,7 @@ public: } static std::size_t staticDeviceFingerprint() { return accumulator::TpAccumulator<Parameters, device_t>::staticDeviceFingerprint(); return accumulator::TpAccumulator<Parameters, device_t, DIST>::staticDeviceFingerprint(); } private: Loading include/dca/phys/dca_step/cluster_solver/shared_tools/accumulation/tp/tp_accumulator.hpp +46 −42 Original line number Diff line number Diff line Loading @@ -45,8 +45,8 @@ namespace accumulator { template <class Parameters, linalg::DeviceType device = linalg::CPU, DistType DT = DistType::NONE> class TpAccumulator; template <class Parameters> class TpAccumulator<Parameters, linalg::CPU, dca::DistType::NONE> { template <class Parameters, DistType DT> class TpAccumulator<Parameters, linalg::CPU, DT> { public: using Real = typename Parameters::TP_measurement_scalar_type; Loading @@ -64,7 +64,6 @@ public: using WTpExtPosDmn = func::dmn_0<domains::vertex_frequency_domain<domains::EXTENDED_POSITIVE>>; using WExchangeDmn = func::dmn_0<domains::FrequencyExchangeDomain>; using this_type = TpAccumulator<Parameters>; using Data = DcaData<Parameters>; using TpGreensFunction = typename Data::TpGreensFunction; Loading Loading @@ -110,7 +109,7 @@ public: const auto& get_sign_times_G4() const; // Sums the accumulated Green's function to the accumulated Green's function of other_acc. void sumTo(this_type& other_acc); void sumTo(TpAccumulator& other_acc); void synchronizeCopy() {} Loading Loading @@ -139,7 +138,7 @@ protected: void getGMultiband(int s, int k1, int k2, int w1, int w2, Matrix& G, Complex beta = 0) const; Complex getGSingleband(int s, int k1, int k2, int w1, int w2) const; auto getGSingleband(int s, int k1, int k2, int w1, int w2) -> Complex const; template <class Configuration, typename RealIn> float computeM(const std::array<linalg::Matrix<RealIn, linalg::CPU>, 2>& M_pair, Loading Loading @@ -186,8 +185,8 @@ private: Matrix G0_M_, G_a_, G_b_; }; template <class Parameters> TpAccumulator<Parameters, linalg::CPU>::TpAccumulator( template <class Parameters, DistType DT> TpAccumulator<Parameters, linalg::CPU, DT>::TpAccumulator( const func::function<std::complex<double>, func::dmn_variadic<NuDmn, NuDmn, KDmn, WDmn>>& G0, const Parameters& pars, const int thread_id) : G0_ptr_(&G0), Loading @@ -200,6 +199,11 @@ TpAccumulator<Parameters, linalg::CPU>::TpAccumulator( G0_M_(n_bands_), G_a_(n_bands_), G_b_(n_bands_) { if constexpr (DT == DistType::MPI) { std::cerr << "The MPI distribution of G4 on the CPU is not supported. Reverting to no " "distribution.\n"; } if (WDmn::dmn_size() < WTpExtDmn::dmn_size()) throw(std::logic_error("The number of single particle frequencies is too small.")); initializeG0(); Loading @@ -212,16 +216,16 @@ TpAccumulator<Parameters, linalg::CPU>::TpAccumulator( } } template <class Parameters> void TpAccumulator<Parameters, linalg::CPU>::resetAccumulation(unsigned int /*dca_loop*/) { template <class Parameters, DistType DT> void TpAccumulator<Parameters, linalg::CPU, DT>::resetAccumulation(unsigned int /*dca_loop*/) { for (auto& G4_channel : G4_) G4_channel = 0.; initializeG0(); } template <class Parameters> void TpAccumulator<Parameters, linalg::CPU>::initializeG0() { template <class Parameters, DistType DT> void TpAccumulator<Parameters, linalg::CPU, DT>::initializeG0() { const int sp_index_offset = (WDmn::dmn_size() - WTpExtDmn::dmn_size()) / 2; for (int w = 0; w < WTpExtDmn::dmn_size(); ++w) { Loading @@ -234,9 +238,9 @@ void TpAccumulator<Parameters, linalg::CPU>::initializeG0() { } } template <class Parameters> template <class Parameters, DistType DT> template <class Configuration, typename RealIn> double TpAccumulator<Parameters, linalg::CPU>::accumulate( double TpAccumulator<Parameters, linalg::CPU, DT>::accumulate( const std::array<linalg::Matrix<RealIn, linalg::CPU>, 2>& M_pair, const std::array<Configuration, 2>& configs, const int sign) { Profiler profiler("accumulate", "tp-accumulation", __LINE__, thread_id_); Loading @@ -254,9 +258,9 @@ double TpAccumulator<Parameters, linalg::CPU>::accumulate( return gflops; } template <class Parameters> template <class Parameters, DistType DT> template <class Configuration, typename RealIn> float TpAccumulator<Parameters, linalg::CPU>::computeM( float TpAccumulator<Parameters, linalg::CPU, DT>::computeM( const std::array<linalg::Matrix<RealIn, linalg::CPU>, 2>& M_pair, const std::array<Configuration, 2>& configs) { float flops = 0.; Loading @@ -277,8 +281,8 @@ float TpAccumulator<Parameters, linalg::CPU>::computeM( return flops; } template <class Parameters> double TpAccumulator<Parameters, linalg::CPU>::computeG() { template <class Parameters, DistType DT> double TpAccumulator<Parameters, linalg::CPU, DT>::computeG() { Profiler prf("ComputeG", "tp-accumulation", __LINE__, thread_id_); for (int w2 = 0; w2 < WTpExtDmn::dmn_size(); ++w2) for (int w1 = 0; w1 < WTpExtPosDmn::dmn_size(); ++w1) Loading @@ -298,8 +302,8 @@ double TpAccumulator<Parameters, linalg::CPU>::computeG() { return 1e-9 * flops; } template <class Parameters> void TpAccumulator<Parameters, linalg::CPU>::computeGSingleband(const int s, const int k1, template <class Parameters, DistType DT> void TpAccumulator<Parameters, linalg::CPU, DT>::computeGSingleband(const int s, const int k1, const int k2, const int w1, const int w2) { assert(w1 < WTpExtPosDmn::dmn_size()); Loading @@ -315,8 +319,8 @@ void TpAccumulator<Parameters, linalg::CPU>::computeGSingleband(const int s, con G_(0, 0, s, k1, k2, w1, w2) = -G0_w1 * M_val * G0_w2; } template <class Parameters> void TpAccumulator<Parameters, linalg::CPU>::computeGMultiband(const int s, const int k1, template <class Parameters, DistType DT> void TpAccumulator<Parameters, linalg::CPU, DT>::computeGMultiband(const int s, const int k1, const int k2, const int w1, const int w2) { assert(w1 < WTpExtPosDmn::dmn_size()); Loading @@ -339,10 +343,10 @@ void TpAccumulator<Parameters, linalg::CPU>::computeGMultiband(const int s, cons } } template <class Parameters> std::complex<typename TpAccumulator<Parameters, linalg::CPU>::Real> TpAccumulator< Parameters, linalg::CPU>::getGSingleband(const int s, const int k1, const int k2, const int w1, const int w2) const { template <class Parameters, DistType DT> auto TpAccumulator<Parameters, linalg::CPU, DT>::getGSingleband(const int s, const int k1, const int k2, const int w1, const int w2) -> Complex const { const int w2_ext = w2 + extension_index_offset_; const int w1_ext = w1 + extension_index_offset_; auto minus_w1 = [=](const int w) { return n_pos_frqs_ - 1 - w; }; Loading @@ -359,8 +363,8 @@ std::complex<typename TpAccumulator<Parameters, linalg::CPU>::Real> TpAccumulato return std::conj(G_(0, 0, s, minus_k(k1), minus_k(k2), minus_w1(w1_ext), minus_w2(w2_ext))); } template <class Parameters> void TpAccumulator<Parameters, linalg::CPU>::getGMultiband(int s, int k1, int k2, int w1, int w2, template <class Parameters, DistType DT> void TpAccumulator<Parameters, linalg::CPU, DT>::getGMultiband(int s, int k1, int k2, int w1, int w2, Matrix& G, const Complex beta) const { const int w2_ext = w2 + extension_index_offset_; const int w1_ext = w1 + extension_index_offset_; Loading Loading @@ -388,8 +392,8 @@ void TpAccumulator<Parameters, linalg::CPU>::getGMultiband(int s, int k1, int k2 } } template <class Parameters> double TpAccumulator<Parameters, linalg::CPU>::updateG4(const int channel_id) { template <class Parameters, DistType DT> double TpAccumulator<Parameters, linalg::CPU, DT>::updateG4(const int channel_id) { // G4 is stored with the following band convention: // b1 ------------------------ b3 // | | Loading Loading @@ -580,8 +584,8 @@ double TpAccumulator<Parameters, linalg::CPU>::updateG4(const int channel_id) { return 1e-9 * flops; } template <class Parameters> void TpAccumulator<Parameters, linalg::CPU>::updateG4Atomic( template <class Parameters, DistType DT> void TpAccumulator<Parameters, linalg::CPU, DT>::updateG4Atomic( Complex* G4_ptr, const int s_a, const int k1_a, const int k2_a, const int w1_a, const int w2_a, const int s_b, const int k1_b, const int k2_b, const int w1_b, const int w2_b, const Real alpha, const bool cross_legs) { Loading Loading @@ -617,8 +621,8 @@ void TpAccumulator<Parameters, linalg::CPU>::updateG4Atomic( } } template <class Parameters> void TpAccumulator<Parameters, linalg::CPU>::updateG4SpinDifference( template <class Parameters, DistType DT> void TpAccumulator<Parameters, linalg::CPU, DT>::updateG4SpinDifference( Complex* G4_ptr, const int sign, const int k1_a, const int k2_a, const int w1_a, const int w2_a, const int k1_b, const int k2_b, const int w1_b, const int w2_b, const Real alpha, const bool cross_legs) { Loading Loading @@ -660,16 +664,16 @@ void TpAccumulator<Parameters, linalg::CPU>::updateG4SpinDifference( } } template <class Parameters> const auto& TpAccumulator<Parameters, linalg::CPU>::get_sign_times_G4() const { template <class Parameters, DistType DT> const auto& TpAccumulator<Parameters, linalg::CPU, DT>::get_sign_times_G4() const { if (G4_.empty()) throw std::logic_error("There is no G4 stored in this class."); return G4_; } template <class Parameters> void TpAccumulator<Parameters, linalg::CPU>::sumTo(this_type& other_one) { template <class Parameters, DistType DT> void TpAccumulator<Parameters, linalg::CPU, DT>::sumTo(TpAccumulator& other_one) { if (other_one.G4_.size() != G4_.size()) throw std::logic_error("Objects accumulate different number of channels."); Loading Loading
applications/dca/main_dca.cpp +16 −15 Original line number Diff line number Diff line Loading @@ -74,21 +74,22 @@ int main(int argc, char** argv) { dca::DistType distribution = parameters.get_g4_distribution(); switch (distribution) { case dca::DistType::MPI: { #ifdef DCA_HAVE_MPI case dca::DistType::MPI: { DCALoopDispatch<dca::DistType::MPI> dca_loop_dispatch; dca_loop_dispatch(parameters, dca_data, concurrency); } break; case dca::DistType::NONE: { } break; #else case dca::DistType::MPI: { throw std::runtime_error( "Input calls for function MPI distribution but DCA is not built with MPI."); } break; #endif case dca::DistType::NONE: { DCALoopDispatch<dca::DistType::NONE> dca_loop_dispatch; dca_loop_dispatch(parameters, dca_data, concurrency); } break; } break; } } catch (const std::exception& err) { std::cout << "Unhandled exception in main function:\n\t" << err.what(); Loading
include/dca/function/function.hpp +7 −1 Original line number Diff line number Diff line Loading @@ -30,11 +30,14 @@ #include "dca/distribution/dist_types.hpp" #include "dca/function/scalar_cast.hpp" #include "dca/function/set_to_zero.hpp" #include "dca/util/ignore.hpp" #include "dca/util/pack_operations.hpp" #include "dca/util/type_utils.hpp" #include "dca/parallel/util/get_workload.hpp" #ifdef DCA_HAVE_MPI #include "mpi.h" #endif namespace dca { namespace func { Loading Loading @@ -302,12 +305,15 @@ function<scalartype, domain>::function(const std::string& name, DistType dist) size_sbdm(dmn.get_leaf_domain_sizes()), step_sbdm(dmn.get_leaf_domain_steps()), fnc_values(nullptr) { if (name.substr(0, 2) == "G4" && dist == DistType::MPI) { dca::util::ignoreUnused(dist); #ifdef DCA_HAVE_MPI if (dist == DistType::MPI) { int my_rank, mpi_size; MPI_Comm_size(MPI_COMM_WORLD, &mpi_size); MPI_Comm_rank(MPI_COMM_WORLD, &my_rank); nb_elements_ = dca::parallel::util::getWorkload(dmn.get_size(), mpi_size, my_rank); } #endif // DCA_HAVE_MPI fnc_values = new scalartype[nb_elements_]; for (int linind = 0; linind < nb_elements_; ++linind) setToZero(fnc_values[linind]); Loading
include/dca/parallel/no_concurrency/serial_gather.hpp +0 −4 Original line number Diff line number Diff line Loading @@ -14,12 +14,8 @@ #include <vector> #include <mpi.h> #include "dca/function/domains.hpp" #include "dca/function/function.hpp" #include "dca/parallel/mpi_concurrency/mpi_gang.hpp" #include "dca/parallel/mpi_concurrency/mpi_type_map.hpp" #include "dca/util/integer_division.hpp" namespace dca { Loading
include/dca/phys/dca_step/cluster_solver/ctaux/ctaux_accumulator.hpp +4 −3 Original line number Diff line number Diff line Loading @@ -43,17 +43,18 @@ #ifdef DCA_HAVE_CUDA #include "dca/phys/dca_step/cluster_solver/shared_tools/accumulation/sp/sp_accumulator_gpu.hpp" #include "dca/phys/dca_step/cluster_solver/shared_tools/accumulation/tp/tp_accumulator_gpu.hpp" #endif // DCA_HAVE_CUDA #ifdef DCA_HAVE_MPI #include "dca/phys/dca_step/cluster_solver/shared_tools/accumulation/tp/tp_accumulator_mpi_gpu.hpp" #endif // DCA_HAVE_MPI #endif // DCA_HAVE_CUDA namespace dca { namespace phys { namespace solver { namespace ctaux { // dca::phys::solver::ctaux:: template <dca::linalg::DeviceType device_t, class Parameters, class Data, DistType DIST, typename Real = double> template <dca::linalg::DeviceType device_t, class Parameters, class Data, DistType DIST = dca::DistType::NONE, typename Real = double> class CtauxAccumulator : public MC_accumulator_data { public: using this_type = CtauxAccumulator<device_t, Parameters, Data, DIST, Real>; Loading Loading @@ -162,7 +163,7 @@ public: } static std::size_t staticDeviceFingerprint() { return accumulator::TpAccumulator<Parameters, device_t>::staticDeviceFingerprint(); return accumulator::TpAccumulator<Parameters, device_t, DIST>::staticDeviceFingerprint(); } private: Loading
include/dca/phys/dca_step/cluster_solver/shared_tools/accumulation/tp/tp_accumulator.hpp +46 −42 Original line number Diff line number Diff line Loading @@ -45,8 +45,8 @@ namespace accumulator { template <class Parameters, linalg::DeviceType device = linalg::CPU, DistType DT = DistType::NONE> class TpAccumulator; template <class Parameters> class TpAccumulator<Parameters, linalg::CPU, dca::DistType::NONE> { template <class Parameters, DistType DT> class TpAccumulator<Parameters, linalg::CPU, DT> { public: using Real = typename Parameters::TP_measurement_scalar_type; Loading @@ -64,7 +64,6 @@ public: using WTpExtPosDmn = func::dmn_0<domains::vertex_frequency_domain<domains::EXTENDED_POSITIVE>>; using WExchangeDmn = func::dmn_0<domains::FrequencyExchangeDomain>; using this_type = TpAccumulator<Parameters>; using Data = DcaData<Parameters>; using TpGreensFunction = typename Data::TpGreensFunction; Loading Loading @@ -110,7 +109,7 @@ public: const auto& get_sign_times_G4() const; // Sums the accumulated Green's function to the accumulated Green's function of other_acc. void sumTo(this_type& other_acc); void sumTo(TpAccumulator& other_acc); void synchronizeCopy() {} Loading Loading @@ -139,7 +138,7 @@ protected: void getGMultiband(int s, int k1, int k2, int w1, int w2, Matrix& G, Complex beta = 0) const; Complex getGSingleband(int s, int k1, int k2, int w1, int w2) const; auto getGSingleband(int s, int k1, int k2, int w1, int w2) -> Complex const; template <class Configuration, typename RealIn> float computeM(const std::array<linalg::Matrix<RealIn, linalg::CPU>, 2>& M_pair, Loading Loading @@ -186,8 +185,8 @@ private: Matrix G0_M_, G_a_, G_b_; }; template <class Parameters> TpAccumulator<Parameters, linalg::CPU>::TpAccumulator( template <class Parameters, DistType DT> TpAccumulator<Parameters, linalg::CPU, DT>::TpAccumulator( const func::function<std::complex<double>, func::dmn_variadic<NuDmn, NuDmn, KDmn, WDmn>>& G0, const Parameters& pars, const int thread_id) : G0_ptr_(&G0), Loading @@ -200,6 +199,11 @@ TpAccumulator<Parameters, linalg::CPU>::TpAccumulator( G0_M_(n_bands_), G_a_(n_bands_), G_b_(n_bands_) { if constexpr (DT == DistType::MPI) { std::cerr << "The MPI distribution of G4 on the CPU is not supported. Reverting to no " "distribution.\n"; } if (WDmn::dmn_size() < WTpExtDmn::dmn_size()) throw(std::logic_error("The number of single particle frequencies is too small.")); initializeG0(); Loading @@ -212,16 +216,16 @@ TpAccumulator<Parameters, linalg::CPU>::TpAccumulator( } } template <class Parameters> void TpAccumulator<Parameters, linalg::CPU>::resetAccumulation(unsigned int /*dca_loop*/) { template <class Parameters, DistType DT> void TpAccumulator<Parameters, linalg::CPU, DT>::resetAccumulation(unsigned int /*dca_loop*/) { for (auto& G4_channel : G4_) G4_channel = 0.; initializeG0(); } template <class Parameters> void TpAccumulator<Parameters, linalg::CPU>::initializeG0() { template <class Parameters, DistType DT> void TpAccumulator<Parameters, linalg::CPU, DT>::initializeG0() { const int sp_index_offset = (WDmn::dmn_size() - WTpExtDmn::dmn_size()) / 2; for (int w = 0; w < WTpExtDmn::dmn_size(); ++w) { Loading @@ -234,9 +238,9 @@ void TpAccumulator<Parameters, linalg::CPU>::initializeG0() { } } template <class Parameters> template <class Parameters, DistType DT> template <class Configuration, typename RealIn> double TpAccumulator<Parameters, linalg::CPU>::accumulate( double TpAccumulator<Parameters, linalg::CPU, DT>::accumulate( const std::array<linalg::Matrix<RealIn, linalg::CPU>, 2>& M_pair, const std::array<Configuration, 2>& configs, const int sign) { Profiler profiler("accumulate", "tp-accumulation", __LINE__, thread_id_); Loading @@ -254,9 +258,9 @@ double TpAccumulator<Parameters, linalg::CPU>::accumulate( return gflops; } template <class Parameters> template <class Parameters, DistType DT> template <class Configuration, typename RealIn> float TpAccumulator<Parameters, linalg::CPU>::computeM( float TpAccumulator<Parameters, linalg::CPU, DT>::computeM( const std::array<linalg::Matrix<RealIn, linalg::CPU>, 2>& M_pair, const std::array<Configuration, 2>& configs) { float flops = 0.; Loading @@ -277,8 +281,8 @@ float TpAccumulator<Parameters, linalg::CPU>::computeM( return flops; } template <class Parameters> double TpAccumulator<Parameters, linalg::CPU>::computeG() { template <class Parameters, DistType DT> double TpAccumulator<Parameters, linalg::CPU, DT>::computeG() { Profiler prf("ComputeG", "tp-accumulation", __LINE__, thread_id_); for (int w2 = 0; w2 < WTpExtDmn::dmn_size(); ++w2) for (int w1 = 0; w1 < WTpExtPosDmn::dmn_size(); ++w1) Loading @@ -298,8 +302,8 @@ double TpAccumulator<Parameters, linalg::CPU>::computeG() { return 1e-9 * flops; } template <class Parameters> void TpAccumulator<Parameters, linalg::CPU>::computeGSingleband(const int s, const int k1, template <class Parameters, DistType DT> void TpAccumulator<Parameters, linalg::CPU, DT>::computeGSingleband(const int s, const int k1, const int k2, const int w1, const int w2) { assert(w1 < WTpExtPosDmn::dmn_size()); Loading @@ -315,8 +319,8 @@ void TpAccumulator<Parameters, linalg::CPU>::computeGSingleband(const int s, con G_(0, 0, s, k1, k2, w1, w2) = -G0_w1 * M_val * G0_w2; } template <class Parameters> void TpAccumulator<Parameters, linalg::CPU>::computeGMultiband(const int s, const int k1, template <class Parameters, DistType DT> void TpAccumulator<Parameters, linalg::CPU, DT>::computeGMultiband(const int s, const int k1, const int k2, const int w1, const int w2) { assert(w1 < WTpExtPosDmn::dmn_size()); Loading @@ -339,10 +343,10 @@ void TpAccumulator<Parameters, linalg::CPU>::computeGMultiband(const int s, cons } } template <class Parameters> std::complex<typename TpAccumulator<Parameters, linalg::CPU>::Real> TpAccumulator< Parameters, linalg::CPU>::getGSingleband(const int s, const int k1, const int k2, const int w1, const int w2) const { template <class Parameters, DistType DT> auto TpAccumulator<Parameters, linalg::CPU, DT>::getGSingleband(const int s, const int k1, const int k2, const int w1, const int w2) -> Complex const { const int w2_ext = w2 + extension_index_offset_; const int w1_ext = w1 + extension_index_offset_; auto minus_w1 = [=](const int w) { return n_pos_frqs_ - 1 - w; }; Loading @@ -359,8 +363,8 @@ std::complex<typename TpAccumulator<Parameters, linalg::CPU>::Real> TpAccumulato return std::conj(G_(0, 0, s, minus_k(k1), minus_k(k2), minus_w1(w1_ext), minus_w2(w2_ext))); } template <class Parameters> void TpAccumulator<Parameters, linalg::CPU>::getGMultiband(int s, int k1, int k2, int w1, int w2, template <class Parameters, DistType DT> void TpAccumulator<Parameters, linalg::CPU, DT>::getGMultiband(int s, int k1, int k2, int w1, int w2, Matrix& G, const Complex beta) const { const int w2_ext = w2 + extension_index_offset_; const int w1_ext = w1 + extension_index_offset_; Loading Loading @@ -388,8 +392,8 @@ void TpAccumulator<Parameters, linalg::CPU>::getGMultiband(int s, int k1, int k2 } } template <class Parameters> double TpAccumulator<Parameters, linalg::CPU>::updateG4(const int channel_id) { template <class Parameters, DistType DT> double TpAccumulator<Parameters, linalg::CPU, DT>::updateG4(const int channel_id) { // G4 is stored with the following band convention: // b1 ------------------------ b3 // | | Loading Loading @@ -580,8 +584,8 @@ double TpAccumulator<Parameters, linalg::CPU>::updateG4(const int channel_id) { return 1e-9 * flops; } template <class Parameters> void TpAccumulator<Parameters, linalg::CPU>::updateG4Atomic( template <class Parameters, DistType DT> void TpAccumulator<Parameters, linalg::CPU, DT>::updateG4Atomic( Complex* G4_ptr, const int s_a, const int k1_a, const int k2_a, const int w1_a, const int w2_a, const int s_b, const int k1_b, const int k2_b, const int w1_b, const int w2_b, const Real alpha, const bool cross_legs) { Loading Loading @@ -617,8 +621,8 @@ void TpAccumulator<Parameters, linalg::CPU>::updateG4Atomic( } } template <class Parameters> void TpAccumulator<Parameters, linalg::CPU>::updateG4SpinDifference( template <class Parameters, DistType DT> void TpAccumulator<Parameters, linalg::CPU, DT>::updateG4SpinDifference( Complex* G4_ptr, const int sign, const int k1_a, const int k2_a, const int w1_a, const int w2_a, const int k1_b, const int k2_b, const int w1_b, const int w2_b, const Real alpha, const bool cross_legs) { Loading Loading @@ -660,16 +664,16 @@ void TpAccumulator<Parameters, linalg::CPU>::updateG4SpinDifference( } } template <class Parameters> const auto& TpAccumulator<Parameters, linalg::CPU>::get_sign_times_G4() const { template <class Parameters, DistType DT> const auto& TpAccumulator<Parameters, linalg::CPU, DT>::get_sign_times_G4() const { if (G4_.empty()) throw std::logic_error("There is no G4 stored in this class."); return G4_; } template <class Parameters> void TpAccumulator<Parameters, linalg::CPU>::sumTo(this_type& other_one) { template <class Parameters, DistType DT> void TpAccumulator<Parameters, linalg::CPU, DT>::sumTo(TpAccumulator& other_one) { if (other_one.G4_.size() != G4_.size()) throw std::logic_error("Objects accumulate different number of channels."); Loading