Loading include/dca/distribution/dist_types.hpp +7 −3 Original line number Diff line number Diff line Loading @@ -6,16 +6,20 @@ // See CITATION.md for citation guidelines, if DCA++ is used for scientific publications. // // Author: Peter Doak (doakpw@ornl.gov) // Giovanni Balduzzi (gbalduzz@itp.phys.ethz.ch) // // This file provides distribution strategy tags #ifndef DCA_DIST_TYPE_HPP #define DCA_DIST_TYPE_HPP #include <string> namespace dca { enum class DistType { NONE, MPI }; enum class DistType { NONE, MPI }; DistType stringToDistType(const std::string& name); std::string toString(DistType type); } // namespace dca #endif // DCA_DIST_TYPE_HPP include/dca/phys/dca_step/cluster_solver/ctaux/ctaux_accumulator.hpp +2 −1 Original line number Diff line number Diff line Loading @@ -43,10 +43,11 @@ #ifdef DCA_HAVE_CUDA #include "dca/phys/dca_step/cluster_solver/shared_tools/accumulation/sp/sp_accumulator_gpu.hpp" #include "dca/phys/dca_step/cluster_solver/shared_tools/accumulation/tp/tp_accumulator_gpu.hpp" #endif // DCA_HAVE_CUDA #ifdef DCA_HAVE_MPI #include "dca/phys/dca_step/cluster_solver/shared_tools/accumulation/tp/tp_accumulator_mpi_gpu.hpp" #endif // DCA_HAVE_MPI #endif // DCA_HAVE_CUDA namespace dca { namespace phys { namespace solver { Loading include/dca/phys/dca_step/cluster_solver/shared_tools/accumulation/tp/tp_accumulator.hpp +46 −42 Original line number Diff line number Diff line Loading @@ -44,8 +44,8 @@ namespace accumulator { template <class Parameters, linalg::DeviceType device = linalg::CPU, DistType DT = DistType::NONE> class TpAccumulator; template <class Parameters> class TpAccumulator<Parameters, linalg::CPU, dca::DistType::NONE> { template <class Parameters, DistType DT> class TpAccumulator<Parameters, linalg::CPU, DT> { public: using Real = typename Parameters::TP_measurement_scalar_type; Loading @@ -63,7 +63,6 @@ public: using WTpExtPosDmn = func::dmn_0<domains::vertex_frequency_domain<domains::EXTENDED_POSITIVE>>; using WExchangeDmn = 
func::dmn_0<domains::FrequencyExchangeDomain>; using this_type = TpAccumulator<Parameters>; using Data = DcaData<Parameters>; using TpGreensFunction = typename Data::TpGreensFunction; Loading Loading @@ -109,7 +108,7 @@ public: const auto& get_sign_times_G4() const; // Sums the accumulated Green's function to the accumulated Green's function of other_acc. void sumTo(this_type& other_acc); void sumTo(TpAccumulator& other_acc); void synchronizeCopy() {} Loading @@ -134,7 +133,7 @@ protected: void getGMultiband(int s, int k1, int k2, int w1, int w2, Matrix& G, Complex beta = 0) const; Complex getGSingleband(int s, int k1, int k2, int w1, int w2) const; auto getGSingleband(int s, int k1, int k2, int w1, int w2) -> Complex const; template <class Configuration, typename RealIn> float computeM(const std::array<linalg::Matrix<RealIn, linalg::CPU>, 2>& M_pair, Loading Loading @@ -181,8 +180,8 @@ private: Matrix G0_M_, G_a_, G_b_; }; template <class Parameters> TpAccumulator<Parameters, linalg::CPU>::TpAccumulator( template <class Parameters, DistType DT> TpAccumulator<Parameters, linalg::CPU, DT>::TpAccumulator( const func::function<std::complex<double>, func::dmn_variadic<NuDmn, NuDmn, KDmn, WDmn>>& G0, const Parameters& pars, const int thread_id) : G0_ptr_(&G0), Loading @@ -195,6 +194,11 @@ TpAccumulator<Parameters, linalg::CPU>::TpAccumulator( G0_M_(n_bands_), G_a_(n_bands_), G_b_(n_bands_) { if constexpr (DT == DistType::MPI) { std::cerr << "The MPI distribution of G4 on the CPU is not supported. 
Reverting to no " "distribution.\n"; } if (WDmn::dmn_size() < WTpExtDmn::dmn_size()) throw(std::logic_error("The number of single particle frequencies is too small.")); initializeG0(); Loading @@ -207,16 +211,16 @@ TpAccumulator<Parameters, linalg::CPU>::TpAccumulator( } } template <class Parameters> void TpAccumulator<Parameters, linalg::CPU>::resetAccumulation(unsigned int /*dca_loop*/) { template <class Parameters, DistType DT> void TpAccumulator<Parameters, linalg::CPU, DT>::resetAccumulation(unsigned int /*dca_loop*/) { for (auto& G4_channel : G4_) G4_channel = 0.; initializeG0(); } template <class Parameters> void TpAccumulator<Parameters, linalg::CPU>::initializeG0() { template <class Parameters, DistType DT> void TpAccumulator<Parameters, linalg::CPU, DT>::initializeG0() { const int sp_index_offset = (WDmn::dmn_size() - WTpExtDmn::dmn_size()) / 2; for (int w = 0; w < WTpExtDmn::dmn_size(); ++w) { Loading @@ -229,9 +233,9 @@ void TpAccumulator<Parameters, linalg::CPU>::initializeG0() { } } template <class Parameters> template <class Parameters, DistType DT> template <class Configuration, typename RealIn> double TpAccumulator<Parameters, linalg::CPU>::accumulate( double TpAccumulator<Parameters, linalg::CPU, DT>::accumulate( const std::array<linalg::Matrix<RealIn, linalg::CPU>, 2>& M_pair, const std::array<Configuration, 2>& configs, const int sign) { Profiler profiler("accumulate", "tp-accumulation", __LINE__, thread_id_); Loading @@ -249,9 +253,9 @@ double TpAccumulator<Parameters, linalg::CPU>::accumulate( return gflops; } template <class Parameters> template <class Parameters, DistType DT> template <class Configuration, typename RealIn> float TpAccumulator<Parameters, linalg::CPU>::computeM( float TpAccumulator<Parameters, linalg::CPU, DT>::computeM( const std::array<linalg::Matrix<RealIn, linalg::CPU>, 2>& M_pair, const std::array<Configuration, 2>& configs) { float flops = 0.; Loading @@ -272,8 +276,8 @@ float TpAccumulator<Parameters, 
linalg::CPU>::computeM( return flops; } template <class Parameters> double TpAccumulator<Parameters, linalg::CPU>::computeG() { template <class Parameters, DistType DT> double TpAccumulator<Parameters, linalg::CPU, DT>::computeG() { Profiler prf("ComputeG", "tp-accumulation", __LINE__, thread_id_); for (int w2 = 0; w2 < WTpExtDmn::dmn_size(); ++w2) for (int w1 = 0; w1 < WTpExtPosDmn::dmn_size(); ++w1) Loading @@ -293,8 +297,8 @@ double TpAccumulator<Parameters, linalg::CPU>::computeG() { return 1e-9 * flops; } template <class Parameters> void TpAccumulator<Parameters, linalg::CPU>::computeGSingleband(const int s, const int k1, template <class Parameters, DistType DT> void TpAccumulator<Parameters, linalg::CPU, DT>::computeGSingleband(const int s, const int k1, const int k2, const int w1, const int w2) { assert(w1 < WTpExtPosDmn::dmn_size()); Loading @@ -310,8 +314,8 @@ void TpAccumulator<Parameters, linalg::CPU>::computeGSingleband(const int s, con G_(0, 0, s, k1, k2, w1, w2) = -G0_w1 * M_val * G0_w2; } template <class Parameters> void TpAccumulator<Parameters, linalg::CPU>::computeGMultiband(const int s, const int k1, template <class Parameters, DistType DT> void TpAccumulator<Parameters, linalg::CPU, DT>::computeGMultiband(const int s, const int k1, const int k2, const int w1, const int w2) { assert(w1 < WTpExtPosDmn::dmn_size()); Loading @@ -334,10 +338,10 @@ void TpAccumulator<Parameters, linalg::CPU>::computeGMultiband(const int s, cons } } template <class Parameters> std::complex<typename TpAccumulator<Parameters, linalg::CPU>::Real> TpAccumulator< Parameters, linalg::CPU>::getGSingleband(const int s, const int k1, const int k2, const int w1, const int w2) const { template <class Parameters, DistType DT> auto TpAccumulator<Parameters, linalg::CPU, DT>::getGSingleband(const int s, const int k1, const int k2, const int w1, const int w2) -> Complex const { const int w2_ext = w2 + extension_index_offset_; const int w1_ext = w1 + extension_index_offset_; auto 
minus_w1 = [=](const int w) { return n_pos_frqs_ - 1 - w; }; Loading @@ -354,8 +358,8 @@ std::complex<typename TpAccumulator<Parameters, linalg::CPU>::Real> TpAccumulato return std::conj(G_(0, 0, s, minus_k(k1), minus_k(k2), minus_w1(w1_ext), minus_w2(w2_ext))); } template <class Parameters> void TpAccumulator<Parameters, linalg::CPU>::getGMultiband(int s, int k1, int k2, int w1, int w2, template <class Parameters, DistType DT> void TpAccumulator<Parameters, linalg::CPU, DT>::getGMultiband(int s, int k1, int k2, int w1, int w2, Matrix& G, const Complex beta) const { const int w2_ext = w2 + extension_index_offset_; const int w1_ext = w1 + extension_index_offset_; Loading Loading @@ -383,8 +387,8 @@ void TpAccumulator<Parameters, linalg::CPU>::getGMultiband(int s, int k1, int k2 } } template <class Parameters> double TpAccumulator<Parameters, linalg::CPU>::updateG4(const int channel_id) { template <class Parameters, DistType DT> double TpAccumulator<Parameters, linalg::CPU, DT>::updateG4(const int channel_id) { // G4 is stored with the following band convention: // b1 ------------------------ b3 // | | Loading Loading @@ -575,8 +579,8 @@ double TpAccumulator<Parameters, linalg::CPU>::updateG4(const int channel_id) { return 1e-9 * flops; } template <class Parameters> void TpAccumulator<Parameters, linalg::CPU>::updateG4Atomic( template <class Parameters, DistType DT> void TpAccumulator<Parameters, linalg::CPU, DT>::updateG4Atomic( Complex* G4_ptr, const int s_a, const int k1_a, const int k2_a, const int w1_a, const int w2_a, const int s_b, const int k1_b, const int k2_b, const int w1_b, const int w2_b, const Real alpha, const bool cross_legs) { Loading Loading @@ -612,8 +616,8 @@ void TpAccumulator<Parameters, linalg::CPU>::updateG4Atomic( } } template <class Parameters> void TpAccumulator<Parameters, linalg::CPU>::updateG4SpinDifference( template <class Parameters, DistType DT> void TpAccumulator<Parameters, linalg::CPU, DT>::updateG4SpinDifference( Complex* G4_ptr, 
const int sign, const int k1_a, const int k2_a, const int w1_a, const int w2_a, const int k1_b, const int k2_b, const int w1_b, const int w2_b, const Real alpha, const bool cross_legs) { Loading Loading @@ -655,16 +659,16 @@ void TpAccumulator<Parameters, linalg::CPU>::updateG4SpinDifference( } } template <class Parameters> const auto& TpAccumulator<Parameters, linalg::CPU>::get_sign_times_G4() const { template <class Parameters, DistType DT> const auto& TpAccumulator<Parameters, linalg::CPU, DT>::get_sign_times_G4() const { if (G4_.empty()) throw std::logic_error("There is no G4 stored in this class."); return G4_; } template <class Parameters> void TpAccumulator<Parameters, linalg::CPU>::sumTo(this_type& other_one) { template <class Parameters, DistType DT> void TpAccumulator<Parameters, linalg::CPU, DT>::sumTo(TpAccumulator& other_one) { if (other_one.G4_.size() != G4_.size()) throw std::logic_error("Objects accumulate different number of channels."); Loading include/dca/phys/parameters/mci_parameters.hpp +75 −152 Original line number Diff line number Diff line Loading @@ -46,7 +46,8 @@ public: fix_meas_per_walker_(false), adjust_self_energy_for_double_counting_(false), error_computation_type_(ErrorComputationType::NONE), store_configuration_(true) {} store_configuration_(true), g4_distribution_(DistType::NONE) {} template <typename Concurrency> int getBufferSize(const Concurrency& concurrency) const; Loading Loading @@ -185,7 +186,14 @@ void MciParameters::unpack(const Concurrency& concurrency, char* buffer, int buf template <typename ReaderOrWriter> void MciParameters::readWrite(ReaderOrWriter& reader_or_writer) { auto try_to_read_write = [&](const std::string& name, auto& obj) { try { reader_or_writer.execute(name, obj); } catch (std::exception&) { } }; reader_or_writer.open_group("Monte-Carlo-integration"); if (reader_or_writer.is_reader()) { Loading @@ -194,103 +202,68 @@ void MciParameters::readWrite(ReaderOrWriter& reader_or_writer) { // Try to read a 
seeding option. std::string seed_string; reader_or_writer.execute("seed", seed_string); if (strcmp(seed_string.c_str(), "random") == 0) if (seed_string == "random") generateRandomSeed(); else { std::cerr << "Warning: Invalid seeding option. Using default seed = " << default_seed << "." << std::endl; std::cerr << "Warning: Invalid seeding option. Using default seed = " << default_seed << "." << std::endl; seed_ = default_seed; } } catch (const std::exception& r_e) { try { // Read the seed as an integer. reader_or_writer.execute("seed", seed_); } catch (const std::exception& r_e2) { } } try_to_read_write("seed", seed_); } } // is_reader() else { // Write the seed. try { reader_or_writer.execute("seed", seed_); } catch (const std::exception& r_e) { } } try { reader_or_writer.execute("warm-up-sweeps", warm_up_sweeps_); } catch (const std::exception& r_e) { } try { reader_or_writer.execute("sweeps-per-measurement", sweeps_per_measurement_); } catch (const std::exception& r_e) { } try { reader_or_writer.execute("measurements", measurements_); } catch (const std::exception& r_e) { // Write the seed directly. try_to_read_write("seed", seed_); } // Read error computation type. std::string error_type = toString(error_computation_type_); try { reader_or_writer.execute("error-computation-type", error_type); try_to_read_write("error-computation-type", error_type); error_computation_type_ = stringToErrorComputationType(error_type); } catch (const std::exception& r_e) { } try { reader_or_writer.execute("store-configuration", store_configuration_); } catch (const std::exception& r_e) { } try_to_read_write("warm-up-sweeps", warm_up_sweeps_); try_to_read_write("sweeps-per-measurement", sweeps_per_measurement_); try_to_read_write("measurements", measurements_); try_to_read_write("store-configuration", store_configuration_); // Read arguments for threaded solver. 
try { reader_or_writer.open_group("threaded-solver"); try { reader_or_writer.execute("walkers", walkers_); } catch (const std::exception& r_e) { } try { reader_or_writer.execute("accumulators", accumulators_); } catch (const std::exception& r_e) { } try { reader_or_writer.execute("shared-walk-and-accumulation-thread", shared_walk_and_accumulation_thread_); } catch (const std::exception& r_e) { } try { reader_or_writer.execute("fix-meas-per-walker", fix_meas_per_walker_); } catch (const std::exception& r_e) { } std::string g4_dist_input; if (reader_or_writer.is_reader()) { try { reader_or_writer.execute("g4-distribution", g4_dist_input); if (g4_dist_input.size() > 0) { if (strcmp(g4_dist_input.c_str(), "MPI") == 0) { g4_distribution_ = dca::DistType::MPI; try_to_read_write("walkers", walkers_); try_to_read_write("accumulators", accumulators_); try_to_read_write("shared-walk-and-accumulation-thread", shared_walk_and_accumulation_thread_); try_to_read_write("fix-meas-per-walker", fix_meas_per_walker_); // Read distribution type. std::string g4_dist_name = toString(g4_distribution_); try_to_read_write("g4-distribution", g4_dist_name); g4_distribution_ = stringToDistType(g4_dist_name); reader_or_writer.close_group(); // TODO: adjust_self_energy_for_double_counting has no effect at the moment. Use default value // 'false'. // try_to_read_write("adjust-self-energy-for-double-counting", adjust_self_energy_for_double_counting_); reader_or_writer.close_group(); // Check parameters requirements. if (g4_distribution_ == DistType::MPI) { // Check for number of accumulators and walkers consistency. if (!shared_walk_and_accumulation_thread_ || walkers_ != accumulators_) { throw std::logic_error( "\n With distributed g4 enabled, 1) walker and accumulator should share " "thread, " "2) #walker == #accumulator\n"); } // Check for number of ranks and g4 measurements consistency. 
int mpi_size; MPI_Comm_size(MPI_COMM_WORLD, &mpi_size); int local_meas = measurements_ / mpi_size; Loading @@ -301,56 +274,6 @@ void MciParameters::readWrite(ReaderOrWriter& reader_or_writer) { "2) each accumulator should have same measurements\n"); } } else if (g4_dist_input.size() == 0 || strcmp(g4_dist_input.c_str(), "NONE") == 0) { g4_distribution_ = dca::DistType::NONE; } else { std::cerr << "Warning: Invalid g4-distribution. Using None." << std::endl; g4_distribution_ = dca::DistType::NONE; } } else { g4_distribution_ = dca::DistType::NONE; } } catch (const std::exception& r_e) { } } else { try { switch (g4_distribution_) { case dca::DistType::MPI: g4_dist_input = "MPI"; reader_or_writer.execute("g4-distribution", g4_dist_input); break; case dca::DistType::NONE: g4_dist_input = "NONE"; reader_or_writer.execute("g4-distribution", g4_dist_input); break; } } catch (const std::exception& r_e) { } } reader_or_writer.close_group(); } catch (const std::exception& r_e) { } // TODO: adjust_self_energy_for_double_counting has no effect at the moment. Use default value // 'false'. // try { // reader_or_writer.execute("adjust-self-energy-for-double-counting", // adjust_self_energy_for_double_counting_); // } // catch (const std::exception& r_e) { // } reader_or_writer.close_group(); } catch (const std::exception& r_e) { } } } // namespace params Loading src/CMakeLists.txt +4 −0 Original line number Diff line number Diff line Loading @@ -7,3 +7,7 @@ add_subdirectory(parallel) add_subdirectory(phys) add_subdirectory(profiling) add_subdirectory(util) add_library(enumerations STATIC phys/four_point_type.cpp phys/error_computation_type.cpp distribution/dist_types.cpp) Loading
// File: include/dca/distribution/dist_types.hpp
//
// See CITATION.md for citation guidelines, if DCA++ is used for scientific publications.
//
// Author: Peter Doak (doakpw@ornl.gov)
//         Giovanni Balduzzi (gbalduzz@itp.phys.ethz.ch)
//
// This file provides distribution strategy tags

#ifndef DCA_DIST_TYPE_HPP
#define DCA_DIST_TYPE_HPP

#include <string>

namespace dca {

// Distribution strategy tag. Exactly two strategies are declared: NONE and MPI.
enum class DistType { NONE, MPI };

// Maps a textual name to the corresponding DistType tag.
// Declared only; the definition lives in a separate translation unit.
// NOTE(review): the accepted spellings (presumably "NONE" / "MPI") and the behaviour for an
// unknown name are not visible from this header - confirm against the definition.
DistType stringToDistType(const std::string& name);

// Returns the textual name of a DistType tag (counterpart of stringToDistType).
// Declared only; the definition lives in a separate translation unit.
std::string toString(DistType type);

}  // namespace dca

#endif  // DCA_DIST_TYPE_HPP
include/dca/phys/dca_step/cluster_solver/ctaux/ctaux_accumulator.hpp +2 −1 Original line number Diff line number Diff line Loading @@ -43,10 +43,11 @@ #ifdef DCA_HAVE_CUDA #include "dca/phys/dca_step/cluster_solver/shared_tools/accumulation/sp/sp_accumulator_gpu.hpp" #include "dca/phys/dca_step/cluster_solver/shared_tools/accumulation/tp/tp_accumulator_gpu.hpp" #endif // DCA_HAVE_CUDA #ifdef DCA_HAVE_MPI #include "dca/phys/dca_step/cluster_solver/shared_tools/accumulation/tp/tp_accumulator_mpi_gpu.hpp" #endif // DCA_HAVE_MPI #endif // DCA_HAVE_CUDA namespace dca { namespace phys { namespace solver { Loading
include/dca/phys/dca_step/cluster_solver/shared_tools/accumulation/tp/tp_accumulator.hpp +46 −42 Original line number Diff line number Diff line Loading @@ -44,8 +44,8 @@ namespace accumulator { template <class Parameters, linalg::DeviceType device = linalg::CPU, DistType DT = DistType::NONE> class TpAccumulator; template <class Parameters> class TpAccumulator<Parameters, linalg::CPU, dca::DistType::NONE> { template <class Parameters, DistType DT> class TpAccumulator<Parameters, linalg::CPU, DT> { public: using Real = typename Parameters::TP_measurement_scalar_type; Loading @@ -63,7 +63,6 @@ public: using WTpExtPosDmn = func::dmn_0<domains::vertex_frequency_domain<domains::EXTENDED_POSITIVE>>; using WExchangeDmn = func::dmn_0<domains::FrequencyExchangeDomain>; using this_type = TpAccumulator<Parameters>; using Data = DcaData<Parameters>; using TpGreensFunction = typename Data::TpGreensFunction; Loading Loading @@ -109,7 +108,7 @@ public: const auto& get_sign_times_G4() const; // Sums the accumulated Green's function to the accumulated Green's function of other_acc. 
void sumTo(this_type& other_acc); void sumTo(TpAccumulator& other_acc); void synchronizeCopy() {} Loading @@ -134,7 +133,7 @@ protected: void getGMultiband(int s, int k1, int k2, int w1, int w2, Matrix& G, Complex beta = 0) const; Complex getGSingleband(int s, int k1, int k2, int w1, int w2) const; auto getGSingleband(int s, int k1, int k2, int w1, int w2) -> Complex const; template <class Configuration, typename RealIn> float computeM(const std::array<linalg::Matrix<RealIn, linalg::CPU>, 2>& M_pair, Loading Loading @@ -181,8 +180,8 @@ private: Matrix G0_M_, G_a_, G_b_; }; template <class Parameters> TpAccumulator<Parameters, linalg::CPU>::TpAccumulator( template <class Parameters, DistType DT> TpAccumulator<Parameters, linalg::CPU, DT>::TpAccumulator( const func::function<std::complex<double>, func::dmn_variadic<NuDmn, NuDmn, KDmn, WDmn>>& G0, const Parameters& pars, const int thread_id) : G0_ptr_(&G0), Loading @@ -195,6 +194,11 @@ TpAccumulator<Parameters, linalg::CPU>::TpAccumulator( G0_M_(n_bands_), G_a_(n_bands_), G_b_(n_bands_) { if constexpr (DT == DistType::MPI) { std::cerr << "The MPI distribution of G4 on the CPU is not supported. 
Reverting to no " "distribution.\n"; } if (WDmn::dmn_size() < WTpExtDmn::dmn_size()) throw(std::logic_error("The number of single particle frequencies is too small.")); initializeG0(); Loading @@ -207,16 +211,16 @@ TpAccumulator<Parameters, linalg::CPU>::TpAccumulator( } } template <class Parameters> void TpAccumulator<Parameters, linalg::CPU>::resetAccumulation(unsigned int /*dca_loop*/) { template <class Parameters, DistType DT> void TpAccumulator<Parameters, linalg::CPU, DT>::resetAccumulation(unsigned int /*dca_loop*/) { for (auto& G4_channel : G4_) G4_channel = 0.; initializeG0(); } template <class Parameters> void TpAccumulator<Parameters, linalg::CPU>::initializeG0() { template <class Parameters, DistType DT> void TpAccumulator<Parameters, linalg::CPU, DT>::initializeG0() { const int sp_index_offset = (WDmn::dmn_size() - WTpExtDmn::dmn_size()) / 2; for (int w = 0; w < WTpExtDmn::dmn_size(); ++w) { Loading @@ -229,9 +233,9 @@ void TpAccumulator<Parameters, linalg::CPU>::initializeG0() { } } template <class Parameters> template <class Parameters, DistType DT> template <class Configuration, typename RealIn> double TpAccumulator<Parameters, linalg::CPU>::accumulate( double TpAccumulator<Parameters, linalg::CPU, DT>::accumulate( const std::array<linalg::Matrix<RealIn, linalg::CPU>, 2>& M_pair, const std::array<Configuration, 2>& configs, const int sign) { Profiler profiler("accumulate", "tp-accumulation", __LINE__, thread_id_); Loading @@ -249,9 +253,9 @@ double TpAccumulator<Parameters, linalg::CPU>::accumulate( return gflops; } template <class Parameters> template <class Parameters, DistType DT> template <class Configuration, typename RealIn> float TpAccumulator<Parameters, linalg::CPU>::computeM( float TpAccumulator<Parameters, linalg::CPU, DT>::computeM( const std::array<linalg::Matrix<RealIn, linalg::CPU>, 2>& M_pair, const std::array<Configuration, 2>& configs) { float flops = 0.; Loading @@ -272,8 +276,8 @@ float TpAccumulator<Parameters, 
linalg::CPU>::computeM( return flops; } template <class Parameters> double TpAccumulator<Parameters, linalg::CPU>::computeG() { template <class Parameters, DistType DT> double TpAccumulator<Parameters, linalg::CPU, DT>::computeG() { Profiler prf("ComputeG", "tp-accumulation", __LINE__, thread_id_); for (int w2 = 0; w2 < WTpExtDmn::dmn_size(); ++w2) for (int w1 = 0; w1 < WTpExtPosDmn::dmn_size(); ++w1) Loading @@ -293,8 +297,8 @@ double TpAccumulator<Parameters, linalg::CPU>::computeG() { return 1e-9 * flops; } template <class Parameters> void TpAccumulator<Parameters, linalg::CPU>::computeGSingleband(const int s, const int k1, template <class Parameters, DistType DT> void TpAccumulator<Parameters, linalg::CPU, DT>::computeGSingleband(const int s, const int k1, const int k2, const int w1, const int w2) { assert(w1 < WTpExtPosDmn::dmn_size()); Loading @@ -310,8 +314,8 @@ void TpAccumulator<Parameters, linalg::CPU>::computeGSingleband(const int s, con G_(0, 0, s, k1, k2, w1, w2) = -G0_w1 * M_val * G0_w2; } template <class Parameters> void TpAccumulator<Parameters, linalg::CPU>::computeGMultiband(const int s, const int k1, template <class Parameters, DistType DT> void TpAccumulator<Parameters, linalg::CPU, DT>::computeGMultiband(const int s, const int k1, const int k2, const int w1, const int w2) { assert(w1 < WTpExtPosDmn::dmn_size()); Loading @@ -334,10 +338,10 @@ void TpAccumulator<Parameters, linalg::CPU>::computeGMultiband(const int s, cons } } template <class Parameters> std::complex<typename TpAccumulator<Parameters, linalg::CPU>::Real> TpAccumulator< Parameters, linalg::CPU>::getGSingleband(const int s, const int k1, const int k2, const int w1, const int w2) const { template <class Parameters, DistType DT> auto TpAccumulator<Parameters, linalg::CPU, DT>::getGSingleband(const int s, const int k1, const int k2, const int w1, const int w2) -> Complex const { const int w2_ext = w2 + extension_index_offset_; const int w1_ext = w1 + extension_index_offset_; auto 
minus_w1 = [=](const int w) { return n_pos_frqs_ - 1 - w; }; Loading @@ -354,8 +358,8 @@ std::complex<typename TpAccumulator<Parameters, linalg::CPU>::Real> TpAccumulato return std::conj(G_(0, 0, s, minus_k(k1), minus_k(k2), minus_w1(w1_ext), minus_w2(w2_ext))); } template <class Parameters> void TpAccumulator<Parameters, linalg::CPU>::getGMultiband(int s, int k1, int k2, int w1, int w2, template <class Parameters, DistType DT> void TpAccumulator<Parameters, linalg::CPU, DT>::getGMultiband(int s, int k1, int k2, int w1, int w2, Matrix& G, const Complex beta) const { const int w2_ext = w2 + extension_index_offset_; const int w1_ext = w1 + extension_index_offset_; Loading Loading @@ -383,8 +387,8 @@ void TpAccumulator<Parameters, linalg::CPU>::getGMultiband(int s, int k1, int k2 } } template <class Parameters> double TpAccumulator<Parameters, linalg::CPU>::updateG4(const int channel_id) { template <class Parameters, DistType DT> double TpAccumulator<Parameters, linalg::CPU, DT>::updateG4(const int channel_id) { // G4 is stored with the following band convention: // b1 ------------------------ b3 // | | Loading Loading @@ -575,8 +579,8 @@ double TpAccumulator<Parameters, linalg::CPU>::updateG4(const int channel_id) { return 1e-9 * flops; } template <class Parameters> void TpAccumulator<Parameters, linalg::CPU>::updateG4Atomic( template <class Parameters, DistType DT> void TpAccumulator<Parameters, linalg::CPU, DT>::updateG4Atomic( Complex* G4_ptr, const int s_a, const int k1_a, const int k2_a, const int w1_a, const int w2_a, const int s_b, const int k1_b, const int k2_b, const int w1_b, const int w2_b, const Real alpha, const bool cross_legs) { Loading Loading @@ -612,8 +616,8 @@ void TpAccumulator<Parameters, linalg::CPU>::updateG4Atomic( } } template <class Parameters> void TpAccumulator<Parameters, linalg::CPU>::updateG4SpinDifference( template <class Parameters, DistType DT> void TpAccumulator<Parameters, linalg::CPU, DT>::updateG4SpinDifference( Complex* G4_ptr, 
const int sign, const int k1_a, const int k2_a, const int w1_a, const int w2_a, const int k1_b, const int k2_b, const int w1_b, const int w2_b, const Real alpha, const bool cross_legs) { Loading Loading @@ -655,16 +659,16 @@ void TpAccumulator<Parameters, linalg::CPU>::updateG4SpinDifference( } } template <class Parameters> const auto& TpAccumulator<Parameters, linalg::CPU>::get_sign_times_G4() const { template <class Parameters, DistType DT> const auto& TpAccumulator<Parameters, linalg::CPU, DT>::get_sign_times_G4() const { if (G4_.empty()) throw std::logic_error("There is no G4 stored in this class."); return G4_; } template <class Parameters> void TpAccumulator<Parameters, linalg::CPU>::sumTo(this_type& other_one) { template <class Parameters, DistType DT> void TpAccumulator<Parameters, linalg::CPU, DT>::sumTo(TpAccumulator& other_one) { if (other_one.G4_.size() != G4_.size()) throw std::logic_error("Objects accumulate different number of channels."); Loading
include/dca/phys/parameters/mci_parameters.hpp +75 −152 Original line number Diff line number Diff line Loading @@ -46,7 +46,8 @@ public: fix_meas_per_walker_(false), adjust_self_energy_for_double_counting_(false), error_computation_type_(ErrorComputationType::NONE), store_configuration_(true) {} store_configuration_(true), g4_distribution_(DistType::NONE) {} template <typename Concurrency> int getBufferSize(const Concurrency& concurrency) const; Loading Loading @@ -185,7 +186,14 @@ void MciParameters::unpack(const Concurrency& concurrency, char* buffer, int buf template <typename ReaderOrWriter> void MciParameters::readWrite(ReaderOrWriter& reader_or_writer) { auto try_to_read_write = [&](const std::string& name, auto& obj) { try { reader_or_writer.execute(name, obj); } catch (std::exception&) { } }; reader_or_writer.open_group("Monte-Carlo-integration"); if (reader_or_writer.is_reader()) { Loading @@ -194,103 +202,68 @@ void MciParameters::readWrite(ReaderOrWriter& reader_or_writer) { // Try to read a seeding option. std::string seed_string; reader_or_writer.execute("seed", seed_string); if (strcmp(seed_string.c_str(), "random") == 0) if (seed_string == "random") generateRandomSeed(); else { std::cerr << "Warning: Invalid seeding option. Using default seed = " << default_seed << "." << std::endl; std::cerr << "Warning: Invalid seeding option. Using default seed = " << default_seed << "." << std::endl; seed_ = default_seed; } } catch (const std::exception& r_e) { try { // Read the seed as an integer. reader_or_writer.execute("seed", seed_); } catch (const std::exception& r_e2) { } } try_to_read_write("seed", seed_); } } // is_reader() else { // Write the seed. 
try { reader_or_writer.execute("seed", seed_); } catch (const std::exception& r_e) { } } try { reader_or_writer.execute("warm-up-sweeps", warm_up_sweeps_); } catch (const std::exception& r_e) { } try { reader_or_writer.execute("sweeps-per-measurement", sweeps_per_measurement_); } catch (const std::exception& r_e) { } try { reader_or_writer.execute("measurements", measurements_); } catch (const std::exception& r_e) { // Write the seed directly. try_to_read_write("seed", seed_); } // Read error computation type. std::string error_type = toString(error_computation_type_); try { reader_or_writer.execute("error-computation-type", error_type); try_to_read_write("error-computation-type", error_type); error_computation_type_ = stringToErrorComputationType(error_type); } catch (const std::exception& r_e) { } try { reader_or_writer.execute("store-configuration", store_configuration_); } catch (const std::exception& r_e) { } try_to_read_write("warm-up-sweeps", warm_up_sweeps_); try_to_read_write("sweeps-per-measurement", sweeps_per_measurement_); try_to_read_write("measurements", measurements_); try_to_read_write("store-configuration", store_configuration_); // Read arguments for threaded solver. 
try { reader_or_writer.open_group("threaded-solver"); try { reader_or_writer.execute("walkers", walkers_); } catch (const std::exception& r_e) { } try { reader_or_writer.execute("accumulators", accumulators_); } catch (const std::exception& r_e) { } try { reader_or_writer.execute("shared-walk-and-accumulation-thread", shared_walk_and_accumulation_thread_); } catch (const std::exception& r_e) { } try { reader_or_writer.execute("fix-meas-per-walker", fix_meas_per_walker_); } catch (const std::exception& r_e) { } std::string g4_dist_input; if (reader_or_writer.is_reader()) { try { reader_or_writer.execute("g4-distribution", g4_dist_input); if (g4_dist_input.size() > 0) { if (strcmp(g4_dist_input.c_str(), "MPI") == 0) { g4_distribution_ = dca::DistType::MPI; try_to_read_write("walkers", walkers_); try_to_read_write("accumulators", accumulators_); try_to_read_write("shared-walk-and-accumulation-thread", shared_walk_and_accumulation_thread_); try_to_read_write("fix-meas-per-walker", fix_meas_per_walker_); // Read distribution type. std::string g4_dist_name = toString(g4_distribution_); try_to_read_write("g4-distribution", g4_dist_name); g4_distribution_ = stringToDistType(g4_dist_name); reader_or_writer.close_group(); // TODO: adjust_self_energy_for_double_counting has no effect at the moment. Use default value // 'false'. // try_to_read_write("adjust-self-energy-for-double-counting", adjust_self_energy_for_double_counting_); reader_or_writer.close_group(); // Check parameters requirements. if (g4_distribution_ == DistType::MPI) { // Check for number of accumulators and walkers consistency. if (!shared_walk_and_accumulation_thread_ || walkers_ != accumulators_) { throw std::logic_error( "\n With distributed g4 enabled, 1) walker and accumulator should share " "thread, " "2) #walker == #accumulator\n"); } // Check for number of ranks and g4 measurements consistency. 
int mpi_size; MPI_Comm_size(MPI_COMM_WORLD, &mpi_size); int local_meas = measurements_ / mpi_size; Loading @@ -301,56 +274,6 @@ void MciParameters::readWrite(ReaderOrWriter& reader_or_writer) { "2) each accumulator should have same measurements\n"); } } else if (g4_dist_input.size() == 0 || strcmp(g4_dist_input.c_str(), "NONE") == 0) { g4_distribution_ = dca::DistType::NONE; } else { std::cerr << "Warning: Invalid g4-distribution. Using None." << std::endl; g4_distribution_ = dca::DistType::NONE; } } else { g4_distribution_ = dca::DistType::NONE; } } catch (const std::exception& r_e) { } } else { try { switch (g4_distribution_) { case dca::DistType::MPI: g4_dist_input = "MPI"; reader_or_writer.execute("g4-distribution", g4_dist_input); break; case dca::DistType::NONE: g4_dist_input = "NONE"; reader_or_writer.execute("g4-distribution", g4_dist_input); break; } } catch (const std::exception& r_e) { } } reader_or_writer.close_group(); } catch (const std::exception& r_e) { } // TODO: adjust_self_energy_for_double_counting has no effect at the moment. Use default value // 'false'. // try { // reader_or_writer.execute("adjust-self-energy-for-double-counting", // adjust_self_energy_for_double_counting_); // } // catch (const std::exception& r_e) { // } reader_or_writer.close_group(); } catch (const std::exception& r_e) { } } } // namespace params Loading
# File: src/CMakeLists.txt (visible tail of the file; earlier statements are not shown here)
add_subdirectory(parallel)
add_subdirectory(phys)
add_subdirectory(profiling)
add_subdirectory(util)

# Static library "enumerations", built from the three sources listed below
# (paths are relative to this directory).
add_library(enumerations STATIC
  phys/four_point_type.cpp
  phys/error_computation_type.cpp
  distribution/dist_types.cpp)