Merge branch 'fix_dist_G4' into fix_no_mpi_build (a183636f) · Commits · NDIP / Tool Sources / Direct-Geometry Spectroscopy / DCA / DCA Main

include/dca/distribution/dist_types.hpp

+7 −3

Original line number	Diff line number	Diff line
		@@ -6,16 +6,20 @@
		// See CITATION.md for citation guidelines, if DCA++ is used for scientific publications.
		//
		// Author: Peter Doak (doakpw@ornl.gov)
		// Giovanni Balduzzi (gbalduzz@itp.phys.ethz.ch)
		//
		// This file provides distribution strategy tags

		#ifndef DCA_DIST_TYPE_HPP
		#define DCA_DIST_TYPE_HPP

		#include <string>

		namespace dca {
		enum class DistType {
		NONE,
		MPI };
		// Tag selecting a distribution strategy: NONE or MPI.
		enum class DistType { NONE, MPI };

		// Maps a name to a DistType. Only the declaration is visible here; how unrecognized names are
		// handled is decided by the implementation.
		DistType stringToDistType(const std::string& name);
		// Returns the string form of a DistType. Presumably the inverse of stringToDistType - confirm
		// against the implementation.
		std::string toString(DistType type);
		} // namespace dca

		#endif // DCA_DIST_TYPE_HPP

include/dca/phys/dca_step/cluster_solver/ctaux/ctaux_accumulator.hpp

+2 −1

Original line number	Diff line number	Diff line
		@@ -43,10 +43,11 @@
		#ifdef DCA_HAVE_CUDA
		#include "dca/phys/dca_step/cluster_solver/shared_tools/accumulation/sp/sp_accumulator_gpu.hpp"
		#include "dca/phys/dca_step/cluster_solver/shared_tools/accumulation/tp/tp_accumulator_gpu.hpp"
		#endif // DCA_HAVE_CUDA
		#ifdef DCA_HAVE_MPI
		#include "dca/phys/dca_step/cluster_solver/shared_tools/accumulation/tp/tp_accumulator_mpi_gpu.hpp"
		#endif // DCA_HAVE_MPI
		#endif // DCA_HAVE_CUDA

		namespace dca {
		namespace phys {
		namespace solver {

include/dca/phys/dca_step/cluster_solver/shared_tools/accumulation/tp/tp_accumulator.hpp

+46 −42

Original line number	Diff line number	Diff line
		@@ -44,8 +44,8 @@ namespace accumulator {
		template <class Parameters, linalg::DeviceType device = linalg::CPU, DistType DT = DistType::NONE>
		class TpAccumulator;

		template <class Parameters>
		class TpAccumulator<Parameters, linalg::CPU, dca::DistType::NONE> {
		template <class Parameters, DistType DT>
		class TpAccumulator<Parameters, linalg::CPU, DT> {
		public:
		using Real = typename Parameters::TP_measurement_scalar_type;

		@@ -63,7 +63,6 @@ public:
		using WTpExtPosDmn = func::dmn_0<domains::vertex_frequency_domain<domains::EXTENDED_POSITIVE>>;
		using WExchangeDmn = func::dmn_0<domains::FrequencyExchangeDomain>;

		using this_type = TpAccumulator<Parameters>;
		using Data = DcaData<Parameters>;
		using TpGreensFunction = typename Data::TpGreensFunction;

		@@ -109,7 +108,7 @@ public:
		const auto& get_sign_times_G4() const;

		// Sums the accumulated Green's function to the accumulated Green's function of other_acc.
		void sumTo(this_type& other_acc);
		void sumTo(TpAccumulator& other_acc);

		void synchronizeCopy() {}

		@@ -134,7 +133,7 @@ protected:

		void getGMultiband(int s, int k1, int k2, int w1, int w2, Matrix& G, Complex beta = 0) const;

		Complex getGSingleband(int s, int k1, int k2, int w1, int w2) const;
		// NOTE(review): a cv-qualifier written after a trailing return type belongs to the return type.
		// As spelled, this declares a non-const member function returning `const Complex`, whereas the
		// previous declaration `Complex getGSingleband(...) const` was a const member function. The
		// const-member spelling is `auto getGSingleband(...) const -> Complex`; any change here must be
		// mirrored in the out-of-class definition.
		auto getGSingleband(int s, int k1, int k2, int w1, int w2) -> Complex const;

		template <class Configuration, typename RealIn>
		float computeM(const std::array<linalg::Matrix<RealIn, linalg::CPU>, 2>& M_pair,
		@@ -181,8 +180,8 @@ private:
		Matrix G0_M_, G_a_, G_b_;
		};

		template <class Parameters>
		TpAccumulator<Parameters, linalg::CPU>::TpAccumulator(
		template <class Parameters, DistType DT>
		TpAccumulator<Parameters, linalg::CPU, DT>::TpAccumulator(
		const func::function<std::complex<double>, func::dmn_variadic<NuDmn, NuDmn, KDmn, WDmn>>& G0,
		const Parameters& pars, const int thread_id)
		: G0_ptr_(&G0),
		@@ -195,6 +194,11 @@ TpAccumulator<Parameters, linalg::CPU>::TpAccumulator(
		G0_M_(n_bands_),
		G_a_(n_bands_),
		G_b_(n_bands_) {
		if constexpr (DT == DistType::MPI) {
		std::cerr << "The MPI distribution of G4 on the CPU is not supported. Reverting to no "
		"distribution.\n";
		}

		if (WDmn::dmn_size() < WTpExtDmn::dmn_size())
		throw(std::logic_error("The number of single particle frequencies is too small."));
		initializeG0();
		@@ -207,16 +211,16 @@ TpAccumulator<Parameters, linalg::CPU>::TpAccumulator(
		}
		}

		template <class Parameters>
		void TpAccumulator<Parameters, linalg::CPU>::resetAccumulation(unsigned int /*dca_loop*/) {
		// Sets every element of G4_ to zero and then re-runs initializeG0().
		// The unsigned argument is accepted for interface compatibility and is not used here; its
		// name is commented out so that no unused-parameter warning is raised.
		template <class Parameters, DistType DT>
		void TpAccumulator<Parameters, linalg::CPU, DT>::resetAccumulation(unsigned int /*dca_loop*/) {
		  for (auto& G4_channel : G4_)
		    G4_channel = 0.;

		  initializeG0();
		}

		template <class Parameters>
		void TpAccumulator<Parameters, linalg::CPU>::initializeG0() {
		template <class Parameters, DistType DT>
		void TpAccumulator<Parameters, linalg::CPU, DT>::initializeG0() {
		const int sp_index_offset = (WDmn::dmn_size() - WTpExtDmn::dmn_size()) / 2;

		for (int w = 0; w < WTpExtDmn::dmn_size(); ++w) {
		@@ -229,9 +233,9 @@ void TpAccumulator<Parameters, linalg::CPU>::initializeG0() {
		}
		}

		template <class Parameters>
		template <class Parameters, DistType DT>
		template <class Configuration, typename RealIn>
		double TpAccumulator<Parameters, linalg::CPU>::accumulate(
		double TpAccumulator<Parameters, linalg::CPU, DT>::accumulate(
		const std::array<linalg::Matrix<RealIn, linalg::CPU>, 2>& M_pair,
		const std::array<Configuration, 2>& configs, const int sign) {
		Profiler profiler("accumulate", "tp-accumulation", __LINE__, thread_id_);
		@@ -249,9 +253,9 @@ double TpAccumulator<Parameters, linalg::CPU>::accumulate(
		return gflops;
		}

		template <class Parameters>
		template <class Parameters, DistType DT>
		template <class Configuration, typename RealIn>
		float TpAccumulator<Parameters, linalg::CPU>::computeM(
		float TpAccumulator<Parameters, linalg::CPU, DT>::computeM(
		const std::array<linalg::Matrix<RealIn, linalg::CPU>, 2>& M_pair,
		const std::array<Configuration, 2>& configs) {
		float flops = 0.;
		@@ -272,8 +276,8 @@ float TpAccumulator<Parameters, linalg::CPU>::computeM(
		return flops;
		}

		template <class Parameters>
		double TpAccumulator<Parameters, linalg::CPU>::computeG() {
		template <class Parameters, DistType DT>
		double TpAccumulator<Parameters, linalg::CPU, DT>::computeG() {
		Profiler prf("ComputeG", "tp-accumulation", __LINE__, thread_id_);
		for (int w2 = 0; w2 < WTpExtDmn::dmn_size(); ++w2)
		for (int w1 = 0; w1 < WTpExtPosDmn::dmn_size(); ++w1)
		@@ -293,8 +297,8 @@ double TpAccumulator<Parameters, linalg::CPU>::computeG() {
		return 1e-9 * flops;
		}

		template <class Parameters>
		void TpAccumulator<Parameters, linalg::CPU>::computeGSingleband(const int s, const int k1,
		template <class Parameters, DistType DT>
		void TpAccumulator<Parameters, linalg::CPU, DT>::computeGSingleband(const int s, const int k1,
		const int k2, const int w1,
		const int w2) {
		assert(w1 < WTpExtPosDmn::dmn_size());
		@@ -310,8 +314,8 @@ void TpAccumulator<Parameters, linalg::CPU>::computeGSingleband(const int s, con
		G_(0, 0, s, k1, k2, w1, w2) = -G0_w1 * M_val * G0_w2;
		}

		template <class Parameters>
		void TpAccumulator<Parameters, linalg::CPU>::computeGMultiband(const int s, const int k1,
		template <class Parameters, DistType DT>
		void TpAccumulator<Parameters, linalg::CPU, DT>::computeGMultiband(const int s, const int k1,
		const int k2, const int w1,
		const int w2) {
		assert(w1 < WTpExtPosDmn::dmn_size());
		@@ -334,10 +338,10 @@ void TpAccumulator<Parameters, linalg::CPU>::computeGMultiband(const int s, cons
		}
		}

		template <class Parameters>
		std::complex<typename TpAccumulator<Parameters, linalg::CPU>::Real> TpAccumulator<
		Parameters, linalg::CPU>::getGSingleband(const int s, const int k1, const int k2, const int w1,
		const int w2) const {
		template <class Parameters, DistType DT>
		auto TpAccumulator<Parameters, linalg::CPU, DT>::getGSingleband(const int s, const int k1,
		const int k2, const int w1,
		const int w2) -> Complex const {
		const int w2_ext = w2 + extension_index_offset_;
		const int w1_ext = w1 + extension_index_offset_;
		auto minus_w1 = [=](const int w) { return n_pos_frqs_ - 1 - w; };
		@@ -354,8 +358,8 @@ std::complex<typename TpAccumulator<Parameters, linalg::CPU>::Real> TpAccumulato
		return std::conj(G_(0, 0, s, minus_k(k1), minus_k(k2), minus_w1(w1_ext), minus_w2(w2_ext)));
		}

		template <class Parameters>
		void TpAccumulator<Parameters, linalg::CPU>::getGMultiband(int s, int k1, int k2, int w1, int w2,
		template <class Parameters, DistType DT>
		void TpAccumulator<Parameters, linalg::CPU, DT>::getGMultiband(int s, int k1, int k2, int w1, int w2,
		Matrix& G, const Complex beta) const {
		const int w2_ext = w2 + extension_index_offset_;
		const int w1_ext = w1 + extension_index_offset_;
		@@ -383,8 +387,8 @@ void TpAccumulator<Parameters, linalg::CPU>::getGMultiband(int s, int k1, int k2
		}
		}

		template <class Parameters>
		double TpAccumulator<Parameters, linalg::CPU>::updateG4(const int channel_id) {
		template <class Parameters, DistType DT>
		double TpAccumulator<Parameters, linalg::CPU, DT>::updateG4(const int channel_id) {
		// G4 is stored with the following band convention:
		// b1 ------------------------ b3
		//        |           |
		@@ -575,8 +579,8 @@ double TpAccumulator<Parameters, linalg::CPU>::updateG4(const int channel_id) {
		return 1e-9 * flops;
		}

		template <class Parameters>
		void TpAccumulator<Parameters, linalg::CPU>::updateG4Atomic(
		template <class Parameters, DistType DT>
		void TpAccumulator<Parameters, linalg::CPU, DT>::updateG4Atomic(
		Complex* G4_ptr, const int s_a, const int k1_a, const int k2_a, const int w1_a, const int w2_a,
		const int s_b, const int k1_b, const int k2_b, const int w1_b, const int w2_b, const Real alpha,
		const bool cross_legs) {
		@@ -612,8 +616,8 @@ void TpAccumulator<Parameters, linalg::CPU>::updateG4Atomic(
		}
		}

		template <class Parameters>
		void TpAccumulator<Parameters, linalg::CPU>::updateG4SpinDifference(
		template <class Parameters, DistType DT>
		void TpAccumulator<Parameters, linalg::CPU, DT>::updateG4SpinDifference(
		Complex* G4_ptr, const int sign, const int k1_a, const int k2_a, const int w1_a, const int w2_a,
		const int k1_b, const int k2_b, const int w1_b, const int w2_b, const Real alpha,
		const bool cross_legs) {
		@@ -655,16 +659,16 @@ void TpAccumulator<Parameters, linalg::CPU>::updateG4SpinDifference(
		}
		}

		template <class Parameters>
		const auto& TpAccumulator<Parameters, linalg::CPU>::get_sign_times_G4() const {
		// Read-only access to the G4_ container.
		// Throws std::logic_error when G4_ is empty.
		template <class Parameters, DistType DT>
		const auto& TpAccumulator<Parameters, linalg::CPU, DT>::get_sign_times_G4() const {
		  const bool has_g4 = !G4_.empty();
		  if (has_g4)
		    return G4_;

		  throw std::logic_error("There is no G4 stored in this class.");
		}

		template <class Parameters>
		void TpAccumulator<Parameters, linalg::CPU>::sumTo(this_type& other_one) {
		template <class Parameters, DistType DT>
		void TpAccumulator<Parameters, linalg::CPU, DT>::sumTo(TpAccumulator& other_one) {
		if (other_one.G4_.size() != G4_.size())
		throw std::logic_error("Objects accumulate different number of channels.");

include/dca/phys/parameters/mci_parameters.hpp

+75 −152

Original line number	Diff line number	Diff line
		@@ -46,7 +46,8 @@ public:
		fix_meas_per_walker_(false),
		adjust_self_energy_for_double_counting_(false),
		error_computation_type_(ErrorComputationType::NONE),
		store_configuration_(true) {}
		store_configuration_(true),
		g4_distribution_(DistType::NONE) {}

		template <typename Concurrency>
		int getBufferSize(const Concurrency& concurrency) const;
		@@ -185,7 +186,14 @@ void MciParameters::unpack(const Concurrency& concurrency, char* buffer, int buf

		template <typename ReaderOrWriter>
		void MciParameters::readWrite(ReaderOrWriter& reader_or_writer) {
		// Best-effort helper: forwards (name, obj) to reader_or_writer.execute() and discards any
		// std::exception thrown by that call, so one failing entry does not abort the entries that
		// follow. Exceptions not derived from std::exception still propagate.
		auto try_to_read_write = [&](const std::string& name, auto& obj) {
		try {
		reader_or_writer.execute(name, obj);
		}
		catch (std::exception&) {
		// Deliberately ignored - presumably so that entries absent from the input keep their
		// current values; confirm against the reader/writer implementations.
		}
		};

		reader_or_writer.open_group("Monte-Carlo-integration");

		if (reader_or_writer.is_reader()) {
		@@ -194,103 +202,68 @@ void MciParameters::readWrite(ReaderOrWriter& reader_or_writer) {
		// Try to read a seeding option.
		std::string seed_string;
		reader_or_writer.execute("seed", seed_string);
		if (strcmp(seed_string.c_str(), "random") == 0)
		if (seed_string == "random")
		generateRandomSeed();
		else {
		std::cerr << "Warning: Invalid seeding option. Using default seed = " << default_seed
		<< "." << std::endl;
		std::cerr << "Warning: Invalid seeding option. Using default seed = " << default_seed << "."
		<< std::endl;
		seed_ = default_seed;
		}
		}
		catch (const std::exception& r_e) {
		try {
		// Read the seed as an integer.
		reader_or_writer.execute("seed", seed_);
		}

		catch (const std::exception& r_e2) {
		}
		}
		try_to_read_write("seed", seed_);
		}
		} // is_reader()

		else {
		// Write the seed.
		try {
		reader_or_writer.execute("seed", seed_);
		}
		catch (const std::exception& r_e) {
		}
		}

		try {
		reader_or_writer.execute("warm-up-sweeps", warm_up_sweeps_);
		}
		catch (const std::exception& r_e) {
		}
		try {
		reader_or_writer.execute("sweeps-per-measurement", sweeps_per_measurement_);
		}
		catch (const std::exception& r_e) {
		}

		try {
		reader_or_writer.execute("measurements", measurements_);
		}
		catch (const std::exception& r_e) {
		// Write the seed directly.
		try_to_read_write("seed", seed_);
		}

		// Read error computation type.
		std::string error_type = toString(error_computation_type_);
		try {
		reader_or_writer.execute("error-computation-type", error_type);
		try_to_read_write("error-computation-type", error_type);
		error_computation_type_ = stringToErrorComputationType(error_type);
		}
		catch (const std::exception& r_e) {
		}

		try {
		reader_or_writer.execute("store-configuration", store_configuration_);
		}
		catch (const std::exception& r_e) {
		}
		try_to_read_write("warm-up-sweeps", warm_up_sweeps_);
		try_to_read_write("sweeps-per-measurement", sweeps_per_measurement_);
		try_to_read_write("measurements", measurements_);

		try_to_read_write("store-configuration", store_configuration_);

		// Read arguments for threaded solver.
		try {
		reader_or_writer.open_group("threaded-solver");
		try {
		reader_or_writer.execute("walkers", walkers_);
		}
		catch (const std::exception& r_e) {
		}
		try {
		reader_or_writer.execute("accumulators", accumulators_);
		}
		catch (const std::exception& r_e) {
		}
		try {
		reader_or_writer.execute("shared-walk-and-accumulation-thread",
		shared_walk_and_accumulation_thread_);
		}
		catch (const std::exception& r_e) {
		}
		try {
		reader_or_writer.execute("fix-meas-per-walker", fix_meas_per_walker_);
		}
		catch (const std::exception& r_e) {
		}
		std::string g4_dist_input;
		if (reader_or_writer.is_reader()) {
		try {
		reader_or_writer.execute("g4-distribution", g4_dist_input);
		if (g4_dist_input.size() > 0) {
		if (strcmp(g4_dist_input.c_str(), "MPI") == 0) {
		g4_distribution_ = dca::DistType::MPI;

		try_to_read_write("walkers", walkers_);
		try_to_read_write("accumulators", accumulators_);
		try_to_read_write("shared-walk-and-accumulation-thread", shared_walk_and_accumulation_thread_);
		try_to_read_write("fix-meas-per-walker", fix_meas_per_walker_);

		// Read distribution type.
		std::string g4_dist_name = toString(g4_distribution_);
		try_to_read_write("g4-distribution", g4_dist_name);
		g4_distribution_ = stringToDistType(g4_dist_name);

		reader_or_writer.close_group();

		// TODO: adjust_self_energy_for_double_counting has no effect at the moment. Use default value
		// 'false'.
		// try_to_read_write("adjust-self-energy-for-double-counting", adjust_self_energy_for_double_counting_);

		reader_or_writer.close_group();

		// Check parameters requirements.
		if (g4_distribution_ == DistType::MPI) {
		// Check for number of accumulators and walkers consistency.
		if (!shared_walk_and_accumulation_thread_ || walkers_ != accumulators_) {
		throw std::logic_error(
		"\n With distributed g4 enabled, 1) walker and accumulator should share "
		"thread, "
		"2) #walker == #accumulator\n");
		}

		// Check for number of ranks and g4 measurements consistency.
		int mpi_size;
		MPI_Comm_size(MPI_COMM_WORLD, &mpi_size);
		int local_meas = measurements_ / mpi_size;
		@@ -301,56 +274,6 @@ void MciParameters::readWrite(ReaderOrWriter& reader_or_writer) {
		"2) each accumulator should have same measurements\n");
		}
		}
		else if (g4_dist_input.size() == 0 || strcmp(g4_dist_input.c_str(), "NONE") == 0) {
		g4_distribution_ = dca::DistType::NONE;
		}
		else {
		std::cerr << "Warning: Invalid g4-distribution. Using None." << std::endl;
		g4_distribution_ = dca::DistType::NONE;
		}
		}
		else {
		g4_distribution_ = dca::DistType::NONE;
		}
		}
		catch (const std::exception& r_e) {
		}
		}
		else {
		try {
		switch (g4_distribution_) {
		case dca::DistType::MPI:
		g4_dist_input = "MPI";
		reader_or_writer.execute("g4-distribution", g4_dist_input);
		break;
		case dca::DistType::NONE:
		g4_dist_input = "NONE";
		reader_or_writer.execute("g4-distribution", g4_dist_input);
		break;
		}
		}
		catch (const std::exception& r_e) {
		}
		}

		reader_or_writer.close_group();
		}
		catch (const std::exception& r_e) {
		}

		// TODO: adjust_self_energy_for_double_counting has no effect at the moment. Use default value
		// 'false'.
		// try {
		// reader_or_writer.execute("adjust-self-energy-for-double-counting",
		// adjust_self_energy_for_double_counting_);
		// }
		// catch (const std::exception& r_e) {
		// }

		reader_or_writer.close_group();
		}
		catch (const std::exception& r_e) {
		}
		}

		} // namespace params

src/CMakeLists.txt

+4 −0

Original line number	Diff line number	Diff line
		@@ -7,3 +7,7 @@ add_subdirectory(parallel)
		add_subdirectory(phys)
		add_subdirectory(profiling)
		add_subdirectory(util)

		# Static library built from the enumeration-related sources (four-point type, error
		# computation type, distribution type).
		add_library(enumerations STATIC phys/four_point_type.cpp phys/error_computation_type.cpp
		distribution/dist_types.cpp)