include/dca/parallel/mpi_concurrency/mpi_collective_sum.hpp  +27 −11

@@ -68,16 +68,24 @@ public:
   void average_and_compute_stddev(func::function<std::complex<scalar_type>, domain>& f_mean,
                                   func::function<std::complex<scalar_type>, domain>& f_stddev) const;

-  // Computes the average of s over all ranks excluding the local value and stores the result back
+  // Computes the sum of s over all ranks excluding the local value and stores the result back
   // in s.
   // Does nothing, if there is only one rank.
   // In/Out: s
-  template <typename Scalar>
-  void leaveOneOutAvg(Scalar& s) const;
-  // Element-wise implementation for dca::func::function.
+  template <typename T>
+  void leaveOneOutSum(T& s) const;
+
+  // Element-wise implementations for dca::func::function.
   // In/Out: f
   template <typename Scalar, class Domain>
-  void leaveOneOutAvg(func::function<Scalar, Domain>& f) const;
+  void leaveOneOutSum(func::function<Scalar, Domain>& f) const;
+
+  // Computes the average of s over all ranks excluding the local value and stores the result back
+  // in s.
+  // Does nothing, if there is only one rank.
+  // In/Out: s
+  template <typename T>
+  void leaveOneOutAvg(T& s) const;

   // Computes and returns the element-wise jackknife error
   // \Delta f_{jack} = \sqrt( (n-1)/n \sum_i^n (f_i - f_avg)^2 ),

@@ -268,26 +276,34 @@ void MPICollectiveSum::sum_and_average(const some_type& in, some_type& out,
 }

 template <typename Scalar>
-void MPICollectiveSum::leaveOneOutAvg(Scalar& s) const {
+void MPICollectiveSum::leaveOneOutSum(Scalar& s) const {
   if (MPIProcessorGrouping::get_size() == 1)
     return;

   const Scalar s_local(s);
   sum(s);
-  s = (s - s_local) / (MPIProcessorGrouping::get_size() - 1);
+  s = s - s_local;
 }

 template <typename Scalar, class Domain>
-void MPICollectiveSum::leaveOneOutAvg(func::function<Scalar, Domain>& f) const {
+void MPICollectiveSum::leaveOneOutSum(func::function<Scalar, Domain>& f) const {
   if (MPIProcessorGrouping::get_size() == 1)
     return;

   const func::function<Scalar, Domain> f_local(f);
   sum(f_local, f);

-  const double scale = 1. / (MPIProcessorGrouping::get_size() - 1);
   for (int i = 0; i < f.size(); ++i)
-    f(i) = scale * (f(i) - f_local(i));
+    f(i) = f(i) - f_local(i);
 }

+template <typename T>
+void MPICollectiveSum::leaveOneOutAvg(T& x) const {
+  if (MPIProcessorGrouping::get_size() == 1)
+    return;
+
+  leaveOneOutSum(x);
+  x /= MPIProcessorGrouping::get_size() - 1;
+}
+
 template <typename Scalar, class Domain>
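For reference, the relation between the renamed collectives and the jackknife error quoted in the header comment can be spelled out without MPI. The sketch below is illustrative only (plain doubles, made-up data, no dca or MPI calls): leaveOneOutSum leaves each "rank" with the sum over the other n-1 ranks, leaveOneOutAvg additionally divides by n-1, and the jackknife error is the quoted spread of those leave-one-out estimates.

#include <cmath>
#include <cstdio>
#include <vector>

int main() {
  const std::vector<double> local = {1., 2., 4., 8.};  // stand-in for the per-rank local values
  const int n = static_cast<int>(local.size());

  double total = 0.;  // what a plain sum() would leave on every rank
  for (const double v : local)
    total += v;

  // Per-rank leave-one-out estimates f_i: leaveOneOutSum followed by the /(n-1) of leaveOneOutAvg.
  std::vector<double> f(n);
  for (int i = 0; i < n; ++i)
    f[i] = (total - local[i]) / (n - 1);

  // Jackknife error from the header: \Delta f_jack = sqrt( (n-1)/n * sum_i (f_i - f_avg)^2 ).
  double f_avg = 0.;
  for (const double fi : f)
    f_avg += fi;
  f_avg /= n;

  double sq = 0.;
  for (const double fi : f)
    sq += (fi - f_avg) * (fi - f_avg);

  std::printf("jackknife error: %g\n", std::sqrt((n - 1.) / n * sq));
  return 0;
}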
include/dca/parallel/no_concurrency/serial_collective_sum.hpp  +4 −4

@@ -57,10 +57,10 @@ public:
   template <typename T>
   void sum_and_average(T& /*obj*/) const {}

-  template <typename Scalar>
-  void leaveOneOutAvg(Scalar&) const {}
-  template <typename Scalar, class Domain>
-  void leaveOneOutAvg(func::function<Scalar, Domain>&) const {}
+  template <typename T>
+  void leaveOneOutAvg(T&) const {}
+  template <typename T>
+  void leaveOneOutSum(T&) const {}

   template <typename Scalar, class Domain>
   func::function<Scalar, Domain> jackknifeError(func::function<Scalar, Domain>&,
include/dca/phys/dca_data/dca_data.hpp  +9 −0

@@ -90,6 +90,8 @@ public:
   using SpGreensFunction = func::function<std::complex<double>,
                                           func::dmn_variadic<NuDmn, NuDmn, KClusterDmn, WDmn>>;
+  using SpRGreensFunction = func::function<std::complex<double>,
+                                           func::dmn_variadic<NuDmn, NuDmn, RClusterDmn, WDmn>>;
   using TpGreensFunction =
       func::function<std::complex<TpAccumulatorScalar>,
                      func::dmn_variadic<BDmn, BDmn, BDmn, BDmn, KClusterDmn, KClusterDmn,

@@ -178,6 +180,11 @@ public:  // Optional members getters.
       G_k_w_err_.reset(new SpGreensFunction("G_k_w-error"));
     return *G_k_w_err_;
   }
+  auto& get_G_r_w_error() {
+    if (not G_r_w_err_)
+      G_r_w_err_ = std::make_unique<SpRGreensFunction>("G_r_w-error");
+    return *G_r_w_err_;
+  }
   auto& get_G_k_w_stdv() {
     if (not G_k_w_err_)
       G_k_w_err_.reset(new SpGreensFunction("cluster_greens_function_G_k_w-stddev"));

@@ -221,6 +228,7 @@ public:  // Optional members getters.
 private:  // Optional members.
   std::unique_ptr<SpGreensFunction> G_k_w_err_;
+  std::unique_ptr<SpRGreensFunction> G_r_w_err_;
   std::unique_ptr<SpGreensFunction> Sigma_err_;
   std::unique_ptr<TpGreensFunction> G4_;
   std::unique_ptr<TpGreensFunction> G4_err_;

@@ -434,6 +442,7 @@ void DcaData<Parameters>::write(Writer& writer) {
   writer.execute(G_k_w);
   writer.execute(G_k_w_err_);
   writer.execute(G_r_w);
+  writer.execute(G_r_w_err_);
   writer.execute(G_k_t);
   writer.execute(G_r_t);
include/dca/phys/dca_step/cluster_solver/ctaux/ctaux_cluster_solver.hpp  +24 −4

@@ -146,8 +146,8 @@ private:
   func::function<std::complex<double>, NuNuRClusterWDmn> M_r_w_;
   func::function<std::complex<double>, NuNuRClusterWDmn> M_r_w_squared_;

-private:
   bool averaged_;
+  bool compute_jack_knife_;
 };

 template <dca::linalg::DeviceType device_t, class Parameters, class Data>

@@ -201,6 +201,9 @@ void CtauxClusterSolver<device_t, Parameters, Data>::initialize(int dca_iteratio
   accumulator_.initialize(dca_iteration_);

   averaged_ = false;
+  compute_jack_knife_ = (dca_iteration == parameters_.get_dca_iterations() - 1) &&
+                        (parameters_.get_error_computation_type() == ErrorComputationType::JACK_KNIFE);
+
   if (concurrency_.id() == concurrency_.first())
     std::cout << "\n\n\t CT-AUX Integrator has initialized (DCA-iteration : " << dca_iteration

@@ -279,6 +282,9 @@ double CtauxClusterSolver<device_t, Parameters, Data>::finalize(dca_info_struct_
       parameters_.get_four_point_type() != NONE)
     data_.get_G4() /= parameters_.get_beta() * parameters_.get_beta();

+  if (compute_jack_knife_)
+    concurrency_.jackknifeError(data_.get_G4(), true);
+
   double total = 1.e-6, integral = 0;
   for (int l = 0; l < accumulator_.get_visited_expansion_order_k().size(); l++) {

@@ -400,12 +406,19 @@ void CtauxClusterSolver<device_t, Parameters, Data>::computeErrorBars() {
 template <dca::linalg::DeviceType device_t, class Parameters, class Data>
 void CtauxClusterSolver<device_t, Parameters, Data>::collect_measurements() {
+  auto collect = [&](auto& f) {
+    if (compute_jack_knife_)
+      concurrency_.leaveOneOutSum(f);
+    else
+      concurrency_.sum(f);
+  };
+
   {
     Profiler profiler("Scalars", "QMC-collectives", __LINE__);
     concurrency_.sum(total_time_);
     concurrency_.sum(accumulator_.get_Gflop());

     accumulated_sign_ = accumulator_.get_accumulated_sign();
-    concurrency_.sum(accumulated_sign_);
+    collect(accumulated_sign_);
   }

   if (concurrency_.id() == concurrency_.first())

@@ -419,7 +432,7 @@ void CtauxClusterSolver<device_t, Parameters, Data>::collect_measurements() {
   M_r_w_ = accumulator_.get_sign_times_M_r_w();
   {
     Profiler profiler("QMC-self-energy", "QMC-collectives", __LINE__);
-    concurrency_.sum(M_r_w_);
+    collect(M_r_w_);
   }
   M_r_w_ /= accumulated_sign_;

@@ -457,7 +470,7 @@ void CtauxClusterSolver<device_t, Parameters, Data>::collect_measurements() {
     Profiler profiler("QMC-two-particle-Greens-function", "QMC-collectives", __LINE__);
     auto& G4 = data_.get_G4();
     G4 = accumulator_.get_sign_times_G4();
-    concurrency_.sum(G4);
+    collect(G4);
     G4 /= accumulated_sign_;
   }

@@ -567,6 +580,13 @@ double CtauxClusterSolver<device_t, Parameters, Data>::compute_S_k_w_from_G_k_w(
     }
   }

+  // Compute error on G and Self Energy.
+  if (compute_jack_knife_) {
+    data_.get_G_k_w_error() = concurrency_.jackknifeError(data_.G_k_w, true);
+    data_.get_G_r_w_error() = concurrency_.jackknifeError(data_.G_r_w, true);
+    data_.get_Sigma_error() = concurrency_.jackknifeError(data_.Sigma, true);
+  }
+
   // set_non_interacting_bands_to_zero();
   symmetrize::execute(data_.Sigma, data_.H_symmetry);
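The effect of routing the reductions through the new collect() lambda is that, on a jackknife iteration, both the signed accumulators and the accumulated sign become leave-one-out sums, so the existing divisions by accumulated_sign_ leave each rank with the estimate obtained from every measurement except its own; jackknifeError then turns the spread of these per-rank estimates into an error bar. A minimal sketch of that ratio argument, without MPI and with purely illustrative names and data:

#include <cstdio>
#include <vector>

int main() {
  // Stand-ins for the per-rank accumulators: sign-weighted measurements and accumulated signs.
  const std::vector<double> sign_times_m = {3., 5., 4., 6.};
  const std::vector<double> accumulated_sign = {2., 3., 2., 3.};
  const int n = static_cast<int>(sign_times_m.size());

  double m_total = 0., sign_total = 0.;
  for (int i = 0; i < n; ++i) {
    m_total += sign_times_m[i];
    sign_total += accumulated_sign[i];
  }

  for (int r = 0; r < n; ++r) {
    // After collect(...) with compute_jack_knife_ == true, rank r holds the leave-one-out sums.
    const double m_loo = m_total - sign_times_m[r];
    const double sign_loo = sign_total - accumulated_sign[r];
    // The solver's existing division by the accumulated sign then yields one jackknife sample per rank.
    std::printf("rank %d jackknife sample: %g\n", r, m_loo / sign_loo);
  }
  return 0;
}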
test/unit/parallel/mpi_concurrency/mpi_collective_sum_test.cpp  +9 −1

@@ -67,7 +67,7 @@ TEST_F(MPICollectiveSumTest, SumFunction) {
     EXPECT_EQ(function_expected(i), function_test(i));
 }

-TEST_F(MPICollectiveSumTest, LeaveOneOutAvg) {
+TEST_F(MPICollectiveSumTest, LeaveOneOutAvgAndSum) {
   std::vector<double> values(size_);
   double sum = 0.;
   for (int i = 0; i < size_; ++i) {

@@ -80,8 +80,11 @@ TEST_F(MPICollectiveSumTest, LeaveOneOutAvg) {
   // Check scalar version.
   double scalar = values[rank_];
+  double sum_one_out = scalar;
   sum_interface_.leaveOneOutAvg(scalar);
+  sum_interface_.leaveOneOutSum(sum_one_out);
   EXPECT_DOUBLE_EQ(expected, scalar);
+  EXPECT_DOUBLE_EQ(expected * (size_ - 1), sum_one_out);

   // Check dca::func::function version.
   using TestDomain = dca::func::dmn_0<dca::func::dmn<2, int>>;

@@ -89,10 +92,15 @@ TEST_F(MPICollectiveSumTest, LeaveOneOutAvg) {
   f(0) = values[rank_];
   f(1) = 0.;
+  dca::func::function<double, TestDomain> f_sum(f);

   sum_interface_.leaveOneOutAvg(f);
+  sum_interface_.leaveOneOutSum(f_sum);

   EXPECT_DOUBLE_EQ(expected, f(0));
+  EXPECT_DOUBLE_EQ(expected * (size_ - 1), f_sum(0));
   EXPECT_DOUBLE_EQ(0., f(1));
+  EXPECT_DOUBLE_EQ(0., f_sum(1));
 }

 TEST_F(MPICollectiveSumTest, JackknifeErrorReal) {