Unverified Commit d2cd13c4 authored by Peter Doak's avatar Peter Doak Committed by GitHub
Browse files

Merge pull request #1 from gbalduzz/fix_no_mpi_build

Fix no mpi build -- addendum
parents db49c5b6 740973db
Loading
Loading
Loading
Loading
+7 −3
Original line number Diff line number Diff line
@@ -6,16 +6,20 @@
// See CITATION.md for citation guidelines, if DCA++ is used for scientific publications.
//
// Author: Peter Doak (doakpw@ornl.gov)
//         Giovanni Balduzzi (gbalduzz@itp.phys.ethz.ch)
//
// This file provides distribution strategy tags

#ifndef DCA_DIST_TYPE_HPP
#define DCA_DIST_TYPE_HPP

#include <string>

namespace dca {
enum class DistType {
		     NONE,
		     MPI };
enum class DistType { NONE, MPI };

DistType stringToDistType(const std::string& name);
std::string toString(DistType type);
}  // namespace dca

#endif  // DCA_DIST_TYPE_HPP
+7 −1
Original line number Diff line number Diff line
@@ -30,11 +30,14 @@
#include "dca/distribution/dist_types.hpp"
#include "dca/function/scalar_cast.hpp"
#include "dca/function/set_to_zero.hpp"
#include "dca/util/ignore.hpp"
#include "dca/util/pack_operations.hpp"
#include "dca/util/type_utils.hpp"

#include "dca/parallel/util/get_workload.hpp"
#ifdef DCA_HAVE_MPI
#include "mpi.h"
#endif

namespace dca {
namespace func {
@@ -302,12 +305,15 @@ function<scalartype, domain>::function(const std::string& name, DistType dist)
      size_sbdm(dmn.get_leaf_domain_sizes()),
      step_sbdm(dmn.get_leaf_domain_steps()),
      fnc_values(nullptr) {
  if (name.substr(0, 2) == "G4" && dist == DistType::MPI) {
  dca::util::ignoreUnused(dist);
#ifdef DCA_HAVE_MPI
  if (dist == DistType::MPI) {
    int my_rank, mpi_size;
    MPI_Comm_size(MPI_COMM_WORLD, &mpi_size);
    MPI_Comm_rank(MPI_COMM_WORLD, &my_rank);
    nb_elements_ = dca::parallel::util::getWorkload(dmn.get_size(), mpi_size, my_rank);
  }
#endif  // DCA_HAVE_MPI
  fnc_values = new scalartype[nb_elements_];
  for (int linind = 0; linind < nb_elements_; ++linind)
    setToZero(fnc_values[linind]);
+0 −4
Original line number Diff line number Diff line
@@ -14,12 +14,8 @@

#include <vector>

#include <mpi.h>

#include "dca/function/domains.hpp"
#include "dca/function/function.hpp"
#include "dca/parallel/mpi_concurrency/mpi_gang.hpp"
#include "dca/parallel/mpi_concurrency/mpi_type_map.hpp"
#include "dca/util/integer_division.hpp"

namespace dca {
+2 −1
Original line number Diff line number Diff line
@@ -43,10 +43,11 @@
#ifdef DCA_HAVE_CUDA
#include "dca/phys/dca_step/cluster_solver/shared_tools/accumulation/sp/sp_accumulator_gpu.hpp"
#include "dca/phys/dca_step/cluster_solver/shared_tools/accumulation/tp/tp_accumulator_gpu.hpp"
#endif  // DCA_HAVE_CUDA
#ifdef DCA_HAVE_MPI
#include "dca/phys/dca_step/cluster_solver/shared_tools/accumulation/tp/tp_accumulator_mpi_gpu.hpp"
#endif // DCA_HAVE_MPI
#endif  // DCA_HAVE_CUDA

namespace dca {
namespace phys {
namespace solver {
+46 −42
Original line number Diff line number Diff line
@@ -44,8 +44,8 @@ namespace accumulator {
template <class Parameters, linalg::DeviceType device = linalg::CPU, DistType DT = DistType::NONE>
class TpAccumulator;

template <class Parameters>
class TpAccumulator<Parameters, linalg::CPU, dca::DistType::NONE> {
template <class Parameters, DistType DT>
class TpAccumulator<Parameters, linalg::CPU, DT> {
public:
  using Real = typename Parameters::TP_measurement_scalar_type;

@@ -63,7 +63,6 @@ public:
  using WTpExtPosDmn = func::dmn_0<domains::vertex_frequency_domain<domains::EXTENDED_POSITIVE>>;
  using WExchangeDmn = func::dmn_0<domains::FrequencyExchangeDomain>;

  using this_type = TpAccumulator<Parameters>;
  using Data = DcaData<Parameters>;
  using TpGreensFunction = typename Data::TpGreensFunction;

@@ -109,7 +108,7 @@ public:
  const auto& get_sign_times_G4() const;

  // Sums the accumulated Green's function to the accumulated Green's function of other_acc.
  void sumTo(this_type& other_acc);
  void sumTo(TpAccumulator& other_acc);

  void synchronizeCopy() {}

@@ -134,7 +133,7 @@ protected:

  void getGMultiband(int s, int k1, int k2, int w1, int w2, Matrix& G, Complex beta = 0) const;

  Complex getGSingleband(int s, int k1, int k2, int w1, int w2) const;
  auto getGSingleband(int s, int k1, int k2, int w1, int w2) -> Complex const;

  template <class Configuration, typename RealIn>
  float computeM(const std::array<linalg::Matrix<RealIn, linalg::CPU>, 2>& M_pair,
@@ -181,8 +180,8 @@ private:
  Matrix G0_M_, G_a_, G_b_;
};

template <class Parameters>
TpAccumulator<Parameters, linalg::CPU>::TpAccumulator(
template <class Parameters, DistType DT>
TpAccumulator<Parameters, linalg::CPU, DT>::TpAccumulator(
    const func::function<std::complex<double>, func::dmn_variadic<NuDmn, NuDmn, KDmn, WDmn>>& G0,
    const Parameters& pars, const int thread_id)
    : G0_ptr_(&G0),
@@ -195,6 +194,11 @@ TpAccumulator<Parameters, linalg::CPU>::TpAccumulator(
      G0_M_(n_bands_),
      G_a_(n_bands_),
      G_b_(n_bands_) {
  if constexpr (DT == DistType::MPI) {
    std::cerr << "The MPI distribution of G4 on the CPU is not supported. Reverting to no "
                 "distribution.\n";
  }

  if (WDmn::dmn_size() < WTpExtDmn::dmn_size())
    throw(std::logic_error("The number of single particle frequencies is too small."));
  initializeG0();
@@ -207,16 +211,16 @@ TpAccumulator<Parameters, linalg::CPU>::TpAccumulator(
  }
}

template <class Parameters>
void TpAccumulator<Parameters, linalg::CPU>::resetAccumulation(unsigned int /*dca_loop*/) {
template <class Parameters, DistType DT>
void TpAccumulator<Parameters, linalg::CPU, DT>::resetAccumulation(unsigned int /*dca_loop*/) {
  for (auto& G4_channel : G4_)
    G4_channel = 0.;

  initializeG0();
}

template <class Parameters>
void TpAccumulator<Parameters, linalg::CPU>::initializeG0() {
template <class Parameters, DistType DT>
void TpAccumulator<Parameters, linalg::CPU, DT>::initializeG0() {
  const int sp_index_offset = (WDmn::dmn_size() - WTpExtDmn::dmn_size()) / 2;

  for (int w = 0; w < WTpExtDmn::dmn_size(); ++w) {
@@ -229,9 +233,9 @@ void TpAccumulator<Parameters, linalg::CPU>::initializeG0() {
  }
}

template <class Parameters>
template <class Parameters, DistType DT>
template <class Configuration, typename RealIn>
double TpAccumulator<Parameters, linalg::CPU>::accumulate(
double TpAccumulator<Parameters, linalg::CPU, DT>::accumulate(
    const std::array<linalg::Matrix<RealIn, linalg::CPU>, 2>& M_pair,
    const std::array<Configuration, 2>& configs, const int sign) {
  Profiler profiler("accumulate", "tp-accumulation", __LINE__, thread_id_);
@@ -249,9 +253,9 @@ double TpAccumulator<Parameters, linalg::CPU>::accumulate(
  return gflops;
}

template <class Parameters>
template <class Parameters, DistType DT>
template <class Configuration, typename RealIn>
float TpAccumulator<Parameters, linalg::CPU>::computeM(
float TpAccumulator<Parameters, linalg::CPU, DT>::computeM(
    const std::array<linalg::Matrix<RealIn, linalg::CPU>, 2>& M_pair,
    const std::array<Configuration, 2>& configs) {
  float flops = 0.;
@@ -272,8 +276,8 @@ float TpAccumulator<Parameters, linalg::CPU>::computeM(
  return flops;
}

template <class Parameters>
double TpAccumulator<Parameters, linalg::CPU>::computeG() {
template <class Parameters, DistType DT>
double TpAccumulator<Parameters, linalg::CPU, DT>::computeG() {
  Profiler prf("ComputeG", "tp-accumulation", __LINE__, thread_id_);
  for (int w2 = 0; w2 < WTpExtDmn::dmn_size(); ++w2)
    for (int w1 = 0; w1 < WTpExtPosDmn::dmn_size(); ++w1)
@@ -293,8 +297,8 @@ double TpAccumulator<Parameters, linalg::CPU>::computeG() {
  return 1e-9 * flops;
}

template <class Parameters>
void TpAccumulator<Parameters, linalg::CPU>::computeGSingleband(const int s, const int k1,
template <class Parameters, DistType DT>
void TpAccumulator<Parameters, linalg::CPU, DT>::computeGSingleband(const int s, const int k1,
                                                                    const int k2, const int w1,
                                                                    const int w2) {
  assert(w1 < WTpExtPosDmn::dmn_size());
@@ -310,8 +314,8 @@ void TpAccumulator<Parameters, linalg::CPU>::computeGSingleband(const int s, con
    G_(0, 0, s, k1, k2, w1, w2) = -G0_w1 * M_val * G0_w2;
}

template <class Parameters>
void TpAccumulator<Parameters, linalg::CPU>::computeGMultiband(const int s, const int k1,
template <class Parameters, DistType DT>
void TpAccumulator<Parameters, linalg::CPU, DT>::computeGMultiband(const int s, const int k1,
                                                                   const int k2, const int w1,
                                                                   const int w2) {
  assert(w1 < WTpExtPosDmn::dmn_size());
@@ -334,10 +338,10 @@ void TpAccumulator<Parameters, linalg::CPU>::computeGMultiband(const int s, cons
  }
}

template <class Parameters>
std::complex<typename TpAccumulator<Parameters, linalg::CPU>::Real> TpAccumulator<
    Parameters, linalg::CPU>::getGSingleband(const int s, const int k1, const int k2, const int w1,
                                             const int w2) const {
template <class Parameters, DistType DT>
auto TpAccumulator<Parameters, linalg::CPU, DT>::getGSingleband(const int s, const int k1,
                                                                const int k2, const int w1,
                                                                const int w2) -> Complex const {
  const int w2_ext = w2 + extension_index_offset_;
  const int w1_ext = w1 + extension_index_offset_;
  auto minus_w1 = [=](const int w) { return n_pos_frqs_ - 1 - w; };
@@ -354,8 +358,8 @@ std::complex<typename TpAccumulator<Parameters, linalg::CPU>::Real> TpAccumulato
    return std::conj(G_(0, 0, s, minus_k(k1), minus_k(k2), minus_w1(w1_ext), minus_w2(w2_ext)));
}

template <class Parameters>
void TpAccumulator<Parameters, linalg::CPU>::getGMultiband(int s, int k1, int k2, int w1, int w2,
template <class Parameters, DistType DT>
void TpAccumulator<Parameters, linalg::CPU, DT>::getGMultiband(int s, int k1, int k2, int w1, int w2,
                                                               Matrix& G, const Complex beta) const {
  const int w2_ext = w2 + extension_index_offset_;
  const int w1_ext = w1 + extension_index_offset_;
@@ -383,8 +387,8 @@ void TpAccumulator<Parameters, linalg::CPU>::getGMultiband(int s, int k1, int k2
  }
}

template <class Parameters>
double TpAccumulator<Parameters, linalg::CPU>::updateG4(const int channel_id) {
template <class Parameters, DistType DT>
double TpAccumulator<Parameters, linalg::CPU, DT>::updateG4(const int channel_id) {
  // G4 is stored with the following band convention:
  // b1 ------------------------ b3
  //        |           |
@@ -575,8 +579,8 @@ double TpAccumulator<Parameters, linalg::CPU>::updateG4(const int channel_id) {
  return 1e-9 * flops;
}

template <class Parameters>
void TpAccumulator<Parameters, linalg::CPU>::updateG4Atomic(
template <class Parameters, DistType DT>
void TpAccumulator<Parameters, linalg::CPU, DT>::updateG4Atomic(
    Complex* G4_ptr, const int s_a, const int k1_a, const int k2_a, const int w1_a, const int w2_a,
    const int s_b, const int k1_b, const int k2_b, const int w1_b, const int w2_b, const Real alpha,
    const bool cross_legs) {
@@ -612,8 +616,8 @@ void TpAccumulator<Parameters, linalg::CPU>::updateG4Atomic(
  }
}

template <class Parameters>
void TpAccumulator<Parameters, linalg::CPU>::updateG4SpinDifference(
template <class Parameters, DistType DT>
void TpAccumulator<Parameters, linalg::CPU, DT>::updateG4SpinDifference(
    Complex* G4_ptr, const int sign, const int k1_a, const int k2_a, const int w1_a, const int w2_a,
    const int k1_b, const int k2_b, const int w1_b, const int w2_b, const Real alpha,
    const bool cross_legs) {
@@ -655,16 +659,16 @@ void TpAccumulator<Parameters, linalg::CPU>::updateG4SpinDifference(
  }
}

template <class Parameters>
const auto& TpAccumulator<Parameters, linalg::CPU>::get_sign_times_G4() const {
template <class Parameters, DistType DT>
const auto& TpAccumulator<Parameters, linalg::CPU, DT>::get_sign_times_G4() const {
  if (G4_.empty())
    throw std::logic_error("There is no G4 stored in this class.");

  return G4_;
}

template <class Parameters>
void TpAccumulator<Parameters, linalg::CPU>::sumTo(this_type& other_one) {
template <class Parameters, DistType DT>
void TpAccumulator<Parameters, linalg::CPU, DT>::sumTo(TpAccumulator& other_one) {
  if (other_one.G4_.size() != G4_.size())
    throw std::logic_error("Objects accumulate different number of channels.");

Loading