include/dca/function/domains/local_domain.hpp 0 → 100644 +99 −0

// Copyright (C) 2018 ETH Zurich
// Copyright (C) 2018 UT-Battelle, LLC
// All rights reserved.
//
// See LICENSE for terms of usage.
// See CITATION.md for citation guidelines, if DCA++ is used for scientific publications.
//
// Author: Giovanni Balduzzi (gbalduzz@itp.phys.ethz.ch)
//
// This file implements a class to store a subset of a domain distributed among different MPI ranks.
//
// INTERNAL: This class can only be used with dmn_variadic when it's wrapped with dmn_0.

#ifndef DCA_FUNCTION_DOMAINS_LOCAL_DOMAIN_HPP
#define DCA_FUNCTION_DOMAINS_LOCAL_DOMAIN_HPP

#include <algorithm>  // std::copy_n, std::min
#include <cassert>
#include <iostream>
#include <stdexcept>
#include <string>
#include <vector>

#include "dca/util/integer_division.hpp"

namespace dca {
namespace func {
// dca::func::

template <typename BaseDomain, int id = 0>
class LocalDomain {
public:
  using element_type = typename BaseDomain::element_type;
  // For putting LocalDomain in dmn_0.
  using this_type = LocalDomain<BaseDomain>;

  template <class Grouping>
  static void initialize(const Grouping& group);

  static std::size_t get_physical_size() {
    return elements_.size();
  }

  static std::size_t get_offset() {
    return offset_;
  }

  static std::size_t get_size() {
    assert(initialized_);
    return padded_size_;
  }

  static std::string get_name() {
    return "Local_" + BaseDomain::get_name();
  }

  static const std::vector<element_type>& get_elements() {
    assert(initialized_);
    return elements_;
  }

  static bool is_initialized() {
    return initialized_;
  }

private:
  static bool initialized_;
  static std::vector<element_type> elements_;
  static std::size_t padded_size_;
  static std::size_t offset_;
};

template <class BaseDomain, int id>
bool LocalDomain<BaseDomain, id>::initialized_ = false;
template <class BaseDomain, int id>
std::vector<typename LocalDomain<BaseDomain, id>::element_type> LocalDomain<BaseDomain, id>::elements_;
template <class BaseDomain, int id>
std::size_t LocalDomain<BaseDomain, id>::padded_size_ = 0;
template <class BaseDomain, int id>
std::size_t LocalDomain<BaseDomain, id>::offset_ = 0;

template <class BaseDomain, int id>
template <class Grouping>
void LocalDomain<BaseDomain, id>::initialize(const Grouping& group) {
  if (initialized_) {
    throw(std::logic_error("Domain " + get_name() + " is already initialized."));
  }

  const std::size_t global_size = BaseDomain::get_size();
  padded_size_ = dca::util::ceilDiv(global_size, std::size_t(group.get_size()));

  const std::size_t start = std::min(padded_size_ * group.get_id(), global_size);
  const std::size_t end = std::min(start + padded_size_, global_size);
  const auto physical_size = end - start;
  offset_ = start;

  elements_.resize(physical_size);
  std::copy_n(BaseDomain::get_elements().data() + start, physical_size, elements_.data());

  initialized_ = true;
}

}  // namespace func
}  // namespace dca

#endif  // DCA_FUNCTION_DOMAINS_LOCAL_DOMAIN_HPP
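The initialization above partitions the base domain into contiguous, equally padded blocks, one per rank of the grouping: every rank reports the same padded get_size(), while get_physical_size() and get_offset() describe the block it actually owns. The standalone sketch below reproduces that arithmetic with a hypothetical MockGrouping and a plain std::vector standing in for the base domain's elements; it is an illustration, not DCA++ code.

// Standalone illustration of LocalDomain's block partitioning (not DCA++ code).
// MockGrouping is a hypothetical stand-in for an MPI grouping or gang.
#include <algorithm>
#include <cstddef>
#include <iostream>
#include <vector>

struct MockGrouping {
  int id;
  int size;
  int get_id() const { return id; }
  int get_size() const { return size; }
};

int main() {
  const std::vector<int> base_elements = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9};  // global_size = 10
  const int n_ranks = 4;

  for (int rank = 0; rank < n_ranks; ++rank) {
    const MockGrouping group{rank, n_ranks};

    const std::size_t global_size = base_elements.size();
    // Same result as dca::util::ceilDiv(global_size, group.get_size()).
    const std::size_t padded_size = (global_size + group.get_size() - 1) / group.get_size();

    const std::size_t start = std::min(padded_size * group.get_id(), global_size);
    const std::size_t end = std::min(start + padded_size, global_size);

    // Rank 0: offset 0, 3 elements; rank 1: offset 3, 3; rank 2: offset 6, 3; rank 3: offset 9, 1.
    std::cout << "rank " << rank << ": offset " << start << ", physical size " << (end - start)
              << ", padded size " << padded_size << "\n";
  }
}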
include/dca/parallel/mpi_concurrency/mpi_collective_sum.hpp +8 −1

@@ -41,6 +41,7 @@ class MPICollectiveSum : public virtual MPIProcessorGrouping {
 public:
   MPICollectiveSum() = default;

+  // Wrappers to MPI_Allreduce.
   template <typename scalar_type>
   void sum(scalar_type& value) const;
   template <typename scalar_type>
@@ -59,9 +60,12 @@ public:
   template <typename scalar_type>
   void sum(linalg::Matrix<scalar_type, linalg::CPU>& f) const;

+  // Wrapper to MPI_Reduce.
+  template <typename Scalar, class Domain>
+  void localSum(func::function<Scalar, Domain>& f, int root_id) const;

   // Delay the execution of sum (implemented with MPI_Allreduce) until 'resolveSums' is called,
   // or 'delayedSum' is called with an object of different Scalar type.
   template <typename Scalar>
   void delayedSum(Scalar& obj);
   template <typename Scalar>
@@ -69,6 +73,7 @@ public:
   template <typename Scalar, class domain>
   void delayedSum(func::function<Scalar, domain>& f);

+  // Execute all the reductions scheduled with 'delayedSum' or with delayed 'leaveOneOutSum' calls.
   void resolveSums();

   template <typename some_type>
@@ -87,11 +92,13 @@ public:
   // in s.
   // Does nothing, if there is only one rank.
   // In/Out: s
+  // In: delay. If true, delay the sum until 'resolveSums' is called.
   template <typename T>
   void leaveOneOutSum(T& s, bool delay = 0);

   // Element-wise implementations for dca::func::function.
   // In/Out: f
+  // In: delay. If true, delay the sum until 'resolveSums' is called.
   template <typename Scalar, class Domain>
   void leaveOneOutSum(func::function<Scalar, Domain>& f, bool delay = 0);
@@ -561,7 +568,7 @@ std::vector<Scalar> MPICollectiveSum::avgNormalizedMomenta(const func::function<
 template <typename T>
 void MPICollectiveSum::sum(const T* in, T* out, std::size_t n, int id) const {
-  // On summit large messages hangs if sizeof(floating point type) type * message_size > 2^31-1.
+  // On Summit, large messages hang if sizeof(floating point type) * message_size > 2^31-1.
   constexpr std::size_t max_size = dca::util::IsComplex<T>::value
                                        ? 2 * (std::numeric_limits<int>::max() / sizeof(T))
                                        : std::numeric_limits<int>::max() / sizeof(T);
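The new localSum and the delayed-sum API complement each other: localSum reduces onto a single root rank (MPI_Reduce), while delayedSum and delayed leaveOneOutSum calls batch several element-wise reductions into one flush in resolveSums. The sketch below is an illustrative usage pattern only; the function names reduce_results, f, and g are hypothetical, and Concurrency stands for any type exposing the MPICollectiveSum interface, such as MPIConcurrency.

// Illustrative usage sketch, not part of the header. 'Concurrency' is expected to
// model the MPICollectiveSum interface (e.g. dca::parallel::MPIConcurrency);
// F and G are dca::func::function instantiations.
template <class Concurrency, class F, class G>
void reduce_results(Concurrency& concurrency, F& f, G& g) {
  // Immediate all-reduce: every rank ends up with the element-wise global sum of g.
  concurrency.sum(g);

  // New MPI_Reduce wrapper: only rank root_id receives the summed f.
  const int root_id = 0;
  concurrency.localSum(f, root_id);

  // Delayed reductions: schedule now, communicate once when resolveSums() is called.
  concurrency.delayedSum(g);
  concurrency.leaveOneOutSum(f, /*delay=*/true);
  concurrency.resolveSums();
}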
include/dca/parallel/mpi_concurrency/mpi_concurrency.hpp +15 −4

@@ -25,6 +25,8 @@
 #include "dca/parallel/mpi_concurrency/mpi_collective_min.hpp"
 #include "dca/parallel/mpi_concurrency/mpi_collective_sum.hpp"
 #include "dca/parallel/mpi_concurrency/mpi_initializer.hpp"
+#include "dca/parallel/mpi_concurrency/mpi_gang.hpp"
+#include "dca/parallel/mpi_concurrency/mpi_gather.hpp"
 #include "dca/parallel/mpi_concurrency/mpi_packing.hpp"
 #include "dca/parallel/mpi_concurrency/mpi_processor_grouping.hpp"
 #include "dca/parallel/util/get_bounds.hpp"
@@ -34,12 +36,15 @@ namespace parallel {
 // dca::parallel::

 class MPIConcurrency final : public virtual MPIInitializer,
-                             private virtual MPIProcessorGrouping,
+                             public virtual MPIProcessorGrouping,
                              public MPIPacking,
                              public MPICollectiveMax,
                              public MPICollectiveMin,
-                             public MPICollectiveSum {
+                             public MPICollectiveSum,
+                             public MPIGather {
 public:
+  using Gang = MPIGang;
+
   MPIConcurrency(int argc, char** argv);

   inline int id() const {
@@ -68,6 +73,12 @@ public:
     return util::getBounds(id(), number_of_processors(), dmn);
   }

+  // Using gather with no gang uses the entire concurrency.
+  // template <class Scalar, class DmnIn, class DmnOut>
+  // void gather(const func::function<Scalar, DmnIn>& f_in, func::function<Scalar, DmnOut>& f_out) const {
+  //   gather(f_in, f_out, *this);
+  // }
+
   friend std::ostream& operator<<(std::ostream& some_ostream, const MPIConcurrency& this_concurrency);

 private:
@@ -147,7 +158,7 @@ bool MPIConcurrency::broadcast_object(object_type& object, int root_id) const {
   return true;
 }

-}  // parallel
-}  // dca
+}  // namespace parallel
+}  // namespace dca

 #endif  // DCA_PARALLEL_MPI_CONCURRENCY_MPI_CONCURRENCY_HPP
include/dca/parallel/mpi_concurrency/mpi_gang.hpp 0 → 100644 +50 −0

// Copyright (C) 2018 ETH Zurich
// Copyright (C) 2018 UT-Battelle, LLC
// All rights reserved.
//
// See LICENSE for terms of usage.
// See CITATION.md for citation guidelines, if DCA++ is used for scientific publications.
//
// Author: Giovanni Balduzzi (gbalduzz@itp.phys.ethz.ch)
//
// This class represents a subgroup of MPIProcessorGrouping.

#ifndef DCA_PARALLEL_MPI_CONCURRENCY_MPI_GANG_HPP
#define DCA_PARALLEL_MPI_CONCURRENCY_MPI_GANG_HPP

#include <mpi.h>

#include "dca/parallel/mpi_concurrency/mpi_processor_grouping.hpp"

namespace dca {
namespace parallel {
// dca::parallel::

class MPIGang {
public:
  MPIGang(const MPIProcessorGrouping& group, int min_size);
  ~MPIGang();

  MPIGang(const MPIGang& other) = delete;

  int get_id() const {
    return id_;
  }
  int get_size() const {
    return size_;
  }
  auto get() const {
    return communicator_;
  }

private:
  MPI_Comm communicator_;
  int id_;
  int size_;
};

}  // namespace parallel
}  // namespace dca

#endif  // DCA_PARALLEL_MPI_CONCURRENCY_MPI_GANG_HPP
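The header only declares the constructor and destructor; their definitions live in the corresponding source file, which is not part of this excerpt. As a rough illustration of the semantics the interface suggests, one common way to build gangs of at least min_size ranks is to split the parent communicator by a color derived from the rank, as in the sketch below. The free function make_gang_communicator, its parameters, and the color formula are assumptions for illustration, not the DCA++ implementation.

#include <mpi.h>

// Hypothetical sketch: split a parent communicator into consecutive gangs of
// (at least) min_size ranks. Not the actual MPIGang constructor, only an
// illustration of the semantics suggested by the interface.
template <class Grouping>
MPI_Comm make_gang_communicator(const Grouping& parent, int min_size, int& id, int& size) {
  // Ranks with the same color end up in the same sub-communicator; the key
  // (the parent rank) preserves the relative ordering inside each gang.
  const int color = parent.get_id() / min_size;

  MPI_Comm gang_comm;
  MPI_Comm_split(parent.get(), color, parent.get_id(), &gang_comm);

  MPI_Comm_rank(gang_comm, &id);
  MPI_Comm_size(gang_comm, &size);
  return gang_comm;  // The owner is responsible for calling MPI_Comm_free.
}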
include/dca/parallel/mpi_concurrency/mpi_gather.hpp 0 → 100644 +67 −0

// Copyright (C) 2018 ETH Zurich
// Copyright (C) 2018 UT-Battelle, LLC
// All rights reserved.
//
// See LICENSE for terms of usage.
// See CITATION.md for citation guidelines, if DCA++ is used for scientific publications.
//
// Author: Giovanni Balduzzi (gbalduzz@itp.phys.ethz.ch)
//
// This class provides an interface to gather functions with MPI.

#ifndef DCA_PARALLEL_MPI_CONCURRENCY_MPI_GATHER_HPP
#define DCA_PARALLEL_MPI_CONCURRENCY_MPI_GATHER_HPP

#include <algorithm>  // std::copy_n
#include <stdexcept>
#include <vector>

#include <mpi.h>

#include "dca/function/domains.hpp"
#include "dca/function/function.hpp"
#include "dca/parallel/mpi_concurrency/mpi_gang.hpp"
#include "dca/parallel/mpi_concurrency/mpi_type_map.hpp"
#include "dca/util/integer_division.hpp"

namespace dca {
namespace parallel {
// dca::parallel::

class MPIGather {
public:
  MPIGather() = default;

  // Gather the function 'f_in' on all processes in 'gang' and copy the result into 'f_out',
  // discarding any padding.
  // Precondition: gang.get_size() * f_in.size() >= f_out.size()
  template <class Scalar, class DmnIn, class DmnOut, class Gang>
  void gather(const func::function<Scalar, DmnIn>& f_in, func::function<Scalar, DmnOut>& f_out,
              const Gang& gang) const;

private:
  template <class T, class Gang>
  void gather(const T* in, T* out, int local_n, const Gang& gang) const;
};

template <class Scalar, class DmnIn, class DmnOut, class Gang>
void MPIGather::gather(const func::function<Scalar, DmnIn>& f_in,
                       func::function<Scalar, DmnOut>& f_out, const Gang& gang) const {
  std::vector<Scalar> gathered(f_in.size() * gang.get_size());

  gather(f_in.values(), gathered.data(), f_in.size(), gang);

  if (f_out.size() > gathered.size())
    throw(std::logic_error("Output function is too large."));

  // TODO: move.
  std::copy_n(gathered.data(), f_out.size(), f_out.values());
}

template <class T, class Gang>
void MPIGather::gather(const T* in, T* out, int local_n, const Gang& gang) const {
  MPI_Allgather(in, local_n, MPITypeMap<T>::value(), out, local_n, MPITypeMap<T>::value(),
                gang.get());
}

}  // namespace parallel
}  // namespace dca

#endif  // DCA_PARALLEL_MPI_CONCURRENCY_MPI_GATHER_HPP
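Taken together, the three new pieces support one pattern: a function defined over a LocalDomain holds only a rank's padded block, and MPIGather::gather reassembles the full function on every rank of a gang, dropping the padding. The end-to-end sketch below is a hedged illustration only; gather_example, FreqDmn, the gang size of 4, and the dmn_0 wiring are assumptions made for the example and are not taken from this PR.

#include "dca/function/domains.hpp"
#include "dca/function/domains/local_domain.hpp"
#include "dca/function/function.hpp"
#include "dca/parallel/mpi_concurrency/mpi_concurrency.hpp"

// Illustrative sketch: 'FreqDmn' stands for some existing dmn_0-wrapped base domain.
template <class FreqDmn>
void gather_example(dca::parallel::MPIConcurrency& concurrency) {
  using dca::func::dmn_0;
  using LocalFreqDmn = dmn_0<dca::func::LocalDomain<typename FreqDmn::parameter_type>>;

  // Build a gang of at least 4 ranks and distribute the base domain over it.
  dca::parallel::MPIConcurrency::Gang gang(concurrency, 4);
  LocalFreqDmn::parameter_type::initialize(gang);

  dca::func::function<double, LocalFreqDmn> f_local("f_local");  // this rank's padded block
  dca::func::function<double, FreqDmn> f_global("f_global");     // full base domain

  // ... fill f_local with this rank's portion of the data ...

  // Reassemble the full function on every rank of the gang, discarding the padding.
  concurrency.gather(f_local, f_global, gang);
}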