Commit c3a43b03 authored by Diy2 Upstream's avatar Diy2 Upstream Committed by Sujin Philip
Browse files

diy2 2016-03-29 (4169b469)

Code extracted from:

    https://gitlab.kitware.com/third-party/diy2.git

at commit 4169b4699582964d7b6ad9a9211637bd83f9de41 (for/vtk).
parent 75937411
## DIY2 is a data-parallel out-of-core library
## DIY2 is a block-parallel library
DIY2 is a data-parallel library for implementing scalable algorithms that can execute both
DIY2 is a block-parallel library for implementing scalable algorithms that can execute both
in-core and out-of-core. The same program can be executed with one or more threads per MPI
process, seamlessly combining distributed-memory message passing with shared-memory thread
parallelism. The abstraction enabling these capabilities is block-based parallelism; blocks
parallelism. The abstraction enabling these capabilities is block parallelism; blocks
and their message queues are mapped onto processing elements (MPI processes or threads) and are
migrated between memory and storage by the DIY2 runtime. Complex communication patterns,
including neighbor exchange, merge reduction, swap reduction, and all-to-all exchange, are
......
......@@ -11,6 +11,7 @@
#include "detail/algorithms/sort.hpp"
#include "detail/algorithms/kdtree.hpp"
#include "detail/algorithms/kdtree-sampling.hpp"
namespace diy
{
......@@ -36,7 +37,8 @@ namespace diy
detail::SampleSort<Block,T,Cmp> sorter(values, samples, cmp, num_samples);
// swap-reduce to all-gather samples
RegularSwapPartners partners(1, assigner.nblocks(), k);
RegularDecomposer<DiscreteBounds> decomposer(1, interval(0,assigner.nblocks()), assigner.nblocks());
RegularSwapPartners partners(decomposer, k);
reduce(master, assigner, partners, sorter.sample(), detail::SkipIntermediate(partners.rounds()));
// all_to_all to exchange the values
......@@ -66,7 +68,7 @@ namespace diy
/**
* \ingroup Algorithms
* \brief build a kd-tree and sort a set of points into it
* \brief build a kd-tree and sort a set of points into it (use histograms to determine split values)
*/
template<class Block, class Point>
void kdtree(Master& master, //!< master object
......@@ -85,11 +87,33 @@ namespace diy
typedef diy::RegularContinuousLink RCLink;
for (int i = 0; i < master.size(); ++i)
for (size_t i = 0; i < master.size(); ++i)
{
RCLink* link = static_cast<RCLink*>(master.link(i));
link->core() = domain;
link->bounds() = domain;
*link = RCLink(dim, domain, domain);
if (wrap) // set up the links to self
{
diy::BlockID self = { master.gid(i), master.communicator().rank() };
for (int j = 0; j < dim; ++j)
{
diy::Direction dir, wrap_dir;
// left
dir.x[j] = -1; wrap_dir.x[j] = -1;
link->add_neighbor(self);
link->add_bounds(domain);
link->add_direction(dir);
link->add_wrap(wrap_dir);
// right
dir.x[j] = 1; wrap_dir.x[j] = 1;
link->add_neighbor(self);
link->add_bounds(domain);
link->add_direction(dir);
link->add_wrap(wrap_dir);
}
}
}
detail::KDTreePartition<Block,Point> kdtree_partition(dim, points, bins);
......@@ -99,11 +123,73 @@ namespace diy
// update master.expected to match the links
int expected = 0;
for (int i = 0; i < master.size(); ++i)
for (size_t i = 0; i < master.size(); ++i)
expected += master.link(i)->size_unique();
master.set_expected(expected);
}
/**
* \ingroup Algorithms
* \brief build a kd-tree and sort a set of points into it (use sampling to determine split values)
*/
template<class Block, class Point>
void kdtree_sampling
(Master& master, //!< master object
const Assigner& assigner, //!< assigner object
int dim, //!< dimensionality
const ContinuousBounds& domain, //!< global data extents
std::vector<Point> Block::* points, //!< input points to sort into kd-tree
size_t samples, //!< number of samples to take in each block
bool wrap = false)//!< periodic boundaries in all dimensions
{
if (assigner.nblocks() & (assigner.nblocks() - 1))
{
fprintf(stderr, "KD-tree requires a number of blocks that's a power of 2, got %d\n", assigner.nblocks());
std::abort();
}
typedef diy::RegularContinuousLink RCLink;
for (size_t i = 0; i < master.size(); ++i)
{
RCLink* link = static_cast<RCLink*>(master.link(i));
*link = RCLink(dim, domain, domain);
if (wrap) // set up the links to self
{
diy::BlockID self = { master.gid(i), master.communicator().rank() };
for (int j = 0; j < dim; ++j)
{
diy::Direction dir, wrap_dir;
// left
dir.x[j] = -1; wrap_dir.x[j] = -1;
link->add_neighbor(self);
link->add_bounds(domain);
link->add_direction(dir);
link->add_wrap(wrap_dir);
// right
dir.x[j] = 1; wrap_dir.x[j] = 1;
link->add_neighbor(self);
link->add_bounds(domain);
link->add_direction(dir);
link->add_wrap(wrap_dir);
}
}
}
detail::KDTreeSamplingPartition<Block,Point> kdtree_partition(dim, points, samples);
detail::KDTreePartners partners(dim, assigner.nblocks(), wrap, domain);
reduce(master, assigner, partners, kdtree_partition);
// update master.expected to match the links
int expected = 0;
for (size_t i = 0; i < master.size(); ++i)
expected += master.link(i)->size_unique();
master.set_expected(expected);
}
}
#endif
......@@ -7,23 +7,9 @@
#define DIY_MAX_DIM 4
#endif
/* DIY directions: neighbor direction enumeration
used to identify direction of one neighbor block for both regular and
wrapround neighbors
can be bitwise ORed, eg., maximum-side neighboring block in 3 dimensions
would be DIY_X1 | DIY_Y1 | DIY_Z1
each use identifies exactly one block
eg. DIY_X0 is the (one) left neighbor block, not the entire left plane */
typedef enum
struct dir_t
{
DIY_X0 = 0x01, /* minimum-side x (left) neighbor */
DIY_X1 = 0x02, /* maximum-side x (right) neighbor */
DIY_Y0 = 0x04, /* minimum-side y (bottom) neighbor */
DIY_Y1 = 0x08, /* maximum-side y (top) neighbor */
DIY_Z0 = 0x10, /* minimum-side z (back) neighbor */
DIY_Z1 = 0x20, /* maximum-side z (front)neighbor */
DIY_T0 = 0x40, /* minimum-side t (earlier) neighbor */
DIY_T1 = 0x80 /* maximum-side t (later) neighbor */
} dir_t;
int x[DIY_MAX_DIM];
};
#endif
......@@ -3,6 +3,14 @@
namespace diy
{
// TODO: when not running under C++11, i.e., when lock_guard is TinyThread's
// lock_guard, and not C++11's unique_lock, this implementation might
// be buggy since the copy constructor is invoked when
// critical_resource::access() returns an instance of this class. Once
// the temporary is destroyed the mutex is unlocked. I'm not 100%
// certain of this because I'd expect a deadlock on copy constructor,
// but it's clearly not happening -- so I may be missing something.
// (This issue will take care of itself in DIY3 once we switch to C++11 completely.)
template<class T, class Mutex>
class resource_accessor
{
......
......@@ -5,6 +5,8 @@
#include <algorithm>
#include <iostream>
#include <cmath>
#include <sstream>
#include <stdexcept>
#include "link.hpp"
#include "assigner.hpp"
......@@ -74,35 +76,37 @@ namespace detail
typedef std::vector<Coordinate> CoordinateVector;
typedef std::vector<int> DivisionsVector;
/// @param assigner: decides how processors are assigned to blocks (maps a gid to a rank)
/// also communicates the total number of blocks
/// @param wrap: indicates dimensions on which to wrap the boundary
/// @param ghosts: indicates how many ghosts to use in each dimension
/// @param divisions: indicates how many cuts to make along each dimension
/// (0 means "no constraint," i.e., leave it up to the algorithm)
RegularDecomposer(int dim_,
const Bounds& domain_,
const Assigner& assigner_,
int nblocks_,
BoolVector share_face_ = BoolVector(),
BoolVector wrap_ = BoolVector(),
CoordinateVector ghosts_ = CoordinateVector(),
DivisionsVector divisions_ = DivisionsVector()):
dim(dim_), domain(domain_), assigner(assigner_),
dim(dim_), domain(domain_), nblocks(nblocks_),
share_face(share_face_),
wrap(wrap_), ghosts(ghosts_), divisions(divisions_)
{
if (share_face.size() < dim) share_face.resize(dim);
if (wrap.size() < dim) wrap.resize(dim);
if (ghosts.size() < dim) ghosts.resize(dim);
if (divisions.size() < dim) divisions.resize(dim);
if ((int) share_face.size() < dim) share_face.resize(dim);
if ((int) wrap.size() < dim) wrap.resize(dim);
if ((int) ghosts.size() < dim) ghosts.resize(dim);
if ((int) divisions.size() < dim) divisions.resize(dim);
int nblocks = assigner.nblocks();
fill_divisions(nblocks);
fill_divisions(divisions);
}
// Calls create(int gid, const Bounds& bounds, const Link& link)
template<class Creator>
void decompose(int rank, const Creator& create);
void decompose(int rank, const Assigner& assigner, const Creator& create);
template<class Updater>
void decompose(int rank, const Assigner& assigner, Master& master, const Updater& update);
void decompose(int rank, const Assigner& assigner, Master& master);
// find lowest gid that owns a particular point
template<class Point>
......@@ -110,7 +114,7 @@ namespace detail
void gid_to_coords(int gid, DivisionsVector& coords) const { gid_to_coords(gid, coords, divisions); }
int coords_to_gid(const DivisionsVector& coords) const { return coords_to_gid(coords, divisions); }
void fill_divisions(int nblocks) { fill_divisions(dim, nblocks, divisions); }
void fill_divisions(std::vector<int>& divisions) const;
void fill_bounds(Bounds& bounds, const DivisionsVector& coords, bool add_ghosts = false) const;
void fill_bounds(Bounds& bounds, int gid, bool add_ghosts = false) const;
......@@ -118,7 +122,7 @@ namespace detail
static bool all(const std::vector<int>& v, int x);
static void gid_to_coords(int gid, DivisionsVector& coords, const DivisionsVector& divisions);
static int coords_to_gid(const DivisionsVector& coords, const DivisionsVector& divisions);
static void fill_divisions(int dim, int nblocks, std::vector<int>& divisions);
static void factor(std::vector<unsigned>& factors, int n);
// Point to GIDs functions
......@@ -137,8 +141,8 @@ namespace detail
int dim;
const Bounds& domain;
const Assigner& assigner;
Bounds domain;
int nblocks;
BoolVector share_face;
BoolVector wrap;
CoordinateVector ghosts;
......@@ -162,7 +166,7 @@ namespace detail
*
* `create(...)` is called with each block assigned to the local domain. See [decomposition example](#decomposition-example).
*/
template<class Bounds, class Assigner, class Creator>
template<class Bounds, class Creator>
void decompose(int dim,
int rank,
const Bounds& domain,
......@@ -173,7 +177,7 @@ namespace detail
typename RegularDecomposer<Bounds>::CoordinateVector ghosts = typename RegularDecomposer<Bounds>::CoordinateVector(),
typename RegularDecomposer<Bounds>::DivisionsVector divs = typename RegularDecomposer<Bounds>::DivisionsVector())
{
RegularDecomposer<Bounds>(dim, domain, assigner, share_face, wrap, ghosts, divs).decompose(rank, create);
RegularDecomposer<Bounds>(dim, domain, assigner.nblocks(), share_face, wrap, ghosts, divs).decompose(rank, assigner, create);
}
namespace detail
......@@ -195,6 +199,26 @@ namespace detail
diy::Master* master_;
};
template<class Bounds, class Update>
struct Updater
{
typedef typename RegularDecomposer<Bounds>::Link Link;
Updater(diy::Master* master, const Update& update):
master_(master), update_(update) {}
void operator()(int gid, const Bounds& core, const Bounds& bounds, const Bounds& domain, const Link& link) const
{
int lid = master_->lid(gid);
Link* l = new Link(link);
master_->replace_link(lid, l);
update_(gid, lid, core, bounds, domain, *l);
}
diy::Master* master_;
const Update& update_;
};
}
/**
......@@ -213,7 +237,7 @@ namespace detail
*
* `master` must have been supplied a create function in order for this function to work.
*/
template<class Bounds, class Assigner>
template<class Bounds>
void decompose(int dim,
int rank,
const Bounds& domain,
......@@ -224,7 +248,7 @@ namespace detail
typename RegularDecomposer<Bounds>::CoordinateVector ghosts = typename RegularDecomposer<Bounds>::CoordinateVector(),
typename RegularDecomposer<Bounds>::DivisionsVector divs = typename RegularDecomposer<Bounds>::DivisionsVector())
{
RegularDecomposer<Bounds>(dim, domain, assigner, share_face, wrap, ghosts, divs).decompose(rank, detail::AddBlock<Bounds>(&master));
RegularDecomposer<Bounds>(dim, domain, assigner.nblocks(), share_face, wrap, ghosts, divs).decompose(rank, assigner, master);
}
/**
......@@ -248,15 +272,53 @@ namespace detail
master.add(local_gids[i], master.create(), new diy::Link);
}
/**
* \ingroup Decomposition
* \brief Add a decomposition (modify links) of an existing set of blocks that were
* added to the master previously
*
* @param rank local rank
* @param assigner decides how processors are assigned to blocks (maps a gid to a rank)
* also communicates the total number of blocks
*/
template<class Bounds, class Update>
void decompose(int dim,
int rank,
const Bounds& domain,
const Assigner& assigner,
Master& master,
const Update& update,
typename RegularDecomposer<Bounds>::BoolVector share_face =
typename RegularDecomposer<Bounds>::BoolVector(),
typename RegularDecomposer<Bounds>::BoolVector wrap =
typename RegularDecomposer<Bounds>::BoolVector(),
typename RegularDecomposer<Bounds>::CoordinateVector ghosts =
typename RegularDecomposer<Bounds>::CoordinateVector(),
typename RegularDecomposer<Bounds>::DivisionsVector divs =
typename RegularDecomposer<Bounds>::DivisionsVector())
{
RegularDecomposer<Bounds>(dim, domain, share_face, wrap, ghosts, divs).
decompose(rank, assigner, master, update);
}
//! Decomposition example: \example decomposition/test-decomposition.cpp
//! Direct master insertion example: \example decomposition/test-direct-master.cpp
}
// decomposes domain and adds blocks to the master
template<class Bounds>
void
diy::RegularDecomposer<Bounds>::
decompose(int rank, const Assigner& assigner, Master& master)
{
decompose(rank, assigner, detail::AddBlock<Bounds>(&master));
}
template<class Bounds>
template<class Creator>
void
diy::RegularDecomposer<Bounds>::
decompose(int rank, const Creator& create)
decompose(int rank, const Assigner& assigner, const Creator& create)
{
std::vector<int> gids;
assigner.local_gids(rank, gids);
......@@ -289,7 +351,7 @@ decompose(int rank, const Creator& create)
if (all(offsets, 0)) continue; // skip ourselves
DivisionsVector nhbr_coords(dim);
int dir = 0;
Direction dir, wrap_dir;
bool inbounds = true;
for (int i = 0; i < dim; ++i)
{
......@@ -301,7 +363,7 @@ decompose(int rank, const Creator& create)
if (wrap[i])
{
nhbr_coords[i] = divisions[i] - 1;
link.add_wrap(Direction(1 << 2*i));
wrap_dir[i] = -1;
}
else
inbounds = false;
......@@ -312,17 +374,15 @@ decompose(int rank, const Creator& create)
if (wrap[i])
{
nhbr_coords[i] = 0;
link.add_wrap(Direction(1 << (2*i + 1)));
wrap_dir[i] = 1;
}
else
inbounds = false;
}
// NB: this needs to match the addressing scheme in dir_t (in constants.h)
if (offsets[i] == -1)
dir |= 1 << (2*i + 0);
if (offsets[i] == 1)
dir |= 1 << (2*i + 1);
if (offsets[i] == -1 || offsets[i] == 1)
dir[i] = offsets[i];
}
if (!inbounds) continue;
......@@ -334,13 +394,24 @@ decompose(int rank, const Creator& create)
fill_bounds(nhbr_bounds, nhbr_coords);
link.add_bounds(nhbr_bounds);
link.add_direction(static_cast<Direction>(dir));
link.add_direction(dir);
link.add_wrap(wrap_dir);
}
create(gid, core, bounds, domain, link);
}
}
// decomposes domain but does not add blocks to master, assumes they were added already
template<class Bounds>
template<class Update>
void
diy::RegularDecomposer<Bounds>::
decompose(int rank, const Assigner& assigner, Master& master, const Update& update)
{
decompose(rank, assigner, detail::Updater<Bounds,Update>(&master, update));
}
template<class Bounds>
bool
diy::RegularDecomposer<Bounds>::
......@@ -436,40 +507,116 @@ fill_bounds(Bounds& bounds, //!< (output) bounds
fill_bounds(bounds, coords);
}
namespace diy { namespace detail {
// current state of division in one dimension used in fill_divisions below
template<class Coordinate>
struct Div
{
int dim; // 0, 1, 2, etc. e.g. for x, y, z etc.
int nb; // number of blocks so far in this dimension
Coordinate b_size; // block size so far in this dimension
// sort on descending block size unless tied, in which case
// sort on ascending num blocks in current dim unless tied, in which case
// sort on ascending dimension
bool operator<(Div rhs) const
{
// sort on second value of the pair unless tied, in which case sort on first
if (b_size == rhs.b_size)
{
if (nb == rhs.nb)
return(dim < rhs.dim);
return(nb < rhs.nb);
}
return(b_size > rhs.b_size);
}
};
} }
template<class Bounds>
void
diy::RegularDecomposer<Bounds>::
fill_divisions(int dim, int nblocks, std::vector<int>& divisions)
fill_divisions(std::vector<int>& divisions) const
{
int prod = 1; int c = 0;
for (unsigned i = 0; i < dim; ++i)
if (divisions[i] != 0)
// prod = number of blocks unconstrained by user; c = number of unconstrained dimensions
int prod = 1; int c = 0;
for (int i = 0; i < dim; ++i)
if (divisions[i] != 0)
{
prod *= divisions[i];
++c;
}
if (nblocks % prod != 0)
{
prod *= divisions[i];
++c;
fprintf(stderr, "Total number of blocks cannot be factored into provided divs\n");
return;
}
if (nblocks % prod != 0)
{
std::cerr << "Incompatible requirements" << std::endl;
return;
}
if (c == (int) divisions.size()) // nothing to do; user provided all divs
return;
if (c == divisions.size())
return;
// factor number of blocks left in unconstrained dimensions
// factorization is sorted from smallest to largest factors
std::vector<unsigned> factors;
factor(factors, nblocks/prod);
using detail::Div;
std::vector< Div<Coordinate> > missing_divs; // pairs consisting of (dim, #divs)
std::vector<unsigned> factors;
factor(factors, nblocks/prod);
// init missing_divs
for (int i = 0; i < dim; i++)
{
if (divisions[i] == 0)
{
Div<Coordinate> div;
div.dim = i;
div.nb = 1;
div.b_size = domain.max[i] - domain.min[i];
missing_divs.push_back(div);
}
}
// Fill the missing divs using LPT algorithm
std::vector<unsigned> missing_divs(divisions.size() - c, 1);
for (int i = factors.size() - 1; i >= 0; --i)
*std::min_element(missing_divs.begin(), missing_divs.end()) *= factors[i];
// iterate over factorization of number of blocks (factors are sorted smallest to largest)
// NB: using int instead of size_t because must be negative in order to break out of loop
for (int i = factors.size() - 1; i >= 0; --i)
{
// fill in missing divs by dividing dimension w/ largest block size
// except when this would be illegal (resulting in bounds.max < bounds.min;
// only a problem for discrete bounds
// sort on decreasing block size
std::sort(missing_divs.begin(), missing_divs.end());
// split the dimension with the largest block size (first element in vector)
Coordinate min =
detail::BoundsHelper<Bounds>::from(0,
missing_divs[0].nb * factors[i],
domain.min[missing_divs[0].dim],
domain.max[missing_divs[0].dim],
share_face[missing_divs[0].dim]);
Coordinate max =
detail::BoundsHelper<Bounds>::to(0,
missing_divs[0].nb * factors[i],
domain.min[missing_divs[0].dim],
domain.max[missing_divs[0].dim],
share_face[missing_divs[0].dim]);
if (max >= min)
{
missing_divs[0].nb *= factors[i];
missing_divs[0].b_size = max - min;
}
else
{
std::ostringstream oss;
oss << "Unable to decompose domain into " << nblocks << " blocks: " << min << " " << max;
throw std::runtime_error(oss.str());
}
}
c = 0;
for (unsigned i = 0; i < dim; ++i)
if (divisions[i] == 0)
divisions[i] = missing_divs[c++];
// assign the divisions
for (size_t i = 0; i < missing_divs.size(); i++)
divisions[missing_divs[i].dim] = missing_divs[i].nb;
}
template<class Bounds>
......@@ -478,7 +625,7 @@ diy::RegularDecomposer<Bounds>::
factor(std::vector<unsigned>& factors, int n)
{
while (n != 1)
for (unsigned i = 2