Commit 40c84f37 authored by gbalduzz's avatar gbalduzz
Browse files

Reshapable matrix inherits from the allocator.

parent 1e7e76cd
Loading
Loading
Loading
Loading
+64 −65
Original line number Diff line number Diff line
@@ -30,27 +30,27 @@ namespace dca {
namespace linalg {
// dca::linalg::

template <typename ScalarType, DeviceType device_name>
class ReshapableMatrix {
template <typename ScalarType, DeviceType device_name,
          class Allocator = util::DefaultAllocator<ScalarType, device_name>>
class ReshapableMatrix : public Allocator {
public:
  using ThisType = ReshapableMatrix<ScalarType, device_name>;
  using ThisType = ReshapableMatrix<ScalarType, device_name, Allocator>;
  using ValueType = ScalarType;
  using Allocator = util::DefaultAllocator<ScalarType, device_name>;

  ReshapableMatrix(int size = 0);
  ReshapableMatrix(std::pair<int, int> size);

  // Copy and move constructor:
  // Constructs a matrix with name name, size rhs.size() and a copy of the elements of rhs.
  ReshapableMatrix(const ReshapableMatrix<ScalarType, device_name>& rhs);
  ReshapableMatrix(const ReshapableMatrix<ScalarType, device_name, Allocator>& rhs);
  // Constructs a matrix with name name, size rhs.size(). The elements of rhs are moved.
  // Postcondition: rhs is a (0 x 0) matrix.
  ReshapableMatrix(ReshapableMatrix<ScalarType, device_name>&& rhs);
  ReshapableMatrix(ReshapableMatrix<ScalarType, device_name, Allocator>&& rhs);

  // Contructs a matrix with name name, size rhs.size() and a copy of the elements of rhs, where rhs
  // elements are stored on a different device.
  template <DeviceType rhs_device_name>
  ReshapableMatrix(const ReshapableMatrix<ScalarType, rhs_device_name>& rhs);
  template <DeviceType rhs_device_name, class AllocatorRhs>
  ReshapableMatrix(const ReshapableMatrix<ScalarType, rhs_device_name, AllocatorRhs>& rhs);

  ~ReshapableMatrix();

@@ -58,11 +58,11 @@ public:
  // Two matrices are equal, if they have the same size and contain the same elements. Name and
  // capacity are ignored.
  // Special case: two matrices without elements are equal.
  bool operator==(const ReshapableMatrix<ScalarType, device_name>& other) const;
  bool operator==(const ReshapableMatrix<ScalarType, device_name, Allocator>& other) const;

  // Returns true if this is not equal to other, false otherwise.
  // See description of operator== for the definition of equality.
  bool operator!=(const ReshapableMatrix<ScalarType, device_name>& other) const;
  bool operator!=(const ReshapableMatrix<ScalarType, device_name, Allocator>& other) const;

  // Returns the (i,j)-th element of the matrix.
  // Preconditions: 0 <= i < size().first, 0 <= j < size().second.
@@ -137,7 +137,7 @@ public:

  bool reserveNoCopy(std::size_t new_size);

  void swap(ReshapableMatrix<ScalarType, device_name>& other);
  void swap(ReshapableMatrix<ScalarType, device_name, Allocator>& other);

  // Releases the memory allocated by *this and sets size and capacity to zero.
  void clear();
@@ -180,57 +180,55 @@ private:

  ValueType* data_ = nullptr;

  Allocator allocator_;

  template <class ScalarType2, DeviceType device_name2>
  template <class ScalarType2, DeviceType device_name2, class Allocator2>
  friend class dca::linalg::ReshapableMatrix;
};

template <typename ScalarType, DeviceType device_name>
ReshapableMatrix<ScalarType, device_name>::ReshapableMatrix(int size)
template <typename ScalarType, DeviceType device_name, class Allocator>
ReshapableMatrix<ScalarType, device_name, Allocator>::ReshapableMatrix(int size)
    : ReshapableMatrix(std::make_pair(size, size)) {}

template <typename ScalarType, DeviceType device_name>
ReshapableMatrix<ScalarType, device_name>::ReshapableMatrix(std::pair<int, int> size)
template <typename ScalarType, DeviceType device_name, class Allocator>
ReshapableMatrix<ScalarType, device_name, Allocator>::ReshapableMatrix(std::pair<int, int> size)
    : size_(size), capacity_(nextCapacity(nrElements(size))) {
  assert(size_.first >= 0 && size_.second >= 0);
  assert(capacity_ >= nrElements(size_));

  data_ = allocator_.allocate(capacity_);
  data_ = Allocator::allocate(capacity_);
}

template <typename ScalarType, DeviceType device_name>
ReshapableMatrix<ScalarType, device_name>::ReshapableMatrix(
    const ReshapableMatrix<ScalarType, device_name>& rhs) {
template <typename ScalarType, DeviceType device_name, class Allocator>
ReshapableMatrix<ScalarType, device_name, Allocator>::ReshapableMatrix(
    const ReshapableMatrix<ScalarType, device_name, Allocator>& rhs) {
  *this = rhs;
}

template <typename ScalarType, DeviceType device_name>
ReshapableMatrix<ScalarType, device_name>::ReshapableMatrix(
    ReshapableMatrix<ScalarType, device_name>&& rhs)
template <typename ScalarType, DeviceType device_name, class Allocator>
ReshapableMatrix<ScalarType, device_name, Allocator>::ReshapableMatrix(
    ReshapableMatrix<ScalarType, device_name, Allocator>&& rhs)
    : size_(rhs.size_), capacity_(rhs.capacity_), data_(rhs.data_) {
  rhs.capacity_ = 0;
  rhs.size_ = std::make_pair(0, 0);
  rhs.data_ = nullptr;
}

template <typename ScalarType, DeviceType device_name>
template <DeviceType rhs_device_name>
ReshapableMatrix<ScalarType, device_name>::ReshapableMatrix(
    const ReshapableMatrix<ScalarType, rhs_device_name>& rhs)
template <typename ScalarType, DeviceType device_name, class Allocator>
template <DeviceType rhs_device_name, class AllocatorRhs>
ReshapableMatrix<ScalarType, device_name, Allocator>::ReshapableMatrix(
    const ReshapableMatrix<ScalarType, rhs_device_name, AllocatorRhs>& rhs)
    : size_(rhs.size_), capacity_(rhs.capacity_) {
  data_ = allocator_.allocate(capacity_);
  data_ = Allocator::allocate(capacity_);
  util::memoryCopy(data_, leadingDimension(), rhs.data_, rhs.leadingDimension(), size_);
}

template <typename ScalarType, DeviceType device_name>
ReshapableMatrix<ScalarType, device_name>::~ReshapableMatrix() {
  allocator_.deallocate(data_);
template <typename ScalarType, DeviceType device_name, class Allocator>
ReshapableMatrix<ScalarType, device_name, Allocator>::~ReshapableMatrix() {
  Allocator::deallocate(data_);
}

template <typename ScalarType, DeviceType device_name>
bool ReshapableMatrix<ScalarType, device_name>::operator==(
    const ReshapableMatrix<ScalarType, device_name>& other) const {
template <typename ScalarType, DeviceType device_name, class Allocator>
bool ReshapableMatrix<ScalarType, device_name, Allocator>::operator==(
    const ReshapableMatrix<ScalarType, device_name, Allocator>& other) const {
  if (device_name == GPU)
    return ReshapableMatrix<ScalarType, CPU>(*this) == ReshapableMatrix<ScalarType, CPU>(other);

@@ -245,49 +243,50 @@ bool ReshapableMatrix<ScalarType, device_name>::operator==(
  return true;
}

template <typename ScalarType, DeviceType device_name>
bool ReshapableMatrix<ScalarType, device_name>::operator!=(
    const ReshapableMatrix<ScalarType, device_name>& other) const {
template <typename ScalarType, DeviceType device_name, class Allocator>
bool ReshapableMatrix<ScalarType, device_name, Allocator>::operator!=(
    const ReshapableMatrix<ScalarType, device_name, Allocator>& other) const {
  return !(*this == other);
}

template <typename ScalarType, DeviceType device_name>
bool ReshapableMatrix<ScalarType, device_name>::resizeNoCopy(const std::pair<int, int> new_size) {
template <typename ScalarType, DeviceType device_name, class Allocator>
bool ReshapableMatrix<ScalarType, device_name, Allocator>::resizeNoCopy(
    const std::pair<int, int> new_size) {
  const bool realloc = reserveNoCopy(nrElements(new_size));
  size_ = new_size;
  assert(capacity_ >= nrElements(size_));
  return realloc;
}

template <typename ScalarType, DeviceType device_name>
bool ReshapableMatrix<ScalarType, device_name>::reserveNoCopy(std::size_t new_size) {
template <typename ScalarType, DeviceType device_name, class Allocator>
bool ReshapableMatrix<ScalarType, device_name, Allocator>::reserveNoCopy(std::size_t new_size) {
  if (new_size > capacity_) {
    allocator_.deallocate(data_);
    Allocator::deallocate(data_);
    capacity_ = nextCapacity(new_size);
    data_ = allocator_.allocate(capacity_);
    data_ = Allocator::allocate(capacity_);
    return true;
  }
  return false;
}

template <typename ScalarType, DeviceType device_name>
void ReshapableMatrix<ScalarType, device_name>::swap(ReshapableMatrix<ScalarType, device_name>& other) {
template <typename ScalarType, DeviceType device_name, class Allocator>
void ReshapableMatrix<ScalarType, device_name, Allocator>::swap(
    ReshapableMatrix<ScalarType, device_name, Allocator>& other) {
  std::swap(size_, other.size_);
  std::swap(capacity_, other.capacity_);
  std::swap(data_, other.data_);
  std::swap(allocator_, other.allocator_);
}

template <typename ScalarType, DeviceType device_name>
void ReshapableMatrix<ScalarType, device_name>::clear() {
  allocator_.deallocate(data_);
template <typename ScalarType, DeviceType device_name, class Allocator>
void ReshapableMatrix<ScalarType, device_name, Allocator>::clear() {
  Allocator::deallocate(data_);
  size_ = std::make_pair(0, 0);
  capacity_ = 0;
}

template <typename ScalarType, DeviceType device_name>
template <typename ScalarType, DeviceType device_name, class Allocator>
template <DeviceType rhs_device_name>
void ReshapableMatrix<ScalarType, device_name>::set(
void ReshapableMatrix<ScalarType, device_name, Allocator>::set(
    const ReshapableMatrix<ScalarType, rhs_device_name>& rhs, int thread_id, int stream_id) {
  resize(rhs.size_);
  util::memoryCopy(data_, leadingDimension(), rhs.data_, rhs.leadingDimension(), size_, thread_id,
@@ -296,40 +295,40 @@ void ReshapableMatrix<ScalarType, device_name>::set(

#ifdef DCA_HAVE_CUDA

template <typename ScalarType, DeviceType device_name>
template <typename ScalarType, DeviceType device_name, class Allocator>
template <DeviceType rhs_device_name>
void ReshapableMatrix<ScalarType, device_name>::setAsync(
void ReshapableMatrix<ScalarType, device_name, Allocator>::setAsync(
    const ReshapableMatrix<ScalarType, rhs_device_name>& rhs, const cudaStream_t stream) {
  resizeNoCopy(rhs.size_);
  util::memoryCopyAsync(data_, leadingDimension(), rhs.data_, rhs.leadingDimension(), size_, stream);
}

template <typename ScalarType, DeviceType device_name>
template <typename ScalarType, DeviceType device_name, class Allocator>
template <DeviceType rhs_device_name>
void ReshapableMatrix<ScalarType, device_name>::setAsync(
void ReshapableMatrix<ScalarType, device_name, Allocator>::setAsync(
    const ReshapableMatrix<ScalarType, rhs_device_name>& rhs, const int thread_id,
    const int stream_id) {
  setAsync(rhs, util::getStream(thread_id, stream_id));
}

template <typename ScalarType, DeviceType device_name>
void ReshapableMatrix<ScalarType, device_name>::setToZero(cudaStream_t stream) {
template <typename ScalarType, DeviceType device_name, class Allocator>
void ReshapableMatrix<ScalarType, device_name, Allocator>::setToZero(cudaStream_t stream) {
  cudaMemsetAsync(data_, 0, leadingDimension() * nrCols() * sizeof(ScalarType), stream);
}

#else  // DCA_HAVE_CUDA

template <typename ScalarType, DeviceType device_name>
template <typename ScalarType, DeviceType device_name, class Allocator>
template <DeviceType rhs_device_name>
void ReshapableMatrix<ScalarType, device_name>::setAsync(
void ReshapableMatrix<ScalarType, device_name, Allocator>::setAsync(
    const ReshapableMatrix<ScalarType, rhs_device_name>& rhs, int /*thread_id*/, int /*stream_id*/) {
  set(rhs);
}

#endif  // DCA_HAVE_CUDA

template <typename ScalarType, DeviceType device_name>
std::size_t ReshapableMatrix<ScalarType, device_name>::nextCapacity(const std::size_t size) {
template <typename ScalarType, DeviceType device_name, class Allocator>
std::size_t ReshapableMatrix<ScalarType, device_name, Allocator>::nextCapacity(const std::size_t size) {
  assert(size >= 0);
  constexpr std::size_t block_size = 512;

@@ -347,8 +346,8 @@ std::size_t ReshapableMatrix<ScalarType, device_name>::nextCapacity(const std::s
                            : (size + block_size - 1) / block_size * block_size;
}

template <typename ScalarType, DeviceType device_name>
std::size_t ReshapableMatrix<ScalarType, device_name>::deviceFingerprint() const {
template <typename ScalarType, DeviceType device_name, class Allocator>
std::size_t ReshapableMatrix<ScalarType, device_name, Allocator>::deviceFingerprint() const {
  if (device_name == GPU)
    return capacity_ * sizeof(ScalarType);
  else