Commit 3dbf3a26 authored by gbalduzz's avatar gbalduzz
Browse files

Clean up allocators.

parent 749a664e
Loading
Loading
Loading
Loading
+17 −21
Original line number Diff line number Diff line
@@ -26,14 +26,13 @@
#include "dca/linalg/device_type.hpp"
#include "dca/linalg/util/copy.hpp"
#include "dca/linalg/util/stream_functions.hpp"
#include "dca/linalg/util/memory.hpp"

namespace dca {
namespace linalg {
// dca::linalg::

template <typename ScalarType, DeviceType device_name>
class Matrix {
class Matrix : public util::DefaultAllocator<ScalarType, device_name> {
public:
  using ThisType = Matrix<ScalarType, device_name>;
  using ValueType = ScalarType;
@@ -237,8 +236,6 @@ private:

  ValueType* data_ = nullptr;

  Allocator allocator_;

  template <class ScalarType2, DeviceType device_name2>
  friend class dca::linalg::Matrix;
};
@@ -274,6 +271,15 @@ template <typename ScalarType, DeviceType device_name>
Matrix<ScalarType, device_name>::Matrix(std::pair<int, int> size, std::pair<int, int> capacity)
    : Matrix(default_name_, size, capacity) {}

// Cross-device copy constructor: creates a matrix named `name` with the same
// size and capacity as `rhs`, then copies the elements (the two matrices may
// live on different devices, hence util::memoryCopy).
template <typename ScalarType, DeviceType device_name>
template <DeviceType rhs_device_name>
Matrix<ScalarType, device_name>::Matrix(const Matrix<ScalarType, rhs_device_name>& rhs,
                                        const std::string& name)
    : name_(name), size_(rhs.size_), capacity_(rhs.capacity_) {
  // Allocation now goes through the inherited allocator (this commit replaces
  // the allocator_ member with a base class). `Allocator` is presumably an
  // alias for util::DefaultAllocator<ScalarType, device_name> -- confirm in
  // the full header.
  data_ = Allocator::allocate(nrElements(capacity_));
  // 2D copy honoring the (possibly different) leading dimensions of the two
  // matrices; only size_ elements are copied, not the full capacity.
  util::memoryCopy(data_, leadingDimension(), rhs.data_, rhs.leadingDimension(), size_);
}

template <typename ScalarType, DeviceType device_name>
Matrix<ScalarType, device_name>::Matrix(const std::string& name, std::pair<int, int> size,
                                        std::pair<int, int> capacity)
@@ -283,8 +289,7 @@ Matrix<ScalarType, device_name>::Matrix(const std::string& name, std::pair<int,
  assert(capacity.first >= size_.first && capacity.second >= size_.second);
  assert(capacity_.first >= capacity.first && capacity_.second >= capacity.second);

  data_ = allocator_.allocate(nrElements(capacity_));
  util::Memory<device_name>::setToZero(data_, nrElements(capacity_));
  data_ = Allocator::allocate(nrElements(capacity_));
}

template <typename ScalarType, DeviceType device_name>
@@ -302,18 +307,9 @@ Matrix<ScalarType, device_name>::Matrix(Matrix<ScalarType, device_name>&& rhs, c
  rhs.data_ = nullptr;
}

// NOTE(review): this is the pre-change copy of the cross-device copy
// constructor shown earlier in the diff. It allocates through the allocator_
// member, which this commit removes in favour of an Allocator base class; the
// commit also moves the definition earlier in the file.
template <typename ScalarType, DeviceType device_name>
template <DeviceType rhs_device_name>
Matrix<ScalarType, device_name>::Matrix(const Matrix<ScalarType, rhs_device_name>& rhs,
                                        const std::string& name)
    : name_(name), size_(rhs.size_), capacity_(rhs.capacity_) {
  data_ = allocator_.allocate(nrElements(capacity_));
  util::memoryCopy(data_, leadingDimension(), rhs.data_, rhs.leadingDimension(), size_);
}

// Destructor: releases the matrix storage.
template <typename ScalarType, DeviceType device_name>
Matrix<ScalarType, device_name>::~Matrix() {
  // Diff residue: the next two lines are the removed (member allocator_) and
  // added (inherited Allocator) spellings of the same deallocation -- only
  // one of them exists in either version of the file.
  allocator_.deallocate(data_);
  Allocator::deallocate(data_);
}

template <typename ScalarType, DeviceType device_name>
@@ -323,11 +319,11 @@ void Matrix<ScalarType, device_name>::resize(std::pair<int, int> new_size) {
    std::pair<int, int> new_capacity = capacityMultipleOfBlockSize(new_size);

    ValueType* new_data = nullptr;
    new_data = allocator_.allocate(nrElements(new_capacity));
    new_data = Allocator::allocate(nrElements(new_capacity));
    const std::pair<int, int> copy_size(std::min(new_size.first, size_.first),
                                        std::min(new_size.second, size_.second));
    util::memoryCopy(new_data, new_capacity.first, data_, leadingDimension(), copy_size);
    allocator_.deallocate(data_);
    Allocator::deallocate(data_);

    data_ = new_data;
    capacity_ = new_capacity;
@@ -392,8 +388,8 @@ void Matrix<ScalarType, device_name>::resizeNoCopy(std::pair<int, int> new_size)
    size_ = new_size;
    capacity_ = capacityMultipleOfBlockSize(new_size);

    allocator_.deallocate(data_);
    data_ = allocator_.allocate(nrElements(capacity_));
    Allocator::deallocate(data_);
    data_ = Allocator::allocate(nrElements(capacity_));
  }
  else {
    size_ = new_size;
@@ -402,7 +398,7 @@ void Matrix<ScalarType, device_name>::resizeNoCopy(std::pair<int, int> new_size)

// Frees the storage and resets size and capacity to (0, 0). The matrix stays
// usable afterwards; a later resize re-allocates.
template <typename ScalarType, DeviceType device_name>
void Matrix<ScalarType, device_name>::clear() {
  // Diff residue: removed (member allocator_) vs. added (inherited Allocator)
  // form of the same call -- only one exists in either version of the file.
  allocator_.deallocate(data_);
  Allocator::deallocate(data_);
  size_ = capacity_ = std::make_pair(0, 0);
}

+2 −4
Original line number Diff line number Diff line
@@ -12,16 +12,14 @@
#ifndef DCA_LINALG_UTIL_ALLOCATORS_ALIGNED_ALLOCATOR_HPP
#define DCA_LINALG_UTIL_ALLOCATORS_ALIGNED_ALLOCATOR_HPP

#include <vector>

namespace dca {
namespace linalg {
namespace util {
// dca::linalg::util::

template <typename T>
class AlignedAllocator : public std::allocator<T> {
public:
class AlignedAllocator {
protected:
  T* allocate(std::size_t n) {
    T* ptr;
    int err = posix_memalign((void**)&ptr, 128, n * sizeof(T));
+6 −3
Original line number Diff line number Diff line
@@ -16,7 +16,6 @@
#error "This file requires CUDA support."
#endif

#include <vector>
#include <cuda_runtime.h>

#include "dca/linalg/util/error_cuda.hpp"
@@ -27,8 +26,8 @@ namespace util {
// dca::linalg::util::

template <typename T>
class DeviceAllocator : public std::allocator<T> {
public:
class DeviceAllocator {
protected:
  T* allocate(std::size_t n) {
    if (n == 0)
      return nullptr;
@@ -50,6 +49,10 @@ public:
    }
    ptr = nullptr;
  }

public:
  // No-op stream setter kept for interface parity with
  // ManagedAllocator::setStream, so code templated on the allocator can call
  // setStream unconditionally. (Despite the original comment this is plain
  // overloading, not SFINAE.)
  void setStream(const cudaStream_t /*stream*/) const {}
};

}  // util
+20 −7
Original line number Diff line number Diff line
@@ -16,7 +16,6 @@
#error "This file requires CUDA support."
#endif

#include <vector>
#include <cuda_runtime.h>

#include "dca/linalg/util/error_cuda.hpp"
@@ -27,20 +26,23 @@ namespace util {
// dca::linalg::util::

template <typename T>
class ManagedAllocator : public std::allocator<T> {
public:
class ManagedAllocator {
protected:
  T* allocate(std::size_t n) {
    if (n == 0)
      return nullptr;

    T* ptr;
    cudaError_t ret = cudaMallocManaged((void**)&ptr, n * sizeof(T));
    cudaError_t ret = cudaMallocManaged((void**)&ptr_, n * sizeof(T));
    if (ret != cudaSuccess) {
      printErrorMessage(ret, __FUNCTION__, __FILE__, __LINE__,
                        "\t Managed size requested : " + std::to_string(n * sizeof(T)));
      throw(std::bad_alloc());
    }
    return ptr;

    if (stream_)
      cudaStreamAttachMemAsync(stream_, ptr_);

    return ptr_;
  }

  void deallocate(T*& ptr, std::size_t /*n*/ = 0) noexcept {
@@ -49,8 +51,19 @@ public:
      printErrorMessage(ret, __FUNCTION__, __FILE__, __LINE__);
      std::terminate();
    }
    ptr = nullptr;
    ptr_ = ptr = nullptr;
  }

public:
  // Records the stream and, if managed memory is already allocated, attaches
  // it to that stream via cudaStreamAttachMemAsync so subsequent accesses are
  // associated with the stream.
  // NOTE(review): the cudaError_t returned by cudaStreamAttachMemAsync is
  // ignored here -- consider checking it like the other CUDA calls do.
  void setStream(cudaStream_t stream) {
    stream_ = stream;
    if (ptr_)
      cudaStreamAttachMemAsync(stream, ptr_);
  }

private:
  T* ptr_ = nullptr;
  cudaStream_t stream_ = nullptr;
};

}  // util
+8 −11
Original line number Diff line number Diff line
@@ -34,7 +34,7 @@ namespace linalg {

template <typename ScalarType, DeviceType device_name,
          class Allocator = util::DefaultAllocator<ScalarType, device_name>>
class Vector {
class Vector : public Allocator {
public:
  using ThisType = Vector<ScalarType, device_name, Allocator>;
  using ValueType = ScalarType;
@@ -165,9 +165,6 @@ public:

  std::size_t deviceFingerprint() const;

protected:
  Allocator allocator_;

private:
  std::string name_;

@@ -207,7 +204,7 @@ Vector<ScalarType, device_name, Allocator>::Vector(const std::string& name, size
                                                   size_t capacity)
    : name_(name), size_(size), capacity_(capacity), data_(nullptr) {
  assert(capacity_ >= size_);
  data_ = allocator_.allocate(capacity_);
  data_ = Allocator::allocate(capacity_);
  util::Memory<device_name>::setToZero(data_, capacity_);
}

@@ -235,7 +232,7 @@ Vector<ScalarType, device_name, Allocator>::Vector(Vector<ScalarType, device_nam

// Destructor: releases the vector's storage.
template <typename ScalarType, DeviceType device_name, class Allocator>
Vector<ScalarType, device_name, Allocator>::~Vector() {
  // Diff residue: removed (member allocator_) and added (inherited Allocator)
  // spellings of the same deallocation -- only one exists in either version
  // of the file.
  allocator_.deallocate(data_);
  Allocator::deallocate(data_);
}

template <typename ScalarType, DeviceType device_name, class Allocator>
@@ -334,9 +331,9 @@ void Vector<ScalarType, device_name, Allocator>::resize(size_t new_size) {
  if (new_size > capacity_) {
    int new_capacity = (new_size / 64 + 1) * 64;

    ValueType* new_data = allocator_.allocate(new_capacity);
    ValueType* new_data = Allocator::allocate(new_capacity);
    util::memoryCopy(new_data, data_, size_);
    allocator_.deallocate(data_);
    Allocator::deallocate(data_);

    data_ = new_data;
    capacity_ = new_capacity;
@@ -351,8 +348,8 @@ void Vector<ScalarType, device_name, Allocator>::resizeNoCopy(size_t new_size) {
  if (new_size > capacity_) {
    int new_capacity = (new_size / 64 + 1) * 64;

    allocator_.deallocate(data_);
    data_ = allocator_.allocate(new_capacity);
    Allocator::deallocate(data_);
    data_ = Allocator::allocate(new_capacity);

    capacity_ = new_capacity;
    size_ = new_size;
@@ -363,7 +360,7 @@ void Vector<ScalarType, device_name, Allocator>::resizeNoCopy(size_t new_size) {

// Frees the storage and resets size and capacity to zero. The vector stays
// usable afterwards; a later resize re-allocates.
template <typename ScalarType, DeviceType device_name, class Allocator>
void Vector<ScalarType, device_name, Allocator>::clear() {
  // Diff residue: removed (member allocator_) vs. added (inherited Allocator)
  // form of the same call -- only one exists in either version of the file.
  allocator_.deallocate(data_);
  Allocator::deallocate(data_);
  size_ = capacity_ = 0;
}

Loading