Loading include/dca/linalg/matrix.hpp +17 −21 Original line number Diff line number Diff line Loading @@ -26,14 +26,13 @@ #include "dca/linalg/device_type.hpp" #include "dca/linalg/util/copy.hpp" #include "dca/linalg/util/stream_functions.hpp" #include "dca/linalg/util/memory.hpp" namespace dca { namespace linalg { // dca::linalg:: template <typename ScalarType, DeviceType device_name> class Matrix { class Matrix : public util::DefaultAllocator<ScalarType, device_name> { public: using ThisType = Matrix<ScalarType, device_name>; using ValueType = ScalarType; Loading Loading @@ -237,8 +236,6 @@ private: ValueType* data_ = nullptr; Allocator allocator_; template <class ScalarType2, DeviceType device_name2> friend class dca::linalg::Matrix; }; Loading Loading @@ -274,6 +271,15 @@ template <typename ScalarType, DeviceType device_name> Matrix<ScalarType, device_name>::Matrix(std::pair<int, int> size, std::pair<int, int> capacity) : Matrix(default_name_, size, capacity) {} template <typename ScalarType, DeviceType device_name> template <DeviceType rhs_device_name> Matrix<ScalarType, device_name>::Matrix(const Matrix<ScalarType, rhs_device_name>& rhs, const std::string& name) : name_(name), size_(rhs.size_), capacity_(rhs.capacity_) { data_ = Allocator::allocate(nrElements(capacity_)); util::memoryCopy(data_, leadingDimension(), rhs.data_, rhs.leadingDimension(), size_); } template <typename ScalarType, DeviceType device_name> Matrix<ScalarType, device_name>::Matrix(const std::string& name, std::pair<int, int> size, std::pair<int, int> capacity) Loading @@ -283,8 +289,7 @@ Matrix<ScalarType, device_name>::Matrix(const std::string& name, std::pair<int, assert(capacity.first >= size_.first && capacity.second >= size_.second); assert(capacity_.first >= capacity.first && capacity_.second >= capacity.second); data_ = allocator_.allocate(nrElements(capacity_)); util::Memory<device_name>::setToZero(data_, nrElements(capacity_)); data_ = Allocator::allocate(nrElements(capacity_)); } template <typename ScalarType, DeviceType device_name> Loading @@ -302,18 +307,9 @@ Matrix<ScalarType, device_name>::Matrix(Matrix<ScalarType, device_name>&& rhs, c rhs.data_ = nullptr; } template <typename ScalarType, DeviceType device_name> template <DeviceType rhs_device_name> Matrix<ScalarType, device_name>::Matrix(const Matrix<ScalarType, rhs_device_name>& rhs, const std::string& name) : name_(name), size_(rhs.size_), capacity_(rhs.capacity_) { data_ = allocator_.allocate(nrElements(capacity_)); util::memoryCopy(data_, leadingDimension(), rhs.data_, rhs.leadingDimension(), size_); } template <typename ScalarType, DeviceType device_name> Matrix<ScalarType, device_name>::~Matrix() { allocator_.deallocate(data_); Allocator::deallocate(data_); } template <typename ScalarType, DeviceType device_name> Loading @@ -323,11 +319,11 @@ void Matrix<ScalarType, device_name>::resize(std::pair<int, int> new_size) { std::pair<int, int> new_capacity = capacityMultipleOfBlockSize(new_size); ValueType* new_data = nullptr; new_data = allocator_.allocate(nrElements(new_capacity)); new_data = Allocator::allocate(nrElements(new_capacity)); const std::pair<int, int> copy_size(std::min(new_size.first, size_.first), std::min(new_size.second, size_.second)); util::memoryCopy(new_data, new_capacity.first, data_, leadingDimension(), copy_size); allocator_.deallocate(data_); Allocator::deallocate(data_); data_ = new_data; capacity_ = new_capacity; Loading Loading @@ -392,8 +388,8 @@ void Matrix<ScalarType, device_name>::resizeNoCopy(std::pair<int, int> new_size) size_ = new_size; capacity_ = capacityMultipleOfBlockSize(new_size); allocator_.deallocate(data_); data_ = allocator_.allocate(nrElements(capacity_)); Allocator::deallocate(data_); data_ = Allocator::allocate(nrElements(capacity_)); } else { size_ = new_size; Loading @@ -402,7 +398,7 @@ void Matrix<ScalarType, device_name>::resizeNoCopy(std::pair<int, int> new_size) template <typename ScalarType, DeviceType device_name> void Matrix<ScalarType, device_name>::clear() { allocator_.deallocate(data_); Allocator::deallocate(data_); size_ = capacity_ = std::make_pair(0, 0); } Loading include/dca/linalg/util/allocators/aligned_allocator.hpp +2 −4 Original line number Diff line number Diff line Loading @@ -12,16 +12,14 @@ #ifndef DCA_LINALG_UTIL_ALLOCATORS_ALIGNED_ALLOCATOR_HPP #define DCA_LINALG_UTIL_ALLOCATORS_ALIGNED_ALLOCATOR_HPP #include <vector> namespace dca { namespace linalg { namespace util { // dca::linalg::util:: template <typename T> class AlignedAllocator : public std::allocator<T> { public: class AlignedAllocator { protected: T* allocate(std::size_t n) { T* ptr; int err = posix_memalign((void**)&ptr, 128, n * sizeof(T)); Loading include/dca/linalg/util/allocators/device_allocator.hpp +6 −3 Original line number Diff line number Diff line Loading @@ -16,7 +16,6 @@ #error "This file requires CUDA support." #endif #include <vector> #include <cuda_runtime.h> #include "dca/linalg/util/error_cuda.hpp" Loading @@ -27,8 +26,8 @@ namespace util { // dca::linalg::util:: template <typename T> class DeviceAllocator : public std::allocator<T> { public: class DeviceAllocator { protected: T* allocate(std::size_t n) { if (n == 0) return nullptr; Loading @@ -50,6 +49,10 @@ public: } ptr = nullptr; } public: // SFINAE method for setting managed memory stream. void setStream(const cudaStream_t /*stream*/) const {} }; } // util Loading include/dca/linalg/util/allocators/managed_allocator.hpp +20 −7 Original line number Diff line number Diff line Loading @@ -16,7 +16,6 @@ #error "This file requires CUDA support." #endif #include <vector> #include <cuda_runtime.h> #include "dca/linalg/util/error_cuda.hpp" Loading @@ -27,20 +26,23 @@ namespace util { // dca::linalg::util:: template <typename T> class ManagedAllocator : public std::allocator<T> { public: class ManagedAllocator { protected: T* allocate(std::size_t n) { if (n == 0) return nullptr; T* ptr; cudaError_t ret = cudaMallocManaged((void**)&ptr, n * sizeof(T)); cudaError_t ret = cudaMallocManaged((void**)&ptr_, n * sizeof(T)); if (ret != cudaSuccess) { printErrorMessage(ret, __FUNCTION__, __FILE__, __LINE__, "\t Managed size requested : " + std::to_string(n * sizeof(T))); throw(std::bad_alloc()); } return ptr; if (stream_) cudaStreamAttachMemAsync(stream_, ptr_); return ptr_; } void deallocate(T*& ptr, std::size_t /*n*/ = 0) noexcept { Loading @@ -49,8 +51,19 @@ public: printErrorMessage(ret, __FUNCTION__, __FILE__, __LINE__); std::terminate(); } ptr = nullptr; ptr_ = ptr = nullptr; } public: void setStream(cudaStream_t stream) { stream_ = stream; if (ptr_) cudaStreamAttachMemAsync(stream, ptr_); } private: T* ptr_ = nullptr; cudaStream_t stream_ = nullptr; }; } // util Loading include/dca/linalg/vector.hpp +8 −11 Original line number Diff line number Diff line Loading @@ -34,7 +34,7 @@ namespace linalg { template <typename ScalarType, DeviceType device_name, class Allocator = util::DefaultAllocator<ScalarType, device_name>> class Vector { class Vector : public Allocator { public: using ThisType = Vector<ScalarType, device_name, Allocator>; using ValueType = ScalarType; Loading Loading @@ -165,9 +165,6 @@ public: std::size_t deviceFingerprint() const; protected: Allocator allocator_; private: std::string name_; Loading Loading @@ -207,7 +204,7 @@ Vector<ScalarType, device_name, Allocator>::Vector(const std::string& name, size size_t capacity) : name_(name), size_(size), capacity_(capacity), data_(nullptr) { assert(capacity_ >= size_); data_ = allocator_.allocate(capacity_); data_ = Allocator::allocate(capacity_); util::Memory<device_name>::setToZero(data_, capacity_); } Loading Loading @@ -235,7 +232,7 @@ Vector<ScalarType, device_name, Allocator>::Vector(Vector<ScalarType, device_nam template <typename ScalarType, DeviceType device_name, class Allocator> Vector<ScalarType, device_name, Allocator>::~Vector() { allocator_.deallocate(data_); Allocator::deallocate(data_); } template <typename ScalarType, DeviceType device_name, class Allocator> Loading Loading @@ -334,9 +331,9 @@ void Vector<ScalarType, device_name, Allocator>::resize(size_t new_size) { if (new_size > capacity_) { int new_capacity = (new_size / 64 + 1) * 64; ValueType* new_data = allocator_.allocate(new_capacity); ValueType* new_data = Allocator::allocate(new_capacity); util::memoryCopy(new_data, data_, size_); allocator_.deallocate(data_); Allocator::deallocate(data_); data_ = new_data; capacity_ = new_capacity; Loading @@ -351,8 +348,8 @@ void Vector<ScalarType, device_name, Allocator>::resizeNoCopy(size_t new_size) { if (new_size > capacity_) { int new_capacity = (new_size / 64 + 1) * 64; allocator_.deallocate(data_); data_ = allocator_.allocate(new_capacity); Allocator::deallocate(data_); data_ = Allocator::allocate(new_capacity); capacity_ = new_capacity; size_ = new_size; Loading @@ -363,7 +360,7 @@ void Vector<ScalarType, device_name, Allocator>::resizeNoCopy(size_t new_size) { template <typename ScalarType, DeviceType device_name, class Allocator> void Vector<ScalarType, device_name, Allocator>::clear() { allocator_.deallocate(data_); Allocator::deallocate(data_); size_ = capacity_ = 0; } Loading Loading
include/dca/linalg/matrix.hpp +17 −21 Original line number Diff line number Diff line Loading @@ -26,14 +26,13 @@ #include "dca/linalg/device_type.hpp" #include "dca/linalg/util/copy.hpp" #include "dca/linalg/util/stream_functions.hpp" #include "dca/linalg/util/memory.hpp" namespace dca { namespace linalg { // dca::linalg:: template <typename ScalarType, DeviceType device_name> class Matrix { class Matrix : public util::DefaultAllocator<ScalarType, device_name> { public: using ThisType = Matrix<ScalarType, device_name>; using ValueType = ScalarType; Loading Loading @@ -237,8 +236,6 @@ private: ValueType* data_ = nullptr; Allocator allocator_; template <class ScalarType2, DeviceType device_name2> friend class dca::linalg::Matrix; }; Loading Loading @@ -274,6 +271,15 @@ template <typename ScalarType, DeviceType device_name> Matrix<ScalarType, device_name>::Matrix(std::pair<int, int> size, std::pair<int, int> capacity) : Matrix(default_name_, size, capacity) {} template <typename ScalarType, DeviceType device_name> template <DeviceType rhs_device_name> Matrix<ScalarType, device_name>::Matrix(const Matrix<ScalarType, rhs_device_name>& rhs, const std::string& name) : name_(name), size_(rhs.size_), capacity_(rhs.capacity_) { data_ = Allocator::allocate(nrElements(capacity_)); util::memoryCopy(data_, leadingDimension(), rhs.data_, rhs.leadingDimension(), size_); } template <typename ScalarType, DeviceType device_name> Matrix<ScalarType, device_name>::Matrix(const std::string& name, std::pair<int, int> size, std::pair<int, int> capacity) Loading @@ -283,8 +289,7 @@ Matrix<ScalarType, device_name>::Matrix(const std::string& name, std::pair<int, assert(capacity.first >= size_.first && capacity.second >= size_.second); assert(capacity_.first >= capacity.first && capacity_.second >= capacity.second); data_ = allocator_.allocate(nrElements(capacity_)); util::Memory<device_name>::setToZero(data_, nrElements(capacity_)); data_ = Allocator::allocate(nrElements(capacity_)); } template <typename ScalarType, DeviceType device_name> Loading @@ -302,18 +307,9 @@ Matrix<ScalarType, device_name>::Matrix(Matrix<ScalarType, device_name>&& rhs, c rhs.data_ = nullptr; } template <typename ScalarType, DeviceType device_name> template <DeviceType rhs_device_name> Matrix<ScalarType, device_name>::Matrix(const Matrix<ScalarType, rhs_device_name>& rhs, const std::string& name) : name_(name), size_(rhs.size_), capacity_(rhs.capacity_) { data_ = allocator_.allocate(nrElements(capacity_)); util::memoryCopy(data_, leadingDimension(), rhs.data_, rhs.leadingDimension(), size_); } template <typename ScalarType, DeviceType device_name> Matrix<ScalarType, device_name>::~Matrix() { allocator_.deallocate(data_); Allocator::deallocate(data_); } template <typename ScalarType, DeviceType device_name> Loading @@ -323,11 +319,11 @@ void Matrix<ScalarType, device_name>::resize(std::pair<int, int> new_size) { std::pair<int, int> new_capacity = capacityMultipleOfBlockSize(new_size); ValueType* new_data = nullptr; new_data = allocator_.allocate(nrElements(new_capacity)); new_data = Allocator::allocate(nrElements(new_capacity)); const std::pair<int, int> copy_size(std::min(new_size.first, size_.first), std::min(new_size.second, size_.second)); util::memoryCopy(new_data, new_capacity.first, data_, leadingDimension(), copy_size); allocator_.deallocate(data_); Allocator::deallocate(data_); data_ = new_data; capacity_ = new_capacity; Loading Loading @@ -392,8 +388,8 @@ void Matrix<ScalarType, device_name>::resizeNoCopy(std::pair<int, int> new_size) size_ = new_size; capacity_ = capacityMultipleOfBlockSize(new_size); allocator_.deallocate(data_); data_ = allocator_.allocate(nrElements(capacity_)); Allocator::deallocate(data_); data_ = Allocator::allocate(nrElements(capacity_)); } else { size_ = new_size; Loading @@ -402,7 +398,7 @@ void Matrix<ScalarType, device_name>::resizeNoCopy(std::pair<int, int> new_size) template <typename ScalarType, DeviceType device_name> void Matrix<ScalarType, device_name>::clear() { allocator_.deallocate(data_); Allocator::deallocate(data_); size_ = capacity_ = std::make_pair(0, 0); } Loading
include/dca/linalg/util/allocators/aligned_allocator.hpp +2 −4 Original line number Diff line number Diff line Loading @@ -12,16 +12,14 @@ #ifndef DCA_LINALG_UTIL_ALLOCATORS_ALIGNED_ALLOCATOR_HPP #define DCA_LINALG_UTIL_ALLOCATORS_ALIGNED_ALLOCATOR_HPP #include <vector> namespace dca { namespace linalg { namespace util { // dca::linalg::util:: template <typename T> class AlignedAllocator : public std::allocator<T> { public: class AlignedAllocator { protected: T* allocate(std::size_t n) { T* ptr; int err = posix_memalign((void**)&ptr, 128, n * sizeof(T)); Loading
include/dca/linalg/util/allocators/device_allocator.hpp +6 −3 Original line number Diff line number Diff line Loading @@ -16,7 +16,6 @@ #error "This file requires CUDA support." #endif #include <vector> #include <cuda_runtime.h> #include "dca/linalg/util/error_cuda.hpp" Loading @@ -27,8 +26,8 @@ namespace util { // dca::linalg::util:: template <typename T> class DeviceAllocator : public std::allocator<T> { public: class DeviceAllocator { protected: T* allocate(std::size_t n) { if (n == 0) return nullptr; Loading @@ -50,6 +49,10 @@ public: } ptr = nullptr; } public: // SFINAE method for setting managed memory stream. void setStream(const cudaStream_t /*stream*/) const {} }; } // util Loading
include/dca/linalg/util/allocators/managed_allocator.hpp +20 −7 Original line number Diff line number Diff line Loading @@ -16,7 +16,6 @@ #error "This file requires CUDA support." #endif #include <vector> #include <cuda_runtime.h> #include "dca/linalg/util/error_cuda.hpp" Loading @@ -27,20 +26,23 @@ namespace util { // dca::linalg::util:: template <typename T> class ManagedAllocator : public std::allocator<T> { public: class ManagedAllocator { protected: T* allocate(std::size_t n) { if (n == 0) return nullptr; T* ptr; cudaError_t ret = cudaMallocManaged((void**)&ptr, n * sizeof(T)); cudaError_t ret = cudaMallocManaged((void**)&ptr_, n * sizeof(T)); if (ret != cudaSuccess) { printErrorMessage(ret, __FUNCTION__, __FILE__, __LINE__, "\t Managed size requested : " + std::to_string(n * sizeof(T))); throw(std::bad_alloc()); } return ptr; if (stream_) cudaStreamAttachMemAsync(stream_, ptr_); return ptr_; } void deallocate(T*& ptr, std::size_t /*n*/ = 0) noexcept { Loading @@ -49,8 +51,19 @@ public: printErrorMessage(ret, __FUNCTION__, __FILE__, __LINE__); std::terminate(); } ptr = nullptr; ptr_ = ptr = nullptr; } public: void setStream(cudaStream_t stream) { stream_ = stream; if (ptr_) cudaStreamAttachMemAsync(stream, ptr_); } private: T* ptr_ = nullptr; cudaStream_t stream_ = nullptr; }; } // util Loading
include/dca/linalg/vector.hpp +8 −11 Original line number Diff line number Diff line Loading @@ -34,7 +34,7 @@ namespace linalg { template <typename ScalarType, DeviceType device_name, class Allocator = util::DefaultAllocator<ScalarType, device_name>> class Vector { class Vector : public Allocator { public: using ThisType = Vector<ScalarType, device_name, Allocator>; using ValueType = ScalarType; Loading Loading @@ -165,9 +165,6 @@ public: std::size_t deviceFingerprint() const; protected: Allocator allocator_; private: std::string name_; Loading Loading @@ -207,7 +204,7 @@ Vector<ScalarType, device_name, Allocator>::Vector(const std::string& name, size size_t capacity) : name_(name), size_(size), capacity_(capacity), data_(nullptr) { assert(capacity_ >= size_); data_ = allocator_.allocate(capacity_); data_ = Allocator::allocate(capacity_); util::Memory<device_name>::setToZero(data_, capacity_); } Loading Loading @@ -235,7 +232,7 @@ Vector<ScalarType, device_name, Allocator>::Vector(Vector<ScalarType, device_nam template <typename ScalarType, DeviceType device_name, class Allocator> Vector<ScalarType, device_name, Allocator>::~Vector() { allocator_.deallocate(data_); Allocator::deallocate(data_); } template <typename ScalarType, DeviceType device_name, class Allocator> Loading Loading @@ -334,9 +331,9 @@ void Vector<ScalarType, device_name, Allocator>::resize(size_t new_size) { if (new_size > capacity_) { int new_capacity = (new_size / 64 + 1) * 64; ValueType* new_data = allocator_.allocate(new_capacity); ValueType* new_data = Allocator::allocate(new_capacity); util::memoryCopy(new_data, data_, size_); allocator_.deallocate(data_); Allocator::deallocate(data_); data_ = new_data; capacity_ = new_capacity; Loading @@ -351,8 +348,8 @@ void Vector<ScalarType, device_name, Allocator>::resizeNoCopy(size_t new_size) { if (new_size > capacity_) { int new_capacity = (new_size / 64 + 1) * 64; allocator_.deallocate(data_); data_ = allocator_.allocate(new_capacity); Allocator::deallocate(data_); data_ = Allocator::allocate(new_capacity); capacity_ = new_capacity; size_ = new_size; Loading @@ -363,7 +360,7 @@ void Vector<ScalarType, device_name, Allocator>::resizeNoCopy(size_t new_size) { template <typename ScalarType, DeviceType device_name, class Allocator> void Vector<ScalarType, device_name, Allocator>::clear() { allocator_.deallocate(data_); Allocator::deallocate(data_); size_ = capacity_ = 0; } Loading