Loading include/dca/linalg/reshapable_matrix.hpp +65 −47 Original line number Diff line number Diff line Loading @@ -37,21 +37,29 @@ public: using ThisType = ReshapableMatrix<ScalarType, device_name, Allocator>; using ValueType = ScalarType; ReshapableMatrix(int size = 0); // Default contructor creates a matrix of zero size and capacity. ReshapableMatrix() = default; // Initializes a square size x size matrix. ReshapableMatrix(int size); // Initializes a square size.first x size.second matrix. ReshapableMatrix(std::pair<int, int> size); // Copy and move constructor: // Constructs a matrix with name name, size rhs.size() and a copy of the elements of rhs. ReshapableMatrix(const ReshapableMatrix<ScalarType, device_name, Allocator>& rhs); // Constructs a matrix with name name, size rhs.size(). The elements of rhs are moved. // Postcondition: rhs is a (0 x 0) matrix. ReshapableMatrix(ReshapableMatrix<ScalarType, device_name, Allocator>&& rhs); // Contructs a matrix with name name, size rhs.size() and a copy of the elements of rhs, where rhs // elements are stored on a different device. // Contructs a matrix with size rhs.size() and a copy of the elements of rhs. ReshapableMatrix(const ThisType& rhs); template <DeviceType rhs_device_name, class AllocatorRhs> ReshapableMatrix(const ReshapableMatrix<ScalarType, rhs_device_name, AllocatorRhs>& rhs); // Constructs a matrix with size rhs.size(). The elements of rhs are moved. ReshapableMatrix(ThisType&& rhs); // Resize the matrix to rhs.size() and copies the elements. ReshapableMatrix& operator=(const ThisType& rhs); template <DeviceType rhs_device_name, class AllocatorRhs> ReshapableMatrix& operator=(const ReshapableMatrix<ScalarType, rhs_device_name, AllocatorRhs>& rhs); // Moves the elements of rhs into this matrix. ReshapableMatrix& operator=(ThisType&& rhs); ~ReshapableMatrix(); // Returns true if this is equal to other, false otherwise. Loading Loading @@ -118,23 +126,19 @@ public: return size_.first; } // Resizes *this to a (new_size * new_size) matrix. // Resizes *this to a (new_size.first * new_size.second) matrix. // The previous elements are not copied, therefore all the elements // may have any value after the call to this method. // Returns: true if reallocation took place. // Remark: The capacity of the matrix and element pointers do not change // if new_size <= capacity().first and new_size <= capacity().second. bool resizeNoCopy(std::pair<int, int> new_size); // Resizes *this to a (new_size * new_size) matrix. See previous method for details. bool resizeNoCopy(int new_size) { return resizeNoCopy(std::make_pair(new_size, new_size)); } // Resizes *this to a (new_size.first * new_size.second) matrix. // The previous elements are not copied, therefore all the elements // may have any value after the call to this method. // Returns: true if reallocation took place. // Remark: The capacity of the matrix and element pointers do not change // if new_size.first <= capacity().first and new_size.second <= capacity().second. bool resizeNoCopy(std::pair<int, int> new_size); // Reserves the space for at least (new_size.first * new_size.second) elements without changing // the matrix size. The value of the matrix elements is undefined after calling this method. // Returns: true if reallocation took place. bool reserveNoCopy(std::size_t new_size); void swap(ReshapableMatrix<ScalarType, device_name, Allocator>& other); Loading @@ -142,11 +146,6 @@ public: // Releases the memory allocated by *this and sets size and capacity to zero. void clear(); // Asynchronous assignment (copy with stream = getStream(thread_id, stream_id)) // + synchronization of stream template <DeviceType rhs_device_name> void set(const ReshapableMatrix<ScalarType, rhs_device_name>& rhs, int thread_id, int stream_id); #ifdef DCA_HAVE_CUDA // Asynchronous assignment. template <DeviceType rhs_device_name> Loading @@ -161,8 +160,8 @@ public: #else // Synchronous assignment fallback for SetAsync. template <DeviceType rhs_device_name> void setAsync(const ReshapableMatrix<ScalarType, rhs_device_name>& rhs, int thread_id, int stream_id); void setAsync(const ReshapableMatrix<ScalarType, rhs_device_name>& rhs, int /*thread_id*/, int /*stream_id*/); #endif // DCA_HAVE_CUDA Loading @@ -175,8 +174,8 @@ private: return static_cast<size_t>(size.first) * static_cast<size_t>(size.second); } std::pair<int, int> size_; std::size_t capacity_; std::pair<int, int> size_ = std::make_pair(0, 0); std::size_t capacity_ = 0; ValueType* data_ = nullptr; Loading @@ -198,27 +197,55 @@ ReshapableMatrix<ScalarType, device_name, Allocator>::ReshapableMatrix(std::pair } template <typename ScalarType, DeviceType device_name, class Allocator> ReshapableMatrix<ScalarType, device_name, Allocator>::ReshapableMatrix(const ThisType& rhs) { *this = rhs; } template <typename ScalarType, DeviceType device_name, class Allocator> template <DeviceType rhs_device_name, class AllocatorRhs> ReshapableMatrix<ScalarType, device_name, Allocator>::ReshapableMatrix( const ReshapableMatrix<ScalarType, device_name, Allocator>& rhs) { const ReshapableMatrix<ScalarType, rhs_device_name, AllocatorRhs>& rhs) { *this = rhs; } template <typename ScalarType, DeviceType device_name, class Allocator> ReshapableMatrix<ScalarType, device_name, Allocator>::ReshapableMatrix( ReshapableMatrix<ScalarType, device_name, Allocator>&& rhs) : size_(rhs.size_), capacity_(rhs.capacity_), data_(rhs.data_) { rhs.capacity_ = 0; rhs.size_ = std::make_pair(0, 0); rhs.data_ = nullptr; : ReshapableMatrix<ScalarType, device_name, Allocator>() { swap(rhs); } template <typename ScalarType, DeviceType device_name, class Allocator> ReshapableMatrix<ScalarType, device_name, Allocator>& ReshapableMatrix< ScalarType, device_name, Allocator>::operator=(const ThisType& rhs) { size_ = rhs.size_; capacity_ = rhs.capacity_; Allocator::deallocate(data_); data_ = Allocator::allocate(capacity_); util::memoryCopy(data_, leadingDimension(), rhs.data_, rhs.leadingDimension(), size_); return *this; } template <typename ScalarType, DeviceType device_name, class Allocator> template <DeviceType rhs_device_name, class AllocatorRhs> ReshapableMatrix<ScalarType, device_name, Allocator>::ReshapableMatrix( const ReshapableMatrix<ScalarType, rhs_device_name, AllocatorRhs>& rhs) : size_(rhs.size_), capacity_(rhs.capacity_) { ReshapableMatrix<ScalarType, device_name, Allocator>& ReshapableMatrix< ScalarType, device_name, Allocator>::operator=(const ReshapableMatrix<ScalarType, rhs_device_name, AllocatorRhs>& rhs) { size_ = rhs.size_; capacity_ = rhs.capacity_; Allocator::deallocate(data_); data_ = Allocator::allocate(capacity_); util::memoryCopy(data_, leadingDimension(), rhs.data_, rhs.leadingDimension(), size_); return *this; } template <typename ScalarType, DeviceType device_name, class Allocator> ReshapableMatrix<ScalarType, device_name, Allocator>& ReshapableMatrix< ScalarType, device_name, Allocator>::operator=(ThisType&& rhs) { swap(rhs); return *this; } template <typename ScalarType, DeviceType device_name, class Allocator> Loading Loading @@ -284,15 +311,6 @@ void ReshapableMatrix<ScalarType, device_name, Allocator>::clear() { capacity_ = 0; } template <typename ScalarType, DeviceType device_name, class Allocator> template <DeviceType rhs_device_name> void ReshapableMatrix<ScalarType, device_name, Allocator>::set( const ReshapableMatrix<ScalarType, rhs_device_name>& rhs, int thread_id, int stream_id) { resize(rhs.size_); util::memoryCopy(data_, leadingDimension(), rhs.data_, rhs.leadingDimension(), size_, thread_id, stream_id); } #ifdef DCA_HAVE_CUDA template <typename ScalarType, DeviceType device_name, class Allocator> Loading Loading @@ -322,7 +340,7 @@ template <typename ScalarType, DeviceType device_name, class Allocator> template <DeviceType rhs_device_name> void ReshapableMatrix<ScalarType, device_name, Allocator>::setAsync( const ReshapableMatrix<ScalarType, rhs_device_name>& rhs, int /*thread_id*/, int /*stream_id*/) { set(rhs); *this = rhs; } #endif // DCA_HAVE_CUDA Loading include/dca/linalg/util/allocators/aligned_allocator.hpp +3 −0 Original line number Diff line number Diff line Loading @@ -21,6 +21,9 @@ template <typename T> class AlignedAllocator { protected: T* allocate(std::size_t n) { if (!n) return nullptr; T* ptr; int err = posix_memalign((void**)&ptr, 128, n * sizeof(T)); if (err) Loading test/unit/linalg/CMakeLists.txt +4 −0 Original line number Diff line number Diff line Loading @@ -49,4 +49,8 @@ dca_add_gtest(matrixop_complex_gpu_test CUDA LIBS ${DCA_LIBS}) dca_add_gtest(reshapable_matrix_cpu_test GTEST_MAIN) dca_add_gtest(reshapable_matrix_gpu_test CUDA GTEST_MAIN LIBS) add_subdirectory(util) test/unit/linalg/cpu_test_util.hpp +6 −8 Original line number Diff line number Diff line Loading @@ -19,23 +19,21 @@ namespace testing { // The elements of the matrix will be set with mat(i, j) = func(i, j). // In: func // Out: mat template <typename ScalarType, typename F> void setMatrixElements(dca::linalg::Matrix<ScalarType, dca::linalg::CPU>& mat, F& func) { template <typename Matrix, typename F> void setMatrixElements(Matrix& mat, F&& func) { for (int j = 0; j < mat.nrCols(); ++j) for (int i = 0; i < mat.nrRows(); ++i) { ScalarType el(func(i, j)); mat(i, j) = el; mat(i, j) = func(i, j); } } // The elements of the vector will be set with vec[i] = func(i). // In: func // Out: vec template <typename ScalarType, typename F> void setVectorElements(dca::linalg::Vector<ScalarType, dca::linalg::CPU>& vec, F& func) { template <typename Vector, typename F> void setVectorElements(Vector& vec, F& func) { for (int i = 0; i < vec.size(); ++i) { ScalarType el(func(i)); vec[i] = el; vec[i] = func(i); } } } // testing Loading test/unit/linalg/matrix_cpu_test.cpp +1 −1 Original line number Diff line number Diff line Loading @@ -9,7 +9,7 @@ // // This file tests the Matrix<CPU> class. #include "dca/linalg/matrix.hpp" #include "dca/linalg/reshapable_matrix.hpp" #include <complex> #include <string> #include <utility> Loading Loading
include/dca/linalg/reshapable_matrix.hpp +65 −47 Original line number Diff line number Diff line Loading @@ -37,21 +37,29 @@ public: using ThisType = ReshapableMatrix<ScalarType, device_name, Allocator>; using ValueType = ScalarType; ReshapableMatrix(int size = 0); // Default contructor creates a matrix of zero size and capacity. ReshapableMatrix() = default; // Initializes a square size x size matrix. ReshapableMatrix(int size); // Initializes a square size.first x size.second matrix. ReshapableMatrix(std::pair<int, int> size); // Copy and move constructor: // Constructs a matrix with name name, size rhs.size() and a copy of the elements of rhs. ReshapableMatrix(const ReshapableMatrix<ScalarType, device_name, Allocator>& rhs); // Constructs a matrix with name name, size rhs.size(). The elements of rhs are moved. // Postcondition: rhs is a (0 x 0) matrix. ReshapableMatrix(ReshapableMatrix<ScalarType, device_name, Allocator>&& rhs); // Contructs a matrix with name name, size rhs.size() and a copy of the elements of rhs, where rhs // elements are stored on a different device. // Contructs a matrix with size rhs.size() and a copy of the elements of rhs. ReshapableMatrix(const ThisType& rhs); template <DeviceType rhs_device_name, class AllocatorRhs> ReshapableMatrix(const ReshapableMatrix<ScalarType, rhs_device_name, AllocatorRhs>& rhs); // Constructs a matrix with size rhs.size(). The elements of rhs are moved. ReshapableMatrix(ThisType&& rhs); // Resize the matrix to rhs.size() and copies the elements. ReshapableMatrix& operator=(const ThisType& rhs); template <DeviceType rhs_device_name, class AllocatorRhs> ReshapableMatrix& operator=(const ReshapableMatrix<ScalarType, rhs_device_name, AllocatorRhs>& rhs); // Moves the elements of rhs into this matrix. ReshapableMatrix& operator=(ThisType&& rhs); ~ReshapableMatrix(); // Returns true if this is equal to other, false otherwise. Loading Loading @@ -118,23 +126,19 @@ public: return size_.first; } // Resizes *this to a (new_size * new_size) matrix. // Resizes *this to a (new_size.first * new_size.second) matrix. // The previous elements are not copied, therefore all the elements // may have any value after the call to this method. // Returns: true if reallocation took place. // Remark: The capacity of the matrix and element pointers do not change // if new_size <= capacity().first and new_size <= capacity().second. bool resizeNoCopy(std::pair<int, int> new_size); // Resizes *this to a (new_size * new_size) matrix. See previous method for details. bool resizeNoCopy(int new_size) { return resizeNoCopy(std::make_pair(new_size, new_size)); } // Resizes *this to a (new_size.first * new_size.second) matrix. // The previous elements are not copied, therefore all the elements // may have any value after the call to this method. // Returns: true if reallocation took place. // Remark: The capacity of the matrix and element pointers do not change // if new_size.first <= capacity().first and new_size.second <= capacity().second. bool resizeNoCopy(std::pair<int, int> new_size); // Reserves the space for at least (new_size.first * new_size.second) elements without changing // the matrix size. The value of the matrix elements is undefined after calling this method. // Returns: true if reallocation took place. bool reserveNoCopy(std::size_t new_size); void swap(ReshapableMatrix<ScalarType, device_name, Allocator>& other); Loading @@ -142,11 +146,6 @@ public: // Releases the memory allocated by *this and sets size and capacity to zero. void clear(); // Asynchronous assignment (copy with stream = getStream(thread_id, stream_id)) // + synchronization of stream template <DeviceType rhs_device_name> void set(const ReshapableMatrix<ScalarType, rhs_device_name>& rhs, int thread_id, int stream_id); #ifdef DCA_HAVE_CUDA // Asynchronous assignment. template <DeviceType rhs_device_name> Loading @@ -161,8 +160,8 @@ public: #else // Synchronous assignment fallback for SetAsync. template <DeviceType rhs_device_name> void setAsync(const ReshapableMatrix<ScalarType, rhs_device_name>& rhs, int thread_id, int stream_id); void setAsync(const ReshapableMatrix<ScalarType, rhs_device_name>& rhs, int /*thread_id*/, int /*stream_id*/); #endif // DCA_HAVE_CUDA Loading @@ -175,8 +174,8 @@ private: return static_cast<size_t>(size.first) * static_cast<size_t>(size.second); } std::pair<int, int> size_; std::size_t capacity_; std::pair<int, int> size_ = std::make_pair(0, 0); std::size_t capacity_ = 0; ValueType* data_ = nullptr; Loading @@ -198,27 +197,55 @@ ReshapableMatrix<ScalarType, device_name, Allocator>::ReshapableMatrix(std::pair } template <typename ScalarType, DeviceType device_name, class Allocator> ReshapableMatrix<ScalarType, device_name, Allocator>::ReshapableMatrix(const ThisType& rhs) { *this = rhs; } template <typename ScalarType, DeviceType device_name, class Allocator> template <DeviceType rhs_device_name, class AllocatorRhs> ReshapableMatrix<ScalarType, device_name, Allocator>::ReshapableMatrix( const ReshapableMatrix<ScalarType, device_name, Allocator>& rhs) { const ReshapableMatrix<ScalarType, rhs_device_name, AllocatorRhs>& rhs) { *this = rhs; } template <typename ScalarType, DeviceType device_name, class Allocator> ReshapableMatrix<ScalarType, device_name, Allocator>::ReshapableMatrix( ReshapableMatrix<ScalarType, device_name, Allocator>&& rhs) : size_(rhs.size_), capacity_(rhs.capacity_), data_(rhs.data_) { rhs.capacity_ = 0; rhs.size_ = std::make_pair(0, 0); rhs.data_ = nullptr; : ReshapableMatrix<ScalarType, device_name, Allocator>() { swap(rhs); } template <typename ScalarType, DeviceType device_name, class Allocator> ReshapableMatrix<ScalarType, device_name, Allocator>& ReshapableMatrix< ScalarType, device_name, Allocator>::operator=(const ThisType& rhs) { size_ = rhs.size_; capacity_ = rhs.capacity_; Allocator::deallocate(data_); data_ = Allocator::allocate(capacity_); util::memoryCopy(data_, leadingDimension(), rhs.data_, rhs.leadingDimension(), size_); return *this; } template <typename ScalarType, DeviceType device_name, class Allocator> template <DeviceType rhs_device_name, class AllocatorRhs> ReshapableMatrix<ScalarType, device_name, Allocator>::ReshapableMatrix( const ReshapableMatrix<ScalarType, rhs_device_name, AllocatorRhs>& rhs) : size_(rhs.size_), capacity_(rhs.capacity_) { ReshapableMatrix<ScalarType, device_name, Allocator>& ReshapableMatrix< ScalarType, device_name, Allocator>::operator=(const ReshapableMatrix<ScalarType, rhs_device_name, AllocatorRhs>& rhs) { size_ = rhs.size_; capacity_ = rhs.capacity_; Allocator::deallocate(data_); data_ = Allocator::allocate(capacity_); util::memoryCopy(data_, leadingDimension(), rhs.data_, rhs.leadingDimension(), size_); return *this; } template <typename ScalarType, DeviceType device_name, class Allocator> ReshapableMatrix<ScalarType, device_name, Allocator>& ReshapableMatrix< ScalarType, device_name, Allocator>::operator=(ThisType&& rhs) { swap(rhs); return *this; } template <typename ScalarType, DeviceType device_name, class Allocator> Loading Loading @@ -284,15 +311,6 @@ void ReshapableMatrix<ScalarType, device_name, Allocator>::clear() { capacity_ = 0; } template <typename ScalarType, DeviceType device_name, class Allocator> template <DeviceType rhs_device_name> void ReshapableMatrix<ScalarType, device_name, Allocator>::set( const ReshapableMatrix<ScalarType, rhs_device_name>& rhs, int thread_id, int stream_id) { resize(rhs.size_); util::memoryCopy(data_, leadingDimension(), rhs.data_, rhs.leadingDimension(), size_, thread_id, stream_id); } #ifdef DCA_HAVE_CUDA template <typename ScalarType, DeviceType device_name, class Allocator> Loading Loading @@ -322,7 +340,7 @@ template <typename ScalarType, DeviceType device_name, class Allocator> template <DeviceType rhs_device_name> void ReshapableMatrix<ScalarType, device_name, Allocator>::setAsync( const ReshapableMatrix<ScalarType, rhs_device_name>& rhs, int /*thread_id*/, int /*stream_id*/) { set(rhs); *this = rhs; } #endif // DCA_HAVE_CUDA Loading
include/dca/linalg/util/allocators/aligned_allocator.hpp +3 −0 Original line number Diff line number Diff line Loading @@ -21,6 +21,9 @@ template <typename T> class AlignedAllocator { protected: T* allocate(std::size_t n) { if (!n) return nullptr; T* ptr; int err = posix_memalign((void**)&ptr, 128, n * sizeof(T)); if (err) Loading
test/unit/linalg/CMakeLists.txt +4 −0 Original line number Diff line number Diff line Loading @@ -49,4 +49,8 @@ dca_add_gtest(matrixop_complex_gpu_test CUDA LIBS ${DCA_LIBS}) dca_add_gtest(reshapable_matrix_cpu_test GTEST_MAIN) dca_add_gtest(reshapable_matrix_gpu_test CUDA GTEST_MAIN LIBS) add_subdirectory(util)
test/unit/linalg/cpu_test_util.hpp +6 −8 Original line number Diff line number Diff line Loading @@ -19,23 +19,21 @@ namespace testing { // The elements of the matrix will be set with mat(i, j) = func(i, j). // In: func // Out: mat template <typename ScalarType, typename F> void setMatrixElements(dca::linalg::Matrix<ScalarType, dca::linalg::CPU>& mat, F& func) { template <typename Matrix, typename F> void setMatrixElements(Matrix& mat, F&& func) { for (int j = 0; j < mat.nrCols(); ++j) for (int i = 0; i < mat.nrRows(); ++i) { ScalarType el(func(i, j)); mat(i, j) = el; mat(i, j) = func(i, j); } } // The elements of the vector will be set with vec[i] = func(i). // In: func // Out: vec template <typename ScalarType, typename F> void setVectorElements(dca::linalg::Vector<ScalarType, dca::linalg::CPU>& vec, F& func) { template <typename Vector, typename F> void setVectorElements(Vector& vec, F& func) { for (int i = 0; i < vec.size(); ++i) { ScalarType el(func(i)); vec[i] = el; vec[i] = func(i); } } } // testing Loading
test/unit/linalg/matrix_cpu_test.cpp +1 −1 Original line number Diff line number Diff line Loading @@ -9,7 +9,7 @@ // // This file tests the Matrix<CPU> class. #include "dca/linalg/matrix.hpp" #include "dca/linalg/reshapable_matrix.hpp" #include <complex> #include <string> #include <utility> Loading