Loading applications/cluster_solver_check/cluster_solver_check.cpp +10 −15 Original line number Diff line number Diff line Loading @@ -35,7 +35,7 @@ int main(int argc, char** argv) { if (argc < 2) { std::cerr << "Usage: " << argv[0] << " input_file.json [skip ed]" << std::endl; std::cerr << "Usage: " << argv[0] << " input_file.json" << std::endl; return -1; } Loading @@ -43,8 +43,8 @@ int main(int argc, char** argv) { try { std::string input_file(argv[1]); const bool skip_ed = argc > 2 ? std::atoi(argv[2]) : false; const bool perform_statistical_test = concurrency.number_of_processors() >= 8 && !skip_ed; const bool perform_statistical_test = concurrency.number_of_processors() >= 8; Profiler::start(); Loading Loading @@ -94,26 +94,21 @@ int main(int argc, char** argv) { // ED solver EdSolver ed_solver(parameters, dca_data_imag, dca_data_real); if (!skip_ed) { ed_solver.initialize(0); ed_solver.execute(); ed_solver.finalize(dca_loop_data); if (concurrency.id() == concurrency.first()) { ed_solver.write(data_file_ed); } } const auto Sigma_ed(dca_data_imag.Sigma); const int tested_frequencies = 10; const auto G_ed(dca::math::util::cutFrequency(dca_data_imag.G_k_w, tested_frequencies)); if (concurrency.id() == concurrency.first()) { ed_solver.write(data_file_ed); } // QMC solver // The QMC solver uses the free Greens function G0 computed by the ED solver. // It is passed via the dca_data_imag object. if (skip_ed) dca_data_imag.initialize(); ClusterSolver qmc_solver(parameters, dca_data_imag); qmc_solver.initialize(1); // 1 = dummy iteration number qmc_solver.integrate(); Loading include/dca/config/accumulation_options.hpp.in +3 −3 Original line number Diff line number Diff line Loading @@ -9,8 +9,8 @@ // // This class stores compile time options for the MC accumulation. #ifndef DCA_CONFIG_ACCUMULATON_OPTIONS_HPP #define DCA_CONFIG_ACCUMULATON_OPTIONS_HPP #ifndef DCA_CONFIG_ACCUMULATION_OPTIONS_HPP #define DCA_CONFIG_ACCUMULATION_OPTIONS_HPP #ifdef DCA_HAVE_CUDA #include "dca/linalg/util/allocators/device_allocator.hpp" Loading @@ -36,4 +36,4 @@ struct AccumulationOptions { } // config } // dca #endif // DCA_CONFIG_ACCUMULATON_OPTIONS_HPP #endif // DCA_CONFIG_ACCUMULATION_OPTIONS_HPP include/dca/config/cmake_options.hpp +2 −2 Original line number Diff line number Diff line Loading @@ -50,7 +50,7 @@ struct CMakeOptions { static void print(); }; } // config } // dca } // namespace config } // namespace dca #endif // DCA_CONFIG_CMAKE_OPTIONS_HPP include/dca/linalg/reshapable_matrix.hpp +65 −47 Original line number Diff line number Diff line Loading @@ -37,21 +37,29 @@ public: using ThisType = ReshapableMatrix<ScalarType, device_name, Allocator>; using ValueType = ScalarType; ReshapableMatrix(int size = 0); // Default contructor creates a matrix of zero size and capacity. ReshapableMatrix() = default; // Initializes a square size x size matrix. ReshapableMatrix(int size); // Initializes a square size.first x size.second matrix. ReshapableMatrix(std::pair<int, int> size); // Copy and move constructor: // Constructs a matrix with name name, size rhs.size() and a copy of the elements of rhs. ReshapableMatrix(const ReshapableMatrix<ScalarType, device_name, Allocator>& rhs); // Constructs a matrix with name name, size rhs.size(). The elements of rhs are moved. // Postcondition: rhs is a (0 x 0) matrix. ReshapableMatrix(ReshapableMatrix<ScalarType, device_name, Allocator>&& rhs); // Contructs a matrix with name name, size rhs.size() and a copy of the elements of rhs, where rhs // elements are stored on a different device. // Contructs a matrix with size rhs.size() and a copy of the elements of rhs. ReshapableMatrix(const ThisType& rhs); template <DeviceType rhs_device_name, class AllocatorRhs> ReshapableMatrix(const ReshapableMatrix<ScalarType, rhs_device_name, AllocatorRhs>& rhs); // Constructs a matrix with size rhs.size(). The elements of rhs are moved. ReshapableMatrix(ThisType&& rhs); // Resize the matrix to rhs.size() and copies the elements. ReshapableMatrix& operator=(const ThisType& rhs); template <DeviceType rhs_device_name, class AllocatorRhs> ReshapableMatrix& operator=(const ReshapableMatrix<ScalarType, rhs_device_name, AllocatorRhs>& rhs); // Moves the elements of rhs into this matrix. ReshapableMatrix& operator=(ThisType&& rhs); ~ReshapableMatrix(); // Returns true if this is equal to other, false otherwise. Loading Loading @@ -118,23 +126,19 @@ public: return size_.first; } // Resizes *this to a (new_size * new_size) matrix. // Resizes *this to a (new_size.first * new_size.second) matrix. // The previous elements are not copied, therefore all the elements // may have any value after the call to this method. // Returns: true if reallocation took place. // Remark: The capacity of the matrix and element pointers do not change // if new_size <= capacity().first and new_size <= capacity().second. bool resizeNoCopy(std::pair<int, int> new_size); // Resizes *this to a (new_size * new_size) matrix. See previous method for details. bool resizeNoCopy(int new_size) { return resizeNoCopy(std::make_pair(new_size, new_size)); } // Resizes *this to a (new_size.first * new_size.second) matrix. // The previous elements are not copied, therefore all the elements // may have any value after the call to this method. // Returns: true if reallocation took place. // Remark: The capacity of the matrix and element pointers do not change // if new_size.first <= capacity().first and new_size.second <= capacity().second. bool resizeNoCopy(std::pair<int, int> new_size); // Reserves the space for at least (new_size.first * new_size.second) elements without changing // the matrix size. The value of the matrix elements is undefined after calling this method. // Returns: true if reallocation took place. bool reserveNoCopy(std::size_t new_size); void swap(ReshapableMatrix<ScalarType, device_name, Allocator>& other); Loading @@ -142,11 +146,6 @@ public: // Releases the memory allocated by *this and sets size and capacity to zero. void clear(); // Asynchronous assignment (copy with stream = getStream(thread_id, stream_id)) // + synchronization of stream template <DeviceType rhs_device_name> void set(const ReshapableMatrix<ScalarType, rhs_device_name>& rhs, int thread_id, int stream_id); #ifdef DCA_HAVE_CUDA // Asynchronous assignment. template <DeviceType rhs_device_name> Loading @@ -161,8 +160,8 @@ public: #else // Synchronous assignment fallback for SetAsync. template <DeviceType rhs_device_name> void setAsync(const ReshapableMatrix<ScalarType, rhs_device_name>& rhs, int thread_id, int stream_id); void setAsync(const ReshapableMatrix<ScalarType, rhs_device_name>& rhs, int /*thread_id*/, int /*stream_id*/); #endif // DCA_HAVE_CUDA Loading @@ -175,8 +174,8 @@ private: return static_cast<size_t>(size.first) * static_cast<size_t>(size.second); } std::pair<int, int> size_; std::size_t capacity_; std::pair<int, int> size_ = std::make_pair(0, 0); std::size_t capacity_ = 0; ValueType* data_ = nullptr; Loading @@ -198,27 +197,55 @@ ReshapableMatrix<ScalarType, device_name, Allocator>::ReshapableMatrix(std::pair } template <typename ScalarType, DeviceType device_name, class Allocator> ReshapableMatrix<ScalarType, device_name, Allocator>::ReshapableMatrix(const ThisType& rhs) { *this = rhs; } template <typename ScalarType, DeviceType device_name, class Allocator> template <DeviceType rhs_device_name, class AllocatorRhs> ReshapableMatrix<ScalarType, device_name, Allocator>::ReshapableMatrix( const ReshapableMatrix<ScalarType, device_name, Allocator>& rhs) { const ReshapableMatrix<ScalarType, rhs_device_name, AllocatorRhs>& rhs) { *this = rhs; } template <typename ScalarType, DeviceType device_name, class Allocator> ReshapableMatrix<ScalarType, device_name, Allocator>::ReshapableMatrix( ReshapableMatrix<ScalarType, device_name, Allocator>&& rhs) : size_(rhs.size_), capacity_(rhs.capacity_), data_(rhs.data_) { rhs.capacity_ = 0; rhs.size_ = std::make_pair(0, 0); rhs.data_ = nullptr; : ReshapableMatrix<ScalarType, device_name, Allocator>() { swap(rhs); } template <typename ScalarType, DeviceType device_name, class Allocator> ReshapableMatrix<ScalarType, device_name, Allocator>& ReshapableMatrix< ScalarType, device_name, Allocator>::operator=(const ThisType& rhs) { size_ = rhs.size_; capacity_ = rhs.capacity_; Allocator::deallocate(data_); data_ = Allocator::allocate(capacity_); util::memoryCopy(data_, leadingDimension(), rhs.data_, rhs.leadingDimension(), size_); return *this; } template <typename ScalarType, DeviceType device_name, class Allocator> template <DeviceType rhs_device_name, class AllocatorRhs> ReshapableMatrix<ScalarType, device_name, Allocator>::ReshapableMatrix( const ReshapableMatrix<ScalarType, rhs_device_name, AllocatorRhs>& rhs) : size_(rhs.size_), capacity_(rhs.capacity_) { ReshapableMatrix<ScalarType, device_name, Allocator>& ReshapableMatrix< ScalarType, device_name, Allocator>::operator=(const ReshapableMatrix<ScalarType, rhs_device_name, AllocatorRhs>& rhs) { size_ = rhs.size_; capacity_ = rhs.capacity_; Allocator::deallocate(data_); data_ = Allocator::allocate(capacity_); util::memoryCopy(data_, leadingDimension(), rhs.data_, rhs.leadingDimension(), size_); return *this; } template <typename ScalarType, DeviceType device_name, class Allocator> ReshapableMatrix<ScalarType, device_name, Allocator>& ReshapableMatrix< ScalarType, device_name, Allocator>::operator=(ThisType&& rhs) { swap(rhs); return *this; } template <typename ScalarType, DeviceType device_name, class Allocator> Loading Loading @@ -284,15 +311,6 @@ void ReshapableMatrix<ScalarType, device_name, Allocator>::clear() { capacity_ = 0; } template <typename ScalarType, DeviceType device_name, class Allocator> template <DeviceType rhs_device_name> void ReshapableMatrix<ScalarType, device_name, Allocator>::set( const ReshapableMatrix<ScalarType, rhs_device_name>& rhs, int thread_id, int stream_id) { resize(rhs.size_); util::memoryCopy(data_, leadingDimension(), rhs.data_, rhs.leadingDimension(), size_, thread_id, stream_id); } #ifdef DCA_HAVE_CUDA template <typename ScalarType, DeviceType device_name, class Allocator> Loading Loading @@ -322,7 +340,7 @@ template <typename ScalarType, DeviceType device_name, class Allocator> template <DeviceType rhs_device_name> void ReshapableMatrix<ScalarType, device_name, Allocator>::setAsync( const ReshapableMatrix<ScalarType, rhs_device_name>& rhs, int /*thread_id*/, int /*stream_id*/) { set(rhs); *this = rhs; } #endif // DCA_HAVE_CUDA Loading include/dca/linalg/util/allocators/aligned_allocator.hpp +3 −0 Original line number Diff line number Diff line Loading @@ -21,6 +21,9 @@ template <typename T> class AlignedAllocator { protected: T* allocate(std::size_t n) { if (!n) return nullptr; T* ptr; int err = posix_memalign((void**)&ptr, 128, n * sizeof(T)); if (err) Loading Loading
applications/cluster_solver_check/cluster_solver_check.cpp +10 −15 Original line number Diff line number Diff line Loading @@ -35,7 +35,7 @@ int main(int argc, char** argv) { if (argc < 2) { std::cerr << "Usage: " << argv[0] << " input_file.json [skip ed]" << std::endl; std::cerr << "Usage: " << argv[0] << " input_file.json" << std::endl; return -1; } Loading @@ -43,8 +43,8 @@ int main(int argc, char** argv) { try { std::string input_file(argv[1]); const bool skip_ed = argc > 2 ? std::atoi(argv[2]) : false; const bool perform_statistical_test = concurrency.number_of_processors() >= 8 && !skip_ed; const bool perform_statistical_test = concurrency.number_of_processors() >= 8; Profiler::start(); Loading Loading @@ -94,26 +94,21 @@ int main(int argc, char** argv) { // ED solver EdSolver ed_solver(parameters, dca_data_imag, dca_data_real); if (!skip_ed) { ed_solver.initialize(0); ed_solver.execute(); ed_solver.finalize(dca_loop_data); if (concurrency.id() == concurrency.first()) { ed_solver.write(data_file_ed); } } const auto Sigma_ed(dca_data_imag.Sigma); const int tested_frequencies = 10; const auto G_ed(dca::math::util::cutFrequency(dca_data_imag.G_k_w, tested_frequencies)); if (concurrency.id() == concurrency.first()) { ed_solver.write(data_file_ed); } // QMC solver // The QMC solver uses the free Greens function G0 computed by the ED solver. // It is passed via the dca_data_imag object. if (skip_ed) dca_data_imag.initialize(); ClusterSolver qmc_solver(parameters, dca_data_imag); qmc_solver.initialize(1); // 1 = dummy iteration number qmc_solver.integrate(); Loading
include/dca/config/accumulation_options.hpp.in +3 −3 Original line number Diff line number Diff line Loading @@ -9,8 +9,8 @@ // // This class stores compile time options for the MC accumulation. #ifndef DCA_CONFIG_ACCUMULATON_OPTIONS_HPP #define DCA_CONFIG_ACCUMULATON_OPTIONS_HPP #ifndef DCA_CONFIG_ACCUMULATION_OPTIONS_HPP #define DCA_CONFIG_ACCUMULATION_OPTIONS_HPP #ifdef DCA_HAVE_CUDA #include "dca/linalg/util/allocators/device_allocator.hpp" Loading @@ -36,4 +36,4 @@ struct AccumulationOptions { } // config } // dca #endif // DCA_CONFIG_ACCUMULATON_OPTIONS_HPP #endif // DCA_CONFIG_ACCUMULATION_OPTIONS_HPP
include/dca/config/cmake_options.hpp +2 −2 Original line number Diff line number Diff line Loading @@ -50,7 +50,7 @@ struct CMakeOptions { static void print(); }; } // config } // dca } // namespace config } // namespace dca #endif // DCA_CONFIG_CMAKE_OPTIONS_HPP
include/dca/linalg/reshapable_matrix.hpp +65 −47 Original line number Diff line number Diff line Loading @@ -37,21 +37,29 @@ public: using ThisType = ReshapableMatrix<ScalarType, device_name, Allocator>; using ValueType = ScalarType; ReshapableMatrix(int size = 0); // Default contructor creates a matrix of zero size and capacity. ReshapableMatrix() = default; // Initializes a square size x size matrix. ReshapableMatrix(int size); // Initializes a square size.first x size.second matrix. ReshapableMatrix(std::pair<int, int> size); // Copy and move constructor: // Constructs a matrix with name name, size rhs.size() and a copy of the elements of rhs. ReshapableMatrix(const ReshapableMatrix<ScalarType, device_name, Allocator>& rhs); // Constructs a matrix with name name, size rhs.size(). The elements of rhs are moved. // Postcondition: rhs is a (0 x 0) matrix. ReshapableMatrix(ReshapableMatrix<ScalarType, device_name, Allocator>&& rhs); // Contructs a matrix with name name, size rhs.size() and a copy of the elements of rhs, where rhs // elements are stored on a different device. // Contructs a matrix with size rhs.size() and a copy of the elements of rhs. ReshapableMatrix(const ThisType& rhs); template <DeviceType rhs_device_name, class AllocatorRhs> ReshapableMatrix(const ReshapableMatrix<ScalarType, rhs_device_name, AllocatorRhs>& rhs); // Constructs a matrix with size rhs.size(). The elements of rhs are moved. ReshapableMatrix(ThisType&& rhs); // Resize the matrix to rhs.size() and copies the elements. ReshapableMatrix& operator=(const ThisType& rhs); template <DeviceType rhs_device_name, class AllocatorRhs> ReshapableMatrix& operator=(const ReshapableMatrix<ScalarType, rhs_device_name, AllocatorRhs>& rhs); // Moves the elements of rhs into this matrix. ReshapableMatrix& operator=(ThisType&& rhs); ~ReshapableMatrix(); // Returns true if this is equal to other, false otherwise. Loading Loading @@ -118,23 +126,19 @@ public: return size_.first; } // Resizes *this to a (new_size * new_size) matrix. // Resizes *this to a (new_size.first * new_size.second) matrix. // The previous elements are not copied, therefore all the elements // may have any value after the call to this method. // Returns: true if reallocation took place. // Remark: The capacity of the matrix and element pointers do not change // if new_size <= capacity().first and new_size <= capacity().second. bool resizeNoCopy(std::pair<int, int> new_size); // Resizes *this to a (new_size * new_size) matrix. See previous method for details. bool resizeNoCopy(int new_size) { return resizeNoCopy(std::make_pair(new_size, new_size)); } // Resizes *this to a (new_size.first * new_size.second) matrix. // The previous elements are not copied, therefore all the elements // may have any value after the call to this method. // Returns: true if reallocation took place. // Remark: The capacity of the matrix and element pointers do not change // if new_size.first <= capacity().first and new_size.second <= capacity().second. bool resizeNoCopy(std::pair<int, int> new_size); // Reserves the space for at least (new_size.first * new_size.second) elements without changing // the matrix size. The value of the matrix elements is undefined after calling this method. // Returns: true if reallocation took place. bool reserveNoCopy(std::size_t new_size); void swap(ReshapableMatrix<ScalarType, device_name, Allocator>& other); Loading @@ -142,11 +146,6 @@ public: // Releases the memory allocated by *this and sets size and capacity to zero. void clear(); // Asynchronous assignment (copy with stream = getStream(thread_id, stream_id)) // + synchronization of stream template <DeviceType rhs_device_name> void set(const ReshapableMatrix<ScalarType, rhs_device_name>& rhs, int thread_id, int stream_id); #ifdef DCA_HAVE_CUDA // Asynchronous assignment. template <DeviceType rhs_device_name> Loading @@ -161,8 +160,8 @@ public: #else // Synchronous assignment fallback for SetAsync. template <DeviceType rhs_device_name> void setAsync(const ReshapableMatrix<ScalarType, rhs_device_name>& rhs, int thread_id, int stream_id); void setAsync(const ReshapableMatrix<ScalarType, rhs_device_name>& rhs, int /*thread_id*/, int /*stream_id*/); #endif // DCA_HAVE_CUDA Loading @@ -175,8 +174,8 @@ private: return static_cast<size_t>(size.first) * static_cast<size_t>(size.second); } std::pair<int, int> size_; std::size_t capacity_; std::pair<int, int> size_ = std::make_pair(0, 0); std::size_t capacity_ = 0; ValueType* data_ = nullptr; Loading @@ -198,27 +197,55 @@ ReshapableMatrix<ScalarType, device_name, Allocator>::ReshapableMatrix(std::pair } template <typename ScalarType, DeviceType device_name, class Allocator> ReshapableMatrix<ScalarType, device_name, Allocator>::ReshapableMatrix(const ThisType& rhs) { *this = rhs; } template <typename ScalarType, DeviceType device_name, class Allocator> template <DeviceType rhs_device_name, class AllocatorRhs> ReshapableMatrix<ScalarType, device_name, Allocator>::ReshapableMatrix( const ReshapableMatrix<ScalarType, device_name, Allocator>& rhs) { const ReshapableMatrix<ScalarType, rhs_device_name, AllocatorRhs>& rhs) { *this = rhs; } template <typename ScalarType, DeviceType device_name, class Allocator> ReshapableMatrix<ScalarType, device_name, Allocator>::ReshapableMatrix( ReshapableMatrix<ScalarType, device_name, Allocator>&& rhs) : size_(rhs.size_), capacity_(rhs.capacity_), data_(rhs.data_) { rhs.capacity_ = 0; rhs.size_ = std::make_pair(0, 0); rhs.data_ = nullptr; : ReshapableMatrix<ScalarType, device_name, Allocator>() { swap(rhs); } template <typename ScalarType, DeviceType device_name, class Allocator> ReshapableMatrix<ScalarType, device_name, Allocator>& ReshapableMatrix< ScalarType, device_name, Allocator>::operator=(const ThisType& rhs) { size_ = rhs.size_; capacity_ = rhs.capacity_; Allocator::deallocate(data_); data_ = Allocator::allocate(capacity_); util::memoryCopy(data_, leadingDimension(), rhs.data_, rhs.leadingDimension(), size_); return *this; } template <typename ScalarType, DeviceType device_name, class Allocator> template <DeviceType rhs_device_name, class AllocatorRhs> ReshapableMatrix<ScalarType, device_name, Allocator>::ReshapableMatrix( const ReshapableMatrix<ScalarType, rhs_device_name, AllocatorRhs>& rhs) : size_(rhs.size_), capacity_(rhs.capacity_) { ReshapableMatrix<ScalarType, device_name, Allocator>& ReshapableMatrix< ScalarType, device_name, Allocator>::operator=(const ReshapableMatrix<ScalarType, rhs_device_name, AllocatorRhs>& rhs) { size_ = rhs.size_; capacity_ = rhs.capacity_; Allocator::deallocate(data_); data_ = Allocator::allocate(capacity_); util::memoryCopy(data_, leadingDimension(), rhs.data_, rhs.leadingDimension(), size_); return *this; } template <typename ScalarType, DeviceType device_name, class Allocator> ReshapableMatrix<ScalarType, device_name, Allocator>& ReshapableMatrix< ScalarType, device_name, Allocator>::operator=(ThisType&& rhs) { swap(rhs); return *this; } template <typename ScalarType, DeviceType device_name, class Allocator> Loading Loading @@ -284,15 +311,6 @@ void ReshapableMatrix<ScalarType, device_name, Allocator>::clear() { capacity_ = 0; } template <typename ScalarType, DeviceType device_name, class Allocator> template <DeviceType rhs_device_name> void ReshapableMatrix<ScalarType, device_name, Allocator>::set( const ReshapableMatrix<ScalarType, rhs_device_name>& rhs, int thread_id, int stream_id) { resize(rhs.size_); util::memoryCopy(data_, leadingDimension(), rhs.data_, rhs.leadingDimension(), size_, thread_id, stream_id); } #ifdef DCA_HAVE_CUDA template <typename ScalarType, DeviceType device_name, class Allocator> Loading Loading @@ -322,7 +340,7 @@ template <typename ScalarType, DeviceType device_name, class Allocator> template <DeviceType rhs_device_name> void ReshapableMatrix<ScalarType, device_name, Allocator>::setAsync( const ReshapableMatrix<ScalarType, rhs_device_name>& rhs, int /*thread_id*/, int /*stream_id*/) { set(rhs); *this = rhs; } #endif // DCA_HAVE_CUDA Loading
include/dca/linalg/util/allocators/aligned_allocator.hpp +3 −0 Original line number Diff line number Diff line Loading @@ -21,6 +21,9 @@ template <typename T> class AlignedAllocator { protected: T* allocate(std::size_t n) { if (!n) return nullptr; T* ptr; int err = posix_memalign((void**)&ptr, 128, n * sizeof(T)); if (err) Loading