Loading CMakeLists.txt +2 −4 Original line number Diff line number Diff line Loading @@ -144,17 +144,15 @@ set(DCA_LIBS ${DCA_CONCURRENCY_LIB} ${DCA_THREADING_LIBS} lapack cuda_utils ) if (DCA_HAVE_CUDA) list(APPEND DCA_CUDA_LIBS cuda_utils) list(APPEND DCA_LIBS blas_kernels ctaux_walker_kernels dnfft_kernels lapack_kernels mc_tools_kernels mc_kernels special_transform_kernels ${DCA_CUDA_LIBS}) endif() Loading cmake/dca_compiler_options.cmake +1 −1 Original line number Diff line number Diff line Loading @@ -21,7 +21,7 @@ set(DCA_WARNINGS -Wall -Wextra -Wpedantic -Wno-sign-compare -Wno-dangling-else) # Languange standard set(DCA_STD_FLAG -std=c++14) set(DCA_STD_FLAG -std=c++17) # Set C and CXX flags. add_compile_options(${DCA_WARNINGS}) Loading include/dca/linalg/matrix_view.hpp +45 −20 Original line number Diff line number Diff line Loading @@ -19,7 +19,7 @@ #include <stdexcept> #include "dca/linalg/device_type.hpp" #include "dca/linalg/matrix.hpp" #include "dca/util/cuda_definitions.hpp" namespace dca { namespace linalg { Loading @@ -33,56 +33,61 @@ public: MatrixView(ScalarType* data, Pair size, int ld); MatrixView(ScalarType* data, int size, int ld); MatrixView(ScalarType* data, int size); template <template <typename, DeviceType> class Matrix> MatrixView(Matrix<ScalarType, device_name>& mat); template <template <typename, DeviceType> class Matrix> MatrixView(Matrix<ScalarType, device_name>& mat, int offset_i, int offset_j); template <template <typename, DeviceType> class Matrix> MatrixView(Matrix<ScalarType, device_name>& mat, int offset_i, int offset_j, int ni, int nj); template <class MatrixB> MatrixView& operator=(const MatrixB& rhs); template <template <typename, DeviceType> class Matrix> MatrixView& operator=(const Matrix<ScalarType, device_name>& rhs); MatrixView& operator=(const MatrixView<ScalarType, device_name>& rhs); void print(std::ostream& out = std::cout) const; inline int leadingDimension() const { __DEVICE__ __HOST__ inline int leadingDimension() const { return ldm_; } inline int ld() const { __DEVICE__ __HOST__ inline int ld() const { return leadingDimension(); } std::pair<int, int> size() const { __DEVICE__ __HOST__ std::pair<int, int> size() const { return size_; } ScalarType* ptr() { __DEVICE__ __HOST__ ScalarType* ptr() { return ptr_; } const ScalarType* ptr() const { __DEVICE__ __HOST__ const ScalarType* ptr() const { return ptr_; } ScalarType* ptr(int i, int j) { __DEVICE__ __HOST__ ScalarType* ptr(int i, int j) { assert(0 <= i && i <= size_.first); assert(0 <= j && j <= size_.second); return ptr_ + leadingDimension() * j + i; } const ScalarType* ptr(int i, int j) const { __DEVICE__ __HOST__ const ScalarType* ptr(int i, int j) const { assert(0 <= i && i <= size_.first); assert(0 <= j && j <= size_.second); return ptr_ + leadingDimension() * j + i; } int nrRows() const { __DEVICE__ __HOST__ int nrRows() const { return size_.first; } int nrCols() const { __DEVICE__ __HOST__ int nrCols() const { return size_.second; } bool is_square() const { return size_.first == size_.second; } ScalarType& operator()(int i, int j) { __DEVICE__ __HOST__ ScalarType& operator()(int i, int j) { assert(0 <= i && i < size_.first); assert(0 <= j && j < size_.second); return ptr_[i + j * ldm_]; } const ScalarType& operator()(int i, int j) const { __DEVICE__ __HOST__ const ScalarType& operator()(int i, int j) const { assert(0 <= i && i < size_.first); assert(0 <= j && j < size_.second); return ptr_[i + j * ldm_]; Loading Loading @@ -111,10 +116,12 @@ MatrixView<ScalarType, device_t>::MatrixView(ScalarType* const data, const int s : ptr_(data), ldm_(size), size_(std::make_pair(size, size)) {} template <typename ScalarType, DeviceType device_t> template <template <typename, DeviceType> class Matrix> MatrixView<ScalarType, device_t>::MatrixView(Matrix<ScalarType, device_t>& mat) : ptr_(mat.ptr()), ldm_(mat.leadingDimension()), size_(mat.size()) {} template <typename ScalarType, DeviceType device_t> template <template <typename, DeviceType> class Matrix> MatrixView<ScalarType, device_t>::MatrixView(Matrix<ScalarType, device_t>& mat, int offset_i, int offset_j) : MatrixView(mat, offset_i, offset_j, mat.nrRows() - offset_i, mat.nrCols() - offset_j) { Loading @@ -123,6 +130,7 @@ MatrixView<ScalarType, device_t>::MatrixView(Matrix<ScalarType, device_t>& mat, } template <typename ScalarType, DeviceType device_t> template <template <typename, DeviceType> class Matrix> MatrixView<ScalarType, device_t>::MatrixView(Matrix<ScalarType, device_t>& mat, int offset_i, int offset_j, int ni, int nj) : ptr_(mat.ptr(offset_i, offset_j)), ldm_(mat.leadingDimension()), size_(std::make_pair(ni, nj)) { Loading @@ -131,9 +139,26 @@ MatrixView<ScalarType, device_t>::MatrixView(Matrix<ScalarType, device_t>& mat, } template <typename ScalarType, DeviceType device_t> template <class MatrixB> MatrixView<ScalarType, device_t>& MatrixView<ScalarType, device_t>::operator=(const MatrixB& rhs) { assert(nrCols() == rhs.nrCols() and nrRows() == rhs.nrRows()); template <template <typename, DeviceType> class Matrix> MatrixView<ScalarType, device_t>& MatrixView<ScalarType, device_t>::operator=( const Matrix<ScalarType, device_t>& rhs) { if (nrCols() != rhs.nrCols() || nrRows() != rhs.nrRows()) { throw(std::invalid_argument("Matrix size mismatch.")); } for (int j = 0; j < nrCols(); ++j) for (int i = 0; i < nrRows(); ++i) (*this)(i, j) = rhs(i, j); return *this; } template <typename ScalarType, DeviceType device_t> MatrixView<ScalarType, device_t>& MatrixView<ScalarType, device_t>::operator=( const MatrixView<ScalarType, device_t>& rhs) { if (nrCols() != rhs.nrCols() || nrRows() != rhs.nrRows()) { throw(std::invalid_argument("Matrix size mismatch.")); } for (int j = 0; j < nrCols(); ++j) for (int i = 0; i < nrRows(); ++i) (*this)(i, j) = rhs(i, j); Loading Loading @@ -170,12 +195,12 @@ auto inline makeConstantView(ScalarType* const data, const int size) { return makeConstantView<ScalarType, device_t>(data, std::make_pair(size, size), size); } template <typename ScalarType, DeviceType device_t> template <template <typename, DeviceType> class Matrix, typename ScalarType, DeviceType device_t> auto inline makeConstantView(const Matrix<ScalarType, device_t>& mat) { return makeConstantView<ScalarType, device_t>(mat.ptr(), mat.size(), mat.leadingDimension()); } template <typename ScalarType, DeviceType device_t> template <template <typename, DeviceType> class Matrix, typename ScalarType, DeviceType device_t> auto inline makeConstantView(const Matrix<ScalarType, device_t>& mat, const int offset_i, const int offset_j) { assert(offset_i < mat.nrCols()); Loading @@ -185,7 +210,7 @@ auto inline makeConstantView(const Matrix<ScalarType, device_t>& mat, const int mat.leadingDimension()); } template <typename ScalarType, DeviceType device_t> template <template <typename, DeviceType> class Matrix, typename ScalarType, DeviceType device_t> auto inline makeConstantView(const Matrix<ScalarType, device_t>& mat, const int offset_i, const int offset_j, const int ni, const int nj) { assert(ni + offset_i <= mat.nrRows()); Loading include/dca/linalg/matrixop.hpp +43 −41 Original line number Diff line number Diff line Loading @@ -7,6 +7,7 @@ // // Author: Peter Staar (taa@zurich.ibm.com) // Raffaele Solca' (rasolca@itp.phys.ethz.ch) // Giovanni Balduzzi (gbalduzz@itp.phys.ethz.ch) // // This file provides the matrix interface for the following matrix operations: // - copyCol, copyRow, copyCols, copyRows Loading Loading @@ -58,8 +59,7 @@ inline void copyMatrixToArray(const Matrix<Scalar, CPU>& mat, Scalar* a, int lda // Copies the m by n matrix stored in a to the matrix mat. // Preconditions: lda >= m. template <typename Scalar> inline void copyArrayToMatrix(int m, int n, const Scalar* a, int lda, Matrix<Scalar, CPU>& mat) { inline void copyArrayToMatrix(int m, int n, const Scalar* a, int lda, Matrix<Scalar, CPU>& mat) { assert(lda >= m); mat.resizeNoCopy(std::make_pair(m, n)); lapack::lacpy("A", mat.nrRows(), mat.nrCols(), a, lda, mat.ptr(), mat.leadingDimension()); Loading @@ -71,8 +71,7 @@ inline void copyArrayToMatrix(int m, int n, const Scalar* a, int lda, // 0 <= jx < mat_x.nrCols(), 0 <= jy < mat_y.nrCols(). template <typename Scalar, DeviceType device_name> inline void copyCol(const Matrix<Scalar, device_name>& mat_x, int jx, Matrix<Scalar, device_name>& mat_y, int jy, int thread_id = 0, int stream_id = 0) { Matrix<Scalar, device_name>& mat_y, int jy, int thread_id = 0, int stream_id = 0) { assert(jx >= 0 && jx < mat_x.nrCols()); assert(jy >= 0 && jy < mat_y.nrCols()); assert(mat_x.nrRows() == mat_y.nrRows()); Loading @@ -88,8 +87,8 @@ inline void copyCol(const Matrix<Scalar, device_name>& mat_x, int jx, // 0 <= j_y[i] < mat_y.nrCols() for 0 <= i < j_x.size(). template <typename Scalar> inline void copyCols(const Matrix<Scalar, CPU>& mat_x, const Vector<int, CPU>& j_x, Matrix<Scalar, CPU>& mat_y, const Vector<int, CPU>& j_y, int /*thread_id*/ = 0, int /*stream_id*/ = 0) { Matrix<Scalar, CPU>& mat_y, const Vector<int, CPU>& j_y, int /*thread_id*/ = 0, int /*stream_id*/ = 0) { assert(j_x.size() <= j_y.size()); for (int ind_j = 0; ind_j < j_x.size(); ++ind_j) Loading @@ -114,8 +113,7 @@ inline void copyCols(const Matrix<Scalar, GPU>& mat_x, const Vector<int, GPU>& j // 0 <= ix < mat_x.nrRows(), 0 <= iy < mat_y.nrRows(). template <typename Scalar, DeviceType device_name> inline void copyRow(const Matrix<Scalar, device_name>& mat_x, int ix, Matrix<Scalar, device_name>& mat_y, int iy, int thread_id = 0, int stream_id = 0) { Matrix<Scalar, device_name>& mat_y, int iy, int thread_id = 0, int stream_id = 0) { assert(ix >= 0 && ix < mat_x.nrRows()); assert(iy >= 0 && iy < mat_y.nrRows()); assert(mat_x.nrCols() == mat_y.nrCols()); Loading @@ -132,8 +130,8 @@ inline void copyRow(const Matrix<Scalar, device_name>& mat_x, int ix, // 0 <= i_y[i] < mat_y.nrRows() for 0 <= i < i_x.size(). template <typename Scalar> inline void copyRows(const Matrix<Scalar, CPU>& mat_x, const Vector<int, CPU>& i_x, Matrix<Scalar, CPU>& mat_y, const Vector<int, CPU>& i_y, int /*thread_id*/ = 0, int /*stream_id*/ = 0) { Matrix<Scalar, CPU>& mat_y, const Vector<int, CPU>& i_y, int /*thread_id*/ = 0, int /*stream_id*/ = 0) { assert(i_x.size() <= i_y.size()); assert(mat_x.nrCols() == mat_y.nrCols()); Loading Loading @@ -199,8 +197,8 @@ auto difference(const Matrix<Scalar, device_name>& a, const Matrix<Scalar, CPU>& return difference(cp_a, b, diff_threshold); } template <typename Scalar, DeviceType device_name_a, DeviceType device_name_b> auto difference(const Matrix<Scalar, device_name_a>& a, const Matrix<Scalar, device_name_b>& b, double diff_threshold = 1e-3) { auto difference(const Matrix<Scalar, device_name_a>& a, const Matrix<Scalar, device_name_b>& b, double diff_threshold = 1e-3) { Matrix<Scalar, CPU> cp_b(b); return difference(a, cp_b, diff_threshold); } Loading Loading @@ -282,8 +280,7 @@ void inverse(MatrixType<Scalar, device_name>& mat) { } template <typename Scalar> void smallInverse(Matrix<Scalar, CPU>& m_inv, Vector<int, CPU>& ipiv, Vector<Scalar, CPU>& work) { void smallInverse(Matrix<Scalar, CPU>& m_inv, Vector<int, CPU>& ipiv, Vector<Scalar, CPU>& work) { assert(m_inv.is_square()); switch (m_inv.nrCols()) { case 1: Loading Loading @@ -466,8 +463,7 @@ inline void scaleRow(Matrix<Scalar, device_name>& mat, int i, Scalar val, int th // Preconditions: i.size() == val.size(), 0 <= i[k] < mat.nrRow() for 0 <= k < i.size(). template <typename Scalar> inline void scaleRows(Matrix<Scalar, CPU>& mat, const Vector<int, CPU>& i, const Vector<Scalar, CPU>& val, int /*thread_id*/ = 0, int /*stream_id*/ = 0) { const Vector<Scalar, CPU>& val, int /*thread_id*/ = 0, int /*stream_id*/ = 0) { assert(i.size() == val.size()); for (int j = 0; j < mat.nrCols(); ++j) Loading Loading @@ -562,8 +558,8 @@ inline void swapRowAndCol(Matrix<Scalar, device_name>& mat, int i1, int i2, int // a.nrRows() == y.size() if transa == 'N', a.nrCols() == y.size() otherwise, // a.nrCols() == x.size() if transa == 'N', a.nrRows() == x.size() otherwise. template <typename Scalar> void gemv(char transa, Scalar alpha, const Matrix<Scalar, CPU>& a, const Vector<Scalar, CPU>& x, Scalar beta, Vector<Scalar, CPU>& y) { void gemv(char transa, Scalar alpha, const Matrix<Scalar, CPU>& a, const Vector<Scalar, CPU>& x, Scalar beta, Vector<Scalar, CPU>& y) { if (transa == 'N') { assert(a.nrRows() == y.size()); assert(a.nrCols() == x.size()); Loading Loading @@ -603,8 +599,8 @@ void gemv(char transa, const Matrix<Scalar, CPU>& a, const Vector<Scalar, CPU>& template <typename Scalar, DeviceType device_name, template <typename, DeviceType> class MatrixA, template <typename, DeviceType> class MatrixB, template <typename, DeviceType> class MatrixC> void gemm(char transa, char transb, Scalar alpha, const MatrixA<Scalar, device_name>& a, const MatrixB<Scalar, device_name>& b, Scalar beta, MatrixC<Scalar, device_name>& c, int thread_id = 0, int stream_id = 0) { const MatrixB<Scalar, device_name>& b, Scalar beta, MatrixC<Scalar, device_name>& c, int thread_id = 0, int stream_id = 0) { int m = c.nrRows(); int n = c.nrCols(); int k; Loading Loading @@ -705,8 +701,8 @@ void trsm(char uplo, char diag, const Matrix<Scalar, device_name>& a, // ka == kb, where ka = a.nrCols() if transa == 'N', ka = a.nrRows() otherwise and // kb = b.nrRows() if transb == 'N', kb = b.nrCols() otherwise. template <typename Scalar> void gemm(char transa, char transb, Matrix<Scalar, CPU>& a, Matrix<std::complex<Scalar>, CPU>& b, Matrix<std::complex<Scalar>, CPU>& c) { void gemm(char transa, char transb, Matrix<Scalar, CPU>& a, Matrix<std::complex<Scalar>, CPU>& b, Matrix<std::complex<Scalar>, CPU>& c) { Matrix<Scalar, CPU> b_part(b.size()); Matrix<Scalar, CPU> c_re(c.size()); Matrix<Scalar, CPU> c_im(c.size()); Loading Loading @@ -789,8 +785,7 @@ static void gemm(char transa, char transb, Matrix<std::complex<Scalar>, CPU>& a, // and kb = b[0].nrRows() if transb == 'N', kb = b[0].nrCols() otherwise. template <typename Scalar> void multiply(char transa, char transb, const std::array<Matrix<Scalar, CPU>, 2>& a, const std::array<Matrix<Scalar, CPU>, 2>& b, std::array<Matrix<Scalar, CPU>, 2>& c, const std::array<Matrix<Scalar, CPU>, 2>& b, std::array<Matrix<Scalar, CPU>, 2>& c, std::array<Matrix<Scalar, CPU>, 5>& work) { assert(a[0].size() == a[1].size()); assert(b[0].size() == b[1].size()); Loading Loading @@ -827,8 +822,7 @@ void multiply(char transa, char transb, const std::array<Matrix<Scalar, CPU>, 2> template <typename Scalar> void multiply(const std::array<Matrix<Scalar, CPU>, 2>& a, const std::array<Matrix<Scalar, CPU>, 2>& b, std::array<Matrix<Scalar, CPU>, 2>& c, const std::array<Matrix<Scalar, CPU>, 2>& b, std::array<Matrix<Scalar, CPU>, 2>& c, std::array<Matrix<Scalar, CPU>, 5>& work) { multiply('N', 'N', a, b, c, work); } Loading @@ -847,8 +841,7 @@ void multiply(const std::array<Matrix<Scalar, CPU>, 2>& a, // and kb = b.nrRows() if transb == 'N', kb = b.nrCols() otherwise. template <typename Scalar, DeviceType device_name> void multiply(char transa, char transb, const std::array<Matrix<Scalar, device_name>, 2>& a, const Matrix<Scalar, device_name>& b, std::array<Matrix<Scalar, device_name>, 2>& c) { const Matrix<Scalar, device_name>& b, std::array<Matrix<Scalar, device_name>, 2>& c) { assert(transa == 'N' || transa == 'T' || transa == 'C'); assert(transb == 'N' || transb == 'T'); assert(a[0].size() == a[1].size()); Loading @@ -861,8 +854,7 @@ void multiply(char transa, char transb, const std::array<Matrix<Scalar, device_n template <typename Scalar, DeviceType device_name> void multiply(const std::array<Matrix<Scalar, device_name>, 2>& a, const Matrix<Scalar, device_name>& b, std::array<Matrix<Scalar, device_name>, 2>& c) { const Matrix<Scalar, device_name>& b, std::array<Matrix<Scalar, device_name>, 2>& c) { multiply('N', 'N', a, b, c); } Loading Loading @@ -953,9 +945,8 @@ inline void multiplyDiagonalRight(const Matrix<Scalar, GPU>& a, const Vector<Sca // Postcondition: lambda_re, lambda_i, are resized, vl if jobvl == 'V', vr if jobvr == 'V' are // resized. template <typename Scalar> void eigensolver(char jobvl, char jobvr, const Matrix<Scalar, CPU>& a, Vector<Scalar, CPU>& lambda_re, Vector<Scalar, CPU>& lambda_im, Matrix<Scalar, CPU>& vl, Matrix<Scalar, CPU>& vr) { void eigensolver(char jobvl, char jobvr, const Matrix<Scalar, CPU>& a, Vector<Scalar, CPU>& lambda_re, Vector<Scalar, CPU>& lambda_im, Matrix<Scalar, CPU>& vl, Matrix<Scalar, CPU>& vr) { assert(a.is_square()); Matrix<Scalar, CPU> a_copy(a); Loading Loading @@ -992,8 +983,7 @@ void eigensolver(char jobvl, char jobvr, const Matrix<Scalar, CPU>& a, // Postcondition: lambda, is resized, vl if jobvl == 'V', vr if jobvr == 'V' are resized. template <typename Scalar> void eigensolver(char jobvl, char jobvr, const Matrix<std::complex<Scalar>, CPU>& a, Vector<std::complex<Scalar>, CPU>& lambda, Matrix<std::complex<Scalar>, CPU>& vl, Vector<std::complex<Scalar>, CPU>& lambda, Matrix<std::complex<Scalar>, CPU>& vl, Matrix<std::complex<Scalar>, CPU>& vr) { assert(a.is_square()); Loading Loading @@ -1082,8 +1072,7 @@ void eigensolverHermitian(char jobv, char uplo, const Matrix<std::complex<Scalar } template <typename Scalar> void eigensolverGreensFunctionMatrix(char jobv, char uplo, const Matrix<std::complex<Scalar>, CPU>& a, void eigensolverGreensFunctionMatrix(char jobv, char uplo, const Matrix<std::complex<Scalar>, CPU>& a, Vector<Scalar, CPU>& lambda, Matrix<std::complex<Scalar>, CPU>& v) { assert(a.is_square()); Loading Loading @@ -1112,8 +1101,7 @@ void eigensolverGreensFunctionMatrix(char jobv, char uplo, // Out: a_inv // Postconditions: a_inv is resized to the needed dimension. template <typename Scalar> void pseudoInverse(const Matrix<Scalar, CPU>& a, Matrix<Scalar, CPU>& a_inv, double eps = 1.e-6) { void pseudoInverse(const Matrix<Scalar, CPU>& a, Matrix<Scalar, CPU>& a_inv, double eps = 1.e-6) { int m = a.nrRows(); int n = a.nrCols(); a_inv.resizeNoCopy(std::make_pair(n, m)); Loading Loading @@ -1171,6 +1159,20 @@ void pseudoInverse(const Matrix<Scalar, CPU>& a, Matrix<Scalar, CPU>& a_inv, gemm('N', 'C', at_a, a, a_inv); } } template <typename Scalar> bool areNear(const Matrix<Scalar, CPU>& A, const Matrix<Scalar, CPU>& B, const double err = 1e-16) { if (A.size() != B.size()) return false; for (int j = 0; j < A.size().second; j++) for (int i = 0; i < A.size().first; i++) if (std::abs(A(i, j) - B(i, j)) > err) return false; return true; } } // namespace matrixop } // namespace linalg } // namespace dca Loading include/dca/linalg/util/util_cublas.hpp +0 −5 Original line number Diff line number Diff line Loading @@ -12,11 +12,6 @@ #ifndef DCA_LINALG_UTIL_UTIL_CUBLAS_HPP #define DCA_LINALG_UTIL_UTIL_CUBLAS_HPP #include <cublas_v2.h> #include <stdexcept> #include <string> #include "dca/linalg/util/error_cuda.hpp" namespace dca { namespace linalg { namespace util { Loading Loading
CMakeLists.txt +2 −4 Original line number Diff line number Diff line Loading @@ -144,17 +144,15 @@ set(DCA_LIBS ${DCA_CONCURRENCY_LIB} ${DCA_THREADING_LIBS} lapack cuda_utils ) if (DCA_HAVE_CUDA) list(APPEND DCA_CUDA_LIBS cuda_utils) list(APPEND DCA_LIBS blas_kernels ctaux_walker_kernels dnfft_kernels lapack_kernels mc_tools_kernels mc_kernels special_transform_kernels ${DCA_CUDA_LIBS}) endif() Loading
cmake/dca_compiler_options.cmake +1 −1 Original line number Diff line number Diff line Loading @@ -21,7 +21,7 @@ set(DCA_WARNINGS -Wall -Wextra -Wpedantic -Wno-sign-compare -Wno-dangling-else) # Languange standard set(DCA_STD_FLAG -std=c++14) set(DCA_STD_FLAG -std=c++17) # Set C and CXX flags. add_compile_options(${DCA_WARNINGS}) Loading
include/dca/linalg/matrix_view.hpp +45 −20 Original line number Diff line number Diff line Loading @@ -19,7 +19,7 @@ #include <stdexcept> #include "dca/linalg/device_type.hpp" #include "dca/linalg/matrix.hpp" #include "dca/util/cuda_definitions.hpp" namespace dca { namespace linalg { Loading @@ -33,56 +33,61 @@ public: MatrixView(ScalarType* data, Pair size, int ld); MatrixView(ScalarType* data, int size, int ld); MatrixView(ScalarType* data, int size); template <template <typename, DeviceType> class Matrix> MatrixView(Matrix<ScalarType, device_name>& mat); template <template <typename, DeviceType> class Matrix> MatrixView(Matrix<ScalarType, device_name>& mat, int offset_i, int offset_j); template <template <typename, DeviceType> class Matrix> MatrixView(Matrix<ScalarType, device_name>& mat, int offset_i, int offset_j, int ni, int nj); template <class MatrixB> MatrixView& operator=(const MatrixB& rhs); template <template <typename, DeviceType> class Matrix> MatrixView& operator=(const Matrix<ScalarType, device_name>& rhs); MatrixView& operator=(const MatrixView<ScalarType, device_name>& rhs); void print(std::ostream& out = std::cout) const; inline int leadingDimension() const { __DEVICE__ __HOST__ inline int leadingDimension() const { return ldm_; } inline int ld() const { __DEVICE__ __HOST__ inline int ld() const { return leadingDimension(); } std::pair<int, int> size() const { __DEVICE__ __HOST__ std::pair<int, int> size() const { return size_; } ScalarType* ptr() { __DEVICE__ __HOST__ ScalarType* ptr() { return ptr_; } const ScalarType* ptr() const { __DEVICE__ __HOST__ const ScalarType* ptr() const { return ptr_; } ScalarType* ptr(int i, int j) { __DEVICE__ __HOST__ ScalarType* ptr(int i, int j) { assert(0 <= i && i <= size_.first); assert(0 <= j && j <= size_.second); return ptr_ + leadingDimension() * j + i; } const ScalarType* ptr(int i, int j) const { __DEVICE__ __HOST__ const ScalarType* ptr(int i, int j) const { assert(0 <= i && i <= size_.first); assert(0 <= j && j <= size_.second); return ptr_ + leadingDimension() * j + i; } int nrRows() const { __DEVICE__ __HOST__ int nrRows() const { return size_.first; } int nrCols() const { __DEVICE__ __HOST__ int nrCols() const { return size_.second; } bool is_square() const { return size_.first == size_.second; } ScalarType& operator()(int i, int j) { __DEVICE__ __HOST__ ScalarType& operator()(int i, int j) { assert(0 <= i && i < size_.first); assert(0 <= j && j < size_.second); return ptr_[i + j * ldm_]; } const ScalarType& operator()(int i, int j) const { __DEVICE__ __HOST__ const ScalarType& operator()(int i, int j) const { assert(0 <= i && i < size_.first); assert(0 <= j && j < size_.second); return ptr_[i + j * ldm_]; Loading Loading @@ -111,10 +116,12 @@ MatrixView<ScalarType, device_t>::MatrixView(ScalarType* const data, const int s : ptr_(data), ldm_(size), size_(std::make_pair(size, size)) {} template <typename ScalarType, DeviceType device_t> template <template <typename, DeviceType> class Matrix> MatrixView<ScalarType, device_t>::MatrixView(Matrix<ScalarType, device_t>& mat) : ptr_(mat.ptr()), ldm_(mat.leadingDimension()), size_(mat.size()) {} template <typename ScalarType, DeviceType device_t> template <template <typename, DeviceType> class Matrix> MatrixView<ScalarType, device_t>::MatrixView(Matrix<ScalarType, device_t>& mat, int offset_i, int offset_j) : MatrixView(mat, offset_i, offset_j, mat.nrRows() - offset_i, mat.nrCols() - offset_j) { Loading @@ -123,6 +130,7 @@ MatrixView<ScalarType, device_t>::MatrixView(Matrix<ScalarType, device_t>& mat, } template <typename ScalarType, DeviceType device_t> template <template <typename, DeviceType> class Matrix> MatrixView<ScalarType, device_t>::MatrixView(Matrix<ScalarType, device_t>& mat, int offset_i, int offset_j, int ni, int nj) : ptr_(mat.ptr(offset_i, offset_j)), ldm_(mat.leadingDimension()), size_(std::make_pair(ni, nj)) { Loading @@ -131,9 +139,26 @@ MatrixView<ScalarType, device_t>::MatrixView(Matrix<ScalarType, device_t>& mat, } template <typename ScalarType, DeviceType device_t> template <class MatrixB> MatrixView<ScalarType, device_t>& MatrixView<ScalarType, device_t>::operator=(const MatrixB& rhs) { assert(nrCols() == rhs.nrCols() and nrRows() == rhs.nrRows()); template <template <typename, DeviceType> class Matrix> MatrixView<ScalarType, device_t>& MatrixView<ScalarType, device_t>::operator=( const Matrix<ScalarType, device_t>& rhs) { if (nrCols() != rhs.nrCols() || nrRows() != rhs.nrRows()) { throw(std::invalid_argument("Matrix size mismatch.")); } for (int j = 0; j < nrCols(); ++j) for (int i = 0; i < nrRows(); ++i) (*this)(i, j) = rhs(i, j); return *this; } template <typename ScalarType, DeviceType device_t> MatrixView<ScalarType, device_t>& MatrixView<ScalarType, device_t>::operator=( const MatrixView<ScalarType, device_t>& rhs) { if (nrCols() != rhs.nrCols() || nrRows() != rhs.nrRows()) { throw(std::invalid_argument("Matrix size mismatch.")); } for (int j = 0; j < nrCols(); ++j) for (int i = 0; i < nrRows(); ++i) (*this)(i, j) = rhs(i, j); Loading Loading @@ -170,12 +195,12 @@ auto inline makeConstantView(ScalarType* const data, const int size) { return makeConstantView<ScalarType, device_t>(data, std::make_pair(size, size), size); } template <typename ScalarType, DeviceType device_t> template <template <typename, DeviceType> class Matrix, typename ScalarType, DeviceType device_t> auto inline makeConstantView(const Matrix<ScalarType, device_t>& mat) { return makeConstantView<ScalarType, device_t>(mat.ptr(), mat.size(), mat.leadingDimension()); } template <typename ScalarType, DeviceType device_t> template <template <typename, DeviceType> class Matrix, typename ScalarType, DeviceType device_t> auto inline makeConstantView(const Matrix<ScalarType, device_t>& mat, const int offset_i, const int offset_j) { assert(offset_i < mat.nrCols()); Loading @@ -185,7 +210,7 @@ auto inline makeConstantView(const Matrix<ScalarType, device_t>& mat, const int mat.leadingDimension()); } template <typename ScalarType, DeviceType device_t> template <template <typename, DeviceType> class Matrix, typename ScalarType, DeviceType device_t> auto inline makeConstantView(const Matrix<ScalarType, device_t>& mat, const int offset_i, const int offset_j, const int ni, const int nj) { assert(ni + offset_i <= mat.nrRows()); Loading
include/dca/linalg/matrixop.hpp +43 −41 Original line number Diff line number Diff line Loading @@ -7,6 +7,7 @@ // // Author: Peter Staar (taa@zurich.ibm.com) // Raffaele Solca' (rasolca@itp.phys.ethz.ch) // Giovanni Balduzzi (gbalduzz@itp.phys.ethz.ch) // // This file provides the matrix interface for the following matrix operations: // - copyCol, copyRow, copyCols, copyRows Loading Loading @@ -58,8 +59,7 @@ inline void copyMatrixToArray(const Matrix<Scalar, CPU>& mat, Scalar* a, int lda // Copies the m by n matrix stored in a to the matrix mat. // Preconditions: lda >= m. template <typename Scalar> inline void copyArrayToMatrix(int m, int n, const Scalar* a, int lda, Matrix<Scalar, CPU>& mat) { inline void copyArrayToMatrix(int m, int n, const Scalar* a, int lda, Matrix<Scalar, CPU>& mat) { assert(lda >= m); mat.resizeNoCopy(std::make_pair(m, n)); lapack::lacpy("A", mat.nrRows(), mat.nrCols(), a, lda, mat.ptr(), mat.leadingDimension()); Loading @@ -71,8 +71,7 @@ inline void copyArrayToMatrix(int m, int n, const Scalar* a, int lda, // 0 <= jx < mat_x.nrCols(), 0 <= jy < mat_y.nrCols(). template <typename Scalar, DeviceType device_name> inline void copyCol(const Matrix<Scalar, device_name>& mat_x, int jx, Matrix<Scalar, device_name>& mat_y, int jy, int thread_id = 0, int stream_id = 0) { Matrix<Scalar, device_name>& mat_y, int jy, int thread_id = 0, int stream_id = 0) { assert(jx >= 0 && jx < mat_x.nrCols()); assert(jy >= 0 && jy < mat_y.nrCols()); assert(mat_x.nrRows() == mat_y.nrRows()); Loading @@ -88,8 +87,8 @@ inline void copyCol(const Matrix<Scalar, device_name>& mat_x, int jx, // 0 <= j_y[i] < mat_y.nrCols() for 0 <= i < j_x.size(). template <typename Scalar> inline void copyCols(const Matrix<Scalar, CPU>& mat_x, const Vector<int, CPU>& j_x, Matrix<Scalar, CPU>& mat_y, const Vector<int, CPU>& j_y, int /*thread_id*/ = 0, int /*stream_id*/ = 0) { Matrix<Scalar, CPU>& mat_y, const Vector<int, CPU>& j_y, int /*thread_id*/ = 0, int /*stream_id*/ = 0) { assert(j_x.size() <= j_y.size()); for (int ind_j = 0; ind_j < j_x.size(); ++ind_j) Loading @@ -114,8 +113,7 @@ inline void copyCols(const Matrix<Scalar, GPU>& mat_x, const Vector<int, GPU>& j // 0 <= ix < mat_x.nrRows(), 0 <= iy < mat_y.nrRows(). template <typename Scalar, DeviceType device_name> inline void copyRow(const Matrix<Scalar, device_name>& mat_x, int ix, Matrix<Scalar, device_name>& mat_y, int iy, int thread_id = 0, int stream_id = 0) { Matrix<Scalar, device_name>& mat_y, int iy, int thread_id = 0, int stream_id = 0) { assert(ix >= 0 && ix < mat_x.nrRows()); assert(iy >= 0 && iy < mat_y.nrRows()); assert(mat_x.nrCols() == mat_y.nrCols()); Loading @@ -132,8 +130,8 @@ inline void copyRow(const Matrix<Scalar, device_name>& mat_x, int ix, // 0 <= i_y[i] < mat_y.nrRows() for 0 <= i < i_x.size(). template <typename Scalar> inline void copyRows(const Matrix<Scalar, CPU>& mat_x, const Vector<int, CPU>& i_x, Matrix<Scalar, CPU>& mat_y, const Vector<int, CPU>& i_y, int /*thread_id*/ = 0, int /*stream_id*/ = 0) { Matrix<Scalar, CPU>& mat_y, const Vector<int, CPU>& i_y, int /*thread_id*/ = 0, int /*stream_id*/ = 0) { assert(i_x.size() <= i_y.size()); assert(mat_x.nrCols() == mat_y.nrCols()); Loading Loading @@ -199,8 +197,8 @@ auto difference(const Matrix<Scalar, device_name>& a, const Matrix<Scalar, CPU>& return difference(cp_a, b, diff_threshold); } template <typename Scalar, DeviceType device_name_a, DeviceType device_name_b> auto difference(const Matrix<Scalar, device_name_a>& a, const Matrix<Scalar, device_name_b>& b, double diff_threshold = 1e-3) { auto difference(const Matrix<Scalar, device_name_a>& a, const Matrix<Scalar, device_name_b>& b, double diff_threshold = 1e-3) { Matrix<Scalar, CPU> cp_b(b); return difference(a, cp_b, diff_threshold); } Loading Loading @@ -282,8 +280,7 @@ void inverse(MatrixType<Scalar, device_name>& mat) { } template <typename Scalar> void smallInverse(Matrix<Scalar, CPU>& m_inv, Vector<int, CPU>& ipiv, Vector<Scalar, CPU>& work) { void smallInverse(Matrix<Scalar, CPU>& m_inv, Vector<int, CPU>& ipiv, Vector<Scalar, CPU>& work) { assert(m_inv.is_square()); switch (m_inv.nrCols()) { case 1: Loading Loading @@ -466,8 +463,7 @@ inline void scaleRow(Matrix<Scalar, device_name>& mat, int i, Scalar val, int th // Preconditions: i.size() == val.size(), 0 <= i[k] < mat.nrRow() for 0 <= k < i.size(). template <typename Scalar> inline void scaleRows(Matrix<Scalar, CPU>& mat, const Vector<int, CPU>& i, const Vector<Scalar, CPU>& val, int /*thread_id*/ = 0, int /*stream_id*/ = 0) { const Vector<Scalar, CPU>& val, int /*thread_id*/ = 0, int /*stream_id*/ = 0) { assert(i.size() == val.size()); for (int j = 0; j < mat.nrCols(); ++j) Loading Loading @@ -562,8 +558,8 @@ inline void swapRowAndCol(Matrix<Scalar, device_name>& mat, int i1, int i2, int // a.nrRows() == y.size() if transa == 'N', a.nrCols() == y.size() otherwise, // a.nrCols() == x.size() if transa == 'N', a.nrRows() == x.size() otherwise. template <typename Scalar> void gemv(char transa, Scalar alpha, const Matrix<Scalar, CPU>& a, const Vector<Scalar, CPU>& x, Scalar beta, Vector<Scalar, CPU>& y) { void gemv(char transa, Scalar alpha, const Matrix<Scalar, CPU>& a, const Vector<Scalar, CPU>& x, Scalar beta, Vector<Scalar, CPU>& y) { if (transa == 'N') { assert(a.nrRows() == y.size()); assert(a.nrCols() == x.size()); Loading Loading @@ -603,8 +599,8 @@ void gemv(char transa, const Matrix<Scalar, CPU>& a, const Vector<Scalar, CPU>& template <typename Scalar, DeviceType device_name, template <typename, DeviceType> class MatrixA, template <typename, DeviceType> class MatrixB, template <typename, DeviceType> class MatrixC> void gemm(char transa, char transb, Scalar alpha, const MatrixA<Scalar, device_name>& a, const MatrixB<Scalar, device_name>& b, Scalar beta, MatrixC<Scalar, device_name>& c, int thread_id = 0, int stream_id = 0) { const MatrixB<Scalar, device_name>& b, Scalar beta, MatrixC<Scalar, device_name>& c, int thread_id = 0, int stream_id = 0) { int m = c.nrRows(); int n = c.nrCols(); int k; Loading Loading @@ -705,8 +701,8 @@ void trsm(char uplo, char diag, const Matrix<Scalar, device_name>& a, // ka == kb, where ka = a.nrCols() if transa == 'N', ka = a.nrRows() otherwise and // kb = b.nrRows() if transb == 'N', kb = b.nrCols() otherwise. template <typename Scalar> void gemm(char transa, char transb, Matrix<Scalar, CPU>& a, Matrix<std::complex<Scalar>, CPU>& b, Matrix<std::complex<Scalar>, CPU>& c) { void gemm(char transa, char transb, Matrix<Scalar, CPU>& a, Matrix<std::complex<Scalar>, CPU>& b, Matrix<std::complex<Scalar>, CPU>& c) { Matrix<Scalar, CPU> b_part(b.size()); Matrix<Scalar, CPU> c_re(c.size()); Matrix<Scalar, CPU> c_im(c.size()); Loading Loading @@ -789,8 +785,7 @@ static void gemm(char transa, char transb, Matrix<std::complex<Scalar>, CPU>& a, // and kb = b[0].nrRows() if transb == 'N', kb = b[0].nrCols() otherwise. template <typename Scalar> void multiply(char transa, char transb, const std::array<Matrix<Scalar, CPU>, 2>& a, const std::array<Matrix<Scalar, CPU>, 2>& b, std::array<Matrix<Scalar, CPU>, 2>& c, const std::array<Matrix<Scalar, CPU>, 2>& b, std::array<Matrix<Scalar, CPU>, 2>& c, std::array<Matrix<Scalar, CPU>, 5>& work) { assert(a[0].size() == a[1].size()); assert(b[0].size() == b[1].size()); Loading Loading @@ -827,8 +822,7 @@ void multiply(char transa, char transb, const std::array<Matrix<Scalar, CPU>, 2> template <typename Scalar> void multiply(const std::array<Matrix<Scalar, CPU>, 2>& a, const std::array<Matrix<Scalar, CPU>, 2>& b, std::array<Matrix<Scalar, CPU>, 2>& c, const std::array<Matrix<Scalar, CPU>, 2>& b, std::array<Matrix<Scalar, CPU>, 2>& c, std::array<Matrix<Scalar, CPU>, 5>& work) { multiply('N', 'N', a, b, c, work); } Loading @@ -847,8 +841,7 @@ void multiply(const std::array<Matrix<Scalar, CPU>, 2>& a, // and kb = b.nrRows() if transb == 'N', kb = b.nrCols() otherwise. template <typename Scalar, DeviceType device_name> void multiply(char transa, char transb, const std::array<Matrix<Scalar, device_name>, 2>& a, const Matrix<Scalar, device_name>& b, std::array<Matrix<Scalar, device_name>, 2>& c) { const Matrix<Scalar, device_name>& b, std::array<Matrix<Scalar, device_name>, 2>& c) { assert(transa == 'N' || transa == 'T' || transa == 'C'); assert(transb == 'N' || transb == 'T'); assert(a[0].size() == a[1].size()); Loading @@ -861,8 +854,7 @@ void multiply(char transa, char transb, const std::array<Matrix<Scalar, device_n template <typename Scalar, DeviceType device_name> void multiply(const std::array<Matrix<Scalar, device_name>, 2>& a, const Matrix<Scalar, device_name>& b, std::array<Matrix<Scalar, device_name>, 2>& c) { const Matrix<Scalar, device_name>& b, std::array<Matrix<Scalar, device_name>, 2>& c) { multiply('N', 'N', a, b, c); } Loading Loading @@ -953,9 +945,8 @@ inline void multiplyDiagonalRight(const Matrix<Scalar, GPU>& a, const Vector<Sca // Postcondition: lambda_re, lambda_i, are resized, vl if jobvl == 'V', vr if jobvr == 'V' are // resized. template <typename Scalar> void eigensolver(char jobvl, char jobvr, const Matrix<Scalar, CPU>& a, Vector<Scalar, CPU>& lambda_re, Vector<Scalar, CPU>& lambda_im, Matrix<Scalar, CPU>& vl, Matrix<Scalar, CPU>& vr) { void eigensolver(char jobvl, char jobvr, const Matrix<Scalar, CPU>& a, Vector<Scalar, CPU>& lambda_re, Vector<Scalar, CPU>& lambda_im, Matrix<Scalar, CPU>& vl, Matrix<Scalar, CPU>& vr) { assert(a.is_square()); Matrix<Scalar, CPU> a_copy(a); Loading Loading @@ -992,8 +983,7 @@ void eigensolver(char jobvl, char jobvr, const Matrix<Scalar, CPU>& a, // Postcondition: lambda, is resized, vl if jobvl == 'V', vr if jobvr == 'V' are resized. template <typename Scalar> void eigensolver(char jobvl, char jobvr, const Matrix<std::complex<Scalar>, CPU>& a, Vector<std::complex<Scalar>, CPU>& lambda, Matrix<std::complex<Scalar>, CPU>& vl, Vector<std::complex<Scalar>, CPU>& lambda, Matrix<std::complex<Scalar>, CPU>& vl, Matrix<std::complex<Scalar>, CPU>& vr) { assert(a.is_square()); Loading Loading @@ -1082,8 +1072,7 @@ void eigensolverHermitian(char jobv, char uplo, const Matrix<std::complex<Scalar } template <typename Scalar> void eigensolverGreensFunctionMatrix(char jobv, char uplo, const Matrix<std::complex<Scalar>, CPU>& a, void eigensolverGreensFunctionMatrix(char jobv, char uplo, const Matrix<std::complex<Scalar>, CPU>& a, Vector<Scalar, CPU>& lambda, Matrix<std::complex<Scalar>, CPU>& v) { assert(a.is_square()); Loading Loading @@ -1112,8 +1101,7 @@ void eigensolverGreensFunctionMatrix(char jobv, char uplo, // Out: a_inv // Postconditions: a_inv is resized to the needed dimension. template <typename Scalar> void pseudoInverse(const Matrix<Scalar, CPU>& a, Matrix<Scalar, CPU>& a_inv, double eps = 1.e-6) { void pseudoInverse(const Matrix<Scalar, CPU>& a, Matrix<Scalar, CPU>& a_inv, double eps = 1.e-6) { int m = a.nrRows(); int n = a.nrCols(); a_inv.resizeNoCopy(std::make_pair(n, m)); Loading Loading @@ -1171,6 +1159,20 @@ void pseudoInverse(const Matrix<Scalar, CPU>& a, Matrix<Scalar, CPU>& a_inv, gemm('N', 'C', at_a, a, a_inv); } } template <typename Scalar> bool areNear(const Matrix<Scalar, CPU>& A, const Matrix<Scalar, CPU>& B, const double err = 1e-16) { if (A.size() != B.size()) return false; for (int j = 0; j < A.size().second; j++) for (int i = 0; i < A.size().first; i++) if (std::abs(A(i, j) - B(i, j)) > err) return false; return true; } } // namespace matrixop } // namespace linalg } // namespace dca Loading
include/dca/linalg/util/util_cublas.hpp +0 −5 Original line number Diff line number Diff line Loading @@ -12,11 +12,6 @@ #ifndef DCA_LINALG_UTIL_UTIL_CUBLAS_HPP #define DCA_LINALG_UTIL_UTIL_CUBLAS_HPP #include <cublas_v2.h> #include <stdexcept> #include <string> #include "dca/linalg/util/error_cuda.hpp" namespace dca { namespace linalg { namespace util { Loading