Loading include/dca/linalg/matrixop.hpp +5 −5 Original line number Diff line number Diff line Loading @@ -154,7 +154,7 @@ inline void copyRow(const Matrix<Scalar, device_name>& mat_x, int ix, // 0 <= i_y[i] < mat_y.nrRows() for 0 <= i < i_x.size(). template <typename Scalar, class Vec, class ALLOC> inline void copyRows(const Matrix<Scalar, CPU, ALLOC>& mat_x, const Vec& i_x, Matrix<Scalar, CPU, ALLOC>& mat_y, const Vec& i_y, int /*thread_id*/ = 0, int /*stream_id*/ = 0) { const Vec& i_y, const int /*thread_id*/ = 0, const int /*stream_id*/ = 0) { assert(i_x.size() <= i_y.size()); assert(mat_x.nrCols() == mat_y.nrCols()); Loading @@ -165,8 +165,8 @@ inline void copyRows(const Matrix<Scalar, CPU, ALLOC>& mat_x, const Vec& i_x, Ma #ifdef DCA_HAVE_GPU template <typename Scalar> inline void copyRows(const Matrix<Scalar, GPU>& mat_x, const Vector<int, GPU>& i_x, Matrix<Scalar, GPU>& mat_y, const Vector<int, GPU>& i_y, int thread_id = 0, int stream_id = 0) { Matrix<Scalar, GPU>& mat_y, const Vector<int, GPU>& i_y, const int thread_id, const int stream_id) { assert(i_x.size() <= i_y.size()); assert(mat_x.nrCols() == mat_y.nrCols()); Loading @@ -179,9 +179,9 @@ inline void copyRows(const Matrix<Scalar, GPU>& mat_x, const Vector<int, GPU>& i // In/Out: mat_y // Preconditions: mat_x.nrCols() == mat_y.nrCols() // 0 <= i_x[i] < mat_x.nrRows() for 0 <= i < i_x.size(). template <typename Scalar, class ALLOC> template <typename Scalar> inline void copyRows(const Matrix<Scalar, GPU>& mat_x, const Vector<int, GPU>& i_x, Matrix<Scalar, GPU>& mat_y, int thread_id = 0, int stream_id = 0) { Matrix<Scalar, GPU>& mat_y, const int thread_id, const int stream_id) { assert(mat_x.nrCols() == mat_y.nrCols()); blas::copyRows(mat_x.nrCols(), i_x.size(), i_x.ptr(), mat_x.ptr(), mat_x.leadingDimension(), Loading include/dca/linalg/vector.hpp +7 −1 Original line number Diff line number Diff line Loading @@ -50,7 +50,13 @@ public: Vector(size_t size, size_t capacity); Vector(const std::string& name, size_t size, size_t capacity); Vector(const ThisType& rhs, const std::string& name = default_name_); /** copy constructor except for name. * this is strange but for historical reasons is kept. * has needed to be explicit because with the `const ThisType&` somehow lead to an implicit conversion * from an int to a Vector& argument that landed here. * This occurred in Debug with */ explicit Vector(const ThisType& rhs, const std::string& name = default_name_); template <DeviceType device_name2, class Allocator2> Vector(const Vector<ScalarType, device_name2, Allocator2>& rhs, Loading include/dca/phys/dca_step/cluster_solver/ctint/walker/ctint_walker_gpu_submatrix.hpp +1 −0 Original line number Diff line number Diff line Loading @@ -294,6 +294,7 @@ void CtintWalkerSubmatrixGpu<Parameters, DIST>::updateM() { old_G.resizeNoCopy(std::make_pair(n_max_[s], gamma_size)); old_M.resizeNoCopy(std::make_pair(gamma_size, n_max_[s])); assert(dca::linalg::util::getStream(thread_id_, s) == get_stream(s)); move_indices_dev_[s].setAsync(move_indices_[s], get_stream(s)); // Note: an event synchronization might be necessary if the order of operation is changed. linalg::matrixop::copyCols(G_dev_[s], move_indices_dev_[s], old_G, thread_id_, s); Loading include/dca/testing/minimalist_printer.hpp +1 −1 Original line number Diff line number Diff line Loading @@ -13,7 +13,7 @@ #ifndef DCA_TESTING_MINIMALIST_PRINTER_HPP #define DCA_TESTING_MINIMALIST_PRINTER_HPP #include "gtest/gtest.h" #include "dca/testing/gtest_h_w_warning_blocking.h" namespace dca { namespace testing { Loading src/phys/dca_step/cluster_solver/ctint/walker/walker_kernels.cu +1 −1 Original line number Diff line number Diff line Loading @@ -43,7 +43,7 @@ __global__ void setRightSectorToIdKernel(Scalar* m, const int ldm, const int n0, Scalar the_one{}; the_one += 1.0; Scalar the_zero{}; assert(the_one == (the_zero -= 1.0)); //assert(the_one == (the_zero -= 1.0)); m[i + ldm * j] = (i == j) ? the_one : the_zero; } Loading Loading
include/dca/linalg/matrixop.hpp +5 −5 Original line number Diff line number Diff line Loading @@ -154,7 +154,7 @@ inline void copyRow(const Matrix<Scalar, device_name>& mat_x, int ix, // 0 <= i_y[i] < mat_y.nrRows() for 0 <= i < i_x.size(). template <typename Scalar, class Vec, class ALLOC> inline void copyRows(const Matrix<Scalar, CPU, ALLOC>& mat_x, const Vec& i_x, Matrix<Scalar, CPU, ALLOC>& mat_y, const Vec& i_y, int /*thread_id*/ = 0, int /*stream_id*/ = 0) { const Vec& i_y, const int /*thread_id*/ = 0, const int /*stream_id*/ = 0) { assert(i_x.size() <= i_y.size()); assert(mat_x.nrCols() == mat_y.nrCols()); Loading @@ -165,8 +165,8 @@ inline void copyRows(const Matrix<Scalar, CPU, ALLOC>& mat_x, const Vec& i_x, Ma #ifdef DCA_HAVE_GPU template <typename Scalar> inline void copyRows(const Matrix<Scalar, GPU>& mat_x, const Vector<int, GPU>& i_x, Matrix<Scalar, GPU>& mat_y, const Vector<int, GPU>& i_y, int thread_id = 0, int stream_id = 0) { Matrix<Scalar, GPU>& mat_y, const Vector<int, GPU>& i_y, const int thread_id, const int stream_id) { assert(i_x.size() <= i_y.size()); assert(mat_x.nrCols() == mat_y.nrCols()); Loading @@ -179,9 +179,9 @@ inline void copyRows(const Matrix<Scalar, GPU>& mat_x, const Vector<int, GPU>& i // In/Out: mat_y // Preconditions: mat_x.nrCols() == mat_y.nrCols() // 0 <= i_x[i] < mat_x.nrRows() for 0 <= i < i_x.size(). template <typename Scalar, class ALLOC> template <typename Scalar> inline void copyRows(const Matrix<Scalar, GPU>& mat_x, const Vector<int, GPU>& i_x, Matrix<Scalar, GPU>& mat_y, int thread_id = 0, int stream_id = 0) { Matrix<Scalar, GPU>& mat_y, const int thread_id, const int stream_id) { assert(mat_x.nrCols() == mat_y.nrCols()); blas::copyRows(mat_x.nrCols(), i_x.size(), i_x.ptr(), mat_x.ptr(), mat_x.leadingDimension(), Loading
include/dca/linalg/vector.hpp +7 −1 Original line number Diff line number Diff line Loading @@ -50,7 +50,13 @@ public: Vector(size_t size, size_t capacity); Vector(const std::string& name, size_t size, size_t capacity); Vector(const ThisType& rhs, const std::string& name = default_name_); /** copy constructor except for name. * this is strange but for historical reasons is kept. * has needed to be explicit because with the `const ThisType&` somehow lead to an implicit conversion * from an int to a Vector& argument that landed here. * This occurred in Debug with */ explicit Vector(const ThisType& rhs, const std::string& name = default_name_); template <DeviceType device_name2, class Allocator2> Vector(const Vector<ScalarType, device_name2, Allocator2>& rhs, Loading
include/dca/phys/dca_step/cluster_solver/ctint/walker/ctint_walker_gpu_submatrix.hpp +1 −0 Original line number Diff line number Diff line Loading @@ -294,6 +294,7 @@ void CtintWalkerSubmatrixGpu<Parameters, DIST>::updateM() { old_G.resizeNoCopy(std::make_pair(n_max_[s], gamma_size)); old_M.resizeNoCopy(std::make_pair(gamma_size, n_max_[s])); assert(dca::linalg::util::getStream(thread_id_, s) == get_stream(s)); move_indices_dev_[s].setAsync(move_indices_[s], get_stream(s)); // Note: an event synchronization might be necessary if the order of operation is changed. linalg::matrixop::copyCols(G_dev_[s], move_indices_dev_[s], old_G, thread_id_, s); Loading
include/dca/testing/minimalist_printer.hpp +1 −1 Original line number Diff line number Diff line Loading @@ -13,7 +13,7 @@ #ifndef DCA_TESTING_MINIMALIST_PRINTER_HPP #define DCA_TESTING_MINIMALIST_PRINTER_HPP #include "gtest/gtest.h" #include "dca/testing/gtest_h_w_warning_blocking.h" namespace dca { namespace testing { Loading
src/phys/dca_step/cluster_solver/ctint/walker/walker_kernels.cu +1 −1 Original line number Diff line number Diff line Loading @@ -43,7 +43,7 @@ __global__ void setRightSectorToIdKernel(Scalar* m, const int ldm, const int n0, Scalar the_one{}; the_one += 1.0; Scalar the_zero{}; assert(the_one == (the_zero -= 1.0)); //assert(the_one == (the_zero -= 1.0)); m[i + ldm * j] = (i == j) ? the_one : the_zero; } Loading