include/dca/linalg/blas/kernels_gpu.hpp  +10 −3

```diff
@@ -32,6 +32,9 @@ inline void copyRows(int row_size, int n_rows, const int* i_x, const std::comple
   auto cu_y = util::castCudaComplex(y);
   copyRows(row_size, n_rows, i_x, cu_x, ldx, i_y, cu_y, ldy, thread_id, stream_id);
 }
+template <typename Type>
+void copyRows(int row_size, int n_rows, const int* i_x, const Type* x, int ldx, Type* y, int ldy,
+              int thread_id, int stream_id);
 
 template <typename Type>
 void copyCols(int col_size, int n_cols, const int* j_x, const Type* x, int ldx, const int* j_y,
@@ -43,6 +46,9 @@ inline void copyCols(int col_size, int n_cols, const int* j_x, const std::comple
   auto cu_y = util::castCudaComplex(y);
   copyCols(col_size, n_cols, j_x, cu_x, ldx, j_y, cu_y, ldy, thread_id, stream_id);
 }
+template <typename Type>
+void copyCols(int col_size, int n_cols, const int* j_x, const Type* x, int ldx, Type* y, int ldy,
+              int thread_id, int stream_id);
 
 template <typename Type>
 void moveLeft(int m, int n, Type* a, int lda);
@@ -90,8 +96,9 @@ inline void swapCols(int col_size, int n_cols, const int* j_1, const int* j_2,
   auto cu_a = util::castCudaComplex(a);
   swapCols(col_size, n_cols, j_1, j_2, cu_a, lda, thread_id, stream_id);
 }
-}  // blas
-}  // linalg
-}  // dca
+
+}  // namespace blas
+}  // namespace linalg
+}  // namespace dca
 
 #endif  // DCA_LINALG_BLAS_KERNELS_GPU_HPP
```
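The new declarations mirror the existing ones but take no destination index list: the selected rows or columns are gathered into consecutive rows or columns of the output. Below is a minimal host-side sketch of the assumed semantics of the new `copyCols`; the actual implementation is a CUDA kernel (presumably in `kernels_gpu.cu`), and `copyColsReference` is a hypothetical name used only for illustration:

```cpp
// Assumed semantics of the new generic copyCols declaration: gather the
// columns listed in j_x from the column-major matrix x (leading dimension
// ldx) into consecutive columns 0..n_cols-1 of y (leading dimension ldy).
// The real implementation runs element-wise on the device; this host loop
// is only a reference sketch.
template <typename Type>
void copyColsReference(int col_size, int n_cols, const int* j_x, const Type* x, int ldx, Type* y,
                       int ldy) {
  for (int j = 0; j < n_cols; ++j)
    for (int i = 0; i < col_size; ++i)
      y[i + j * ldy] = x[i + j_x[j] * ldx];
}
```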
include/dca/linalg/matrixop.hpp  +16 −4

```diff
@@ -87,10 +87,9 @@ inline void copyCol(const Matrix<Scalar, device_name>& mat_x, int jx,
 // Preconditions: j_x.size() <= j_y.size(), mat_x.nrRows() == mat_y.nrRows()
 //                0 <= j_x[i] < mat_x.nrCols() for 0 <= i < j_x.size(),
 //                0 <= j_y[i] < mat_y.nrCols() for 0 <= i < j_x.size().
-template <typename Scalar>
-inline void copyCols(const Matrix<Scalar, CPU>& mat_x, const Vector<int, CPU>& j_x,
-                     Matrix<Scalar, CPU>& mat_y, const Vector<int, CPU>& j_y, int /*thread_id*/ = 0,
-                     int /*stream_id*/ = 0) {
+template <typename Scalar, class Vec>
+inline void copyCols(const Matrix<Scalar, CPU>& mat_x, const Vec& j_x, Matrix<Scalar, CPU>& mat_y,
+                     const Vec& j_y, int /*thread_id*/ = 0, int /*stream_id*/ = 0) {
   assert(j_x.size() <= j_y.size());
 
   for (int ind_j = 0; ind_j < j_x.size(); ++ind_j)
@@ -107,6 +106,19 @@ inline void copyCols(const Matrix<Scalar, GPU>& mat_x, const Vector<int, GPU>& j
   blas::copyCols(mat_x.nrRows(), j_x.size(), j_x.ptr(), mat_x.ptr(), mat_x.leadingDimension(),
                  j_y.ptr(), mat_y.ptr(), mat_y.leadingDimension(), thread_id, stream_id);
 }
+
+// Copies the j_x columns of mat_x into mat_y, for 0 <= i < j_x.size().
+// In/Out: mat_y
+// Preconditions: mat_x.nrRows() == mat_y.nrRows(),
+//                0 <= j_x[i] < mat_x.nrCols() for 0 <= i < j_x.size().
+template <typename Scalar>
+inline void copyCols(const Matrix<Scalar, GPU>& mat_x, const Vector<int, GPU>& j_x,
+                     Matrix<Scalar, GPU>& mat_y, int thread_id = 0, int stream_id = 0) {
+  assert(mat_x.nrRows() == mat_y.nrRows());
+
+  blas::copyCols(mat_x.nrRows(), j_x.size(), j_x.ptr(), mat_x.ptr(), mat_x.leadingDimension(),
+                 mat_y.ptr(), mat_y.leadingDimension(), thread_id, stream_id);
+}
 #endif  // DCA_HAVE_CUDA
 
 // Copies the ix-th row of mat_x into the iy-th row of mat_y.
```
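The new GPU overload drops the `j_y` index list: the gathered columns land in consecutive columns of `mat_y`, so a scratch matrix can be filled without first building an identity index vector. A usage sketch, assuming a CUDA-enabled build; only the call shape comes from the diff, and allocation of the arguments is left to the caller:

```cpp
#include "dca/linalg/matrixop.hpp"

// Gathers columns j_x[0], ..., j_x[j_x.size() - 1] of src into columns
// 0, ..., j_x.size() - 1 of dst, on the default stream of thread 0.
template <typename Scalar>
void gatherColumns(const dca::linalg::Matrix<Scalar, dca::linalg::GPU>& src,
                   const dca::linalg::Vector<int, dca::linalg::GPU>& j_x,
                   dca::linalg::Matrix<Scalar, dca::linalg::GPU>& dst) {
  // Precondition from the diff: dst has the same number of rows as src.
  dca::linalg::matrixop::copyCols(src, j_x, dst);
}
```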
include/dca/math/util/vector_operations.hpp  +24 −1

```diff
@@ -7,6 +7,7 @@
 //
 // Author: Peter Staar (taa@zurich.ibm.com)
 //         Urs R. Haehner (haehneru@itp.phys.ethz.ch)
+//         Giovanni Balduzzi (gbaludzz@itp.phys.ethz.ch)
 //
 // This file provides utility functions to do various vector operations.
@@ -17,6 +18,7 @@
 #include <cmath>
 #include <complex>
 #include <iostream>
+#include <functional>
 #include <type_traits>
 #include <vector>
@@ -270,6 +272,27 @@ auto operator-(const std::vector<T>& x, const std::vector<T>& y) {
   return subtract(y, x);  // Note: subtract(x, y) is defined as y - x;
 }
 
+// Returns true if pred evaluates to true for any element of v. Returns false if v is empty.
+template <class Vec>
+bool any(const Vec& v, const std::function<bool(typename Vec::value_type)>& pred) {
+  for (const auto& x : v) {
+    if (pred(x)) {
+      return true;
+    }
+  }
+  return false;
+}
+
+// Returns true if pred evaluates to true for all elements of v. Returns true if v is empty.
+template <class Vec>
+bool all(const Vec& v, const std::function<bool(typename Vec::value_type)>& pred) {
+  for (const auto& x : v) {
+    if (!pred(x)) {
+      return false;
+    }
+  }
+  return true;
+}
+
 }  // namespace util
 }  // namespace math
```
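A quick usage sketch of the new helpers; the data and predicates are made up. Because `Vec` is deduced from `v` alone, a plain lambda converts implicitly to the `std::function` parameter:

```cpp
#include <cmath>
#include <vector>

#include "dca/math/util/vector_operations.hpp"

bool frequenciesLookValid(const std::vector<double>& freqs) {
  using dca::math::util::all;
  using dca::math::util::any;

  // True if at least one frequency is negative; false for an empty vector.
  const bool has_negative = any(freqs, [](double w) { return w < 0.; });

  // True if every frequency lies in (-1, 1); true for an empty vector.
  return has_negative && all(freqs, [](double w) { return std::abs(w) < 1.; });
}
```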
include/dca/parallel/stdthread/thread_pool/affinity.hpp  +5 −1

```diff
@@ -19,11 +19,15 @@ namespace parallel {
 // dca::parallel::
 
 // Returns a list of cores id for which the calling thread has affinity.
+// If the macro __linux__ is not defined, returns an empty vector.
 std::vector<int> get_affinity();
 
 // Sets the affinity list of the current thread.
+// If the macro __linux__ is not defined, performs a no-op.
 void set_affinity(const std::vector<int>& cores);
 
-// Number of cores used by this process.
+// Returns the number of visible hardware cores.
+// If the macro __linux__ is not defined, returns std::thread::hardware_concurrency().
 int get_core_count();
 
 }  // namespace parallel
```
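A sketch of how the three functions compose; on non-Linux platforms this reduces to a no-op, per the new comments. The policy shown here (pinning to the first half of the allowed cores) is made up for illustration:

```cpp
#include <vector>

#include "dca/parallel/stdthread/thread_pool/affinity.hpp"

// Restricts the calling thread to the first half of the cores it is currently
// allowed to run on. On non-Linux builds get_affinity() returns an empty
// vector and set_affinity() does nothing, so this is harmless there.
void pinToFirstHalf() {
  std::vector<int> cores = dca::parallel::get_affinity();
  if (cores.size() > 1) {
    cores.resize(cores.size() / 2);
    dca::parallel::set_affinity(cores);
  }
}
```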
include/dca/phys/dca_data/dca_data.hpp  +2 −2

```diff
@@ -232,9 +232,9 @@ public:  // Optional members getters.
         "non_density_interaction"));
     return *non_density_interactions_;
   }
 
-  const auto& get_nondensity_interactions() const {
+  const auto& get_non_density_interactions() const {
     assert(non_density_interactions_);
-    return non_density_interactions_;
+    return *non_density_interactions_;
   }
 
   bool has_non_density_interactions() const {
```
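Both changes target the const getter: the name gains the underscores used by the other `non_density` members, and the return now dereferences the smart-pointer member so the const and non-const overloads yield the same reference type. A stripped-down sketch of the pattern, where `Interaction` is a placeholder for the stored function type and the surrounding class is elided:

```cpp
#include <cassert>
#include <memory>

struct Interaction {};  // Placeholder for the stored function type.

class DataSketch {
public:
  // Before the fix the const getter returned the member itself, i.e. a
  // const std::unique_ptr<Interaction>&; dereferencing makes it return
  // const Interaction&, matching the non-const overload.
  const auto& get_non_density_interactions() const {
    assert(non_density_interactions_);
    return *non_density_interactions_;
  }

private:
  std::unique_ptr<Interaction> non_density_interactions_ = std::make_unique<Interaction>();
};
```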