include/dca/linalg/blas/kernels_gpu.hpp  +10 −3

```diff
@@ -32,6 +32,9 @@ inline void copyRows(int row_size, int n_rows, const int* i_x, const std::comple
   auto cu_y = util::castCudaComplex(y);
   copyRows(row_size, n_rows, i_x, cu_x, ldx, i_y, cu_y, ldy, thread_id, stream_id);
 }
+template <typename Type>
+void copyRows(int row_size, int n_rows, const int* i_x, const Type* x, int ldx, Type* y, int ldy,
+              int thread_id, int stream_id);
 
 template <typename Type>
 void copyCols(int col_size, int n_cols, const int* j_x, const Type* x, int ldx, const int* j_y,
@@ -43,6 +46,9 @@ inline void copyCols(int col_size, int n_cols, const int* j_x, const std::comple
   auto cu_y = util::castCudaComplex(y);
   copyCols(col_size, n_cols, j_x, cu_x, ldx, j_y, cu_y, ldy, thread_id, stream_id);
 }
+template <typename Type>
+void copyCols(int col_size, int n_cols, const int* j_x, const Type* x, int ldx, Type* y, int ldy,
+              int thread_id, int stream_id);
 
 template <typename Type>
 void moveLeft(int m, int n, Type* a, int lda);
@@ -90,8 +96,9 @@ inline void swapCols(int col_size, int n_cols, const int* j_1, const int* j_2,
   auto cu_a = util::castCudaComplex(a);
   swapCols(col_size, n_cols, j_1, j_2, cu_a, lda, thread_id, stream_id);
 }
-}  // blas
-}  // linalg
-}  // dca
+
+}  // namespace blas
+}  // namespace linalg
+}  // namespace dca
 
 #endif  // DCA_LINALG_BLAS_KERNELS_GPU_HPP
```
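The new declarations mirror the existing ones but take no destination index list: the selected rows or columns are gathered into consecutive rows or columns of the output. Below is a minimal host-side sketch of the assumed semantics of the new `copyCols`; the actual implementation is a CUDA kernel (presumably in `kernels_gpu.cu`), and `copyColsReference` is a hypothetical name used only for illustration:

```cpp
// Assumed semantics of the new generic copyCols declaration: gather the
// columns listed in j_x from the column-major matrix x (leading dimension
// ldx) into consecutive columns 0..n_cols-1 of y (leading dimension ldy).
// The real implementation runs element-wise on the device; this host loop
// is only a reference sketch.
template <typename Type>
void copyColsReference(int col_size, int n_cols, const int* j_x, const Type* x, int ldx, Type* y,
                       int ldy) {
  for (int j = 0; j < n_cols; ++j)
    for (int i = 0; i < col_size; ++i)
      y[i + j * ldy] = x[i + j_x[j] * ldx];
}
```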
include/dca/linalg/matrixop.hpp  +16 −4

```diff
@@ -87,10 +87,9 @@ inline void copyCol(const Matrix<Scalar, device_name>& mat_x, int jx,
 // Preconditions: j_x.size() <= j_y.size(), mat_x.nrRows() == mat_y.nrRows()
 //                0 <= j_x[i] < mat_x.nrCols() for 0 <= i < j_x.size(),
 //                0 <= j_y[i] < mat_y.nrCols() for 0 <= i < j_x.size().
-template <typename Scalar>
-inline void copyCols(const Matrix<Scalar, CPU>& mat_x, const Vector<int, CPU>& j_x,
-                     Matrix<Scalar, CPU>& mat_y, const Vector<int, CPU>& j_y, int /*thread_id*/ = 0,
-                     int /*stream_id*/ = 0) {
+template <typename Scalar, class Vec>
+inline void copyCols(const Matrix<Scalar, CPU>& mat_x, const Vec& j_x, Matrix<Scalar, CPU>& mat_y,
+                     const Vec& j_y, int /*thread_id*/ = 0, int /*stream_id*/ = 0) {
   assert(j_x.size() <= j_y.size());
 
   for (int ind_j = 0; ind_j < j_x.size(); ++ind_j)
@@ -107,6 +106,19 @@ inline void copyCols(const Matrix<Scalar, GPU>& mat_x, const Vector<int, GPU>& j
   blas::copyCols(mat_x.nrRows(), j_x.size(), j_x.ptr(), mat_x.ptr(), mat_x.leadingDimension(),
                  j_y.ptr(), mat_y.ptr(), mat_y.leadingDimension(), thread_id, stream_id);
 }
+
+// Copies the j_x columns of mat_x into mat_y, for 0 <= i < j_x.size().
+// In/Out: mat_y
+// Preconditions: mat_x.nrRows() == mat_y.nrRows(),
+//                0 <= j_x[i] < mat_x.nrCols() for 0 <= i < j_x.size().
+template <typename Scalar>
+inline void copyCols(const Matrix<Scalar, GPU>& mat_x, const Vector<int, GPU>& j_x,
+                     Matrix<Scalar, GPU>& mat_y, int thread_id = 0, int stream_id = 0) {
+  assert(mat_x.nrRows() == mat_y.nrRows());
+
+  blas::copyCols(mat_x.nrRows(), j_x.size(), j_x.ptr(), mat_x.ptr(), mat_x.leadingDimension(),
+                 mat_y.ptr(), mat_y.leadingDimension(), thread_id, stream_id);
+}
 #endif  // DCA_HAVE_CUDA
 
 // Copies the ix-th row of mat_x into the iy-th row of mat_y.
```
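The new GPU overload drops the `j_y` index list: the gathered columns land in consecutive columns of `mat_y`, so a scratch matrix can be filled without first building an identity index vector. A usage sketch, assuming a CUDA-enabled build; only the call shape comes from the diff, and allocation of the arguments is left to the caller:

```cpp
#include "dca/linalg/matrixop.hpp"

// Gathers columns j_x[0], ..., j_x[j_x.size() - 1] of src into columns
// 0, ..., j_x.size() - 1 of dst, on the default stream of thread 0.
template <typename Scalar>
void gatherColumns(const dca::linalg::Matrix<Scalar, dca::linalg::GPU>& src,
                   const dca::linalg::Vector<int, dca::linalg::GPU>& j_x,
                   dca::linalg::Matrix<Scalar, dca::linalg::GPU>& dst) {
  // Precondition from the diff: dst has the same number of rows as src.
  dca::linalg::matrixop::copyCols(src, j_x, dst);
}
```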
include/dca/math/util/vector_operations.hpp  +24 −1

```diff
@@ -7,6 +7,7 @@
 //
 // Author: Peter Staar (taa@zurich.ibm.com)
 //         Urs R. Haehner (haehneru@itp.phys.ethz.ch)
+//         Giovanni Balduzzi (gbaludzz@itp.phys.ethz.ch)
 //
 // This file provides utility functions to do various vector operations.
@@ -17,6 +18,7 @@
 #include <cmath>
 #include <complex>
 #include <iostream>
+#include <functional>
 #include <type_traits>
 #include <vector>
@@ -270,6 +272,27 @@ auto operator-(const std::vector<T>& x, const std::vector<T>& y) {
   return subtract(y, x);  // Note: subtract(x, y) is defined as y - x;
 }
 
+// Returns true if pred evaluates to true for any element of v. Returns false if v is empty.
+template <class Vec>
+bool any(const Vec& v, const std::function<bool(typename Vec::value_type)>& pred) {
+  for (const auto& x : v) {
+    if (pred(x)) {
+      return true;
+    }
+  }
+  return false;
+}
+
+// Returns true if pred evaluates to true for all elements of v. Returns true if v is empty.
+template <class Vec>
+bool all(const Vec& v, const std::function<bool(typename Vec::value_type)>& pred) {
+  for (const auto& x : v) {
+    if (!pred(x)) {
+      return false;
+    }
+  }
+  return true;
+}
+
 }  // namespace util
 }  // namespace math
```
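A quick usage sketch of the new helpers; the data and predicates are made up. Because `Vec` is deduced from `v` alone, a plain lambda converts implicitly to the `std::function` parameter:

```cpp
#include <cmath>
#include <vector>

#include "dca/math/util/vector_operations.hpp"

bool frequenciesLookValid(const std::vector<double>& freqs) {
  using dca::math::util::all;
  using dca::math::util::any;

  // True if at least one frequency is negative; false for an empty vector.
  const bool has_negative = any(freqs, [](double w) { return w < 0.; });

  // True if every frequency lies in (-1, 1); true for an empty vector.
  return has_negative && all(freqs, [](double w) { return std::abs(w) < 1.; });
}
```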
include/dca/parallel/stdthread/thread_pool/affinity.hpp  +5 −1

```diff
@@ -19,11 +19,15 @@ namespace parallel {
 // dca::parallel::
 
 // Returns a list of cores id for which the calling thread has affinity.
+// If the macro __linux__ is not defined, returns an empty vector.
 std::vector<int> get_affinity();
 
 // Sets the affinity list of the current thread.
+// If the macro __linux__ is not defined, performs a no-op.
 void set_affinity(const std::vector<int>& cores);
 
-// Number of cores used by this process.
+// Returns the number of visible hardware cores.
+// If the macro __linux__ is not defined, returns std::thread::hardware_concurrency().
 int get_core_count();
 
 }  // namespace parallel
```
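A sketch of how the three functions compose; on non-Linux platforms this reduces to a no-op, per the new comments. The policy shown here (pinning to the first half of the allowed cores) is made up for illustration:

```cpp
#include <vector>

#include "dca/parallel/stdthread/thread_pool/affinity.hpp"

// Restricts the calling thread to the first half of the cores it is currently
// allowed to run on. On non-Linux builds get_affinity() returns an empty
// vector and set_affinity() does nothing, so this is harmless there.
void pinToFirstHalf() {
  std::vector<int> cores = dca::parallel::get_affinity();
  if (cores.size() > 1) {
    cores.resize(cores.size() / 2);
    dca::parallel::set_affinity(cores);
  }
}
```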
include/dca/phys/dca_data/dca_data.hpp  +2 −2

```diff
@@ -232,9 +232,9 @@ public:  // Optional members getters.
         "non_density_interaction"));
     return *non_density_interactions_;
   }
 
-  const auto& get_nondensity_interactions() const {
+  const auto& get_non_density_interactions() const {
     assert(non_density_interactions_);
-    return non_density_interactions_;
+    return *non_density_interactions_;
   }
 
   bool has_non_density_interactions() const {
```
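Both changes target the const getter: the name gains the underscores used by the other `non_density` members, and the return now dereferences the smart-pointer member so the const and non-const overloads yield the same reference type. A stripped-down sketch of the pattern, where `Interaction` is a placeholder for the stored function type and the surrounding class is elided:

```cpp
#include <cassert>
#include <memory>

struct Interaction {};  // Placeholder for the stored function type.

class DataSketch {
public:
  // Before the fix the const getter returned the member itself, i.e. a
  // const std::unique_ptr<Interaction>&; dereferencing makes it return
  // const Interaction&, matching the non-const overload.
  const auto& get_non_density_interactions() const {
    assert(non_density_interactions_);
    return *non_density_interactions_;
  }

private:
  std::unique_ptr<Interaction> non_density_interactions_ = std::make_unique<Interaction>();
};
```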