Commit 9434713d authored by gbalduzz's avatar gbalduzz
Browse files

Merge remote-tracking branch 'origin/master' into gpu_trunk2

parents 1f18ae79 1602e2a5
Loading
Loading
Loading
Loading
+1 −0
Original line number Diff line number Diff line
@@ -63,6 +63,7 @@ PenaltyReturnTypeOnItsOwnLine: 100000000
PointerAlignment: Left
#ReflowComments: true # Supported only from clang 3.9
SortIncludes: false
SortUsingDeclarations: false
SpaceAfterCStyleCast: false
SpaceBeforeAssignmentOperators: true
SpaceBeforeParens: ControlStatements
+10 −15
Original line number Diff line number Diff line
@@ -35,7 +35,7 @@

int main(int argc, char** argv) {
  if (argc < 2) {
    std::cerr << "Usage: " << argv[0] << " input_file.json [skip ed]" << std::endl;
    std::cerr << "Usage: " << argv[0] << " input_file.json" << std::endl;
    return -1;
  }

@@ -43,8 +43,8 @@ int main(int argc, char** argv) {

  try {
    std::string input_file(argv[1]);
    const bool skip_ed = argc > 2 ? std::atoi(argv[2]) : false;
    const bool perform_statistical_test = concurrency.number_of_processors() >= 8 && !skip_ed;

    const bool perform_statistical_test = concurrency.number_of_processors() >= 8;

    Profiler::start();

@@ -94,26 +94,21 @@ int main(int argc, char** argv) {

    // ED solver
    EdSolver ed_solver(parameters, dca_data_imag, dca_data_real);
    if (!skip_ed) {
    ed_solver.initialize(0);
    ed_solver.execute();
    ed_solver.finalize(dca_loop_data);

      if (concurrency.id() == concurrency.first()) {
        ed_solver.write(data_file_ed);
      }
    }

    const auto Sigma_ed(dca_data_imag.Sigma);
    const int tested_frequencies = 10;
    const auto G_ed(dca::math::util::cutFrequency(dca_data_imag.G_k_w, tested_frequencies));

    if (concurrency.id() == concurrency.first()) {
      ed_solver.write(data_file_ed);
    }

    // QMC solver
    // The QMC solver uses the free Greens function G0 computed by the ED solver.
    // It is passed via the dca_data_imag object.
    if (skip_ed)
      dca_data_imag.initialize();

    ClusterSolver qmc_solver(parameters, dca_data_imag);
    qmc_solver.initialize(1);  // 1 = dummy iteration number
    qmc_solver.integrate();
+65 −47
Original line number Diff line number Diff line
@@ -37,21 +37,29 @@ public:
  using ThisType = ReshapableMatrix<ScalarType, device_name, Allocator>;
  using ValueType = ScalarType;

  ReshapableMatrix(int size = 0);
  // Default constructor creates a matrix of zero size and capacity.
  ReshapableMatrix() = default;
  // Initializes a square size x size matrix.
  ReshapableMatrix(int size);
  // Initializes a size.first x size.second matrix.
  ReshapableMatrix(std::pair<int, int> size);

  // Copy and move constructor:
  // Constructs a matrix with name name, size rhs.size() and a copy of the elements of rhs.
  ReshapableMatrix(const ReshapableMatrix<ScalarType, device_name, Allocator>& rhs);
  // Constructs a matrix with name name, size rhs.size(). The elements of rhs are moved.
  // Postcondition: rhs is a (0 x 0) matrix.
  ReshapableMatrix(ReshapableMatrix<ScalarType, device_name, Allocator>&& rhs);

  // Constructs a matrix with name name, size rhs.size() and a copy of the elements of rhs, where rhs
  // elements are stored on a different device.
  // Constructs a matrix with size rhs.size() and a copy of the elements of rhs.
  ReshapableMatrix(const ThisType& rhs);
  template <DeviceType rhs_device_name, class AllocatorRhs>
  ReshapableMatrix(const ReshapableMatrix<ScalarType, rhs_device_name, AllocatorRhs>& rhs);

  // Constructs a matrix with size rhs.size(). The elements of rhs are moved.
  ReshapableMatrix(ThisType&& rhs);

  // Resize the matrix to rhs.size() and copies the elements.
  ReshapableMatrix& operator=(const ThisType& rhs);
  template <DeviceType rhs_device_name, class AllocatorRhs>
  ReshapableMatrix& operator=(const ReshapableMatrix<ScalarType, rhs_device_name, AllocatorRhs>& rhs);

  // Moves the elements of rhs into this matrix.
  ReshapableMatrix& operator=(ThisType&& rhs);

  ~ReshapableMatrix();

  // Returns true if this is equal to other, false otherwise.
@@ -118,23 +126,19 @@ public:
    return size_.first;
  }

  // Resizes *this to a (new_size * new_size) matrix.
  // Resizes *this to a (new_size.first * new_size.second) matrix.
  // The previous elements are not copied, therefore all the elements
  // may have any value after the call to this method.
  // Returns: true if reallocation took place.
  // Remark: The capacity of the matrix and element pointers do not change
  // if new_size <= capacity().first and new_size <= capacity().second.
  bool resizeNoCopy(std::pair<int, int> new_size);
  // Resizes *this to a (new_size * new_size) matrix. See previous method for details.
  bool resizeNoCopy(int new_size) {
    return resizeNoCopy(std::make_pair(new_size, new_size));
  }
  // Resizes *this to a (new_size.first * new_size.second) matrix.
  // The previous elements are not copied, therefore all the elements
  // may have any value after the call to this method.
  // Returns: true if reallocation took place.
  // Remark: The capacity of the matrix and element pointers do not change
  // if new_size.first <= capacity().first and new_size.second <= capacity().second.
  bool resizeNoCopy(std::pair<int, int> new_size);

  // Reserves the space for at least (new_size.first * new_size.second) elements without changing
  // the matrix size. The value of the matrix elements is undefined after calling this method.
  // Returns: true if reallocation took place.
  bool reserveNoCopy(std::size_t new_size);

  void swap(ReshapableMatrix<ScalarType, device_name, Allocator>& other);
@@ -142,11 +146,6 @@ public:
  // Releases the memory allocated by *this and sets size and capacity to zero.
  void clear();

  // Asynchronous assignment (copy with stream = getStream(thread_id, stream_id))
  // + synchronization of stream
  template <DeviceType rhs_device_name>
  void set(const ReshapableMatrix<ScalarType, rhs_device_name>& rhs, int thread_id, int stream_id);

#ifdef DCA_HAVE_CUDA
  // Asynchronous assignment.
  template <DeviceType rhs_device_name>
@@ -161,8 +160,8 @@ public:
#else
  // Synchronous assignment fallback for SetAsync.
  template <DeviceType rhs_device_name>
  void setAsync(const ReshapableMatrix<ScalarType, rhs_device_name>& rhs, int thread_id,
                int stream_id);
  void setAsync(const ReshapableMatrix<ScalarType, rhs_device_name>& rhs, int /*thread_id*/,
                int /*stream_id*/);

#endif  // DCA_HAVE_CUDA

@@ -175,8 +174,8 @@ private:
    return static_cast<size_t>(size.first) * static_cast<size_t>(size.second);
  }

  std::pair<int, int> size_;
  std::size_t capacity_;
  std::pair<int, int> size_ = std::make_pair(0, 0);
  std::size_t capacity_ = 0;

  ValueType* data_ = nullptr;

@@ -198,27 +197,55 @@ ReshapableMatrix<ScalarType, device_name, Allocator>::ReshapableMatrix(std::pair
}

template <typename ScalarType, DeviceType device_name, class Allocator>
ReshapableMatrix<ScalarType, device_name, Allocator>::ReshapableMatrix(const ThisType& rhs) {
  *this = rhs;
}

template <typename ScalarType, DeviceType device_name, class Allocator>
template <DeviceType rhs_device_name, class AllocatorRhs>
ReshapableMatrix<ScalarType, device_name, Allocator>::ReshapableMatrix(
    const ReshapableMatrix<ScalarType, device_name, Allocator>& rhs) {
    const ReshapableMatrix<ScalarType, rhs_device_name, AllocatorRhs>& rhs) {
  *this = rhs;
}

template <typename ScalarType, DeviceType device_name, class Allocator>
ReshapableMatrix<ScalarType, device_name, Allocator>::ReshapableMatrix(
    ReshapableMatrix<ScalarType, device_name, Allocator>&& rhs)
    : size_(rhs.size_), capacity_(rhs.capacity_), data_(rhs.data_) {
  rhs.capacity_ = 0;
  rhs.size_ = std::make_pair(0, 0);
  rhs.data_ = nullptr;
    : ReshapableMatrix<ScalarType, device_name, Allocator>() {
  swap(rhs);
}

template <typename ScalarType, DeviceType device_name, class Allocator>
ReshapableMatrix<ScalarType, device_name, Allocator>& ReshapableMatrix<
    ScalarType, device_name, Allocator>::operator=(const ThisType& rhs) {
  size_ = rhs.size_;
  capacity_ = rhs.capacity_;

  Allocator::deallocate(data_);
  data_ = Allocator::allocate(capacity_);
  util::memoryCopy(data_, leadingDimension(), rhs.data_, rhs.leadingDimension(), size_);
  return *this;
}

template <typename ScalarType, DeviceType device_name, class Allocator>
template <DeviceType rhs_device_name, class AllocatorRhs>
ReshapableMatrix<ScalarType, device_name, Allocator>::ReshapableMatrix(
    const ReshapableMatrix<ScalarType, rhs_device_name, AllocatorRhs>& rhs)
    : size_(rhs.size_), capacity_(rhs.capacity_) {
ReshapableMatrix<ScalarType, device_name, Allocator>& ReshapableMatrix<
    ScalarType, device_name,
    Allocator>::operator=(const ReshapableMatrix<ScalarType, rhs_device_name, AllocatorRhs>& rhs) {
  size_ = rhs.size_;
  capacity_ = rhs.capacity_;

  Allocator::deallocate(data_);
  data_ = Allocator::allocate(capacity_);
  util::memoryCopy(data_, leadingDimension(), rhs.data_, rhs.leadingDimension(), size_);
  return *this;
}

template <typename ScalarType, DeviceType device_name, class Allocator>
ReshapableMatrix<ScalarType, device_name, Allocator>& ReshapableMatrix<
    ScalarType, device_name, Allocator>::operator=(ThisType&& rhs) {
  swap(rhs);
  return *this;
}

template <typename ScalarType, DeviceType device_name, class Allocator>
@@ -284,15 +311,6 @@ void ReshapableMatrix<ScalarType, device_name, Allocator>::clear() {
  capacity_ = 0;
}

template <typename ScalarType, DeviceType device_name, class Allocator>
template <DeviceType rhs_device_name>
void ReshapableMatrix<ScalarType, device_name, Allocator>::set(
    const ReshapableMatrix<ScalarType, rhs_device_name>& rhs, int thread_id, int stream_id) {
  resize(rhs.size_);
  util::memoryCopy(data_, leadingDimension(), rhs.data_, rhs.leadingDimension(), size_, thread_id,
                   stream_id);
}

#ifdef DCA_HAVE_CUDA

template <typename ScalarType, DeviceType device_name, class Allocator>
@@ -322,7 +340,7 @@ template <typename ScalarType, DeviceType device_name, class Allocator>
template <DeviceType rhs_device_name>
void ReshapableMatrix<ScalarType, device_name, Allocator>::setAsync(
    const ReshapableMatrix<ScalarType, rhs_device_name>& rhs, int /*thread_id*/, int /*stream_id*/) {
  set(rhs);
  *this = rhs;
}

#endif  // DCA_HAVE_CUDA
+3 −0
Original line number Diff line number Diff line
@@ -21,6 +21,9 @@ template <typename T>
class AlignedAllocator {
protected:
  T* allocate(std::size_t n) {
    if (!n)
      return nullptr;

    T* ptr;
    int err = posix_memalign((void**)&ptr, 128, n * sizeof(T));
    if (err)
+8 −10
Original line number Diff line number Diff line
@@ -145,11 +145,14 @@ void SpaceTransform2DGpu<RDmn, KDmn, Real>::execute(RMatrix& M) {
}

template <class RDmn, class KDmn, typename Real>
void SpaceTransform2DGpu<RDmn, KDmn, Real>::phaseFactorsAndRearrange(const RMatrix& in, RMatrix& out) {
void SpaceTransform2DGpu<RDmn, KDmn, Real>::phaseFactorsAndRearrange(const RMatrix& in,
                                                                     RMatrix& out) {
  out.resizeNoCopy(in.size());
  const Complex* const phase_factors_ptr =
      BaseClass::hasPhaseFactors() ? getPhaseFactors().ptr() : nullptr;
  details::phaseFactorsAndRearrange(in.ptr(), in.leadingDimension(), out.ptr(),
                                    out.leadingDimension(), n_bands_, nc_, nw_,
                                    getPhaseFactors().ptr(), stream_);
                                    out.leadingDimension(), n_bands_, nc_, nw_, phase_factors_ptr,
                                    stream_);
}

template <class RDmn, class KDmn, typename Real>
@@ -167,18 +170,13 @@ const linalg::Matrix<std::complex<Real>, linalg::GPU>& SpaceTransform2DGpu<RDmn,
template <class RDmn, class KDmn, typename Real>
const auto& SpaceTransform2DGpu<RDmn, KDmn, Real>::getPhaseFactors() {
  auto initialize = []() {
    if (!BaseClass::hasPhaseFactors()) {
      return VectorDev();
    }

    const auto& phase_factors = BaseClass::getPhaseFactors();
    linalg::Vector<std::complex<Real>, linalg::CPU> host_vector(phase_factors.size());
    std::copy_n(phase_factors.values(), phase_factors.size(), host_vector.data());
    std::copy_n(phase_factors.values(), phase_factors.size(), host_vector.ptr());
    return VectorDev(host_vector);
  };

  static const VectorDev phase_factors_dev(initialize(), "Phase factors GPU.");
  assert(BaseClass::hasPhaseFactors() || phase_factors_dev.ptr() == nullptr);
  static const VectorDev phase_factors_dev(initialize());

  return phase_factors_dev;
}
Loading