Merge branch 'master' into olcf_buildinfo (2662f629) · Commits · NDIP / Tool Sources / Direct-Geometry Spectroscopy / DCA / DCA Main

cmake/dca_config.cmake

+8 −0

Original line number	Diff line number	Diff line
		@@ -378,6 +378,14 @@ if(DCA_SYMMETRIZE)
		add_compile_definitions(DCA_WITH_SYMMETRIZATION)
		endif()

		################################################################################
		# Workarounds
		# Opt-in workaround, disabled by default. When switched ON, the preprocessor symbol
		# DCA_FIX_BROKEN_MPICH is added to the compile definitions; the sources can then test for
		# it (include/dca/parallel/mpi_concurrency/dca_mpi.h does so to remap the MPI_CXX_* datatypes).
		option(DCA_FIX_BROKEN_MPICH "Re-define MPI_CXX_* datatypes as the corresponding MPI_C_* datatypes when mpich is the mpi provider."
		OFF)
		if(DCA_FIX_BROKEN_MPICH)
		# Expose the CMake option to the C++ sources as a macro of the same name.
		add_compile_definitions(DCA_FIX_BROKEN_MPICH)
		endif()

		################################################################################
		# Generate applications' config files.
		configure_file("${PROJECT_SOURCE_DIR}/include/dca/config/analysis.hpp.in"

include/dca/linalg/matrix.hpp

+11 −12

Original line number	Diff line number	Diff line
		@@ -313,14 +313,13 @@ void Matrix<ScalarType, device_name>::resize(std::pair<int, int> new_size) {
		assert(new_size.first >= 0 && new_size.second >= 0);
		if (new_size.first > capacity_.first \|\| new_size.second > capacity_.second) {
		std::pair<int, int> new_capacity = capacityMultipleOfBlockSize(new_size);

		ValueType* new_data = nullptr;
		new_data = Allocator::allocate(nrElements(new_capacity));
		// hip memorycpy2D routines don't tolerate leadingDimension = 0
		const std::pair<int, int> copy_size(std::min(new_size.first, size_.first),
		std::min(new_size.second, size_.second));
		util::memoryCopy(new_data, new_capacity.first, data_, leadingDimension(), copy_size);
		Allocator::deallocate(data_);

		data_ = new_data;
		capacity_ = new_capacity;
		size_ = new_size;

include/dca/linalg/reshapable_matrix.hpp

+0 −1

Original line number	Diff line number	Diff line
		@@ -348,7 +348,6 @@ void ReshapableMatrix<ScalarType, device_name, Allocator>::setAsync(

		template <typename ScalarType, DeviceType device_name, class Allocator>
		std::size_t ReshapableMatrix<ScalarType, device_name, Allocator>::nextCapacity(const std::size_t size) {
		assert(size >= 0);
		constexpr std::size_t block_size = 512;

		auto next_power_of_two = [](std::size_t x) {

include/dca/linalg/util/copy.hpp

+8 −1

Original line number	Diff line number	Diff line
		@@ -40,7 +40,6 @@ void memoryCopyCpu(ScalarType* dest, int ld_dest, const ScalarType* src, int ld_
		assert(size.first <= ld_src);
		assert(size.first >= 0);
		assert(size.second >= 0);

		size_t ncols = size.second;
		for (size_t i = 0; i < ncols; ++i) {
		memoryCopyCpu(dest + i * ld_dest, src + i * ld_src, size.first);
		@@ -53,6 +52,8 @@ void memoryCopyCpu(ScalarType* dest, int ld_dest, const ScalarType* src, int ld_
		// The host resumes execution of the program only after the copy has completed.
		template <typename ScalarType>
		void memoryCopy(ScalarType* dest, const ScalarType* src, size_t size) {
		if (size == 0)
		return;
		cudaError_t ret = cudaMemcpy(dest, src, size * sizeof(ScalarType), cudaMemcpyDefault);
		checkRC(ret);
		}
		@@ -64,6 +65,8 @@ void memoryCopy(ScalarType* dest, const ScalarType* src, size_t size) {
		template <typename ScalarType>
		void memoryCopy(ScalarType* dest, int ld_dest, const ScalarType* src, int ld_src,
		std::pair<int, int> size) {
		if (ld_dest == 0 \|\| ld_src == 0 \|\| (size.first == 0 && size.second == 0))
		return;
		cudaError_t ret = cudaMemcpy2D(dest, ld_dest * sizeof(ScalarType), src, ld_src * sizeof(ScalarType),
		size.first * sizeof(ScalarType), size.second, cudaMemcpyDefault);
		try {
		@@ -78,6 +81,8 @@ void memoryCopy(ScalarType* dest, int ld_dest, const ScalarType* src, int ld_src
		// Asynchronous 1D memory copy.
		template <typename ScalarType>
		void memoryCopyAsync(ScalarType* dest, const ScalarType* src, size_t size, const cudaStream_t stream) {
		if (size == 0)
		return;
		cudaError_t ret = cudaMemcpyAsync(dest, src, size * sizeof(ScalarType), cudaMemcpyDefault, stream);
		try {
		checkRC(ret);
		@@ -102,6 +107,8 @@ void memoryCopyAsync(ScalarType* dest, const ScalarType* src, size_t size, int t
		template <typename ScalarType>
		void memoryCopyAsync(ScalarType* dest, int ld_dest, const ScalarType* src, int ld_src,
		std::pair<int, int> size, const cudaStream_t stream) {
		if (ld_dest == 0 \|\| ld_src == 0 \|\| (size.first == 0 && size.second == 0))
		return;
		cudaError_t ret =
		cudaMemcpy2DAsync(dest, ld_dest * sizeof(ScalarType), src, ld_src * sizeof(ScalarType),
		size.first * sizeof(ScalarType), size.second, cudaMemcpyDefault, stream);

include/dca/parallel/mpi_concurrency/dca_mpi.h

0 → 100644

+19 −0

Original line number	Diff line number	Diff line
		// Wrapper around <mpi.h> that optionally patches the MPI C++ datatype macros.
		//
		// When MPICH_NUMVERSION is defined (presumably by MPICH's own mpi.h - confirm against the
		// MPI installation in use) and the build defines DCA_FIX_BROKEN_MPICH, each MPI_CXX_* datatype
		// macro listed below is undefined and re-defined as the corresponding MPI_C_* datatype.
		// In every other configuration this header does nothing beyond including <mpi.h>.

		#ifndef DCA_PARALLEL_MPI_CONCURRENCY_DCA_MPI_H
		#define DCA_PARALLEL_MPI_CONCURRENCY_DCA_MPI_H

		#include <mpi.h>

		#ifdef MPICH_NUMVERSION
		#ifdef DCA_FIX_BROKEN_MPICH
		/* Work around broken MPI-3 C++ datatypes caused by faulty MPICH builds. */
		#undef MPI_CXX_BOOL
		#define MPI_CXX_BOOL MPI_C_BOOL

		#undef MPI_CXX_FLOAT_COMPLEX
		#define MPI_CXX_FLOAT_COMPLEX MPI_C_FLOAT_COMPLEX

		#undef MPI_CXX_DOUBLE_COMPLEX
		#define MPI_CXX_DOUBLE_COMPLEX MPI_C_DOUBLE_COMPLEX

		#undef MPI_CXX_LONG_DOUBLE_COMPLEX
		#define MPI_CXX_LONG_DOUBLE_COMPLEX MPI_C_LONG_DOUBLE_COMPLEX

		#endif // DCA_FIX_BROKEN_MPICH
		#endif // MPICH_NUMVERSION

		#endif // DCA_PARALLEL_MPI_CONCURRENCY_DCA_MPI_H