Unverified Commit 2662f629 authored by Peter Doak's avatar Peter Doak Committed by GitHub
Browse files

Merge branch 'master' into olcf_buildinfo

parents 4e7b5f74 a22a2736
Loading
Loading
Loading
Loading
+8 −0
Original line number Diff line number Diff line
@@ -378,6 +378,14 @@ if(DCA_SYMMETRIZE)
  add_compile_definitions(DCA_WITH_SYMMETRIZATION)
endif()

################################################################################
# Workarounds
# Opt-in workaround switch; it defaults to OFF, so a normal configure is unaffected.
# The help string states the intent: MPI_CXX_* datatypes are re-defined as the
# corresponding MPI_C_* datatypes when mpich is the MPI provider.
option(DCA_FIX_BROKEN_MPICH "Re-define MPI_CXX_* datatypes as the corresponding MPI_C_* datatypes when mpich is the mpi provider."
       OFF)
# When the option is ON, the only thing done here is to pass the identically named
# preprocessor definition to the compiler; the actual re-definition has to happen
# in the sources that test for DCA_FIX_BROKEN_MPICH.
if(DCA_FIX_BROKEN_MPICH)
  add_compile_definitions(DCA_FIX_BROKEN_MPICH)
endif()

################################################################################
# Generate applications' config files.
configure_file("${PROJECT_SOURCE_DIR}/include/dca/config/analysis.hpp.in"
+11 −12
Original line number Diff line number Diff line
@@ -313,14 +313,13 @@ void Matrix<ScalarType, device_name>::resize(std::pair<int, int> new_size) {
  assert(new_size.first >= 0 && new_size.second >= 0);
  if (new_size.first > capacity_.first || new_size.second > capacity_.second) {
    std::pair<int, int> new_capacity = capacityMultipleOfBlockSize(new_size);

	ValueType* new_data = nullptr;
	new_data = Allocator::allocate(nrElements(new_capacity));
	// HIP's memcpy2D routines do not tolerate a leadingDimension of 0.
	const std::pair<int, int> copy_size(std::min(new_size.first, size_.first),
					    std::min(new_size.second, size_.second));
	util::memoryCopy(new_data, new_capacity.first, data_, leadingDimension(), copy_size);
	Allocator::deallocate(data_);

	data_ = new_data;
	capacity_ = new_capacity;
	size_ = new_size;
+0 −1
Original line number Diff line number Diff line
@@ -348,7 +348,6 @@ void ReshapableMatrix<ScalarType, device_name, Allocator>::setAsync(

template <typename ScalarType, DeviceType device_name, class Allocator>
std::size_t ReshapableMatrix<ScalarType, device_name, Allocator>::nextCapacity(const std::size_t size) {
  assert(size >= 0);
  constexpr std::size_t block_size = 512;

  auto next_power_of_two = [](std::size_t x) {
+8 −1
Original line number Diff line number Diff line
@@ -40,7 +40,6 @@ void memoryCopyCpu(ScalarType* dest, int ld_dest, const ScalarType* src, int ld_
  assert(size.first <= ld_src);
  assert(size.first >= 0);
  assert(size.second >= 0);

  size_t ncols = size.second;
  for (size_t i = 0; i < ncols; ++i) {
    memoryCopyCpu(dest + i * ld_dest, src + i * ld_src, size.first);
@@ -53,6 +52,8 @@ void memoryCopyCpu(ScalarType* dest, int ld_dest, const ScalarType* src, int ld_
// The host continues the execution of the program when the copy is terminated.
template <typename ScalarType>
void memoryCopy(ScalarType* dest, const ScalarType* src, size_t size) {
  if (size == 0)
    return;
  cudaError_t ret = cudaMemcpy(dest, src, size * sizeof(ScalarType), cudaMemcpyDefault);
  checkRC(ret);
}
@@ -64,6 +65,8 @@ void memoryCopy(ScalarType* dest, const ScalarType* src, size_t size) {
template <typename ScalarType>
void memoryCopy(ScalarType* dest, int ld_dest, const ScalarType* src, int ld_src,
                std::pair<int, int> size) {
  if (ld_dest == 0 || ld_src == 0 || (size.first == 0 && size.second == 0))
    return;
  cudaError_t ret = cudaMemcpy2D(dest, ld_dest * sizeof(ScalarType), src, ld_src * sizeof(ScalarType),
                                 size.first * sizeof(ScalarType), size.second, cudaMemcpyDefault);
  try {
@@ -78,6 +81,8 @@ void memoryCopy(ScalarType* dest, int ld_dest, const ScalarType* src, int ld_src
// Asynchronous 1D memory copy.
template <typename ScalarType>
void memoryCopyAsync(ScalarType* dest, const ScalarType* src, size_t size, const cudaStream_t stream) {
  if (size == 0)
    return;
  cudaError_t ret = cudaMemcpyAsync(dest, src, size * sizeof(ScalarType), cudaMemcpyDefault, stream);
  try {
    checkRC(ret);
@@ -102,6 +107,8 @@ void memoryCopyAsync(ScalarType* dest, const ScalarType* src, size_t size, int t
template <typename ScalarType>
void memoryCopyAsync(ScalarType* dest, int ld_dest, const ScalarType* src, int ld_src,
                     std::pair<int, int> size, const cudaStream_t stream) {
  if (ld_dest == 0 || ld_src == 0 || (size.first == 0 && size.second == 0))
    return;
  cudaError_t ret =
      cudaMemcpy2DAsync(dest, ld_dest * sizeof(ScalarType), src, ld_src * sizeof(ScalarType),
                        size.first * sizeof(ScalarType), size.second, cudaMemcpyDefault, stream);
+19 −0
Original line number Diff line number Diff line
#include <mpi.h>

#if defined(MPICH_NUMVERSION) && defined(DCA_FIX_BROKEN_MPICH)
/*
 * Workaround for mpich builds whose MPI-3 C++ datatypes are broken.
 * Takes effect only when both macros above are defined: MPICH_NUMVERSION
 * (expected to come from mpich's <mpi.h>) and DCA_FIX_BROKEN_MPICH (set by
 * the build configuration). Every MPI_CXX_* datatype name is then re-pointed
 * at the corresponding MPI_C_* datatype.
 * NOTE(review): this assumes the C and C++ representations of bool and the
 * complex types are interchangeable on the target platform - confirm.
 */

/* First discard the definitions supplied by the MPI header... */
#undef MPI_CXX_BOOL
#undef MPI_CXX_FLOAT_COMPLEX
#undef MPI_CXX_DOUBLE_COMPLEX
#undef MPI_CXX_LONG_DOUBLE_COMPLEX

/* ...then alias each C++ datatype name to its C counterpart. */
#define MPI_CXX_BOOL                MPI_C_BOOL
#define MPI_CXX_FLOAT_COMPLEX       MPI_C_FLOAT_COMPLEX
#define MPI_CXX_DOUBLE_COMPLEX      MPI_C_DOUBLE_COMPLEX
#define MPI_CXX_LONG_DOUBLE_COMPLEX MPI_C_LONG_DOUBLE_COMPLEX

#endif // MPICH_NUMVERSION && DCA_FIX_BROKEN_MPICH
Loading