Unverified Commit 9b1ddbd3 authored by gbalduzz's avatar gbalduzz Committed by GitHub
Browse files

Merge branch 'master' into write_checkpoints

parents 991c26df 7e0b7747
Loading
Loading
Loading
Loading
+4 −1
Original line number Diff line number Diff line
@@ -104,6 +104,9 @@ function(dca_add_gtest name)
      target_include_directories(${name} PRIVATE ${MAGMA_INCLUDE_DIR})
      target_compile_definitions(${name} PRIVATE DCA_HAVE_MAGMA)
    endif()
    if(DCA_WITH_CUDA_AWARE_MPI)
      target_compile_definitions(${name} PRIVATE DCA_HAVE_CUDA_AWARE_MPI)
    endif()
    cuda_add_cublas_to_target(${name})
  endif()

@@ -118,7 +121,7 @@ function(dca_add_gtest name)

    add_test(NAME ${name}
             COMMAND ${TEST_RUNNER} ${MPIEXEC_NUMPROC_FLAG} ${DCA_ADD_GTEST_MPI_NUMPROC}
                     ${MPIEXEC_PREFLAGS}  ${CVD_LAUNCHER} "$<TARGET_FILE:${name}>")
                     ${MPIEXEC_PREFLAGS} ${SMPIARGS_FLAG_MPI} ${CVD_LAUNCHER} "$<TARGET_FILE:${name}>")
                 target_link_libraries(${name} ${MPI_C_LIBRARIES})
  else()
    if (TEST_RUNNER)
+6 −6
Original line number Diff line number Diff line
@@ -123,10 +123,10 @@ private:
  std::array<MPI_Request, 2> recv_requests_{MPI_REQUEST_NULL, MPI_REQUEST_NULL};
  std::array<MPI_Request, 2> send_requests_{MPI_REQUEST_NULL, MPI_REQUEST_NULL};

#ifndef DCA_WITH_CUDA_AWARE_MPI
#ifndef DCA_HAVE_CUDA_AWARE_MPI
  std::array<std::vector<Complex>, 2> sendbuffer_;
  std::array<std::vector<Complex>, 2> recvbuffer_;
#endif  // DCA_WITH_CUDA_AWARE_MPI
#endif  // DCA_HAVE_CUDA_AWARE_MPI
};

template <class Parameters>
@@ -366,7 +366,7 @@ void TpAccumulator<Parameters, linalg::GPU, DistType::MPI>::send(const std::arra
  using dca::parallel::MPITypeMap;
  const auto g_size = data[0].size().first * data[0].size().second;

#ifdef DCA_WITH_CUDA_AWARE_MPI
#ifdef DCA_HAVE_CUDA_AWARE_MPI
  for (int s = 0; s < 2; ++s) {
    MPI_Isend(data[s].ptr(), g_size, MPITypeMap<Complex>::value(), target, thread_id_ + 1,
              MPI_COMM_WORLD, &request[s]);
@@ -381,7 +381,7 @@ void TpAccumulator<Parameters, linalg::GPU, DistType::MPI>::send(const std::arra
    MPI_Isend(sendbuffer_[s].data(), g_size, MPITypeMap<Complex>::value(), target, thread_id_ + 1,
              MPI_COMM_WORLD, &request[s]);
  }
#endif  // DCA_WITH_CUDA_AWARE_MPI
#endif  // DCA_HAVE_CUDA_AWARE_MPI
}

template <class Parameters>
@@ -390,7 +390,7 @@ void TpAccumulator<Parameters, linalg::GPU, DistType::MPI>::receive(
  using dca::parallel::MPITypeMap;
  const auto g_size = data[0].size().first * data[0].size().second;

#ifdef DCA_WITH_CUDA_AWARE_MPI
#ifdef DCA_HAVE_CUDA_AWARE_MPI
  for (int s = 0; s < 2; ++s) {
    MPI_Irecv(data[s].ptr(), g_size, MPITypeMap<Complex>::value(), source, thread_id_ + 1,
              MPI_COMM_WORLD, &request[s]);
@@ -408,7 +408,7 @@ void TpAccumulator<Parameters, linalg::GPU, DistType::MPI>::receive(
    cudaMemcpy(data[s].ptr(), recvbuffer_[s].data(), g_size * sizeof(Complex),
               cudaMemcpyHostToDevice);
  }
#endif  // DCA_WITH_CUDA_AWARE_MPI
#endif  // DCA_HAVE_CUDA_AWARE_MPI
}

}  // namespace accumulator
+0 −130
Original line number Diff line number Diff line
// Copyright (C) 2020 ETH Zurich
// Copyright (C) 2020 UT-Battelle, LLC
// All rights reserved.
//
// See LICENSE.txt for terms of usage.
// See CITATION.txt for citation guidelines if you use this code for scientific publications.
//
// Author: Peter Doak (doakpw@ornl.gov)
// Giovanni Balduzzi (gbalduzz@itp.phys.ethz.ch)
//
// This file implements a no-change test for the two-particle accumulation with the distributed G4 for MPI/GPU.



#include "dca/phys/dca_step/cluster_solver/shared_tools/accumulation/tp/tp_accumulator_gpu.hpp"

#include <array>
#include <functional>
#include <string>
#include "gtest/gtest.h"

#include "dca/function/util/difference.hpp"
#include "dca/math/random/std_random_wrapper.hpp"
#include "dca/phys/four_point_type.hpp"
#include "test/unit/phys/dca_step/cluster_solver/shared_tools/accumulation/accumulation_test.hpp"
#include "test/unit/phys/dca_step/cluster_solver/test_setup.hpp"

// Baseline-update switch. It is not referenced by either test in this file.
constexpr bool update_baseline = false;

// Directory containing the JSON input used by the fixture. DCA_SOURCE_DIR is not defined in this
// file; presumably it is injected by the build system (TODO confirm).
#define INPUT_DIR \
  DCA_SOURCE_DIR "/test/unit/phys/dca_step/cluster_solver/shared_tools/accumulation/tp/"

// Full path of the input file handed to the G0Setup fixture as a template argument.
constexpr char input_file[] = INPUT_DIR "input_4x4_multitransfer.json";

// Helper types used to build the sample matrices and vertex configurations fed to the accumulators.
using ConfigGenerator = dca::testing::AccumulationTest<double>;
using Configuration = ConfigGenerator::Configuration;
using Sample = ConfigGenerator::Sample;

// Test fixture: G0 setup on the bilayer lattice with the CT-AUX solver, configured from input_file.
using TpAccumulatorGpuTest =
    dca::testing::G0Setup<dca::testing::LatticeBilayer, dca::phys::solver::CT_AUX, input_file>;

// Counter shared by all tests in this file: its value is passed to resetAccumulation() and it is
// incremented after each use. Declared as `unsigned int` because `uint` is not a standard C++ type
// and is only available through non-portable system headers.
unsigned int loop_counter = 0;



// Accumulates one sample with both the CPU accumulator and the GPU accumulator instantiated with
// DistType::MPI, then checks that the resulting G4 agree channel by channel.
TEST_F(TpAccumulatorGpuTest, Accumulate) {
  namespace acc = dca::phys::solver::accumulator;
  using HostAccumulator = acc::TpAccumulator<Parameters, dca::linalg::CPU>;
  using DeviceAccumulator = acc::TpAccumulator<Parameters, dca::linalg::GPU, dca::DistType::MPI>;

  dca::linalg::util::initializeMagma();

  // Build the sample matrix and the matching configuration.
  const std::array<int, 2> matrix_sizes{27, 24};
  Sample sample;
  Configuration configuration;
  ConfigGenerator::prepareConfiguration(configuration, sample,
                                        TpAccumulatorGpuTest::BDmn::dmn_size(),
                                        TpAccumulatorGpuTest::RDmn::dmn_size(),
                                        parameters_.get_beta(), matrix_sizes);

  // The channels must be set before the accumulators are constructed from parameters_.
  parameters_.set_four_point_channels(std::vector<dca::phys::FourPointType>{
      dca::phys::PARTICLE_HOLE_TRANSVERSE, dca::phys::PARTICLE_HOLE_MAGNETIC,
      dca::phys::PARTICLE_HOLE_CHARGE, dca::phys::PARTICLE_HOLE_LONGITUDINAL_UP_UP,
      dca::phys::PARTICLE_HOLE_LONGITUDINAL_UP_DOWN, dca::phys::PARTICLE_PARTICLE_UP_DOWN});

  HostAccumulator accumulatorHost(data_->G0_k_w_cluster_excluded, parameters_);
  DeviceAccumulator accumulatorDevice(data_->G0_k_w_cluster_excluded, parameters_);
  const int mc_sign = 1;

  // Device pass first, then the host reference, both using the same loop id.
  accumulatorDevice.resetAccumulation(loop_counter);
  accumulatorDevice.accumulate(sample, configuration, mc_sign);
  accumulatorDevice.finalize();

  accumulatorHost.resetAccumulation(loop_counter);
  accumulatorHost.accumulate(sample, configuration, mc_sign);
  accumulatorHost.finalize();

  ++loop_counter;

  // Compare every channel of the host result against the device result.
  const auto& host_g4 = accumulatorHost.get_sign_times_G4();
  const auto& device_g4 = accumulatorDevice.get_sign_times_G4();
  for (std::size_t ch = 0; ch < host_g4.size(); ++ch) {
    const auto diff = dca::func::util::difference(host_g4[ch], device_g4[ch]);
    EXPECT_GT(5e-7, diff.l_inf);
  }
}

// Checks that summing two separately accumulated GPU accumulators with sumTo() gives the same G4
// as accumulating both samples on a single accumulator.
TEST_F(TpAccumulatorGpuTest, SumToAndFinalize) {
  using Accumulator =
      dca::phys::solver::accumulator::TpAccumulator<G0Setup::Parameters, dca::linalg::GPU>;

  dca::linalg::util::initializeMagma();

  // The channel must be set before the accumulators are constructed from parameters_.
  parameters_.set_four_point_channel(dca::phys::PARTICLE_HOLE_TRANSVERSE);

  // The trailing integer argument distinguishes the four accumulators (0 through 3).
  Accumulator accumulator_sum(data_->G0_k_w_cluster_excluded, parameters_, 0);
  Accumulator accumulator1(data_->G0_k_w_cluster_excluded, parameters_, 1);
  Accumulator accumulator2(data_->G0_k_w_cluster_excluded, parameters_, 2);
  Accumulator accumulator3(data_->G0_k_w_cluster_excluded, parameters_, 3);

  // Fills a (configuration, sample) pair for the given matrix sizes.
  auto fill_sample = [&](auto& configuration, auto& sample, const auto& sizes) {
    ConfigGenerator::prepareConfiguration(configuration, sample,
                                          TpAccumulatorGpuTest::BDmn::dmn_size(),
                                          TpAccumulatorGpuTest::RDmn::dmn_size(),
                                          parameters_.get_beta(), sizes);
  };

  const std::array<int, 2> matrix_sizes{3, 4};
  const int mc_sign = -1;
  Sample M1, M2;
  Configuration config1, config2;
  fill_sample(config1, M1, matrix_sizes);
  fill_sample(config2, M2, matrix_sizes);

  // First pass: one sample per accumulator, then sum both into accumulator_sum.
  const int first_loop = loop_counter++;
  accumulator1.resetAccumulation(first_loop);
  accumulator2.resetAccumulation(first_loop);
  accumulator_sum.resetAccumulation(first_loop);

  accumulator1.accumulate(M1, config1, mc_sign);
  accumulator2.accumulate(M2, config2, mc_sign);
  accumulator1.sumTo(accumulator_sum);
  accumulator2.sumTo(accumulator_sum);
  accumulator_sum.finalize();

  // Second pass under a new loop id: reset accumulator3 and accumulate both samples on it.
  accumulator3.resetAccumulation(loop_counter++);
  accumulator3.accumulate(M1, config1, mc_sign);
  accumulator3.accumulate(M2, config2, mc_sign);
  accumulator3.finalize();

  const auto diff = dca::func::util::difference(accumulator3.get_sign_times_G4()[0],
                                                accumulator_sum.get_sign_times_G4()[0]);
  EXPECT_GT(5e-7, diff.l_inf);
}