Unverified commit ec228d9a, authored by Peter Doak and committed by GitHub
Browse files

Merge pull request #168 from gbalduzz/fix_ctaux_race_condition

Fix ctaux race condition
parents 4c6e31e7 397886f6
Loading
Loading
Loading
Loading
+1 −2
Original line number Diff line number Diff line
@@ -144,11 +144,10 @@ set(DCA_LIBS
  ${DCA_CONCURRENCY_LIB}
  ${DCA_THREADING_LIBS}
  lapack
  cuda_utils
)

if (DCA_HAVE_CUDA)
  list(APPEND DCA_CUDA_LIBS
    cuda_utils)
  list(APPEND DCA_LIBS
    blas_kernels
    dnfft_kernels
+0 −5
Original line number Diff line number Diff line
@@ -12,11 +12,6 @@
#ifndef DCA_LINALG_UTIL_UTIL_CUBLAS_HPP
#define DCA_LINALG_UTIL_UTIL_CUBLAS_HPP

#include <cublas_v2.h>
#include <stdexcept>
#include <string>
#include "dca/linalg/util/error_cuda.hpp"

namespace dca {
namespace linalg {
namespace util {
+1 −0
Original line number Diff line number Diff line
@@ -310,6 +310,7 @@ void Vector<ScalarType, device_name, Allocator>::setAsync(const Container& rhs,
                                                          const cudaStream_t stream) {
  resizeNoCopy(rhs.size());
  util::memoryCopyAsync(data_, rhs.data(), size_, stream);
//  cudaDeviceSynchronize();
}

template <typename ScalarType, DeviceType device_name, class Allocator>
+1 −0
Original line number Diff line number Diff line
@@ -27,6 +27,7 @@
#include "dca/phys/domains/time_and_frequency/time_domain_left_oriented.hpp"

#ifdef DCA_HAVE_CUDA
#include "dca/linalg/util/cuda_event.hpp"
#include "dca/phys/dca_step/cluster_solver/ctaux/walker/tools/g0_interpolation/g0_interpolation_kernels.hpp"
#endif

+8 −1
Original line number Diff line number Diff line
@@ -6,6 +6,7 @@
// See CITATION.md for citation guidelines, if DCA++ is used for scientific publications.
//
// Author: Peter Staar (taa@zurich.ibm.com)
//         Giovanni Balduzzi (gbalduzz@itp.phys.eth.ch)
//
// This class organizes the interpolation of \f$G^{0}\f$ towards the \f$G^{0}\f$-matrix.
// Template specialization for GPU.
@@ -80,6 +81,8 @@ private:
  dca::linalg::Vector<double, dca::linalg::GPU> tau_GPU;

  using G0_INTERPOLATION_TEMPLATE<parameters_type>::beta;

  linalg::util::CudaEvent config_copied_;
};

template <typename parameters_type>
@@ -172,6 +175,7 @@ void G0_INTERPOLATION<dca::linalg::GPU, parameters_type>::build_G0_matrix(

  G0_e_spin.resizeNoCopy(configuration_size);

  config_copied_.block();
  b_ind.resize(configuration_size);
  r_ind.resize(configuration_size);
  tau.resize(configuration_size);
@@ -212,6 +216,7 @@ void G0_INTERPOLATION<dca::linalg::GPU, parameters_type>::update_G0_matrix(

  int first_shuffled_index = configuration.get_first_shuffled_spin_index(e_spin);

  config_copied_.block();
  b_ind.resize(configuration_size);
  r_ind.resize(configuration_size);
  tau.resize(configuration_size);
@@ -222,10 +227,12 @@ void G0_INTERPOLATION<dca::linalg::GPU, parameters_type>::update_G0_matrix(
    tau[l] = configuration_e_spin[l].get_tau();
  }

  auto stream = linalg::util::getStream(thread_id, stream_id);
  cudaStream_t stream = linalg::util::getStream(thread_id, stream_id);
  // TODO: create generic container for AoS.
  b_ind_GPU.setAsync(b_ind, stream);
  r_ind_GPU.setAsync(r_ind, stream);
  tau_GPU.setAsync(tau, stream);
  config_copied_.record(stream);

  g0kernels::akima_interpolation_on_GPU(
      Nb, Nr, Nt, beta, first_shuffled_index, configuration_size, b_ind_GPU.ptr(), r_ind_GPU.ptr(),
Loading