Commit a3a3042c authored by Doak, Peter W.
Browse files

Final push of fixes and cleanup for complex g0 tp accumulation.

parent 95584d2d
Loading
Loading
Loading
Loading
+7 −10
Original line number Diff line number Diff line
@@ -63,7 +63,7 @@ private:
                                  e_spin_states_type e_spin);

  template <class configuration_type>
  static void swap_and_remove_vertices(configuration_type& full_configuration,
  static bool swap_and_remove_vertices(configuration_type& full_configuration,
                                       linalg::util::HostVector<int>& source_index,
                                       linalg::util::HostVector<int>& target_index,
                                       e_spin_states_type e_spin);
@@ -195,24 +195,20 @@ void SHRINK_TOOLS<Profiler, device_t, Real>::reorganize_configuration_test(

  swap_and_remove_vertices(full_configuration, source_index_up_, target_index_up_, e_UP);
  swap_and_remove_vertices(full_configuration, source_index_dn_, target_index_dn_, e_DN);

#ifndef NDEBUG
  test_swap_vectors(source_index_up_, target_index_up_, N_up.size().first);
  test_swap_vectors(source_index_dn_, target_index_dn_, N_dn.size().first);
#endif  // NDEBUG

  shrink_tools_algorithm_obj_.execute(source_index_up_, target_index_up_, N_up, G0_up,
                                      source_index_dn_, target_index_dn_, N_dn, G0_dn);

  erase_non_creatable_and_non_annihilatable_spins(full_configuration, N_up, N_dn, G0_up, G0_dn);
  assert(full_configuration.assert_consistency());
}

template <class Profiler, dca::linalg::DeviceType device_t, typename Real>
template <class configuration_type>
void SHRINK_TOOLS<Profiler, device_t, Real>::swap_and_remove_vertices(configuration_type& full_configuration,
                                                            HostVector& source_index,
                                                            HostVector& target_index,
bool SHRINK_TOOLS<Profiler, device_t, Real>::swap_and_remove_vertices(
    configuration_type& full_configuration, HostVector& source_index, HostVector& target_index,
    e_spin_states_type e_spin) {
  const auto death_condition = [](const vertex_singleton& v) { return v.get_HS_spin() == HS_ZERO; };

@@ -220,7 +216,7 @@ void SHRINK_TOOLS<Profiler, device_t, Real>::swap_and_remove_vertices(configurat
  int configuration_size = configuration_e_spin.size();

  if (configuration_size == 0) {
    return;
    return false;
  }

  int dead_spin = 0;
@@ -249,7 +245,7 @@ void SHRINK_TOOLS<Profiler, device_t, Real>::swap_and_remove_vertices(configurat
    }

    if (dead_spin >= living_spin) {
      break;
      return false;
    }

    assert(configuration_e_spin[dead_spin].get_HS_spin() == HS_ZERO);
@@ -270,6 +266,7 @@ void SHRINK_TOOLS<Profiler, device_t, Real>::swap_and_remove_vertices(configurat
    dead_spin++;
    living_spin--;
  }
  return true;
}

template <class Profiler, linalg::DeviceType device_t, typename Real>
+2 −0
Original line number Diff line number Diff line
@@ -655,6 +655,8 @@ void StdThreadQmciClusterSolver<QmciSolver>::startWalkerAndAccumulator(int id,

  finalizeWalker(walker, id);

  accum_fingerprints_[id] = accumulator_obj.deviceFingerprint();

  Profiler::stop_threading(id);

  if (current_exception)
+11 −7
Original line number Diff line number Diff line
@@ -93,13 +93,11 @@ template <typename Scalar>
bool CT_AUX_WALKER_TOOLS<dca::linalg::CPU, Scalar>::test_max_min(
  int n, dca::linalg::Matrix<Scalar, dca::linalg::CPU>& Gamma_LU, Real max_ref, Real min_ref) {
  Real Gamma_val = std::abs(Gamma_LU(0, 0));

  Real max = Gamma_val;
  Real min = Gamma_val;

  for (int i = 1; i < n + 1; i++) {
    Gamma_val = std::abs(Gamma_LU(i, i));

    max = Gamma_val > max ? Gamma_val : max;
    min = Gamma_val < min ? Gamma_val : min;
  }
@@ -107,18 +105,19 @@ bool CT_AUX_WALKER_TOOLS<dca::linalg::CPU, Scalar>::test_max_min(
  if (std::abs(max_ref - max) < 1.e-12 and std::fabs(min_ref - min) < 1.e-12)
    return true;
  else {
    std::cout << __FUNCTION__ << '\n';
    std::cout << __FUNCTION__ << " for Gamma_LU has failed!\n";
    std::cout << "Has failed!\n";
    std::cout.precision(16);
    std::cout << "\n\t n : " << n << "\n";
    std::cout << std::scientific;
    std::cout << "max" << "\t" << "max_ref" << "\t" << "std::fabs(max_ref - max)" << '\n';
    std::cout << max << "\t" << max_ref << "\t" << std::fabs(max_ref - max) << '\n';
    std::cout << min << "\t" << min_ref << "\t" << std::fabs(min_ref - min) << '\n';
    std::cout << std::endl;

    Gamma_LU.print();

    throw std::logic_error(__FUNCTION__);
    return false;
  }
}

@@ -149,8 +148,10 @@ auto CT_AUX_WALKER_TOOLS<dca::linalg::CPU, Scalar>::solve_Gamma(
    min = Gamma_val;
  }

  assert(test_max_min(n, Gamma_LU, max, min));

#ifndef NDEBUG
  // Debug-only consistency check: test_max_min rescans the diagonal of Gamma_LU and compares
  // the extreme absolute values against the max/min passed in. The message names solve_Gamma
  // (this function) so a failure here is not mistaken for one in solve_Gamma_blocked, which
  // performs the same check.
  if (!test_max_min(n, Gamma_LU, max, min))
    throw std::runtime_error("solve_Gamma test_max_min on Gamma_LU failed!");
#endif
  Scalar phani_gamma = exp_delta_V - Real(1.);
  Scalar determinant_ratio = -phani_gamma * Gamma_LU_n_n;

@@ -370,7 +371,10 @@ auto CT_AUX_WALKER_TOOLS<dca::linalg::CPU, Scalar>::solve_Gamma_blocked(
  }

  // std::cout << min << ", " << max << ")\t";
  assert(test_max_min(n, Gamma_LU, max, min));
#ifndef NDEBUG
  if(!test_max_min(n, Gamma_LU, max, min))
    throw std::runtime_error("solve_Gamma_blocked test_max_min on Gamma_LU failed!");
#endif

  auto phani_gamma = exp_delta_V - Real(1.);
  auto determinant_ratio = -phani_gamma * Gamma_LU_n_n;
+5 −21
Original line number Diff line number Diff line
@@ -28,9 +28,6 @@ namespace ctaux {
namespace walkerkernels {
// dca::phys::solver::ctaux::walkerkernels::

  template<typename T>
  using IsCudaComplex_t = dca::util::IsCudaComplex_t<T>;
  
template <class T>
__global__ void compute_Gamma_kernel(T* Gamma, int Gamma_n, int Gamma_ld, const T* N, int N_r,
                                     int N_c, int N_ld, const T* G, int G_r, int G_c, int G_ld,
@@ -43,29 +40,16 @@ __global__ void compute_Gamma_kernel(T* Gamma, int Gamma_n, int Gamma_ld, const

  const int vertex_index = N_c - G_c;

    T the_one;
  auto the_one = dca::util::TheOne<T>::value;
  T the_zero{};
    if constexpr (IsCudaComplex_t<T>::value)
      the_one = T{1.0, 0.0};
    else
      the_one = 1.0;

  if (i < Gamma_n and j < Gamma_n) {
    const int configuration_e_spin_index_i = random_vertex_vector[i];
    const int configuration_e_spin_index_j = random_vertex_vector[j];

    if (configuration_e_spin_index_j < vertex_index) {
      T delta;
      if (configuration_e_spin_index_i == configuration_e_spin_index_j)
        if constexpr (dca::util::IsCudaComplex_t<T>::value)
          delta = {1., 0};
        else
          delta = 1;

      T delta = (configuration_e_spin_index_i == configuration_e_spin_index_j) ? the_one : the_zero;
      const auto N_ij = N[configuration_e_spin_index_i + configuration_e_spin_index_j * N_ld];

      
      
      Gamma[i + j * Gamma_ld] = (N_ij * exp_V[j] - delta) / (exp_V[j] - the_one);
    }
    else
+1 −1
Original line number Diff line number Diff line
@@ -74,6 +74,6 @@
  "CT-AUX" :
  {
    "initial-configuration-size" :5,
    "max-submatrix-size":1
    "max-submatrix-size":4
  }
}