Loading include/dca/phys/dca_step/cluster_solver/ctaux/walker/tools/shrink_tools.hpp +7 −10 Original line number Diff line number Diff line Loading @@ -63,7 +63,7 @@ private: e_spin_states_type e_spin); template <class configuration_type> static void swap_and_remove_vertices(configuration_type& full_configuration, static bool swap_and_remove_vertices(configuration_type& full_configuration, linalg::util::HostVector<int>& source_index, linalg::util::HostVector<int>& target_index, e_spin_states_type e_spin); Loading Loading @@ -195,24 +195,20 @@ void SHRINK_TOOLS<Profiler, device_t, Real>::reorganize_configuration_test( swap_and_remove_vertices(full_configuration, source_index_up_, target_index_up_, e_UP); swap_and_remove_vertices(full_configuration, source_index_dn_, target_index_dn_, e_DN); #ifndef NDEBUG test_swap_vectors(source_index_up_, target_index_up_, N_up.size().first); test_swap_vectors(source_index_dn_, target_index_dn_, N_dn.size().first); #endif // NDEBUG shrink_tools_algorithm_obj_.execute(source_index_up_, target_index_up_, N_up, G0_up, source_index_dn_, target_index_dn_, N_dn, G0_dn); erase_non_creatable_and_non_annihilatable_spins(full_configuration, N_up, N_dn, G0_up, G0_dn); assert(full_configuration.assert_consistency()); } template <class Profiler, dca::linalg::DeviceType device_t, typename Real> template <class configuration_type> void SHRINK_TOOLS<Profiler, device_t, Real>::swap_and_remove_vertices(configuration_type& full_configuration, HostVector& source_index, HostVector& target_index, bool SHRINK_TOOLS<Profiler, device_t, Real>::swap_and_remove_vertices( configuration_type& full_configuration, HostVector& source_index, HostVector& target_index, e_spin_states_type e_spin) { const auto death_condition = [](const vertex_singleton& v) { return v.get_HS_spin() == HS_ZERO; }; Loading @@ -220,7 +216,7 @@ void SHRINK_TOOLS<Profiler, device_t, Real>::swap_and_remove_vertices(configurat int configuration_size = configuration_e_spin.size(); if (configuration_size == 0) { return; return false; } int dead_spin = 0; Loading Loading @@ -249,7 +245,7 @@ void SHRINK_TOOLS<Profiler, device_t, Real>::swap_and_remove_vertices(configurat } if (dead_spin >= living_spin) { break; return false; } assert(configuration_e_spin[dead_spin].get_HS_spin() == HS_ZERO); Loading @@ -270,6 +266,7 @@ void SHRINK_TOOLS<Profiler, device_t, Real>::swap_and_remove_vertices(configurat dead_spin++; living_spin--; } return true; } template <class Profiler, linalg::DeviceType device_t, typename Real> Loading include/dca/phys/dca_step/cluster_solver/stdthread_qmci/stdthread_qmci_cluster_solver.hpp +2 −0 Original line number Diff line number Diff line Loading @@ -655,6 +655,8 @@ void StdThreadQmciClusterSolver<QmciSolver>::startWalkerAndAccumulator(int id, finalizeWalker(walker, id); accum_fingerprints_[id] = accumulator_obj.deviceFingerprint(); Profiler::stop_threading(id); if (current_exception) Loading src/phys/dca_step/cluster_solver/ctaux/walker/ct_aux_walker_tools.cpp +11 −7 Original line number Diff line number Diff line Loading @@ -93,13 +93,11 @@ template <typename Scalar> bool CT_AUX_WALKER_TOOLS<dca::linalg::CPU, Scalar>::test_max_min( int n, dca::linalg::Matrix<Scalar, dca::linalg::CPU>& Gamma_LU, Real max_ref, Real min_ref) { Real Gamma_val = std::abs(Gamma_LU(0, 0)); Real max = Gamma_val; Real min = Gamma_val; for (int i = 1; i < n + 1; i++) { Gamma_val = std::abs(Gamma_LU(i, i)); max = Gamma_val > max ? Gamma_val : max; min = Gamma_val < min ? Gamma_val : min; } Loading @@ -107,18 +105,19 @@ bool CT_AUX_WALKER_TOOLS<dca::linalg::CPU, Scalar>::test_max_min( if (std::abs(max_ref - max) < 1.e-12 and std::fabs(min_ref - min) < 1.e-12) return true; else { std::cout << __FUNCTION__ << '\n'; std::cout << __FUNCTION__ << " for Gamma_LU has failed!\n"; std::cout << "Has failed!\n"; std::cout.precision(16); std::cout << "\n\t n : " << n << "\n"; std::cout << std::scientific; std::cout << "max" << "\t" << "max_ref" << "\t" << "std::fabs(max_ref - max)" << '\n'; std::cout << max << "\t" << max_ref << "\t" << std::fabs(max_ref - max) << '\n'; std::cout << min << "\t" << min_ref << "\t" << std::fabs(min_ref - min) << '\n'; std::cout << std::endl; Gamma_LU.print(); throw std::logic_error(__FUNCTION__); return false; } } Loading Loading @@ -149,8 +148,10 @@ auto CT_AUX_WALKER_TOOLS<dca::linalg::CPU, Scalar>::solve_Gamma( min = Gamma_val; } assert(test_max_min(n, Gamma_LU, max, min)); #ifndef NDEBUG if(!test_max_min(n, Gamma_LU, max, min)) throw std::runtime_error("solve_Gamma_blocked test_max_min on Gamma_LU failed!"); #endif Scalar phani_gamma = exp_delta_V - Real(1.); Scalar determinant_ratio = -phani_gamma * Gamma_LU_n_n; Loading Loading @@ -370,7 +371,10 @@ auto CT_AUX_WALKER_TOOLS<dca::linalg::CPU, Scalar>::solve_Gamma_blocked( } // std::cout << min << ", " << max << ")\t"; assert(test_max_min(n, Gamma_LU, max, min)); #ifndef NDEBUG if(!test_max_min(n, Gamma_LU, max, min)) throw std::runtime_error("solve_Gamma_blocked test_max_min on Gamma_LU failed!"); #endif auto phani_gamma = exp_delta_V - Real(1.); auto determinant_ratio = -phani_gamma * Gamma_LU_n_n; Loading src/phys/dca_step/cluster_solver/ctaux/walker/ct_aux_walker_tools_kernels.cu +5 −21 Original line number Diff line number Diff line Loading @@ -28,9 +28,6 @@ namespace ctaux { namespace walkerkernels { // dca::phys::solver::ctaux::walkerkernels:: template<typename T> using IsCudaComplex_t = dca::util::IsCudaComplex_t<T>; template <class T> __global__ void compute_Gamma_kernel(T* Gamma, int Gamma_n, int Gamma_ld, const T* N, int N_r, int N_c, int N_ld, const T* G, int G_r, int G_c, int G_ld, Loading @@ -43,29 +40,16 @@ __global__ void compute_Gamma_kernel(T* Gamma, int Gamma_n, int Gamma_ld, const const int vertex_index = N_c - G_c; T the_one; auto the_one = dca::util::TheOne<T>::value; T the_zero{}; if constexpr (IsCudaComplex_t<T>::value) the_one = T{1.0, 0.0}; else the_one = 1.0; if (i < Gamma_n and j < Gamma_n) { const int configuration_e_spin_index_i = random_vertex_vector[i]; const int configuration_e_spin_index_j = random_vertex_vector[j]; if (configuration_e_spin_index_j < vertex_index) { T delta; if (configuration_e_spin_index_i == configuration_e_spin_index_j) if constexpr (dca::util::IsCudaComplex_t<T>::value) delta = {1., 0}; else delta = 1; T delta = (configuration_e_spin_index_i == configuration_e_spin_index_j) ? the_one : the_zero; const auto N_ij = N[configuration_e_spin_index_i + configuration_e_spin_index_j * N_ld]; Gamma[i + j * Gamma_ld] = (N_ij * exp_V[j] - delta) / (exp_V[j] - the_one); } else Loading test/unit/phys/dca_step/cluster_solver/shared_tools/accumulation/tp/input_222-2_rashba.json +1 −1 Original line number Diff line number Diff line Loading @@ -74,6 +74,6 @@ "CT-AUX" : { "initial-configuration-size" :5, "max-submatrix-size":1 "max-submatrix-size":4 } } Loading
include/dca/phys/dca_step/cluster_solver/ctaux/walker/tools/shrink_tools.hpp +7 −10 Original line number Diff line number Diff line Loading @@ -63,7 +63,7 @@ private: e_spin_states_type e_spin); template <class configuration_type> static void swap_and_remove_vertices(configuration_type& full_configuration, static bool swap_and_remove_vertices(configuration_type& full_configuration, linalg::util::HostVector<int>& source_index, linalg::util::HostVector<int>& target_index, e_spin_states_type e_spin); Loading Loading @@ -195,24 +195,20 @@ void SHRINK_TOOLS<Profiler, device_t, Real>::reorganize_configuration_test( swap_and_remove_vertices(full_configuration, source_index_up_, target_index_up_, e_UP); swap_and_remove_vertices(full_configuration, source_index_dn_, target_index_dn_, e_DN); #ifndef NDEBUG test_swap_vectors(source_index_up_, target_index_up_, N_up.size().first); test_swap_vectors(source_index_dn_, target_index_dn_, N_dn.size().first); #endif // NDEBUG shrink_tools_algorithm_obj_.execute(source_index_up_, target_index_up_, N_up, G0_up, source_index_dn_, target_index_dn_, N_dn, G0_dn); erase_non_creatable_and_non_annihilatable_spins(full_configuration, N_up, N_dn, G0_up, G0_dn); assert(full_configuration.assert_consistency()); } template <class Profiler, dca::linalg::DeviceType device_t, typename Real> template <class configuration_type> void SHRINK_TOOLS<Profiler, device_t, Real>::swap_and_remove_vertices(configuration_type& full_configuration, HostVector& source_index, HostVector& target_index, bool SHRINK_TOOLS<Profiler, device_t, Real>::swap_and_remove_vertices( configuration_type& full_configuration, HostVector& source_index, HostVector& target_index, e_spin_states_type e_spin) { const auto death_condition = [](const vertex_singleton& v) { return v.get_HS_spin() == HS_ZERO; }; Loading @@ -220,7 +216,7 @@ void SHRINK_TOOLS<Profiler, device_t, Real>::swap_and_remove_vertices(configurat int configuration_size = configuration_e_spin.size(); if (configuration_size == 0) { return; return false; } int dead_spin = 0; Loading Loading @@ -249,7 +245,7 @@ void SHRINK_TOOLS<Profiler, device_t, Real>::swap_and_remove_vertices(configurat } if (dead_spin >= living_spin) { break; return false; } assert(configuration_e_spin[dead_spin].get_HS_spin() == HS_ZERO); Loading @@ -270,6 +266,7 @@ void SHRINK_TOOLS<Profiler, device_t, Real>::swap_and_remove_vertices(configurat dead_spin++; living_spin--; } return true; } template <class Profiler, linalg::DeviceType device_t, typename Real> Loading
include/dca/phys/dca_step/cluster_solver/stdthread_qmci/stdthread_qmci_cluster_solver.hpp +2 −0 Original line number Diff line number Diff line Loading @@ -655,6 +655,8 @@ void StdThreadQmciClusterSolver<QmciSolver>::startWalkerAndAccumulator(int id, finalizeWalker(walker, id); accum_fingerprints_[id] = accumulator_obj.deviceFingerprint(); Profiler::stop_threading(id); if (current_exception) Loading
src/phys/dca_step/cluster_solver/ctaux/walker/ct_aux_walker_tools.cpp +11 −7 Original line number Diff line number Diff line Loading @@ -93,13 +93,11 @@ template <typename Scalar> bool CT_AUX_WALKER_TOOLS<dca::linalg::CPU, Scalar>::test_max_min( int n, dca::linalg::Matrix<Scalar, dca::linalg::CPU>& Gamma_LU, Real max_ref, Real min_ref) { Real Gamma_val = std::abs(Gamma_LU(0, 0)); Real max = Gamma_val; Real min = Gamma_val; for (int i = 1; i < n + 1; i++) { Gamma_val = std::abs(Gamma_LU(i, i)); max = Gamma_val > max ? Gamma_val : max; min = Gamma_val < min ? Gamma_val : min; } Loading @@ -107,18 +105,19 @@ bool CT_AUX_WALKER_TOOLS<dca::linalg::CPU, Scalar>::test_max_min( if (std::abs(max_ref - max) < 1.e-12 and std::fabs(min_ref - min) < 1.e-12) return true; else { std::cout << __FUNCTION__ << '\n'; std::cout << __FUNCTION__ << " for Gamma_LU has failed!\n"; std::cout << "Has failed!\n"; std::cout.precision(16); std::cout << "\n\t n : " << n << "\n"; std::cout << std::scientific; std::cout << "max" << "\t" << "max_ref" << "\t" << "std::fabs(max_ref - max)" << '\n'; std::cout << max << "\t" << max_ref << "\t" << std::fabs(max_ref - max) << '\n'; std::cout << min << "\t" << min_ref << "\t" << std::fabs(min_ref - min) << '\n'; std::cout << std::endl; Gamma_LU.print(); throw std::logic_error(__FUNCTION__); return false; } } Loading Loading @@ -149,8 +148,10 @@ auto CT_AUX_WALKER_TOOLS<dca::linalg::CPU, Scalar>::solve_Gamma( min = Gamma_val; } assert(test_max_min(n, Gamma_LU, max, min)); #ifndef NDEBUG if(!test_max_min(n, Gamma_LU, max, min)) throw std::runtime_error("solve_Gamma_blocked test_max_min on Gamma_LU failed!"); #endif Scalar phani_gamma = exp_delta_V - Real(1.); Scalar determinant_ratio = -phani_gamma * Gamma_LU_n_n; Loading Loading @@ -370,7 +371,10 @@ auto CT_AUX_WALKER_TOOLS<dca::linalg::CPU, Scalar>::solve_Gamma_blocked( } // std::cout << min << ", " << max << ")\t"; assert(test_max_min(n, Gamma_LU, max, min)); #ifndef NDEBUG if(!test_max_min(n, Gamma_LU, max, min)) throw std::runtime_error("solve_Gamma_blocked test_max_min on Gamma_LU failed!"); #endif auto phani_gamma = exp_delta_V - Real(1.); auto determinant_ratio = -phani_gamma * Gamma_LU_n_n; Loading
src/phys/dca_step/cluster_solver/ctaux/walker/ct_aux_walker_tools_kernels.cu +5 −21 Original line number Diff line number Diff line Loading @@ -28,9 +28,6 @@ namespace ctaux { namespace walkerkernels { // dca::phys::solver::ctaux::walkerkernels:: template<typename T> using IsCudaComplex_t = dca::util::IsCudaComplex_t<T>; template <class T> __global__ void compute_Gamma_kernel(T* Gamma, int Gamma_n, int Gamma_ld, const T* N, int N_r, int N_c, int N_ld, const T* G, int G_r, int G_c, int G_ld, Loading @@ -43,29 +40,16 @@ __global__ void compute_Gamma_kernel(T* Gamma, int Gamma_n, int Gamma_ld, const const int vertex_index = N_c - G_c; T the_one; auto the_one = dca::util::TheOne<T>::value; T the_zero{}; if constexpr (IsCudaComplex_t<T>::value) the_one = T{1.0, 0.0}; else the_one = 1.0; if (i < Gamma_n and j < Gamma_n) { const int configuration_e_spin_index_i = random_vertex_vector[i]; const int configuration_e_spin_index_j = random_vertex_vector[j]; if (configuration_e_spin_index_j < vertex_index) { T delta; if (configuration_e_spin_index_i == configuration_e_spin_index_j) if constexpr (dca::util::IsCudaComplex_t<T>::value) delta = {1., 0}; else delta = 1; T delta = (configuration_e_spin_index_i == configuration_e_spin_index_j) ? the_one : the_zero; const auto N_ij = N[configuration_e_spin_index_i + configuration_e_spin_index_j * N_ld]; Gamma[i + j * Gamma_ld] = (N_ij * exp_V[j] - delta) / (exp_V[j] - the_one); } else Loading
test/unit/phys/dca_step/cluster_solver/shared_tools/accumulation/tp/input_222-2_rashba.json +1 −1 Original line number Diff line number Diff line Loading @@ -74,6 +74,6 @@ "CT-AUX" : { "initial-configuration-size" :5, "max-submatrix-size":1 "max-submatrix-size":4 } }