Loading include/dca/phys/dca_step/cluster_solver/ctaux/ctaux_cluster_solver.hpp +29 −20 Original line number Diff line number Diff line Loading @@ -201,10 +201,8 @@ CtauxClusterSolver<device_t, Parameters, Data, DIST>::CtauxClusterSolver( M_r_w_squared_("M_r_w_squared"), averaged_(false), writer_(writer) { if (concurrency_.id() == concurrency_.first()) std::cout << "\n\n\t CT-AUX Integrator is born \n" << std::endl; } template <dca::linalg::DeviceType device_t, class Parameters, class Data, DistType DIST> Loading Loading @@ -414,7 +412,8 @@ void CtauxClusterSolver<device_t, Parameters, Data, DIST>::computeErrorBars() { accumulator_.finalize(); M_r_w_new = accumulator_.get_sign_times_M_r_w(); M_r_w_new /= static_cast<typename decltype(M_r_w_new)::this_scalar_type>(accumulator_.get_accumulated_phase()); M_r_w_new /= static_cast<typename decltype(M_r_w_new)::this_scalar_type>( accumulator_.get_accumulated_phase()); math::transform::FunctionTransform<RDmn, KDmn>::execute(M_r_w_new, M_k_w_new); Loading @@ -434,8 +433,8 @@ void CtauxClusterSolver<device_t, Parameters, Data, DIST>::computeErrorBars() { std::vector<typename Data::TpGreensFunction> G4 = accumulator_.get_sign_times_G4(); for (std::size_t channel = 0; channel < G4.size(); ++channel) { G4[channel] /= TpComplex{parameters_.get_beta() * parameters_.get_beta()} * TpComplex{accumulator_.get_accumulated_sign().sum()}; G4[channel] /= TpComplex{parameters_.get_beta() * parameters_.get_beta()} * TpComplex{accumulator_.get_accumulated_sign().sum()}; concurrency_.average_and_compute_stddev(G4[channel], data_.get_G4_stdv()[channel]); } } Loading @@ -458,7 +457,8 @@ void CtauxClusterSolver<device_t, Parameters, Data, DIST>::collect_measurements( concurrency_.delayedSum(accumulator_.get_Gflop()); accumulated_sign_ = accumulator_.get_accumulated_phase(); collect_delayed(accumulated_sign_); static_assert(std::is_same_v<decltype(M_r_w_), std::decay_t<decltype(accumulator_.get_sign_times_M_r_w())>>); static_assert( std::is_same_v<decltype(M_r_w_), std::decay_t<decltype(accumulator_.get_sign_times_M_r_w())>>); M_r_w_ = accumulator_.get_sign_times_M_r_w(); collect_delayed(M_r_w_); Loading @@ -481,7 +481,9 @@ void CtauxClusterSolver<device_t, Parameters, Data, DIST>::collect_measurements( for (int channel = 0; channel < data_.get_G4().size(); ++channel) { auto& G4 = data_.get_G4()[channel]; // function operator = will reset this G4 size to other G4 size if they are not equal static_assert(std::is_same_v<std::remove_reference_t<decltype(G4)>,std::decay_t<decltype(accumulator_.get_sign_times_G4()[channel])>>); static_assert( std::is_same_v<std::remove_reference_t<decltype(G4)>, std::decay_t<decltype(accumulator_.get_sign_times_G4()[channel])>>); G4 = accumulator_.get_sign_times_G4()[channel]; if (parameters_.get_g4_distribution() != DistType::NONE) { // do nothing, no accumulation should be performed as G4 size cannot fit into one GPU Loading @@ -504,20 +506,28 @@ void CtauxClusterSolver<device_t, Parameters, Data, DIST>::collect_measurements( } M_r_w_ /= static_cast<typename decltype(M_r_w_)::this_scalar_type>(accumulated_sign_); M_r_w_squared_ /= static_cast<typename decltype(M_r_w_squared_)::this_scalar_type>(accumulated_sign_); M_r_w_squared_ /= static_cast<typename decltype(M_r_w_squared_)::this_scalar_type>(accumulated_sign_); if (accumulator_.perform_tp_accumulation()) { for (auto& G4 : data_.get_G4()) G4 /= static_cast<typename std::remove_reference<decltype(G4)>::type::this_scalar_type>(accumulated_sign_) * static_cast<Real>(parameters_.get_beta() * parameters_.get_beta()); G4 /= static_cast<typename std::remove_reference<decltype(G4)>::type::this_scalar_type>( accumulated_sign_) * static_cast<Real>(parameters_.get_beta() * parameters_.get_beta()); } if (accumulator_.perform_equal_time_accumulation()) { accumulator_.get_G_r_t() /= static_cast<typename std::remove_reference_t<decltype(accumulator_.get_G_r_t())>::this_scalar_type>(accumulated_sign_); static_assert(std::is_same_v<decltype(data_.G_r_t), std::remove_reference_t<decltype(accumulator_.get_G_r_t())>>); accumulator_.get_G_r_t() /= static_cast<typename std::remove_reference_t<decltype(accumulator_.get_G_r_t())>::this_scalar_type>( accumulated_sign_); static_assert(std::is_same_v<decltype(data_.G_r_t), std::remove_reference_t<decltype(accumulator_.get_G_r_t())>>); data_.G_r_t = accumulator_.get_G_r_t(); auto stddev_normalization = accumulated_sign_ * static_cast<typename std::decay_t<decltype(accumulator_.get_G_r_t_stddev())>::this_scalar_type>(std::sqrt(static_cast<Real>(parameters_.get_measurements()[dca_iteration_]))); auto stddev_normalization = accumulated_sign_ * static_cast<typename std::decay_t<decltype(accumulator_.get_G_r_t_stddev())>::this_scalar_type>( std::sqrt(static_cast<Real>(parameters_.get_measurements()[dca_iteration_]))); accumulator_.get_G_r_t_stddev() /= stddev_normalization; accumulator_.get_charge_cluster_moment() /= accumulated_sign_; accumulator_.get_magnetic_cluster_moment() /= accumulated_sign_; accumulator_.get_dwave_pp_correlator() /= accumulated_sign_; Loading Loading @@ -552,9 +562,8 @@ void CtauxClusterSolver<device_t, Parameters, Data, DIST>::symmetrize_measuremen } template <dca::linalg::DeviceType device_t, class Parameters, class Data, DistType DIST> void CtauxClusterSolver<device_t, Parameters, Data, DIST>::computeG_k_w(const SpGreensFunction& G0, const SpGreensFunction& M_k_w, SpGreensFunction& G_k_w) const { void CtauxClusterSolver<device_t, Parameters, Data, DIST>::computeG_k_w( const SpGreensFunction& G0, const SpGreensFunction& M_k_w, SpGreensFunction& G_k_w) const { const int matrix_dim = nu::dmn_size(); dca::linalg::Matrix<std::complex<double>, dca::linalg::CPU> G0_times_M_matrix( "GO_M_matrix", matrix_dim, matrix_dim); Loading include/dca/phys/dca_step/cluster_solver/ctaux/ctaux_walker.hpp +36 −23 Original line number Diff line number Diff line Loading @@ -65,7 +65,10 @@ public: static constexpr bool is_complex = dca::util::IsComplex<Scalar>::value; public: CtauxWalker(const Parameters& parameters_ref, Data& MOMS_ref, Rng& rng_ref, int id); /** constructor * param[in] id thread id? */ CtauxWalker(Parameters& parameters_ref, Data& MOMS_ref, Rng& rng_ref, int id); void initialize(int iteration); Loading Loading @@ -204,12 +207,37 @@ private: void recomputeMCWeight(); protected: // for testing auto& getNUp() { return N_up; } auto& getNDown() { return N_dn; } private: using WalkerBIT<Parameters, Data>::check_G0_matrices; using WalkerBIT<Parameters, Data>::check_N_matrices; using WalkerBIT<Parameters, Data>::check_G_matrices; private: protected: const Parameters& parameters_; Data& data_; const Concurrency& concurrency_; int thread_id; int stream_id; CV<Parameters> CV_obj; CT_AUX_WALKER_TOOLS<dca::linalg::CPU, Scalar> ctaux_tools; Rng& rng; Configuration configuration_; G0Interpolation<device_t, Parameters> G0_tools_obj; N_TOOLS<device_t, Parameters> N_tools_obj; G_TOOLS<device_t, Parameters> G_tools_obj; SHRINK_TOOLS<Profiler, device_t, Scalar> SHRINK_tools_obj; struct delayed_spin_struct { int delayed_spin_index; Loading Loading @@ -242,24 +270,10 @@ private: }; private: const Parameters& parameters_; Data& data_; const Concurrency& concurrency_; int thread_id; int stream_id; CV<Parameters> CV_obj; CT_AUX_WALKER_TOOLS<dca::linalg::CPU, Scalar> ctaux_tools; Rng& rng; Configuration configuration_; G0Interpolation<device_t, Parameters> G0_tools_obj; N_TOOLS<device_t, Parameters> N_tools_obj; G_TOOLS<device_t, Parameters> G_tools_obj; SHRINK_TOOLS<Profiler, device_t, Scalar> SHRINK_tools_obj; using CtauxWalkerData<device_t, Parameters>::N_up; using CtauxWalkerData<device_t, Parameters>::N_dn; Loading Loading @@ -326,20 +340,19 @@ private: std::array<linalg::Vector<Scalar, device_t>, 2> exp_v_minus_one_dev_; std::array<linalg::util::GpuEvent, 2> m_computed_events_; bool config_initialized_; double sweeps_per_measurement_ = 1.; unsigned long n_steps_ = 0; linalg::util::GpuEvent sync_streams_event_; bool config_initialized_; }; template <dca::linalg::DeviceType device_t, class Parameters, class Data> CtauxWalker<device_t, Parameters, Data>::CtauxWalker(const Parameters& parameters_ref, CtauxWalker<device_t, Parameters, Data>::CtauxWalker(Parameters& parameters_ref, Data& MOMS_ref, Rng& rng_ref, int id) : WalkerBIT<Parameters, Data>(parameters_ref, MOMS_ref, id), CtauxWalkerData<device_t, Parameters>(parameters_ref, id), parameters_(parameters_ref), data_(MOMS_ref), concurrency_(parameters_.get_concurrency()), Loading include/dca/phys/dca_step/cluster_solver/ctaux/structs/ct_aux_hs_configuration.hpp +1 −1 Original line number Diff line number Diff line Loading @@ -159,7 +159,7 @@ CT_AUX_HS_configuration<parameters_type>::CT_AUX_HS_configuration(const paramete rng(rng_ref), // Rounding up ensures a value >= 1. max_num_noninteracting_spins_((parameters.get_max_submatrix_size() + 1) / 2), max_num_noninteracting_spins_((parameters_ref.get_max_submatrix_size() + 1) / 2), next_vertex_id_(0) {} Loading include/dca/phys/dca_step/cluster_solver/ctaux/walker/tools/g0_interpolation/g0_interpolation_base.hpp +2 −2 Original line number Diff line number Diff line Loading @@ -34,8 +34,8 @@ namespace ctaux { template <typename Parameters> class G0InterpolationBase { public: using Real = typename dca::config::McOptions::MC_REAL; using Scalar = typename dca::util::ScalarSelect<Real,Parameters::complex_g0>::type; using Real = typename Parameters::Real; using Scalar = typename Parameters::Scalar; using t = func::dmn_0<domains::time_domain>; using b = func::dmn_0<domains::electron_band_domain>; Loading include/dca/phys/dca_step/cluster_solver/ctaux/walker/tools/g0_interpolation/g0_interpolation_gpu.hpp +2 −2 Original line number Diff line number Diff line Loading @@ -30,8 +30,8 @@ namespace ctaux { template <typename Parameters> class G0Interpolation<dca::linalg::GPU, Parameters> : public G0InterpolationBase<Parameters> { public: using Real = typename dca::config::McOptions::MC_REAL; using Scalar = typename dca::util::ScalarSelect<Real,Parameters::complex_g0>::type; using Real = typename Parameters::Real; using Scalar = typename Parameters::Scalar; using vertex_singleton_type = vertex_singleton; using shifted_t = func::dmn_0<domains::time_domain_left_oriented>; using b = func::dmn_0<domains::electron_band_domain>; Loading Loading
include/dca/phys/dca_step/cluster_solver/ctaux/ctaux_cluster_solver.hpp +29 −20 Original line number Diff line number Diff line Loading @@ -201,10 +201,8 @@ CtauxClusterSolver<device_t, Parameters, Data, DIST>::CtauxClusterSolver( M_r_w_squared_("M_r_w_squared"), averaged_(false), writer_(writer) { if (concurrency_.id() == concurrency_.first()) std::cout << "\n\n\t CT-AUX Integrator is born \n" << std::endl; } template <dca::linalg::DeviceType device_t, class Parameters, class Data, DistType DIST> Loading Loading @@ -414,7 +412,8 @@ void CtauxClusterSolver<device_t, Parameters, Data, DIST>::computeErrorBars() { accumulator_.finalize(); M_r_w_new = accumulator_.get_sign_times_M_r_w(); M_r_w_new /= static_cast<typename decltype(M_r_w_new)::this_scalar_type>(accumulator_.get_accumulated_phase()); M_r_w_new /= static_cast<typename decltype(M_r_w_new)::this_scalar_type>( accumulator_.get_accumulated_phase()); math::transform::FunctionTransform<RDmn, KDmn>::execute(M_r_w_new, M_k_w_new); Loading @@ -434,8 +433,8 @@ void CtauxClusterSolver<device_t, Parameters, Data, DIST>::computeErrorBars() { std::vector<typename Data::TpGreensFunction> G4 = accumulator_.get_sign_times_G4(); for (std::size_t channel = 0; channel < G4.size(); ++channel) { G4[channel] /= TpComplex{parameters_.get_beta() * parameters_.get_beta()} * TpComplex{accumulator_.get_accumulated_sign().sum()}; G4[channel] /= TpComplex{parameters_.get_beta() * parameters_.get_beta()} * TpComplex{accumulator_.get_accumulated_sign().sum()}; concurrency_.average_and_compute_stddev(G4[channel], data_.get_G4_stdv()[channel]); } } Loading @@ -458,7 +457,8 @@ void CtauxClusterSolver<device_t, Parameters, Data, DIST>::collect_measurements( concurrency_.delayedSum(accumulator_.get_Gflop()); accumulated_sign_ = accumulator_.get_accumulated_phase(); collect_delayed(accumulated_sign_); static_assert(std::is_same_v<decltype(M_r_w_), std::decay_t<decltype(accumulator_.get_sign_times_M_r_w())>>); static_assert( std::is_same_v<decltype(M_r_w_), std::decay_t<decltype(accumulator_.get_sign_times_M_r_w())>>); M_r_w_ = accumulator_.get_sign_times_M_r_w(); collect_delayed(M_r_w_); Loading @@ -481,7 +481,9 @@ void CtauxClusterSolver<device_t, Parameters, Data, DIST>::collect_measurements( for (int channel = 0; channel < data_.get_G4().size(); ++channel) { auto& G4 = data_.get_G4()[channel]; // function operator = will reset this G4 size to other G4 size if they are not equal static_assert(std::is_same_v<std::remove_reference_t<decltype(G4)>,std::decay_t<decltype(accumulator_.get_sign_times_G4()[channel])>>); static_assert( std::is_same_v<std::remove_reference_t<decltype(G4)>, std::decay_t<decltype(accumulator_.get_sign_times_G4()[channel])>>); G4 = accumulator_.get_sign_times_G4()[channel]; if (parameters_.get_g4_distribution() != DistType::NONE) { // do nothing, no accumulation should be performed as G4 size cannot fit into one GPU Loading @@ -504,20 +506,28 @@ void CtauxClusterSolver<device_t, Parameters, Data, DIST>::collect_measurements( } M_r_w_ /= static_cast<typename decltype(M_r_w_)::this_scalar_type>(accumulated_sign_); M_r_w_squared_ /= static_cast<typename decltype(M_r_w_squared_)::this_scalar_type>(accumulated_sign_); M_r_w_squared_ /= static_cast<typename decltype(M_r_w_squared_)::this_scalar_type>(accumulated_sign_); if (accumulator_.perform_tp_accumulation()) { for (auto& G4 : data_.get_G4()) G4 /= static_cast<typename std::remove_reference<decltype(G4)>::type::this_scalar_type>(accumulated_sign_) * static_cast<Real>(parameters_.get_beta() * parameters_.get_beta()); G4 /= static_cast<typename std::remove_reference<decltype(G4)>::type::this_scalar_type>( accumulated_sign_) * static_cast<Real>(parameters_.get_beta() * parameters_.get_beta()); } if (accumulator_.perform_equal_time_accumulation()) { accumulator_.get_G_r_t() /= static_cast<typename std::remove_reference_t<decltype(accumulator_.get_G_r_t())>::this_scalar_type>(accumulated_sign_); static_assert(std::is_same_v<decltype(data_.G_r_t), std::remove_reference_t<decltype(accumulator_.get_G_r_t())>>); accumulator_.get_G_r_t() /= static_cast<typename std::remove_reference_t<decltype(accumulator_.get_G_r_t())>::this_scalar_type>( accumulated_sign_); static_assert(std::is_same_v<decltype(data_.G_r_t), std::remove_reference_t<decltype(accumulator_.get_G_r_t())>>); data_.G_r_t = accumulator_.get_G_r_t(); auto stddev_normalization = accumulated_sign_ * static_cast<typename std::decay_t<decltype(accumulator_.get_G_r_t_stddev())>::this_scalar_type>(std::sqrt(static_cast<Real>(parameters_.get_measurements()[dca_iteration_]))); auto stddev_normalization = accumulated_sign_ * static_cast<typename std::decay_t<decltype(accumulator_.get_G_r_t_stddev())>::this_scalar_type>( std::sqrt(static_cast<Real>(parameters_.get_measurements()[dca_iteration_]))); accumulator_.get_G_r_t_stddev() /= stddev_normalization; accumulator_.get_charge_cluster_moment() /= accumulated_sign_; accumulator_.get_magnetic_cluster_moment() /= accumulated_sign_; accumulator_.get_dwave_pp_correlator() /= accumulated_sign_; Loading Loading @@ -552,9 +562,8 @@ void CtauxClusterSolver<device_t, Parameters, Data, DIST>::symmetrize_measuremen } template <dca::linalg::DeviceType device_t, class Parameters, class Data, DistType DIST> void CtauxClusterSolver<device_t, Parameters, Data, DIST>::computeG_k_w(const SpGreensFunction& G0, const SpGreensFunction& M_k_w, SpGreensFunction& G_k_w) const { void CtauxClusterSolver<device_t, Parameters, Data, DIST>::computeG_k_w( const SpGreensFunction& G0, const SpGreensFunction& M_k_w, SpGreensFunction& G_k_w) const { const int matrix_dim = nu::dmn_size(); dca::linalg::Matrix<std::complex<double>, dca::linalg::CPU> G0_times_M_matrix( "GO_M_matrix", matrix_dim, matrix_dim); Loading
include/dca/phys/dca_step/cluster_solver/ctaux/ctaux_walker.hpp +36 −23 Original line number Diff line number Diff line Loading @@ -65,7 +65,10 @@ public: static constexpr bool is_complex = dca::util::IsComplex<Scalar>::value; public: CtauxWalker(const Parameters& parameters_ref, Data& MOMS_ref, Rng& rng_ref, int id); /** constructor * param[in] id thread id? */ CtauxWalker(Parameters& parameters_ref, Data& MOMS_ref, Rng& rng_ref, int id); void initialize(int iteration); Loading Loading @@ -204,12 +207,37 @@ private: void recomputeMCWeight(); protected: // for testing auto& getNUp() { return N_up; } auto& getNDown() { return N_dn; } private: using WalkerBIT<Parameters, Data>::check_G0_matrices; using WalkerBIT<Parameters, Data>::check_N_matrices; using WalkerBIT<Parameters, Data>::check_G_matrices; private: protected: const Parameters& parameters_; Data& data_; const Concurrency& concurrency_; int thread_id; int stream_id; CV<Parameters> CV_obj; CT_AUX_WALKER_TOOLS<dca::linalg::CPU, Scalar> ctaux_tools; Rng& rng; Configuration configuration_; G0Interpolation<device_t, Parameters> G0_tools_obj; N_TOOLS<device_t, Parameters> N_tools_obj; G_TOOLS<device_t, Parameters> G_tools_obj; SHRINK_TOOLS<Profiler, device_t, Scalar> SHRINK_tools_obj; struct delayed_spin_struct { int delayed_spin_index; Loading Loading @@ -242,24 +270,10 @@ private: }; private: const Parameters& parameters_; Data& data_; const Concurrency& concurrency_; int thread_id; int stream_id; CV<Parameters> CV_obj; CT_AUX_WALKER_TOOLS<dca::linalg::CPU, Scalar> ctaux_tools; Rng& rng; Configuration configuration_; G0Interpolation<device_t, Parameters> G0_tools_obj; N_TOOLS<device_t, Parameters> N_tools_obj; G_TOOLS<device_t, Parameters> G_tools_obj; SHRINK_TOOLS<Profiler, device_t, Scalar> SHRINK_tools_obj; using CtauxWalkerData<device_t, Parameters>::N_up; using CtauxWalkerData<device_t, Parameters>::N_dn; Loading Loading @@ -326,20 +340,19 @@ private: std::array<linalg::Vector<Scalar, device_t>, 2> exp_v_minus_one_dev_; std::array<linalg::util::GpuEvent, 2> m_computed_events_; bool config_initialized_; double sweeps_per_measurement_ = 1.; unsigned long n_steps_ = 0; linalg::util::GpuEvent sync_streams_event_; bool config_initialized_; }; template <dca::linalg::DeviceType device_t, class Parameters, class Data> CtauxWalker<device_t, Parameters, Data>::CtauxWalker(const Parameters& parameters_ref, CtauxWalker<device_t, Parameters, Data>::CtauxWalker(Parameters& parameters_ref, Data& MOMS_ref, Rng& rng_ref, int id) : WalkerBIT<Parameters, Data>(parameters_ref, MOMS_ref, id), CtauxWalkerData<device_t, Parameters>(parameters_ref, id), parameters_(parameters_ref), data_(MOMS_ref), concurrency_(parameters_.get_concurrency()), Loading
include/dca/phys/dca_step/cluster_solver/ctaux/structs/ct_aux_hs_configuration.hpp +1 −1 Original line number Diff line number Diff line Loading @@ -159,7 +159,7 @@ CT_AUX_HS_configuration<parameters_type>::CT_AUX_HS_configuration(const paramete rng(rng_ref), // Rounding up ensures a value >= 1. max_num_noninteracting_spins_((parameters.get_max_submatrix_size() + 1) / 2), max_num_noninteracting_spins_((parameters_ref.get_max_submatrix_size() + 1) / 2), next_vertex_id_(0) {} Loading
include/dca/phys/dca_step/cluster_solver/ctaux/walker/tools/g0_interpolation/g0_interpolation_base.hpp +2 −2 Original line number Diff line number Diff line Loading @@ -34,8 +34,8 @@ namespace ctaux { template <typename Parameters> class G0InterpolationBase { public: using Real = typename dca::config::McOptions::MC_REAL; using Scalar = typename dca::util::ScalarSelect<Real,Parameters::complex_g0>::type; using Real = typename Parameters::Real; using Scalar = typename Parameters::Scalar; using t = func::dmn_0<domains::time_domain>; using b = func::dmn_0<domains::electron_band_domain>; Loading
include/dca/phys/dca_step/cluster_solver/ctaux/walker/tools/g0_interpolation/g0_interpolation_gpu.hpp +2 −2 Original line number Diff line number Diff line Loading @@ -30,8 +30,8 @@ namespace ctaux { template <typename Parameters> class G0Interpolation<dca::linalg::GPU, Parameters> : public G0InterpolationBase<Parameters> { public: using Real = typename dca::config::McOptions::MC_REAL; using Scalar = typename dca::util::ScalarSelect<Real,Parameters::complex_g0>::type; using Real = typename Parameters::Real; using Scalar = typename Parameters::Scalar; using vertex_singleton_type = vertex_singleton; using shifted_t = func::dmn_0<domains::time_domain_left_oriented>; using b = func::dmn_0<domains::electron_band_domain>; Loading