Loading include/dca/phys/dca_data/dca_data.hpp +19 −0 Original line number Diff line number Diff line Loading @@ -288,6 +288,11 @@ DcaData<Parameters>::DcaData(Parameters& parameters_ref) orbital_occupancy("orbital_occupancy") { H_symmetry = -1; // Reserve storage in advance such that we don't have to copy elements when we fill the vector. // We want to avoid copies because function's copy ctor does not copy the name (and because copies // are expensive). G4_.reserve(parameters_.numG4Channels()); // Allocate memory for G4. // Ensure backward compatibility. if (parameters_.get_four_point_type() != NONE) Loading @@ -304,12 +309,20 @@ DcaData<Parameters>::DcaData(Parameters& parameters_ref) if (parameters_.accumulateG4ParticleHoleCharge()) G4_.emplace_back("G4_" + toString(PARTICLE_HOLE_CHARGE)); if (parameters_.accumulateG4ParticleHoleLongitudinalUpUp()) G4_.emplace_back("G4_" + toString(PARTICLE_HOLE_LONGITUDINAL_UP_UP)); if (parameters_.accumulateG4ParticleHoleLongitudinalUpDown()) G4_.emplace_back("G4_" + toString(PARTICLE_HOLE_LONGITUDINAL_UP_DOWN)); if (parameters_.accumulateG4ParticleParticleUpDown()) G4_.emplace_back("G4_" + toString(PARTICLE_PARTICLE_UP_DOWN)); } // Allocate memory for error on G4. if (parameters_.get_error_computation_type() != ErrorComputationType::NONE) { G4_err_.reserve(parameters_.numG4Channels()); if (parameters_.get_four_point_type() != NONE) G4_err_.emplace_back("G4-error"); Loading @@ -323,6 +336,12 @@ DcaData<Parameters>::DcaData(Parameters& parameters_ref) if (parameters_.accumulateG4ParticleHoleCharge()) G4_err_.emplace_back("G4_" + toString(PARTICLE_HOLE_CHARGE) + "_err"); if (parameters_.accumulateG4ParticleHoleLongitudinalUpUp()) G4_err_.emplace_back("G4_" + toString(PARTICLE_HOLE_LONGITUDINAL_UP_UP) + "_err"); if (parameters_.accumulateG4ParticleHoleLongitudinalUpDown()) G4_err_.emplace_back("G4_" + toString(PARTICLE_HOLE_LONGITUDINAL_UP_DOWN) + "_err"); if (parameters_.accumulateG4ParticleParticleUpDown()) G4_err_.emplace_back("G4_" + toString(PARTICLE_PARTICLE_UP_DOWN) + "_err"); } Loading include/dca/phys/dca_step/cluster_solver/shared_tools/accumulation/tp/tp_accumulator.hpp +104 −35 Original line number Diff line number Diff line Loading @@ -47,7 +47,6 @@ class TpAccumulator<Parameters, linalg::CPU> { public: using Real = typename Parameters::MC_measurement_scalar_type; protected: using RDmn = typename Parameters::RClusterDmn; using KDmn = typename Parameters::KClusterDmn; using KExchangeDmn = func::dmn_0<domains::MomentumExchangeDomain>; Loading @@ -56,9 +55,29 @@ protected: using NuDmn = func::dmn_variadic<BDmn, SDmn>; using WDmn = func::dmn_0<domains::frequency_domain>; public: using WTpDmn = func::dmn_0<domains::vertex_frequency_domain<domains::COMPACT>>; using WTpPosDmn = func::dmn_0<domains::vertex_frequency_domain<domains::COMPACT_POSITIVE>>; using WTpExtDmn = func::dmn_0<domains::vertex_frequency_domain<domains::EXTENDED>>; using WTpExtPosDmn = func::dmn_0<domains::vertex_frequency_domain<domains::EXTENDED_POSITIVE>>; using WExchangeDmn = func::dmn_0<domains::FrequencyExchangeDomain>; using this_type = TpAccumulator<Parameters>; protected: using Profiler = typename Parameters::profiler_type; using Complex = std::complex<Real>; using Matrix = linalg::Matrix<Complex, linalg::CPU>; using SpGreenFunction = func::function<Complex, func::dmn_variadic<BDmn, BDmn, SDmn, KDmn, KDmn, WTpExtPosDmn, WTpExtDmn>>; using TpDomain = func::dmn_variadic<BDmn, BDmn, BDmn, BDmn, KDmn, KDmn, KExchangeDmn, WTpDmn, WTpDmn, WExchangeDmn>; public: using TpGreensFunction = func::function<Complex, TpDomain>; // Constructor: // In: G0: non interacting greens function. // In: pars: parameters object. Loading Loading @@ -100,23 +119,6 @@ public: } protected: using Profiler = typename Parameters::profiler_type; using WTpDmn = func::dmn_0<domains::vertex_frequency_domain<domains::COMPACT>>; using WTpPosDmn = func::dmn_0<domains::vertex_frequency_domain<domains::COMPACT_POSITIVE>>; using WTpExtDmn = func::dmn_0<domains::vertex_frequency_domain<domains::EXTENDED>>; using WTpExtPosDmn = func::dmn_0<domains::vertex_frequency_domain<domains::EXTENDED_POSITIVE>>; using WExchangeDmn = func::dmn_0<domains::FrequencyExchangeDomain>; using Complex = std::complex<Real>; using SpGreenFunction = func::function<Complex, func::dmn_variadic<BDmn, BDmn, SDmn, KDmn, KDmn, WTpExtPosDmn, WTpExtDmn>>; using TpDomain = func::dmn_variadic<BDmn, BDmn, BDmn, BDmn, KDmn, KDmn, KExchangeDmn, WTpDmn, WTpDmn, WExchangeDmn>; using TpGreensFunction = func::function<Complex, TpDomain>; using Matrix = linalg::Matrix<Complex, linalg::CPU>; void initializeG0(); double computeG(); Loading Loading @@ -188,6 +190,11 @@ TpAccumulator<Parameters, linalg::CPU>::TpAccumulator( throw(std::logic_error("The number of single particle frequencies is too small.")); initializeG0(); // Reserve storage in advance such that we don't have to copy elements when we fill the vector. // We want to avoid copies because function's copy ctor does not copy the name (and because copies // are expensive). G4_.reserve(pars.numG4Channels()); // Ensure backward compatibility. if (pars.get_four_point_type() != NONE) { G4_.emplace_back("G4_" + toString(pars.get_four_point_type())); Loading @@ -204,6 +211,12 @@ TpAccumulator<Parameters, linalg::CPU>::TpAccumulator( if (pars.accumulateG4ParticleHoleCharge()) G4_.emplace_back("G4_" + toString(PARTICLE_HOLE_CHARGE)); if (pars.accumulateG4ParticleHoleLongitudinalUpUp()) G4_.emplace_back("G4_" + toString(PARTICLE_HOLE_LONGITUDINAL_UP_UP)); if (pars.accumulateG4ParticleHoleLongitudinalUpDown()) G4_.emplace_back("G4_" + toString(PARTICLE_HOLE_LONGITUDINAL_UP_DOWN)); if (pars.accumulateG4ParticleParticleUpDown()) G4_.emplace_back("G4_" + toString(PARTICLE_PARTICLE_UP_DOWN)); } Loading Loading @@ -419,12 +432,11 @@ double TpAccumulator<Parameters, linalg::CPU>::updateG4(TpGreensFunction& G4) { switch (channel) { case PARTICLE_HOLE_MAGNETIC: // Note: sums over spin indices are implied. // // G4(k1, k2, k_ex) = 1/2 (s1 * s2) <c^+(k1 + k_ex, s1) c(k1, s1) // c^+(k2, s2) c(k2 + k_ex, s2)> // = 1/2 (s1 * s2) <G(k1, k1 + k_ex, s1) G(k2 + k_ex, k2, s2) - // (s1 == s2) G(k2 + k_ex, k1 + k_ex, s1) G(k1, k2, s1)>. // G4(k1, k2, k_ex) = 1/2 sum_{s1, s2} (s1 * s2) // <c^+(k1+k_ex, s1) c(k1, s1) c^+(k2, s2) c(k2+k_ex, s2)> // = 1/2 sum_{s1, s2} (s1 * s2) // [G(k1, k1+k_ex, s1) G(k2+k_ex, k2, s2) // - (s1 == s2) G(k2+k_ex, k1+k_ex, s1) G(k1, k2, s1)] for (int w_ex_idx = 0; w_ex_idx < exchange_frq.size(); ++w_ex_idx) { const int w_ex = exchange_frq[w_ex_idx]; for (int w2 = 0; w2 < WTpDmn::dmn_size(); ++w2) Loading @@ -448,9 +460,11 @@ double TpAccumulator<Parameters, linalg::CPU>::updateG4(TpGreensFunction& G4) { break; case PARTICLE_HOLE_CHARGE: // G4(k1, k2, k_ex) += 1/2 <c^+(k1 + k_ex, s1) c(k1, s1) c^+(k2, s2) c(k2 + k_ex, s2)> = // = 1/2 <G(k1, k1 + k_ex, s1) G(k2 + k_ex, k2, s2) - // (s1 == s2) G(k2 + k_ex, k1 + k_ex, s1) G(k1, k2, s1)>. // G4(k1, k2, k_ex) = 1/2 sum_{s1, s2} // <c^+(k1+k_ex, s1) c(k1, s1) c^+(k2, s2) c(k2+k_ex, s2)> // = 1/2 sum_{s1, s2} // [G(k1, k1+k_ex, s1) G(k2+k_ex, k2, s2) // - (s1 == s2) G(k2+k_ex, k1+k_ex, s1) G(k1, k2, s1)] for (int w_ex_idx = 0; w_ex_idx < exchange_frq.size(); ++w_ex_idx) { const int w_ex = exchange_frq[w_ex_idx]; for (int w2 = 0; w2 < WTpDmn::dmn_size(); ++w2) Loading @@ -474,9 +488,62 @@ double TpAccumulator<Parameters, linalg::CPU>::updateG4(TpGreensFunction& G4) { flops += n_loops * (flops_update_spin_diff + 2 * flops_update_atomic); break; case PARTICLE_HOLE_LONGITUDINAL_UP_UP: // G4(k1, k2, k_ex) = 1/2 sum_s <c^+(k1+k_ex, s) c(k1, s) c^+(k2, s) c(k2+k_ex, s)> // = 1/2 sum_s [G(k1, k1+k_ex, s) G(k2+k_ex, k2, s) // - G(k2+k_ex, k1+k_ex, s) G(k1, k2, s)] for (int w_ex_idx = 0; w_ex_idx < exchange_frq.size(); ++w_ex_idx) { const int w_ex = exchange_frq[w_ex_idx]; for (int w2 = 0; w2 < WTpDmn::dmn_size(); ++w2) for (int w1 = 0; w1 < WTpDmn::dmn_size(); ++w1) for (int k_ex_idx = 0; k_ex_idx < exchange_mom.size(); ++k_ex_idx) { const int k_ex = exchange_mom[k_ex_idx]; for (int k2 = 0; k2 < KDmn::dmn_size(); ++k2) for (int k1 = 0; k1 < KDmn::dmn_size(); ++k1) { Complex* const G4_ptr = &G4(0, 0, 0, 0, k1, k2, k_ex_idx, w1, w2, w_ex_idx); for (int s = 0; s < 2; ++s) updateG4Atomic(G4_ptr, s, k1, momentum_sum(k1, k_ex), w1, w_plus_w_ex(w1, w_ex), s, momentum_sum(k2, k_ex), k2, w_plus_w_ex(w2, w_ex), w2, sign_over_2, false); for (int s = 0; s < 2; ++s) updateG4Atomic(G4_ptr, s, k1, k2, w1, w2, s, momentum_sum(k2, k_ex), momentum_sum(k1, k_ex), w_plus_w_ex(w2, w_ex), w_plus_w_ex(w1, w_ex), -sign_over_2, true); } } } flops += n_loops * 4 * flops_update_atomic; break; case PARTICLE_HOLE_LONGITUDINAL_UP_DOWN: // G4(k1, k2, k_ex) = 1/2 sum_s <c^+(k1+k_ex, s) c(k1, s) c^+(k2, -s) c(k2+k_ex, -s)> // = 1/2 sum_s G(k1, k1+k_ex, s) G(k2+k_ex, k2, -s) for (int w_ex_idx = 0; w_ex_idx < exchange_frq.size(); ++w_ex_idx) { const int w_ex = exchange_frq[w_ex_idx]; for (int w2 = 0; w2 < WTpDmn::dmn_size(); ++w2) for (int w1 = 0; w1 < WTpDmn::dmn_size(); ++w1) for (int k_ex_idx = 0; k_ex_idx < exchange_mom.size(); ++k_ex_idx) { const int k_ex = exchange_mom[k_ex_idx]; for (int k2 = 0; k2 < KDmn::dmn_size(); ++k2) for (int k1 = 0; k1 < KDmn::dmn_size(); ++k1) { Complex* const G4_ptr = &G4(0, 0, 0, 0, k1, k2, k_ex_idx, w1, w2, w_ex_idx); for (int s = 0; s < 2; ++s) updateG4Atomic(G4_ptr, s, k1, momentum_sum(k1, k_ex), w1, w_plus_w_ex(w1, w_ex), !s, momentum_sum(k2, k_ex), k2, w_plus_w_ex(w2, w_ex), w2, sign_over_2, false); } } } flops += n_loops * 4 * flops_update_atomic; break; case PARTICLE_HOLE_TRANSVERSE: // G4 = 1/2 <c^+(k1 + k_ex, s) c(k1, -s) c^+(k2, -s) c(k2 + k_ex, s)> // = -1/2 G(k2 + k_ex, k1 + k_ex, s) G(k1, k2, -s). // G4(k1, k2, k_ex) = 1/2 sum_s <c^+(k1+k_ex, s) c(k1, -s) c^+(k2, -s) c(k2+k_ex, s)> // = -1/2 sum_s G(k2+k_ex, k1+k_ex, s) G(k1, k2, -s) for (int w_ex_idx = 0; w_ex_idx < exchange_frq.size(); ++w_ex_idx) { const int w_ex = exchange_frq[w_ex_idx]; for (int w2 = 0; w2 < WTpDmn::dmn_size(); ++w2) Loading @@ -498,8 +565,8 @@ double TpAccumulator<Parameters, linalg::CPU>::updateG4(TpGreensFunction& G4) { break; case PARTICLE_PARTICLE_UP_DOWN: // G4 = 1/2 <c^+(k_ex-k1, s) c^+(k1, -s) c(k2, -s) c(k_ex-k2, s)> // = 1/2 G(k_ex-k2, k_ex-k1, s) G(k2, k1, -s). // G4(k1, k2, k_ex) = 1/2 sum_s <c^+(k_ex-k1, s) c^+(k1, -s) c(k2, -s) c(k_ex-k2, s)> // = 1/2 sum_s G(k_ex-k2, k_ex-k1, s) G(k2, k1, -s) for (int w_ex_idx = 0; w_ex_idx < exchange_frq.size(); ++w_ex_idx) { const int w_ex = exchange_frq[w_ex_idx]; for (int w2 = 0; w2 < WTpDmn::dmn_size(); ++w2) Loading Loading @@ -535,6 +602,7 @@ void TpAccumulator<Parameters, linalg::CPU>::updateG4Atomic( // This function performs the following update for each band: // // G4(k1, k2, w1, w2) += alpha * G(s_a, k1_a, k2_a, w1_a, w2_a) * G(s_b, k1_b, k2_b, w1_b, w2_b) // // INTERNAL: would use __restrict__ pointer make sense? if (n_bands_ == 1) { *G4_ptr += alpha * getGSingleband(s_a, k1_a, k2_a, w1_a, w2_a) * Loading Loading @@ -570,9 +638,10 @@ void TpAccumulator<Parameters, linalg::CPU>::updateG4SpinDifference( const bool cross_legs) { // This function performs the following update for each band: // // G4(k1, k2, w1, w2) += alpha * (G(up, k1_a, k2_a, w1_a, w2_a) // + sign * G(down,k1_a, k2_a, w1_a, w2_a)) * // (G(up,k1_b,k2_b,w1_b,w2_b) + sign * G(down,k1_b,k2_b,w1_b,w2_b)) // G4(k1, k2, w1, w2) += alpha * [G(up, k1_a, k2_a, w1_a, w2_a) // + sign * G(down,k1_a, k2_a, w1_a, w2_a)] // * [G(up, k1_b, k2_b, w1_b, w2_b) // + sign * G(down, k1_b, k2_b, w1_b, w2_b)] if (n_bands_ == 1) { *G4_ptr += alpha * (getGSingleband(0, k1_a, k2_a, w1_a, w2_a) + Loading include/dca/phys/dca_step/cluster_solver/shared_tools/accumulation/tp/tp_accumulator_gpu.hpp +12 −0 Original line number Diff line number Diff line Loading @@ -402,6 +402,18 @@ void TpAccumulator<Parameters, linalg::GPU>::updateG4(const std::size_t channel_ G_[1].leadingDimension(), n_bands_, KDmn::dmn_size(), WTpPosDmn::dmn_size(), nw_exchange, nk_exchange, sign_, streams_[0]); break; case PARTICLE_HOLE_LONGITUDINAL_UP_UP: details::updateG4<Real, PARTICLE_HOLE_LONGITUDINAL_UP_UP>( get_G4()[channel_index].ptr(), G_[0].ptr(), G_[0].leadingDimension(), G_[1].ptr(), G_[1].leadingDimension(), n_bands_, KDmn::dmn_size(), WTpPosDmn::dmn_size(), nw_exchange, nk_exchange, sign_, streams_[0]); break; case PARTICLE_HOLE_LONGITUDINAL_UP_DOWN: details::updateG4<Real, PARTICLE_HOLE_LONGITUDINAL_UP_DOWN>( get_G4()[channel_index].ptr(), G_[0].ptr(), G_[0].leadingDimension(), G_[1].ptr(), G_[1].leadingDimension(), n_bands_, KDmn::dmn_size(), WTpPosDmn::dmn_size(), nw_exchange, nk_exchange, sign_, streams_[0]); break; case PARTICLE_HOLE_TRANSVERSE: details::updateG4<Real, PARTICLE_HOLE_TRANSVERSE>( get_G4()[channel_index].ptr(), G_[0].ptr(), G_[0].leadingDimension(), G_[1].ptr(), Loading include/dca/phys/four_point_type.hpp +2 −0 Original line number Diff line number Diff line Loading @@ -23,6 +23,8 @@ enum FourPointType : int { PARTICLE_HOLE_TRANSVERSE, PARTICLE_HOLE_MAGNETIC, PARTICLE_HOLE_CHARGE, PARTICLE_HOLE_LONGITUDINAL_UP_UP, PARTICLE_HOLE_LONGITUDINAL_UP_DOWN, PARTICLE_PARTICLE_UP_DOWN }; Loading include/dca/phys/parameters/four_point_parameters.hpp +45 −1 Original line number Diff line number Diff line Loading @@ -41,6 +41,8 @@ public: ph_transverse_(false), ph_magnetic_(false), ph_charge_(false), ph_longitudinal_up_up_(false), ph_longitudinal_up_down_(false), pp_up_down_(false), four_point_momentum_transfer_input_(lattice_dimension, 0.), four_point_frequency_transfer_(0), Loading @@ -58,7 +60,17 @@ public: // Returns true, if any G4 channel should be accumulated. Otherwise returns false. bool accumulateG4() const { return four_point_type_ != NONE || ph_transverse_ || ph_magnetic_ || ph_charge_ || pp_up_down_; return four_point_type_ != NONE || ph_transverse_ || ph_magnetic_ || ph_charge_ || ph_longitudinal_up_up_ || ph_longitudinal_up_down_ || pp_up_down_; } // Returns the number of channels of G4 to accumulate. std::size_t numG4Channels() const { if (four_point_type_ != NONE) return 1; else return ph_transverse_ + ph_magnetic_ + ph_charge_ + ph_longitudinal_up_up_ + ph_longitudinal_up_down_ + pp_up_down_; } FourPointType get_four_point_type() const { Loading Loading @@ -89,6 +101,20 @@ public: ph_charge_ = accumulate_channel; } bool accumulateG4ParticleHoleLongitudinalUpUp() const { return ph_longitudinal_up_up_; } void accumulateG4ParticleHoleLongitudinalUpUp(const bool accumulate_channel) { ph_longitudinal_up_up_ = accumulate_channel; } bool accumulateG4ParticleHoleLongitudinalUpDown() const { return ph_longitudinal_up_down_; } void accumulateG4ParticleHoleLongitudinalUpDown(const bool accumulate_channel) { ph_longitudinal_up_down_ = accumulate_channel; } bool accumulateG4ParticleParticleUpDown() const { return pp_up_down_; } Loading Loading @@ -140,6 +166,8 @@ private: bool ph_transverse_; bool ph_magnetic_; bool ph_charge_; bool ph_longitudinal_up_up_; bool ph_longitudinal_up_down_; bool pp_up_down_; std::vector<double> four_point_momentum_transfer_input_; Loading @@ -156,6 +184,8 @@ int FourPointParameters<lattice_dimension>::getBufferSize(const Concurrency& con buffer_size += concurrency.get_buffer_size(ph_transverse_); buffer_size += concurrency.get_buffer_size(ph_magnetic_); buffer_size += concurrency.get_buffer_size(ph_charge_); buffer_size += concurrency.get_buffer_size(ph_longitudinal_up_up_); buffer_size += concurrency.get_buffer_size(ph_longitudinal_up_down_); buffer_size += concurrency.get_buffer_size(pp_up_down_); buffer_size += concurrency.get_buffer_size(four_point_momentum_transfer_input_); buffer_size += concurrency.get_buffer_size(four_point_frequency_transfer_); Loading @@ -172,6 +202,8 @@ void FourPointParameters<lattice_dimension>::pack(const Concurrency& concurrency concurrency.pack(buffer, buffer_size, position, ph_transverse_); concurrency.pack(buffer, buffer_size, position, ph_magnetic_); concurrency.pack(buffer, buffer_size, position, ph_charge_); concurrency.pack(buffer, buffer_size, position, ph_longitudinal_up_up_); concurrency.pack(buffer, buffer_size, position, ph_longitudinal_up_down_); concurrency.pack(buffer, buffer_size, position, pp_up_down_); concurrency.pack(buffer, buffer_size, position, four_point_momentum_transfer_input_); concurrency.pack(buffer, buffer_size, position, four_point_frequency_transfer_); Loading @@ -186,6 +218,8 @@ void FourPointParameters<lattice_dimension>::unpack(const Concurrency& concurren concurrency.unpack(buffer, buffer_size, position, ph_transverse_); concurrency.unpack(buffer, buffer_size, position, ph_magnetic_); concurrency.unpack(buffer, buffer_size, position, ph_charge_); concurrency.unpack(buffer, buffer_size, position, ph_longitudinal_up_up_); concurrency.unpack(buffer, buffer_size, position, ph_longitudinal_up_down_); concurrency.unpack(buffer, buffer_size, position, pp_up_down_); concurrency.unpack(buffer, buffer_size, position, four_point_momentum_transfer_input_); concurrency.unpack(buffer, buffer_size, position, four_point_frequency_transfer_); Loading Loading @@ -220,6 +254,16 @@ void FourPointParameters<lattice_dimension>::readWrite(ReaderOrWriter& reader_or } catch (const std::exception& r_e) { } try { reader_or_writer.execute("particle-hole-longitudinal-up-up", ph_longitudinal_up_up_); } catch (const std::exception& r_e) { } try { reader_or_writer.execute("particle-hole-longitudinal-up-down", ph_longitudinal_up_down_); } catch (const std::exception& r_e) { } try { reader_or_writer.execute("particle-particle-up-down", pp_up_down_); } Loading Loading
include/dca/phys/dca_data/dca_data.hpp +19 −0 Original line number Diff line number Diff line Loading @@ -288,6 +288,11 @@ DcaData<Parameters>::DcaData(Parameters& parameters_ref) orbital_occupancy("orbital_occupancy") { H_symmetry = -1; // Reserve storage in advance such that we don't have to copy elements when we fill the vector. // We want to avoid copies because function's copy ctor does not copy the name (and because copies // are expensive). G4_.reserve(parameters_.numG4Channels()); // Allocate memory for G4. // Ensure backward compatibility. if (parameters_.get_four_point_type() != NONE) Loading @@ -304,12 +309,20 @@ DcaData<Parameters>::DcaData(Parameters& parameters_ref) if (parameters_.accumulateG4ParticleHoleCharge()) G4_.emplace_back("G4_" + toString(PARTICLE_HOLE_CHARGE)); if (parameters_.accumulateG4ParticleHoleLongitudinalUpUp()) G4_.emplace_back("G4_" + toString(PARTICLE_HOLE_LONGITUDINAL_UP_UP)); if (parameters_.accumulateG4ParticleHoleLongitudinalUpDown()) G4_.emplace_back("G4_" + toString(PARTICLE_HOLE_LONGITUDINAL_UP_DOWN)); if (parameters_.accumulateG4ParticleParticleUpDown()) G4_.emplace_back("G4_" + toString(PARTICLE_PARTICLE_UP_DOWN)); } // Allocate memory for error on G4. if (parameters_.get_error_computation_type() != ErrorComputationType::NONE) { G4_err_.reserve(parameters_.numG4Channels()); if (parameters_.get_four_point_type() != NONE) G4_err_.emplace_back("G4-error"); Loading @@ -323,6 +336,12 @@ DcaData<Parameters>::DcaData(Parameters& parameters_ref) if (parameters_.accumulateG4ParticleHoleCharge()) G4_err_.emplace_back("G4_" + toString(PARTICLE_HOLE_CHARGE) + "_err"); if (parameters_.accumulateG4ParticleHoleLongitudinalUpUp()) G4_err_.emplace_back("G4_" + toString(PARTICLE_HOLE_LONGITUDINAL_UP_UP) + "_err"); if (parameters_.accumulateG4ParticleHoleLongitudinalUpDown()) G4_err_.emplace_back("G4_" + toString(PARTICLE_HOLE_LONGITUDINAL_UP_DOWN) + "_err"); if (parameters_.accumulateG4ParticleParticleUpDown()) G4_err_.emplace_back("G4_" + toString(PARTICLE_PARTICLE_UP_DOWN) + "_err"); } Loading
include/dca/phys/dca_step/cluster_solver/shared_tools/accumulation/tp/tp_accumulator.hpp +104 −35 Original line number Diff line number Diff line Loading @@ -47,7 +47,6 @@ class TpAccumulator<Parameters, linalg::CPU> { public: using Real = typename Parameters::MC_measurement_scalar_type; protected: using RDmn = typename Parameters::RClusterDmn; using KDmn = typename Parameters::KClusterDmn; using KExchangeDmn = func::dmn_0<domains::MomentumExchangeDomain>; Loading @@ -56,9 +55,29 @@ protected: using NuDmn = func::dmn_variadic<BDmn, SDmn>; using WDmn = func::dmn_0<domains::frequency_domain>; public: using WTpDmn = func::dmn_0<domains::vertex_frequency_domain<domains::COMPACT>>; using WTpPosDmn = func::dmn_0<domains::vertex_frequency_domain<domains::COMPACT_POSITIVE>>; using WTpExtDmn = func::dmn_0<domains::vertex_frequency_domain<domains::EXTENDED>>; using WTpExtPosDmn = func::dmn_0<domains::vertex_frequency_domain<domains::EXTENDED_POSITIVE>>; using WExchangeDmn = func::dmn_0<domains::FrequencyExchangeDomain>; using this_type = TpAccumulator<Parameters>; protected: using Profiler = typename Parameters::profiler_type; using Complex = std::complex<Real>; using Matrix = linalg::Matrix<Complex, linalg::CPU>; using SpGreenFunction = func::function<Complex, func::dmn_variadic<BDmn, BDmn, SDmn, KDmn, KDmn, WTpExtPosDmn, WTpExtDmn>>; using TpDomain = func::dmn_variadic<BDmn, BDmn, BDmn, BDmn, KDmn, KDmn, KExchangeDmn, WTpDmn, WTpDmn, WExchangeDmn>; public: using TpGreensFunction = func::function<Complex, TpDomain>; // Constructor: // In: G0: non interacting greens function. // In: pars: parameters object. Loading Loading @@ -100,23 +119,6 @@ public: } protected: using Profiler = typename Parameters::profiler_type; using WTpDmn = func::dmn_0<domains::vertex_frequency_domain<domains::COMPACT>>; using WTpPosDmn = func::dmn_0<domains::vertex_frequency_domain<domains::COMPACT_POSITIVE>>; using WTpExtDmn = func::dmn_0<domains::vertex_frequency_domain<domains::EXTENDED>>; using WTpExtPosDmn = func::dmn_0<domains::vertex_frequency_domain<domains::EXTENDED_POSITIVE>>; using WExchangeDmn = func::dmn_0<domains::FrequencyExchangeDomain>; using Complex = std::complex<Real>; using SpGreenFunction = func::function<Complex, func::dmn_variadic<BDmn, BDmn, SDmn, KDmn, KDmn, WTpExtPosDmn, WTpExtDmn>>; using TpDomain = func::dmn_variadic<BDmn, BDmn, BDmn, BDmn, KDmn, KDmn, KExchangeDmn, WTpDmn, WTpDmn, WExchangeDmn>; using TpGreensFunction = func::function<Complex, TpDomain>; using Matrix = linalg::Matrix<Complex, linalg::CPU>; void initializeG0(); double computeG(); Loading Loading @@ -188,6 +190,11 @@ TpAccumulator<Parameters, linalg::CPU>::TpAccumulator( throw(std::logic_error("The number of single particle frequencies is too small.")); initializeG0(); // Reserve storage in advance such that we don't have to copy elements when we fill the vector. // We want to avoid copies because function's copy ctor does not copy the name (and because copies // are expensive). G4_.reserve(pars.numG4Channels()); // Ensure backward compatibility. if (pars.get_four_point_type() != NONE) { G4_.emplace_back("G4_" + toString(pars.get_four_point_type())); Loading @@ -204,6 +211,12 @@ TpAccumulator<Parameters, linalg::CPU>::TpAccumulator( if (pars.accumulateG4ParticleHoleCharge()) G4_.emplace_back("G4_" + toString(PARTICLE_HOLE_CHARGE)); if (pars.accumulateG4ParticleHoleLongitudinalUpUp()) G4_.emplace_back("G4_" + toString(PARTICLE_HOLE_LONGITUDINAL_UP_UP)); if (pars.accumulateG4ParticleHoleLongitudinalUpDown()) G4_.emplace_back("G4_" + toString(PARTICLE_HOLE_LONGITUDINAL_UP_DOWN)); if (pars.accumulateG4ParticleParticleUpDown()) G4_.emplace_back("G4_" + toString(PARTICLE_PARTICLE_UP_DOWN)); } Loading Loading @@ -419,12 +432,11 @@ double TpAccumulator<Parameters, linalg::CPU>::updateG4(TpGreensFunction& G4) { switch (channel) { case PARTICLE_HOLE_MAGNETIC: // Note: sums over spin indices are implied. // // G4(k1, k2, k_ex) = 1/2 (s1 * s2) <c^+(k1 + k_ex, s1) c(k1, s1) // c^+(k2, s2) c(k2 + k_ex, s2)> // = 1/2 (s1 * s2) <G(k1, k1 + k_ex, s1) G(k2 + k_ex, k2, s2) - // (s1 == s2) G(k2 + k_ex, k1 + k_ex, s1) G(k1, k2, s1)>. // G4(k1, k2, k_ex) = 1/2 sum_{s1, s2} (s1 * s2) // <c^+(k1+k_ex, s1) c(k1, s1) c^+(k2, s2) c(k2+k_ex, s2)> // = 1/2 sum_{s1, s2} (s1 * s2) // [G(k1, k1+k_ex, s1) G(k2+k_ex, k2, s2) // - (s1 == s2) G(k2+k_ex, k1+k_ex, s1) G(k1, k2, s1)] for (int w_ex_idx = 0; w_ex_idx < exchange_frq.size(); ++w_ex_idx) { const int w_ex = exchange_frq[w_ex_idx]; for (int w2 = 0; w2 < WTpDmn::dmn_size(); ++w2) Loading @@ -448,9 +460,11 @@ double TpAccumulator<Parameters, linalg::CPU>::updateG4(TpGreensFunction& G4) { break; case PARTICLE_HOLE_CHARGE: // G4(k1, k2, k_ex) += 1/2 <c^+(k1 + k_ex, s1) c(k1, s1) c^+(k2, s2) c(k2 + k_ex, s2)> = // = 1/2 <G(k1, k1 + k_ex, s1) G(k2 + k_ex, k2, s2) - // (s1 == s2) G(k2 + k_ex, k1 + k_ex, s1) G(k1, k2, s1)>. // G4(k1, k2, k_ex) = 1/2 sum_{s1, s2} // <c^+(k1+k_ex, s1) c(k1, s1) c^+(k2, s2) c(k2+k_ex, s2)> // = 1/2 sum_{s1, s2} // [G(k1, k1+k_ex, s1) G(k2+k_ex, k2, s2) // - (s1 == s2) G(k2+k_ex, k1+k_ex, s1) G(k1, k2, s1)] for (int w_ex_idx = 0; w_ex_idx < exchange_frq.size(); ++w_ex_idx) { const int w_ex = exchange_frq[w_ex_idx]; for (int w2 = 0; w2 < WTpDmn::dmn_size(); ++w2) Loading @@ -474,9 +488,62 @@ double TpAccumulator<Parameters, linalg::CPU>::updateG4(TpGreensFunction& G4) { flops += n_loops * (flops_update_spin_diff + 2 * flops_update_atomic); break; case PARTICLE_HOLE_LONGITUDINAL_UP_UP: // G4(k1, k2, k_ex) = 1/2 sum_s <c^+(k1+k_ex, s) c(k1, s) c^+(k2, s) c(k2+k_ex, s)> // = 1/2 sum_s [G(k1, k1+k_ex, s) G(k2+k_ex, k2, s) // - G(k2+k_ex, k1+k_ex, s) G(k1, k2, s)] for (int w_ex_idx = 0; w_ex_idx < exchange_frq.size(); ++w_ex_idx) { const int w_ex = exchange_frq[w_ex_idx]; for (int w2 = 0; w2 < WTpDmn::dmn_size(); ++w2) for (int w1 = 0; w1 < WTpDmn::dmn_size(); ++w1) for (int k_ex_idx = 0; k_ex_idx < exchange_mom.size(); ++k_ex_idx) { const int k_ex = exchange_mom[k_ex_idx]; for (int k2 = 0; k2 < KDmn::dmn_size(); ++k2) for (int k1 = 0; k1 < KDmn::dmn_size(); ++k1) { Complex* const G4_ptr = &G4(0, 0, 0, 0, k1, k2, k_ex_idx, w1, w2, w_ex_idx); for (int s = 0; s < 2; ++s) updateG4Atomic(G4_ptr, s, k1, momentum_sum(k1, k_ex), w1, w_plus_w_ex(w1, w_ex), s, momentum_sum(k2, k_ex), k2, w_plus_w_ex(w2, w_ex), w2, sign_over_2, false); for (int s = 0; s < 2; ++s) updateG4Atomic(G4_ptr, s, k1, k2, w1, w2, s, momentum_sum(k2, k_ex), momentum_sum(k1, k_ex), w_plus_w_ex(w2, w_ex), w_plus_w_ex(w1, w_ex), -sign_over_2, true); } } } flops += n_loops * 4 * flops_update_atomic; break; case PARTICLE_HOLE_LONGITUDINAL_UP_DOWN: // G4(k1, k2, k_ex) = 1/2 sum_s <c^+(k1+k_ex, s) c(k1, s) c^+(k2, -s) c(k2+k_ex, -s)> // = 1/2 sum_s G(k1, k1+k_ex, s) G(k2+k_ex, k2, -s) for (int w_ex_idx = 0; w_ex_idx < exchange_frq.size(); ++w_ex_idx) { const int w_ex = exchange_frq[w_ex_idx]; for (int w2 = 0; w2 < WTpDmn::dmn_size(); ++w2) for (int w1 = 0; w1 < WTpDmn::dmn_size(); ++w1) for (int k_ex_idx = 0; k_ex_idx < exchange_mom.size(); ++k_ex_idx) { const int k_ex = exchange_mom[k_ex_idx]; for (int k2 = 0; k2 < KDmn::dmn_size(); ++k2) for (int k1 = 0; k1 < KDmn::dmn_size(); ++k1) { Complex* const G4_ptr = &G4(0, 0, 0, 0, k1, k2, k_ex_idx, w1, w2, w_ex_idx); for (int s = 0; s < 2; ++s) updateG4Atomic(G4_ptr, s, k1, momentum_sum(k1, k_ex), w1, w_plus_w_ex(w1, w_ex), !s, momentum_sum(k2, k_ex), k2, w_plus_w_ex(w2, w_ex), w2, sign_over_2, false); } } } flops += n_loops * 4 * flops_update_atomic; break; case PARTICLE_HOLE_TRANSVERSE: // G4 = 1/2 <c^+(k1 + k_ex, s) c(k1, -s) c^+(k2, -s) c(k2 + k_ex, s)> // = -1/2 G(k2 + k_ex, k1 + k_ex, s) G(k1, k2, -s). // G4(k1, k2, k_ex) = 1/2 sum_s <c^+(k1+k_ex, s) c(k1, -s) c^+(k2, -s) c(k2+k_ex, s)> // = -1/2 sum_s G(k2+k_ex, k1+k_ex, s) G(k1, k2, -s) for (int w_ex_idx = 0; w_ex_idx < exchange_frq.size(); ++w_ex_idx) { const int w_ex = exchange_frq[w_ex_idx]; for (int w2 = 0; w2 < WTpDmn::dmn_size(); ++w2) Loading @@ -498,8 +565,8 @@ double TpAccumulator<Parameters, linalg::CPU>::updateG4(TpGreensFunction& G4) { break; case PARTICLE_PARTICLE_UP_DOWN: // G4 = 1/2 <c^+(k_ex-k1, s) c^+(k1, -s) c(k2, -s) c(k_ex-k2, s)> // = 1/2 G(k_ex-k2, k_ex-k1, s) G(k2, k1, -s). // G4(k1, k2, k_ex) = 1/2 sum_s <c^+(k_ex-k1, s) c^+(k1, -s) c(k2, -s) c(k_ex-k2, s)> // = 1/2 sum_s G(k_ex-k2, k_ex-k1, s) G(k2, k1, -s) for (int w_ex_idx = 0; w_ex_idx < exchange_frq.size(); ++w_ex_idx) { const int w_ex = exchange_frq[w_ex_idx]; for (int w2 = 0; w2 < WTpDmn::dmn_size(); ++w2) Loading Loading @@ -535,6 +602,7 @@ void TpAccumulator<Parameters, linalg::CPU>::updateG4Atomic( // This function performs the following update for each band: // // G4(k1, k2, w1, w2) += alpha * G(s_a, k1_a, k2_a, w1_a, w2_a) * G(s_b, k1_b, k2_b, w1_b, w2_b) // // INTERNAL: would use __restrict__ pointer make sense? if (n_bands_ == 1) { *G4_ptr += alpha * getGSingleband(s_a, k1_a, k2_a, w1_a, w2_a) * Loading Loading @@ -570,9 +638,10 @@ void TpAccumulator<Parameters, linalg::CPU>::updateG4SpinDifference( const bool cross_legs) { // This function performs the following update for each band: // // G4(k1, k2, w1, w2) += alpha * (G(up, k1_a, k2_a, w1_a, w2_a) // + sign * G(down,k1_a, k2_a, w1_a, w2_a)) * // (G(up,k1_b,k2_b,w1_b,w2_b) + sign * G(down,k1_b,k2_b,w1_b,w2_b)) // G4(k1, k2, w1, w2) += alpha * [G(up, k1_a, k2_a, w1_a, w2_a) // + sign * G(down,k1_a, k2_a, w1_a, w2_a)] // * [G(up, k1_b, k2_b, w1_b, w2_b) // + sign * G(down, k1_b, k2_b, w1_b, w2_b)] if (n_bands_ == 1) { *G4_ptr += alpha * (getGSingleband(0, k1_a, k2_a, w1_a, w2_a) + Loading
include/dca/phys/dca_step/cluster_solver/shared_tools/accumulation/tp/tp_accumulator_gpu.hpp +12 −0 Original line number Diff line number Diff line Loading @@ -402,6 +402,18 @@ void TpAccumulator<Parameters, linalg::GPU>::updateG4(const std::size_t channel_ G_[1].leadingDimension(), n_bands_, KDmn::dmn_size(), WTpPosDmn::dmn_size(), nw_exchange, nk_exchange, sign_, streams_[0]); break; case PARTICLE_HOLE_LONGITUDINAL_UP_UP: details::updateG4<Real, PARTICLE_HOLE_LONGITUDINAL_UP_UP>( get_G4()[channel_index].ptr(), G_[0].ptr(), G_[0].leadingDimension(), G_[1].ptr(), G_[1].leadingDimension(), n_bands_, KDmn::dmn_size(), WTpPosDmn::dmn_size(), nw_exchange, nk_exchange, sign_, streams_[0]); break; case PARTICLE_HOLE_LONGITUDINAL_UP_DOWN: details::updateG4<Real, PARTICLE_HOLE_LONGITUDINAL_UP_DOWN>( get_G4()[channel_index].ptr(), G_[0].ptr(), G_[0].leadingDimension(), G_[1].ptr(), G_[1].leadingDimension(), n_bands_, KDmn::dmn_size(), WTpPosDmn::dmn_size(), nw_exchange, nk_exchange, sign_, streams_[0]); break; case PARTICLE_HOLE_TRANSVERSE: details::updateG4<Real, PARTICLE_HOLE_TRANSVERSE>( get_G4()[channel_index].ptr(), G_[0].ptr(), G_[0].leadingDimension(), G_[1].ptr(), Loading
include/dca/phys/four_point_type.hpp +2 −0 Original line number Diff line number Diff line Loading @@ -23,6 +23,8 @@ enum FourPointType : int { PARTICLE_HOLE_TRANSVERSE, PARTICLE_HOLE_MAGNETIC, PARTICLE_HOLE_CHARGE, PARTICLE_HOLE_LONGITUDINAL_UP_UP, PARTICLE_HOLE_LONGITUDINAL_UP_DOWN, PARTICLE_PARTICLE_UP_DOWN }; Loading
include/dca/phys/parameters/four_point_parameters.hpp +45 −1 Original line number Diff line number Diff line Loading @@ -41,6 +41,8 @@ public: ph_transverse_(false), ph_magnetic_(false), ph_charge_(false), ph_longitudinal_up_up_(false), ph_longitudinal_up_down_(false), pp_up_down_(false), four_point_momentum_transfer_input_(lattice_dimension, 0.), four_point_frequency_transfer_(0), Loading @@ -58,7 +60,17 @@ public: // Returns true, if any G4 channel should be accumulated. Otherwise returns false. bool accumulateG4() const { return four_point_type_ != NONE || ph_transverse_ || ph_magnetic_ || ph_charge_ || pp_up_down_; return four_point_type_ != NONE || ph_transverse_ || ph_magnetic_ || ph_charge_ || ph_longitudinal_up_up_ || ph_longitudinal_up_down_ || pp_up_down_; } // Returns the number of channels of G4 to accumulate. std::size_t numG4Channels() const { if (four_point_type_ != NONE) return 1; else return ph_transverse_ + ph_magnetic_ + ph_charge_ + ph_longitudinal_up_up_ + ph_longitudinal_up_down_ + pp_up_down_; } FourPointType get_four_point_type() const { Loading Loading @@ -89,6 +101,20 @@ public: ph_charge_ = accumulate_channel; } bool accumulateG4ParticleHoleLongitudinalUpUp() const { return ph_longitudinal_up_up_; } void accumulateG4ParticleHoleLongitudinalUpUp(const bool accumulate_channel) { ph_longitudinal_up_up_ = accumulate_channel; } bool accumulateG4ParticleHoleLongitudinalUpDown() const { return ph_longitudinal_up_down_; } void accumulateG4ParticleHoleLongitudinalUpDown(const bool accumulate_channel) { ph_longitudinal_up_down_ = accumulate_channel; } bool accumulateG4ParticleParticleUpDown() const { return pp_up_down_; } Loading Loading @@ -140,6 +166,8 @@ private: bool ph_transverse_; bool ph_magnetic_; bool ph_charge_; bool ph_longitudinal_up_up_; bool ph_longitudinal_up_down_; bool pp_up_down_; std::vector<double> four_point_momentum_transfer_input_; Loading @@ -156,6 +184,8 @@ int FourPointParameters<lattice_dimension>::getBufferSize(const Concurrency& con buffer_size += concurrency.get_buffer_size(ph_transverse_); buffer_size += concurrency.get_buffer_size(ph_magnetic_); buffer_size += concurrency.get_buffer_size(ph_charge_); buffer_size += concurrency.get_buffer_size(ph_longitudinal_up_up_); buffer_size += concurrency.get_buffer_size(ph_longitudinal_up_down_); buffer_size += concurrency.get_buffer_size(pp_up_down_); buffer_size += concurrency.get_buffer_size(four_point_momentum_transfer_input_); buffer_size += concurrency.get_buffer_size(four_point_frequency_transfer_); Loading @@ -172,6 +202,8 @@ void FourPointParameters<lattice_dimension>::pack(const Concurrency& concurrency concurrency.pack(buffer, buffer_size, position, ph_transverse_); concurrency.pack(buffer, buffer_size, position, ph_magnetic_); concurrency.pack(buffer, buffer_size, position, ph_charge_); concurrency.pack(buffer, buffer_size, position, ph_longitudinal_up_up_); concurrency.pack(buffer, buffer_size, position, ph_longitudinal_up_down_); concurrency.pack(buffer, buffer_size, position, pp_up_down_); concurrency.pack(buffer, buffer_size, position, four_point_momentum_transfer_input_); concurrency.pack(buffer, buffer_size, position, four_point_frequency_transfer_); Loading @@ -186,6 +218,8 @@ void FourPointParameters<lattice_dimension>::unpack(const Concurrency& concurren concurrency.unpack(buffer, buffer_size, position, ph_transverse_); concurrency.unpack(buffer, buffer_size, position, ph_magnetic_); concurrency.unpack(buffer, buffer_size, position, ph_charge_); concurrency.unpack(buffer, buffer_size, position, ph_longitudinal_up_up_); concurrency.unpack(buffer, buffer_size, position, ph_longitudinal_up_down_); concurrency.unpack(buffer, buffer_size, position, pp_up_down_); concurrency.unpack(buffer, buffer_size, position, four_point_momentum_transfer_input_); concurrency.unpack(buffer, buffer_size, position, four_point_frequency_transfer_); Loading Loading @@ -220,6 +254,16 @@ void FourPointParameters<lattice_dimension>::readWrite(ReaderOrWriter& reader_or } catch (const std::exception& r_e) { } try { reader_or_writer.execute("particle-hole-longitudinal-up-up", ph_longitudinal_up_up_); } catch (const std::exception& r_e) { } try { reader_or_writer.execute("particle-hole-longitudinal-up-down", ph_longitudinal_up_down_); } catch (const std::exception& r_e) { } try { reader_or_writer.execute("particle-particle-up-down", pp_up_down_); } Loading