Loading include/dca/linalg/matrix.hpp +0 −14 Original line number Diff line number Diff line Loading @@ -193,11 +193,6 @@ public: // Swaps the contents of the matrix, included the name, with those of rhs. void swapWithName(Matrix<ScalarType, device_name>& rhs); // Asynchronous assignment (copy with stream = getStream(thread_id, stream_id)) // + synchronization of stream template <DeviceType rhs_device_name> void set(const Matrix<ScalarType, rhs_device_name>& rhs, int thread_id, int stream_id); #ifdef DCA_HAVE_CUDA // Asynchronous assignment. template <DeviceType rhs_device_name> Loading Loading @@ -417,15 +412,6 @@ void Matrix<ScalarType, device_name>::swapWithName(Matrix<ScalarType, device_nam swap(rhs); } template <typename ScalarType, DeviceType device_name> template <DeviceType rhs_device_name> void Matrix<ScalarType, device_name>::set(const Matrix<ScalarType, rhs_device_name>& rhs, int thread_id, int stream_id) { resize(rhs.size_); util::memoryCopy(data_, leadingDimension(), rhs.data_, rhs.leadingDimension(), size_, thread_id, stream_id); } #ifdef DCA_HAVE_CUDA template <typename ScalarType, DeviceType device_name> Loading include/dca/phys/dca_step/cluster_solver/ctaux/ctaux_walker.hpp +6 −60 Original line number Diff line number Diff line Loading @@ -284,6 +284,8 @@ private: std::array<linalg::util::CudaEvent, 2> m_computed_events_; bool config_initialized_; linalg::util::CudaEvent sync_streams_event_; }; template <dca::linalg::DeviceType device_t, class Parameters, class Data, typename Real> Loading Loading @@ -586,18 +588,6 @@ void CtauxWalker<device_t, Parameters, Data, Real>::add_non_interacting_spins_to check_G0_matrices(configuration, G0_up, G0_dn); #endif // DCA_WITH_QMC_BIT } /* if(true) { std::cout << "\n\n\t G0-TOOLS \n\n"; G0_CPU_tools_obj.build_G0_matrix(configuration, G0_up_CPU, e_UP); G0_CPU_tools_obj.build_G0_matrix(configuration, G0_dn_CPU, e_DN); dca::linalg::matrixop::difference(G0_up_CPU, G0_up); dca::linalg::matrixop::difference(G0_dn_CPU, G0_dn); } */ { // update N for new shuffled vertices // profiler_type profiler("N-matrix (update)", "CT-AUX walker", __LINE__, thread_id); Loading @@ -609,17 +599,6 @@ void CtauxWalker<device_t, Parameters, Data, Real>::add_non_interacting_spins_to #endif // DCA_WITH_QMC_BIT } /* if(true) { std::cout << "\n\n\t N-TOOLS : " << sign << "\t" << configuration.size() << "\n\n"; N_CPU_tools_obj.build_N_matrix(configuration, N_up_CPU, G0_up_CPU, e_UP); N_CPU_tools_obj.build_N_matrix(configuration, N_dn_CPU, G0_dn_CPU, e_DN); dca::linalg::matrixop::difference(N_up_CPU, N_up); dca::linalg::matrixop::difference(N_dn_CPU, N_dn); } */ { // update N for new shuffled vertices // profiler_type profiler("G-matrix (update)", "CT-AUX walker", __LINE__, thread_id); Loading @@ -630,26 +609,6 @@ void CtauxWalker<device_t, Parameters, Data, Real>::add_non_interacting_spins_to check_G_matrices(configuration, G0_up, G0_dn, N_up, N_dn, G_up, G_dn); #endif // DCA_WITH_QMC_BIT } /* { std::cout << "\n\n\t G-TOOLS\n\n"; G_CPU_tools_obj.build_G_matrix(configuration, N_up_CPU, G0_up_CPU, G_up_CPU, e_UP); G_CPU_tools_obj.build_G_matrix(configuration, N_dn_CPU, G0_dn_CPU, G_dn_CPU, e_DN); dca::linalg::matrixop::difference(G_up_CPU, G_up); dca::linalg::matrixop::difference(G_dn_CPU, G_dn); } */ /* #ifdef DCA_WITH_QMC_BIT if(concurrency.id()==0 and thread_id==0) std::cout << "\t N-update check :" << std::endl; N_tools_obj.check_N_matrix(configuration, N_up, G0_up, Gamma_up, e_UP); N_tools_obj.check_N_matrix(configuration, N_dn, G0_dn, Gamma_dn, e_DN); #endif // DCA_WITH_QMC_BIT */ } template <dca::linalg::DeviceType device_t, class Parameters, class Data, typename Real> Loading Loading @@ -1427,23 +1386,6 @@ void CtauxWalker<device_t, Parameters, Data, Real>::clean_up_the_configuration() SHRINK_tools_obj.reorganize_configuration_test(configuration, N_up, N_dn, G0_up, G0_dn); assert(configuration.assert_consistency()); // #ifdef DCA_WITH_QMC_BIT // check_N_matrices(configuration, G0_up, G0_dn, N_up, N_dn); // if (concurrency.id() == concurrency.first()) { // std::cout << "\t\t <k> = " << // configuration.get_number_of_interacting_HS_spins() // << std::endl; // std::cout << "\t\t # creatable spins = " << // configuration.get_number_of_creatable_HS_spins() // << std::endl; // std::cout << "\t N-woodburry check (2) :" << std::endl; // } // N_tools_obj.check_N_matrix(configuration, N_up, G0_up, Gamma_up, e_UP); // N_tools_obj.check_N_matrix(configuration, N_dn, G0_dn, Gamma_dn, e_DN); // #endif // DCA_WITH_QMC_BIT } template <dca::linalg::DeviceType device_t, class Parameters, class Data, typename Real> Loading Loading @@ -1560,6 +1502,10 @@ template <dca::linalg::DeviceType device_t, class Parameters, class Data, typena template <typename AccumType> const linalg::util::CudaEvent* CtauxWalker<device_t, Parameters, Data, Real>::compute_M( std::array<linalg::Matrix<AccumType, device_t>, 2>& Ms) { // Stream 1 waits on stream 0. sync_streams_event_.record(linalg::util::getStream(thread_id, 0)); sync_streams_event_.block(linalg::util::getStream(thread_id, 1)); for (int s = 0; s < 2; ++s) { const auto& config = get_configuration().get(s == 0 ? e_UP : e_DN); exp_v_minus_one_[s].resizeNoCopy(config.size()); Loading include/dca/phys/dca_step/cluster_solver/ctaux/structs/ct_aux_hs_configuration.hpp +10 −24 Original line number Diff line number Diff line Loading @@ -20,6 +20,7 @@ #include <vector> #include "dca/io/buffer.hpp" #include "dca/linalg/util/allocators/vectors_typedefs.hpp" #include "dca/phys/dca_step/cluster_solver/ctaux/domains/hs_field_sign_domain.hpp" #include "dca/phys/dca_step/cluster_solver/ctaux/domains/hs_spin_domain.hpp" #include "dca/phys/dca_step/cluster_solver/ctaux/structs/vertex_pair.hpp" Loading Loading @@ -70,7 +71,8 @@ public: std::vector<int>& get_changed_spin_indices(); std::vector<HS_spin_states_type>& get_changed_spin_values(); std::vector<int>& get_changed_spin_indices_e_spin(e_spin_states_type e_spin_type); auto& get_changed_spin_indices_e_spin(e_spin_states_type e_spin_type); std::vector<HS_spin_states_type>& get_changed_spin_values_e_spin(e_spin_states_type e_spin_type); int get_number_of_interacting_HS_spins(); Loading Loading @@ -127,10 +129,11 @@ private: std::vector<int> changed_spin_indices; std::vector<HS_spin_states_type> changed_spin_values; std::vector<int> changed_spin_indices_e_UP; // = { changed_spin_indices of configuration_e_UP} std::vector<HS_spin_states_type> changed_spin_values_e_UP; using HostVector = linalg::util::HostVector<int>; HostVector changed_spin_indices_e_UP; // = { changed_spin_indices of configuration_e_UP} HostVector changed_spin_indices_e_DN; // = { changed_spin_indices of configuration_e_DN} std::vector<int> changed_spin_indices_e_DN; // = { changed_spin_indices of configuration_e_DN} std::vector<HS_spin_states_type> changed_spin_values_e_UP; std::vector<HS_spin_states_type> changed_spin_values_e_DN; const int max_num_noninteracting_spins_; Loading @@ -144,23 +147,6 @@ CT_AUX_HS_configuration<parameters_type>::CT_AUX_HS_configuration(parameters_typ : parameters(parameters_ref), rng(rng_ref), configuration(), configuration_e_UP(0), configuration_e_DN(0), current_Nb_of_creatable_spins(0), current_Nb_of_annihilatable_spins(0), changed_spin_indices(0), changed_spin_values(0), changed_spin_indices_e_UP(0), changed_spin_values_e_UP(0), changed_spin_indices_e_DN(0), changed_spin_values_e_DN(0), // Rounding up ensures a value >= 1. max_num_noninteracting_spins_((parameters.get_max_submatrix_size() + 1) / 2), Loading Loading @@ -426,12 +412,12 @@ void CT_AUX_HS_configuration<parameters_type>::add_delayed_HS_spin(int configura template <class parameters_type> void CT_AUX_HS_configuration<parameters_type>::add_delayed_HS_spin_to_configuration_e_spin( int configuration_index, HS_spin_states_type spin_value) { std::vector<int>& changed_spin_indices_e_spin_first = auto& changed_spin_indices_e_spin_first = get_changed_spin_indices_e_spin(configuration[configuration_index].get_e_spins().first); std::vector<HS_spin_states_type>& changed_spin_values_e_spin_first = get_changed_spin_values_e_spin(configuration[configuration_index].get_e_spins().first); std::vector<int>& changed_spin_indices_e_spin_second = auto& changed_spin_indices_e_spin_second = get_changed_spin_indices_e_spin(configuration[configuration_index].get_e_spins().second); std::vector<HS_spin_states_type>& changed_spin_values_e_spin_second = get_changed_spin_values_e_spin(configuration[configuration_index].get_e_spins().second); Loading Loading @@ -473,7 +459,7 @@ std::vector<HS_spin_states_type>& CT_AUX_HS_configuration<parameters_type>::get_ } template <class parameters_type> std::vector<int>& CT_AUX_HS_configuration<parameters_type>::get_changed_spin_indices_e_spin( auto& CT_AUX_HS_configuration<parameters_type>::get_changed_spin_indices_e_spin( e_spin_states_type e_spin) { if (e_spin == e_UP) return changed_spin_indices_e_UP; Loading include/dca/phys/dca_step/cluster_solver/ctaux/walker/tools/n_matrix_tools/n_matrix_tools_cpu.inc +9 −6 Original line number Diff line number Diff line Loading @@ -17,13 +17,16 @@ class N_MATRIX_TOOLS<dca::linalg::CPU,Parameters, Real> { typedef typename Parameters::concurrency_type concurrency_type; typedef typename Parameters::profiler_type profiler_t; template<class T> using HostVector = linalg::util::HostVector<T>; public: N_MATRIX_TOOLS(int id,Parameters& parameters_ref); Real* get_device_ptr(dca::linalg::Vector<Real, dca::linalg::CPU>& v); int* get_permutation(); void set_permutation(std::vector<int>& p); const int* get_permutation() const; void set_permutation(const HostVector<int>& p); void set_d_vector(dca::linalg::Vector<Real, dca::linalg::CPU>& d_inv); Loading @@ -32,7 +35,7 @@ public: void copy_rows(dca::linalg::Matrix<Real, dca::linalg::CPU>& N, dca::linalg::Matrix<Real, dca::linalg::CPU>& N_new_spins); void compute_G_cols(std::vector<Real>& exp_V, dca::linalg::Matrix<Real, dca::linalg::CPU>& N, void compute_G_cols(HostVector<Real>& exp_V, dca::linalg::Matrix<Real, dca::linalg::CPU>& N, dca::linalg::Matrix<Real, dca::linalg::CPU>& G, dca::linalg::Matrix<Real, dca::linalg::CPU>& G_cols); Loading Loading @@ -77,12 +80,12 @@ Real* N_MATRIX_TOOLS<dca::linalg::CPU,Parameters, Real>::get_device_ptr( } template <class Parameters, typename Real> int* N_MATRIX_TOOLS<dca::linalg::CPU,Parameters, Real>::get_permutation() { const int* N_MATRIX_TOOLS<dca::linalg::CPU,Parameters, Real>::get_permutation() const { return permutation.ptr(); } template <class Parameters, typename Real> void N_MATRIX_TOOLS<dca::linalg::CPU,Parameters, Real>::set_permutation(std::vector<int>& p) { void N_MATRIX_TOOLS<dca::linalg::CPU,Parameters, Real>::set_permutation(const HostVector<int>& p) { permutation = p; } Loading Loading @@ -113,7 +116,7 @@ void N_MATRIX_TOOLS<dca::linalg::CPU,Parameters, Real>::copy_rows( template <class Parameters, typename Real> void N_MATRIX_TOOLS<dca::linalg::CPU,Parameters, Real>::compute_G_cols( std::vector<Real>& exp_V, dca::linalg::Matrix<Real, dca::linalg::CPU>& N, HostVector<Real>& exp_V, dca::linalg::Matrix<Real, dca::linalg::CPU>& N, dca::linalg::Matrix<Real, dca::linalg::CPU>& G, dca::linalg::Matrix<Real, dca::linalg::CPU>& G_cols) { assert(N.nrRows() == G.nrRows()); Loading include/dca/phys/dca_step/cluster_solver/ctaux/walker/tools/n_matrix_tools/n_matrix_tools_gpu.inc +11 −8 Original line number Diff line number Diff line Loading @@ -17,11 +17,14 @@ class N_MATRIX_TOOLS<dca::linalg::GPU, Parameters, Real> { typedef typename Parameters::concurrency_type concurrency_type; typedef typename Parameters::profiler_type profiler_t; template<class T> using HostVector = linalg::util::HostVector<T>; public: N_MATRIX_TOOLS(int id, Parameters& parameters_ref); int* get_permutation(); void set_permutation(std::vector<int>& p); const int* get_permutation() const; void set_permutation(const HostVector<int>& p); void set_d_vector(dca::linalg::Vector<Real, dca::linalg::CPU>& d_inv); Loading @@ -32,7 +35,7 @@ public: void copy_rows(dca::linalg::Matrix<Real, dca::linalg::GPU>& N, dca::linalg::Matrix<Real, dca::linalg::GPU>& N_new_spins); void compute_G_cols(std::vector<Real>& exp_V, dca::linalg::Matrix<Real, dca::linalg::GPU>& N, void compute_G_cols(HostVector <Real> &exp_V, dca::linalg::Matrix<Real, dca::linalg::GPU>& N, dca::linalg::Matrix<Real, dca::linalg::GPU>& G, dca::linalg::Matrix<Real, dca::linalg::GPU>& G_cols); Loading Loading @@ -85,12 +88,12 @@ N_MATRIX_TOOLS<dca::linalg::GPU, Parameters, Real>::N_MATRIX_TOOLS(int id, Param } template <class Parameters, typename Real> int* N_MATRIX_TOOLS<dca::linalg::GPU, Parameters, Real>::get_permutation() { const int* N_MATRIX_TOOLS<dca::linalg::GPU, Parameters, Real>::get_permutation() const { return permutation.ptr(); } template <class Parameters, typename Real> void N_MATRIX_TOOLS<dca::linalg::GPU, Parameters, Real>::set_permutation(std::vector<int>& p) { void N_MATRIX_TOOLS<dca::linalg::GPU, Parameters, Real>::set_permutation(const HostVector<int>& p) { permutation.setAsync(p, thread_id, stream_id); } Loading Loading @@ -129,7 +132,7 @@ void N_MATRIX_TOOLS<dca::linalg::GPU, Parameters, Real>::copy_rows( template <class Parameters, typename Real> void N_MATRIX_TOOLS<dca::linalg::GPU, Parameters, Real>::compute_G_cols( std::vector<Real>& exp_V_CPU, dca::linalg::Matrix<Real, dca::linalg::GPU>& N, HostVector<Real> &exp_V_CPU, dca::linalg::Matrix<Real, dca::linalg::GPU>& N, dca::linalg::Matrix<Real, dca::linalg::GPU>& G, dca::linalg::Matrix<Real, dca::linalg::GPU>& G_cols) { exp_V.setAsync(exp_V_CPU, linalg::util::getStream(thread_id, stream_id)); Loading Loading
include/dca/linalg/matrix.hpp +0 −14 Original line number Diff line number Diff line Loading @@ -193,11 +193,6 @@ public: // Swaps the contents of the matrix, included the name, with those of rhs. void swapWithName(Matrix<ScalarType, device_name>& rhs); // Asynchronous assignment (copy with stream = getStream(thread_id, stream_id)) // + synchronization of stream template <DeviceType rhs_device_name> void set(const Matrix<ScalarType, rhs_device_name>& rhs, int thread_id, int stream_id); #ifdef DCA_HAVE_CUDA // Asynchronous assignment. template <DeviceType rhs_device_name> Loading Loading @@ -417,15 +412,6 @@ void Matrix<ScalarType, device_name>::swapWithName(Matrix<ScalarType, device_nam swap(rhs); } template <typename ScalarType, DeviceType device_name> template <DeviceType rhs_device_name> void Matrix<ScalarType, device_name>::set(const Matrix<ScalarType, rhs_device_name>& rhs, int thread_id, int stream_id) { resize(rhs.size_); util::memoryCopy(data_, leadingDimension(), rhs.data_, rhs.leadingDimension(), size_, thread_id, stream_id); } #ifdef DCA_HAVE_CUDA template <typename ScalarType, DeviceType device_name> Loading
include/dca/phys/dca_step/cluster_solver/ctaux/ctaux_walker.hpp +6 −60 Original line number Diff line number Diff line Loading @@ -284,6 +284,8 @@ private: std::array<linalg::util::CudaEvent, 2> m_computed_events_; bool config_initialized_; linalg::util::CudaEvent sync_streams_event_; }; template <dca::linalg::DeviceType device_t, class Parameters, class Data, typename Real> Loading Loading @@ -586,18 +588,6 @@ void CtauxWalker<device_t, Parameters, Data, Real>::add_non_interacting_spins_to check_G0_matrices(configuration, G0_up, G0_dn); #endif // DCA_WITH_QMC_BIT } /* if(true) { std::cout << "\n\n\t G0-TOOLS \n\n"; G0_CPU_tools_obj.build_G0_matrix(configuration, G0_up_CPU, e_UP); G0_CPU_tools_obj.build_G0_matrix(configuration, G0_dn_CPU, e_DN); dca::linalg::matrixop::difference(G0_up_CPU, G0_up); dca::linalg::matrixop::difference(G0_dn_CPU, G0_dn); } */ { // update N for new shuffled vertices // profiler_type profiler("N-matrix (update)", "CT-AUX walker", __LINE__, thread_id); Loading @@ -609,17 +599,6 @@ void CtauxWalker<device_t, Parameters, Data, Real>::add_non_interacting_spins_to #endif // DCA_WITH_QMC_BIT } /* if(true) { std::cout << "\n\n\t N-TOOLS : " << sign << "\t" << configuration.size() << "\n\n"; N_CPU_tools_obj.build_N_matrix(configuration, N_up_CPU, G0_up_CPU, e_UP); N_CPU_tools_obj.build_N_matrix(configuration, N_dn_CPU, G0_dn_CPU, e_DN); dca::linalg::matrixop::difference(N_up_CPU, N_up); dca::linalg::matrixop::difference(N_dn_CPU, N_dn); } */ { // update N for new shuffled vertices // profiler_type profiler("G-matrix (update)", "CT-AUX walker", __LINE__, thread_id); Loading @@ -630,26 +609,6 @@ void CtauxWalker<device_t, Parameters, Data, Real>::add_non_interacting_spins_to check_G_matrices(configuration, G0_up, G0_dn, N_up, N_dn, G_up, G_dn); #endif // DCA_WITH_QMC_BIT } /* { std::cout << "\n\n\t G-TOOLS\n\n"; G_CPU_tools_obj.build_G_matrix(configuration, N_up_CPU, G0_up_CPU, G_up_CPU, e_UP); G_CPU_tools_obj.build_G_matrix(configuration, N_dn_CPU, G0_dn_CPU, G_dn_CPU, e_DN); dca::linalg::matrixop::difference(G_up_CPU, G_up); dca::linalg::matrixop::difference(G_dn_CPU, G_dn); } */ /* #ifdef DCA_WITH_QMC_BIT if(concurrency.id()==0 and thread_id==0) std::cout << "\t N-update check :" << std::endl; N_tools_obj.check_N_matrix(configuration, N_up, G0_up, Gamma_up, e_UP); N_tools_obj.check_N_matrix(configuration, N_dn, G0_dn, Gamma_dn, e_DN); #endif // DCA_WITH_QMC_BIT */ } template <dca::linalg::DeviceType device_t, class Parameters, class Data, typename Real> Loading Loading @@ -1427,23 +1386,6 @@ void CtauxWalker<device_t, Parameters, Data, Real>::clean_up_the_configuration() SHRINK_tools_obj.reorganize_configuration_test(configuration, N_up, N_dn, G0_up, G0_dn); assert(configuration.assert_consistency()); // #ifdef DCA_WITH_QMC_BIT // check_N_matrices(configuration, G0_up, G0_dn, N_up, N_dn); // if (concurrency.id() == concurrency.first()) { // std::cout << "\t\t <k> = " << // configuration.get_number_of_interacting_HS_spins() // << std::endl; // std::cout << "\t\t # creatable spins = " << // configuration.get_number_of_creatable_HS_spins() // << std::endl; // std::cout << "\t N-woodburry check (2) :" << std::endl; // } // N_tools_obj.check_N_matrix(configuration, N_up, G0_up, Gamma_up, e_UP); // N_tools_obj.check_N_matrix(configuration, N_dn, G0_dn, Gamma_dn, e_DN); // #endif // DCA_WITH_QMC_BIT } template <dca::linalg::DeviceType device_t, class Parameters, class Data, typename Real> Loading Loading @@ -1560,6 +1502,10 @@ template <dca::linalg::DeviceType device_t, class Parameters, class Data, typena template <typename AccumType> const linalg::util::CudaEvent* CtauxWalker<device_t, Parameters, Data, Real>::compute_M( std::array<linalg::Matrix<AccumType, device_t>, 2>& Ms) { // Stream 1 waits on stream 0. sync_streams_event_.record(linalg::util::getStream(thread_id, 0)); sync_streams_event_.block(linalg::util::getStream(thread_id, 1)); for (int s = 0; s < 2; ++s) { const auto& config = get_configuration().get(s == 0 ? e_UP : e_DN); exp_v_minus_one_[s].resizeNoCopy(config.size()); Loading
include/dca/phys/dca_step/cluster_solver/ctaux/structs/ct_aux_hs_configuration.hpp +10 −24 Original line number Diff line number Diff line Loading @@ -20,6 +20,7 @@ #include <vector> #include "dca/io/buffer.hpp" #include "dca/linalg/util/allocators/vectors_typedefs.hpp" #include "dca/phys/dca_step/cluster_solver/ctaux/domains/hs_field_sign_domain.hpp" #include "dca/phys/dca_step/cluster_solver/ctaux/domains/hs_spin_domain.hpp" #include "dca/phys/dca_step/cluster_solver/ctaux/structs/vertex_pair.hpp" Loading Loading @@ -70,7 +71,8 @@ public: std::vector<int>& get_changed_spin_indices(); std::vector<HS_spin_states_type>& get_changed_spin_values(); std::vector<int>& get_changed_spin_indices_e_spin(e_spin_states_type e_spin_type); auto& get_changed_spin_indices_e_spin(e_spin_states_type e_spin_type); std::vector<HS_spin_states_type>& get_changed_spin_values_e_spin(e_spin_states_type e_spin_type); int get_number_of_interacting_HS_spins(); Loading Loading @@ -127,10 +129,11 @@ private: std::vector<int> changed_spin_indices; std::vector<HS_spin_states_type> changed_spin_values; std::vector<int> changed_spin_indices_e_UP; // = { changed_spin_indices of configuration_e_UP} std::vector<HS_spin_states_type> changed_spin_values_e_UP; using HostVector = linalg::util::HostVector<int>; HostVector changed_spin_indices_e_UP; // = { changed_spin_indices of configuration_e_UP} HostVector changed_spin_indices_e_DN; // = { changed_spin_indices of configuration_e_DN} std::vector<int> changed_spin_indices_e_DN; // = { changed_spin_indices of configuration_e_DN} std::vector<HS_spin_states_type> changed_spin_values_e_UP; std::vector<HS_spin_states_type> changed_spin_values_e_DN; const int max_num_noninteracting_spins_; Loading @@ -144,23 +147,6 @@ CT_AUX_HS_configuration<parameters_type>::CT_AUX_HS_configuration(parameters_typ : parameters(parameters_ref), rng(rng_ref), configuration(), configuration_e_UP(0), configuration_e_DN(0), current_Nb_of_creatable_spins(0), current_Nb_of_annihilatable_spins(0), changed_spin_indices(0), changed_spin_values(0), changed_spin_indices_e_UP(0), changed_spin_values_e_UP(0), changed_spin_indices_e_DN(0), changed_spin_values_e_DN(0), // Rounding up ensures a value >= 1. max_num_noninteracting_spins_((parameters.get_max_submatrix_size() + 1) / 2), Loading Loading @@ -426,12 +412,12 @@ void CT_AUX_HS_configuration<parameters_type>::add_delayed_HS_spin(int configura template <class parameters_type> void CT_AUX_HS_configuration<parameters_type>::add_delayed_HS_spin_to_configuration_e_spin( int configuration_index, HS_spin_states_type spin_value) { std::vector<int>& changed_spin_indices_e_spin_first = auto& changed_spin_indices_e_spin_first = get_changed_spin_indices_e_spin(configuration[configuration_index].get_e_spins().first); std::vector<HS_spin_states_type>& changed_spin_values_e_spin_first = get_changed_spin_values_e_spin(configuration[configuration_index].get_e_spins().first); std::vector<int>& changed_spin_indices_e_spin_second = auto& changed_spin_indices_e_spin_second = get_changed_spin_indices_e_spin(configuration[configuration_index].get_e_spins().second); std::vector<HS_spin_states_type>& changed_spin_values_e_spin_second = get_changed_spin_values_e_spin(configuration[configuration_index].get_e_spins().second); Loading Loading @@ -473,7 +459,7 @@ std::vector<HS_spin_states_type>& CT_AUX_HS_configuration<parameters_type>::get_ } template <class parameters_type> std::vector<int>& CT_AUX_HS_configuration<parameters_type>::get_changed_spin_indices_e_spin( auto& CT_AUX_HS_configuration<parameters_type>::get_changed_spin_indices_e_spin( e_spin_states_type e_spin) { if (e_spin == e_UP) return changed_spin_indices_e_UP; Loading
include/dca/phys/dca_step/cluster_solver/ctaux/walker/tools/n_matrix_tools/n_matrix_tools_cpu.inc +9 −6 Original line number Diff line number Diff line Loading @@ -17,13 +17,16 @@ class N_MATRIX_TOOLS<dca::linalg::CPU,Parameters, Real> { typedef typename Parameters::concurrency_type concurrency_type; typedef typename Parameters::profiler_type profiler_t; template<class T> using HostVector = linalg::util::HostVector<T>; public: N_MATRIX_TOOLS(int id,Parameters& parameters_ref); Real* get_device_ptr(dca::linalg::Vector<Real, dca::linalg::CPU>& v); int* get_permutation(); void set_permutation(std::vector<int>& p); const int* get_permutation() const; void set_permutation(const HostVector<int>& p); void set_d_vector(dca::linalg::Vector<Real, dca::linalg::CPU>& d_inv); Loading @@ -32,7 +35,7 @@ public: void copy_rows(dca::linalg::Matrix<Real, dca::linalg::CPU>& N, dca::linalg::Matrix<Real, dca::linalg::CPU>& N_new_spins); void compute_G_cols(std::vector<Real>& exp_V, dca::linalg::Matrix<Real, dca::linalg::CPU>& N, void compute_G_cols(HostVector<Real>& exp_V, dca::linalg::Matrix<Real, dca::linalg::CPU>& N, dca::linalg::Matrix<Real, dca::linalg::CPU>& G, dca::linalg::Matrix<Real, dca::linalg::CPU>& G_cols); Loading Loading @@ -77,12 +80,12 @@ Real* N_MATRIX_TOOLS<dca::linalg::CPU,Parameters, Real>::get_device_ptr( } template <class Parameters, typename Real> int* N_MATRIX_TOOLS<dca::linalg::CPU,Parameters, Real>::get_permutation() { const int* N_MATRIX_TOOLS<dca::linalg::CPU,Parameters, Real>::get_permutation() const { return permutation.ptr(); } template <class Parameters, typename Real> void N_MATRIX_TOOLS<dca::linalg::CPU,Parameters, Real>::set_permutation(std::vector<int>& p) { void N_MATRIX_TOOLS<dca::linalg::CPU,Parameters, Real>::set_permutation(const HostVector<int>& p) { permutation = p; } Loading Loading @@ -113,7 +116,7 @@ void N_MATRIX_TOOLS<dca::linalg::CPU,Parameters, Real>::copy_rows( template <class Parameters, typename Real> void N_MATRIX_TOOLS<dca::linalg::CPU,Parameters, Real>::compute_G_cols( std::vector<Real>& exp_V, dca::linalg::Matrix<Real, dca::linalg::CPU>& N, HostVector<Real>& exp_V, dca::linalg::Matrix<Real, dca::linalg::CPU>& N, dca::linalg::Matrix<Real, dca::linalg::CPU>& G, dca::linalg::Matrix<Real, dca::linalg::CPU>& G_cols) { assert(N.nrRows() == G.nrRows()); Loading
include/dca/phys/dca_step/cluster_solver/ctaux/walker/tools/n_matrix_tools/n_matrix_tools_gpu.inc +11 −8 Original line number Diff line number Diff line Loading @@ -17,11 +17,14 @@ class N_MATRIX_TOOLS<dca::linalg::GPU, Parameters, Real> { typedef typename Parameters::concurrency_type concurrency_type; typedef typename Parameters::profiler_type profiler_t; template<class T> using HostVector = linalg::util::HostVector<T>; public: N_MATRIX_TOOLS(int id, Parameters& parameters_ref); int* get_permutation(); void set_permutation(std::vector<int>& p); const int* get_permutation() const; void set_permutation(const HostVector<int>& p); void set_d_vector(dca::linalg::Vector<Real, dca::linalg::CPU>& d_inv); Loading @@ -32,7 +35,7 @@ public: void copy_rows(dca::linalg::Matrix<Real, dca::linalg::GPU>& N, dca::linalg::Matrix<Real, dca::linalg::GPU>& N_new_spins); void compute_G_cols(std::vector<Real>& exp_V, dca::linalg::Matrix<Real, dca::linalg::GPU>& N, void compute_G_cols(HostVector <Real> &exp_V, dca::linalg::Matrix<Real, dca::linalg::GPU>& N, dca::linalg::Matrix<Real, dca::linalg::GPU>& G, dca::linalg::Matrix<Real, dca::linalg::GPU>& G_cols); Loading Loading @@ -85,12 +88,12 @@ N_MATRIX_TOOLS<dca::linalg::GPU, Parameters, Real>::N_MATRIX_TOOLS(int id, Param } template <class Parameters, typename Real> int* N_MATRIX_TOOLS<dca::linalg::GPU, Parameters, Real>::get_permutation() { const int* N_MATRIX_TOOLS<dca::linalg::GPU, Parameters, Real>::get_permutation() const { return permutation.ptr(); } template <class Parameters, typename Real> void N_MATRIX_TOOLS<dca::linalg::GPU, Parameters, Real>::set_permutation(std::vector<int>& p) { void N_MATRIX_TOOLS<dca::linalg::GPU, Parameters, Real>::set_permutation(const HostVector<int>& p) { permutation.setAsync(p, thread_id, stream_id); } Loading Loading @@ -129,7 +132,7 @@ void N_MATRIX_TOOLS<dca::linalg::GPU, Parameters, Real>::copy_rows( template <class Parameters, typename Real> void N_MATRIX_TOOLS<dca::linalg::GPU, Parameters, Real>::compute_G_cols( std::vector<Real>& exp_V_CPU, dca::linalg::Matrix<Real, dca::linalg::GPU>& N, HostVector<Real> &exp_V_CPU, dca::linalg::Matrix<Real, dca::linalg::GPU>& N, dca::linalg::Matrix<Real, dca::linalg::GPU>& G, dca::linalg::Matrix<Real, dca::linalg::GPU>& G_cols) { exp_V.setAsync(exp_V_CPU, linalg::util::getStream(thread_id, stream_id)); Loading