Loading include/dca/io/hdf5/hdf5_writer.hpp +0 −1 Original line number Diff line number Diff line Loading @@ -297,7 +297,6 @@ bool HDF5Writer::execute(const std::string& name, std::vector<hsize_t> dims{hsize_t(A.nrRows()), hsize_t(A.nrCols())}; std::vector<Scalar> linearized(A.nrRows() * A.nrCols()); int linindex = 0; // Note: Matrices are row major, while HDF5 is column major for (int i = 0; i < A.nrRows(); ++i) for (int j = 0; j < A.nrCols(); ++j) Loading src/phys/dca_step/cluster_solver/shared_tools/accumulation/tp/tp_accumulator_kernels.cu +2 −2 Original line number Diff line number Diff line Loading @@ -181,10 +181,10 @@ __global__ void computeGMultibandKernel(CudaComplex<Real>* __restrict__ G, int l } if (k1 == k2 && w1 == w2) // G0_w1 == G0_w2) G_val_store += G0_w1[b1 + ldg0 * b2] * beta; G_val_store += G0_w1[b2 + ldg0 * b1] * beta; #ifdef DEBUG_G4_GPU printf("%lf %lf %lf %lf %lf %lf -- %d %d %d %d %d %d %f,%f\n", M[b1 + ldm * b2].x, M[b1 + ldm * b2].y, G0_w1[b1 + ldg0 * b2].x, G0_w1[b1 + ldg0 * b2].y, M[b1 + ldm * b2].y, G0_w1[b2 + ldg0 * b1].x, G0_w1[b2 + ldg0 * b1].y, G0_w2[b1 + ldg0 * b2].x, G0_w2[b1 + ldg0 * b2].y, b1, b2, k1, k2, w1, w2, G_val.x, G_val.y); #endif G_val = G_val_store; Loading test/unit/phys/dca_step/cluster_solver/shared_tools/accumulation/tp/input_1x1_rashba.json +17 −3 Original line number Diff line number Diff line Loading @@ -9,7 +9,9 @@ "physics" : { "beta" : 2, "chemical-potential" : 0 "density" : 0.85, "chemical-potential" : 0, "adjust-chemical-potential": true }, "Rashba-Hubbard-model": Loading @@ -19,7 +21,19 @@ "lambda" : 0.5, "U" : 6.0 }, "DCA" : { "iterations": 1, "accuracy": 1.e-3, "self-energy-mixing-factor": 0.8, "interacting-orbitals": [0,1], "coarse-graining": { "k-mesh-recursion": 3, "periods": 0, "quadrature-rule": 1, "threads": 1, "tail-frequencies": 10 } }, "domains": { "real-space-grids": { "cluster": [[2, 2], Loading test/unit/phys/dca_step/cluster_solver/shared_tools/accumulation/tp/input_222-2_rashba.json +19 −17 Original line number Diff line number Diff line Loading @@ -32,6 +32,7 @@ "quadrature-rule": 1, "threads": 1, "tail-frequencies": 10 } }, "domains": { "real-space-grids": { Loading @@ -47,13 +48,14 @@ "imaginary-frequency": { "sp-fermionic-frequencies": 512, "four-point-fermionic-frequencies" : 1 "four-point-fermionic-frequencies" : 5 } }, "four-point": { "type": "NONE", "frequency-transfer": 0 "type": "PARTICLE_PARTICLE_UP_DOWN", "frequency-transfer": 2, "compute-all-transfers": true }, Loading @@ -73,7 +75,7 @@ }, "CT-AUX" : { "initial-configuration-size" :5, "max-submatrix-size":4 "initial-configuration-size" :100, "max-submatrix-size":1 } } test/unit/phys/dca_step/cluster_solver/shared_tools/accumulation/tp/tp_accumulator_complex_g0_gpu_test.cpp +38 −5 Original line number Diff line number Diff line // Copyright (C) 2018 ETH Zurich // Copyright (C) 2018 UT-Battelle, LLC // Copyright (C) 2023 ETH Zurich // Copyright (C) 2023 UT-Battelle, LLC // All rights reserved. // // See LICENSE.txt for terms of usage. // See CITATION.txt for citation guidelines if you use this code for scientific publications. // // Author: Giovanni Balduzzi (gbalduzz@itp.phys.ethz.ch) // Peter W. Doak (doakpw@ornl.gov) // // This file implements a no-change test for the two particles accumulation on the GPU with // the Rashba model. Loading @@ -25,7 +26,9 @@ using McOptions = MockMcOptions<Scalar>; } // namespace config } // namespace dca #include "dca/phys/dca_step/lattice_mapping/lattice_mapping_sp.hpp" #include "dca/phys/dca_step/cluster_solver/shared_tools/accumulation/tp/tp_accumulator_gpu.hpp" #include "dca/function/domains.hpp" #include <array> #include <functional> Loading Loading @@ -81,6 +84,19 @@ uint loop_counter = 0; using TestTypes = ::testing::Types<std::complex<double>>; TYPED_TEST_CASE(TpAccumulatorComplexG0GpuTest, TestTypes); using namespace dca::phys; template <class Parameters> using k_DCA = dca::func::dmn_0<domains::cluster_domain<double, Parameters::lattice_type::DIMENSION, domains::CLUSTER, domains::MOMENTUM_SPACE, domains::BRILLOUIN_ZONE>>; template <class Parameters> using k_HOST = dca::func::dmn_0<domains::cluster_domain<double, Parameters::lattice_type::DIMENSION, domains::LATTICE_SP, domains::MOMENTUM_SPACE, domains::BRILLOUIN_ZONE>>; template <class Parameters, class k_DCA, class k_HOST> using LatticeMapSpType = latticemapping::lattice_mapping_sp<Parameters, k_DCA, k_HOST>; #define TYPING_PREFACE \ using Scalar = TypeParam; \ using ConfigGenerator = dca::testing::AccumulationTest<Scalar>; \ Loading @@ -90,7 +106,7 @@ TYPED_TEST_CASE(TpAccumulatorComplexG0GpuTest, TestTypes); TYPED_TEST(TpAccumulatorComplexG0GpuTest, Accumulate) { TYPING_PREFACE const std::array<int, 2> n{18, 22}; const std::array<int, 2> n{23, 23}; Sample M; Configuration config; using FourPointType = dca::phys::FourPointType; Loading @@ -105,6 +121,24 @@ TYPED_TEST(TpAccumulatorComplexG0GpuTest, Accumulate) { this->host_setup.parameters_.set_four_point_channels(four_point_channels); this->gpu_setup.parameters_.set_four_point_channels(four_point_channels); //this->host_setup.data_->initializeSigma("zero"); //this->gpu_setup.data_->initializeSigma("zero"); //this->gpu_setup.parameters_.get_initial_self_energy()); using ParametersHost = typename decltype(this->host_setup)::Parameters; using ParametersGPU = typename decltype(this->gpu_setup)::Parameters; // LatticeMapSpType<ParametersHost, // k_DCA<ParametersHost>, // k_HOST<ParametersHost>> lattice_mapping_obj_host(this->host_setup.parameters_); // auto& host_data = this->host_setup.data_; // lattice_mapping_obj_host.execute(host_data->Sigma, host_data->Sigma_lattice_interpolated, // host_data->Sigma_lattice_coarsegrained, host_data->Sigma_lattice); // LatticeMapSpType<ParametersGPU, k_DCA<ParametersGPU>, k_HOST<ParametersGPU>> lattice_mapping_obj_gpu(this->gpu_setup.parameters_); // auto& gpu_data = this->gpu_setup.data_; // lattice_mapping_obj_gpu.execute(gpu_data->Sigma, gpu_data->Sigma_lattice_interpolated, // gpu_data->Sigma_lattice_coarsegrained, gpu_data->Sigma_lattice); dca::phys::solver::accumulator::TpAccumulator<decltype(this->host_setup.parameters_), dca::DistType::NONE, dca::linalg::CPU> accumulatorHost(this->host_setup.data_->G0_k_w_cluster_excluded, this->host_setup.parameters_); Loading Loading @@ -220,7 +254,6 @@ TYPED_TEST(TpAccumulatorComplexG0GpuTest, Accumulate) { else { std::vector<int> success_index(host_G4.get_domain().get_leaf_domain_sizes().size()); host_G4.linind_2_subind(i, success_index); std::cout << "success-> " << dca::vectorToString(success_index) << '\n'; } } if (fail_count > 0) Loading Loading
include/dca/io/hdf5/hdf5_writer.hpp +0 −1 Original line number Diff line number Diff line Loading @@ -297,7 +297,6 @@ bool HDF5Writer::execute(const std::string& name, std::vector<hsize_t> dims{hsize_t(A.nrRows()), hsize_t(A.nrCols())}; std::vector<Scalar> linearized(A.nrRows() * A.nrCols()); int linindex = 0; // Note: Matrices are row major, while HDF5 is column major for (int i = 0; i < A.nrRows(); ++i) for (int j = 0; j < A.nrCols(); ++j) Loading
src/phys/dca_step/cluster_solver/shared_tools/accumulation/tp/tp_accumulator_kernels.cu +2 −2 Original line number Diff line number Diff line Loading @@ -181,10 +181,10 @@ __global__ void computeGMultibandKernel(CudaComplex<Real>* __restrict__ G, int l } if (k1 == k2 && w1 == w2) // G0_w1 == G0_w2) G_val_store += G0_w1[b1 + ldg0 * b2] * beta; G_val_store += G0_w1[b2 + ldg0 * b1] * beta; #ifdef DEBUG_G4_GPU printf("%lf %lf %lf %lf %lf %lf -- %d %d %d %d %d %d %f,%f\n", M[b1 + ldm * b2].x, M[b1 + ldm * b2].y, G0_w1[b1 + ldg0 * b2].x, G0_w1[b1 + ldg0 * b2].y, M[b1 + ldm * b2].y, G0_w1[b2 + ldg0 * b1].x, G0_w1[b2 + ldg0 * b1].y, G0_w2[b1 + ldg0 * b2].x, G0_w2[b1 + ldg0 * b2].y, b1, b2, k1, k2, w1, w2, G_val.x, G_val.y); #endif G_val = G_val_store; Loading
test/unit/phys/dca_step/cluster_solver/shared_tools/accumulation/tp/input_1x1_rashba.json +17 −3 Original line number Diff line number Diff line Loading @@ -9,7 +9,9 @@ "physics" : { "beta" : 2, "chemical-potential" : 0 "density" : 0.85, "chemical-potential" : 0, "adjust-chemical-potential": true }, "Rashba-Hubbard-model": Loading @@ -19,7 +21,19 @@ "lambda" : 0.5, "U" : 6.0 }, "DCA" : { "iterations": 1, "accuracy": 1.e-3, "self-energy-mixing-factor": 0.8, "interacting-orbitals": [0,1], "coarse-graining": { "k-mesh-recursion": 3, "periods": 0, "quadrature-rule": 1, "threads": 1, "tail-frequencies": 10 } }, "domains": { "real-space-grids": { "cluster": [[2, 2], Loading
test/unit/phys/dca_step/cluster_solver/shared_tools/accumulation/tp/input_222-2_rashba.json +19 −17 Original line number Diff line number Diff line Loading @@ -32,6 +32,7 @@ "quadrature-rule": 1, "threads": 1, "tail-frequencies": 10 } }, "domains": { "real-space-grids": { Loading @@ -47,13 +48,14 @@ "imaginary-frequency": { "sp-fermionic-frequencies": 512, "four-point-fermionic-frequencies" : 1 "four-point-fermionic-frequencies" : 5 } }, "four-point": { "type": "NONE", "frequency-transfer": 0 "type": "PARTICLE_PARTICLE_UP_DOWN", "frequency-transfer": 2, "compute-all-transfers": true }, Loading @@ -73,7 +75,7 @@ }, "CT-AUX" : { "initial-configuration-size" :5, "max-submatrix-size":4 "initial-configuration-size" :100, "max-submatrix-size":1 } }
test/unit/phys/dca_step/cluster_solver/shared_tools/accumulation/tp/tp_accumulator_complex_g0_gpu_test.cpp +38 −5 Original line number Diff line number Diff line // Copyright (C) 2018 ETH Zurich // Copyright (C) 2018 UT-Battelle, LLC // Copyright (C) 2023 ETH Zurich // Copyright (C) 2023 UT-Battelle, LLC // All rights reserved. // // See LICENSE.txt for terms of usage. // See CITATION.txt for citation guidelines if you use this code for scientific publications. // // Author: Giovanni Balduzzi (gbalduzz@itp.phys.ethz.ch) // Peter W. Doak (doakpw@ornl.gov) // // This file implements a no-change test for the two particles accumulation on the GPU with // the Rashba model. Loading @@ -25,7 +26,9 @@ using McOptions = MockMcOptions<Scalar>; } // namespace config } // namespace dca #include "dca/phys/dca_step/lattice_mapping/lattice_mapping_sp.hpp" #include "dca/phys/dca_step/cluster_solver/shared_tools/accumulation/tp/tp_accumulator_gpu.hpp" #include "dca/function/domains.hpp" #include <array> #include <functional> Loading Loading @@ -81,6 +84,19 @@ uint loop_counter = 0; using TestTypes = ::testing::Types<std::complex<double>>; TYPED_TEST_CASE(TpAccumulatorComplexG0GpuTest, TestTypes); using namespace dca::phys; template <class Parameters> using k_DCA = dca::func::dmn_0<domains::cluster_domain<double, Parameters::lattice_type::DIMENSION, domains::CLUSTER, domains::MOMENTUM_SPACE, domains::BRILLOUIN_ZONE>>; template <class Parameters> using k_HOST = dca::func::dmn_0<domains::cluster_domain<double, Parameters::lattice_type::DIMENSION, domains::LATTICE_SP, domains::MOMENTUM_SPACE, domains::BRILLOUIN_ZONE>>; template <class Parameters, class k_DCA, class k_HOST> using LatticeMapSpType = latticemapping::lattice_mapping_sp<Parameters, k_DCA, k_HOST>; #define TYPING_PREFACE \ using Scalar = TypeParam; \ using ConfigGenerator = dca::testing::AccumulationTest<Scalar>; \ Loading @@ -90,7 +106,7 @@ TYPED_TEST_CASE(TpAccumulatorComplexG0GpuTest, TestTypes); TYPED_TEST(TpAccumulatorComplexG0GpuTest, Accumulate) { TYPING_PREFACE const std::array<int, 2> n{18, 22}; const std::array<int, 2> n{23, 23}; Sample M; Configuration config; using FourPointType = dca::phys::FourPointType; Loading @@ -105,6 +121,24 @@ TYPED_TEST(TpAccumulatorComplexG0GpuTest, Accumulate) { this->host_setup.parameters_.set_four_point_channels(four_point_channels); this->gpu_setup.parameters_.set_four_point_channels(four_point_channels); //this->host_setup.data_->initializeSigma("zero"); //this->gpu_setup.data_->initializeSigma("zero"); //this->gpu_setup.parameters_.get_initial_self_energy()); using ParametersHost = typename decltype(this->host_setup)::Parameters; using ParametersGPU = typename decltype(this->gpu_setup)::Parameters; // LatticeMapSpType<ParametersHost, // k_DCA<ParametersHost>, // k_HOST<ParametersHost>> lattice_mapping_obj_host(this->host_setup.parameters_); // auto& host_data = this->host_setup.data_; // lattice_mapping_obj_host.execute(host_data->Sigma, host_data->Sigma_lattice_interpolated, // host_data->Sigma_lattice_coarsegrained, host_data->Sigma_lattice); // LatticeMapSpType<ParametersGPU, k_DCA<ParametersGPU>, k_HOST<ParametersGPU>> lattice_mapping_obj_gpu(this->gpu_setup.parameters_); // auto& gpu_data = this->gpu_setup.data_; // lattice_mapping_obj_gpu.execute(gpu_data->Sigma, gpu_data->Sigma_lattice_interpolated, // gpu_data->Sigma_lattice_coarsegrained, gpu_data->Sigma_lattice); dca::phys::solver::accumulator::TpAccumulator<decltype(this->host_setup.parameters_), dca::DistType::NONE, dca::linalg::CPU> accumulatorHost(this->host_setup.data_->G0_k_w_cluster_excluded, this->host_setup.parameters_); Loading Loading @@ -220,7 +254,6 @@ TYPED_TEST(TpAccumulatorComplexG0GpuTest, Accumulate) { else { std::vector<int> success_index(host_G4.get_domain().get_leaf_domain_sizes().size()); host_G4.linind_2_subind(i, success_index); std::cout << "success-> " << dca::vectorToString(success_index) << '\n'; } } if (fail_count > 0) Loading