look to have fixed rashba G (867d8356) · Commits · NDIP / Tool Sources / Direct-Geometry Spectroscopy / DCA / DCA Main

include/dca/io/hdf5/hdf5_writer.hpp

+0 −1

Original line number	Diff line number	Diff line
		@@ -297,7 +297,6 @@ bool HDF5Writer::execute(const std::string& name,
		std::vector<hsize_t> dims{hsize_t(A.nrRows()), hsize_t(A.nrCols())};
		std::vector<Scalar> linearized(A.nrRows() * A.nrCols());

		int linindex = 0;
		// Note: Matrices are row major, while HDF5 is column major
		for (int i = 0; i < A.nrRows(); ++i)
		for (int j = 0; j < A.nrCols(); ++j)

src/phys/dca_step/cluster_solver/shared_tools/accumulation/tp/tp_accumulator_kernels.cu

+2 −2

Original line number	Diff line number	Diff line
		@@ -181,10 +181,10 @@ __global__ void computeGMultibandKernel(CudaComplex<Real>* __restrict__ G, int l
		}

		if (k1 == k2 && w1 == w2) // G0_w1 == G0_w2)
		G_val_store += G0_w1[b1 + ldg0 * b2] * beta;
		G_val_store += G0_w1[b2 + ldg0 * b1] * beta;
		#ifdef DEBUG_G4_GPU
		printf("%lf %lf %lf %lf %lf %lf -- %d %d %d %d %d %d %f,%f\n", M[b1 + ldm * b2].x,
		M[b1 + ldm * b2].y, G0_w1[b1 + ldg0 * b2].x, G0_w1[b1 + ldg0 * b2].y,
		M[b1 + ldm * b2].y, G0_w1[b2 + ldg0 * b1].x, G0_w1[b2 + ldg0 * b1].y,
		G0_w2[b1 + ldg0 * b2].x, G0_w2[b1 + ldg0 * b2].y, b1, b2, k1, k2, w1, w2, G_val.x, G_val.y);
		#endif
		G_val = G_val_store;

test/unit/phys/dca_step/cluster_solver/shared_tools/accumulation/tp/input_1x1_rashba.json

+17 −3

Original line number	Diff line number	Diff line
		@@ -9,7 +9,9 @@
		"physics" :
		{
		"beta" : 2,
		"chemical-potential" : 0
		"density" : 0.85,
		"chemical-potential" : 0,
		"adjust-chemical-potential": true
		},

		"Rashba-Hubbard-model":
		@@ -19,7 +21,19 @@
		"lambda" : 0.5,
		"U" : 6.0
		},

		"DCA" : {
		"iterations": 1,
		"accuracy": 1.e-3,
		"self-energy-mixing-factor": 0.8,
		"interacting-orbitals": [0,1],
		"coarse-graining": {
		"k-mesh-recursion": 3,
		"periods": 0,
		"quadrature-rule": 1,
		"threads": 1,
		"tail-frequencies": 10
		}
		},
		"domains": {
		"real-space-grids": {
		"cluster": [[2, 2],

test/unit/phys/dca_step/cluster_solver/shared_tools/accumulation/tp/input_222-2_rashba.json

+19 −17

Original line number	Diff line number	Diff line
		@@ -32,6 +32,7 @@
		"quadrature-rule": 1,
		"threads": 1,
		"tail-frequencies": 10
		}
		},
		"domains": {
		"real-space-grids": {
		@@ -47,13 +48,14 @@

		"imaginary-frequency": {
		"sp-fermionic-frequencies": 512,
		"four-point-fermionic-frequencies" : 1
		"four-point-fermionic-frequencies" : 5
		}
		},

		"four-point": {
		"type": "NONE",
		"frequency-transfer": 0
		"type": "PARTICLE_PARTICLE_UP_DOWN",
		"frequency-transfer": 2,
		"compute-all-transfers": true
		},


		@@ -73,7 +75,7 @@
		},
		"CT-AUX" :
		{
		"initial-configuration-size" :5,
		"max-submatrix-size":4
		"initial-configuration-size" :100,
		"max-submatrix-size":1
		}
		}

test/unit/phys/dca_step/cluster_solver/shared_tools/accumulation/tp/tp_accumulator_complex_g0_gpu_test.cpp

+38 −5

Original line number	Diff line number	Diff line
		// Copyright (C) 2018 ETH Zurich
		// Copyright (C) 2018 UT-Battelle, LLC
		// Copyright (C) 2023 ETH Zurich
		// Copyright (C) 2023 UT-Battelle, LLC
		// All rights reserved.
		//
		// See LICENSE.txt for terms of usage.
		// See CITATION.txt for citation guidelines if you use this code for scientific publications.
		//
		// Author: Giovanni Balduzzi (gbalduzz@itp.phys.ethz.ch)
		// Peter W. Doak (doakpw@ornl.gov)
		//
		// This file implements a no-change test for the two-particle accumulation on the GPU with
		// the Rashba model.
		@@ -25,7 +26,9 @@ using McOptions = MockMcOptions<Scalar>;
		} // namespace config
		} // namespace dca

		#include "dca/phys/dca_step/lattice_mapping/lattice_mapping_sp.hpp"
		#include "dca/phys/dca_step/cluster_solver/shared_tools/accumulation/tp/tp_accumulator_gpu.hpp"
		#include "dca/function/domains.hpp"

		#include <array>
		#include <functional>
		@@ -81,6 +84,19 @@ uint loop_counter = 0;
		using TestTypes = ::testing::Types<std::complex<double>>;
		TYPED_TEST_CASE(TpAccumulatorComplexG0GpuTest, TestTypes);

		using namespace dca::phys;

		// dmn_0 wrapper around the cluster_domain tagged CLUSTER / MOMENTUM_SPACE / BRILLOUIN_ZONE.
		// The domain uses double as scalar type and takes its dimension from Params::lattice_type.
		template <class Params>
		using k_DCA = dca::func::dmn_0<
		domains::cluster_domain<double, Params::lattice_type::DIMENSION, domains::CLUSTER,
		domains::MOMENTUM_SPACE, domains::BRILLOUIN_ZONE>>;
		// dmn_0 wrapper around the cluster_domain tagged LATTICE_SP / MOMENTUM_SPACE / BRILLOUIN_ZONE.
		// The domain uses double as scalar type and takes its dimension from Params::lattice_type.
		template <class Params>
		using k_HOST = dca::func::dmn_0<
		domains::cluster_domain<double, Params::lattice_type::DIMENSION, domains::LATTICE_SP,
		domains::MOMENTUM_SPACE, domains::BRILLOUIN_ZONE>>;
		// Shorthand for latticemapping::lattice_mapping_sp instantiated with a parameter type and two
		// momentum-domain types (source and target; the k_DCA / k_HOST alias templates above are the
		// intended arguments). The template parameters are deliberately named differently from
		// k_DCA / k_HOST so that they do not hide those file-scope alias templates inside this
		// declaration. Template parameters are positional, so existing uses are unaffected.
		template <class Parameters, class KClusterDmn, class KHostDmn>
		using LatticeMapSpType = latticemapping::lattice_mapping_sp<Parameters, KClusterDmn, KHostDmn>;

		#define TYPING_PREFACE \
		using Scalar = TypeParam; \
		using ConfigGenerator = dca::testing::AccumulationTest<Scalar>; \
		@@ -90,7 +106,7 @@ TYPED_TEST_CASE(TpAccumulatorComplexG0GpuTest, TestTypes);
		TYPED_TEST(TpAccumulatorComplexG0GpuTest, Accumulate) {
		TYPING_PREFACE

		const std::array<int, 2> n{18, 22};
		const std::array<int, 2> n{23, 23};
		Sample M;
		Configuration config;
		using FourPointType = dca::phys::FourPointType;
		@@ -105,6 +121,24 @@ TYPED_TEST(TpAccumulatorComplexG0GpuTest, Accumulate) {
		this->host_setup.parameters_.set_four_point_channels(four_point_channels);
		this->gpu_setup.parameters_.set_four_point_channels(four_point_channels);

		//this->host_setup.data_->initializeSigma("zero");
		//this->gpu_setup.data_->initializeSigma("zero"); //this->gpu_setup.parameters_.get_initial_self_energy());

		using ParametersHost = typename decltype(this->host_setup)::Parameters;
		using ParametersGPU = typename decltype(this->gpu_setup)::Parameters;

		// LatticeMapSpType<ParametersHost,
		// k_DCA<ParametersHost>,
		// k_HOST<ParametersHost>> lattice_mapping_obj_host(this->host_setup.parameters_);
		// auto& host_data = this->host_setup.data_;
		// lattice_mapping_obj_host.execute(host_data->Sigma, host_data->Sigma_lattice_interpolated,
		// host_data->Sigma_lattice_coarsegrained, host_data->Sigma_lattice);

		// LatticeMapSpType<ParametersGPU, k_DCA<ParametersGPU>, k_HOST<ParametersGPU>> lattice_mapping_obj_gpu(this->gpu_setup.parameters_);
		// auto& gpu_data = this->gpu_setup.data_;
		// lattice_mapping_obj_gpu.execute(gpu_data->Sigma, gpu_data->Sigma_lattice_interpolated,
		// gpu_data->Sigma_lattice_coarsegrained, gpu_data->Sigma_lattice);

		dca::phys::solver::accumulator::TpAccumulator<decltype(this->host_setup.parameters_),
		dca::DistType::NONE, dca::linalg::CPU>
		accumulatorHost(this->host_setup.data_->G0_k_w_cluster_excluded, this->host_setup.parameters_);
		@@ -220,7 +254,6 @@ TYPED_TEST(TpAccumulatorComplexG0GpuTest, Accumulate) {
		else {
		std::vector<int> success_index(host_G4.get_domain().get_leaf_domain_sizes().size());
		host_G4.linind_2_subind(i, success_index);
		std::cout << "success-> " << dca::vectorToString(success_index) << '\n';
		}
		}
		if (fail_count > 0)