Test ndft transform in single and double precision. (204ba89b) · Commits · NDIP / Tool Sources / Direct-Geometry Spectroscopy / DCA / DCA Main

test/unit/math/function_transform/space_transform_2D_test.cpp

+0 −1

Original line number	Diff line number	Diff line
		@@ -15,7 +15,6 @@
		#include <string>
		#include <random>

		#include "dca/config/accumulation_options.hpp"
		#include "dca/io/json/json_reader.hpp"
		#include "dca/phys/domains/cluster/symmetries/point_groups/no_symmetry.hpp"
		#include "dca/phys/domains/quantum/electron_band_domain.hpp"

test/unit/phys/dca_step/cluster_solver/shared_tools/accumulation/accumulation_test.hpp

+13 −4

Original line number	Diff line number	Diff line
		@@ -22,6 +22,11 @@

		namespace dca {
		namespace testing {
		namespace {
		// Flag for single initialization when multiple types are used.
		// The fixture setup further down checks this flag before calling
		// dca::phys::domains::time_domain::initialize and sets it to true afterwards, so the time
		// domain is initialized at most once even when the fixture is instantiated for several types.
		// NOTE(review): the flag lives in an unnamed namespace inside a header, so every translation
		// unit that includes this file gets its own copy — confirm this is intended.
		bool accumulation_test_initialized = false;
		} // namespace
		// dca::testing::

		template <typename AccumType, int n_bands = 2, int n_sites = 3, int n_frqs = 64>
		class AccumulationTest : public SingleSectorAccumulationTest<AccumType, n_bands, n_sites, n_frqs> {
		@@ -38,8 +43,12 @@ protected:
		BaseClass::SetUpTestCase();

		// Initialize time domain.
		if (!accumulation_test_initialized) {
		const int n_times = n_frqs;
		dca::phys::domains::time_domain::initialize(BaseClass::beta_, n_times);

		accumulation_test_initialized = true;
		}
		}

		void SetUp() {}
		@@ -60,7 +69,7 @@ protected:
		Parameters parameters_{BaseClass::get_beta()};
		};

		} // testing
		} // dca
		} // namespace testing
		} // namespace dca

		#endif // TEST_UNIT_PHYS_DCA_STEP_CLUSTER_SOLVER_SHARED_TOOLS_ACCUMULATION_ACCUMULATION_TEST_HPP

test/unit/phys/dca_step/cluster_solver/shared_tools/accumulation/single_sector_accumulation_test.hpp

+19 −11

Original line number	Diff line number	Diff line
		@@ -99,6 +99,11 @@ struct Vertex {
		double tau_;
		};

		namespace {
		// Flag for single initialization when multiple types are used.
		// SingleSectorAccumulationTest::SetUpTestCase tests this flag before initializing the domains
		// and sets it to true once done. The flag is not a template, so it is shared by every
		// instantiation of the fixture within a translation unit: only the first instantiation that
		// runs SetUpTestCase performs the initialization.
		// NOTE(review): the flag lives in an unnamed namespace inside a header, so every translation
		// unit that includes this file gets its own copy — confirm this is intended.
		bool single_sector_accumulator_test_initialized = false;
		} // namespace

		template <typename Real = double, int n_bands = 2, int n_sites = 3, int n_frqs = 64>
		class SingleSectorAccumulationTest : public ::testing::Test {
		public:
		@@ -113,15 +118,15 @@ public:
		using Matrix = dca::linalg::Matrix<double, dca::linalg::CPU>;

		using F_w_w =
		dca::func::function<std::complex<double>,
		dca::func::dmn_variadic<BDmn, BDmn, RDmn, RDmn, FreqDmn, FreqDmn>>;
		dca::func::function<Complex, dca::func::dmn_variadic<BDmn, BDmn, RDmn, RDmn, FreqDmn, FreqDmn>>;

		static double get_beta() {
		return beta_;
		}

		protected:
		public:
		static void SetUpTestCase() {
		if (!single_sector_accumulator_test_initialized) {
		// Initialize the frequency domains.
		dca::phys::domains::frequency_domain::initialize(beta_, n_frqs);
		PositiveFrq::initialize(n_frqs);
		@@ -130,6 +135,9 @@ protected:
		BDmn::parameter_type::initialize(
		mock_parameter, n_bands, std::vector<int>(),
		std::vector<std::vector<double>>(n_bands, std::vector<double>(n_bands, 0)));

		single_sector_accumulator_test_initialized = true;
		}
		}

		void SetUp() {}

test/unit/phys/dca_step/cluster_solver/shared_tools/accumulation/tp/ndft/cached_ndft_cpu_test.cpp

+35 −24

Original line number	Diff line number	Diff line
		@@ -25,51 +25,62 @@
		constexpr int n_sites = 4;
		constexpr int n_bands = 3;
		constexpr int n_frqs = 16;
		using CachedNdftCpuTest =
		dca::testing::SingleSectorAccumulationTest<double, n_bands, n_sites, n_frqs>;

		double computeWithFastDNFT(const CachedNdftCpuTest::Configuration& config,
		const CachedNdftCpuTest::Matrix& M, CachedNdftCpuTest::F_w_w& f_w);
		template <class Real>
		using CachedNdftCpuTest = dca::testing::SingleSectorAccumulationTest<Real, n_bands, n_sites, n_frqs>;

		template <typename Real>
		double computeWithFastDNFT(const typename CachedNdftCpuTest<Real>::Configuration& config,
		const typename CachedNdftCpuTest<Real>::Matrix& M,
		typename CachedNdftCpuTest<Real>::F_w_w& f_w);

		using TestTypes = ::testing::Types<float, double>;
		TYPED_TEST_CASE(CachedNdftCpuTest, TestTypes);

		// Compare the result provided by the CPU version of CachedNdft::execute with the definition of the
		// DNFT f(w1, w2) = \sum_{t1, t2} f(t1, t2) exp(i * (t1 * w1 - t2 * w2)) stored in f_baseline_.
		TEST_F(CachedNdftCpuTest, Execute) {
		TYPED_TEST(CachedNdftCpuTest, Execute) {
		constexpr int n_samples = 40;
		prepareConfiguration(configuration_, M_, n_samples);

		F_w_w f_w_fast("f_w_fast");
		const double time = computeWithFastDNFT(configuration_, M_, f_w_fast);
		TestFixture::prepareConfiguration(TestFixture::configuration_, TestFixture::M_, n_samples);

		using Real = TypeParam;
		typename TestFixture::F_w_w f_w_fast("f_w_fast");
		const double time =
		computeWithFastDNFT<Real>(TestFixture::configuration_, TestFixture::M_, f_w_fast);

		auto f_baseline = CachedNdftCpuTest::compute2DFTBaseline();
		auto f_baseline = TestFixture::compute2DFTBaseline();
		const auto err = dca::func::util::difference(f_baseline, f_w_fast);
		EXPECT_LT(err.l_inf, 1e-14);
		EXPECT_LT(err.l_inf, 100 * std::numeric_limits<Real>::epsilon());

		std::cout << "\nCached ndft time [sec]:\t " << time << "\n";
		}

		double computeWithFastDNFT(const CachedNdftCpuTest::Configuration& config,
		const CachedNdftCpuTest::Matrix& M, CachedNdftCpuTest::F_w_w& f_w) {
		template <typename Real>
		double computeWithFastDNFT(const typename CachedNdftCpuTest<Real>::Configuration& config,
		const typename CachedNdftCpuTest<Real>::Matrix& M,
		typename CachedNdftCpuTest<Real>::F_w_w& f_w) {
		using BDmn = typename CachedNdftCpuTest<Real>::BDmn;
		using RDmn = typename CachedNdftCpuTest<Real>::RDmn;
		using PosFreqDmn = typename CachedNdftCpuTest<Real>::PosFreqDmn;
		using FreqDmn = typename CachedNdftCpuTest<Real>::FreqDmn;
		dca::func::function<std::complex<double>,
		dca::func::dmn_variadic<CachedNdftCpuTest::BDmn, CachedNdftCpuTest::BDmn,
		CachedNdftCpuTest::RDmn, CachedNdftCpuTest::RDmn,
		CachedNdftCpuTest::PosFreqDmn, CachedNdftCpuTest::FreqDmn>>
		dca::func::dmn_variadic<BDmn, BDmn, RDmn, RDmn, PosFreqDmn, FreqDmn>>
		f_b_b_r_r_w_w;
		dca::phys::solver::accumulator::CachedNdft<double, CachedNdftCpuTest::RDmn, CachedNdftCpuTest::FreqDmn,
		CachedNdftCpuTest::PosFreqDmn, dca::linalg::CPU>
		nft_obj;
		dca::phys::solver::accumulator::CachedNdft<double, RDmn, FreqDmn, PosFreqDmn, dca::linalg::CPU> nft_obj;

		dca::profiling::WallTime start_time;
		nft_obj.execute(config, M, f_b_b_r_r_w_w);
		dca::profiling::WallTime end_time;

		// Rearrange output.
		const int n_w = CachedNdftCpuTest::PosFreqDmn::dmn_size();
		const int n_w = PosFreqDmn::dmn_size();
		auto invert_w = [=](const int w) { return 2 * n_w - 1 - w; };
		for (int b2 = 0; b2 < CachedNdftCpuTest::BDmn::dmn_size(); ++b2)
		for (int b1 = 0; b1 < CachedNdftCpuTest::BDmn::dmn_size(); ++b1)
		for (int r2 = 0; r2 < CachedNdftCpuTest::RDmn::dmn_size(); ++r2)
		for (int r1 = 0; r1 < CachedNdftCpuTest::RDmn::dmn_size(); ++r1)
		for (int w2 = 0; w2 < CachedNdftCpuTest::FreqDmn::dmn_size(); ++w2)
		for (int b2 = 0; b2 < BDmn::dmn_size(); ++b2)
		for (int b1 = 0; b1 < BDmn::dmn_size(); ++b1)
		for (int r2 = 0; r2 < RDmn::dmn_size(); ++r2)
		for (int r1 = 0; r1 < RDmn::dmn_size(); ++r1)
		for (int w2 = 0; w2 < FreqDmn::dmn_size(); ++w2)
		for (int w1 = 0; w1 < n_w; ++w1) {
		f_w(b1, b2, r1, r2, w1 + n_w, w2) = f_b_b_r_r_w_w(b1, b2, r1, r2, w1, w2);
		f_w(b1, b2, r1, r2, invert_w(w1 + n_w), invert_w(w2)) =

test/unit/phys/dca_step/cluster_solver/shared_tools/accumulation/tp/ndft/cached_ndft_gpu_test.cpp

+34 −19

Original line number	Diff line number	Diff line
		@@ -12,6 +12,7 @@
		#include "dca/phys/dca_step/cluster_solver/shared_tools/accumulation/tp/ndft/cached_ndft_gpu.hpp"

		#include <complex>
		#include <limits>

		#include "gtest/gtest.h"

		@@ -26,40 +27,54 @@
		constexpr int n_bands = 2;
		constexpr int n_sites = 3;
		constexpr int n_frqs = 7;
		using CachedNdftGpuTest =
		dca::testing::SingleSectorAccumulationTest<double, n_bands, n_sites, n_frqs>;

		double computeWithFastNDFT(const CachedNdftGpuTest::Configuration& config,
		const CachedNdftGpuTest::Matrix& M, CachedNdftGpuTest::F_w_w& f_w);
		template <typename Real>
		using CachedNdftGpuTest = dca::testing::SingleSectorAccumulationTest<Real, n_bands, n_sites, n_frqs>;

		template <typename Real>
		double computeWithFastNDFT(const typename CachedNdftGpuTest<Real>::Configuration& config,
		const typename CachedNdftGpuTest<Real>::Matrix& M,
		typename CachedNdftGpuTest<Real>::F_w_w& f_w);

		using TestTypes = ::testing::Types<float, double>;
		TYPED_TEST_CASE(CachedNdftGpuTest, TestTypes);

		// Compare the result provided by the GPU version of CachedNdft::execute with the definition of the
		// NDFT f(w1, w2) = \sum_{t1, t2} f(t1, t2) exp(i * (t1 * w1 - t2 * w2)) stored in f_baseline_.
		TEST_F(CachedNdftGpuTest, Execute) {
		TYPED_TEST(CachedNdftGpuTest, Execute) {
		constexpr int n_samples = 31;
		prepareConfiguration(configuration_, M_, n_samples);
		TestFixture::prepareConfiguration(TestFixture::configuration_, TestFixture::M_, n_samples);

		F_w_w f_w_fast("f_w_fast");
		using Real = TypeParam;
		typename TestFixture::F_w_w f_w_fast("f_w_fast");

		// Compute the NDFT with the CachedNdft class and rearrange the result with the same order as
		// f_baseline_.
		const double time = computeWithFastNDFT(configuration_, M_, f_w_fast);
		const double time =
		computeWithFastNDFT<Real>(TestFixture::configuration_, TestFixture::M_, f_w_fast);

		auto f_baseline = CachedNdftGpuTest::compute2DFTBaseline();
		auto f_baseline = TestFixture::compute2DFTBaseline();
		const auto err = dca::func::util::difference(f_baseline, f_w_fast);
		EXPECT_LT(err.l_inf, 1e-14);
		EXPECT_LT(err.l_inf, 100 * std::numeric_limits<Real>::epsilon());

		std::cout << "\nCached GPU ndft time [sec]:\t " << time << "\n";
		}

		double computeWithFastNDFT(const CachedNdftGpuTest::Configuration& config,
		const CachedNdftGpuTest::Matrix& M, CachedNdftGpuTest::F_w_w& f_w) {
		template <typename Real>
		double computeWithFastNDFT(const typename CachedNdftGpuTest<Real>::Configuration& config,
		const typename CachedNdftGpuTest<Real>::Matrix& M,
		typename CachedNdftGpuTest<Real>::F_w_w& f_w) {
		dca::linalg::util::initializeMagma();
		magma_queue_t queue;
		magma_queue_create(&queue);

		dca::phys::solver::accumulator::CachedNdft<double, CachedNdftGpuTest::RDmn, CachedNdftGpuTest::FreqDmn,
		CachedNdftGpuTest::PosFreqDmn, dca::linalg::GPU>
		nft_obj(queue);
		using BDmn = typename CachedNdftGpuTest<Real>::BDmn;
		using RDmn = typename CachedNdftGpuTest<Real>::RDmn;
		using FreqDmn = typename CachedNdftGpuTest<Real>::FreqDmn;
		using PosFreqDmn = typename CachedNdftGpuTest<Real>::PosFreqDmn;

		dca::phys::solver::accumulator::CachedNdft<double, RDmn, FreqDmn, PosFreqDmn, dca::linalg::GPU> nft_obj(
		queue);
		EXPECT_EQ(magma_queue_get_cuda_stream(queue), nft_obj.get_stream());

		dca::linalg::Matrix<double, dca::linalg::GPU> M_dev(M);
		@@ -77,15 +92,15 @@ double computeWithFastNDFT(const CachedNdftGpuTest::Configuration& config,

		// Rearrange the output from a function of (r1, b1, w1, r2, b2, w2) to a function of (b1, b2, r1,
		// r2, w1, w2).
		const int nb = CachedNdftGpuTest::BDmn::dmn_size();
		const int nr = CachedNdftGpuTest::RDmn::dmn_size();
		const int n_w = CachedNdftGpuTest::PosFreqDmn::dmn_size();
		const int nb = BDmn::dmn_size();
		const int nr = RDmn::dmn_size();
		const int n_w = PosFreqDmn::dmn_size();
		auto invert_w = [=](const int w) { return 2 * n_w - 1 - w; };
		for (int b2 = 0; b2 < nb; ++b2)
		for (int b1 = 0; b1 < nb; ++b1)
		for (int r2 = 0; r2 < nr; ++r2)
		for (int r1 = 0; r1 < nr; ++r1)
		for (int w2 = 0; w2 < CachedNdftGpuTest::FreqDmn::dmn_size(); ++w2)
		for (int w2 = 0; w2 < FreqDmn::dmn_size(); ++w2)
		for (int w1 = 0; w1 < n_w; ++w1) {
		const auto val = result_host(r1 + b1 * nr + w1 * nr * nb, r2 + b2 * nr + w2 * nr * nb);
		f_w(b1, b2, r1, r2, w1 + n_w, w2) = val;