Commit 204ba89b authored by gbalduzz's avatar gbalduzz
Browse files

Test ndft transform in single and double precision.

parent c659839c
Loading
Loading
Loading
Loading
+0 −1
Original line number Diff line number Diff line
@@ -15,7 +15,6 @@
#include <string>
#include <random>

#include "dca/config/accumulation_options.hpp"
#include "dca/io/json/json_reader.hpp"
#include "dca/phys/domains/cluster/symmetries/point_groups/no_symmetry.hpp"
#include "dca/phys/domains/quantum/electron_band_domain.hpp"
+13 −4
Original line number Diff line number Diff line
@@ -22,6 +22,11 @@

namespace dca {
namespace testing {
namespace {
// Guards the one-time initialization of the time domain performed in
// AccumulationTest::SetUpTestCase(). It starts as false and is set to true after the first
// initialization, so that further instantiations of the fixture (e.g. with a different scalar
// type) skip re-initializing the domain.
// Note: being declared in an anonymous namespace, every translation unit that includes this
// header gets its own copy of the flag.
bool accumulation_test_initialized = false;
}  // namespace
// dca::testing::

template <typename AccumType, int n_bands = 2, int n_sites = 3, int n_frqs = 64>
class AccumulationTest : public SingleSectorAccumulationTest<AccumType, n_bands, n_sites, n_frqs> {
@@ -38,8 +43,12 @@ protected:
    BaseClass::SetUpTestCase();

    // Initialize time domain.
    if (!accumulation_test_initialized) {
      const int n_times = n_frqs;
      dca::phys::domains::time_domain::initialize(BaseClass::beta_, n_times);

      accumulation_test_initialized = true;
    }
  }

  void SetUp() {}
@@ -60,7 +69,7 @@ protected:
  Parameters parameters_{BaseClass::get_beta()};
};

}  // testing
}  // dca
}  // namespace testing
}  // namespace dca

#endif  // TEST_UNIT_PHYS_DCA_STEP_CLUSTER_SOLVER_SHARED_TOOLS_ACCUMULATION_ACCUMULATION_TEST_HPP
+19 −11
Original line number Diff line number Diff line
@@ -99,6 +99,11 @@ struct Vertex {
  double tau_;
};

namespace {
// Guards the one-time initialization done in SingleSectorAccumulationTest::SetUpTestCase()
// (frequency domains, band domain, ...). It starts as false and is set to true at the end of the
// first initialization, so that instantiations of the fixture with other template arguments do
// not initialize the shared domains a second time.
// NOTE(review): the flag has internal linkage; if this file is a header, each translation unit
// including it owns a separate copy - confirm this is the intended granularity.
bool single_sector_accumulator_test_initialized = false;
}  // namespace

template <typename Real = double, int n_bands = 2, int n_sites = 3, int n_frqs = 64>
class SingleSectorAccumulationTest : public ::testing::Test {
public:
@@ -113,15 +118,15 @@ public:
  using Matrix = dca::linalg::Matrix<double, dca::linalg::CPU>;

  using F_w_w =
      dca::func::function<std::complex<double>,
                          dca::func::dmn_variadic<BDmn, BDmn, RDmn, RDmn, FreqDmn, FreqDmn>>;
      dca::func::function<Complex, dca::func::dmn_variadic<BDmn, BDmn, RDmn, RDmn, FreqDmn, FreqDmn>>;

  static double get_beta() {
    return beta_;
  }

protected:
public:
  static void SetUpTestCase() {
    if (!single_sector_accumulator_test_initialized) {
      // Initialize the frequency domains.
      dca::phys::domains::frequency_domain::initialize(beta_, n_frqs);
      PositiveFrq::initialize(n_frqs);
@@ -130,6 +135,9 @@ protected:
      BDmn::parameter_type::initialize(
          mock_parameter, n_bands, std::vector<int>(),
          std::vector<std::vector<double>>(n_bands, std::vector<double>(n_bands, 0)));

      single_sector_accumulator_test_initialized = true;
    }
  }

  void SetUp() {}
+35 −24
Original line number Diff line number Diff line
@@ -25,51 +25,62 @@
constexpr int n_sites = 4;
constexpr int n_bands = 3;
constexpr int n_frqs = 16;
using CachedNdftCpuTest =
    dca::testing::SingleSectorAccumulationTest<double, n_bands, n_sites, n_frqs>;

double computeWithFastDNFT(const CachedNdftCpuTest::Configuration& config,
                           const CachedNdftCpuTest::Matrix& M, CachedNdftCpuTest::F_w_w& f_w);
template <class Real>
using CachedNdftCpuTest = dca::testing::SingleSectorAccumulationTest<Real, n_bands, n_sites, n_frqs>;

template <typename Real>
double computeWithFastDNFT(const typename CachedNdftCpuTest<Real>::Configuration& config,
                           const typename CachedNdftCpuTest<Real>::Matrix& M,
                           typename CachedNdftCpuTest<Real>::F_w_w& f_w);

using TestTypes = ::testing::Types<float, double>;
TYPED_TEST_CASE(CachedNdftCpuTest, TestTypes);

// Compare the result provided by the CPU version of CachedNdft::execute with the definition of the
// DNFT f(w1, w2) = \sum_{t1, t2} f(t1, t2) exp(i * (t1 * w1 - t2 * w2)), stored in
// f_baseline_.
TEST_F(CachedNdftCpuTest, Execute) {
TYPED_TEST(CachedNdftCpuTest, Execute) {
  constexpr int n_samples = 40;
  prepareConfiguration(configuration_, M_, n_samples);

  F_w_w f_w_fast("f_w_fast");
  const double time = computeWithFastDNFT(configuration_, M_, f_w_fast);
  TestFixture::prepareConfiguration(TestFixture::configuration_, TestFixture::M_, n_samples);

  using Real = TypeParam;
  typename TestFixture::F_w_w f_w_fast("f_w_fast");
  const double time =
      computeWithFastDNFT<Real>(TestFixture::configuration_, TestFixture::M_, f_w_fast);

  auto f_baseline = CachedNdftCpuTest::compute2DFTBaseline();
  auto f_baseline = TestFixture::compute2DFTBaseline();
  const auto err = dca::func::util::difference(f_baseline, f_w_fast);
  EXPECT_LT(err.l_inf, 1e-14);
  EXPECT_LT(err.l_inf, 100 * std::numeric_limits<Real>::epsilon());

  std::cout << "\nCached ndft time [sec]:\t " << time << "\n";
}

double computeWithFastDNFT(const CachedNdftCpuTest::Configuration& config,
                           const CachedNdftCpuTest::Matrix& M, CachedNdftCpuTest::F_w_w& f_w) {
template <typename Real>
double computeWithFastDNFT(const typename CachedNdftCpuTest<Real>::Configuration& config,
                           const typename CachedNdftCpuTest<Real>::Matrix& M,
                           typename CachedNdftCpuTest<Real>::F_w_w& f_w) {
  using BDmn = typename CachedNdftCpuTest<Real>::BDmn;
  using RDmn = typename CachedNdftCpuTest<Real>::RDmn;
  using PosFreqDmn = typename CachedNdftCpuTest<Real>::PosFreqDmn;
  using FreqDmn = typename CachedNdftCpuTest<Real>::FreqDmn;
  dca::func::function<std::complex<double>,
                      dca::func::dmn_variadic<CachedNdftCpuTest::BDmn, CachedNdftCpuTest::BDmn,
                                              CachedNdftCpuTest::RDmn, CachedNdftCpuTest::RDmn,
                                              CachedNdftCpuTest::PosFreqDmn, CachedNdftCpuTest::FreqDmn>>
                      dca::func::dmn_variadic<BDmn, BDmn, RDmn, RDmn, PosFreqDmn, FreqDmn>>
      f_b_b_r_r_w_w;
  dca::phys::solver::accumulator::CachedNdft<double, CachedNdftCpuTest::RDmn, CachedNdftCpuTest::FreqDmn,
                                             CachedNdftCpuTest::PosFreqDmn, dca::linalg::CPU>
      nft_obj;
  dca::phys::solver::accumulator::CachedNdft<double, RDmn, FreqDmn, PosFreqDmn, dca::linalg::CPU> nft_obj;

  dca::profiling::WallTime start_time;
  nft_obj.execute(config, M, f_b_b_r_r_w_w);
  dca::profiling::WallTime end_time;

  // Rearrange output.
  const int n_w = CachedNdftCpuTest::PosFreqDmn::dmn_size();
  const int n_w = PosFreqDmn::dmn_size();
  auto invert_w = [=](const int w) { return 2 * n_w - 1 - w; };
  for (int b2 = 0; b2 < CachedNdftCpuTest::BDmn::dmn_size(); ++b2)
    for (int b1 = 0; b1 < CachedNdftCpuTest::BDmn::dmn_size(); ++b1)
      for (int r2 = 0; r2 < CachedNdftCpuTest::RDmn::dmn_size(); ++r2)
        for (int r1 = 0; r1 < CachedNdftCpuTest::RDmn::dmn_size(); ++r1)
          for (int w2 = 0; w2 < CachedNdftCpuTest::FreqDmn::dmn_size(); ++w2)
  for (int b2 = 0; b2 < BDmn::dmn_size(); ++b2)
    for (int b1 = 0; b1 < BDmn::dmn_size(); ++b1)
      for (int r2 = 0; r2 < RDmn::dmn_size(); ++r2)
        for (int r1 = 0; r1 < RDmn::dmn_size(); ++r1)
          for (int w2 = 0; w2 < FreqDmn::dmn_size(); ++w2)
            for (int w1 = 0; w1 < n_w; ++w1) {
              f_w(b1, b2, r1, r2, w1 + n_w, w2) = f_b_b_r_r_w_w(b1, b2, r1, r2, w1, w2);
              f_w(b1, b2, r1, r2, invert_w(w1 + n_w), invert_w(w2)) =
+34 −19
Original line number Diff line number Diff line
@@ -12,6 +12,7 @@
#include "dca/phys/dca_step/cluster_solver/shared_tools/accumulation/tp/ndft/cached_ndft_gpu.hpp"

#include <complex>
#include <limits>

#include "gtest/gtest.h"

@@ -26,40 +27,54 @@
constexpr int n_bands = 2;
constexpr int n_sites = 3;
constexpr int n_frqs = 7;
using CachedNdftGpuTest =
    dca::testing::SingleSectorAccumulationTest<double, n_bands, n_sites, n_frqs>;

double computeWithFastNDFT(const CachedNdftGpuTest::Configuration& config,
                           const CachedNdftGpuTest::Matrix& M, CachedNdftGpuTest::F_w_w& f_w);
template <typename Real>
using CachedNdftGpuTest = dca::testing::SingleSectorAccumulationTest<Real, n_bands, n_sites, n_frqs>;

template <typename Real>
double computeWithFastNDFT(const typename CachedNdftGpuTest<Real>::Configuration& config,
                           const typename CachedNdftGpuTest<Real>::Matrix& M,
                           typename CachedNdftGpuTest<Real>::F_w_w& f_w);

using TestTypes = ::testing::Types<float, double>;
TYPED_TEST_CASE(CachedNdftGpuTest, TestTypes);

// Compare the result provided by the GPU version of CachedNdft::execute with the definition of the
// NDFT f(w1, w2) = \sum_{t1, t2} f(t1, t2) exp(i * (t1 * w1 - t2 * w2)), stored in
// f_baseline_.
TEST_F(CachedNdftGpuTest, Execute) {
TYPED_TEST(CachedNdftGpuTest, Execute) {
  constexpr int n_samples = 31;
  prepareConfiguration(configuration_, M_, n_samples);
  TestFixture::prepareConfiguration(TestFixture::configuration_, TestFixture::M_, n_samples);

  F_w_w f_w_fast("f_w_fast");
  using Real = TypeParam;
  typename TestFixture::F_w_w f_w_fast("f_w_fast");

  // Compute the NDFT with the CachedNdft class and rearrange the result with the same order as
  // f_baseline_.
  const double time = computeWithFastNDFT(configuration_, M_, f_w_fast);
  const double time =
      computeWithFastNDFT<Real>(TestFixture::configuration_, TestFixture::M_, f_w_fast);

  auto f_baseline = CachedNdftGpuTest::compute2DFTBaseline();
  auto f_baseline = TestFixture::compute2DFTBaseline();
  const auto err = dca::func::util::difference(f_baseline, f_w_fast);
  EXPECT_LT(err.l_inf, 1e-14);
  EXPECT_LT(err.l_inf, 100 * std::numeric_limits<Real>::epsilon());

  std::cout << "\nCached GPU ndft time [sec]:\t " << time << "\n";
}

double computeWithFastNDFT(const CachedNdftGpuTest::Configuration& config,
                           const CachedNdftGpuTest::Matrix& M, CachedNdftGpuTest::F_w_w& f_w) {
template <typename Real>
double computeWithFastNDFT(const typename CachedNdftGpuTest<Real>::Configuration& config,
                           const typename CachedNdftGpuTest<Real>::Matrix& M,
                           typename CachedNdftGpuTest<Real>::F_w_w& f_w) {
  dca::linalg::util::initializeMagma();
  magma_queue_t queue;
  magma_queue_create(&queue);

  dca::phys::solver::accumulator::CachedNdft<double, CachedNdftGpuTest::RDmn, CachedNdftGpuTest::FreqDmn,
                                             CachedNdftGpuTest::PosFreqDmn, dca::linalg::GPU>
      nft_obj(queue);
  using BDmn = typename CachedNdftGpuTest<Real>::BDmn;
  using RDmn = typename CachedNdftGpuTest<Real>::RDmn;
  using FreqDmn = typename CachedNdftGpuTest<Real>::FreqDmn;
  using PosFreqDmn = typename CachedNdftGpuTest<Real>::PosFreqDmn;

  dca::phys::solver::accumulator::CachedNdft<double, RDmn, FreqDmn, PosFreqDmn, dca::linalg::GPU> nft_obj(
      queue);
  EXPECT_EQ(magma_queue_get_cuda_stream(queue), nft_obj.get_stream());

  dca::linalg::Matrix<double, dca::linalg::GPU> M_dev(M);
@@ -77,15 +92,15 @@ double computeWithFastNDFT(const CachedNdftGpuTest::Configuration& config,

  // Rearrange the output from a function of (r1, b1, w1, r2, b2, w2) to a function of (b1, b2, r1,
  // r2, w1, w2).
  const int nb = CachedNdftGpuTest::BDmn::dmn_size();
  const int nr = CachedNdftGpuTest::RDmn::dmn_size();
  const int n_w = CachedNdftGpuTest::PosFreqDmn::dmn_size();
  const int nb = BDmn::dmn_size();
  const int nr = RDmn::dmn_size();
  const int n_w = PosFreqDmn::dmn_size();
  auto invert_w = [=](const int w) { return 2 * n_w - 1 - w; };
  for (int b2 = 0; b2 < nb; ++b2)
    for (int b1 = 0; b1 < nb; ++b1)
      for (int r2 = 0; r2 < nr; ++r2)
        for (int r1 = 0; r1 < nr; ++r1)
          for (int w2 = 0; w2 < CachedNdftGpuTest::FreqDmn::dmn_size(); ++w2)
          for (int w2 = 0; w2 < FreqDmn::dmn_size(); ++w2)
            for (int w1 = 0; w1 < n_w; ++w1) {
              const auto val = result_host(r1 + b1 * nr + w1 * nr * nb, r2 + b2 * nr + w2 * nr * nb);
              f_w(b1, b2, r1, r2, w1 + n_w, w2) = val;