Commit 9def90b3 authored by Doak, Peter W.'s avatar Doak, Peter W.
Browse files

closer to mixed precision build of complexG0

parent 654d9d20
Loading
Loading
Loading
Loading
+31 −7
Original line number Diff line number Diff line
@@ -318,7 +318,6 @@ public:
    return fnc_values_[dmn(static_cast<int>(t), static_cast<int>(subindices)...)];
  }

  
  void operator+=(const function<scalartype, domain, DT>& other);
  void operator-=(const function<scalartype, domain, DT>& other);
  void operator*=(const function<scalartype, domain, DT>& other);
@@ -367,7 +366,6 @@ public:
  template <typename new_scalartype>
  void distribute(int sbdm_index_1, int sbdm_index_2, int* subind, const new_scalartype* fnc_vals);

  
  //
  // Methods for printing
  //
@@ -432,7 +430,8 @@ function<scalartype, domain, DT>::function(const std::string& name)
  end_ = dmn.get_size();
  // If the function is more than 256 megs report it.
  if (end_ > 268435456) {
    std::cerr << "function " << name << " allocates " << sizeof(scalartype) * end_ / 1024 / 1024 << " MB" << '\n';
    std::cerr << "function " << name << " allocates " << sizeof(scalartype) * end_ / 1024 / 1024
              << " MB" << '\n';
    if (name_ == "no-name")
      std::cerr << "large functions need names give yourself a chance.\n";
  }
@@ -669,6 +668,7 @@ template <typename Scalar, class domain, DistType DT>
template <typename Scalar2>
inline function<Scalar, domain, DT>& function<Scalar, domain, DT>::operator=(
    const function<Scalar2, domain, DT>& other) {
  if constexpr (std::is_same_v<decltype(*this), decltype(other)>) {
    if (this != &other) {
      if constexpr (dist == DistType::NONE) {
        if (size() != other.size()) {
@@ -683,6 +683,30 @@ inline function<Scalar, domain, DT>& function<Scalar, domain, DT>::operator=(
      }
      fnc_values_ = other.fnc_values_;
    }
  }
  else {
    if constexpr (dist == DistType::NONE) {
      if (size() != other.size()) {
        throw(std::logic_error("Function size does not match."));
      }
    }
    else if constexpr (dist == DistType::LINEAR || dist == DistType::BLOCKED) {
      Nb_sbdms = other.dmn.get_leaf_domain_sizes().size();
      start_ = other.start_;
      end_ = other.end_;
      fnc_values_.resize(other.size(), {});
    }
    auto kConvert = [](auto& kvec) -> std::vector<Scalar> {
      std::vector<Scalar> k_converted(kvec.size());
      std::transform(kvec.begin(), kvec.end(), k_converted.begin(),
                     [](auto& val) -> typename decltype(k_converted)::value_type {
                       return static_cast<typename decltype(k_converted)::value_type>(val);
                     });
      return k_converted;
    };

    fnc_values_ = kConvert(other.getValues());
  }
  return *this;
}

+2 −1
Original line number Diff line number Diff line
@@ -303,7 +303,8 @@ template <typename ScalarRhs, DeviceType rhs_device_name, class rhs_ALLOC>
Matrix<ScalarType, device_name,  ALLOC>::Matrix(const Matrix<ScalarRhs, rhs_device_name, rhs_ALLOC>& rhs,
                                        const std::string& name)
    : name_(name), size_(rhs.size_), capacity_(rhs.capacity_) {
  static_assert(sizeof(ScalarType) == sizeof(ScalarRhs));
  if (sizeof(ScalarType) != sizeof(ScalarRhs))
    throw std::runtime_error("conversion of both type and location of Matrix not currently possible!");
  data_ = Allocator::allocate(nrElements(capacity_));
  util::memoryCopy(data_, leadingDimension(), rhs.data_, rhs.leadingDimension(), size_);
}
+2 −0
Original line number Diff line number Diff line
@@ -294,6 +294,7 @@ void insertRow(Matrix<Scalar, CPU, ALLOC>& mat, int i) {
// Out: ipiv, work
// Preconditions: mat is a square matrix.
// Postconditions: ipiv and work are resized to the needed dimension.
// \todo consider doing inverse at full precision reguardless of incoming Scalar precision
template <typename Scalar, DeviceType device_name, template <typename, DeviceType> class MatrixType>
void inverse(MatrixType<Scalar, device_name>& mat, Vector<int, CPU>& ipiv,
             Vector<Scalar, device_name>& work) {
@@ -310,6 +311,7 @@ void inverse(MatrixType<Scalar, device_name>& mat, Vector<int, CPU>& ipiv,
  lapack::UseDevice<device_name>::getri(mat.nrRows(), mat.ptr(), mat.leadingDimension(), ipiv.ptr(),
                                        work.ptr(), lwork);
}

template <typename Scalar, DeviceType device_name, template <typename, DeviceType> class MatrixType>
void inverse(MatrixType<Scalar, device_name>& mat) {
  Vector<int, CPU> ipiv;
+2 −1
Original line number Diff line number Diff line
@@ -114,6 +114,7 @@ const linalg::Matrix<dca::util::ComplexAlias<Scalar>, linalg::CPU>& SpaceTransfo
template <class RDmn, class KDmn, typename Scalar>
const auto& SpaceTransform2D<RDmn, KDmn, Scalar>::getPhaseFactors() {
  static func::function<Complex, func::dmn_variadic<BDmn, KDmn>> phase_factors("Phase factors.");
  using Real = dca::util::RealAlias<Scalar>;
  static std::once_flag flag;

  // Initialize the phase factors Exp[i k a[b]].
@@ -126,7 +127,7 @@ const auto& SpaceTransform2D<RDmn, KDmn, Scalar>::getPhaseFactors() {
      const auto& k_vec = KDmn::get_elements()[k];
      for (int b = 0; b < BDmn::dmn_size(); ++b) {
	// Scalar could be cuComplex or cuDoubleComplex so...
	std::complex<dca::util::RealAlias<Scalar>> temp_phase{0., util::innerProduct(k_vec, a_vecs[b])};
	std::complex<Real> temp_phase{0., static_cast<Real>(util::innerProduct(k_vec, a_vecs[b]))};
	temp_phase = std::exp(temp_phase);
	phase_factors(b,k) = Complex{temp_phase.real(), temp_phase.imag()};
      }
+7 −3
Original line number Diff line number Diff line
@@ -85,7 +85,7 @@ public:
  }

  static constexpr int window_sampling_ = 32;
  static constexpr double window_function_sigma_ = 2.;
  static constexpr Real window_function_sigma_ = 2.;

  using WindowFunction = kaiser_bessel_function;

@@ -131,6 +131,10 @@ private:

  void foldTimeDomainBack();

  /** FTau to f_w transform
   *  right now the fftw call wrapped by these functions  uses double precision
   *  regardless of OutReal's precision
   */
  template <class OutReal>
  void transformFTauToFW(func::function<std::complex<OutReal>, func::dmn_variadic<WDmn, PDmn>>& f_w) const;
  // Implementation for Real types
@@ -492,9 +496,9 @@ void Dnfft1D<Scalar, WDmn, PDmn, oversampling, mode>::transformFTauToFW(
  const int n = nfft_time_domain<LEFT_ORIENTED, ThisType>::get_size();
  const int padding = (n_padded - n) / 2;

  std::vector<std::complex<double>> f_in(n);
  std::vector<std::complex<Real>> f_in(n);

  std::vector<std::complex<double>> f_out(n);
  std::vector<std::complex<Real>> f_out(n);
  fftw_plan plan;

  {
Loading