closer to mixed precision build of complexG0 (9def90b3) · Commits · NDIP / Tool Sources / Direct-Geometry Spectroscopy / DCA / DCA Main

include/dca/function/function.hpp

+31 −7

Original line number	Diff line number	Diff line
		@@ -318,7 +318,6 @@ public:
		return fnc_values_[dmn(static_cast<int>(t), static_cast<int>(subindices)...)];
		}


		void operator+=(const function<scalartype, domain, DT>& other);
		void operator-=(const function<scalartype, domain, DT>& other);
		void operator*=(const function<scalartype, domain, DT>& other);
		@@ -367,7 +366,6 @@ public:
		template <typename new_scalartype>
		void distribute(int sbdm_index_1, int sbdm_index_2, int* subind, const new_scalartype* fnc_vals);


		//
		// Methods for printing
		//
		@@ -432,7 +430,8 @@ function<scalartype, domain, DT>::function(const std::string& name)
		end_ = dmn.get_size();
		// If the function is more than 256 megs report it.
		if (end_ > 268435456) {
		std::cerr << "function " << name << " allocates " << sizeof(scalartype) * end_ / 1024 / 1024 << " MB" << '\n';
		std::cerr << "function " << name << " allocates " << sizeof(scalartype) * end_ / 1024 / 1024
		<< " MB" << '\n';
		if (name_ == "no-name")
		std::cerr << "large functions need names give yourself a chance.\n";
		}
		@@ -669,6 +668,7 @@ template <typename Scalar, class domain, DistType DT>
		template <typename Scalar2>
		inline function<Scalar, domain, DT>& function<Scalar, domain, DT>::operator=(
		const function<Scalar2, domain, DT>& other) {
		if constexpr (std::is_same_v<decltype(*this), decltype(other)>) {
		if (this != &other) {
		if constexpr (dist == DistType::NONE) {
		if (size() != other.size()) {
		@@ -683,6 +683,30 @@ inline function<Scalar, domain, DT>& function<Scalar, domain, DT>::operator=(
		}
		fnc_values_ = other.fnc_values_;
		}
		}
		else {
		if constexpr (dist == DistType::NONE) {
		if (size() != other.size()) {
		throw(std::logic_error("Function size does not match."));
		}
		}
		else if constexpr (dist == DistType::LINEAR \|\| dist == DistType::BLOCKED) {
		Nb_sbdms = other.dmn.get_leaf_domain_sizes().size();
		start_ = other.start_;
		end_ = other.end_;
		fnc_values_.resize(other.size(), {});
		}
		auto kConvert = [](auto& kvec) -> std::vector<Scalar> {
		std::vector<Scalar> k_converted(kvec.size());
		std::transform(kvec.begin(), kvec.end(), k_converted.begin(),
		[](auto& val) -> typename decltype(k_converted)::value_type {
		return static_cast<typename decltype(k_converted)::value_type>(val);
		});
		return k_converted;
		};

		fnc_values_ = kConvert(other.getValues());
		}
		return *this;
		}

include/dca/linalg/matrix.hpp

+2 −1

Original line number	Diff line number	Diff line
		@@ -303,7 +303,8 @@ template <typename ScalarRhs, DeviceType rhs_device_name, class rhs_ALLOC>
		Matrix<ScalarType, device_name, ALLOC>::Matrix(const Matrix<ScalarRhs, rhs_device_name, rhs_ALLOC>& rhs,
		const std::string& name)
		: name_(name), size_(rhs.size_), capacity_(rhs.capacity_) {
		static_assert(sizeof(ScalarType) == sizeof(ScalarRhs));
		if (sizeof(ScalarType) != sizeof(ScalarRhs))
		throw std::runtime_error("conversion of both type and location of Matrix not currently possible!");
		data_ = Allocator::allocate(nrElements(capacity_));
		util::memoryCopy(data_, leadingDimension(), rhs.data_, rhs.leadingDimension(), size_);
		}

include/dca/linalg/matrixop.hpp

+2 −0

Original line number	Diff line number	Diff line
		@@ -294,6 +294,7 @@ void insertRow(Matrix<Scalar, CPU, ALLOC>& mat, int i) {
		// Out: ipiv, work
		// Preconditions: mat is a square matrix.
		// Postconditions: ipiv and work are resized to the needed dimension.
		// \todo consider doing inverse at full precision reguardless of incoming Scalar precision
		template <typename Scalar, DeviceType device_name, template <typename, DeviceType> class MatrixType>
		void inverse(MatrixType<Scalar, device_name>& mat, Vector<int, CPU>& ipiv,
		Vector<Scalar, device_name>& work) {
		@@ -310,6 +311,7 @@ void inverse(MatrixType<Scalar, device_name>& mat, Vector<int, CPU>& ipiv,
		lapack::UseDevice<device_name>::getri(mat.nrRows(), mat.ptr(), mat.leadingDimension(), ipiv.ptr(),
		work.ptr(), lwork);
		}

		template <typename Scalar, DeviceType device_name, template <typename, DeviceType> class MatrixType>
		void inverse(MatrixType<Scalar, device_name>& mat) {
		Vector<int, CPU> ipiv;

include/dca/math/function_transform/special_transforms/space_transform_2D.hpp

+2 −1

Original line number	Diff line number	Diff line
		@@ -114,6 +114,7 @@ const linalg::Matrix<dca::util::ComplexAlias<Scalar>, linalg::CPU>& SpaceTransfo
		template <class RDmn, class KDmn, typename Scalar>
		const auto& SpaceTransform2D<RDmn, KDmn, Scalar>::getPhaseFactors() {
		static func::function<Complex, func::dmn_variadic<BDmn, KDmn>> phase_factors("Phase factors.");
		using Real = dca::util::RealAlias<Scalar>;
		static std::once_flag flag;

		// Initialize the phase factors Exp[i k a[b]].
		@@ -126,7 +127,7 @@ const auto& SpaceTransform2D<RDmn, KDmn, Scalar>::getPhaseFactors() {
		const auto& k_vec = KDmn::get_elements()[k];
		for (int b = 0; b < BDmn::dmn_size(); ++b) {
		// Scalar could be cuComplex or cuDoubleComplex so...
		std::complex<dca::util::RealAlias<Scalar>> temp_phase{0., util::innerProduct(k_vec, a_vecs[b])};
		std::complex<Real> temp_phase{0., static_cast<Real>(util::innerProduct(k_vec, a_vecs[b]))};
		temp_phase = std::exp(temp_phase);
		phase_factors(b,k) = Complex{temp_phase.real(), temp_phase.imag()};
		}

include/dca/math/nfft/dnfft_1d.hpp

+7 −3

Original line number	Diff line number	Diff line
		@@ -85,7 +85,7 @@ public:
		}

		static constexpr int window_sampling_ = 32;
		static constexpr double window_function_sigma_ = 2.;
		static constexpr Real window_function_sigma_ = 2.;

		using WindowFunction = kaiser_bessel_function;

		@@ -131,6 +131,10 @@ private:

		void foldTimeDomainBack();

		/** FTau to f_w transform
		* right now the fftw call wrapped by these functions uses double precision
		* regardless of OutReal's precision
		*/
		template <class OutReal>
		void transformFTauToFW(func::function<std::complex<OutReal>, func::dmn_variadic<WDmn, PDmn>>& f_w) const;
		// Implementation for Real types
		@@ -492,9 +496,9 @@ void Dnfft1D<Scalar, WDmn, PDmn, oversampling, mode>::transformFTauToFW(
		const int n = nfft_time_domain<LEFT_ORIENTED, ThisType>::get_size();
		const int padding = (n_padded - n) / 2;

		std::vector<std::complex<double>> f_in(n);
		std::vector<std::complex<Real>> f_in(n);

		std::vector<std::complex<double>> f_out(n);
		std::vector<std::complex<Real>> f_out(n);
		fftw_plan plan;

		{