Unverified Commit a3932a44 authored by Peter Doak's avatar Peter Doak Committed by GitHub
Browse files

Merge pull request #98 from CompFUSE/tweak_accum_memory2

Tweak accum memory2
parents 65b2da89 4245407c
Loading
Loading
Loading
Loading
+376 −0
Original line number Diff line number Diff line
// Copyright (C) 2018 ETH Zurich
// Copyright (C) 2018 UT-Battelle, LLC
// All rights reserved.
//
// See LICENSE for terms of usage.
// See CITATION.md for citation guidelines, if DCA++ is used for scientific publications.
//
// Author: Giovanni Balduzzi (gbalduzz@itp.phys.ethz.ch)
//
// This file provides a matrix with a more efficient reshaping between different rectangular shapes
//  of similar total size.

#pragma once

#include <cassert>
#include <cmath>
#include <sstream>
#include <iostream>
#include <stdexcept>
#include <string>
#include <type_traits>
#include <utility>

#include "dca/linalg/util/allocators/allocators.hpp"
#include "dca/linalg/device_type.hpp"
#include "dca/linalg/util/copy.hpp"
#include "dca/linalg/util/stream_functions.hpp"

namespace dca {
namespace linalg {
// dca::linalg::

// A matrix that can be cheaply reshaped between different rectangular shapes:
// storage is a flat buffer of capacity_ elements, so changing the shape only
// reallocates when the new element count exceeds the current capacity.
// Elements are stored column-major; the leading dimension equals the number of
// rows of the current shape (see leadingDimension()).
template <typename ScalarType, DeviceType device_name,
          class Allocator = util::DefaultAllocator<ScalarType, device_name>>
class ReshapableMatrix : public Allocator {
public:
  using ThisType = ReshapableMatrix<ScalarType, device_name, Allocator>;
  using ValueType = ScalarType;

  // Default constructor creates a matrix of zero size and capacity.
  ReshapableMatrix() = default;
  // Initializes a square size x size matrix.
  // NOTE(review): not marked explicit, so an int converts implicitly to a
  // matrix — confirm this implicit conversion is intended.
  ReshapableMatrix(int size);
  // Initializes a rectangular size.first x size.second matrix.
  ReshapableMatrix(std::pair<int, int> size);

  // Constructs a matrix with size rhs.size() and a copy of the elements of rhs.
  ReshapableMatrix(const ThisType& rhs);
  // Cross-device / cross-allocator copy construction (e.g. GPU <- CPU).
  template <DeviceType rhs_device_name, class AllocatorRhs>
  ReshapableMatrix(const ReshapableMatrix<ScalarType, rhs_device_name, AllocatorRhs>& rhs);

  // Constructs a matrix with size rhs.size(). The elements of rhs are moved.
  ReshapableMatrix(ThisType&& rhs);

  // Resizes the matrix to rhs.size() and copies the elements.
  ReshapableMatrix& operator=(const ThisType& rhs);
  // Cross-device / cross-allocator copy assignment.
  template <DeviceType rhs_device_name, class AllocatorRhs>
  ReshapableMatrix& operator=(const ReshapableMatrix<ScalarType, rhs_device_name, AllocatorRhs>& rhs);

  // Moves the elements of rhs into this matrix. Implemented as a swap, so rhs
  // receives this matrix's old storage.
  ReshapableMatrix& operator=(ThisType&& rhs);

  ~ReshapableMatrix();

  // Returns true if this is equal to other, false otherwise.
  // Two matrices are equal if they have the same size and contain the same
  // elements; capacity is ignored.
  // Special case: two matrices without elements are equal regardless of shape.
  bool operator==(const ReshapableMatrix<ScalarType, device_name, Allocator>& other) const;

  // Returns true if this is not equal to other, false otherwise.
  // See description of operator== for the definition of equality.
  bool operator!=(const ReshapableMatrix<ScalarType, device_name, Allocator>& other) const;

  // Returns the (i, j)-th element of the matrix (column-major indexing).
  // Preconditions: 0 <= i < size().first, 0 <= j < size().second.
  // This method is available only if device_name == CPU.
  template <DeviceType dn = device_name, typename = std::enable_if_t<dn == CPU>>
  ScalarType& operator()(int i, int j) {
    assert(i >= 0 && i < size_.first);
    assert(j >= 0 && j < size_.second);
    return data_[i + j * leadingDimension()];
  }
  template <DeviceType dn = device_name, typename = std::enable_if_t<dn == CPU>>
  const ScalarType& operator()(int i, int j) const {
    assert(i >= 0 && i < size_.first);
    assert(j >= 0 && j < size_.second);
    return data_[i + j * leadingDimension()];
  }

  // Returns the pointer to the (0,0)-th element.
  ValueType* ptr() {
    return data_;
  }
  const ValueType* ptr() const {
    return data_;
  }

  // Returns the pointer to the (i, j)-th element, or a pointer past the end of
  // the range if i == size().first or j == size().second.
  // Preconditions: 0 <= i <= size().first, 0 <= j <= size().second.
  ValueType* ptr(int i, int j) {
    assert(i >= 0 && i <= size_.first);
    assert(j >= 0 && j <= size_.second);
    return data_ + i + j * leadingDimension();
  }
  const ValueType* ptr(int i, int j) const {
    assert(i >= 0 && i <= size_.first);
    assert(j >= 0 && j <= size_.second);
    return data_ + i + j * leadingDimension();
  }

  // Current shape as (rows, columns).
  const std::pair<int, int> size() const {
    return size_;
  }
  // Number of elements the allocated buffer can hold (elements, not bytes).
  std::size_t capacity() const {
    return capacity_;
  }
  int nrRows() const {
    return size_.first;
  }
  int nrCols() const {
    return size_.second;
  }
  // Column stride of the storage; equals the current number of rows.
  int leadingDimension() const {
    return size_.first;
  }

  // Resizes *this to a (new_size.first x new_size.second) matrix.
  // The previous elements are not copied, therefore all the elements
  // may have any value after the call to this method.
  // Returns: true if reallocation took place.
  bool resizeNoCopy(std::pair<int, int> new_size);
  // Resizes *this to a (new_size x new_size) matrix. See previous method for details.
  bool resizeNoCopy(int new_size) {
    return resizeNoCopy(std::make_pair(new_size, new_size));
  }

  // Reserves the space for at least new_size elements without changing
  // the matrix size. The value of the matrix elements is undefined after calling this method.
  // Returns: true if reallocation took place.
  bool reserveNoCopy(std::size_t new_size);

  // Exchanges shape, capacity and storage with other in O(1).
  void swap(ReshapableMatrix<ScalarType, device_name, Allocator>& other);

  // Releases the memory allocated by *this and sets size and capacity to zero.
  void clear();

#ifdef DCA_HAVE_CUDA
  // Asynchronous assignment on the given CUDA stream.
  template <DeviceType rhs_device_name>
  void setAsync(const ReshapableMatrix<ScalarType, rhs_device_name>& rhs, cudaStream_t stream);

  // Asynchronous assignment (copy with stream = getStream(thread_id, stream_id))
  template <DeviceType rhs_device_name>
  void setAsync(const ReshapableMatrix<ScalarType, rhs_device_name>& rhs, int thread_id,
                int stream_id);

  // Asynchronously zeroes all elements of the current shape on the given stream.
  void setToZero(cudaStream_t stream);
#else
  // Synchronous assignment fallback for setAsync; the stream ids are ignored.
  template <DeviceType rhs_device_name>
  void setAsync(const ReshapableMatrix<ScalarType, rhs_device_name>& rhs, int /*thread_id*/,
                int /*stream_id*/);

#endif  // DCA_HAVE_CUDA

  // Returns the allocated device memory in bytes (0 for CPU matrices).
  std::size_t deviceFingerprint() const;

private:
  // Capacity growth policy: next power of two for small sizes, next multiple
  // of a fixed block size for larger ones (see the definition below).
  static std::size_t nextCapacity(std::size_t size);
  // Total number of elements of a (rows, columns) shape, computed in size_t to
  // avoid int overflow for large matrices.
  inline static size_t nrElements(std::pair<int, int> size) {
    return static_cast<size_t>(size.first) * static_cast<size_t>(size.second);
  }

  std::pair<int, int> size_ = std::make_pair(0, 0);  // Current (rows, columns).
  std::size_t capacity_ = 0;  // Allocated elements; invariant: >= nrElements(size_).

  ValueType* data_ = nullptr;  // Owned storage, managed through the Allocator base.

  // Allow other instantiations (different device/allocator) to reach data_
  // for cross-device copies.
  template <class ScalarType2, DeviceType device_name2, class Allocator2>
  friend class dca::linalg::ReshapableMatrix;
};

// Square-matrix constructor: delegates to the rectangular constructor with a
// (size, size) shape.
template <typename ScalarType, DeviceType device_name, class Allocator>
ReshapableMatrix<ScalarType, device_name, Allocator>::ReshapableMatrix(const int size)
    : ReshapableMatrix(std::pair<int, int>(size, size)) {}

// Rectangular constructor: allocates a buffer of nextCapacity(rows * cols)
// elements; the elements are left uninitialized.
// Precondition: both extents are non-negative.
template <typename ScalarType, DeviceType device_name, class Allocator>
ReshapableMatrix<ScalarType, device_name, Allocator>::ReshapableMatrix(
    const std::pair<int, int> size)
    : size_(size), capacity_(nextCapacity(nrElements(size))) {
  assert(size_.first >= 0);
  assert(size_.second >= 0);
  assert(capacity_ >= nrElements(size_));

  data_ = Allocator::allocate(capacity_);
}

// Copy constructor: delegates to the copy-assignment operator.
// data_ starts as nullptr, so the deallocate performed inside operator= is
// assumed to be a harmless no-op on a freshly constructed object.
template <typename ScalarType, DeviceType device_name, class Allocator>
ReshapableMatrix<ScalarType, device_name, Allocator>::ReshapableMatrix(const ThisType& rhs) {
  operator=(rhs);
}

// Cross-device / cross-allocator copy constructor: allocates on this matrix's
// device and copies the elements of rhs via the templated copy assignment.
template <typename ScalarType, DeviceType device_name, class Allocator>
template <DeviceType rhs_device_name, class AllocatorRhs>
ReshapableMatrix<ScalarType, device_name, Allocator>::ReshapableMatrix(
    const ReshapableMatrix<ScalarType, rhs_device_name, AllocatorRhs>& rhs) {
  // data_ is nullptr here; operator= handles the (no-op) release of it.
  operator=(rhs);
}

// Move constructor: default-constructs an empty matrix, then steals rhs's
// state through swap. rhs is left in the empty (zero size/capacity) state.
template <typename ScalarType, DeviceType device_name, class Allocator>
ReshapableMatrix<ScalarType, device_name, Allocator>::ReshapableMatrix(
    ReshapableMatrix<ScalarType, device_name, Allocator>&& rhs)
    : ReshapableMatrix() {
  swap(rhs);
}

// Copy assignment: adopts rhs's size and capacity, reallocates and copies the
// elements.
// Fix: guard against self-assignment. The previous implementation deallocated
// data_ before copying from rhs.data_, so `m = m;` read from freed memory.
template <typename ScalarType, DeviceType device_name, class Allocator>
ReshapableMatrix<ScalarType, device_name, Allocator>& ReshapableMatrix<
    ScalarType, device_name, Allocator>::operator=(const ThisType& rhs) {
  if (this != &rhs) {
    size_ = rhs.size_;
    capacity_ = rhs.capacity_;

    // Always reallocate to rhs's capacity; no attempt is made to reuse an
    // existing sufficiently large buffer (matches the original behavior).
    Allocator::deallocate(data_);
    data_ = Allocator::allocate(capacity_);
    util::memoryCopy(data_, leadingDimension(), rhs.data_, rhs.leadingDimension(), size_);
  }
  return *this;
}

// Cross-device / cross-allocator copy assignment: reallocates to rhs's
// capacity and copies the elements across devices.
// Self-assignment is not a practical concern here: for identical template
// arguments overload resolution prefers the non-template operator=, so rhs is
// a distinct object whenever this overload is selected.
template <typename ScalarType, DeviceType device_name, class Allocator>
template <DeviceType rhs_device_name, class AllocatorRhs>
ReshapableMatrix<ScalarType, device_name, Allocator>& ReshapableMatrix<
    ScalarType, device_name,
    Allocator>::operator=(const ReshapableMatrix<ScalarType, rhs_device_name, AllocatorRhs>& rhs) {
  size_ = rhs.size_;
  capacity_ = rhs.capacity_;

  // Old storage is released and fresh storage of rhs's capacity is acquired;
  // no attempt is made to reuse an existing sufficiently large buffer.
  Allocator::deallocate(data_);
  data_ = Allocator::allocate(capacity_);
  util::memoryCopy(data_, leadingDimension(), rhs.data_, rhs.leadingDimension(), size_);
  return *this;
}

// Move assignment: exchanges all state with rhs; this matrix's old buffer is
// released when rhs is destroyed. Safe under self-move (swapping an object
// with itself is a no-op).
template <typename ScalarType, DeviceType device_name, class Allocator>
ReshapableMatrix<ScalarType, device_name, Allocator>& ReshapableMatrix<
    ScalarType, device_name, Allocator>::operator=(ThisType&& rhs) {
  this->swap(rhs);
  return *this;
}

// Releases the owned storage. data_ may be nullptr (default-constructed or
// cleared matrices); this assumes Allocator::deallocate accepts a null
// pointer — TODO confirm against the allocator implementations.
template <typename ScalarType, DeviceType device_name, class Allocator>
ReshapableMatrix<ScalarType, device_name, Allocator>::~ReshapableMatrix() {
  Allocator::deallocate(data_);
}

template <typename ScalarType, DeviceType device_name, class Allocator>
bool ReshapableMatrix<ScalarType, device_name, Allocator>::operator==(
    const ReshapableMatrix<ScalarType, device_name, Allocator>& other) const {
  // GPU matrices are compared by first copying both operands to host memory.
  if (device_name == GPU)
    return ReshapableMatrix<ScalarType, CPU>(*this) == ReshapableMatrix<ScalarType, CPU>(other);

  // Different shapes compare equal only when both matrices are empty.
  if (size() != other.size())
    return nrRows() * nrCols() == 0 and other.nrRows() * other.nrCols() == 0;

  // Element-wise comparison in column-major order.
  // NOTE(review): (*this)(i, j) is SFINAE-restricted to device_name == CPU,
  // and this runtime `if` does not prevent the loop from being instantiated
  // for GPU — confirm this operator is only ever instantiated for CPU
  // matrices, or guard the branch at compile time.
  for (int j = 0; j < nrCols(); ++j)
    for (int i = 0; i < nrRows(); ++i)
      if ((*this)(i, j) != other(i, j))
        return false;

  return true;
}

// Inequality: the logical negation of operator== (see its definition of
// equality).
template <typename ScalarType, DeviceType device_name, class Allocator>
bool ReshapableMatrix<ScalarType, device_name, Allocator>::operator!=(
    const ReshapableMatrix<ScalarType, device_name, Allocator>& other) const {
  return not(*this == other);
}

// Changes the shape without preserving elements: ensures storage for the new
// element count, then adopts the new shape. Returns whether a reallocation
// happened.
template <typename ScalarType, DeviceType device_name, class Allocator>
bool ReshapableMatrix<ScalarType, device_name, Allocator>::resizeNoCopy(
    const std::pair<int, int> new_size) {
  const bool reallocated = reserveNoCopy(nrElements(new_size));
  size_ = new_size;
  assert(capacity_ >= nrElements(size_));
  return reallocated;
}

// Ensures storage for at least new_size elements without changing the shape.
// Element values are undefined after a reallocation.
// Returns: true if a reallocation took place.
// Fix: data_ is nulled between deallocate and allocate so that, if allocate
// throws, the destructor does not deallocate the already-freed pointer again.
template <typename ScalarType, DeviceType device_name, class Allocator>
bool ReshapableMatrix<ScalarType, device_name, Allocator>::reserveNoCopy(std::size_t new_size) {
  if (new_size > capacity_) {
    Allocator::deallocate(data_);
    data_ = nullptr;
    capacity_ = nextCapacity(new_size);
    data_ = Allocator::allocate(capacity_);
    return true;
  }
  return false;
}

// Exchanges all state (shape, capacity, storage pointer) with other in O(1);
// no elements are copied.
template <typename ScalarType, DeviceType device_name, class Allocator>
void ReshapableMatrix<ScalarType, device_name, Allocator>::swap(
    ReshapableMatrix<ScalarType, device_name, Allocator>& other) {
  using std::swap;
  swap(size_, other.size_);
  swap(capacity_, other.capacity_);
  swap(data_, other.data_);
}

// Releases the allocated memory and resets size and capacity to zero.
// Fix: data_ must be reset to nullptr after deallocation. Leaving it dangling
// caused a double free the next time reserveNoCopy (capacity_ is 0, so any
// growth deallocates data_ again) or the destructor ran.
template <typename ScalarType, DeviceType device_name, class Allocator>
void ReshapableMatrix<ScalarType, device_name, Allocator>::clear() {
  Allocator::deallocate(data_);
  data_ = nullptr;
  size_ = std::make_pair(0, 0);
  capacity_ = 0;
}

#ifdef DCA_HAVE_CUDA

// Asynchronous assignment: resizes *this to rhs's shape (old elements are
// discarded) and enqueues an element copy on the given CUDA stream.
// NOTE(review): the copy is presumably asynchronous with respect to the host —
// the caller must synchronize `stream` before reading the destination; confirm
// against util::memoryCopyAsync.
template <typename ScalarType, DeviceType device_name, class Allocator>
template <DeviceType rhs_device_name>
void ReshapableMatrix<ScalarType, device_name, Allocator>::setAsync(
    const ReshapableMatrix<ScalarType, rhs_device_name>& rhs, const cudaStream_t stream) {
  resizeNoCopy(rhs.size_);
  util::memoryCopyAsync(data_, leadingDimension(), rhs.data_, rhs.leadingDimension(), size_, stream);
}

// Convenience overload: resolves the CUDA stream registered under
// (thread_id, stream_id) through util::getStream and forwards to the
// stream-based setAsync above.
template <typename ScalarType, DeviceType device_name, class Allocator>
template <DeviceType rhs_device_name>
void ReshapableMatrix<ScalarType, device_name, Allocator>::setAsync(
    const ReshapableMatrix<ScalarType, rhs_device_name>& rhs, const int thread_id,
    const int stream_id) {
  setAsync(rhs, util::getStream(thread_id, stream_id));
}

// Asynchronously zeroes the whole size_.first x size_.second matrix on the
// given stream (leadingDimension() == nrRows(), so the storage is contiguous).
// Fix: the byte count is computed in std::size_t. The previous code multiplied
// two ints (leadingDimension() * nrCols()) first, which can overflow for large
// matrices before the widening multiply by sizeof.
template <typename ScalarType, DeviceType device_name, class Allocator>
void ReshapableMatrix<ScalarType, device_name, Allocator>::setToZero(cudaStream_t stream) {
  const std::size_t bytes =
      static_cast<std::size_t>(leadingDimension()) * static_cast<std::size_t>(nrCols()) *
      sizeof(ScalarType);
  cudaMemsetAsync(data_, 0, bytes, stream);
}

#else  // DCA_HAVE_CUDA

// Fallback used when CUDA is not available: there is no stream to enqueue on,
// so the assignment is performed synchronously and the stream identifiers are
// accepted but ignored.
template <typename ScalarType, DeviceType device_name, class Allocator>
template <DeviceType rhs_device_name>
void ReshapableMatrix<ScalarType, device_name, Allocator>::setAsync(
    const ReshapableMatrix<ScalarType, rhs_device_name>& rhs, int /*thread_id*/, int /*stream_id*/) {
  operator=(rhs);
}

#endif  // DCA_HAVE_CUDA

// Growth policy for the flat buffer: sizes up to block_size round up to the
// next power of two; larger sizes round up to the next multiple of block_size.
// This keeps the number of reallocations logarithmic for small matrices and
// bounds the wasted space for large ones.
// Fix: removed `assert(size >= 0)` — size is unsigned, so the assertion was
// vacuously true (and triggers a tautological-compare warning).
template <typename ScalarType, DeviceType device_name, class Allocator>
std::size_t ReshapableMatrix<ScalarType, device_name, Allocator>::nextCapacity(const std::size_t size) {
  constexpr std::size_t block_size = 512;

  auto next_power_of_two = [](std::size_t x) {
    if (!x)
      return std::size_t(0);
    std::size_t result = 1;
    while (result < x) {
      result <<= 1;
    }
    return result;
  };

  return size <= block_size ? next_power_of_two(size)
                            : (size + block_size - 1) / block_size * block_size;
}

// Reports the allocated device memory in bytes; only GPU matrices hold device
// memory, CPU matrices report zero.
template <typename ScalarType, DeviceType device_name, class Allocator>
std::size_t ReshapableMatrix<ScalarType, device_name, Allocator>::deviceFingerprint() const {
  return device_name == GPU ? capacity_ * sizeof(ScalarType) : std::size_t(0);
}

}  // linalg
}  // dca
+3 −0
Original line number Diff line number Diff line
@@ -21,6 +21,9 @@ template <typename T>
class AlignedAllocator {
protected:
  T* allocate(std::size_t n) {
    if (!n)
      return nullptr;

    T* ptr;
    int err = posix_memalign((void**)&ptr, 128, n * sizeof(T));
    if (err)
+33 −16
Original line number Diff line number Diff line
@@ -19,6 +19,10 @@

#include "dca/math/function_transform/special_transforms/space_transform_2D_gpu.hpp"

#include <array>
#include <memory>

#include "dca/linalg/reshapable_matrix.hpp"
#include "dca/linalg/util/magma_batched_gemm.hpp"
#include "dca/math/function_transform/special_transforms/kernels_interface.hpp"
#include "dca/phys/domains/quantum/electron_band_domain.hpp"
@@ -33,6 +37,7 @@ class SpaceTransform2DGpu : private SpaceTransform2D<RDmn, KDmn, Real> {
private:
  using Complex = std::complex<Real>;
  using MatrixDev = linalg::Matrix<Complex, linalg::GPU>;
  using RMatrix = linalg::ReshapableMatrix<Complex, linalg::GPU>;

public:
  // Constructor
@@ -44,7 +49,11 @@ public:
  // order of M's labels from (r, b, w) to (b, r, w).
  // The transform is equivalent to M(k1, k2) = \sum_{r1, r2} exp(i(k1 * r1 - k2 * r2)) M(r1, r2)
  // In/Out: M
  void execute(MatrixDev& M);
  void execute(RMatrix& M);

  void setWorkspace(const std::shared_ptr<RMatrix>& workspace) {
    workspace_ = workspace;
  }

  // Returns: the stream associated with the magma queue.
  cudaStream_t get_stream() const {
@@ -52,7 +61,11 @@ public:
  }

  std::size_t deviceFingerprint() const {
    return T_times_M_.deviceFingerprint() + T_times_M_times_T_.deviceFingerprint();
    std::size_t res(0);

      if (workspace_.unique())
        res += workspace_->deviceFingerprint();
    return res;
  }

private:
@@ -60,7 +73,7 @@ private:
  using BDmn = func::dmn_0<phys::domains::electron_band_domain>;

  static const MatrixDev& get_T_matrix();
  void rearrangeResult(const MatrixDev& in, MatrixDev& out);
  void rearrangeResult(const RMatrix& in, RMatrix& out);

  const int n_bands_;
  const int nw_;
@@ -69,8 +82,7 @@ private:
  magma_queue_t queue_;
  cudaStream_t stream_;

  MatrixDev T_times_M_;
  MatrixDev T_times_M_times_T_;
  std::shared_ptr<RMatrix> workspace_;

  linalg::util::MagmaBatchedGemm<Complex> plan1_;
  linalg::util::MagmaBatchedGemm<Complex> plan2_;
@@ -84,11 +96,16 @@ SpaceTransform2DGpu<RDmn, KDmn, Real>::SpaceTransform2DGpu(const int nw_pos, mag
      queue_(queue),
      stream_(magma_queue_get_cuda_stream(queue_)),
      plan1_(queue_),
      plan2_(queue_) {}
      plan2_(queue_) {
  workspace_ = std::make_shared<RMatrix>();
}

template <class RDmn, class KDmn, typename Real>
void SpaceTransform2DGpu<RDmn, KDmn, Real>::execute(MatrixDev& M) {
  T_times_M_.resizeNoCopy(M.size());
void SpaceTransform2DGpu<RDmn, KDmn, Real>::execute(RMatrix& M) {
  auto& T_times_M = *(workspace_);
  auto& T_times_M_times_T = M;

  T_times_M.resizeNoCopy(M.size());
  const auto& T = get_T_matrix();

  {
@@ -96,33 +113,33 @@ void SpaceTransform2DGpu<RDmn, KDmn, Real>::execute(MatrixDev& M) {

    plan1_.synchronizeCopy();
    for (int i = 0; i < n_trafo; ++i)
      plan1_.addGemm(T.ptr(), M.ptr(i * nc_, 0), T_times_M_.ptr(i * nc_, 0));
      plan1_.addGemm(T.ptr(), M.ptr(i * nc_, 0), T_times_M.ptr(i * nc_, 0));

    const int lda = T.leadingDimension();
    const int ldb = M.leadingDimension();
    const int ldc = T_times_M_.leadingDimension();
    const int ldc = T_times_M.leadingDimension();
    plan1_.execute('N', 'N', nc_, M.nrCols(), nc_, Complex(1), Complex(0), lda, ldb, ldc);
  }

  T_times_M_times_T_.resizeNoCopy(M.size());
  {
    const int n_trafo = M.nrCols() / nc_;
    plan2_.synchronizeCopy();
    for (int j = 0; j < n_trafo; ++j)
      plan2_.addGemm(T_times_M_.ptr(0, j * nc_), T.ptr(), T_times_M_times_T_.ptr(0, j * nc_));
      plan2_.addGemm(T_times_M.ptr(0, j * nc_), T.ptr(), T_times_M_times_T.ptr(0, j * nc_));

    const int lda = T_times_M_.leadingDimension();
    const int lda = T_times_M.leadingDimension();
    const int ldb = T.leadingDimension();
    const int ldc = T_times_M_times_T_.leadingDimension();
    const int ldc = T_times_M_times_T.leadingDimension();
    const Complex norm(1. / nc_);
    plan2_.execute('N', 'C', M.nrRows(), nc_, nc_, norm, Complex(0), lda, ldb, ldc);
  }

  rearrangeResult(T_times_M_times_T_, M);
  rearrangeResult(T_times_M_times_T, *workspace_);
  M.swap(*workspace_);
}

template <class RDmn, class KDmn, typename Real>
void SpaceTransform2DGpu<RDmn, KDmn, Real>::rearrangeResult(const MatrixDev& in, MatrixDev& out) {
void SpaceTransform2DGpu<RDmn, KDmn, Real>::rearrangeResult(const RMatrix& in, RMatrix& out) {
  out.resizeNoCopy(in.size());
  details::rearrangeResult(in.ptr(), in.leadingDimension(), out.ptr(), out.leadingDimension(),
                           n_bands_, nc_, nw_, stream_);
+63 −32

File changed.

Preview size limit exceeded, changes collapsed.

+19 −2
Original line number Diff line number Diff line
@@ -22,6 +22,7 @@

#include "dca/config/accumulation_options.hpp"
#include "dca/linalg/lapack/magma.hpp"
#include "dca/linalg/reshapable_matrix.hpp"
#include "dca/linalg/util/cuda_event.hpp"
#include "dca/linalg/util/magma_queue.hpp"
#include "dca/math/function_transform/special_transforms/space_transform_2D_gpu.hpp"
@@ -97,6 +98,8 @@ public:

  std::size_t deviceFingerprint() const {
    std::size_t res(0);
    for (const auto& work : workspaces_)
      res += work->deviceFingerprint();
    for (int s = 0; s < 2; ++s)
      res += ndft_objs_[s].deviceFingerprint() + G_[s].deviceFingerprint() +
             space_trsf_objs_[s].deviceFingerprint();
@@ -122,6 +125,8 @@ private:
  using typename BaseClass::TpGreenFunction;
  using typename BaseClass::Complex;

  using Matrix = linalg::Matrix<Complex, linalg::GPU>;

  void initializeG0();
  void resetG4();
  void initializeG4Helpers() const;
@@ -155,19 +160,21 @@ private:
  using BaseClass::n_pos_frqs_;

  using MatrixDev = linalg::Matrix<Complex, linalg::GPU>;
  using RMatrix = linalg::ReshapableMatrix<Complex, linalg::GPU>;
  using MatrixHost = linalg::Matrix<Complex, linalg::CPU>;

  std::array<linalg::util::MagmaQueue, 2> queues_;
  std::array<cudaStream_t, 2> streams_;
  linalg::util::CudaEvent event_;

  std::vector<std::shared_ptr<RMatrix>> workspaces_;

  using NdftType = CachedNdft<Real, RDmn, WTpExtDmn, WTpExtPosDmn, linalg::GPU, non_density_density_>;
  std::array<NdftType, 2> ndft_objs_;
  using DftType = math::transform::SpaceTransform2DGpu<RDmn, KDmn, Real>;
  std::array<DftType, 2> space_trsf_objs_;

  std::array<MatrixDev, 2> G_;
  std::array<MatrixHost, 2> G_matrix_host_;
  std::array<RMatrix, 2> G_;

  bool finalized_ = false;
  bool initialized_ = false;
@@ -188,6 +195,16 @@ TpAccumulator<Parameters, linalg::GPU>::TpAccumulator(
      ndft_objs_{NdftType(queues_[0]), NdftType(queues_[1])},
      space_trsf_objs_{DftType(n_pos_frqs_, queues_[0]), DftType(n_pos_frqs_, queues_[1])} {
  initializeG4Helpers();

  // Create shared workspaces.
  workspaces_.resize(n_ndft_streams_);
  for (auto& work : workspaces_)
    work = std::make_shared<RMatrix>();

  for (int i = 0; i < n_ndft_streams_; ++i) {
    ndft_objs_[i].setWorkspace(workspaces_[i]);
    space_trsf_objs_[i].setWorkspace(workspaces_[i]);
  }
}

template <class Parameters>
Loading