Loading CMakeLists.txt +0 −6 Original line number Diff line number Diff line Loading @@ -147,11 +147,7 @@ set(DCA_LIBS cuda_utils ) set(SYSTEM_GPU_COUNT 0) if (DCA_HAVE_CUDA) EXECUTE_PROCESS(COMMAND bash -c "nvidia-smi -L | awk 'BEGIN { num_gpu=0;} /GPU/ { num_gpu++;} END { printf(\"%d\", num_gpu) }'" OUTPUT_VARIABLE SYSTEM_GPU_COUNT) list(APPEND DCA_LIBS blas_kernels dnfft_kernels Loading @@ -171,8 +167,6 @@ option(DCA_WITH_TESTS_EXTENSIVE "Build DCA++'s extensive tests." OFF) option(DCA_WITH_TESTS_PERFORMANCE "Build DCA++'s performance tests. (Only in Release mode.)" OFF) option(DCA_WITH_TESTS_STOCHASTIC "Build DCA++'s stochastic tests." OFF) set(DCA_TEST_GPU_COUNT "${SYSTEM_GPU_COUNT}" CACHE INTEGER "Number of GPUs available on one node for one test.") set(TEST_RUNNER "" CACHE STRING "Command for executing (MPI) programs.") set(MPIEXEC_NUMPROC_FLAG "-n" CACHE STRING "Flag used by TEST_RUNNER to specify the number of processes.") set(MPIEXEC_PREFLAGS "" CACHE STRING "Flags to pass to TEST_RUNNER directly before the executable to run.") Loading build-aux/summit.cmake +2 −5 Original line number Diff line number Diff line Loading @@ -23,14 +23,11 @@ set(MPIEXEC_PREFLAGS "-a 1 -g 1 -c 5" CACHE STRING set(SMPIARGS_FLAG_NOMPI "--smpiargs=none" CACHE STRING "Spectrum MPI argument list flag for serial tests.") # Let's keep this option in case we need it again in the future. set(SMPIARGS_FLAG_MPI "" CACHE STRING "Spectrum MPI argument list flag for MPI tests.") # When we want to us a cuda visible devices restriction we need this flag set(SMPIARGS_FLAG_MPI_CVD "--smpiargs=-gpu" CACHE STRING "Spectrum MPI argument list for cuda-mpi tests") set(SMPIARGS_FLAG_MPI "--smpiargs=\"-gpu\"" CACHE STRING "Spectrum MPI argument list flag for MPI tests.") # Enable the GPU support. option(DCA_WITH_CUDA "Enable GPU support." ON) option(DCA_WITH_CUDA_AWARE_MPI "Enable CUDA aware MPI." ON) # Compile for Volta compute architecture. set(CUDA_GPU_ARCH "sm_70" CACHE STRING "Name of the *real* architecture to build for.") Loading cmake/dca_cuda.cmake +9 −0 Original line number Diff line number Diff line Loading @@ -20,6 +20,10 @@ if (CUDA_FOUND) list(APPEND DCA_CUDA_LIBS ${CUDA_LIBRARIES} ${CUDA_cusparse_LIBRARY} ${CUDA_cublas_LIBRARY}) CUDA_INCLUDE_DIRECTORIES(${CUDA_INCLUDE_DIRS}) set(CUDA_SEPARABLE_COMPILATION ON) set(CVD_LAUNCHER "" CACHE INTERNAL "launch script for setting the Cuda visible devices.") # Use the following script for systems with multiple gpus visible from a rank. # set(CVD_LAUNCHER "test/cvd_launcher.sh" CACHE INTERNAL "") endif() # Find MAGMA. Loading Loading @@ -48,4 +52,9 @@ endif() if (CUDA_FOUND AND DCA_HAVE_MAGMA) set(DCA_HAVE_CUDA TRUE CACHE INTERNAL "") dca_add_haves_define(DCA_HAVE_CUDA) option(DCA_WITH_CUDA_AWARE_MPI "Enable CUDA aware MPI." OFF) if(DCA_WITH_CUDA_AWARE_MPI) dca_add_haves_define(DCA_HAVE_CUDA_AWARE_MPI) endif() endif() cmake/dca_testing.cmake +5 −17 Original line number Diff line number Diff line Loading @@ -24,7 +24,7 @@ include(CMakeParseArguments) # MPI or CUDA may be given to indicate that the test requires these libraries. MPI_NUMPROC is the # number of MPI processes to use for a test with MPI, the default value is 1. function(dca_add_gtest name) set(options FAST EXTENSIVE STOCHASTIC PERFORMANCE GTEST_MAIN MPI CUDA CUDA_CVD) set(options FAST EXTENSIVE STOCHASTIC PERFORMANCE GTEST_MAIN MPI CUDA) set(oneValueArgs MPI_NUMPROC) set(multiValueArgs INCLUDE_DIRS SOURCES LIBS) cmake_parse_arguments(DCA_ADD_GTEST "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN}) Loading Loading @@ -82,14 +82,6 @@ function(dca_add_gtest name) return() endif() if (DCA_ADD_GTEST_CUDA_CVD AND NOT DCA_HAVE_CUDA ) return() endif() if (DCA_ADD_GTEST_CUDA_CVD AND (DCA_TEST_GPU_COUNT LESS 3) ) return() endif() add_executable(${name} ${name}.cpp ${DCA_ADD_GTEST_SOURCES}) # Create a macro with the project source dir. We use this as the root path for reading files in Loading @@ -104,7 +96,7 @@ function(dca_add_gtest name) target_link_libraries(${name} gtest ${DCA_ADD_GTEST_LIBS}) endif() if (DCA_ADD_GTEST_CUDA OR DCA_ADD_GTEST_CUDA_CVD) if (DCA_ADD_GTEST_CUDA) target_include_directories(${name} PRIVATE ${CUDA_TOOLKIT_INCLUDE}) target_link_libraries(${name} ${DCA_CUDA_LIBS}) target_compile_definitions(${name} PRIVATE DCA_HAVE_CUDA) Loading @@ -113,11 +105,6 @@ function(dca_add_gtest name) target_compile_definitions(${name} PRIVATE DCA_HAVE_MAGMA) endif() cuda_add_cublas_to_target(${name}) # a less hacky way to do this would be good but this is used to test # development only feature distributed G4 at the moment. if (DCA_ADD_GTEST_CUDA_CVD) set(CVD_LAUNCHER "${PROJECT_SOURCE_DIR}/test/cvdlauncher.sh") endif() endif() target_include_directories(${name} PRIVATE Loading @@ -131,13 +118,14 @@ function(dca_add_gtest name) add_test(NAME ${name} COMMAND ${TEST_RUNNER} ${MPIEXEC_NUMPROC_FLAG} ${DCA_ADD_GTEST_MPI_NUMPROC} ${MPIEXEC_PREFLAGS} ${SMPIARGS_FLAG_MPI_CVD} ${CVD_LAUNCHER} "$<TARGET_FILE:${name}>") ${MPIEXEC_PREFLAGS} ${CVD_LAUNCHER} "$<TARGET_FILE:${name}>") target_link_libraries(${name} ${MPI_C_LIBRARIES}) else() if (TEST_RUNNER) add_test(NAME ${name} COMMAND ${TEST_RUNNER} ${MPIEXEC_NUMPROC_FLAG} 1 ${MPIEXEC_PREFLAGS} ${SMPIARGS_FLAG_NOMPI} "$<TARGET_FILE:${name}>") ${MPIEXEC_PREFLAGS} ${SMPIARGS_FLAG_NOMPI} ${CVD_LAUNCHER} "$<TARGET_FILE:${name}>") else (TEST_RUNNER) add_test(NAME ${name} COMMAND "$<TARGET_FILE:${name}>") Loading include/dca/function/function.hpp +110 −114 Original line number Diff line number Diff line Loading @@ -30,15 +30,10 @@ #include "dca/distribution/dist_types.hpp" #include "dca/function/scalar_cast.hpp" #include "dca/function/set_to_zero.hpp" #include "dca/util/ignore.hpp" #include "dca/util/pack_operations.hpp" #include "dca/util/integer_division.hpp" #include "dca/util/type_utils.hpp" #include "dca/parallel/util/get_workload.hpp" #ifdef DCA_HAVE_MPI #include "mpi.h" #endif namespace dca { namespace func { // dca::func:: Loading @@ -54,9 +49,11 @@ public: // Default constructor // Constructs the function with the name name. // Postcondition: All elements are set to zero. // Special case: when distributed_g4_enabled, G4 related variables only gets // allocation of 1/p of original G4 size, where p = #mpiranks function(const std::string& name = default_name_, const DistType dist = DistType::NONE); function(const std::string& name = default_name_); // Distributed function. Access with multi-index operator() is not safe. template <class Concurrency> function(const std::string& name, const Concurrency& concurrency); // Copy constructor // Constructs the function with the a copy of elements and name of other. Loading Loading @@ -97,8 +94,6 @@ public: // The other function is in a non-specified state. function<scalartype, domain>& operator=(function<scalartype, domain>&& other); ~function(); // Resets the function by resetting the domain object and reallocating the memory for the function // elements. // Postcondition: All elements are set to zero. Loading @@ -118,11 +113,12 @@ public: return Nb_sbdms; } std::size_t size() const { return nb_elements_; return fnc_values_.size(); } // TODO: remove as it breaks class' invariant. void resize(std::size_t nb_elements_new) { nb_elements_ = nb_elements_new; fnc_values_.resize(nb_elements_new); } // Returns the size of the leaf domain with the given index. // Does not return function values! Loading @@ -131,31 +127,31 @@ public: } // Begin and end methods for compatibility with range for loop. scalartype* begin() { return fnc_values; auto begin() { return fnc_values_.begin(); } scalartype* end() { return fnc_values + nb_elements_; auto end() { return fnc_values_.end(); } const scalartype* begin() const { return fnc_values; const auto begin() const { return fnc_values_.begin(); } const scalartype* end() const { return fnc_values + nb_elements_; const auto end() const { return fnc_values_.end(); } // Returns a pointer to the function's elements. scalartype* values() { return fnc_values; return fnc_values_.data(); } const scalartype* values() const { return fnc_values; return fnc_values_.data(); } scalartype* data() { return fnc_values; return fnc_values_.data(); } const scalartype* data() const { return fnc_values; return fnc_values_.data(); } // Loading Loading @@ -197,7 +193,7 @@ public: template <typename T> int subind_2_linind(const T ind) const { static_assert(std::is_integral<T>::value, "Index ind must be an integer."); assert(ind >= 0 && ind < nb_elements_); assert(ind >= 0 && ind < size()); return ind; } Loading @@ -211,24 +207,24 @@ public: template <typename T> scalartype& operator()(const T linind) { static_assert(std::is_integral<T>::value, "Index linind must be an integer."); assert(linind >= 0 && linind < nb_elements_); return fnc_values[linind]; assert(linind >= 0 && linind < size()); return fnc_values_[linind]; } template <typename T> const scalartype& operator()(const T linind) const { static_assert(std::is_integral<T>::value, "Index linind must be an integer."); assert(linind >= 0 && linind < nb_elements_); return fnc_values[linind]; assert(linind >= 0 && linind < size()); return fnc_values_[linind]; } template <typename... Ts> scalartype& operator()(const Ts... subindices) { // We need to cast all indices to the same type for dmn_variadic. return fnc_values[dmn(static_cast<int>(subindices)...)]; return fnc_values_[dmn(static_cast<int>(subindices)...)]; } template <typename... Ts> const scalartype& operator()(const Ts... subindices) const { return fnc_values[dmn(static_cast<int>(subindices)...)]; return fnc_values_[dmn(static_cast<int>(subindices)...)]; } void operator+=(const function<scalartype, domain>& other); Loading @@ -243,7 +239,7 @@ public: void operator/=(scalartype c); // Equal-comparison opertor // Returns true if the function's elements (fnc_values) are equal to other's elements, false // Returns true if the function's elements (fnc_values_) are equal to other's elements, false // otherwise. // TODO: Make the equal-comparison operator a non-member function. bool operator==(const function<scalartype, domain>& other) const; Loading Loading @@ -276,47 +272,58 @@ public: template <class concurrency_t> void unpack(const concurrency_t& concurrency, char* buffer, int buffer_size, int& position); // Gather a function that was initialized as distributed. // Precondition: concurrency must be the same object used during construction. template <class Concurrency> function gather(const Concurrency& concurrency) const; private: std::string name_; std::string function_type; domain dmn; // TODO: Remove domain object? std::size_t nb_elements_; // The subdomains (sbdmn) represent the leaf domains, not the branch domains. int Nb_sbdms; const std::vector<std::size_t>& size_sbdm; // TODO: Remove? const std::vector<std::size_t>& step_sbdm; // TODO: Remove? scalartype* fnc_values; std::vector<scalartype> fnc_values_; }; template <typename scalartype, class domain> const std::string function<scalartype, domain>::default_name_ = "no-name"; template <typename scalartype, class domain> function<scalartype, domain>::function(const std::string& name, DistType dist) function<scalartype, domain>::function(const std::string& name) : name_(name), function_type(__PRETTY_FUNCTION__), dmn(), nb_elements_(dmn.get_size()), Nb_sbdms(dmn.get_leaf_domain_sizes().size()), size_sbdm(dmn.get_leaf_domain_sizes()), step_sbdm(dmn.get_leaf_domain_steps()), fnc_values(nullptr) { dca::util::ignoreUnused(dist); #ifdef DCA_HAVE_MPI if (dist == DistType::MPI) { int my_rank, mpi_size; MPI_Comm_size(MPI_COMM_WORLD, &mpi_size); MPI_Comm_rank(MPI_COMM_WORLD, &my_rank); nb_elements_ = dca::parallel::util::getWorkload(dmn.get_size(), mpi_size, my_rank); fnc_values_(dmn.get_size()) { for (int linind = 0; linind < size(); ++linind) setToZero(fnc_values_[linind]); } #endif // DCA_HAVE_MPI fnc_values = new scalartype[nb_elements_]; for (int linind = 0; linind < nb_elements_; ++linind) setToZero(fnc_values[linind]); template <typename scalartype, class domain> template <class Concurrency> function<scalartype, domain>::function(const std::string& name, const Concurrency& concurrency) : name_(name), function_type(__PRETTY_FUNCTION__), dmn(), Nb_sbdms(dmn.get_leaf_domain_sizes().size()), size_sbdm(dmn.get_leaf_domain_sizes()), step_sbdm(dmn.get_leaf_domain_steps()) { // TODO: multi-index access to partitioned function is not safe. const std::size_t mpi_size = concurrency.number_of_processors(); const std::size_t nb_elements = dca::util::ceilDiv(dmn.get_size(), mpi_size); fnc_values_.resize(nb_elements); for (int linind = 0; linind < nb_elements; ++linind) setToZero(fnc_values_[linind]); } template <typename scalartype, class domain> Loading @@ -324,17 +331,13 @@ function<scalartype, domain>::function(const function<scalartype, domain>& other : name_(other.name_), function_type(__PRETTY_FUNCTION__), dmn(), nb_elements_(dmn.get_size()), Nb_sbdms(dmn.get_leaf_domain_sizes().size()), size_sbdm(dmn.get_leaf_domain_sizes()), step_sbdm(dmn.get_leaf_domain_steps()), fnc_values(nullptr) { fnc_values_(other.fnc_values_) { if (dmn.get_size() != other.dmn.get_size()) // The other function has not been resetted after the domain was initialized. throw std::logic_error("Copy construction from a not yet resetted function."); fnc_values = new scalartype[nb_elements_]; std::copy_n(other.fnc_values, nb_elements_, fnc_values); } template <typename scalartype, class domain> Loading @@ -342,18 +345,13 @@ function<scalartype, domain>::function(function<scalartype, domain>&& other) : name_(std::move(other.name_)), function_type(__PRETTY_FUNCTION__), dmn(), nb_elements_(dmn.get_size()), Nb_sbdms(dmn.get_leaf_domain_sizes().size()), size_sbdm(dmn.get_leaf_domain_sizes()), step_sbdm(dmn.get_leaf_domain_steps()), fnc_values(nullptr) { fnc_values_(std::move(other.fnc_values_)) { if (dmn.get_size() != other.dmn.get_size()) // The other function has not been resetted after the domain was initialized. throw std::logic_error("Move construction from a not yet resetted function."); fnc_values = other.fnc_values; other.nb_elements_ = 0; other.fnc_values = nullptr; } template <typename scalartype, class domain> Loading @@ -370,7 +368,7 @@ function<scalartype, domain>& function<scalartype, domain>::operator=( throw std::logic_error("Copy assignment from a not yet resetted function."); } std::copy_n(other.values(), nb_elements_, fnc_values); fnc_values_ = other.fnc_values_; } return *this; Loading @@ -383,7 +381,7 @@ function<Scalar, domain>& function<Scalar, domain>::operator=(const function<Sca throw(std::logic_error("Function size does not match.")); } std::copy_n(other.values(), nb_elements_, fnc_values); fnc_values_ = other.fnc_values_; return *this; } Loading @@ -402,33 +400,21 @@ function<scalartype, domain>& function<scalartype, domain>::operator=( throw std::logic_error("Move assignment from a not yet resetted function."); } delete[] fnc_values; fnc_values = other.fnc_values; other.nb_elements_ = 0; other.fnc_values = nullptr; fnc_values_ = std::move(other.fnc_values_); } return *this; } template <typename scalartype, class domain> function<scalartype, domain>::~function() { delete[] fnc_values; } template <typename scalartype, class domain> void function<scalartype, domain>::reset() { dmn.reset(); nb_elements_ = dmn.get_size(); fnc_values_.resize(dmn.get_size()); Nb_sbdms = dmn.get_leaf_domain_sizes().size(); delete[] fnc_values; fnc_values = new scalartype[nb_elements_]; for (int linind = 0; linind < nb_elements_; ++linind) setToZero(fnc_values[linind]); for (int linind = 0; linind < size(); ++linind) setToZero(fnc_values_[linind]); } template <typename scalartype, class domain> Loading Loading @@ -480,8 +466,8 @@ scalartype& function<scalartype, domain>::operator()(const int* const subind) { int linind; subind_2_linind(subind, linind); assert(linind >= 0 && linind < nb_elements_); return fnc_values[linind]; assert(linind >= 0 && linind < size()); return fnc_values_[linind]; } template <typename scalartype, class domain> Loading @@ -489,64 +475,64 @@ const scalartype& function<scalartype, domain>::operator()(const int* const subi int linind; subind_2_linind(subind, linind); assert(linind >= 0 && linind < nb_elements_); return fnc_values[linind]; assert(linind >= 0 && linind < size()); return fnc_values_[linind]; } template <typename scalartype, class domain> void function<scalartype, domain>::operator+=(const function<scalartype, domain>& other) { for (int linind = 0; linind < nb_elements_; ++linind) fnc_values[linind] += other(linind); for (int linind = 0; linind < size(); ++linind) fnc_values_[linind] += other(linind); } template <typename scalartype, class domain> void function<scalartype, domain>::operator-=(const function<scalartype, domain>& other) { for (int linind = 0; linind < nb_elements_; ++linind) fnc_values[linind] -= other(linind); for (int linind = 0; linind < size(); ++linind) fnc_values_[linind] -= other(linind); } template <typename scalartype, class domain> void function<scalartype, domain>::operator*=(const function<scalartype, domain>& other) { for (int linind = 0; linind < nb_elements_; ++linind) fnc_values[linind] *= other(linind); for (int linind = 0; linind < size(); ++linind) fnc_values_[linind] *= other(linind); } template <typename scalartype, class domain> void function<scalartype, domain>::operator/=(const function<scalartype, domain>& other) { for (int linind = 0; linind < nb_elements_; ++linind) { for (int linind = 0; linind < size(); ++linind) { assert(std::abs(other(linind)) > 1.e-16); fnc_values[linind] /= other(linind); fnc_values_[linind] /= other(linind); } } template <typename scalartype, class domain> void function<scalartype, domain>::operator=(const scalartype c) { for (int linind = 0; linind < nb_elements_; linind++) fnc_values[linind] = c; for (int linind = 0; linind < size(); linind++) fnc_values_[linind] = c; } template <typename scalartype, class domain> void function<scalartype, domain>::operator+=(const scalartype c) { for (int linind = 0; linind < nb_elements_; linind++) fnc_values[linind] += c; for (int linind = 0; linind < size(); linind++) fnc_values_[linind] += c; } template <typename scalartype, class domain> void function<scalartype, domain>::operator-=(const scalartype c) { for (int linind = 0; linind < nb_elements_; linind++) fnc_values[linind] -= c; for (int linind = 0; linind < size(); linind++) fnc_values_[linind] -= c; } template <typename scalartype, class domain> void function<scalartype, domain>::operator*=(const scalartype c) { for (int linind = 0; linind < nb_elements_; linind++) fnc_values[linind] *= c; for (int linind = 0; linind < size(); linind++) fnc_values_[linind] *= c; } template <typename scalartype, class domain> void function<scalartype, domain>::operator/=(const scalartype c) { for (int linind = 0; linind < nb_elements_; linind++) fnc_values[linind] /= c; for (int linind = 0; linind < size(); linind++) fnc_values_[linind] /= c; } template <typename scalartype, class domain> Loading @@ -555,8 +541,8 @@ bool function<scalartype, domain>::operator==(const function<scalartype, domain> // One of the function has not been resetted after the domain was initialized. throw std::logic_error("Comparing functions of different sizes."); for (int i = 0; i < nb_elements_; ++i) if (other(i) != fnc_values[i]) for (int i = 0; i < size(); ++i) if (other(i) != fnc_values_[i]) return false; return true; Loading @@ -574,7 +560,8 @@ void function<scalartype, domain>::slice(const int sbdm_index, int* subind, subind_2_linind(subind, linind); for (int i = 0; i < size_sbdm[sbdm_index]; i++) fnc_vals[i] = ScalarCast<new_scalartype>::execute(fnc_values[linind + i * step_sbdm[sbdm_index]]); fnc_vals[i] = ScalarCast<new_scalartype>::execute(fnc_values_[linind + i * step_sbdm[sbdm_index]]); } template <typename scalartype, class domain> Loading Loading @@ -602,12 +589,12 @@ void function<scalartype, domain>::slice(const int sbdm_index_1, const int sbdm_ for (int j = 0; j < size_sbdm_2; j++) { fnc_ptr_left = &fnc_vals[0 + j * size_sbdm_1]; fnc_ptr_right = &fnc_values[linind + j * step_sbdm_2]; fnc_ptr_right = &fnc_values_[linind + j * step_sbdm_2]; for (int i = 0; i < size_sbdm_1; i++) fnc_ptr_left[i] = fnc_ptr_right[i * step_sbdm_1]; // fnc_vals[i+j*size_sbdm[sbdm_index_1]] = fnc_values[linind + i*step_sbdm[sbdm_index_1] + // j*step_sbdm[sbdm_index_2]]; // fnc_vals[i+j*size_sbdm[sbdm_index_1]] = fnc_values_[linind + i*step_sbdm[sbdm_index_1] // + j*step_sbdm[sbdm_index_2]]; } } Loading @@ -623,7 +610,7 @@ void function<scalartype, domain>::distribute(const int sbdm_index, int* subind, subind_2_linind(subind, linind); for (int i = 0; i < size_sbdm[sbdm_index]; i++) fnc_values[linind + i * step_sbdm[sbdm_index]] = ScalarCast<scalartype>::execute(fnc_vals[i]); fnc_values_[linind + i * step_sbdm[sbdm_index]] = ScalarCast<scalartype>::execute(fnc_vals[i]); } template <typename scalartype, class domain> Loading @@ -642,7 +629,7 @@ void function<scalartype, domain>::distribute(const int sbdm_index_1, const int for (int i = 0; i < size_sbdm[sbdm_index_1]; i++) for (int j = 0; j < size_sbdm[sbdm_index_2]; j++) fnc_values[linind + i * step_sbdm[sbdm_index_1] + j * step_sbdm[sbdm_index_2]] = fnc_values_[linind + i * step_sbdm[sbdm_index_1] + j * step_sbdm[sbdm_index_2]] = fnc_vals[i + j * size_sbdm[sbdm_index_1]]; } Loading @@ -661,8 +648,8 @@ void function<scalartype, domain>::print_fingerprint(std::ostream& stream) const stream << " " << size_sbdm[i]; stream << "\n"; stream << "# elements: " << nb_elements_ << "\n"; stream << "memory: " << nb_elements_ * sizeof(scalartype) / (1024. * 1024.) << " MiB\n"; stream << "# elements: " << size() << "\n"; stream << "memory: " << size() * sizeof(scalartype) / (1024. * 1024.) << " MiB\n"; stream << "****************************************\n" << std::endl; } Loading @@ -673,11 +660,11 @@ void function<scalartype, domain>::print_elements(std::ostream& stream) const { stream << "****************************************\n"; std::vector<int> subind(Nb_sbdms); for (int lindex = 0; lindex < nb_elements_; ++lindex) { for (int lindex = 0; lindex < size(); ++lindex) { linind_2_subind(lindex, subind); for (int index : subind) stream << index << "\t"; stream << " \t" << fnc_values[lindex] << "\n"; stream << " \t" << fnc_values_[lindex] << "\n"; } stream << "****************************************\n" << std::endl; Loading Loading @@ -705,6 +692,15 @@ void function<scalartype, domain>::unpack(const concurrency_t& concurrency, char concurrency.unpack(buffer, buffer_size, position, *this); } template <typename scalartype, class domain> template <class Concurrency> function<scalartype, domain> function<scalartype, domain>::gather(const Concurrency& concurrency) const { function result(name_); concurrency.gather(*this, result, concurrency); return result; } } // namespace func } // namespace dca Loading Loading
CMakeLists.txt +0 −6 Original line number Diff line number Diff line Loading @@ -147,11 +147,7 @@ set(DCA_LIBS cuda_utils ) set(SYSTEM_GPU_COUNT 0) if (DCA_HAVE_CUDA) EXECUTE_PROCESS(COMMAND bash -c "nvidia-smi -L | awk 'BEGIN { num_gpu=0;} /GPU/ { num_gpu++;} END { printf(\"%d\", num_gpu) }'" OUTPUT_VARIABLE SYSTEM_GPU_COUNT) list(APPEND DCA_LIBS blas_kernels dnfft_kernels Loading @@ -171,8 +167,6 @@ option(DCA_WITH_TESTS_EXTENSIVE "Build DCA++'s extensive tests." OFF) option(DCA_WITH_TESTS_PERFORMANCE "Build DCA++'s performance tests. (Only in Release mode.)" OFF) option(DCA_WITH_TESTS_STOCHASTIC "Build DCA++'s stochastic tests." OFF) set(DCA_TEST_GPU_COUNT "${SYSTEM_GPU_COUNT}" CACHE INTEGER "Number of GPUs available on one node for one test.") set(TEST_RUNNER "" CACHE STRING "Command for executing (MPI) programs.") set(MPIEXEC_NUMPROC_FLAG "-n" CACHE STRING "Flag used by TEST_RUNNER to specify the number of processes.") set(MPIEXEC_PREFLAGS "" CACHE STRING "Flags to pass to TEST_RUNNER directly before the executable to run.") Loading
build-aux/summit.cmake +2 −5 Original line number Diff line number Diff line Loading @@ -23,14 +23,11 @@ set(MPIEXEC_PREFLAGS "-a 1 -g 1 -c 5" CACHE STRING set(SMPIARGS_FLAG_NOMPI "--smpiargs=none" CACHE STRING "Spectrum MPI argument list flag for serial tests.") # Let's keep this option in case we need it again in the future. set(SMPIARGS_FLAG_MPI "" CACHE STRING "Spectrum MPI argument list flag for MPI tests.") # When we want to us a cuda visible devices restriction we need this flag set(SMPIARGS_FLAG_MPI_CVD "--smpiargs=-gpu" CACHE STRING "Spectrum MPI argument list for cuda-mpi tests") set(SMPIARGS_FLAG_MPI "--smpiargs=\"-gpu\"" CACHE STRING "Spectrum MPI argument list flag for MPI tests.") # Enable the GPU support. option(DCA_WITH_CUDA "Enable GPU support." ON) option(DCA_WITH_CUDA_AWARE_MPI "Enable CUDA aware MPI." ON) # Compile for Volta compute architecture. set(CUDA_GPU_ARCH "sm_70" CACHE STRING "Name of the *real* architecture to build for.") Loading
cmake/dca_cuda.cmake +9 −0 Original line number Diff line number Diff line Loading @@ -20,6 +20,10 @@ if (CUDA_FOUND) list(APPEND DCA_CUDA_LIBS ${CUDA_LIBRARIES} ${CUDA_cusparse_LIBRARY} ${CUDA_cublas_LIBRARY}) CUDA_INCLUDE_DIRECTORIES(${CUDA_INCLUDE_DIRS}) set(CUDA_SEPARABLE_COMPILATION ON) set(CVD_LAUNCHER "" CACHE INTERNAL "launch script for setting the Cuda visible devices.") # Use the following script for systems with multiple gpus visible from a rank. # set(CVD_LAUNCHER "test/cvd_launcher.sh" CACHE INTERNAL "") endif() # Find MAGMA. Loading Loading @@ -48,4 +52,9 @@ endif() if (CUDA_FOUND AND DCA_HAVE_MAGMA) set(DCA_HAVE_CUDA TRUE CACHE INTERNAL "") dca_add_haves_define(DCA_HAVE_CUDA) option(DCA_WITH_CUDA_AWARE_MPI "Enable CUDA aware MPI." OFF) if(DCA_WITH_CUDA_AWARE_MPI) dca_add_haves_define(DCA_HAVE_CUDA_AWARE_MPI) endif() endif()
cmake/dca_testing.cmake +5 −17 Original line number Diff line number Diff line Loading @@ -24,7 +24,7 @@ include(CMakeParseArguments) # MPI or CUDA may be given to indicate that the test requires these libraries. MPI_NUMPROC is the # number of MPI processes to use for a test with MPI, the default value is 1. function(dca_add_gtest name) set(options FAST EXTENSIVE STOCHASTIC PERFORMANCE GTEST_MAIN MPI CUDA CUDA_CVD) set(options FAST EXTENSIVE STOCHASTIC PERFORMANCE GTEST_MAIN MPI CUDA) set(oneValueArgs MPI_NUMPROC) set(multiValueArgs INCLUDE_DIRS SOURCES LIBS) cmake_parse_arguments(DCA_ADD_GTEST "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN}) Loading Loading @@ -82,14 +82,6 @@ function(dca_add_gtest name) return() endif() if (DCA_ADD_GTEST_CUDA_CVD AND NOT DCA_HAVE_CUDA ) return() endif() if (DCA_ADD_GTEST_CUDA_CVD AND (DCA_TEST_GPU_COUNT LESS 3) ) return() endif() add_executable(${name} ${name}.cpp ${DCA_ADD_GTEST_SOURCES}) # Create a macro with the project source dir. We use this as the root path for reading files in Loading @@ -104,7 +96,7 @@ function(dca_add_gtest name) target_link_libraries(${name} gtest ${DCA_ADD_GTEST_LIBS}) endif() if (DCA_ADD_GTEST_CUDA OR DCA_ADD_GTEST_CUDA_CVD) if (DCA_ADD_GTEST_CUDA) target_include_directories(${name} PRIVATE ${CUDA_TOOLKIT_INCLUDE}) target_link_libraries(${name} ${DCA_CUDA_LIBS}) target_compile_definitions(${name} PRIVATE DCA_HAVE_CUDA) Loading @@ -113,11 +105,6 @@ function(dca_add_gtest name) target_compile_definitions(${name} PRIVATE DCA_HAVE_MAGMA) endif() cuda_add_cublas_to_target(${name}) # a less hacky way to do this would be good but this is used to test # development only feature distributed G4 at the moment. if (DCA_ADD_GTEST_CUDA_CVD) set(CVD_LAUNCHER "${PROJECT_SOURCE_DIR}/test/cvdlauncher.sh") endif() endif() target_include_directories(${name} PRIVATE Loading @@ -131,13 +118,14 @@ function(dca_add_gtest name) add_test(NAME ${name} COMMAND ${TEST_RUNNER} ${MPIEXEC_NUMPROC_FLAG} ${DCA_ADD_GTEST_MPI_NUMPROC} ${MPIEXEC_PREFLAGS} ${SMPIARGS_FLAG_MPI_CVD} ${CVD_LAUNCHER} "$<TARGET_FILE:${name}>") ${MPIEXEC_PREFLAGS} ${CVD_LAUNCHER} "$<TARGET_FILE:${name}>") target_link_libraries(${name} ${MPI_C_LIBRARIES}) else() if (TEST_RUNNER) add_test(NAME ${name} COMMAND ${TEST_RUNNER} ${MPIEXEC_NUMPROC_FLAG} 1 ${MPIEXEC_PREFLAGS} ${SMPIARGS_FLAG_NOMPI} "$<TARGET_FILE:${name}>") ${MPIEXEC_PREFLAGS} ${SMPIARGS_FLAG_NOMPI} ${CVD_LAUNCHER} "$<TARGET_FILE:${name}>") else (TEST_RUNNER) add_test(NAME ${name} COMMAND "$<TARGET_FILE:${name}>") Loading
include/dca/function/function.hpp +110 −114 Original line number Diff line number Diff line Loading @@ -30,15 +30,10 @@ #include "dca/distribution/dist_types.hpp" #include "dca/function/scalar_cast.hpp" #include "dca/function/set_to_zero.hpp" #include "dca/util/ignore.hpp" #include "dca/util/pack_operations.hpp" #include "dca/util/integer_division.hpp" #include "dca/util/type_utils.hpp" #include "dca/parallel/util/get_workload.hpp" #ifdef DCA_HAVE_MPI #include "mpi.h" #endif namespace dca { namespace func { // dca::func:: Loading @@ -54,9 +49,11 @@ public: // Default constructor // Constructs the function with the name name. // Postcondition: All elements are set to zero. // Special case: when distributed_g4_enabled, G4 related variables only gets // allocation of 1/p of original G4 size, where p = #mpiranks function(const std::string& name = default_name_, const DistType dist = DistType::NONE); function(const std::string& name = default_name_); // Distributed function. Access with multi-index operator() is not safe. template <class Concurrency> function(const std::string& name, const Concurrency& concurrency); // Copy constructor // Constructs the function with the a copy of elements and name of other. Loading Loading @@ -97,8 +94,6 @@ public: // The other function is in a non-specified state. function<scalartype, domain>& operator=(function<scalartype, domain>&& other); ~function(); // Resets the function by resetting the domain object and reallocating the memory for the function // elements. // Postcondition: All elements are set to zero. Loading @@ -118,11 +113,12 @@ public: return Nb_sbdms; } std::size_t size() const { return nb_elements_; return fnc_values_.size(); } // TODO: remove as it breaks class' invariant. void resize(std::size_t nb_elements_new) { nb_elements_ = nb_elements_new; fnc_values_.resize(nb_elements_new); } // Returns the size of the leaf domain with the given index. // Does not return function values! Loading @@ -131,31 +127,31 @@ public: } // Begin and end methods for compatibility with range for loop. scalartype* begin() { return fnc_values; auto begin() { return fnc_values_.begin(); } scalartype* end() { return fnc_values + nb_elements_; auto end() { return fnc_values_.end(); } const scalartype* begin() const { return fnc_values; const auto begin() const { return fnc_values_.begin(); } const scalartype* end() const { return fnc_values + nb_elements_; const auto end() const { return fnc_values_.end(); } // Returns a pointer to the function's elements. scalartype* values() { return fnc_values; return fnc_values_.data(); } const scalartype* values() const { return fnc_values; return fnc_values_.data(); } scalartype* data() { return fnc_values; return fnc_values_.data(); } const scalartype* data() const { return fnc_values; return fnc_values_.data(); } // Loading Loading @@ -197,7 +193,7 @@ public: template <typename T> int subind_2_linind(const T ind) const { static_assert(std::is_integral<T>::value, "Index ind must be an integer."); assert(ind >= 0 && ind < nb_elements_); assert(ind >= 0 && ind < size()); return ind; } Loading @@ -211,24 +207,24 @@ public: template <typename T> scalartype& operator()(const T linind) { static_assert(std::is_integral<T>::value, "Index linind must be an integer."); assert(linind >= 0 && linind < nb_elements_); return fnc_values[linind]; assert(linind >= 0 && linind < size()); return fnc_values_[linind]; } template <typename T> const scalartype& operator()(const T linind) const { static_assert(std::is_integral<T>::value, "Index linind must be an integer."); assert(linind >= 0 && linind < nb_elements_); return fnc_values[linind]; assert(linind >= 0 && linind < size()); return fnc_values_[linind]; } template <typename... Ts> scalartype& operator()(const Ts... subindices) { // We need to cast all indices to the same type for dmn_variadic. return fnc_values[dmn(static_cast<int>(subindices)...)]; return fnc_values_[dmn(static_cast<int>(subindices)...)]; } template <typename... Ts> const scalartype& operator()(const Ts... subindices) const { return fnc_values[dmn(static_cast<int>(subindices)...)]; return fnc_values_[dmn(static_cast<int>(subindices)...)]; } void operator+=(const function<scalartype, domain>& other); Loading @@ -243,7 +239,7 @@ public: void operator/=(scalartype c); // Equal-comparison opertor // Returns true if the function's elements (fnc_values) are equal to other's elements, false // Returns true if the function's elements (fnc_values_) are equal to other's elements, false // otherwise. // TODO: Make the equal-comparison operator a non-member function. bool operator==(const function<scalartype, domain>& other) const; Loading Loading @@ -276,47 +272,58 @@ public: template <class concurrency_t> void unpack(const concurrency_t& concurrency, char* buffer, int buffer_size, int& position); // Gather a function that was initialized as distributed. // Precondition: concurrency must be the same object used during construction. template <class Concurrency> function gather(const Concurrency& concurrency) const; private: std::string name_; std::string function_type; domain dmn; // TODO: Remove domain object? std::size_t nb_elements_; // The subdomains (sbdmn) represent the leaf domains, not the branch domains. int Nb_sbdms; const std::vector<std::size_t>& size_sbdm; // TODO: Remove? const std::vector<std::size_t>& step_sbdm; // TODO: Remove? scalartype* fnc_values; std::vector<scalartype> fnc_values_; }; template <typename scalartype, class domain> const std::string function<scalartype, domain>::default_name_ = "no-name"; template <typename scalartype, class domain> function<scalartype, domain>::function(const std::string& name, DistType dist) function<scalartype, domain>::function(const std::string& name) : name_(name), function_type(__PRETTY_FUNCTION__), dmn(), nb_elements_(dmn.get_size()), Nb_sbdms(dmn.get_leaf_domain_sizes().size()), size_sbdm(dmn.get_leaf_domain_sizes()), step_sbdm(dmn.get_leaf_domain_steps()), fnc_values(nullptr) { dca::util::ignoreUnused(dist); #ifdef DCA_HAVE_MPI if (dist == DistType::MPI) { int my_rank, mpi_size; MPI_Comm_size(MPI_COMM_WORLD, &mpi_size); MPI_Comm_rank(MPI_COMM_WORLD, &my_rank); nb_elements_ = dca::parallel::util::getWorkload(dmn.get_size(), mpi_size, my_rank); fnc_values_(dmn.get_size()) { for (int linind = 0; linind < size(); ++linind) setToZero(fnc_values_[linind]); } #endif // DCA_HAVE_MPI fnc_values = new scalartype[nb_elements_]; for (int linind = 0; linind < nb_elements_; ++linind) setToZero(fnc_values[linind]); template <typename scalartype, class domain> template <class Concurrency> function<scalartype, domain>::function(const std::string& name, const Concurrency& concurrency) : name_(name), function_type(__PRETTY_FUNCTION__), dmn(), Nb_sbdms(dmn.get_leaf_domain_sizes().size()), size_sbdm(dmn.get_leaf_domain_sizes()), step_sbdm(dmn.get_leaf_domain_steps()) { // TODO: multi-index access to partitioned function is not safe. const std::size_t mpi_size = concurrency.number_of_processors(); const std::size_t nb_elements = dca::util::ceilDiv(dmn.get_size(), mpi_size); fnc_values_.resize(nb_elements); for (int linind = 0; linind < nb_elements; ++linind) setToZero(fnc_values_[linind]); } template <typename scalartype, class domain> Loading @@ -324,17 +331,13 @@ function<scalartype, domain>::function(const function<scalartype, domain>& other : name_(other.name_), function_type(__PRETTY_FUNCTION__), dmn(), nb_elements_(dmn.get_size()), Nb_sbdms(dmn.get_leaf_domain_sizes().size()), size_sbdm(dmn.get_leaf_domain_sizes()), step_sbdm(dmn.get_leaf_domain_steps()), fnc_values(nullptr) { fnc_values_(other.fnc_values_) { if (dmn.get_size() != other.dmn.get_size()) // The other function has not been resetted after the domain was initialized. throw std::logic_error("Copy construction from a not yet resetted function."); fnc_values = new scalartype[nb_elements_]; std::copy_n(other.fnc_values, nb_elements_, fnc_values); } template <typename scalartype, class domain> Loading @@ -342,18 +345,13 @@ function<scalartype, domain>::function(function<scalartype, domain>&& other) : name_(std::move(other.name_)), function_type(__PRETTY_FUNCTION__), dmn(), nb_elements_(dmn.get_size()), Nb_sbdms(dmn.get_leaf_domain_sizes().size()), size_sbdm(dmn.get_leaf_domain_sizes()), step_sbdm(dmn.get_leaf_domain_steps()), fnc_values(nullptr) { fnc_values_(std::move(other.fnc_values_)) { if (dmn.get_size() != other.dmn.get_size()) // The other function has not been resetted after the domain was initialized. throw std::logic_error("Move construction from a not yet resetted function."); fnc_values = other.fnc_values; other.nb_elements_ = 0; other.fnc_values = nullptr; } template <typename scalartype, class domain> Loading @@ -370,7 +368,7 @@ function<scalartype, domain>& function<scalartype, domain>::operator=( throw std::logic_error("Copy assignment from a not yet resetted function."); } std::copy_n(other.values(), nb_elements_, fnc_values); fnc_values_ = other.fnc_values_; } return *this; Loading @@ -383,7 +381,7 @@ function<Scalar, domain>& function<Scalar, domain>::operator=(const function<Sca throw(std::logic_error("Function size does not match.")); } std::copy_n(other.values(), nb_elements_, fnc_values); fnc_values_ = other.fnc_values_; return *this; } Loading @@ -402,33 +400,21 @@ function<scalartype, domain>& function<scalartype, domain>::operator=( throw std::logic_error("Move assignment from a not yet resetted function."); } delete[] fnc_values; fnc_values = other.fnc_values; other.nb_elements_ = 0; other.fnc_values = nullptr; fnc_values_ = std::move(other.fnc_values_); } return *this; } template <typename scalartype, class domain> function<scalartype, domain>::~function() { delete[] fnc_values; } template <typename scalartype, class domain> void function<scalartype, domain>::reset() { dmn.reset(); nb_elements_ = dmn.get_size(); fnc_values_.resize(dmn.get_size()); Nb_sbdms = dmn.get_leaf_domain_sizes().size(); delete[] fnc_values; fnc_values = new scalartype[nb_elements_]; for (int linind = 0; linind < nb_elements_; ++linind) setToZero(fnc_values[linind]); for (int linind = 0; linind < size(); ++linind) setToZero(fnc_values_[linind]); } template <typename scalartype, class domain> Loading Loading @@ -480,8 +466,8 @@ scalartype& function<scalartype, domain>::operator()(const int* const subind) { int linind; subind_2_linind(subind, linind); assert(linind >= 0 && linind < nb_elements_); return fnc_values[linind]; assert(linind >= 0 && linind < size()); return fnc_values_[linind]; } template <typename scalartype, class domain> Loading @@ -489,64 +475,64 @@ const scalartype& function<scalartype, domain>::operator()(const int* const subi int linind; subind_2_linind(subind, linind); assert(linind >= 0 && linind < nb_elements_); return fnc_values[linind]; assert(linind >= 0 && linind < size()); return fnc_values_[linind]; } template <typename scalartype, class domain> void function<scalartype, domain>::operator+=(const function<scalartype, domain>& other) { for (int linind = 0; linind < nb_elements_; ++linind) fnc_values[linind] += other(linind); for (int linind = 0; linind < size(); ++linind) fnc_values_[linind] += other(linind); } template <typename scalartype, class domain> void function<scalartype, domain>::operator-=(const function<scalartype, domain>& other) { for (int linind = 0; linind < nb_elements_; ++linind) fnc_values[linind] -= other(linind); for (int linind = 0; linind < size(); ++linind) fnc_values_[linind] -= other(linind); } template <typename scalartype, class domain> void function<scalartype, domain>::operator*=(const function<scalartype, domain>& other) { for (int linind = 0; linind < nb_elements_; ++linind) fnc_values[linind] *= other(linind); for (int linind = 0; linind < size(); ++linind) fnc_values_[linind] *= other(linind); } template <typename scalartype, class domain> void function<scalartype, domain>::operator/=(const function<scalartype, domain>& other) { for (int linind = 0; linind < nb_elements_; ++linind) { for (int linind = 0; linind < size(); ++linind) { assert(std::abs(other(linind)) > 1.e-16); fnc_values[linind] /= other(linind); fnc_values_[linind] /= other(linind); } } template <typename scalartype, class domain> void function<scalartype, domain>::operator=(const scalartype c) { for (int linind = 0; linind < nb_elements_; linind++) fnc_values[linind] = c; for (int linind = 0; linind < size(); linind++) fnc_values_[linind] = c; } template <typename scalartype, class domain> void function<scalartype, domain>::operator+=(const scalartype c) { for (int linind = 0; linind < nb_elements_; linind++) fnc_values[linind] += c; for (int linind = 0; linind < size(); linind++) fnc_values_[linind] += c; } template <typename scalartype, class domain> void function<scalartype, domain>::operator-=(const scalartype c) { for (int linind = 0; linind < nb_elements_; linind++) fnc_values[linind] -= c; for (int linind = 0; linind < size(); linind++) fnc_values_[linind] -= c; } template <typename scalartype, class domain> void function<scalartype, domain>::operator*=(const scalartype c) { for (int linind = 0; linind < nb_elements_; linind++) fnc_values[linind] *= c; for (int linind = 0; linind < size(); linind++) fnc_values_[linind] *= c; } template <typename scalartype, class domain> void function<scalartype, domain>::operator/=(const scalartype c) { for (int linind = 0; linind < nb_elements_; linind++) fnc_values[linind] /= c; for (int linind = 0; linind < size(); linind++) fnc_values_[linind] /= c; } template <typename scalartype, class domain> Loading @@ -555,8 +541,8 @@ bool function<scalartype, domain>::operator==(const function<scalartype, domain> // One of the function has not been resetted after the domain was initialized. throw std::logic_error("Comparing functions of different sizes."); for (int i = 0; i < nb_elements_; ++i) if (other(i) != fnc_values[i]) for (int i = 0; i < size(); ++i) if (other(i) != fnc_values_[i]) return false; return true; Loading @@ -574,7 +560,8 @@ void function<scalartype, domain>::slice(const int sbdm_index, int* subind, subind_2_linind(subind, linind); for (int i = 0; i < size_sbdm[sbdm_index]; i++) fnc_vals[i] = ScalarCast<new_scalartype>::execute(fnc_values[linind + i * step_sbdm[sbdm_index]]); fnc_vals[i] = ScalarCast<new_scalartype>::execute(fnc_values_[linind + i * step_sbdm[sbdm_index]]); } template <typename scalartype, class domain> Loading Loading @@ -602,12 +589,12 @@ void function<scalartype, domain>::slice(const int sbdm_index_1, const int sbdm_ for (int j = 0; j < size_sbdm_2; j++) { fnc_ptr_left = &fnc_vals[0 + j * size_sbdm_1]; fnc_ptr_right = &fnc_values[linind + j * step_sbdm_2]; fnc_ptr_right = &fnc_values_[linind + j * step_sbdm_2]; for (int i = 0; i < size_sbdm_1; i++) fnc_ptr_left[i] = fnc_ptr_right[i * step_sbdm_1]; // fnc_vals[i+j*size_sbdm[sbdm_index_1]] = fnc_values[linind + i*step_sbdm[sbdm_index_1] + // j*step_sbdm[sbdm_index_2]]; // fnc_vals[i+j*size_sbdm[sbdm_index_1]] = fnc_values_[linind + i*step_sbdm[sbdm_index_1] // + j*step_sbdm[sbdm_index_2]]; } } Loading @@ -623,7 +610,7 @@ void function<scalartype, domain>::distribute(const int sbdm_index, int* subind, subind_2_linind(subind, linind); for (int i = 0; i < size_sbdm[sbdm_index]; i++) fnc_values[linind + i * step_sbdm[sbdm_index]] = ScalarCast<scalartype>::execute(fnc_vals[i]); fnc_values_[linind + i * step_sbdm[sbdm_index]] = ScalarCast<scalartype>::execute(fnc_vals[i]); } template <typename scalartype, class domain> Loading @@ -642,7 +629,7 @@ void function<scalartype, domain>::distribute(const int sbdm_index_1, const int for (int i = 0; i < size_sbdm[sbdm_index_1]; i++) for (int j = 0; j < size_sbdm[sbdm_index_2]; j++) fnc_values[linind + i * step_sbdm[sbdm_index_1] + j * step_sbdm[sbdm_index_2]] = fnc_values_[linind + i * step_sbdm[sbdm_index_1] + j * step_sbdm[sbdm_index_2]] = fnc_vals[i + j * size_sbdm[sbdm_index_1]]; } Loading @@ -661,8 +648,8 @@ void function<scalartype, domain>::print_fingerprint(std::ostream& stream) const stream << " " << size_sbdm[i]; stream << "\n"; stream << "# elements: " << nb_elements_ << "\n"; stream << "memory: " << nb_elements_ * sizeof(scalartype) / (1024. * 1024.) << " MiB\n"; stream << "# elements: " << size() << "\n"; stream << "memory: " << size() * sizeof(scalartype) / (1024. * 1024.) << " MiB\n"; stream << "****************************************\n" << std::endl; } Loading @@ -673,11 +660,11 @@ void function<scalartype, domain>::print_elements(std::ostream& stream) const { stream << "****************************************\n"; std::vector<int> subind(Nb_sbdms); for (int lindex = 0; lindex < nb_elements_; ++lindex) { for (int lindex = 0; lindex < size(); ++lindex) { linind_2_subind(lindex, subind); for (int index : subind) stream << index << "\t"; stream << " \t" << fnc_values[lindex] << "\n"; stream << " \t" << fnc_values_[lindex] << "\n"; } stream << "****************************************\n" << std::endl; Loading Loading @@ -705,6 +692,15 @@ void function<scalartype, domain>::unpack(const concurrency_t& concurrency, char concurrency.unpack(buffer, buffer_size, position, *this); } template <typename scalartype, class domain> template <class Concurrency> function<scalartype, domain> function<scalartype, domain>::gather(const Concurrency& concurrency) const { function result(name_); concurrency.gather(*this, result, concurrency); return result; } } // namespace func } // namespace dca Loading