Unverified Commit 8c956625 authored by Peter Doak, committed by GitHub

Merge pull request #213 from gbalduzz/optional_cuda_aware_mpi

CUDA-aware MPI is optional.
parents 329e2f2e 23e03873
+0 −6
@@ -147,11 +147,7 @@ set(DCA_LIBS
  cuda_utils
)

set(SYSTEM_GPU_COUNT 0)

if (DCA_HAVE_CUDA)
  EXECUTE_PROCESS(COMMAND bash -c "nvidia-smi -L | awk 'BEGIN { num_gpu=0;} /GPU/ { num_gpu++;} END { printf(\"%d\", num_gpu) }'"
                  OUTPUT_VARIABLE SYSTEM_GPU_COUNT)
  list(APPEND DCA_LIBS
    blas_kernels
    dnfft_kernels
@@ -171,8 +167,6 @@ option(DCA_WITH_TESTS_EXTENSIVE "Build DCA++'s extensive tests." OFF)
option(DCA_WITH_TESTS_PERFORMANCE "Build DCA++'s performance tests. (Only in Release mode.)" OFF)
option(DCA_WITH_TESTS_STOCHASTIC  "Build DCA++'s stochastic tests." OFF)

set(DCA_TEST_GPU_COUNT "${SYSTEM_GPU_COUNT}" CACHE INTEGER "Number of GPUs available on one node for one test.")

set(TEST_RUNNER "" CACHE STRING "Command for executing (MPI) programs.")
set(MPIEXEC_NUMPROC_FLAG "-n" CACHE STRING "Flag used by TEST_RUNNER to specify the number of processes.")
set(MPIEXEC_PREFLAGS "" CACHE STRING "Flags to pass to TEST_RUNNER directly before the executable to run.")
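
The lines removed above counted the node's GPUs at configure time by parsing nvidia-smi -L. For orientation only: the same count is available at run time from the CUDA runtime API, as in the hedged sketch below (not part of this change).

#include <cstdio>
#include <cuda_runtime.h>

// Illustrative only: report how many CUDA devices this process can see.
// cudaGetDeviceCount also honours CUDA_VISIBLE_DEVICES restrictions.
int main() {
  int num_gpus = 0;
  if (cudaGetDeviceCount(&num_gpus) != cudaSuccess)
    num_gpus = 0;
  std::printf("%d\n", num_gpus);
  return 0;
}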
+2 −5
@@ -23,14 +23,11 @@ set(MPIEXEC_PREFLAGS "-a 1 -g 1 -c 5" CACHE STRING
set(SMPIARGS_FLAG_NOMPI "--smpiargs=none" CACHE STRING
  "Spectrum MPI argument list flag for serial tests.")
# Let's keep this option in case we need it again in the future.
set(SMPIARGS_FLAG_MPI "" CACHE STRING "Spectrum MPI argument list flag for MPI tests.")

# When we want to use a CUDA visible devices restriction we need this flag.
set(SMPIARGS_FLAG_MPI_CVD "--smpiargs=-gpu" CACHE STRING 
  "Spectrum MPI argument list for cuda-mpi tests")
set(SMPIARGS_FLAG_MPI "--smpiargs=\"-gpu\"" CACHE STRING "Spectrum MPI argument list flag for MPI tests.")

# Enable the GPU support.
option(DCA_WITH_CUDA "Enable GPU support." ON)
option(DCA_WITH_CUDA_AWARE_MPI "Enable CUDA aware MPI." ON)

# Compile for Volta compute architecture.
set(CUDA_GPU_ARCH "sm_70" CACHE STRING "Name of the *real* architecture to build for.")
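
The cluster configuration above enables DCA_WITH_CUDA_AWARE_MPI by default; the find-module change below turns it into the preprocessor define DCA_HAVE_CUDA_AWARE_MPI. A minimal sketch of how such a guard can be consumed is given here; the helper name and buffers are hypothetical, not DCA++ code.

#include <cuda_runtime.h>
#include <mpi.h>
#include <vector>

// Hypothetical helper: sum a device buffer across ranks. With CUDA-aware MPI
// the device pointer is handed to MPI directly; otherwise the data is staged
// through a host copy.
void allreduce_device_sum(double* device_data, int count, MPI_Comm comm) {
#ifdef DCA_HAVE_CUDA_AWARE_MPI
  MPI_Allreduce(MPI_IN_PLACE, device_data, count, MPI_DOUBLE, MPI_SUM, comm);
#else
  std::vector<double> host(count);
  cudaMemcpy(host.data(), device_data, count * sizeof(double), cudaMemcpyDeviceToHost);
  MPI_Allreduce(MPI_IN_PLACE, host.data(), count, MPI_DOUBLE, MPI_SUM, comm);
  cudaMemcpy(device_data, host.data(), count * sizeof(double), cudaMemcpyHostToDevice);
#endif
}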
+9 −0
@@ -20,6 +20,10 @@ if (CUDA_FOUND)
  list(APPEND DCA_CUDA_LIBS ${CUDA_LIBRARIES} ${CUDA_cusparse_LIBRARY} ${CUDA_cublas_LIBRARY})
  CUDA_INCLUDE_DIRECTORIES(${CUDA_INCLUDE_DIRS})
  set(CUDA_SEPARABLE_COMPILATION ON)

  set(CVD_LAUNCHER "" CACHE INTERNAL "launch script for setting the Cuda visible devices.")
  # Use the following script for systems with multiple gpus visible from a rank.
  # set(CVD_LAUNCHER "test/cvd_launcher.sh" CACHE INTERNAL "")
endif()

# Find MAGMA.
@@ -48,4 +52,9 @@ endif()
if (CUDA_FOUND AND DCA_HAVE_MAGMA)
  set(DCA_HAVE_CUDA TRUE CACHE INTERNAL "")
  dca_add_haves_define(DCA_HAVE_CUDA)

  option(DCA_WITH_CUDA_AWARE_MPI "Enable CUDA aware MPI." OFF)
  if(DCA_WITH_CUDA_AWARE_MPI)
    dca_add_haves_define(DCA_HAVE_CUDA_AWARE_MPI)
  endif()
endif()
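
CVD_LAUNCHER stays empty by default and can point to a wrapper script that restricts CUDA_VISIBLE_DEVICES per rank on nodes where every rank sees all GPUs. The same effect can also be obtained inside the application; the sketch below only illustrates that idea, and the launcher-specific environment variable is an assumption, not DCA++ code.

#include <cstdlib>
#include <cuda_runtime.h>

// Illustrative only: bind this MPI rank to one of the visible GPUs.
// The local-rank variable name depends on the launcher (Open MPI shown).
void bind_rank_to_gpu() {
  int num_gpus = 0;
  if (cudaGetDeviceCount(&num_gpus) != cudaSuccess || num_gpus == 0)
    return;  // no GPU visible, nothing to bind
  int local_rank = 0;
  if (const char* env = std::getenv("OMPI_COMM_WORLD_LOCAL_RANK"))
    local_rank = std::atoi(env);
  cudaSetDevice(local_rank % num_gpus);  // round-robin ranks over devices
}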
+5 −17
@@ -24,7 +24,7 @@ include(CMakeParseArguments)
# MPI or CUDA may be given to indicate that the test requires these libraries. MPI_NUMPROC is the
# number of MPI processes to use for a test with MPI, the default value is 1.
function(dca_add_gtest name)
  set(options FAST EXTENSIVE STOCHASTIC PERFORMANCE GTEST_MAIN MPI CUDA CUDA_CVD)
  set(options FAST EXTENSIVE STOCHASTIC PERFORMANCE GTEST_MAIN MPI CUDA)
  set(oneValueArgs MPI_NUMPROC)
  set(multiValueArgs INCLUDE_DIRS SOURCES LIBS)
  cmake_parse_arguments(DCA_ADD_GTEST "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
@@ -82,14 +82,6 @@ function(dca_add_gtest name)
    return()
  endif()

  if (DCA_ADD_GTEST_CUDA_CVD AND NOT DCA_HAVE_CUDA )
    return()
  endif()

  if (DCA_ADD_GTEST_CUDA_CVD AND (DCA_TEST_GPU_COUNT LESS 3) )
    return()
  endif()

  add_executable(${name} ${name}.cpp ${DCA_ADD_GTEST_SOURCES})

  # Create a macro with the project source dir. We use this as the root path for reading files in
@@ -104,7 +96,7 @@ function(dca_add_gtest name)
    target_link_libraries(${name} gtest ${DCA_ADD_GTEST_LIBS})
  endif()

  if (DCA_ADD_GTEST_CUDA OR DCA_ADD_GTEST_CUDA_CVD)
  if (DCA_ADD_GTEST_CUDA)
    target_include_directories(${name} PRIVATE ${CUDA_TOOLKIT_INCLUDE})
    target_link_libraries(${name} ${DCA_CUDA_LIBS})
    target_compile_definitions(${name} PRIVATE DCA_HAVE_CUDA)
@@ -113,11 +105,6 @@ function(dca_add_gtest name)
      target_compile_definitions(${name} PRIVATE DCA_HAVE_MAGMA)
    endif()
    cuda_add_cublas_to_target(${name})
    # A less hacky way to do this would be good, but for now it is only used to
    # test the development-only distributed G4 feature.
    if (DCA_ADD_GTEST_CUDA_CVD)
      set(CVD_LAUNCHER "${PROJECT_SOURCE_DIR}/test/cvdlauncher.sh")
    endif()
  endif()

  target_include_directories(${name} PRIVATE
@@ -131,13 +118,14 @@ function(dca_add_gtest name)

    add_test(NAME ${name}
             COMMAND ${TEST_RUNNER} ${MPIEXEC_NUMPROC_FLAG} ${DCA_ADD_GTEST_MPI_NUMPROC}
                     ${MPIEXEC_PREFLAGS} ${SMPIARGS_FLAG_MPI_CVD} ${CVD_LAUNCHER} "$<TARGET_FILE:${name}>")
                     ${MPIEXEC_PREFLAGS}  ${CVD_LAUNCHER} "$<TARGET_FILE:${name}>")
                 target_link_libraries(${name} ${MPI_C_LIBRARIES})
  else()
    if (TEST_RUNNER)
      add_test(NAME ${name}
               COMMAND ${TEST_RUNNER} ${MPIEXEC_NUMPROC_FLAG} 1
	               ${MPIEXEC_PREFLAGS} ${SMPIARGS_FLAG_NOMPI} "$<TARGET_FILE:${name}>")
	               ${MPIEXEC_PREFLAGS} ${SMPIARGS_FLAG_NOMPI}
                   ${CVD_LAUNCHER} "$<TARGET_FILE:${name}>")
    else (TEST_RUNNER)
      add_test(NAME ${name}
               COMMAND "$<TARGET_FILE:${name}>")
+110 −114
@@ -30,15 +30,10 @@
#include "dca/distribution/dist_types.hpp"
#include "dca/function/scalar_cast.hpp"
#include "dca/function/set_to_zero.hpp"
#include "dca/util/ignore.hpp"
#include "dca/util/pack_operations.hpp"
#include "dca/util/integer_division.hpp"
#include "dca/util/type_utils.hpp"

#include "dca/parallel/util/get_workload.hpp"
#ifdef DCA_HAVE_MPI
#include "mpi.h"
#endif

namespace dca {
namespace func {
// dca::func::
@@ -54,9 +49,11 @@ public:
  // Default constructor
  // Constructs the function with the name name.
  // Postcondition: All elements are set to zero.
  // Special case: when distributed_g4_enabled, G4-related variables only get an
  // allocation of 1/p of the original G4 size, where p = #mpiranks.
  function(const std::string& name = default_name_, const DistType dist = DistType::NONE);
  function(const std::string& name = default_name_);

  // Distributed function. Access with multi-index operator() is not safe.
  template <class Concurrency>
  function(const std::string& name, const Concurrency& concurrency);

  // Copy constructor
  // Constructs the function with a copy of the elements and the name of other.
@@ -97,8 +94,6 @@ public:
  //                 The other function is in a non-specified state.
  function<scalartype, domain>& operator=(function<scalartype, domain>&& other);

  ~function();

  // Resets the function by resetting the domain object and reallocating the memory for the function
  // elements.
  // Postcondition: All elements are set to zero.
@@ -118,11 +113,12 @@ public:
    return Nb_sbdms;
  }
  std::size_t size() const {
    return nb_elements_;
    return fnc_values_.size();
  }

  // TODO: remove as it breaks class' invariant.
  void resize(std::size_t nb_elements_new) {
    nb_elements_ = nb_elements_new;
    fnc_values_.resize(nb_elements_new);
  }
  // Returns the size of the leaf domain with the given index.
  // Does not return function values!
@@ -131,31 +127,31 @@ public:
  }

  // Begin and end methods for compatibility with range for loop.
  scalartype* begin() {
    return fnc_values;
  auto begin() {
    return fnc_values_.begin();
  }
  scalartype* end() {
    return fnc_values + nb_elements_;
  auto end() {
    return fnc_values_.end();
  }
  const scalartype* begin() const {
    return fnc_values;
  const auto begin() const {
    return fnc_values_.begin();
  }
  const scalartype* end() const {
    return fnc_values + nb_elements_;
  const auto end() const {
    return fnc_values_.end();
  }

  // Returns a pointer to the function's elements.
  scalartype* values() {
    return fnc_values;
    return fnc_values_.data();
  }
  const scalartype* values() const {
    return fnc_values;
    return fnc_values_.data();
  }
  scalartype* data() {
    return fnc_values;
    return fnc_values_.data();
  }
  const scalartype* data() const {
    return fnc_values;
    return fnc_values_.data();
  }

  //
@@ -197,7 +193,7 @@ public:
  template <typename T>
  int subind_2_linind(const T ind) const {
    static_assert(std::is_integral<T>::value, "Index ind must be an integer.");
    assert(ind >= 0 && ind < nb_elements_);
    assert(ind >= 0 && ind < size());
    return ind;
  }

@@ -211,24 +207,24 @@ public:
  template <typename T>
  scalartype& operator()(const T linind) {
    static_assert(std::is_integral<T>::value, "Index linind must be an integer.");
    assert(linind >= 0 && linind < nb_elements_);
    return fnc_values[linind];
    assert(linind >= 0 && linind < size());
    return fnc_values_[linind];
  }
  template <typename T>
  const scalartype& operator()(const T linind) const {
    static_assert(std::is_integral<T>::value, "Index linind must be an integer.");
    assert(linind >= 0 && linind < nb_elements_);
    return fnc_values[linind];
    assert(linind >= 0 && linind < size());
    return fnc_values_[linind];
  }

  template <typename... Ts>
  scalartype& operator()(const Ts... subindices) {
    // We need to cast all indices to the same type for dmn_variadic.
    return fnc_values[dmn(static_cast<int>(subindices)...)];
    return fnc_values_[dmn(static_cast<int>(subindices)...)];
  }
  template <typename... Ts>
  const scalartype& operator()(const Ts... subindices) const {
    return fnc_values[dmn(static_cast<int>(subindices)...)];
    return fnc_values_[dmn(static_cast<int>(subindices)...)];
  }

  void operator+=(const function<scalartype, domain>& other);
@@ -243,7 +239,7 @@ public:
  void operator/=(scalartype c);

  // Equal-comparison operator
  // Returns true if the function's elements (fnc_values) are equal to other's elements, false
  // Returns true if the function's elements (fnc_values_) are equal to other's elements, false
  // otherwise.
  // TODO: Make the equal-comparison operator a non-member function.
  bool operator==(const function<scalartype, domain>& other) const;
@@ -276,47 +272,58 @@ public:
  template <class concurrency_t>
  void unpack(const concurrency_t& concurrency, char* buffer, int buffer_size, int& position);

  // Gather a function that was initialized as distributed.
  // Precondition: concurrency must be the same object used during construction.
  template <class Concurrency>
  function gather(const Concurrency& concurrency) const;

private:
  std::string name_;
  std::string function_type;

  domain dmn;  // TODO: Remove domain object?

  std::size_t nb_elements_;

  // The subdomains (sbdmn) represent the leaf domains, not the branch domains.
  int Nb_sbdms;
  const std::vector<std::size_t>& size_sbdm;  // TODO: Remove?
  const std::vector<std::size_t>& step_sbdm;  // TODO: Remove?

  scalartype* fnc_values;
  std::vector<scalartype> fnc_values_;
};

template <typename scalartype, class domain>
const std::string function<scalartype, domain>::default_name_ = "no-name";

template <typename scalartype, class domain>
function<scalartype, domain>::function(const std::string& name, DistType dist)
function<scalartype, domain>::function(const std::string& name)
    : name_(name),
      function_type(__PRETTY_FUNCTION__),
      dmn(),
      nb_elements_(dmn.get_size()),
      Nb_sbdms(dmn.get_leaf_domain_sizes().size()),
      size_sbdm(dmn.get_leaf_domain_sizes()),
      step_sbdm(dmn.get_leaf_domain_steps()),
      fnc_values(nullptr) {
  dca::util::ignoreUnused(dist);
#ifdef DCA_HAVE_MPI
  if (dist == DistType::MPI) {
    int my_rank, mpi_size;
    MPI_Comm_size(MPI_COMM_WORLD, &mpi_size);
    MPI_Comm_rank(MPI_COMM_WORLD, &my_rank);
    nb_elements_ = dca::parallel::util::getWorkload(dmn.get_size(), mpi_size, my_rank);
      fnc_values_(dmn.get_size()) {
  for (int linind = 0; linind < size(); ++linind)
    setToZero(fnc_values_[linind]);
}
#endif  // DCA_HAVE_MPI
  fnc_values = new scalartype[nb_elements_];
  for (int linind = 0; linind < nb_elements_; ++linind)
    setToZero(fnc_values[linind]);

template <typename scalartype, class domain>
template <class Concurrency>
function<scalartype, domain>::function(const std::string& name, const Concurrency& concurrency)
    : name_(name),
      function_type(__PRETTY_FUNCTION__),
      dmn(),
      Nb_sbdms(dmn.get_leaf_domain_sizes().size()),
      size_sbdm(dmn.get_leaf_domain_sizes()),
      step_sbdm(dmn.get_leaf_domain_steps()) {
  // TODO: multi-index access to partitioned function is not safe.
  const std::size_t mpi_size = concurrency.number_of_processors();

  const std::size_t nb_elements = dca::util::ceilDiv(dmn.get_size(), mpi_size);
  fnc_values_.resize(nb_elements);

  for (int linind = 0; linind < nb_elements; ++linind)
    setToZero(fnc_values_[linind]);
}

template <typename scalartype, class domain>
@@ -324,17 +331,13 @@ function<scalartype, domain>::function(const function<scalartype, domain>& other
    : name_(other.name_),
      function_type(__PRETTY_FUNCTION__),
      dmn(),
      nb_elements_(dmn.get_size()),
      Nb_sbdms(dmn.get_leaf_domain_sizes().size()),
      size_sbdm(dmn.get_leaf_domain_sizes()),
      step_sbdm(dmn.get_leaf_domain_steps()),
      fnc_values(nullptr) {
      fnc_values_(other.fnc_values_) {
  if (dmn.get_size() != other.dmn.get_size())
    // The other function has not been reset after the domain was initialized.
    throw std::logic_error("Copy construction from a not yet resetted function.");

  fnc_values = new scalartype[nb_elements_];
  std::copy_n(other.fnc_values, nb_elements_, fnc_values);
}

template <typename scalartype, class domain>
@@ -342,18 +345,13 @@ function<scalartype, domain>::function(function<scalartype, domain>&& other)
    : name_(std::move(other.name_)),
      function_type(__PRETTY_FUNCTION__),
      dmn(),
      nb_elements_(dmn.get_size()),
      Nb_sbdms(dmn.get_leaf_domain_sizes().size()),
      size_sbdm(dmn.get_leaf_domain_sizes()),
      step_sbdm(dmn.get_leaf_domain_steps()),
      fnc_values(nullptr) {
      fnc_values_(std::move(other.fnc_values_)) {
  if (dmn.get_size() != other.dmn.get_size())
    // The other function has not been reset after the domain was initialized.
    throw std::logic_error("Move construction from a not yet resetted function.");

  fnc_values = other.fnc_values;
  other.nb_elements_ = 0;
  other.fnc_values = nullptr;
}

template <typename scalartype, class domain>
@@ -370,7 +368,7 @@ function<scalartype, domain>& function<scalartype, domain>::operator=(
        throw std::logic_error("Copy assignment from a not yet resetted function.");
    }

    std::copy_n(other.values(), nb_elements_, fnc_values);
    fnc_values_ = other.fnc_values_;
  }

  return *this;
@@ -383,7 +381,7 @@ function<Scalar, domain>& function<Scalar, domain>::operator=(const function<Sca
    throw(std::logic_error("Function size does not match."));
  }

  std::copy_n(other.values(), nb_elements_, fnc_values);
  fnc_values_ = other.fnc_values_;

  return *this;
}
@@ -402,33 +400,21 @@ function<scalartype, domain>& function<scalartype, domain>::operator=(
        throw std::logic_error("Move assignment from a not yet resetted function.");
    }

    delete[] fnc_values;
    fnc_values = other.fnc_values;

    other.nb_elements_ = 0;
    other.fnc_values = nullptr;
    fnc_values_ = std::move(other.fnc_values_);
  }

  return *this;
}

template <typename scalartype, class domain>
function<scalartype, domain>::~function() {
  delete[] fnc_values;
}

template <typename scalartype, class domain>
void function<scalartype, domain>::reset() {
  dmn.reset();

  nb_elements_ = dmn.get_size();
  fnc_values_.resize(dmn.get_size());
  Nb_sbdms = dmn.get_leaf_domain_sizes().size();

  delete[] fnc_values;
  fnc_values = new scalartype[nb_elements_];

  for (int linind = 0; linind < nb_elements_; ++linind)
    setToZero(fnc_values[linind]);
  for (int linind = 0; linind < size(); ++linind)
    setToZero(fnc_values_[linind]);
}

template <typename scalartype, class domain>
@@ -480,8 +466,8 @@ scalartype& function<scalartype, domain>::operator()(const int* const subind) {
  int linind;
  subind_2_linind(subind, linind);

  assert(linind >= 0 && linind < nb_elements_);
  return fnc_values[linind];
  assert(linind >= 0 && linind < size());
  return fnc_values_[linind];
}

template <typename scalartype, class domain>
@@ -489,64 +475,64 @@ const scalartype& function<scalartype, domain>::operator()(const int* const subi
  int linind;
  subind_2_linind(subind, linind);

  assert(linind >= 0 && linind < nb_elements_);
  return fnc_values[linind];
  assert(linind >= 0 && linind < size());
  return fnc_values_[linind];
}

template <typename scalartype, class domain>
void function<scalartype, domain>::operator+=(const function<scalartype, domain>& other) {
  for (int linind = 0; linind < nb_elements_; ++linind)
    fnc_values[linind] += other(linind);
  for (int linind = 0; linind < size(); ++linind)
    fnc_values_[linind] += other(linind);
}

template <typename scalartype, class domain>
void function<scalartype, domain>::operator-=(const function<scalartype, domain>& other) {
  for (int linind = 0; linind < nb_elements_; ++linind)
    fnc_values[linind] -= other(linind);
  for (int linind = 0; linind < size(); ++linind)
    fnc_values_[linind] -= other(linind);
}

template <typename scalartype, class domain>
void function<scalartype, domain>::operator*=(const function<scalartype, domain>& other) {
  for (int linind = 0; linind < nb_elements_; ++linind)
    fnc_values[linind] *= other(linind);
  for (int linind = 0; linind < size(); ++linind)
    fnc_values_[linind] *= other(linind);
}

template <typename scalartype, class domain>
void function<scalartype, domain>::operator/=(const function<scalartype, domain>& other) {
  for (int linind = 0; linind < nb_elements_; ++linind) {
  for (int linind = 0; linind < size(); ++linind) {
    assert(std::abs(other(linind)) > 1.e-16);
    fnc_values[linind] /= other(linind);
    fnc_values_[linind] /= other(linind);
  }
}

template <typename scalartype, class domain>
void function<scalartype, domain>::operator=(const scalartype c) {
  for (int linind = 0; linind < nb_elements_; linind++)
    fnc_values[linind] = c;
  for (int linind = 0; linind < size(); linind++)
    fnc_values_[linind] = c;
}

template <typename scalartype, class domain>
void function<scalartype, domain>::operator+=(const scalartype c) {
  for (int linind = 0; linind < nb_elements_; linind++)
    fnc_values[linind] += c;
  for (int linind = 0; linind < size(); linind++)
    fnc_values_[linind] += c;
}

template <typename scalartype, class domain>
void function<scalartype, domain>::operator-=(const scalartype c) {
  for (int linind = 0; linind < nb_elements_; linind++)
    fnc_values[linind] -= c;
  for (int linind = 0; linind < size(); linind++)
    fnc_values_[linind] -= c;
}

template <typename scalartype, class domain>
void function<scalartype, domain>::operator*=(const scalartype c) {
  for (int linind = 0; linind < nb_elements_; linind++)
    fnc_values[linind] *= c;
  for (int linind = 0; linind < size(); linind++)
    fnc_values_[linind] *= c;
}

template <typename scalartype, class domain>
void function<scalartype, domain>::operator/=(const scalartype c) {
  for (int linind = 0; linind < nb_elements_; linind++)
    fnc_values[linind] /= c;
  for (int linind = 0; linind < size(); linind++)
    fnc_values_[linind] /= c;
}

template <typename scalartype, class domain>
@@ -555,8 +541,8 @@ bool function<scalartype, domain>::operator==(const function<scalartype, domain>
    // One of the functions has not been reset after the domain was initialized.
    throw std::logic_error("Comparing functions of different sizes.");

  for (int i = 0; i < nb_elements_; ++i)
    if (other(i) != fnc_values[i])
  for (int i = 0; i < size(); ++i)
    if (other(i) != fnc_values_[i])
      return false;

  return true;
@@ -574,7 +560,8 @@ void function<scalartype, domain>::slice(const int sbdm_index, int* subind,
  subind_2_linind(subind, linind);

  for (int i = 0; i < size_sbdm[sbdm_index]; i++)
    fnc_vals[i] = ScalarCast<new_scalartype>::execute(fnc_values[linind + i * step_sbdm[sbdm_index]]);
    fnc_vals[i] =
        ScalarCast<new_scalartype>::execute(fnc_values_[linind + i * step_sbdm[sbdm_index]]);
}

template <typename scalartype, class domain>
@@ -602,12 +589,12 @@ void function<scalartype, domain>::slice(const int sbdm_index_1, const int sbdm_

  for (int j = 0; j < size_sbdm_2; j++) {
    fnc_ptr_left = &fnc_vals[0 + j * size_sbdm_1];
    fnc_ptr_right = &fnc_values[linind + j * step_sbdm_2];
    fnc_ptr_right = &fnc_values_[linind + j * step_sbdm_2];

    for (int i = 0; i < size_sbdm_1; i++)
      fnc_ptr_left[i] = fnc_ptr_right[i * step_sbdm_1];
    //       fnc_vals[i+j*size_sbdm[sbdm_index_1]] = fnc_values[linind + i*step_sbdm[sbdm_index_1] +
    //       j*step_sbdm[sbdm_index_2]];
    //       fnc_vals[i+j*size_sbdm[sbdm_index_1]] = fnc_values_[linind + i*step_sbdm[sbdm_index_1]
    //       + j*step_sbdm[sbdm_index_2]];
  }
}

@@ -623,7 +610,7 @@ void function<scalartype, domain>::distribute(const int sbdm_index, int* subind,
  subind_2_linind(subind, linind);

  for (int i = 0; i < size_sbdm[sbdm_index]; i++)
    fnc_values[linind + i * step_sbdm[sbdm_index]] = ScalarCast<scalartype>::execute(fnc_vals[i]);
    fnc_values_[linind + i * step_sbdm[sbdm_index]] = ScalarCast<scalartype>::execute(fnc_vals[i]);
}

template <typename scalartype, class domain>
@@ -642,7 +629,7 @@ void function<scalartype, domain>::distribute(const int sbdm_index_1, const int

  for (int i = 0; i < size_sbdm[sbdm_index_1]; i++)
    for (int j = 0; j < size_sbdm[sbdm_index_2]; j++)
      fnc_values[linind + i * step_sbdm[sbdm_index_1] + j * step_sbdm[sbdm_index_2]] =
      fnc_values_[linind + i * step_sbdm[sbdm_index_1] + j * step_sbdm[sbdm_index_2]] =
          fnc_vals[i + j * size_sbdm[sbdm_index_1]];
}

@@ -661,8 +648,8 @@ void function<scalartype, domain>::print_fingerprint(std::ostream& stream) const
    stream << "  " << size_sbdm[i];
  stream << "\n";

  stream << "# elements: " << nb_elements_ << "\n";
  stream << "memory: " << nb_elements_ * sizeof(scalartype) / (1024. * 1024.) << " MiB\n";
  stream << "# elements: " << size() << "\n";
  stream << "memory: " << size() * sizeof(scalartype) / (1024. * 1024.) << " MiB\n";
  stream << "****************************************\n" << std::endl;
}

@@ -673,11 +660,11 @@ void function<scalartype, domain>::print_elements(std::ostream& stream) const {
  stream << "****************************************\n";

  std::vector<int> subind(Nb_sbdms);
  for (int lindex = 0; lindex < nb_elements_; ++lindex) {
  for (int lindex = 0; lindex < size(); ++lindex) {
    linind_2_subind(lindex, subind);
    for (int index : subind)
      stream << index << "\t";
    stream << " \t" << fnc_values[lindex] << "\n";
    stream << " \t" << fnc_values_[lindex] << "\n";
  }

  stream << "****************************************\n" << std::endl;
@@ -705,6 +692,15 @@ void function<scalartype, domain>::unpack(const concurrency_t& concurrency, char
  concurrency.unpack(buffer, buffer_size, position, *this);
}

template <typename scalartype, class domain>
template <class Concurrency>
function<scalartype, domain> function<scalartype, domain>::gather(const Concurrency& concurrency) const {
  function result(name_);

  concurrency.gather(*this, result, concurrency);
  return result;
}

}  // namespace func
}  // namespace dca
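
The function.hpp changes above replace the manually managed fnc_values array and nb_elements_ counter with a std::vector, add a constructor that sizes a distributed function to roughly 1/p of the domain (p = number of ranks), and add gather() to reassemble the full function through the concurrency object. The standalone sketch below models that pattern with a hypothetical single-rank concurrency stub; it is a simplified illustration, not DCA++ code.

#include <cassert>
#include <cstddef>
#include <vector>

// Hypothetical single-rank stand-in for the Concurrency interface used by the
// new constructor and by gather(): number_of_processors() and gather(...).
struct SerialConcurrency {
  std::size_t number_of_processors() const { return 1; }
  template <class F>
  void gather(const F& local, F& result, const SerialConcurrency&) const {
    result = local;  // with one rank the local piece already is the full function
  }
};

// Minimal model of the refactor: std::vector storage, size() taken from the
// vector, distributed construction allocating ceil(domain_size / p) elements.
template <typename Scalar>
class SimpleFunction {
public:
  explicit SimpleFunction(std::size_t domain_size) : values_(domain_size, Scalar{}) {}

  template <class Concurrency>
  SimpleFunction(std::size_t domain_size, const Concurrency& concurrency)
      : values_((domain_size + concurrency.number_of_processors() - 1) /
                    concurrency.number_of_processors(),
                Scalar{}) {}

  std::size_t size() const { return values_.size(); }

  Scalar& operator()(std::size_t i) {
    assert(i < size());
    return values_[i];
  }

  template <class Concurrency>
  SimpleFunction gather(const Concurrency& concurrency) const {
    SimpleFunction result(size() * concurrency.number_of_processors());
    concurrency.gather(*this, result, concurrency);
    return result;
  }

private:
  std::vector<Scalar> values_;  // replaces the raw fnc_values pointer
};

int main() {
  SerialConcurrency concurrency;
  SimpleFunction<double> g4(16, concurrency);  // local slice of a 16-element domain
  g4(0) = 1.0;
  SimpleFunction<double> full = g4.gather(concurrency);
  return full.size() == 16 ? 0 : 1;
}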
