Unverified Commit 189d0d8d authored by Peter Doak's avatar Peter Doak Committed by GitHub
Browse files

Merge pull request #278 from PDoakORNL/function_updates

Function updates, tested on summit
parents 221688ea 343fe75b
Loading
Loading
Loading
Loading
+69 −0
Original line number Diff line number Diff line
# Reject GCC releases older than 9.0 — the project relies on C++ features
# and warning flags that earlier compilers do not provide.
if(NOT CMAKE_CXX_COMPILER_VERSION VERSION_GREATER_EQUAL 9.0)
  message(FATAL_ERROR "Requires gcc 9.0 or higher ")
endif()

# Enable OpenMP

# Aggressive inlining / aliasing / unrolling defaults for GCC builds.
# Use string(APPEND ...) consistently (the -Wsuggest-override branch below
# already does) instead of the legacy set("${X} ...") pattern.
string(APPEND CMAKE_C_FLAGS " -finline-limit=1000 -fstrict-aliasing -funroll-all-loops")
string(APPEND CMAKE_CXX_FLAGS " -finline-limit=1000 -fstrict-aliasing -funroll-all-loops")

# Keep frame pointers in debug builds so profilers and debuggers see full stacks.
string(APPEND CMAKE_C_FLAGS_DEBUG " -fno-omit-frame-pointer")
string(APPEND CMAKE_CXX_FLAGS_DEBUG " -fno-omit-frame-pointer")

# Suppress compile warnings
string(APPEND CMAKE_C_FLAGS " -Wno-deprecated")
string(APPEND CMAKE_CXX_FLAGS " -Wno-deprecated")

# Treat VLA as an error in C; C++ only warns (-Wvla).
string(APPEND CMAKE_C_FLAGS " -Werror=vla")
string(APPEND CMAKE_CXX_FLAGS " -Wvla")

# set compiler warnings
string(APPEND CMAKE_CXX_FLAGS
       " -Wcomment -Wmisleading-indentation -Wmaybe-uninitialized -Wuninitialized -Wreorder -Wno-unknown-pragmas -Wno-sign-compare")

# -Wsuggest-override is only enabled for GCC >= 9.2.
if(CMAKE_CXX_COMPILER_VERSION VERSION_GREATER_EQUAL 9.2)
  string(APPEND CMAKE_CXX_FLAGS " -Wsuggest-override")
endif()

# Set extra optimization specific flags
string(APPEND CMAKE_C_FLAGS " -ffast-math")
string(APPEND CMAKE_CXX_FLAGS " -ffast-math")

if(CMAKE_SYSTEM_PROCESSOR MATCHES "x86_64")
  # x86_64: honor a user-supplied -march= (cross-compiling), otherwise
  # default to -march=native.
  if(CMAKE_CXX_FLAGS MATCHES "-march=" OR CMAKE_C_FLAGS MATCHES "-march=")
    # A user-supplied -march= must be given consistently for both languages;
    # the original code expressed this with an empty then-branch, which is
    # folded into a single negated condition here.
    if(NOT (CMAKE_CXX_FLAGS MATCHES "-march=" AND CMAKE_C_FLAGS MATCHES "-march="))
      message(
        FATAL_ERROR
          "if -march=ARCH is specified by the user, it should be added in both CMAKE_CXX_FLAGS and CMAKE_C_FLAGS!")
    endif()
  else()
    # No user override: optimize for the build host.
    string(APPEND CMAKE_C_FLAGS " -march=native")
    string(APPEND CMAKE_CXX_FLAGS " -march=native")
  endif()
elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "ppc64" OR CMAKE_SYSTEM_PROCESSOR MATCHES "aarch64")
  # PowerPC / ARM: the equivalent tuning flag is -mcpu= instead of -march=.
  if(CMAKE_CXX_FLAGS MATCHES "-mcpu=" OR CMAKE_C_FLAGS MATCHES "-mcpu=")
    if(NOT (CMAKE_CXX_FLAGS MATCHES "-mcpu=" AND CMAKE_C_FLAGS MATCHES "-mcpu="))
      message(
        FATAL_ERROR
          "if -mcpu=ARCH is specified by the user, it should be added in both CMAKE_CXX_FLAGS and CMAKE_C_FLAGS!")
    endif()
  else()
    string(APPEND CMAKE_C_FLAGS " -mcpu=native")
    string(APPEND CMAKE_CXX_FLAGS " -mcpu=native")
  endif()
endif()
+41 −2
Original line number Diff line number Diff line
@@ -54,6 +54,18 @@ public:
  // Resets the domain back to the original state at creation time.
  void reset();

  // template <typename TUPLE, std::size_t... Is>
  // std::size_t array_index_impl(const TUPLE&& t, std::index_sequence<Is...>) const;

  // template <typename... Args>
  // std::size_t index_by_tuple(std::tuple<Args...>&& t) const;

  template <typename ARRAY, std::size_t... SIZE>
  std::size_t index_by_array_impl(const ARRAY& a, std::index_sequence<SIZE...>) const;

  template <std::size_t N, typename Indices = std::make_index_sequence<N>>
  std::size_t index_by_array(const std::array<int, N>& a) const;

  // Indexing operator: accesses elements of the domain.
  // If sizeof(args) < sizeof(domains): error.
  // If sizeof(args) == sizeof(domains): calls index via branch domains.
@@ -220,6 +232,33 @@ void dmn_variadic<domain_list...>::reset() {
  // std::cout << "Domain size is " << size << std::endl;
}

// template <typename... domain_list>
// template <typename TUPLE, std::size_t... Is>
// std::size_t dmn_variadic<domain_list...>::array_index_impl(const TUPLE&& t,
// std::index_sequence<Is...>) const {
//   return operator()(std::get<Is>(t)...);
// }

// template <typename... domain_list>
// template <typename... Args>
// std::size_t dmn_variadic<domain_list...>::index_by_tuple(std::tuple<Args...>&& t) const {
//   return array_index_impl(std::move(t), std::index_sequence_for<Args...>{});
// }

// Expands the array elements into individual arguments via the index pack and
// forwards them to the variadic operator(), which computes the linear index.
template <typename... domain_list>
template <typename ARRAY, std::size_t... SIZE>
std::size_t dmn_variadic<domain_list...>::index_by_array_impl(const ARRAY& a,
                                                              std::index_sequence<SIZE...>) const {
  return operator()(a[SIZE]...);
}

// Computes the linear index for a full set of subindices given as a
// std::array. Delegates to index_by_array_impl with a generated index
// sequence of length N (the Indices template default).
template <typename... domain_list>
template <std::size_t N, typename Indices>
std::size_t dmn_variadic<domain_list...>::index_by_array(const std::array<int, N>& a) const {
  // return index_by_tuple(std::move(util::Array2Tuple(a)));
  return index_by_array_impl(a, Indices{});
}

template <typename... domain_list>
template <typename... Args>
std::size_t dmn_variadic<domain_list...>::operator()(Args&&... args) const {
@@ -272,8 +311,8 @@ std::size_t dmn_variadic<domain_list...>::index_lookup(std::integral_constant<bo
  auto seq = detail::make_index_sequence_with_offset<1, sizeof...(Args)>();

#ifndef NDEBUG
  auto seq2 = std::make_index_sequence<sizeof...(Args) + 1>{};
  check_indices("leaf", leaf_domain_sizes, seq2, leaf_i0, std::forward<Args>(leaf_indices)...);
  // auto seq2 = std::make_index_sequence<sizeof...(Args) + 1>{};
  // check_indices("leaf", leaf_domain_sizes, seq2, leaf_i0, std::forward<Args>(leaf_indices)...);
#endif  // NDEBUG

  std::size_t N = leaf_i0 + detail::multiply_offsets(leaf_domain_steps, seq,
+1 −1
Original line number Diff line number Diff line
@@ -56,7 +56,7 @@ public:
    return branch_domain_steps;
  }

  // linind <--> subdomain_indices != branch_indices
  // linind <--> leaf indices
  void linind_2_subind(std::size_t linind, int* subind) const;
  void subind_2_linind(const int* subind, std::size_t& linind) const;

+280 −28
Original line number Diff line number Diff line
// Copyright (C) 2021 ETH Zurich
// Copyright (C) 2021 UT-Battelle, LLC
// Copyright (C) 2022 ETH Zurich
// Copyright (C) 2022 UT-Battelle, LLC
// All rights reserved.
//
// See LICENSE for terms of usage.
@@ -22,6 +22,7 @@
#include <cmath>    // std::abs
#include <complex>  // std::abs(std::complex)
#include <iostream>
#include <initializer_list>
#include <stdexcept>
#include <string>
#include <type_traits>  // std::is_integral
@@ -80,6 +81,9 @@ public:
    name_ = name;
  }

  // Initializer list constructor
  function(std::initializer_list<scalartype> init_list, const std::string& name = default_name_);

  // Copy assignment operator
  // Replaces the function's elements with a copy of the elements of other.
  // Precondition: The other function has been resetted, if the domain had been initialized after
@@ -206,6 +210,10 @@ public:
  void linind_2_subind(int linind, int* subind) const;
  // std::vector version
  void linind_2_subind(int linind, std::vector<int>& subind) const;

  template <std::size_t N>
  void linind_2_subind(int linind, std::array<int,N>& subind) const;

  // modern RVO version
  std::vector<size_t> linind_2_subind(int linind) const;

@@ -217,6 +225,9 @@ public:
  // using standard vector and avoiding returning argument
  size_t subind_2_linind(const std::vector<int>& subind) const;

  // using standard vector and avoiding returning argument
  size_t branch_subind_2_linind(const std::vector<int>& subind) const;

  // Computes and returns the linear index for the given subindices of the branch or leaf domains,
  // depending on the size of subindices.
  // Enable only if all arguments are integral to prevent subind_to_linind(int*, int) to resolve to
@@ -243,29 +254,45 @@ public:
  scalartype& operator()(const int* subind);
  const scalartype& operator()(const int* subind) const;

  template <typename T>
  template<std::size_t N>
  scalartype& operator()(const std::array<int,N>& subind) {
    #ifndef NDEBUG
    auto linind = dmn.index_by_array(subind);
    assert(linind >=0 && linind < size());
    return fnc_values_[linind];
    #else
    return fnc_values_[dmn.index_by_array(subind)];
    #endif
  }

  const scalartype& operator()(const std::vector<int>& subind) const;

  template <typename T, typename = typename std::enable_if<std::is_integral<T>::value, void>::type>
  scalartype& operator()(const T linind) {
    static_assert(std::is_integral<T>::value, "Index linind must be an integer.");
    assert(linind >= 0 && linind < size());
    return fnc_values_[linind];
  }
  template <typename T>
  template <typename T, typename = typename std::enable_if<std::is_integral<T>::value, void>::type>
  const scalartype& operator()(const T linind) const {
    static_assert(std::is_integral<T>::value, "Index linind must be an integer.");
    assert(linind >= 0 && linind < size());
    return fnc_values_[linind];
  }

  template <typename... Ts>
  scalartype& operator()(const Ts... subindices) {
  template <typename T, typename... Ts,
            typename = typename std::enable_if<std::is_integral<T>::value, void>::type>
  scalartype& operator()(const T t, const Ts... subindices) {
    // We need to cast all indices to the same type for dmn_variadic.
    return fnc_values_[dmn(static_cast<int>(subindices)...)];
    return fnc_values_[dmn(static_cast<int>(t), static_cast<int>(subindices)...)];
  }
  template <typename... Ts>
  const scalartype& operator()(const Ts... subindices) const {
    return fnc_values_[dmn(static_cast<int>(subindices)...)];
  template <typename T, typename... Ts,
            typename = typename std::enable_if<std::is_integral<T>::value, void>::type>
  const scalartype& operator()(const T t, const Ts... subindices) const {
    return fnc_values_[dmn(static_cast<int>(t), static_cast<int>(subindices)...)];
  }

  
  void operator+=(const function<scalartype, domain, DT>& other);
  void operator-=(const function<scalartype, domain, DT>& other);
  void operator*=(const function<scalartype, domain, DT>& other);
@@ -283,15 +310,33 @@ public:
  // TODO: Make the equal-comparison operator a non-member function.
  bool operator==(const function<scalartype, domain, DT>& other) const;

  /** slice across 1 leaf domain
   *  deprecated, really unsafe pointer use
   */
  template <typename new_scalartype>
  void slice(int sbdm_index, int* subind, new_scalartype* fnc_vals) const;
  /** slice across leaf domains or branch domains based on size of subind.
   *  \param[in] subind    subindex of the slice, index in the slice domain is ignored
   */
  template <typename new_scalartype>
  void slice(const int sbdm_index, std::vector<int> subind, new_scalartype* fnc_vals) const;

  template <typename new_scalartype>
  void slice(int sbdm_index_1, int sbdm_index_2, std::vector<int> subind,
             new_scalartype* fnc_vals) const;
  template <typename new_scalartype>
  void slice(int sbdm_index_1, int sbdm_index_2, int* subind, new_scalartype* fnc_vals) const;

  template <typename new_scalartype>
  void distribute(int sbdm_index, std::vector<int> subind, const new_scalartype* fnc_vals);
  template <typename new_scalartype>
  void distribute(int sbdm_index, int* subind, const new_scalartype* fnc_vals);
  template <typename new_scalartype>
  void distribute(int sbdm_index_1, int sbdm_index_2, std::vector<int> subind, const new_scalartype* fnc_vals);
  template <typename new_scalartype>
  void distribute(int sbdm_index_1, int sbdm_index_2, int* subind, const new_scalartype* fnc_vals);

  
  //
  // Methods for printing
  //
@@ -385,6 +430,7 @@ function<scalartype, domain, DT>::function(const function<scalartype, domain, DT
  end_ = other.end_;
}

/** move constructor */
template <typename scalartype, class domain, DistType DT>
function<scalartype, domain, DT>::function(function<scalartype, domain, DT>&& other)
    : name_(std::move(other.name_)),
@@ -399,6 +445,22 @@ function<scalartype, domain, DT>::function(function<scalartype, domain, DT>&& ot
  end_ = other.end_;
}

/** Initializer-list constructor.
 *  Only needed for testing: fills the leading elements of the function with
 *  the given values and value-initializes the remainder of the domain.
 *  \throws std::length_error if more values are supplied than the domain
 *          holds (previously std::copy_n would write past the end of
 *          fnc_values_).
 */
template <typename scalartype, class domain, DistType DT>
function<scalartype, domain, DT>::function(std::initializer_list<scalartype> init_list,
                                           const std::string& name)
    : name_(name),
      function_type(__PRETTY_FUNCTION__),
      dmn(),
      Nb_sbdms(dmn.get_leaf_domain_sizes().size()) {
  start_ = 0;
  end_ = dmn.get_size();
  if (init_list.size() > static_cast<std::size_t>(dmn.get_size()))
    throw std::length_error("function: initializer list has more elements than the domain");
  fnc_values_.resize(dmn.get_size(), {});
  std::copy_n(init_list.begin(), init_list.size(), fnc_values_.begin());
}

template <typename scalartype, class domain, DistType DT>
template <class Concurrency>
std::size_t function<scalartype, domain, DT>::calcDistribution(const Concurrency& concurrency) {
@@ -430,14 +492,17 @@ std::size_t function<scalartype, domain, DT>::calcDistribution(const Concurrency
      if (remaining_ranks < dmn.get_subdomain_size(idim)) {
        if (dmn.get_subdomain_size(idim) % remaining_ranks != 0) {
          break;  // i.e no regular bricked blocking
	} else {
        }
        else {
          regular_local_function_size = true;
          break;
        }
      } else {
      }
      else {
        if (remaining_ranks % dmn.get_subdomain_size(idim) != 0) {
          break;  // i.e no regular bricked blocking
	} else {
        }
        else {
          remaining_ranks /= dmn.get_subdomain_size(idim);
        }
      }
@@ -673,6 +738,17 @@ void function<scalartype, domain, DT>::linind_2_subind(int linind, int* subind)
  }
}

// Decomposes a linear index into subindices, written into the fixed-size
// array. N selects the interpretation: branch subindices when N equals the
// number of branch domains, leaf subindices otherwise.
template <typename scalartype, class domain, DistType DT>
template <std::size_t N>
void function<scalartype, domain, DT>::linind_2_subind(int linind, std::array<int, N>& subind) const {
  // Bug fix: the second operand was "|| dmn.get_Nb_leaf_domains()", which is
  // truthy for any non-empty domain and made the assert vacuous.
  assert(N == dmn.get_Nb_branch_domains() || N == dmn.get_Nb_leaf_domains());
  const auto& size_sbdm =
      (N == dmn.get_Nb_branch_domains() ? dmn.get_branch_domain_sizes() : dmn.get_leaf_domain_sizes());
  // Peel off one subindex per domain, least-significant domain first.
  for (size_t i = 0; i < size_sbdm.size(); ++i) {
    subind[i] = linind % size_sbdm[i];
    linind = (linind - subind[i]) / size_sbdm[i];
  }
}

// TODO: Resize vector if necessary.
template <typename scalartype, class domain, DistType DT>
void function<scalartype, domain, DT>::linind_2_subind(int linind, std::vector<int>& subind) const {
@@ -724,6 +800,15 @@ size_t function<scalartype, domain, DT>::subind_2_linind(const std::vector<int>&
  return linind;
}

// Maps branch-domain subindices to the linear index of the element at those
// subindices, using the precomputed branch-domain step (stride) table.
// Assumes subind holds one entry per branch domain.
template <typename scalartype, class domain, DistType DT>
size_t function<scalartype, domain, DT>::branch_subind_2_linind(const std::vector<int>& subind) const {
  const auto& steps = dmn.get_branch_domain_steps();
  int result = 0;
  for (std::size_t dim = 0; dim < steps.size(); ++dim)
    result += subind[dim] * steps[dim];
  return result;
}

template <typename scalartype, class domain, DistType DT>
scalartype& function<scalartype, domain, DT>::operator()(const int* const subind) {
  auto& step_sbdm = dmn.get_leaf_domain_steps();
@@ -743,6 +828,21 @@ const scalartype& function<scalartype, domain, DT>::operator()(const int* const
  return fnc_values_[linind];
}

// Element access by a vector of subindices. Accepts either a full set of leaf
// subindices (size == Nb_sbdms) or a set of branch subindices; the
// interpretation is chosen by the number of indices supplied.
template <typename scalartype, class domain, DistType DT>
const scalartype& function<scalartype, domain, DT>::operator()(const std::vector<int>& subind) const {
  int linind = 0;
  if (subind.size() == Nb_sbdms) {
    subind_2_linind(subind, linind);
  }
  else if (subind.size() == dmn.get_Nb_branch_domains()) {
    // Bug fix: the return value of branch_subind_2_linind was previously
    // discarded, leaving linind uninitialized and reading garbage memory.
    linind = branch_subind_2_linind(subind);
  }
  else
    throw std::runtime_error("number of indices matches neither branches nor leaves");
  assert(linind >= 0 && linind < size());
  return fnc_values_[linind];
}

template <typename scalartype, class domain, DistType DT>
void function<scalartype, domain, DT>::operator+=(const function<scalartype, domain, DT>& other) {
  for (int linind = 0; linind < size(); ++linind)
@@ -812,6 +912,42 @@ bool function<scalartype, domain, DT>::operator==(const function<scalartype, dom
  return true;
}

// Copies a 1-D slice of the function along domain sbdm_index into fnc_vals.
// subind fixes the position in every other domain; its entry at sbdm_index is
// ignored (overwritten with 0). Whether sbdm_index and subind refer to branch
// or leaf domains is chosen by subind's size. fnc_vals must have room for
// size_sbdm[sbdm_index] elements — the caller is responsible for allocation.
template <typename scalartype, class domain, DistType DT>
template <typename new_scalartype>
void function<scalartype, domain, DT>::slice(const int sbdm_index, std::vector<int> subind,
                                             new_scalartype* fnc_vals) const {
  assert(sbdm_index >= 0);
  assert(sbdm_index < Nb_sbdms);

  int linind = 0;

  if (subind.size() < Nb_sbdms && subind.size() == dmn.get_Nb_branch_domains()) {
    // Branch-domain slice: stride through the flat storage with the branch step.
    auto& size_sbdm = dmn.get_branch_domain_sizes();
    auto& step_sbdm = dmn.get_branch_domain_steps();

    subind[sbdm_index] = 0;
    linind = branch_subind_2_linind(subind);

    for (int i = 0; i < size_sbdm[sbdm_index]; i++)
      fnc_vals[i] =
          ScalarCast<new_scalartype>::execute(fnc_values_[linind + i * step_sbdm[sbdm_index]]);
  }
  else if (subind.size() == Nb_sbdms) {
    // Leaf-domain slice: same pattern with leaf sizes/steps.
    auto& size_sbdm = dmn.get_leaf_domain_sizes();
    auto& step_sbdm = dmn.get_leaf_domain_steps();

    subind[sbdm_index] = 0;
    linind = subind_2_linind(subind);

    for (int i = 0; i < size_sbdm[sbdm_index]; i++)
      fnc_vals[i] =
          ScalarCast<new_scalartype>::execute(fnc_values_[linind + i * step_sbdm[sbdm_index]]);
  }
  else
    throw std::runtime_error(
        "function::slice can only be called with subind of size of leaf or branch domains.");
}

template <typename scalartype, class domain, DistType DT>
template <typename new_scalartype>
void function<scalartype, domain, DT>::slice(const int sbdm_index, int* subind,
@@ -825,7 +961,6 @@ void function<scalartype, domain, DT>::slice(const int sbdm_index, int* subind,
  int linind = 0;
  subind[sbdm_index] = 0;
  subind_2_linind(subind, linind);

  for (int i = 0; i < size_sbdm[sbdm_index]; i++)
    fnc_vals[i] =
        ScalarCast<new_scalartype>::execute(fnc_values_[linind + i * step_sbdm[sbdm_index]]);
@@ -855,11 +990,10 @@ void function<scalartype, domain, DT>::slice(const int sbdm_index_1, const int s
  int step_sbdm_2 = step_sbdm[sbdm_index_2];

  new_scalartype* fnc_ptr_left = NULL;
  new_scalartype* fnc_ptr_right = NULL;

  for (int j = 0; j < size_sbdm_2; j++) {
    fnc_ptr_left = &fnc_vals[0 + j * size_sbdm_1];
    fnc_ptr_right = &fnc_values_[linind + j * step_sbdm_2];
    const new_scalartype* fnc_ptr_right{fnc_values_.data() + linind + j * step_sbdm_2};

    for (int i = 0; i < size_sbdm_1; i++)
      fnc_ptr_left[i] = fnc_ptr_right[i * step_sbdm_1];
@@ -868,6 +1002,97 @@ void function<scalartype, domain, DT>::slice(const int sbdm_index_1, const int s
  }
}

// Copies a 2-D slice of the function along domains sbdm_index_1 (fast axis)
// and sbdm_index_2 (slow axis) into fnc_vals, laid out as
// fnc_vals[i + j * size_sbdm_1]. subind fixes the position in every other
// domain; its entries at the two sliced indices are ignored. Branch vs. leaf
// interpretation is chosen by subind's size; note that unlike the 1-D
// overload, a size mismatch falls through to the leaf path rather than
// throwing — presumably subind is expected to be leaf-sized there (TODO
// confirm). fnc_vals must hold size_sbdm_1 * size_sbdm_2 elements.
template <typename scalartype, class domain, DistType DT>
template <typename new_scalartype>
void function<scalartype, domain, DT>::slice(const int sbdm_index_1, const int sbdm_index_2,
                                             std::vector<int> subind, new_scalartype* fnc_vals) const {
  assert(sbdm_index_1 >= 0);
  assert(sbdm_index_2 >= 0);
  assert(sbdm_index_1 < Nb_sbdms);
  assert(sbdm_index_2 < Nb_sbdms);

  int linind = 0;
  subind[sbdm_index_1] = 0;
  subind[sbdm_index_2] = 0;

  if (subind.size() < Nb_sbdms && subind.size() == dmn.get_Nb_branch_domains()) {
    // Branch-domain slice.
    linind = branch_subind_2_linind(subind);

    auto& size_sbdm = dmn.get_branch_domain_sizes();
    auto& step_sbdm = dmn.get_branch_domain_steps();

    int size_sbdm_1 = size_sbdm[sbdm_index_1];
    int size_sbdm_2 = size_sbdm[sbdm_index_2];

    int step_sbdm_1 = step_sbdm[sbdm_index_1];
    int step_sbdm_2 = step_sbdm[sbdm_index_2];

    new_scalartype* fnc_ptr_left = NULL;

    // Column-by-column copy: one contiguous run of output per j.
    for (int j = 0; j < size_sbdm_2; j++) {
      fnc_ptr_left = &fnc_vals[0 + j * size_sbdm_1];
      const new_scalartype* fnc_ptr_right{fnc_values_.data() + linind + j * step_sbdm_2};

      for (int i = 0; i < size_sbdm_1; i++)
        fnc_ptr_left[i] = fnc_ptr_right[i * step_sbdm_1];
      //       fnc_vals[i+j*size_sbdm[sbdm_index_1]] = fnc_values_[linind + i*step_sbdm[sbdm_index_1]
      //       + j*step_sbdm[sbdm_index_2]];
    }
  } else {
    // Leaf-domain slice: identical pattern with leaf sizes/steps.
    linind = subind_2_linind(subind);

    auto& size_sbdm = dmn.get_leaf_domain_sizes();
    auto& step_sbdm = dmn.get_leaf_domain_steps();
    int size_sbdm_1 = size_sbdm[sbdm_index_1];
    int size_sbdm_2 = size_sbdm[sbdm_index_2];

    int step_sbdm_1 = step_sbdm[sbdm_index_1];
    int step_sbdm_2 = step_sbdm[sbdm_index_2];

    new_scalartype* fnc_ptr_left = NULL;

    for (int j = 0; j < size_sbdm_2; j++) {
      fnc_ptr_left = &fnc_vals[0 + j * size_sbdm_1];
      const new_scalartype* fnc_ptr_right{fnc_values_.data() + linind + j * step_sbdm_2};

      for (int i = 0; i < size_sbdm_1; i++)
        fnc_ptr_left[i] = fnc_ptr_right[i * step_sbdm_1];
      //       fnc_vals[i+j*size_sbdm[sbdm_index_1]] = fnc_values_[linind + i*step_sbdm[sbdm_index_1]
      //       + j*step_sbdm[sbdm_index_2]];
    }
  }
}

// Inverse of the 1-D slice: writes the values in fnc_vals back into the
// function along domain sbdm_index, with all other domains fixed by subind
// (the entry at sbdm_index is ignored). Branch vs. leaf interpretation is
// chosen by subind's size; a size matching neither falls through to the leaf
// path — presumably subind is expected to be leaf-sized there (TODO confirm).
template <typename scalartype, class domain, DistType DT>
template <typename new_scalartype>
void function<scalartype, domain, DT>::distribute(const int sbdm_index, std::vector<int> subind,
                                                  const new_scalartype* fnc_vals) {
  assert(sbdm_index >= 0);
  assert(sbdm_index < Nb_sbdms);

  int linind = 0;
  subind[sbdm_index] = 0;
  
  if (subind.size() < Nb_sbdms && subind.size() == dmn.get_Nb_branch_domains()) {
    // Branch-domain distribute.
    linind = branch_subind_2_linind(subind);

    auto& size_sbdm = dmn.get_branch_domain_sizes();
    auto& step_sbdm = dmn.get_branch_domain_steps();

    for (int i = 0; i < size_sbdm[sbdm_index]; i++)
      fnc_values_[linind + i * step_sbdm[sbdm_index]] = ScalarCast<scalartype>::execute(fnc_vals[i]);
  }
  else {
    // Leaf-domain distribute: same pattern with leaf sizes/steps.
    linind = subind_2_linind(subind);

    auto& size_sbdm = dmn.get_leaf_domain_sizes();
    auto& step_sbdm = dmn.get_leaf_domain_steps();

    for (int i = 0; i < size_sbdm[sbdm_index]; i++)
      fnc_values_[linind + i * step_sbdm[sbdm_index]] = ScalarCast<scalartype>::execute(fnc_vals[i]);
  }
}

template <typename scalartype, class domain, DistType DT>
template <typename new_scalartype>
void function<scalartype, domain, DT>::distribute(const int sbdm_index, int* subind,
@@ -886,6 +1111,33 @@ void function<scalartype, domain, DT>::distribute(const int sbdm_index, int* sub
    fnc_values_[linind + i * step_sbdm[sbdm_index]] = ScalarCast<scalartype>::execute(fnc_vals[i]);
}

// Inverse of the 2-D slice: writes fnc_vals (laid out as
// fnc_vals[i + j * size_sbdm_1]) back into the function along domains
// sbdm_index_1 and sbdm_index_2, with all other domains fixed by subind.
// Branch vs. leaf interpretation is chosen by subind's size.
template <typename scalartype, class domain, DistType DT>
template <typename new_scalartype>
void function<scalartype, domain, DT>::distribute(const int sbdm_index_1, const int sbdm_index_2,
                                                  std::vector<int> subind,
                                                  const new_scalartype* fnc_vals) {
  assert(sbdm_index_1 >= 0);
  assert(sbdm_index_2 >= 0);
  assert(sbdm_index_1 < Nb_sbdms);
  assert(sbdm_index_2 < Nb_sbdms);

  int linind = 0;
  subind[sbdm_index_1] = 0;
  subind[sbdm_index_2] = 0;

  if (subind.size() < Nb_sbdms && subind.size() == dmn.get_Nb_branch_domains()) {
    // Branch-domain distribute.
    linind = branch_subind_2_linind(subind);

    auto& size_sbdm = dmn.get_branch_domain_sizes();
    auto& step_sbdm = dmn.get_branch_domain_steps();

    for (int i = 0; i < size_sbdm[sbdm_index_1]; i++)
      for (int j = 0; j < size_sbdm[sbdm_index_2]; j++)
        fnc_values_[linind + i * step_sbdm[sbdm_index_1] + j * step_sbdm[sbdm_index_2]] =
            ScalarCast<scalartype>::execute(fnc_vals[i + j * size_sbdm[sbdm_index_1]]);
  }
  else {
    // Bug fix: the leaf-domain case previously fell through and silently did
    // nothing. Mirror the leaf path of the other slice/distribute overloads,
    // and apply ScalarCast for consistency with the 1-D distribute.
    linind = subind_2_linind(subind);

    auto& size_sbdm = dmn.get_leaf_domain_sizes();
    auto& step_sbdm = dmn.get_leaf_domain_steps();

    for (int i = 0; i < size_sbdm[sbdm_index_1]; i++)
      for (int j = 0; j < size_sbdm[sbdm_index_2]; j++)
        fnc_values_[linind + i * step_sbdm[sbdm_index_1] + j * step_sbdm[sbdm_index_2]] =
            ScalarCast<scalartype>::execute(fnc_vals[i + j * size_sbdm[sbdm_index_1]]);
  }
}
  
template <typename scalartype, class domain, DistType DT>
template <typename new_scalartype>
void function<scalartype, domain, DT>::distribute(const int sbdm_index_1, const int sbdm_index_2,
+10 −1
Original line number Diff line number Diff line
@@ -65,11 +65,20 @@ public:
  bool open_group(const std::string& name);
  void close_group();

  void begin_step();
  void end_step();

  std::string get_path(const std::string& name = "");

  template <typename arbitrary_struct_t>
  static void from_file(adios2::ADIOS& adios, arbitrary_struct_t& arbitrary_struct, std::string file_name);

  /** Return number of steps in the file
   *  file may have multiple iterations in it.
   *  Each iteration is a step in the adios file for many variables.
   */
  std::size_t getStepCount() { return file_.Steps(); }
  
  // `execute` returns true if the object is read correctly.

  template <typename Scalartype>
Loading