Commit 5b13616d authored by gbalduzz's avatar gbalduzz
Browse files

Merge remote-tracking branch 'origin/master' into new_models

parents 88778f2f 1dde202b
Loading
Loading
Loading
Loading
+10 −3
Original line number Diff line number Diff line
@@ -32,6 +32,9 @@ inline void copyRows(int row_size, int n_rows, const int* i_x, const std::comple
  auto cu_y = util::castCudaComplex(y);
  copyRows(row_size, n_rows, i_x, cu_x, ldx, i_y, cu_y, ldy, thread_id, stream_id);
}
// Copies the n_rows rows of x indexed by i_x into y.
// ldx/ldy are the leading dimensions of x and y; thread_id/stream_id select the CUDA stream.
// NOTE(review): unlike the overload above there is no i_y index list, so the destination
// rows are presumably the consecutive rows 0..n_rows-1 of y — confirm against the kernel.
template <typename Type>
void copyRows(int row_size, int n_rows, const int* i_x, const Type* x, int ldx, Type* y, int ldy,
              int thread_id, int stream_id);

template <typename Type>
void copyCols(int col_size, int n_cols, const int* j_x, const Type* x, int ldx, const int* j_y,
@@ -43,6 +46,9 @@ inline void copyCols(int col_size, int n_cols, const int* j_x, const std::comple
  auto cu_y = util::castCudaComplex(y);
  copyCols(col_size, n_cols, j_x, cu_x, ldx, j_y, cu_y, ldy, thread_id, stream_id);
}
// Copies the n_cols columns of x indexed by j_x into y.
// ldx/ldy are the leading dimensions of x and y; thread_id/stream_id select the CUDA stream.
// NOTE(review): unlike the overload above there is no j_y index list, so the destination
// columns are presumably the consecutive columns 0..n_cols-1 of y — confirm against the kernel.
template <typename Type>
void copyCols(int col_size, int n_cols, const int* j_x, const Type* x, int ldx, Type* y, int ldy,
              int thread_id, int stream_id);

// Operates in place on the m x n matrix a with leading dimension lda.
// NOTE(review): presumably shifts the matrix content one position to the left;
// the exact semantics are defined by the kernel implementation, which is not visible here.
template <typename Type>
void moveLeft(int m, int n, Type* a, int lda);
@@ -90,8 +96,9 @@ inline void swapCols(int col_size, int n_cols, const int* j_1, const int* j_2,
  auto cu_a = util::castCudaComplex(a);
  swapCols(col_size, n_cols, j_1, j_2, cu_a, lda, thread_id, stream_id);
}
}  // blas
}  // linalg
}  // dca

}  // namespace blas
}  // namespace linalg
}  // namespace dca

#endif  // DCA_LINALG_BLAS_KERNELS_GPU_HPP
+16 −4
Original line number Diff line number Diff line
@@ -87,10 +87,9 @@ inline void copyCol(const Matrix<Scalar, device_name>& mat_x, int jx,
// Preconditions: j_x.size() <= j_y.size(), mat_x.nrRows() == mat_y.nrRows()
//                0 <= j_x[i] < mat_x.nrCols() for 0 <= i < j_x.size(),
//                0 <= j_y[i] < mat_y.nrCols() for 0 <= i < j_x.size().
template <typename Scalar>
inline void copyCols(const Matrix<Scalar, CPU>& mat_x, const Vector<int, CPU>& j_x,
                     Matrix<Scalar, CPU>& mat_y, const Vector<int, CPU>& j_y, int /*thread_id*/ = 0,
                     int /*stream_id*/ = 0) {
template <typename Scalar, class Vec>
inline void copyCols(const Matrix<Scalar, CPU>& mat_x, const Vec& j_x, Matrix<Scalar, CPU>& mat_y,
                     const Vec& j_y, int /*thread_id*/ = 0, int /*stream_id*/ = 0) {
  assert(j_x.size() <= j_y.size());

  for (int ind_j = 0; ind_j < j_x.size(); ++ind_j)
@@ -107,6 +106,19 @@ inline void copyCols(const Matrix<Scalar, GPU>& mat_x, const Vector<int, GPU>& j
  blas::copyCols(mat_x.nrRows(), j_x.size(), j_x.ptr(), mat_x.ptr(), mat_x.leadingDimension(),
                 j_y.ptr(), mat_y.ptr(), mat_y.leadingDimension(), thread_id, stream_id);
}

// Copies the columns of mat_x indexed by j_x into mat_y, for 0 <= i < j_x.size().
// The destination indices are implicit: no j_y list is passed to the kernel, so column i
// of mat_y presumably receives column j_x[i] of mat_x — confirm against the kernel.
// In/Out: mat_y
// Preconditions: mat_x.nrRows() == mat_y.nrRows()
//                0 <= j_x[i] < mat_x.nrCols() for 0 <= i < j_x.size(),
//                j_x.size() <= mat_y.nrCols()  // NOTE(review): not asserted — verify callers.
template <typename Scalar>
inline void copyCols(const Matrix<Scalar, GPU>& mat_x, const Vector<int, GPU>& j_x,
                     Matrix<Scalar, GPU>& mat_y, int thread_id = 0, int stream_id = 0) {
  assert(mat_x.nrRows() == mat_y.nrRows());

  blas::copyCols(mat_x.nrRows(), j_x.size(), j_x.ptr(), mat_x.ptr(), mat_x.leadingDimension(),
                 mat_y.ptr(), mat_y.leadingDimension(), thread_id, stream_id);
}
#endif  // DCA_HAVE_CUDA

// Copies the ix-th row of mat_x into the iy-th row of mat_y.
+24 −1
Original line number Diff line number Diff line
@@ -7,6 +7,7 @@
//
// Author: Peter Staar (taa@zurich.ibm.com)
//         Urs R. Haehner (haehneru@itp.phys.ethz.ch)
//         Giovanni Balduzzi (gbalduzz@itp.phys.ethz.ch)
//
// This file provides utility functions to do various vector operations.

@@ -17,6 +18,7 @@
#include <cmath>
#include <complex>
#include <iostream>
#include <functional>
#include <type_traits>
#include <vector>

@@ -270,6 +272,27 @@ auto operator-(const std::vector<T>& x, const std::vector<T>& y) {
  return subtract(y, x);  // Note: subtract(x, y) is defined as y - x;
}

// Returns true if pred evaluates to true for any element of v. Returns false if v is empty.
//
// Template parameters:
//   Vec:  any type usable in a range-based for loop.
//   Pred: any callable taking an element of v and returning a value convertible to bool.
//
// The predicate is a deduced template parameter rather than std::function: this avoids
// type erasure (indirect call, possible heap allocation) and lets the compiler inline
// lambdas; all existing lambda/functor/function-pointer callers keep working unchanged.
template <class Vec, class Pred>
bool any(const Vec& v, const Pred& pred) {
  for (const auto& x : v) {
    if (pred(x)) {
      return true;  // Short-circuit on the first match.
    }
  }
  return false;
}

// Returns true if pred evaluates to true for all elements of v. Returns true if v is empty.
//
// Template parameters:
//   Vec:  any type usable in a range-based for loop.
//   Pred: any callable taking an element of v and returning a value convertible to bool.
//
// The predicate is a deduced template parameter rather than std::function: this avoids
// type erasure (indirect call, possible heap allocation) and lets the compiler inline
// lambdas; all existing lambda/functor/function-pointer callers keep working unchanged.
template <class Vec, class Pred>
bool all(const Vec& v, const Pred& pred) {
  for (const auto& x : v) {
    if (!pred(x)) {
      return false;  // Short-circuit on the first counterexample.
    }
  }
  return true;
}

}  // namespace util
}  // namespace math
+5 −1
Original line number Diff line number Diff line
@@ -19,11 +19,15 @@ namespace parallel {
// dca::parallel::

// Returns a list of cores id for which the calling thread has affinity.
// If the macro __linux__ is not defined, returns an empty vector.
std::vector<int> get_affinity();

// Sets the affinity list of the current thread.
// If the macro __linux__ is not defined, performs a no-op.
void set_affinity(const std::vector<int>& cores);

// Number of cores used by this process.
// Returns the number of visible hardware cores.
// If the macro __linux__ is not defined, returns std::thread::hardware_concurrency().
int get_core_count();

}  // namespace parallel
+2 −2
Original line number Diff line number Diff line
@@ -232,9 +232,9 @@ public: // Optional members getters.
              "non_density_interaction"));
    return *non_density_interactions_;
  }
  const auto& get_nondensity_interactions() const {
  const auto& get_non_density_interactions() const {
    assert(non_density_interactions_);
    return non_density_interactions_;
    return *non_density_interactions_;
  }

  bool has_non_density_interactions() const {
Loading