Commit e1f93abd authored by gbalduzz

Merge remote-tracking branch 'origin/master' into single_precision_ctaux

parents ea60d57a 51d00850
+3 −3
@@ -14,10 +14,10 @@ mark_as_advanced(DCA_ESSL_INCLUDES)
 
 # Use jsrun for executing the tests.
 set(TEST_RUNNER "jsrun" CACHE STRING "Command for executing (MPI) programs.")
-set(MPIEXEC_NUMPROC_FLAG "-a" CACHE STRING
+set(MPIEXEC_NUMPROC_FLAG "-n" CACHE STRING
   "Flag used by TEST_RUNNER to specify the number of processes.")
-# Use 1 resource set with 1 GPU and 8 cores for executing the tests.
-set(MPIEXEC_PREFLAGS "-n 1 -g 1 -c 8" CACHE STRING
+# Use 1 resource set with 1 GPU and 5 cores for executing the tests.
+set(MPIEXEC_PREFLAGS "-a 1 -g 1 -c 5" CACHE STRING
   "Flags to pass to TEST_RUNNER directly before the executable to run.")
 # The flag "--smpiargs=none" is needed to execute tests with no MPI functionalities.
 set(SMPIARGS_FLAG_NOMPI "--smpiargs=none" CACHE STRING
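With CMake's usual composition of MPI test commands (TEST_RUNNER, then the numproc flag and rank count, then MPIEXEC_PREFLAGS), a four-rank test would now launch as, for example:

    jsrun -n 4 -a 1 -g 1 -c 5 ./mpi_test_example

(the executable name is illustrative). In jsrun terms, -n counts resource sets and -a counts tasks per resource set, so each MPI rank now gets its own resource set with one GPU and five cores, instead of all ranks sharing a single resource set as before.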
dca/linalg/matrix.hpp +3 −2
@@ -38,6 +38,7 @@ public:
   using ThisType = Matrix<ScalarType, device_name>;
   using ValueType = ScalarType;
   using Allocator = util::DefaultAllocator<ScalarType, device_name>;
+  constexpr static DeviceType device = device_name;
 
   Matrix(const std::string& name = default_name_);
@@ -517,7 +518,7 @@ std::size_t Matrix<ScalarType, device_name>::deviceFingerprint() const {
     return 0;
 }
 
-}  // linalg
-}  // dca
+}  // namespace linalg
+}  // namespace dca
 
 #endif  // DCA_LINALG_MATRIX_HPP
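The new device constant (together with the existing ValueType alias) lets generic code inspect a matrix's device at compile time. A minimal sketch, with a hypothetical helper name:

    #include <type_traits>
    #include "dca/linalg/matrix.hpp"

    // Compile-time compatibility check between two matrix types.
    template <class M1, class M2>
    void assertSameDeviceAndScalar() {
      static_assert(M1::device == M2::device, "Matrices must live on the same device.");
      static_assert(std::is_same<typename M1::ValueType, typename M2::ValueType>::value,
                    "Matrices must share a scalar type.");
    }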
dca/linalg/matrix_view.hpp +3 −0
@@ -28,6 +28,9 @@ namespace linalg {
 template <typename ScalarType, DeviceType device_name = linalg::CPU>
 class MatrixView {
 public:
+  using ValueType = ScalarType;
+  constexpr static DeviceType device = device_name;
+
   using Pair = std::pair<int, int>;
   MatrixView(ScalarType* data, Pair size);
   MatrixView(ScalarType* data, Pair size, int ld);
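MatrixView now exposes the same two members, so one template can serve owning matrices and views alike. A hedged sketch (the helper name and element access via operator() are assumptions):

    #include <algorithm>
    #include "dca/linalg/matrix.hpp"

    // Scales the diagonal of any CPU matrix-like type exposing ValueType and device.
    template <class MatrixLike>
    void scaleDiagonal(MatrixLike& m, typename MatrixLike::ValueType factor) {
      static_assert(MatrixLike::device == dca::linalg::CPU, "CPU-only sketch.");
      const int n = std::min(m.nrRows(), m.nrCols());
      for (int i = 0; i < n; ++i)
        m(i, i) *= factor;
    }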
dca/linalg/matrixop.hpp +81 −4
@@ -15,6 +15,7 @@
 // - real
 // - insertCol, insertRow (for CPU matrices only)
 // - inverse
+// - inverseAndDeterminant
 // - removeCol, removeCols, removeRow, removeRows, removeRowAndCol, removeRowAndCols
 // - scaleCol, scaleRow, scaleRows
 // - swapCol, swapRow, swapRowAndCol
@@ -22,6 +23,7 @@
 // - gemm
 // - multiply
 // - trsm
+// - determinant
 // - eigensolver (non-symmetric / symmetric / Hermitian)
 // - pseudoInverse
 
@@ -128,10 +130,9 @@ inline void copyRow(const Matrix<Scalar, device_name>& mat_x, int ix,
 // Preconditions: i_x.size() <= i_y.size(), mat_x.nrCols() == mat_y.nrCols()
 //                0 <= i_x[i] < mat_x.nrRows() for 0 <= i < i_x.size(),
 //                0 <= i_y[i] < mat_y.nrRows() for 0 <= i < i_x.size().
-template <typename Scalar>
-inline void copyRows(const Matrix<Scalar, CPU>& mat_x, const Vector<int, CPU>& i_x,
-                     Matrix<Scalar, CPU>& mat_y, const Vector<int, CPU>& i_y, int /*thread_id*/ = 0,
-                     int /*stream_id*/ = 0) {
+template <typename Scalar, class Vec>
+inline void copyRows(const Matrix<Scalar, CPU>& mat_x, const Vec& i_x, Matrix<Scalar, CPU>& mat_y,
+                     const Vec& i_y, int /*thread_id*/ = 0, int /*stream_id*/ = 0) {
   assert(i_x.size() <= i_y.size());
   assert(mat_x.nrCols() == mat_y.nrCols());
 
@@ -150,6 +151,19 @@ inline void copyRows(const Matrix<Scalar, GPU>& mat_x, const Vector<int, GPU>& i
   blas::copyRows(mat_x.nrCols(), i_x.size(), i_x.ptr(), mat_x.ptr(), mat_x.leadingDimension(),
                  i_y.ptr(), mat_y.ptr(), mat_y.leadingDimension(), thread_id, stream_id);
 }
+
+// Copies the i_x[i]-th row of mat_x into the i-th row of mat_y, for 0 <= i < i_x.size().
+// In/Out: mat_y
+// Preconditions: mat_x.nrCols() == mat_y.nrCols()
+//                0 <= i_x[i] < mat_x.nrRows() for 0 <= i < i_x.size().
+template <typename Scalar>
+inline void copyRows(const Matrix<Scalar, GPU>& mat_x, const Vector<int, GPU>& i_x,
+                     Matrix<Scalar, GPU>& mat_y, int thread_id = 0, int stream_id = 0) {
+  assert(mat_x.nrCols() == mat_y.nrCols());
+
+  blas::copyRows(mat_x.nrCols(), i_x.size(), i_x.ptr(), mat_x.ptr(), mat_x.leadingDimension(),
+                 mat_y.ptr(), mat_y.leadingDimension(), thread_id, stream_id);
+}
 #endif  // DCA_HAVE_CUDA
 
 // Returns the difference of two matrices in terms of max_i,j(|a(i, j) - b(i, j)|).
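Two usage sketches for the changed copyRows overloads, assuming these functions live in dca::linalg::matrixop and that Vec only needs size() and operator[] (function names are illustrative):

    #include <vector>
    #include "dca/linalg/matrix.hpp"
    #include "dca/linalg/matrixop.hpp"
    #include "dca/linalg/vector.hpp"

    // CPU: with the relaxed Vec parameter, plain std::vector<int> index lists work.
    void copySelected(const dca::linalg::Matrix<double, dca::linalg::CPU>& src,
                      dca::linalg::Matrix<double, dca::linalg::CPU>& dst) {
      const std::vector<int> from{0, 2, 4};
      const std::vector<int> to{1, 3, 5};
      dca::linalg::matrixop::copyRows(src, from, dst, to);
    }

    #ifdef DCA_HAVE_CUDA
    // GPU: the new overload packs the rows listed in `indices` into
    // rows 0, 1, ..., indices.size() - 1 of `packed`.
    void packRows(const dca::linalg::Matrix<double, dca::linalg::GPU>& src,
                  const dca::linalg::Vector<int, dca::linalg::GPU>& indices,
                  dca::linalg::Matrix<double, dca::linalg::GPU>& packed) {
      dca::linalg::matrixop::copyRows(src, indices, packed);
    }
    #endif  // DCA_HAVE_CUDA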
@@ -325,6 +339,33 @@ void smallInverse(Matrix<Scalar, CPU>& m_inv) {
   smallInverse(m_inv, ipiv, work);
 }
 
+// Computes in place the inverse of mat and the determinant of the inverse.
+// In/Out: mat
+// Returns: the determinant of mat^-1
+// Precondition: mat is a non-singular real matrix.
+template <typename Scalar, template <typename, DeviceType> class MatrixType>
+Scalar inverseAndDeterminant(MatrixType<Scalar, CPU>& mat) {
+  assert(mat.is_square());
+  std::vector<int> ipiv(mat.nrRows());
+
+  lapack::UseDevice<CPU>::getrf(mat.nrRows(), mat.nrCols(), mat.ptr(), mat.leadingDimension(),
+                                ipiv.data());
+
+  Scalar det = 1;
+  for (int i = 0; i < mat.nrCols(); ++i) {
+    det *= mat(i, i);
+    if (ipiv[i] != i + 1)
+      det *= -1;
+  }
+
+  const int lwork = util::getInverseWorkSize(mat);
+  std::vector<Scalar> work(lwork);
+  lapack::UseDevice<CPU>::getri(mat.nrRows(), mat.ptr(), mat.leadingDimension(), ipiv.data(),
+                                work.data(), lwork);
+
+  return 1. / det;
+}
+
 // Remove the j-th column. The data is moved accordingly.
 // In/Out: mat
 // Preconditions: 0 <= j < mat.nrCols().
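A usage sketch for inverseAndDeterminant; the 2x2 values are illustrative and Matrix(int) is assumed to construct a square matrix:

    #include "dca/linalg/matrix.hpp"
    #include "dca/linalg/matrixop.hpp"

    void invertExample() {
      dca::linalg::Matrix<double, dca::linalg::CPU> m(2);
      m(0, 0) = 4.; m(0, 1) = 3.;
      m(1, 0) = 6.; m(1, 1) = 3.;  // det(m) = 4 * 3 - 3 * 6 = -6

      // m is overwritten with m^-1; the LU pivot signs are folded into det,
      // so the return value is det(m^-1) = 1 / det(m) = -1/6.
      const double det_inv = dca::linalg::matrixop::inverseAndDeterminant(m);
      (void)det_inv;
    }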
@@ -1160,6 +1201,42 @@ void pseudoInverse(const Matrix<Scalar, CPU>& a, Matrix<Scalar, CPU>& a_inv, dou
   }
 }
 
+// Computes the determinant of the matrix in place.
+// Returns: the determinant.
+// Postcondition: M is overwritten by its LU decomposition.
+template <template <typename, DeviceType> class MatrixType, typename Scalar, DeviceType device>
+double determinantIP(MatrixType<Scalar, device>& M) {
+  assert(M.nrCols() == M.nrRows());
+  const int n = M.nrCols();
+  std::vector<int> ipiv(n);
+
+  try {
+    lapack::getrf(n, n, M.ptr(), M.leadingDimension(), ipiv.data());
+  }
+  catch (lapack::util::LapackException& err) {
+    if (err.info() > 0)
+      return 0;
+    else
+      throw(std::logic_error("LU decomposition failed."));
+  }
+
+  double det = 1.;
+  for (int i = 0; i < n; i++) {
+    det *= M(i, i);
+    if (ipiv[i] != i + 1)
+      det *= -1;
+  }
+  return det;
+}
+
+// Copies the matrix and computes its determinant.
+// Returns: the determinant.
+template <typename Scalar>
+double determinant(const Matrix<Scalar, CPU>& M) {
+  Matrix<Scalar, CPU> M_copy(M);
+  return determinantIP(M_copy);
+}
+
 template <typename Scalar>
 bool areNear(const Matrix<Scalar, CPU>& A, const Matrix<Scalar, CPU>& B, const double err = 1e-16) {
   if (A.size() != B.size())
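The two helpers differ only in ownership: determinantIP factorizes the argument in place, while determinant works on a copy. Note also that a getrf failure with info > 0 (an exactly singular matrix) yields a determinant of 0 rather than an exception. A short sketch (function name illustrative):

    #include "dca/linalg/matrix.hpp"
    #include "dca/linalg/matrixop.hpp"

    void determinantExample(dca::linalg::Matrix<double, dca::linalg::CPU>& m) {
      const double d = dca::linalg::matrixop::determinant(m);       // m is left untouched.
      const double d_ip = dca::linalg::matrixop::determinantIP(m);  // m now holds its LU factors.
      // For a non-singular m the two values agree.
      (void)d;
      (void)d_ip;
    }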
dca/linalg/util/cuda_stream.hpp +24 −6
@@ -11,16 +11,19 @@
 
 #ifndef DCA_LINALG_UTIL_CUDA_STREAM_HPP
 #define DCA_LINALG_UTIL_CUDA_STREAM_HPP
-#ifdef DCA_HAVE_CUDA
 
-#include <cuda.h>
-#include <magma.h>
+#ifdef DCA_HAVE_CUDA
+#include <cuda_runtime.h>
+#include "dca/linalg/util/error_cuda.hpp"
+#endif  // DCA_HAVE_CUDA
 
 namespace dca {
 namespace linalg {
 namespace util {
 // dca::linalg::util::
 
+#ifdef DCA_HAVE_CUDA
+
 class CudaStream {
 public:
   CudaStream() {
@@ -33,6 +36,10 @@ public:
     std::swap(stream_, other.stream_);
   }
 
+  void sync() const {
+    checkRC(cudaStreamSynchronize(stream_));
+  }
+
   ~CudaStream() {
     if (stream_)
       cudaStreamDestroy(stream_);
@@ -46,9 +53,20 @@ private:
   cudaStream_t stream_ = nullptr;
 };
 
-}  // util
-}  // linalg
-}  // dca
+#else  // DCA_HAVE_CUDA
+
+// Mock object.
+class CudaStream {
+public:
+  CudaStream() = default;
+
+  void sync() const {}
+};
+
+#endif  // DCA_HAVE_CUDA
 
+}  // namespace util
+}  // namespace linalg
+}  // namespace dca
 
 #endif  // DCA_LINALG_UTIL_CUDA_STREAM_HPP
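The mock keeps stream-aware call sites free of #ifdefs in CUDA-less builds. For illustration (hypothetical function name):

    #include "dca/linalg/util/cuda_stream.hpp"

    // With DCA_HAVE_CUDA, sync() calls cudaStreamSynchronize on the wrapped
    // stream; without it, the mock's sync() is a no-op.
    void waitFor(const dca::linalg::util::CudaStream& stream) {
      stream.sync();
    }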