Commit 4a42879a authored by Doak, Peter W.'s avatar Doak, Peter W.
Browse files

allowing allocator used by Matrix on CPU to be selected

parent 1aa28196
Loading
Loading
Loading
Loading
+120 −118
Original line number Diff line number Diff line
// Copyright (C) 2018 ETH Zurich
// Copyright (C) 2018 UT-Battelle, LLC
// Copyright (C) 2023 ETH Zurich
// Copyright (C) 2023 UT-Battelle, LLC
// All rights reserved.
//
// See LICENSE for terms of usage.
@@ -7,8 +7,9 @@
//
// Author: Peter Staar (taa@zurich.ibm.com)
//         Raffaele Solca' (rasolca@itp.phys.ethz.ch)
//         Peter W. Doak (doakpw@ornl.gov
//
// This file provides the Matrix object for different device types.
/// \file provides the Matrix object for different device types and allocators.

#ifndef DCA_LINALG_MATRIX_HPP
#define DCA_LINALG_MATRIX_HPP
@@ -37,12 +38,12 @@ namespace linalg {
/** Matrix class for interfacing with Blas, Cublas, Rocblas
 *  its row major i.e, row is fast.
 */
template <typename ScalarType, DeviceType device_name>
class Matrix : public util::DefaultAllocator<ScalarType, device_name> {
template <typename ScalarType, DeviceType device_name, class ALLOC = util::DefaultAllocator<ScalarType, device_name>>
class Matrix : public ALLOC {
public:
  using ThisType = Matrix<ScalarType, device_name>;
  using ValueType = ScalarType;
  using Allocator = util::DefaultAllocator<ScalarType, device_name>;
  using Allocator = ALLOC;
  constexpr static DeviceType device = device_name;

  Matrix(const std::string& name = default_name_);
@@ -63,48 +64,48 @@ public:

  // Copy and move constructor:
  // Constructs a matrix with name name, size rhs.size() and a copy of the elements of rhs.
  Matrix(const Matrix<ScalarType, device_name>& rhs, const std::string& name = default_name_);
  Matrix(const Matrix<ScalarType, device_name, ALLOC>& rhs, const std::string& name = default_name_);
  // Constructs a matrix with name name, size rhs.size(). The elements of rhs are moved.
  // Postcondition: rhs is a (0 x 0) matrix.
  Matrix(Matrix<ScalarType, device_name>&& rhs, const std::string& = default_name_);
  Matrix(Matrix<ScalarType, device_name, ALLOC>&& rhs, const std::string& = default_name_);

  // Contructs a matrix with name name, size rhs.size() and a copy of the elements of rhs, where rhs
  // elements are stored on a different device.
  template <DeviceType rhs_device_name>
  Matrix(const Matrix<ScalarType, rhs_device_name>& rhs, const std::string& = default_name_);
  template <DeviceType rhs_device_name, class rhs_ALLOC>
  Matrix(const Matrix<ScalarType, rhs_device_name, rhs_ALLOC>& rhs, const std::string& = default_name_);

  // Contructs a matrix with name name, size rhs.size() and a copy of the elements of rhs, where rhs
  // elements are stored on a different device.
  template <typename Scalar2, DeviceType rhs_device_name>
  Matrix(const Matrix<Scalar2, rhs_device_name>& rhs, const std::string& = default_name_);
  template <typename Scalar2, DeviceType rhs_device_name, class rhs_ALLOC>
  Matrix(const Matrix<Scalar2, rhs_device_name, rhs_ALLOC>& rhs, const std::string& = default_name_);

  ~Matrix();

  // Assignment operators:
  // Resizes the matrix to rhs.size() and copy the elements of rhs.
  // Postcondition: The name of the matrix is unchanged.
  Matrix<ScalarType, device_name>& operator=(const Matrix<ScalarType, device_name>& rhs);
  Matrix<ScalarType, device_name, ALLOC>& operator=(const Matrix<ScalarType, device_name, ALLOC>& rhs);
  // Resizes the matrix to rhs.size() and move the elements of rhs.
  // Postcondition: The name of the matrix is unchanged; rhs is a (0 x 0) matrix.
  Matrix<ScalarType, device_name>& operator=(Matrix<ScalarType, device_name>&& rhs);
  Matrix<ScalarType, device_name, ALLOC>& operator=(Matrix<ScalarType, device_name, ALLOC>&& rhs);

  // Resizes the matrix to rhs.size() and copy the elements, stored on a different device, of rhs.
  // Postcondition: The name of the matrix is unchanged.
  template <DeviceType rhs_device_name>
  Matrix<ScalarType, device_name>& operator=(const Matrix<ScalarType, rhs_device_name>& rhs);
  template <DeviceType rhs_device_name, class rhs_ALLOC>
  Matrix<ScalarType, device_name, ALLOC>& operator=(const Matrix<ScalarType, rhs_device_name, rhs_ALLOC>& rhs);

  template <typename ScalarRhs, DeviceType rhs_device_name>
  Matrix<ScalarType, device_name>& operator=(const Matrix<ScalarRhs, rhs_device_name>& rhs);
  template <typename ScalarRhs, DeviceType rhs_device_name, class rhs_ALLOC>
  Matrix<ScalarType, device_name, ALLOC>& operator=(const Matrix<ScalarRhs, rhs_device_name, rhs_ALLOC>& rhs);

  // Returns true if this is equal to other, false otherwise.
  // Two matrices are equal, if they have the same size and contain the same elements. Name and
  // capacity are ignored.
  // Special case: two matrices without elements are equal.
  bool operator==(const Matrix<ScalarType, device_name>& other) const;
  bool operator==(const Matrix<ScalarType, device_name, ALLOC>& other) const;

  // Returns true if this is not equal to other, false otherwise.
  // See description of operator== for the definition of equality.
  bool operator!=(const Matrix<ScalarType, device_name>& other) const;
  bool operator!=(const Matrix<ScalarType, device_name, ALLOC>& other) const;

  // Returns the (i,j)-th element of the matrix.
  // Preconditions: 0 <= i < size().first, 0 <= j < size().second.
@@ -208,25 +209,25 @@ public:
  void clear();

  // Swaps the contents of the matrix except the name with those of rhs.
  void swap(Matrix<ScalarType, device_name>& rhs);
  void swap(Matrix<ScalarType, device_name, ALLOC>& rhs);
  // Swaps the contents of the matrix, included the name, with those of rhs.
  void swapWithName(Matrix<ScalarType, device_name>& rhs);
  void swapWithName(Matrix<ScalarType, device_name, ALLOC>& rhs);

  // Asynchronous assignment (copy with stream = getStream(thread_id, stream_id))
  // + synchronization of stream
  template <DeviceType rhs_device_name>
  void set(const Matrix<ScalarType, rhs_device_name>& rhs, int thread_id, int stream_id);
  template <DeviceType rhs_device_name, class rhs_ALLOC>
  void set(const Matrix<ScalarType, rhs_device_name, rhs_ALLOC>& rhs, int thread_id, int stream_id);

  template <DeviceType rhs_device_name>
  void set(const Matrix<ScalarType, rhs_device_name>& rhs, const util::GpuStream& stream);
  template <DeviceType rhs_device_name, class rhs_ALLOC>
  void set(const Matrix<ScalarType, rhs_device_name, rhs_ALLOC>& rhs, const util::GpuStream& stream);

  // Asynchronous assignment.
  template <DeviceType rhs_device_name>
  void setAsync(const Matrix<ScalarType, rhs_device_name>& rhs, const util::GpuStream& stream);
  template <DeviceType rhs_device_name, class rhs_ALLOC>
  void setAsync(const Matrix<ScalarType, rhs_device_name, rhs_ALLOC>& rhs, const util::GpuStream& stream);

  // Asynchronous assignment (copy with stream = getStream(thread_id, stream_id))
  template <DeviceType rhs_device_name>
  void setAsync(const Matrix<ScalarType, rhs_device_name>& rhs, int thread_id, int stream_id);
  template <DeviceType rhs_device_name, class rhs_ALLOC>
  void setAsync(const Matrix<ScalarType, rhs_device_name, rhs_ALLOC>& rhs, int thread_id, int stream_id);

  void setToZero(const util::GpuStream& stream);

@@ -255,50 +256,51 @@ private:
  template <class ScalarType2, DeviceType device_name2>
  friend class dca::linalg::Matrix;
};
template <typename ScalarType, DeviceType device_name>
const std::string Matrix<ScalarType, device_name>::default_name_ = "no-name";

template <typename ScalarType, DeviceType device_name>
Matrix<ScalarType, device_name>::Matrix(const std::string& name) : Matrix(name, 0) {}
template <typename ScalarType, DeviceType device_name, class ALLOC>
const std::string Matrix<ScalarType, device_name,  ALLOC>::default_name_ = "no-name";

template <typename ScalarType, DeviceType device_name>
Matrix<ScalarType, device_name>::Matrix(int size) : Matrix(std::make_pair(size, size)) {}
template <typename ScalarType, DeviceType device_name, class ALLOC>
Matrix<ScalarType, device_name,  ALLOC>::Matrix(const std::string& name) : Matrix(name, 0) {}

template <typename ScalarType, DeviceType device_name>
Matrix<ScalarType, device_name>::Matrix(const std::string& name, int size)
template <typename ScalarType, DeviceType device_name, class ALLOC>
Matrix<ScalarType, device_name,  ALLOC>::Matrix(int size) : Matrix(std::make_pair(size, size)) {}

template <typename ScalarType, DeviceType device_name, class ALLOC>
Matrix<ScalarType, device_name,  ALLOC>::Matrix(const std::string& name, int size)
    : Matrix(name, std::make_pair(size, size)) {}

template <typename ScalarType, DeviceType device_name>
Matrix<ScalarType, device_name>::Matrix(int size, int capacity)
template <typename ScalarType, DeviceType device_name, class ALLOC>
Matrix<ScalarType, device_name,  ALLOC>::Matrix(int size, int capacity)
    : Matrix(std::make_pair(size, size), std::make_pair(capacity, capacity)) {}

template <typename ScalarType, DeviceType device_name>
Matrix<ScalarType, device_name>::Matrix(const std::string& name, int size, int capacity)
template <typename ScalarType, DeviceType device_name, class ALLOC>
Matrix<ScalarType, device_name,  ALLOC>::Matrix(const std::string& name, int size, int capacity)
    : Matrix(name, std::make_pair(size, size), std::make_pair(capacity, capacity)) {}

template <typename ScalarType, DeviceType device_name>
Matrix<ScalarType, device_name>::Matrix(std::pair<int, int> size) : Matrix(size, size) {}
template <typename ScalarType, DeviceType device_name, class ALLOC>
Matrix<ScalarType, device_name,  ALLOC>::Matrix(std::pair<int, int> size) : Matrix(size, size) {}

template <typename ScalarType, DeviceType device_name>
Matrix<ScalarType, device_name>::Matrix(const std::string& name, std::pair<int, int> size)
template <typename ScalarType, DeviceType device_name, class ALLOC>
Matrix<ScalarType, device_name,  ALLOC>::Matrix(const std::string& name, std::pair<int, int> size)
    : Matrix(name, size, size) {}

template <typename ScalarType, DeviceType device_name>
Matrix<ScalarType, device_name>::Matrix(std::pair<int, int> size, std::pair<int, int> capacity)
template <typename ScalarType, DeviceType device_name, class ALLOC>
Matrix<ScalarType, device_name,  ALLOC>::Matrix(std::pair<int, int> size, std::pair<int, int> capacity)
    : Matrix(default_name_, size, capacity) {}

template <typename ScalarType, DeviceType device_name>
template <DeviceType rhs_device_name>
Matrix<ScalarType, device_name>::Matrix(const Matrix<ScalarType, rhs_device_name>& rhs,
template <typename ScalarType, DeviceType device_name, class ALLOC>
template <DeviceType rhs_device_name, class rhs_ALLOC>
Matrix<ScalarType, device_name, ALLOC>::Matrix(const Matrix<ScalarType, rhs_device_name, rhs_ALLOC>& rhs,
                                        const std::string& name)
    : name_(name), size_(rhs.size_), capacity_(rhs.capacity_) {
  data_ = Allocator::allocate(nrElements(capacity_));
  util::memoryCopy(data_, leadingDimension(), rhs.data_, rhs.leadingDimension(), size_);
}

template <typename ScalarType, DeviceType device_name>
template <typename ScalarRhs, DeviceType rhs_device_name>
Matrix<ScalarType, device_name>::Matrix(const Matrix<ScalarRhs, rhs_device_name>& rhs,
template <typename ScalarType, DeviceType device_name, class ALLOC>
template <typename ScalarRhs, DeviceType rhs_device_name, class rhs_ALLOC>
Matrix<ScalarType, device_name,  ALLOC>::Matrix(const Matrix<ScalarRhs, rhs_device_name, rhs_ALLOC>& rhs,
                                        const std::string& name)
    : name_(name), size_(rhs.size_), capacity_(rhs.capacity_) {
  static_assert(sizeof(ScalarType) == sizeof(ScalarRhs));
@@ -306,8 +308,8 @@ Matrix<ScalarType, device_name>::Matrix(const Matrix<ScalarRhs, rhs_device_name>
  util::memoryCopy(data_, leadingDimension(), rhs.data_, rhs.leadingDimension(), size_);
}

template <typename ScalarType, DeviceType device_name>
Matrix<ScalarType, device_name>::Matrix(const std::string& name, std::pair<int, int> size,
template <typename ScalarType, DeviceType device_name, class ALLOC>
Matrix<ScalarType, device_name, ALLOC>::Matrix(const std::string& name, std::pair<int, int> size,
                                        std::pair<int, int> capacity)
    : name_(name), size_(size), capacity_(capacityMultipleOfBlockSize(capacity)) {
  assert(size_.first >= 0 && size_.second >= 0);
@@ -319,28 +321,28 @@ Matrix<ScalarType, device_name>::Matrix(const std::string& name, std::pair<int,
  util::Memory<device_name>::setToZero(data_, nrElements(capacity_));
}

template <typename ScalarType, DeviceType device_name>
Matrix<ScalarType, device_name>::Matrix(const Matrix<ScalarType, device_name>& rhs,
template <typename ScalarType, DeviceType device_name, class ALLOC>
Matrix<ScalarType, device_name,  ALLOC>::Matrix(const Matrix<ScalarType, device_name,  ALLOC>& rhs,
                                        const std::string& name)
    : name_(name) {
  *this = rhs;
}

template <typename ScalarType, DeviceType device_name>
Matrix<ScalarType, device_name>::Matrix(Matrix<ScalarType, device_name>&& rhs, const std::string& name)
template <typename ScalarType, DeviceType device_name, class ALLOC>
Matrix<ScalarType, device_name,  ALLOC>::Matrix(Matrix<ScalarType, device_name,  ALLOC>&& rhs, const std::string& name)
    : name_(name), size_(rhs.size_), capacity_(rhs.capacity_), data_(rhs.data_) {
  rhs.capacity_ = std::make_pair(0, 0);
  rhs.size_ = std::make_pair(0, 0);
  rhs.data_ = nullptr;
}

template <typename ScalarType, DeviceType device_name>
Matrix<ScalarType, device_name>::~Matrix() {
template <typename ScalarType, DeviceType device_name, class ALLOC>
Matrix<ScalarType, device_name,  ALLOC>::~Matrix() {
  Allocator::deallocate(data_);
}

template <typename ScalarType, DeviceType device_name>
void Matrix<ScalarType, device_name>::resize(std::pair<int, int> new_size) {
template <typename ScalarType, DeviceType device_name, class ALLOC>
void Matrix<ScalarType, device_name,  ALLOC>::resize(std::pair<int, int> new_size) {
  assert(new_size.first >= 0 && new_size.second >= 0);
  if (new_size.first > capacity_.first || new_size.second > capacity_.second) {
    std::pair<int, int> new_capacity = capacityMultipleOfBlockSize(new_size);
@@ -360,9 +362,9 @@ void Matrix<ScalarType, device_name>::resize(std::pair<int, int> new_size) {
  }
}

template <typename ScalarType, DeviceType device_name>
Matrix<ScalarType, device_name>& Matrix<ScalarType, device_name>::operator=(
    const Matrix<ScalarType, device_name>& rhs) {
template <typename ScalarType, DeviceType device_name, class ALLOC>
Matrix<ScalarType, device_name,  ALLOC>& Matrix<ScalarType, device_name, ALLOC>::operator=(
    const Matrix<ScalarType, device_name,  ALLOC>& rhs) {
  resizeNoCopy(rhs.size_);
  if (device_name == CPU)
    util::memoryCopyCpu(data_, leadingDimension(), rhs.data_, rhs.leadingDimension(), size_);
@@ -371,27 +373,27 @@ Matrix<ScalarType, device_name>& Matrix<ScalarType, device_name>::operator=(
  return *this;
}

template <typename ScalarType, DeviceType device_name>
Matrix<ScalarType, device_name>& Matrix<ScalarType, device_name>::operator=(
    Matrix<ScalarType, device_name>&& rhs) {
template <typename ScalarType, DeviceType device_name, class ALLOC>
Matrix<ScalarType, device_name,  ALLOC>& Matrix<ScalarType, device_name,  ALLOC>::operator=(
    Matrix<ScalarType, device_name,  ALLOC>&& rhs) {
  swap(rhs);
  return *this;
}

template <typename ScalarType, DeviceType device_name>
template <DeviceType rhs_device_name>
Matrix<ScalarType, device_name>& Matrix<ScalarType, device_name>::operator=(
    const Matrix<ScalarType, rhs_device_name>& rhs) {
template <typename ScalarType, DeviceType device_name, class ALLOC>
template <DeviceType rhs_device_name, class rhs_ALLOC>
Matrix<ScalarType, device_name,  ALLOC>& Matrix<ScalarType, device_name,  ALLOC>::operator=(
    const Matrix<ScalarType, rhs_device_name, rhs_ALLOC>& rhs) {
  resizeNoCopy(rhs.size_);
  util::memoryCopy(data_, leadingDimension(), rhs.data_, rhs.leadingDimension(), size_);
  return *this;
}

#ifdef DCA_HAVE_GPU
template <typename ScalarType, DeviceType device_name>
template <typename ScalarRhs, DeviceType rhs_device_name>
Matrix<ScalarType, device_name>& Matrix<ScalarType, device_name>::operator=(
    const Matrix<ScalarRhs, rhs_device_name>& rhs) {
template <typename ScalarType, DeviceType device_name, class ALLOC>
template <typename ScalarRhs, DeviceType rhs_device_name, class rhs_ALLOC>
Matrix<ScalarType, device_name,  ALLOC>& Matrix<ScalarType, device_name,  ALLOC>::operator=(
    const Matrix<ScalarRhs, rhs_device_name, rhs_ALLOC>& rhs) {
  static_assert(sizeof(ScalarType) == sizeof(ScalarRhs),
                "sizeof ScalarType and ScalarRhs are not equal");
  resizeNoCopy(rhs.size_);
@@ -401,8 +403,8 @@ Matrix<ScalarType, device_name>& Matrix<ScalarType, device_name>::operator=(

#endif

template <typename ScalarType, DeviceType device_name>
bool Matrix<ScalarType, device_name>::operator==(const Matrix<ScalarType, device_name>& other) const {
template <typename ScalarType, DeviceType device_name, class ALLOC>
bool Matrix<ScalarType, device_name, ALLOC>::operator==(const Matrix<ScalarType, device_name,  ALLOC>& other) const {
  if (device_name == GPU)
    return Matrix<ScalarType, CPU>(*this) == Matrix<ScalarType, CPU>(other);

@@ -417,13 +419,13 @@ bool Matrix<ScalarType, device_name>::operator==(const Matrix<ScalarType, device
  return true;
}

template <typename ScalarType, DeviceType device_name>
bool Matrix<ScalarType, device_name>::operator!=(const Matrix<ScalarType, device_name>& other) const {
template <typename ScalarType, DeviceType device_name, class ALLOC>
bool Matrix<ScalarType, device_name,  ALLOC>::operator!=(const Matrix<ScalarType, device_name,  ALLOC>& other) const {
  return not(*this == other);
}

template <typename ScalarType, DeviceType device_name>
void Matrix<ScalarType, device_name>::resizeNoCopy(std::pair<int, int> new_size) {
template <typename ScalarType, DeviceType device_name, class ALLOC>
void Matrix<ScalarType, device_name,  ALLOC>::resizeNoCopy(std::pair<int, int> new_size) {
  if (new_size.first > capacity_.first || new_size.second > capacity_.second) {
    size_ = new_size;
    capacity_ = capacityMultipleOfBlockSize(new_size);
@@ -436,28 +438,28 @@ void Matrix<ScalarType, device_name>::resizeNoCopy(std::pair<int, int> new_size)
  }
}

template <typename ScalarType, DeviceType device_name>
void Matrix<ScalarType, device_name>::clear() {
template <typename ScalarType, DeviceType device_name, class ALLOC>
void Matrix<ScalarType, device_name,  ALLOC>::clear() {
  Allocator::deallocate(data_);
  size_ = capacity_ = std::make_pair(0, 0);
}

template <typename ScalarType, DeviceType device_name>
void Matrix<ScalarType, device_name>::swap(Matrix<ScalarType, device_name>& rhs) {
template <typename ScalarType, DeviceType device_name, class ALLOC>
void Matrix<ScalarType, device_name,  ALLOC>::swap(Matrix<ScalarType, device_name,  ALLOC>& rhs) {
  std::swap(size_, rhs.size_);
  std::swap(capacity_, rhs.capacity_);
  std::swap(data_, rhs.data_);
}

template <typename ScalarType, DeviceType device_name>
void Matrix<ScalarType, device_name>::swapWithName(Matrix<ScalarType, device_name>& rhs) {
template <typename ScalarType, DeviceType device_name, class ALLOC>
void Matrix<ScalarType, device_name,  ALLOC>::swapWithName(Matrix<ScalarType, device_name,  ALLOC>& rhs) {
  std::swap(name_, rhs.name_);
  swap(rhs);
}

template <typename ScalarType, DeviceType device_name>
template <DeviceType rhs_device_name>
void Matrix<ScalarType, device_name>::set(const Matrix<ScalarType, rhs_device_name>& rhs,
template <typename ScalarType, DeviceType device_name, class ALLOC>
template <DeviceType rhs_device_name, class rhs_ALLOC>
void Matrix<ScalarType, device_name,  ALLOC>::set(const Matrix<ScalarType, rhs_device_name, rhs_ALLOC>& rhs,
                                          int thread_id, int stream_id) {
  resize(rhs.size_);
  // This specialization is required since without unified memory CUDA doesn't known which memory locality the pointer has.
@@ -472,9 +474,9 @@ void Matrix<ScalarType, device_name>::set(const Matrix<ScalarType, rhs_device_na
                        thread_id, stream_id);
}

template <typename ScalarType, DeviceType device_name>
template <DeviceType rhs_device_name>
void Matrix<ScalarType, device_name>::set(const Matrix<ScalarType, rhs_device_name>& rhs,
template <typename ScalarType, DeviceType device_name, class ALLOC>
template <DeviceType rhs_device_name, class rhs_ALLOC>
void Matrix<ScalarType, device_name,  ALLOC>::set(const Matrix<ScalarType, rhs_device_name, rhs_ALLOC>& rhs,
                                          const util::GpuStream& stream [[maybe_unused]]) {
  resize(rhs.size_);
  if constexpr (device_name == DeviceType::GPU && rhs_device_name == DeviceType::CPU)
@@ -483,28 +485,28 @@ void Matrix<ScalarType, device_name>::set(const Matrix<ScalarType, rhs_device_na
    util::memoryCopyD2H(data_, leadingDimension(), rhs.data_, rhs.leadingDimension(), size_);
}

template <typename ScalarType, DeviceType device_name>
template <DeviceType rhs_device_name>
void Matrix<ScalarType, device_name>::setAsync(const Matrix<ScalarType, rhs_device_name>& rhs,
template <typename ScalarType, DeviceType device_name, class ALLOC>
template <DeviceType rhs_device_name, class rhs_ALLOC>
void Matrix<ScalarType, device_name,  ALLOC>::setAsync(const Matrix<ScalarType, rhs_device_name, rhs_ALLOC>& rhs,
                                               const util::GpuStream& stream) {
  resizeNoCopy(rhs.size_);
  util::memoryCopyAsync(data_, leadingDimension(), rhs.data_, rhs.leadingDimension(), size_, stream);
}

template <typename ScalarType, DeviceType device_name>
template <DeviceType rhs_device_name>
void Matrix<ScalarType, device_name>::setAsync(const Matrix<ScalarType, rhs_device_name>& rhs,
template <typename ScalarType, DeviceType device_name, class ALLOC>
template <DeviceType rhs_device_name, class rhs_ALLOC>
void Matrix<ScalarType, device_name,  ALLOC>::setAsync(const Matrix<ScalarType, rhs_device_name, rhs_ALLOC>& rhs,
                                               const int thread_id, const int stream_id) {
  setAsync(rhs, util::getStream(thread_id, stream_id));
}

template <typename ScalarType, DeviceType device_name>
void Matrix<ScalarType, device_name>::setToZero(const util::GpuStream& stream) {
template <typename ScalarType, DeviceType device_name, class ALLOC>
void Matrix<ScalarType, device_name,  ALLOC>::setToZero(const util::GpuStream& stream) {
  util::Memory<device_name>::setToZeroAsync(data_, leadingDimension() * nrCols(), stream);
}

template <typename ScalarType, DeviceType device_name>
void Matrix<ScalarType, device_name>::print() const {
template <typename ScalarType, DeviceType device_name, class ALLOC>
void Matrix<ScalarType, device_name,  ALLOC>::print() const {
  if (device_name == GPU)
    return Matrix<ScalarType, CPU>(*this).print();

@@ -524,8 +526,8 @@ void Matrix<ScalarType, device_name>::print() const {
  std::cout << ss.str() << std::endl;
}

template <typename ScalarType, DeviceType device_name>
void Matrix<ScalarType, device_name>::printFingerprint() const {
template <typename ScalarType, DeviceType device_name, class ALLOC>
void Matrix<ScalarType, device_name,  ALLOC>::printFingerprint() const {
  std::stringstream ss;

  ss << "\n";
@@ -537,8 +539,8 @@ void Matrix<ScalarType, device_name>::printFingerprint() const {
  std::cout << ss.str() << std::endl;
}

template <typename ScalarType, DeviceType device_name>
std::pair<int, int> Matrix<ScalarType, device_name>::capacityMultipleOfBlockSize(
template <typename ScalarType, DeviceType device_name, class ALLOC>
std::pair<int, int> Matrix<ScalarType, device_name,  ALLOC>::capacityMultipleOfBlockSize(
    std::pair<int, int> size) {
  assert(size.first >= 0);
  assert(size.second >= 0);
@@ -553,8 +555,8 @@ std::pair<int, int> Matrix<ScalarType, device_name>::capacityMultipleOfBlockSize
  return size;
}

template <typename ScalarType, DeviceType device_name>
std::size_t Matrix<ScalarType, device_name>::deviceFingerprint() const {
template <typename ScalarType, DeviceType device_name, class ALLOC>
std::size_t Matrix<ScalarType, device_name,  ALLOC>::deviceFingerprint() const {
  if (device_name == GPU)
    return capacity_.first * capacity_.second * sizeof(ScalarType);
  else
@@ -562,10 +564,10 @@ std::size_t Matrix<ScalarType, device_name>::deviceFingerprint() const {
}

/// Factory function for diangonal matrices, type is inferred from the type of Vector.
template <typename ScalarType, DeviceType device_name>
auto makeDiagonalMatrix(Vector<ScalarType, device_name>& diag) {
template <typename ScalarType, DeviceType device_name, class ALLOC>
auto makeDiagonalMatrix(Vector<ScalarType, device_name, ALLOC>& diag) {
  int dsize = diag.size();
  Matrix<ScalarType, device_name> matrix("diag_matrix", dsize);
  Matrix<ScalarType, device_name,  ALLOC> matrix("diag_matrix", dsize);
  for (int i = 0; i < dsize; ++i) {
    matrix(i, i) = diag[i];
  }
@@ -573,10 +575,10 @@ auto makeDiagonalMatrix(Vector<ScalarType, device_name>& diag) {
}

/// Factory function for diangonal matrices, type is inferred from the type of Vector.
template <typename ScalarType, DeviceType device_name>
auto makeDiagonalMatrixInv(Vector<ScalarType, device_name>& diag) {
template <typename ScalarType, DeviceType device_name, class ALLOC>
auto makeDiagonalMatrixInv(Vector<ScalarType, device_name, ALLOC>& diag) {
  int dsize = diag.size();
  Matrix<ScalarType, device_name> matrix("diag_matrix", dsize);
  Matrix<ScalarType, device_name,  ALLOC> matrix("diag_matrix", dsize);
  // insure that if ScalarType is complex the 1 is as well.
  // then std::complex will give us a proper complex multiplicative inverse
  ScalarType the_one{};
+60 −55

File changed.

Preview size limit exceeded, changes collapsed.

+145 −139

File changed.

Preview size limit exceeded, changes collapsed.

+2 −2
Original line number Diff line number Diff line
@@ -75,9 +75,9 @@ int getEigensolverWorkSize(char jobvl, char jobvr, Matrix<std::complex<ScalarTyp
}
// Returns optimal lwork and liwork for the symmetric eigensolver.
// In: mat
template <typename ScalarType>
  template <typename ScalarType, template <typename> class ALLOC >
std::tuple<int, int> getEigensolverSymmetricWorkSize(char jobv, char uplo,
                                                     Matrix<ScalarType, CPU>& mat) {
                                                     Matrix<ScalarType, CPU, ALLOC<ScalarType>>& mat) {
  assert(mat.is_square());

  ScalarType tmp1;
+1 −5
Original line number Diff line number Diff line
@@ -62,15 +62,11 @@ public:
  typedef typename basis_transformation_type::matrix_type matrix_type;

public:
  static bool& is_initialized() {
    return basis_transformation_type::is_initialized();
  }

  static std::string& get_name() {
    return basis_transformation_type::get_name();
  }

  static matrix_type& get_transformation_matrix() {
  static matrix_type get_transformation_matrix() {
    return basis_transformation_type::get_transformation_matrix();
  }
};
Loading