Commit 5a4b3993 authored by Thomas Maier's avatar Thomas Maier
Browse files

Merge remote-tracking branch 'origin/gpu_trunk2' into tambranch

parents deab721b c685f954
Loading
Loading
Loading
Loading
+10 −15
Original line number Diff line number Diff line
@@ -35,7 +35,7 @@

int main(int argc, char** argv) {
  if (argc < 2) {
    std::cerr << "Usage: " << argv[0] << " input_file.json [skip ed]" << std::endl;
    std::cerr << "Usage: " << argv[0] << " input_file.json" << std::endl;
    return -1;
  }

@@ -43,8 +43,8 @@ int main(int argc, char** argv) {

  try {
    std::string input_file(argv[1]);
    const bool skip_ed = argc > 2 ? std::atoi(argv[2]) : false;
    const bool perform_statistical_test = concurrency.number_of_processors() >= 8 && !skip_ed;

    const bool perform_statistical_test = concurrency.number_of_processors() >= 8;

    Profiler::start();

@@ -94,26 +94,21 @@ int main(int argc, char** argv) {

    // ED solver
    EdSolver ed_solver(parameters, dca_data_imag, dca_data_real);
    if (!skip_ed) {
    ed_solver.initialize(0);
    ed_solver.execute();
    ed_solver.finalize(dca_loop_data);

      if (concurrency.id() == concurrency.first()) {
        ed_solver.write(data_file_ed);
      }
    }

    const auto Sigma_ed(dca_data_imag.Sigma);
    const int tested_frequencies = 10;
    const auto G_ed(dca::math::util::cutFrequency(dca_data_imag.G_k_w, tested_frequencies));

    if (concurrency.id() == concurrency.first()) {
      ed_solver.write(data_file_ed);
    }

    // QMC solver
    // The QMC solver uses the free Greens function G0 computed by the ED solver.
    // It is passed via the dca_data_imag object.
    if (skip_ed)
      dca_data_imag.initialize();

    ClusterSolver qmc_solver(parameters, dca_data_imag);
    qmc_solver.initialize(1);  // 1 = dummy iteration number
    qmc_solver.integrate();
+3 −3
Original line number Diff line number Diff line
@@ -9,8 +9,8 @@
//
// This class stores compile time options for the MC accumulation.

#ifndef DCA_CONFIG_ACCUMULATON_OPTIONS_HPP
#define DCA_CONFIG_ACCUMULATON_OPTIONS_HPP
#ifndef DCA_CONFIG_ACCUMULATION_OPTIONS_HPP
#define DCA_CONFIG_ACCUMULATION_OPTIONS_HPP

#ifdef DCA_HAVE_CUDA
#include "dca/linalg/util/allocators/device_allocator.hpp"
@@ -36,4 +36,4 @@ struct AccumulationOptions {
}  // config
}  // dca

#endif  // DCA_CONFIG_ACCUMULATON_OPTIONS_HPP
#endif  // DCA_CONFIG_ACCUMULATION_OPTIONS_HPP
+2 −2
Original line number Diff line number Diff line
@@ -50,7 +50,7 @@ struct CMakeOptions {
  static void print();
};

}  // config
}  // dca
}  // namespace config
}  // namespace dca

#endif  // DCA_CONFIG_CMAKE_OPTIONS_HPP
+65 −47
Original line number Diff line number Diff line
@@ -37,21 +37,29 @@ public:
  using ThisType = ReshapableMatrix<ScalarType, device_name, Allocator>;
  using ValueType = ScalarType;

  ReshapableMatrix(int size = 0);
  // Default constructor creates a matrix of zero size and capacity.
  ReshapableMatrix() = default;
  // Initializes a square size x size matrix.
  ReshapableMatrix(int size);
  // Initializes a square size.first x size.second matrix.
  ReshapableMatrix(std::pair<int, int> size);

  // Copy and move constructor:
  // Constructs a matrix with name name, size rhs.size() and a copy of the elements of rhs.
  ReshapableMatrix(const ReshapableMatrix<ScalarType, device_name, Allocator>& rhs);
  // Constructs a matrix with name name, size rhs.size(). The elements of rhs are moved.
  // Postcondition: rhs is a (0 x 0) matrix.
  ReshapableMatrix(ReshapableMatrix<ScalarType, device_name, Allocator>&& rhs);

  // Constructs a matrix with name name, size rhs.size() and a copy of the elements of rhs, where rhs
  // elements are stored on a different device.
  // Constructs a matrix with size rhs.size() and a copy of the elements of rhs.
  ReshapableMatrix(const ThisType& rhs);
  template <DeviceType rhs_device_name, class AllocatorRhs>
  ReshapableMatrix(const ReshapableMatrix<ScalarType, rhs_device_name, AllocatorRhs>& rhs);

  // Constructs a matrix with size rhs.size(). The elements of rhs are moved.
  ReshapableMatrix(ThisType&& rhs);

  // Resize the matrix to rhs.size() and copies the elements.
  ReshapableMatrix& operator=(const ThisType& rhs);
  template <DeviceType rhs_device_name, class AllocatorRhs>
  ReshapableMatrix& operator=(const ReshapableMatrix<ScalarType, rhs_device_name, AllocatorRhs>& rhs);

  // Moves the elements of rhs into this matrix.
  ReshapableMatrix& operator=(ThisType&& rhs);

  ~ReshapableMatrix();

  // Returns true if this is equal to other, false otherwise.
@@ -118,23 +126,19 @@ public:
    return size_.first;
  }

  // Resizes *this to a (new_size * new_size) matrix.
  // Resizes *this to a (new_size.first * new_size.second) matrix.
  // The previous elements are not copied, therefore all the elements
  // may have any value after the call to this method.
  // Returns: true if reallocation took place.
  // Remark: The capacity of the matrix and element pointers do not change
  // if new_size <= capacity().first and new_size <= capacity().second.
  bool resizeNoCopy(std::pair<int, int> new_size);
  // Resizes *this to a (new_size * new_size) matrix. See previous method for details.
  // Resizes *this to a square (new_size x new_size) matrix by delegating to the
  // std::pair overload. The previous elements are not preserved; see the pair
  // overload for the reallocation/capacity rules.
  bool resizeNoCopy(int new_size) {
    return resizeNoCopy(std::make_pair(new_size, new_size));
  }
  // Resizes *this to a (new_size.first * new_size.second) matrix.
  // The previous elements are not copied, therefore all the elements
  // may have any value after the call to this method.
  // Returns: true if reallocation took place.
  // Remark: The capacity of the matrix and element pointers do not change
  // if new_size.first <= capacity().first and new_size.second <= capacity().second.
  bool resizeNoCopy(std::pair<int, int> new_size);

  // Reserves the space for at least (new_size.first * new_size.second) elements without changing
  // the matrix size. The value of the matrix elements is undefined after calling this method.
  // Returns: true if reallocation took place.
  bool reserveNoCopy(std::size_t new_size);

  void swap(ReshapableMatrix<ScalarType, device_name, Allocator>& other);
@@ -142,11 +146,6 @@ public:
  // Releases the memory allocated by *this and sets size and capacity to zero.
  void clear();

  // Asynchronous assignment (copy with stream = getStream(thread_id, stream_id))
  // + synchronization of stream
  template <DeviceType rhs_device_name>
  void set(const ReshapableMatrix<ScalarType, rhs_device_name>& rhs, int thread_id, int stream_id);

#ifdef DCA_HAVE_CUDA
  // Asynchronous assignment.
  template <DeviceType rhs_device_name>
@@ -161,8 +160,8 @@ public:
#else
  // Synchronous assignment fallback for SetAsync.
  template <DeviceType rhs_device_name>
  void setAsync(const ReshapableMatrix<ScalarType, rhs_device_name>& rhs, int thread_id,
                int stream_id);
  void setAsync(const ReshapableMatrix<ScalarType, rhs_device_name>& rhs, int /*thread_id*/,
                int /*stream_id*/);

#endif  // DCA_HAVE_CUDA

@@ -175,8 +174,8 @@ private:
    return static_cast<size_t>(size.first) * static_cast<size_t>(size.second);
  }

  std::pair<int, int> size_;
  std::size_t capacity_;
  std::pair<int, int> size_ = std::make_pair(0, 0);
  std::size_t capacity_ = 0;

  ValueType* data_ = nullptr;

@@ -198,27 +197,55 @@ ReshapableMatrix<ScalarType, device_name, Allocator>::ReshapableMatrix(std::pair
}

template <typename ScalarType, DeviceType device_name, class Allocator>
ReshapableMatrix<ScalarType, device_name, Allocator>::ReshapableMatrix(const ThisType& rhs) {
  // Copy construction is implemented in terms of copy assignment: the members'
  // default initializers leave *this as an empty (0 x 0) matrix with a null
  // buffer, so assigning from rhs allocates and copies its elements.
  this->operator=(rhs);
}

template <typename ScalarType, DeviceType device_name, class Allocator>
template <DeviceType rhs_device_name, class AllocatorRhs>
ReshapableMatrix<ScalarType, device_name, Allocator>::ReshapableMatrix(
    const ReshapableMatrix<ScalarType, device_name, Allocator>& rhs) {
    const ReshapableMatrix<ScalarType, rhs_device_name, AllocatorRhs>& rhs) {
  *this = rhs;
}

template <typename ScalarType, DeviceType device_name, class Allocator>
ReshapableMatrix<ScalarType, device_name, Allocator>::ReshapableMatrix(
    ReshapableMatrix<ScalarType, device_name, Allocator>&& rhs)
    : size_(rhs.size_), capacity_(rhs.capacity_), data_(rhs.data_) {
  rhs.capacity_ = 0;
  rhs.size_ = std::make_pair(0, 0);
  rhs.data_ = nullptr;
    : ReshapableMatrix<ScalarType, device_name, Allocator>() {
  swap(rhs);
}

template <typename ScalarType, DeviceType device_name, class Allocator>
ReshapableMatrix<ScalarType, device_name, Allocator>& ReshapableMatrix<
    ScalarType, device_name, Allocator>::operator=(const ThisType& rhs) {
  // Guard against self-assignment: the unconditional deallocate below would
  // otherwise free data_ and then copy from/into the released buffer, losing
  // the matrix contents (Core Guidelines C.62).
  if (this == &rhs)
    return *this;

  size_ = rhs.size_;
  capacity_ = rhs.capacity_;

  // Replace our buffer with one matching rhs's capacity, then copy only the
  // size_.first x size_.second elements that are in use.
  Allocator::deallocate(data_);
  data_ = nullptr;  // avoid a dangling pointer if allocate() throws
  data_ = Allocator::allocate(capacity_);
  util::memoryCopy(data_, leadingDimension(), rhs.data_, rhs.leadingDimension(), size_);
  return *this;
}

template <typename ScalarType, DeviceType device_name, class Allocator>
template <DeviceType rhs_device_name, class AllocatorRhs>
ReshapableMatrix<ScalarType, device_name, Allocator>::ReshapableMatrix(
    const ReshapableMatrix<ScalarType, rhs_device_name, AllocatorRhs>& rhs)
    : size_(rhs.size_), capacity_(rhs.capacity_) {
ReshapableMatrix<ScalarType, device_name, Allocator>& ReshapableMatrix<
    ScalarType, device_name,
    Allocator>::operator=(const ReshapableMatrix<ScalarType, rhs_device_name, AllocatorRhs>& rhs) {
  size_ = rhs.size_;
  capacity_ = rhs.capacity_;

  Allocator::deallocate(data_);
  data_ = Allocator::allocate(capacity_);
  util::memoryCopy(data_, leadingDimension(), rhs.data_, rhs.leadingDimension(), size_);
  return *this;
}

template <typename ScalarType, DeviceType device_name, class Allocator>
ReshapableMatrix<ScalarType, device_name, Allocator>& ReshapableMatrix<
    ScalarType, device_name, Allocator>::operator=(ThisType&& rhs) {
  // Move assignment via swap: *this steals rhs's buffer while rhs receives the
  // old state and releases it on destruction. Self-move is harmless (a swap
  // with oneself is a no-op).
  this->swap(rhs);
  return *this;
}

template <typename ScalarType, DeviceType device_name, class Allocator>
@@ -284,15 +311,6 @@ void ReshapableMatrix<ScalarType, device_name, Allocator>::clear() {
  capacity_ = 0;
}

// Resizes *this to rhs's size and copies rhs's elements; rhs may live on a
// different device (rhs_device_name). thread_id and stream_id are forwarded to
// util::memoryCopy — presumably selecting the stream used for the copy and
// synchronizing it; confirm against util::memoryCopy's documentation.
template <typename ScalarType, DeviceType device_name, class Allocator>
template <DeviceType rhs_device_name>
void ReshapableMatrix<ScalarType, device_name, Allocator>::set(
    const ReshapableMatrix<ScalarType, rhs_device_name>& rhs, int thread_id, int stream_id) {
  resize(rhs.size_);
  util::memoryCopy(data_, leadingDimension(), rhs.data_, rhs.leadingDimension(), size_, thread_id,
                   stream_id);
}

#ifdef DCA_HAVE_CUDA

template <typename ScalarType, DeviceType device_name, class Allocator>
@@ -322,7 +340,7 @@ template <typename ScalarType, DeviceType device_name, class Allocator>
template <DeviceType rhs_device_name>
void ReshapableMatrix<ScalarType, device_name, Allocator>::setAsync(
    const ReshapableMatrix<ScalarType, rhs_device_name>& rhs, int /*thread_id*/, int /*stream_id*/) {
  set(rhs);
  *this = rhs;
}

#endif  // DCA_HAVE_CUDA
+3 −0
Original line number Diff line number Diff line
@@ -21,6 +21,9 @@ template <typename T>
class AlignedAllocator {
protected:
  T* allocate(std::size_t n) {
    if (!n)
      return nullptr;

    T* ptr;
    int err = posix_memalign((void**)&ptr, 128, n * sizeof(T));
    if (err)
Loading