Commit 9434713d authored by gbalduzz's avatar gbalduzz
Browse files

Merge remote-tracking branch 'origin/master' into gpu_trunk2

parents 1f18ae79 1602e2a5
Loading
Loading
Loading
Loading
+1 −0
Original line number Diff line number Diff line
@@ -63,6 +63,7 @@ PenaltyReturnTypeOnItsOwnLine: 100000000
PointerAlignment: Left
#ReflowComments: true # Supported only from clang 3.9
SortIncludes: false
SortUsingDeclarations: false
SpaceAfterCStyleCast: false
SpaceBeforeAssignmentOperators: true
SpaceBeforeParens: ControlStatements
+10 −15
Original line number Diff line number Diff line
@@ -35,7 +35,7 @@

int main(int argc, char** argv) {
  if (argc < 2) {
    std::cerr << "Usage: " << argv[0] << " input_file.json [skip ed]" << std::endl;
    std::cerr << "Usage: " << argv[0] << " input_file.json" << std::endl;
    return -1;
  }

@@ -43,8 +43,8 @@ int main(int argc, char** argv) {

  try {
    std::string input_file(argv[1]);
    const bool skip_ed = argc > 2 ? std::atoi(argv[2]) : false;
    const bool perform_statistical_test = concurrency.number_of_processors() >= 8 && !skip_ed;

    const bool perform_statistical_test = concurrency.number_of_processors() >= 8;

    Profiler::start();

@@ -94,26 +94,21 @@ int main(int argc, char** argv) {

    // ED solver
    EdSolver ed_solver(parameters, dca_data_imag, dca_data_real);
    if (!skip_ed) {
    ed_solver.initialize(0);
    ed_solver.execute();
    ed_solver.finalize(dca_loop_data);

      if (concurrency.id() == concurrency.first()) {
        ed_solver.write(data_file_ed);
      }
    }

    const auto Sigma_ed(dca_data_imag.Sigma);
    const int tested_frequencies = 10;
    const auto G_ed(dca::math::util::cutFrequency(dca_data_imag.G_k_w, tested_frequencies));

    if (concurrency.id() == concurrency.first()) {
      ed_solver.write(data_file_ed);
    }

    // QMC solver
    // The QMC solver uses the free Greens function G0 computed by the ED solver.
    // It is passed via the dca_data_imag object.
    if (skip_ed)
      dca_data_imag.initialize();

    ClusterSolver qmc_solver(parameters, dca_data_imag);
    qmc_solver.initialize(1);  // 1 = dummy iteration number
    qmc_solver.integrate();
+65 −47
Original line number Diff line number Diff line
@@ -37,21 +37,29 @@ public:
  using ThisType = ReshapableMatrix<ScalarType, device_name, Allocator>;
  using ValueType = ScalarType;

  ReshapableMatrix(int size = 0);
  // Default constructor creates a matrix of zero size and capacity.
  ReshapableMatrix() = default;
  // Initializes a square size x size matrix.
  ReshapableMatrix(int size);
  // Initializes a size.first x size.second matrix.
  ReshapableMatrix(std::pair<int, int> size);

  // Copy and move constructor:
  // Constructs a matrix with name name, size rhs.size() and a copy of the elements of rhs.
  ReshapableMatrix(const ReshapableMatrix<ScalarType, device_name, Allocator>& rhs);
  // Constructs a matrix with name name, size rhs.size(). The elements of rhs are moved.
  // Postcondition: rhs is a (0 x 0) matrix.
  ReshapableMatrix(ReshapableMatrix<ScalarType, device_name, Allocator>&& rhs);

  // Constructs a matrix with name name, size rhs.size() and a copy of the elements of rhs, where rhs
  // elements are stored on a different device.
  // Constructs a matrix with size rhs.size() and a copy of the elements of rhs.
  ReshapableMatrix(const ThisType& rhs);
  template <DeviceType rhs_device_name, class AllocatorRhs>
  ReshapableMatrix(const ReshapableMatrix<ScalarType, rhs_device_name, AllocatorRhs>& rhs);

  // Constructs a matrix with size rhs.size(). The elements of rhs are moved.
  ReshapableMatrix(ThisType&& rhs);

  // Resize the matrix to rhs.size() and copies the elements.
  ReshapableMatrix& operator=(const ThisType& rhs);
  template <DeviceType rhs_device_name, class AllocatorRhs>
  ReshapableMatrix& operator=(const ReshapableMatrix<ScalarType, rhs_device_name, AllocatorRhs>& rhs);

  // Moves the elements of rhs into this matrix.
  ReshapableMatrix& operator=(ThisType&& rhs);

  ~ReshapableMatrix();

  // Returns true if this is equal to other, false otherwise.
@@ -118,23 +126,19 @@ public:
    return size_.first;
  }

  // Resizes *this to a (new_size * new_size) matrix.
  // Resizes *this to a (new_size.first * new_size.second) matrix.
  // The previous elements are not copied, therefore all the elements
  // may have any value after the call to this method.
  // Returns: true if reallocation took place.
  // Remark: The capacity of the matrix and element pointers do not change
  // if new_size <= capacity().first and new_size <= capacity().second.
  bool resizeNoCopy(std::pair<int, int> new_size);
  // Resizes *this to a (new_size * new_size) matrix. See previous method for details.
  bool resizeNoCopy(int new_size) {
    return resizeNoCopy(std::make_pair(new_size, new_size));
  }
  // Resizes *this to a (new_size.first * new_size.second) matrix.
  // The previous elements are not copied, therefore all the elements
  // may have any value after the call to this method.
  // Returns: true if reallocation took place.
  // Remark: The capacity of the matrix and element pointers do not change
  // if new_size.first <= capacity().first and new_size.second <= capacity().second.
  bool resizeNoCopy(std::pair<int, int> new_size);

  // Reserves the space for at least (new_size.first * new_size.second) elements without changing
  // the matrix size. The value of the matrix elements is undefined after calling this method.
  // Returns: true if reallocation took place.
  bool reserveNoCopy(std::size_t new_size);

  void swap(ReshapableMatrix<ScalarType, device_name, Allocator>& other);
@@ -142,11 +146,6 @@ public:
  // Releases the memory allocated by *this and sets size and capacity to zero.
  void clear();

  // Asynchronous assignment (copy with stream = getStream(thread_id, stream_id))
  // + synchronization of stream
  template <DeviceType rhs_device_name>
  void set(const ReshapableMatrix<ScalarType, rhs_device_name>& rhs, int thread_id, int stream_id);

#ifdef DCA_HAVE_CUDA
  // Asynchronous assignment.
  template <DeviceType rhs_device_name>
@@ -161,8 +160,8 @@ public:
#else
  // Synchronous assignment fallback for SetAsync.
  template <DeviceType rhs_device_name>
  void setAsync(const ReshapableMatrix<ScalarType, rhs_device_name>& rhs, int thread_id,
                int stream_id);
  void setAsync(const ReshapableMatrix<ScalarType, rhs_device_name>& rhs, int /*thread_id*/,
                int /*stream_id*/);

#endif  // DCA_HAVE_CUDA

@@ -175,8 +174,8 @@ private:
    return static_cast<size_t>(size.first) * static_cast<size_t>(size.second);
  }

  std::pair<int, int> size_;
  std::size_t capacity_;
  std::pair<int, int> size_ = std::make_pair(0, 0);
  std::size_t capacity_ = 0;

  ValueType* data_ = nullptr;

@@ -198,27 +197,55 @@ ReshapableMatrix<ScalarType, device_name, Allocator>::ReshapableMatrix(std::pair
}

template <typename ScalarType, DeviceType device_name, class Allocator>
ReshapableMatrix<ScalarType, device_name, Allocator>::ReshapableMatrix(const ThisType& rhs) {
  *this = rhs;
}

template <typename ScalarType, DeviceType device_name, class Allocator>
template <DeviceType rhs_device_name, class AllocatorRhs>
ReshapableMatrix<ScalarType, device_name, Allocator>::ReshapableMatrix(
    const ReshapableMatrix<ScalarType, device_name, Allocator>& rhs) {
    const ReshapableMatrix<ScalarType, rhs_device_name, AllocatorRhs>& rhs) {
  *this = rhs;
}

template <typename ScalarType, DeviceType device_name, class Allocator>
ReshapableMatrix<ScalarType, device_name, Allocator>::ReshapableMatrix(
    ReshapableMatrix<ScalarType, device_name, Allocator>&& rhs)
    : size_(rhs.size_), capacity_(rhs.capacity_), data_(rhs.data_) {
  rhs.capacity_ = 0;
  rhs.size_ = std::make_pair(0, 0);
  rhs.data_ = nullptr;
    : ReshapableMatrix<ScalarType, device_name, Allocator>() {
  swap(rhs);
}

template <typename ScalarType, DeviceType device_name, class Allocator>
ReshapableMatrix<ScalarType, device_name, Allocator>& ReshapableMatrix<
    ScalarType, device_name, Allocator>::operator=(const ThisType& rhs) {
  size_ = rhs.size_;
  capacity_ = rhs.capacity_;

  Allocator::deallocate(data_);
  data_ = Allocator::allocate(capacity_);
  util::memoryCopy(data_, leadingDimension(), rhs.data_, rhs.leadingDimension(), size_);
  return *this;
}

template <typename ScalarType, DeviceType device_name, class Allocator>
template <DeviceType rhs_device_name, class AllocatorRhs>
ReshapableMatrix<ScalarType, device_name, Allocator>::ReshapableMatrix(
    const ReshapableMatrix<ScalarType, rhs_device_name, AllocatorRhs>& rhs)
    : size_(rhs.size_), capacity_(rhs.capacity_) {
ReshapableMatrix<ScalarType, device_name, Allocator>& ReshapableMatrix<
    ScalarType, device_name,
    Allocator>::operator=(const ReshapableMatrix<ScalarType, rhs_device_name, AllocatorRhs>& rhs) {
  size_ = rhs.size_;
  capacity_ = rhs.capacity_;

  Allocator::deallocate(data_);
  data_ = Allocator::allocate(capacity_);
  util::memoryCopy(data_, leadingDimension(), rhs.data_, rhs.leadingDimension(), size_);
  return *this;
}

template <typename ScalarType, DeviceType device_name, class Allocator>
ReshapableMatrix<ScalarType, device_name, Allocator>& ReshapableMatrix<
    ScalarType, device_name, Allocator>::operator=(ThisType&& rhs) {
  swap(rhs);
  return *this;
}

template <typename ScalarType, DeviceType device_name, class Allocator>
@@ -284,15 +311,6 @@ void ReshapableMatrix<ScalarType, device_name, Allocator>::clear() {
  capacity_ = 0;
}

template <typename ScalarType, DeviceType device_name, class Allocator>
template <DeviceType rhs_device_name>
void ReshapableMatrix<ScalarType, device_name, Allocator>::set(
    const ReshapableMatrix<ScalarType, rhs_device_name>& rhs, int thread_id, int stream_id) {
  resize(rhs.size_);
  util::memoryCopy(data_, leadingDimension(), rhs.data_, rhs.leadingDimension(), size_, thread_id,
                   stream_id);
}

#ifdef DCA_HAVE_CUDA

template <typename ScalarType, DeviceType device_name, class Allocator>
@@ -322,7 +340,7 @@ template <typename ScalarType, DeviceType device_name, class Allocator>
template <DeviceType rhs_device_name>
void ReshapableMatrix<ScalarType, device_name, Allocator>::setAsync(
    const ReshapableMatrix<ScalarType, rhs_device_name>& rhs, int /*thread_id*/, int /*stream_id*/) {
  set(rhs);
  *this = rhs;
}

#endif  // DCA_HAVE_CUDA
+3 −0
Original line number Diff line number Diff line
@@ -21,6 +21,9 @@ template <typename T>
class AlignedAllocator {
protected:
  T* allocate(std::size_t n) {
    if (!n)
      return nullptr;

    T* ptr;
    int err = posix_memalign((void**)&ptr, 128, n * sizeof(T));
    if (err)
+8 −10
Original line number Diff line number Diff line
@@ -145,11 +145,14 @@ void SpaceTransform2DGpu<RDmn, KDmn, Real>::execute(RMatrix& M) {
}

template <class RDmn, class KDmn, typename Real>
void SpaceTransform2DGpu<RDmn, KDmn, Real>::phaseFactorsAndRearrange(const RMatrix& in, RMatrix& out) {
void SpaceTransform2DGpu<RDmn, KDmn, Real>::phaseFactorsAndRearrange(const RMatrix& in,
                                                                     RMatrix& out) {
  out.resizeNoCopy(in.size());
  const Complex* const phase_factors_ptr =
      BaseClass::hasPhaseFactors() ? getPhaseFactors().ptr() : nullptr;
  details::phaseFactorsAndRearrange(in.ptr(), in.leadingDimension(), out.ptr(),
                                    out.leadingDimension(), n_bands_, nc_, nw_,
                                    getPhaseFactors().ptr(), stream_);
                                    out.leadingDimension(), n_bands_, nc_, nw_, phase_factors_ptr,
                                    stream_);
}

template <class RDmn, class KDmn, typename Real>
@@ -167,18 +170,13 @@ const linalg::Matrix<std::complex<Real>, linalg::GPU>& SpaceTransform2DGpu<RDmn,
template <class RDmn, class KDmn, typename Real>
const auto& SpaceTransform2DGpu<RDmn, KDmn, Real>::getPhaseFactors() {
  auto initialize = []() {
    if (!BaseClass::hasPhaseFactors()) {
      return VectorDev();
    }

    const auto& phase_factors = BaseClass::getPhaseFactors();
    linalg::Vector<std::complex<Real>, linalg::CPU> host_vector(phase_factors.size());
    std::copy_n(phase_factors.values(), phase_factors.size(), host_vector.data());
    std::copy_n(phase_factors.values(), phase_factors.size(), host_vector.ptr());
    return VectorDev(host_vector);
  };

  static const VectorDev phase_factors_dev(initialize(), "Phase factors GPU.");
  assert(BaseClass::hasPhaseFactors() || phase_factors_dev.ptr() == nullptr);
  static const VectorDev phase_factors_dev(initialize());

  return phase_factors_dev;
}
Loading