Commit 972430c0 authored by Dmitry I. Lyakh's avatar Dmitry I. Lyakh
Browse files

Implemented aligned linear memory allocator for cuQuantum backend

parent 875aabf9
Loading
Loading
Loading
Loading
+23 −2
Original line number Diff line number Diff line
/** ExaTN: Tensor Runtime: Tensor network executor: NVIDIA cuQuantum
REVISION: 2021/12/27
REVISION: 2021/12/29

Copyright (C) 2018-2021 Dmitry Lyakh
Copyright (C) 2018-2021 Oak Ridge National Laboratory (UT-Battelle)
@@ -22,6 +22,8 @@ Rationale:

#include "talshxx.hpp"

#include "linear_memory.hpp"

#include "cuquantum_executor.hpp"


@@ -74,7 +76,17 @@ CuQuantumExecutor::CuQuantumExecutor(TensorImplFunc tensor_data_access_func):
 int num_gpus = 0;
 auto error_code = talshDeviceCount(DEV_NVIDIA_GPU,&num_gpus); assert(error_code == TALSH_SUCCESS);
 for(int i = 0; i < num_gpus; ++i){
  if(talshDeviceState(i,DEV_NVIDIA_GPU) >= DEV_ON) gpu_attr_.emplace_back(std::make_pair(i,DeviceAttr{}));
  if(talshDeviceState(i,DEV_NVIDIA_GPU) >= DEV_ON){
   gpu_attr_.emplace_back(std::make_pair(i,DeviceAttr{}));
   gpu_attr_.back().second.workspace_ptr = talsh::getDeviceBufferBasePtr(DEV_NVIDIA_GPU,i);
   assert(reinterpret_cast<std::size_t>(gpu_attr_.back().second.workspace_ptr) % MEM_ALIGNMENT == 0);
   gpu_attr_.back().second.buffer_size = talsh::getDeviceMaxBufferSize(DEV_NVIDIA_GPU,i);
   std::size_t wrk_size = static_cast<float>(gpu_attr_.back().second.buffer_size) * WORKSPACE_FRACTION;
   wrk_size -= wrk_size % MEM_ALIGNMENT;
   gpu_attr_.back().second.workspace_size = wrk_size;
   gpu_attr_.back().second.buffer_size -= wrk_size;
   gpu_attr_.back().second.buffer_ptr = (void*)(((char*)(gpu_attr_.back().second.workspace_ptr)) + wrk_size);
  }
 }
 std::cout << "#DEBUG(exatn::runtime::CuQuantumExecutor): Number of available GPUs = " << gpu_attr_.size() << std::endl;

@@ -83,6 +95,15 @@ CuQuantumExecutor::CuQuantumExecutor(TensorImplFunc tensor_data_access_func):
  HANDLE_CTN_ERROR(cutensornetCreate((cutensornetHandle_t*)(&gpu.second.cutn_handle)));
 }
 std::cout << "#DEBUG(exatn::runtime::CuQuantumExecutor): Created cuTensorNet contexts for all available GPUs" << std::endl;

 std::cout << "#DEBUG(exatn::runtime::CuQuantumExecutor): GPU configuration:\n";
 for(const auto & gpu: gpu_attr_){
  std::cout << " GPU #" << gpu.first
            << ": wrk_ptr = " << gpu.second.workspace_ptr
            << ", size = " << gpu.second.workspace_size
            << "; buf_ptr = " << gpu.second.buffer_ptr
            << ", size = " << gpu.second.buffer_size << std::endl;
 }
}


+4 −1
Original line number Diff line number Diff line
/** ExaTN: Tensor Runtime: Tensor network executor: NVIDIA cuQuantum
REVISION: 2021/12/27
REVISION: 2021/12/29

Copyright (C) 2018-2021 Dmitry Lyakh
Copyright (C) 2018-2021 Oak Ridge National Laboratory (UT-Battelle)
@@ -65,6 +65,9 @@ public:

protected:

 static constexpr float WORKSPACE_FRACTION = 0.2;
 static constexpr std::size_t MEM_ALIGNMENT = 256;

 struct DeviceAttr{
  void * buffer_ptr = nullptr;
  std::size_t buffer_size = 0;
+91 −0
Original line number Diff line number Diff line
/** ExaTN: Tensor Runtime: Tensor network executor: Linear memory allocator
REVISION: 2021/12/29

Copyright (C) 2018-2021 Dmitry Lyakh
Copyright (C) 2018-2021 Oak Ridge National Laboratory (UT-Battelle)

Rationale:

 Linear memory moving window:

 (a) front >= back:
 ____________________________________
 |          |                |       |
 |  FREE    |    OCCUPIED    | FREE  |
 |          |                |       |
 |__________|________________|_______|
 begin     back            front   end

 (b) front < back:
 ____________________________________
 |          |              |         |
 | OCCUPIED |      FREE    | OCCUPIED|
 |          |              |         |
 |__________|______________|_________|
 begin    front           back     end

**/

#ifndef EXATN_RUNTIME_LINEAR_MEMORY_HPP_
#define EXATN_RUNTIME_LINEAR_MEMORY_HPP_

#include <cstddef>
#include <cassert>

/** Moving-window linear allocator over a preallocated buffer
    [base_ptr .. base_ptr + total_size). Chunks are acquired at the
    front and released (in FIFO order) from the back; both pointers
    wrap around the end of the buffer. front_ == back_ always means
    an empty pool (acquireMemory never lets the pool become full). **/
class LinearMemoryPool {

public:

 /** base_ptr must be aligned to `alignment`; all returned chunks
     preserve that alignment. **/
 LinearMemoryPool(void * base_ptr,
                  std::size_t total_size,
                  std::size_t alignment):
  base_ptr_(base_ptr), total_size_(total_size), alignment_(alignment),
  front_(base_ptr), back_(base_ptr)
 {
  assert(alignment_ > 0);
  assert(reinterpret_cast<std::size_t>(base_ptr_) % alignment_ == 0);
 }

 /** Returns the total number of currently occupied bytes
     (including any tail bytes wasted by a wrap-around). **/
 std::size_t occupiedSize() const {
  const auto foff = offsetOf(front_);
  const auto boff = offsetOf(back_);
  if(foff >= boff) return (foff - boff);   //case (a): single occupied segment
  return (total_size_ - (boff - foff));    //case (b): occupied segment wraps around
 }

 /** Acquires a CONTIGUOUS chunk of at least mem_size bytes aligned to
     the pool alignment, or returns nullptr if no sufficiently large
     contiguous free region exists. **/
 void * acquireMemory(std::size_t mem_size) {
  assert(mem_size > 0);
  //Round the request up to the next multiple of the alignment
  //(BUG FIX: the previous formula added a full alignment unit
  // even when mem_size was already aligned):
  mem_size = ((mem_size + alignment_ - 1) / alignment_) * alignment_;
  //Keep the pool strictly non-full so that front_ == back_
  //unambiguously means empty (BUG FIX: was `>`):
  if(occupiedSize() + mem_size >= total_size_) return nullptr;
  //Contiguous room between front_ and the end of the buffer
  //(BUG FIX: was computed from the absolute address of front_
  // instead of its offset from base_ptr_):
  const std::size_t left_forward = total_size_ - offsetOf(front_);
  void * mem_ptr = nullptr;
  if(mem_size <= left_forward){
   mem_ptr = front_;
   front_ = (mem_size == left_forward) ? base_ptr_ :
            static_cast<void*>(static_cast<char*>(front_) + mem_size);
  }else{
   //Not enough contiguous room before the end of the buffer:
   //waste the tail and allocate from the beginning instead
   //(BUG FIX: previously a pointer to a non-contiguous, wrapped
   // region was handed out). The wasted tail is accounted as
   // occupied automatically since occupiedSize() is defined purely
   // by the front_/back_ positions. The beginning must not run
   // into back_:
   if(occupiedSize() + left_forward + mem_size >= total_size_) return nullptr;
   mem_ptr = base_ptr_;
   front_ = static_cast<void*>(static_cast<char*>(base_ptr_) + mem_size);
  }
  return mem_ptr;
 }

 /** Releases all memory from the current back up to (but not
     including) back_ptr, which must be a pointer previously
     returned by acquireMemory(). **/
 void releaseMemory(void * back_ptr) {
  assert(back_ptr != nullptr);
  assert(offsetOf(back_ptr) < total_size_); //must point inside the buffer
  assert(reinterpret_cast<std::size_t>(back_ptr) % alignment_ == 0);
  const auto preceding_size = occupiedSize();
  back_ = back_ptr;
  assert(occupiedSize() < preceding_size); //release must free something
  return;
 }

 void * getFront() const {
  return front_;
 }

 void * getBack() const {
  return back_;
 }

protected:

 /** Byte offset of ptr from the buffer base. **/
 std::size_t offsetOf(const void * ptr) const {
  return static_cast<std::size_t>(static_cast<const char*>(ptr) -
                                  static_cast<const char*>(base_ptr_));
 }

 void * base_ptr_;        //base of the underlying buffer (not owned)
 std::size_t total_size_; //total buffer size in bytes
 std::size_t alignment_;  //alignment of all returned chunks
 void * front_;           //next chunk is carved here
 void * back_;            //oldest still-occupied byte
};

#endif //EXATN_RUNTIME_LINEAR_MEMORY_HPP_
Compare cf2acb52 to 08bfaaab
Original line number Diff line number Diff line
Subproject commit cf2acb5276c3e1d60986e6ea8f50bce1949e75e6
Subproject commit 08bfaaabe281a9ec97d76d068dc745ea0e4b481b