Commit 972430c0 authored by Dmitry I. Lyakh

Implemented aligned linear memory allocator for cuQuantum backend


Signed-off-by: Dmitry I. Lyakh <quant4me@gmail.com>
parent 875aabf9
/** ExaTN: Tensor Runtime: Tensor network executor: NVIDIA cuQuantum
-REVISION: 2021/12/27
+REVISION: 2021/12/29
Copyright (C) 2018-2021 Dmitry Lyakh
Copyright (C) 2018-2021 Oak Ridge National Laboratory (UT-Battelle)
@@ -22,6 +22,8 @@ Rationale:
#include "talshxx.hpp"
#include "linear_memory.hpp"
#include "cuquantum_executor.hpp"
@@ -74,7 +76,17 @@ CuQuantumExecutor::CuQuantumExecutor(TensorImplFunc tensor_data_access_func):
int num_gpus = 0;
auto error_code = talshDeviceCount(DEV_NVIDIA_GPU,&num_gpus); assert(error_code == TALSH_SUCCESS);
for(int i = 0; i < num_gpus; ++i){
-  if(talshDeviceState(i,DEV_NVIDIA_GPU) >= DEV_ON) gpu_attr_.emplace_back(std::make_pair(i,DeviceAttr{}));
+  if(talshDeviceState(i,DEV_NVIDIA_GPU) >= DEV_ON){
+   gpu_attr_.emplace_back(std::make_pair(i,DeviceAttr{}));
+   gpu_attr_.back().second.workspace_ptr = talsh::getDeviceBufferBasePtr(DEV_NVIDIA_GPU,i); //workspace starts at the TAL-SH device buffer base
+   assert(reinterpret_cast<std::size_t>(gpu_attr_.back().second.workspace_ptr) % MEM_ALIGNMENT == 0);
+   gpu_attr_.back().second.buffer_size = talsh::getDeviceMaxBufferSize(DEV_NVIDIA_GPU,i);
+   std::size_t wrk_size = static_cast<std::size_t>(static_cast<float>(gpu_attr_.back().second.buffer_size) * WORKSPACE_FRACTION);
+   wrk_size -= wrk_size % MEM_ALIGNMENT; //round the workspace size down to the alignment boundary
+   gpu_attr_.back().second.workspace_size = wrk_size;
+   gpu_attr_.back().second.buffer_size -= wrk_size; //the remainder of the TAL-SH buffer is left for tensors
+   gpu_attr_.back().second.buffer_ptr = (void*)(((char*)(gpu_attr_.back().second.workspace_ptr)) + wrk_size);
+  }
}
std::cout << "#DEBUG(exatn::runtime::CuQuantumExecutor): Number of available GPUs = " << gpu_attr_.size() << std::endl;
@@ -83,6 +95,15 @@ CuQuantumExecutor::CuQuantumExecutor(TensorImplFunc tensor_data_access_func):
HANDLE_CTN_ERROR(cutensornetCreate((cutensornetHandle_t*)(&gpu.second.cutn_handle)));
}
std::cout << "#DEBUG(exatn::runtime::CuQuantumExecutor): Created cuTensorNet contexts for all available GPUs" << std::endl;
std::cout << "#DEBUG(exatn::runtime::CuQuantumExecutor): GPU configuration:\n";
for(const auto & gpu: gpu_attr_){
std::cout << " GPU #" << gpu.first
<< ": wrk_ptr = " << gpu.second.workspace_ptr
<< ", size = " << gpu.second.workspace_size
<< "; buf_ptr = " << gpu.second.buffer_ptr
<< ", size = " << gpu.second.buffer_size << std::endl;
}
}
......
/** ExaTN: Tensor Runtime: Tensor network executor: NVIDIA cuQuantum
-REVISION: 2021/12/27
+REVISION: 2021/12/29
Copyright (C) 2018-2021 Dmitry Lyakh
Copyright (C) 2018-2021 Oak Ridge National Laboratory (UT-Battelle)
@@ -65,6 +65,9 @@ public:
protected:
+ static constexpr float WORKSPACE_FRACTION = 0.2; //fraction of the TAL-SH device buffer carved out as the cuTensorNet workspace
+ static constexpr std::size_t MEM_ALIGNMENT = 256; //alignment in bytes for all carved-out memory regions
struct DeviceAttr{
void * buffer_ptr = nullptr;
std::size_t buffer_size = 0;
......
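For illustration, here is a minimal standalone sketch of the same workspace carve-out arithmetic; the 1 GiB buffer size is a hypothetical stand-in for what talsh::getDeviceMaxBufferSize would return, while WORKSPACE_FRACTION and MEM_ALIGNMENT mirror the constants added above:

#include <cassert>
#include <cstddef>
#include <iostream>

int main() {
 constexpr float WORKSPACE_FRACTION = 0.2;  //fraction of the device buffer given to the workspace
 constexpr std::size_t MEM_ALIGNMENT = 256; //alignment in bytes

 std::size_t buffer_size = 1ULL << 30; //hypothetical 1 GiB device buffer
 //workspace size = WORKSPACE_FRACTION of the buffer, rounded down to the alignment boundary:
 std::size_t wrk_size = static_cast<std::size_t>(static_cast<float>(buffer_size) * WORKSPACE_FRACTION);
 wrk_size -= wrk_size % MEM_ALIGNMENT;
 buffer_size -= wrk_size; //the remainder stays as the tensor buffer
 assert(wrk_size % MEM_ALIGNMENT == 0);
 std::cout << "workspace = " << wrk_size << " bytes, buffer = " << buffer_size << " bytes" << std::endl;
 return 0;
}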
/** ExaTN: Tensor Runtime: Tensor network executor: Linear memory allocator
REVISION: 2021/12/29
Copyright (C) 2018-2021 Dmitry Lyakh
Copyright (C) 2018-2021 Oak Ridge National Laboratory (UT-Battelle)
Rationale:
Linear memory moving window:
(a) front >= back:
____________________________________
| | | |
| FREE | OCCUPIED | FREE |
| | | |
|__________|________________|_______|
begin back front end
(b) front < back:
____________________________________
| | | |
| OCCUPIED | FREE | OCCUPIED|
| | | |
|__________|______________|_________|
begin front back end
**/
#ifndef EXATN_RUNTIME_LINEAR_MEMORY_HPP_
#define EXATN_RUNTIME_LINEAR_MEMORY_HPP_

#include <cstddef>
#include <cassert>
class LinearMemoryPool {
public:
LinearMemoryPool(void * base_ptr,
std::size_t total_size,
std::size_t alignment):
base_ptr_(base_ptr), total_size_(total_size), alignment_(alignment),
front_(base_ptr), back_(base_ptr)
{
assert(reinterpret_cast<std::size_t>(base_ptr_) % alignment_ == 0);
}
 std::size_t occupiedSize() const { //returns the currently occupied size of the moving window in bytes
const std::size_t fptr = reinterpret_cast<std::size_t>(front_);
const std::size_t bptr = reinterpret_cast<std::size_t>(back_);
if(fptr >= bptr) return (fptr - bptr);
return (total_size_ - bptr + fptr);
}
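 //Worked example for case (b) of the header comment, with hypothetical numbers and
 //base_ptr_ at address 0 for simplicity: total_size_ = 1024, back_ = 768, front_ = 256:
 //occupied = 1024 - 768 + 256 = 512 bytes ([0,256) plus [768,1024), a wrapped window).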
 void * acquireMemory(std::size_t mem_size) {
  assert(mem_size > 0);
  //round the requested size up to the next multiple of the alignment:
  mem_size = ((mem_size + alignment_ - 1) / alignment_) * alignment_;
  //refuse to fill the pool completely so that front_ == back_ always means empty:
  if(occupiedSize() + mem_size >= total_size_) return nullptr;
  void * mem_ptr = front_;
  //contiguous space left between the front and the end of the arena:
  const std::size_t left_forward = total_size_ -
   static_cast<std::size_t>((char*)front_ - (char*)base_ptr_);
  if(left_forward > mem_size){
   front_ = (void*)((char*)front_ + mem_size);
  }else{ //the moving window wraps around: the new front continues from the beginning
   front_ = (void*)((char*)base_ptr_ + (mem_size - left_forward));
  }
  return mem_ptr;
 }
 void releaseMemory(void * back_ptr) { //releases all memory between the current back and back_ptr (allocations are released in FIFO order)
assert(reinterpret_cast<std::size_t>(back_ptr) % alignment_ == 0);
const auto preceding_size = occupiedSize();
back_ = back_ptr;
assert(occupiedSize() < preceding_size);
return;
}
void * getFront() const {
return front_;
}
void * getBack() const {
return back_;
}
protected:
 void * base_ptr_;         //base pointer of the linear memory arena
 std::size_t total_size_;  //total size of the arena in bytes
 std::size_t alignment_;   //alignment in bytes of all returned pointers
 void * front_;            //front of the moving window: where the next allocation starts
 void * back_;             //back of the moving window: start of the oldest live allocation
};
#endif //EXATN_RUNTIME_LINEAR_MEMORY_HPP_
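A minimal usage sketch of the pool under hypothetical numbers, using a host-side arena for illustration (the cuQuantum backend instead hands it the GPU buffer regions carved out above); acquisitions advance the front, and releases hand back the start of the oldest still-live allocation, moving the back forward in FIFO order:

#include <cstdlib>
#include <iostream>
#include "linear_memory.hpp"

int main() {
 constexpr std::size_t ALIGNMENT = 256;
 constexpr std::size_t ARENA_SIZE = 1UL << 20; //hypothetical 1 MiB arena
 void * arena = std::aligned_alloc(ALIGNMENT, ARENA_SIZE); //host stand-in for a GPU buffer
 LinearMemoryPool pool(arena, ARENA_SIZE, ALIGNMENT);

 void * a = pool.acquireMemory(1000); //rounded up to 1024 bytes
 void * b = pool.acquireMemory(4096); //stays 4096 bytes (already aligned)
 std::cout << "a = " << a << ", b = " << b << std::endl;
 std::cout << "occupied = " << pool.occupiedSize() << std::endl; //1024 + 4096 = 5120

 //FIFO release: handing back b releases everything behind it (here, allocation a):
 pool.releaseMemory(b);
 std::cout << "occupied = " << pool.occupiedSize() << std::endl; //4096

 std::free(arena);
 return 0;
}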
Subproject commit cf2acb5276c3e1d60986e6ea8f50bce1949e75e6
Subproject commit 08bfaaabe281a9ec97d76d068dc745ea0e4b481b