src/runtime/executor/cuquantum/cuquantum_executor.cu  (+23 −2)

 /** ExaTN: Tensor Runtime: Tensor network executor: NVIDIA cuQuantum
-REVISION: 2021/12/27
+REVISION: 2021/12/29

 Copyright (C) 2018-2021 Dmitry Lyakh
 Copyright (C) 2018-2021 Oak Ridge National Laboratory (UT-Battelle)

@@ -22,6 +22,8 @@ Rationale:
 #include "talshxx.hpp"
+
+#include "linear_memory.hpp"
 #include "cuquantum_executor.hpp"

@@ -74,7 +76,17 @@ CuQuantumExecutor::CuQuantumExecutor(TensorImplFunc tensor_data_access_func):
  int num_gpus = 0;
  auto error_code = talshDeviceCount(DEV_NVIDIA_GPU,&num_gpus); assert(error_code == TALSH_SUCCESS);
  for(int i = 0; i < num_gpus; ++i){
-  if(talshDeviceState(i,DEV_NVIDIA_GPU) >= DEV_ON) gpu_attr_.emplace_back(std::make_pair(i,DeviceAttr{}));
+  if(talshDeviceState(i,DEV_NVIDIA_GPU) >= DEV_ON){
+   gpu_attr_.emplace_back(std::make_pair(i,DeviceAttr{}));
+   gpu_attr_.back().second.workspace_ptr = talsh::getDeviceBufferBasePtr(DEV_NVIDIA_GPU,i);
+   assert(reinterpret_cast<std::size_t>(gpu_attr_.back().second.workspace_ptr) % MEM_ALIGNMENT == 0);
+   gpu_attr_.back().second.buffer_size = talsh::getDeviceMaxBufferSize(DEV_NVIDIA_GPU,i);
+   std::size_t wrk_size = static_cast<float>(gpu_attr_.back().second.buffer_size) * WORKSPACE_FRACTION;
+   wrk_size -= wrk_size % MEM_ALIGNMENT;
+   gpu_attr_.back().second.workspace_size = wrk_size;
+   gpu_attr_.back().second.buffer_size -= wrk_size;
+   gpu_attr_.back().second.buffer_ptr = (void*)(((char*)(gpu_attr_.back().second.workspace_ptr)) + wrk_size);
+  }
  }
  std::cout << "#DEBUG(exatn::runtime::CuQuantumExecutor): Number of available GPUs = " << gpu_attr_.size() << std::endl;

@@ -83,6 +95,15 @@ CuQuantumExecutor::CuQuantumExecutor(TensorImplFunc tensor_data_access_func):
   HANDLE_CTN_ERROR(cutensornetCreate((cutensornetHandle_t*)(&gpu.second.cutn_handle)));
  }
  std::cout << "#DEBUG(exatn::runtime::CuQuantumExecutor): Created cuTensorNet contexts for all available GPUs" << std::endl;
+ std::cout << "#DEBUG(exatn::runtime::CuQuantumExecutor): GPU configuration:\n";
+ for(const auto & gpu: gpu_attr_){
+  std::cout << " GPU #" << gpu.first
+            << ": wrk_ptr = " << gpu.second.workspace_ptr << ", size = " << gpu.second.workspace_size
+            << "; buf_ptr = " << gpu.second.buffer_ptr << ", size = " << gpu.second.buffer_size << std::endl;
+ }
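The constructor change above splits each GPU's TAL-SH device buffer into a cuTensorNet workspace (the leading WORKSPACE_FRACTION of the buffer, rounded down to MEM_ALIGNMENT) and a remaining tensor data buffer. A minimal standalone sketch of that split arithmetic; the splitDeviceBuffer helper is hypothetical, while the two constants are those added to cuquantum_executor.hpp below:

 #include <cassert>
 #include <cstddef>

 //Hypothetical illustration of the per-GPU buffer split performed in the
 //CuQuantumExecutor constructor: the leading fraction of the device buffer,
 //rounded down to the alignment, becomes cuTensorNet workspace; the rest
 //remains available for tensor data.
 struct BufferSplit {
  void * workspace_ptr;       //workspace starts at the buffer base
  std::size_t workspace_size; //aligned fraction of the original buffer
  void * buffer_ptr;          //tensor buffer starts right after the workspace
  std::size_t buffer_size;    //what is left of the original buffer
 };

 inline BufferSplit splitDeviceBuffer(void * base_ptr, std::size_t total_size,
                                      float workspace_fraction = 0.2f, //WORKSPACE_FRACTION
                                      std::size_t alignment = 256)     //MEM_ALIGNMENT
 {
  assert(reinterpret_cast<std::size_t>(base_ptr) % alignment == 0);
  std::size_t wrk_size = static_cast<float>(total_size) * workspace_fraction;
  wrk_size -= wrk_size % alignment; //round the workspace size down to the alignment
  return BufferSplit{base_ptr, wrk_size,
                     static_cast<char*>(base_ptr) + wrk_size, total_size - wrk_size};
 }

For example, a 1,000,000-byte buffer yields a 199,936-byte workspace (200,000 rounded down to a multiple of 256), leaving 800,064 bytes for tensor data.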
src/runtime/executor/cuquantum/cuquantum_executor.hpp  (+4 −1)

 /** ExaTN: Tensor Runtime: Tensor network executor: NVIDIA cuQuantum
-REVISION: 2021/12/27
+REVISION: 2021/12/29

 Copyright (C) 2018-2021 Dmitry Lyakh
 Copyright (C) 2018-2021 Oak Ridge National Laboratory (UT-Battelle)

@@ -65,6 +65,9 @@ public:
 protected:

+ static constexpr float WORKSPACE_FRACTION = 0.2;
+ static constexpr std::size_t MEM_ALIGNMENT = 256;
+
 struct DeviceAttr{
  void * buffer_ptr = nullptr;
  std::size_t buffer_size = 0;
src/runtime/executor/cuquantum/linear_memory.hpp  (new file, +91 −0)

/** ExaTN: Tensor Runtime: Tensor network executor: Linear memory allocator
REVISION: 2021/12/29

Copyright (C) 2018-2021 Dmitry Lyakh
Copyright (C) 2018-2021 Oak Ridge National Laboratory (UT-Battelle)

Rationale:
 Linear memory moving window:
 (a) front >= back:
     ____________________________________
     |          |                |       |
     |   FREE   |    OCCUPIED    | FREE  |
     |          |                |       |
     |__________|________________|_______|
     begin      back             front   end

 (b) front < back:
     ____________________________________
     |          |              |         |
     | OCCUPIED |     FREE     | OCCUPIED|
     |          |              |         |
     |__________|______________|_________|
     begin      front          back      end
**/

#ifndef EXATN_RUNTIME_LINEAR_MEMORY_HPP_
#define EXATN_RUNTIME_LINEAR_MEMORY_HPP_

#include <cstddef> //std::size_t (missing from the original diff)
#include <cassert> //assert (missing from the original diff)

class LinearMemoryPool {

public:

 LinearMemoryPool(void * base_ptr, std::size_t total_size, std::size_t alignment):
  base_ptr_(base_ptr), total_size_(total_size), alignment_(alignment),
  front_(base_ptr), back_(base_ptr)
 {
  assert(reinterpret_cast<std::size_t>(base_ptr_) % alignment_ == 0);
 }

 /** Returns the number of bytes currently inside the occupied window [back_..front_). **/
 std::size_t occupiedSize() const
 {
  const std::size_t fptr = reinterpret_cast<std::size_t>(front_);
  const std::size_t bptr = reinterpret_cast<std::size_t>(back_);
  if(fptr >= bptr) return (fptr - bptr);
  return (total_size_ - (bptr - fptr)); //occupied window wraps around the end of the pool
 }

 /** Acquires mem_size bytes (rounded up past the next alignment boundary) by advancing
     the front of the window, wrapping around the end of the pool if necessary.
     Returns nullptr if the pool does not have enough free space. Note that a
     wrapped acquisition is not contiguous in address space (see diagram (b)). **/
 void * acquireMemory(std::size_t mem_size)
 {
  assert(mem_size > 0);
  mem_size = (mem_size - (mem_size % alignment_)) + alignment_; //round up (an exact multiple still gains one alignment unit)
  if(occupiedSize() + mem_size >= total_size_) return nullptr; //reject an exactly full pool: front_ == back_ must mean empty
  void * mem_ptr = front_;
  const std::size_t left_forward = total_size_ -
   (reinterpret_cast<std::size_t>(front_) - reinterpret_cast<std::size_t>(base_ptr_)); //bytes from front_ to the end of the pool
  if(left_forward > mem_size){
   front_ = (void*)((char*)front_ + mem_size);
  }else{
   front_ = (void*)((char*)base_ptr_ + (mem_size - left_forward)); //wrap around to the beginning
  }
  return mem_ptr;
 }

 /** Releases memory from the back of the window: back_ptr must be a pointer
     previously returned by acquireMemory (the start of the oldest block that
     should stay alive, or the current front to drain the pool completely). **/
 void releaseMemory(void * back_ptr)
 {
  assert(reinterpret_cast<std::size_t>(back_ptr) % alignment_ == 0);
  const auto preceding_size = occupiedSize();
  back_ = back_ptr;
  assert(occupiedSize() < preceding_size); //the occupied window must shrink
  return;
 }

 void * getFront() const {return front_;}

 void * getBack() const {return back_;}

protected:

 void * base_ptr_;        //base of the memory pool
 std::size_t total_size_; //total pool size in bytes
 std::size_t alignment_;  //alignment of all acquisitions
 void * front_;           //front of the occupied window (next acquisition)
 void * back_;            //back of the occupied window (oldest live block)
};

#endif //EXATN_RUNTIME_LINEAR_MEMORY_HPP_
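To make the moving-window semantics concrete, here is a minimal host-side usage sketch (a hypothetical driver, not part of this PR; inside the executor the pool would manage the device workspace slice carved out above, but the allocator itself is memory-space agnostic):

 #include <cstdlib>
 #include <iostream>

 #include "linear_memory.hpp"

 int main()
 {
  constexpr std::size_t kAlignment = 256;
  constexpr std::size_t kPoolSize = 1 << 20; //1 MiB backing region
  void * base = std::aligned_alloc(kAlignment, kPoolSize); //C++17

  LinearMemoryPool pool(base, kPoolSize, kAlignment);

  //Acquisitions advance the front of the window; each request is
  //rounded up past the next 256-byte boundary:
  void * a = pool.acquireMemory(1000); //occupies 1024 bytes at the back
  void * b = pool.acquireMemory(4096); //occupies 4352 bytes right after a
  std::cout << "a at offset " << (static_cast<char*>(a) - static_cast<char*>(base)) << "\n"; //0
  std::cout << "b at offset " << (static_cast<char*>(b) - static_cast<char*>(base)) << "\n"; //1024
  std::cout << "Occupied: " << pool.occupiedSize() << " bytes\n"; //5376

  //Releases are FIFO: passing b as the new back frees block a only:
  pool.releaseMemory(b);
  std::cout << "Occupied: " << pool.occupiedSize() << " bytes\n"; //4352

  pool.releaseMemory(pool.getFront()); //drain the pool completely
  std::free(base);
  return 0;
 }

Note that releases must proceed in allocation order: releaseMemory takes the desired new back of the window rather than the block being freed, which is why freeing a means passing b.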
ExaTensor @ 08bfaaab  (compare cf2acb52 → 08bfaaab)

-Subproject commit cf2acb5276c3e1d60986e6ea8f50bce1949e75e6
+Subproject commit 08bfaaabe281a9ec97d76d068dc745ea0e4b481b