src/runtime/executor/cuquantum/cuquantum_executor.cu (+79 −19)

 /** ExaTN: Tensor Runtime: Tensor network executor: NVIDIA cuQuantum
-REVISION: 2021/12/29
+REVISION: 2021/12/30

 Copyright (C) 2018-2021 Dmitry Lyakh
 Copyright (C) 2018-2021 Oak Ridge National Laboratory (UT-Battelle)

@@ -22,8 +22,6 @@ Rationale:
 #include "talshxx.hpp"
-#include "linear_memory.hpp"
 #include "cuquantum_executor.hpp"

@@ -44,7 +42,6 @@ namespace exatn { namespace runtime {
 struct TensorDescriptor {
-  std::vector<int32_t> modes;   //indices associated with tensor dimensions
   std::vector<int64_t> extents; //tensor dimension extents
   std::vector<int64_t> strides; //tensor dimension strides (optional)
   void * body_ptr = nullptr;    //pointer to the tensor body image

@@ -53,17 +50,18 @@ struct TensorDescriptor {
 struct TensorNetworkReq {
-  std::shared_ptr<numerics::TensorNetwork> network;
-  std::unordered_map<numerics::TensorHashType,TensorDescriptor> tensor_descriptors;
-  std::unordered_map<int32_t,int64_t> index_extents;
+  TensorNetworkQueue::ExecStat exec_status = TensorNetworkQueue::ExecStat::None; //tensor network execution status
+  std::shared_ptr<numerics::TensorNetwork> network; //tensor network specification
+  std::unordered_map<numerics::TensorHashType,TensorDescriptor> tensor_descriptors; //tensor descriptors (shape, volume, data type, body)
+  std::unordered_map<unsigned int,std::vector<int32_t>> tensor_modes; //indices associated with tensor dimensions (key is the original tensor id)
+  std::unordered_map<int32_t,int64_t> index_extents; //extent of each registered tensor mode
+  std::vector<void*> memory_window_ptr; //end of the GPU memory segment allocated for the tensors
   cutensornetNetworkDescriptor_t net_descriptor;
   cutensornetContractionOptimizerConfig_t opt_config;
   cutensornetContractionOptimizerInfo_t opt_info;
   cutensornetContractionPlan_t comp_plan;
   cudaStream_t stream;
   cutensornetComputeType_t compute_type;
-  void * memory_window_ptr = nullptr;
-  TensorNetworkQueue::ExecStat exec_status = TensorNetworkQueue::ExecStat::Idle;
 };

@@ -83,11 +81,14 @@ CuQuantumExecutor::CuQuantumExecutor(TensorImplFunc tensor_data_access_func):
   gpu_attr_.back().second.workspace_ptr = talsh::getDeviceBufferBasePtr(DEV_NVIDIA_GPU,i);
   assert(reinterpret_cast<std::size_t>(gpu_attr_.back().second.workspace_ptr) % MEM_ALIGNMENT == 0);
   gpu_attr_.back().second.buffer_size = talsh::getDeviceMaxBufferSize(DEV_NVIDIA_GPU,i);
-  std::size_t wrk_size = static_cast<float>(gpu_attr_.back().second.buffer_size) * WORKSPACE_FRACTION;
+  std::size_t wrk_size = (std::size_t)(static_cast<float>(gpu_attr_.back().second.buffer_size) * WORKSPACE_FRACTION);
   wrk_size -= wrk_size % MEM_ALIGNMENT;
   gpu_attr_.back().second.workspace_size = wrk_size;
   gpu_attr_.back().second.buffer_size -= wrk_size;
+  gpu_attr_.back().second.buffer_size -= gpu_attr_.back().second.buffer_size % MEM_ALIGNMENT;
+  gpu_attr_.back().second.buffer_ptr = (void*)(((char*)(gpu_attr_.back().second.workspace_ptr)) + wrk_size);
+  mem_pool_.emplace_back(LinearMemoryPool(gpu_attr_.back().second.buffer_ptr,
+                                          gpu_attr_.back().second.buffer_size,MEM_ALIGNMENT));
  }
 }
 std::cout << "#DEBUG(exatn::runtime::CuQuantumExecutor): Number of available GPUs = " << gpu_attr_.size() << std::endl;
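The constructor hunk above splits each GPU's TAL-SH device buffer into a cuTensorNet workspace (WORKSPACE_FRACTION of the buffer) and a linear memory pool, truncating both segment sizes down to MEM_ALIGNMENT. A minimal host-side sketch of that arithmetic, using a static arena in place of the real device buffer (the arena and its size are illustrative stand-ins only):

```cpp
#include <cassert>
#include <cstddef>
#include <iostream>

int main() {
  constexpr float WORKSPACE_FRACTION = 0.2f; // fraction reserved as cuTensorNet workspace
  constexpr std::size_t MEM_ALIGNMENT = 256; // byte alignment of both segments

  alignas(MEM_ALIGNMENT) static char arena[1u << 20]; // stand-in for the TAL-SH device buffer
  std::size_t buffer_size = sizeof(arena);

  // Carve the leading fraction out as workspace, truncated down to the alignment:
  auto wrk_size = static_cast<std::size_t>(static_cast<float>(buffer_size) * WORKSPACE_FRACTION);
  wrk_size -= wrk_size % MEM_ALIGNMENT;

  // The remainder becomes the linear memory pool, also truncated to the alignment,
  // starting right after the workspace segment:
  buffer_size -= wrk_size;
  buffer_size -= buffer_size % MEM_ALIGNMENT;
  void * pool_base = static_cast<void*>(arena + wrk_size);

  assert(reinterpret_cast<std::size_t>(pool_base) % MEM_ALIGNMENT == 0);
  std::cout << "workspace: " << wrk_size << " B, pool: " << buffer_size << " B\n";
  return 0;
}
```

The explicit (std::size_t) cast added by this commit matters: without it, the float product was implicitly narrowed, which compilers warn about and which obscures where the rounding happens.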
@@ -111,7 +112,7 @@ CuQuantumExecutor::CuQuantumExecutor(TensorImplFunc tensor_data_access_func):
 CuQuantumExecutor::~CuQuantumExecutor()
 {
- bool success = sync(); assert(success);
+ sync();
  for(const auto & gpu: gpu_attr_){
   HANDLE_CUDA_ERROR(cudaSetDevice(gpu.first));
   HANDLE_CTN_ERROR(cutensornetDestroy((cutensornetHandle_t)(gpu.second.cutn_handle)));

@@ -130,8 +131,16 @@ TensorNetworkQueue::ExecStat CuQuantumExecutor::execute(std::shared_ptr<numerics
  if(res.second){
   auto tn_req = res.first->second;
   tn_req->network = network;
+  tn_req->exec_status = TensorNetworkQueue::ExecStat::Idle;
+  parseTensorNetwork(tn_req); //still Idle
+  loadTensors(tn_req); //Idle --> Loading
+  if(tn_req->exec_status == TensorNetworkQueue::ExecStat::Loading){
+   planExecution(tn_req); //Loading --> Planning (while loading data)
+   if(tn_req->exec_status == TensorNetworkQueue::ExecStat::Planning){
+    contractTensorNetwork(tn_req); //Planning --> Executing
+   }
+  }
+  exec_stat = tn_req->exec_status; //`Finish
  }else{
   std::cout << "#WARNING(exatn::runtime::CuQuantumExecutor): execute: Repeated tensor network submission detected!\n";
  }

@@ -140,26 +149,77 @@ TensorNetworkQueue::ExecStat CuQuantumExecutor::execute(std::shared_ptr<numerics
 TensorNetworkQueue::ExecStat CuQuantumExecutor::sync(const TensorOpExecHandle exec_handle,
-                                                     int * error_code, bool wait)
+                                                     int * error_code)
 {
  *error_code = 0;
  TensorNetworkQueue::ExecStat exec_stat = TensorNetworkQueue::ExecStat::None;
  auto iter = active_networks_.find(exec_handle);
  if(iter != active_networks_.end()){
   auto tn_req = iter->second;
+  if(tn_req->exec_status == TensorNetworkQueue::ExecStat::Executing){
+   testCompletion(tn_req); //Executing --> Completed
+  }else{
+   if(tn_req->exec_status == TensorNetworkQueue::ExecStat::Idle) loadTensors(tn_req); //Idle --> Loading
+   if(tn_req->exec_status == TensorNetworkQueue::ExecStat::Loading) planExecution(tn_req); //Loading --> Planning (while loading data)
+   if(tn_req->exec_status == TensorNetworkQueue::ExecStat::Planning) contractTensorNetwork(tn_req); //Planning --> Executing
+  }
   exec_stat = tn_req->exec_status; //`Finish
   tn_req.reset();
+  if(exec_stat == TensorNetworkQueue::ExecStat::Completed) active_networks_.erase(iter);
  }
  return exec_stat;
 }

-bool CuQuantumExecutor::sync()
+void CuQuantumExecutor::sync()
 {
- bool synced = true; //`Finish
- return synced;
+ while(!active_networks_.empty()){
+  for(auto iter = active_networks_.begin(); iter != active_networks_.end(); ++iter){
+   int error_code = 0;
+   const auto exec_stat = sync(iter->first,&error_code);
+   assert(error_code == 0);
+   if(exec_stat == TensorNetworkQueue::ExecStat::Completed) break;
+  }
+ }
+ return;
 }

+void CuQuantumExecutor::parseTensorNetwork(std::shared_ptr<TensorNetworkReq> tn_req)
+{
+ return;
+}
+
+void CuQuantumExecutor::loadTensors(std::shared_ptr<TensorNetworkReq> tn_req)
+{
+ return;
+}
+
+void CuQuantumExecutor::planExecution(std::shared_ptr<TensorNetworkReq> tn_req)
+{
+ return;
+}
+
+void CuQuantumExecutor::contractTensorNetwork(std::shared_ptr<TensorNetworkReq> tn_req)
+{
+ return;
+}
+
+void CuQuantumExecutor::testCompletion(std::shared_ptr<TensorNetworkReq> tn_req)
+{
+ return;
+}

 } //namespace runtime
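Both execute() and the per-handle sync() drive a TensorNetworkReq through the ExecStat chain Idle → Loading → Planning → Executing → Completed, advancing as many stages as possible on each call; the stage methods themselves are still stubs in this revision. A self-contained sketch of the same non-blocking progression pattern (the Request struct and the stage bodies below are invented stand-ins, not the real implementations):

```cpp
#include <iostream>

// Mirrors TensorNetworkQueue::ExecStat from tensor_network_queue.hpp:
enum class ExecStat { None, Idle, Loading, Planning, Executing, Completed };

struct Request { ExecStat status = ExecStat::Idle; };

// Hypothetical stage functions: each advances the status once its
// (possibly asynchronous) work has been issued or has finished.
void loadTensors(Request & r)     { r.status = ExecStat::Loading; }
void planExecution(Request & r)   { r.status = ExecStat::Planning; }
void contractNetwork(Request & r) { r.status = ExecStat::Executing; }
void testCompletion(Request & r)  { r.status = ExecStat::Completed; } // would poll a CUDA event

// One non-blocking progress step, in the spirit of CuQuantumExecutor::sync(handle,&ec):
ExecStat progress(Request & r) {
  if (r.status == ExecStat::Executing) {
    testCompletion(r);                                      // Executing --> Completed
  } else {
    if (r.status == ExecStat::Idle)     loadTensors(r);     // Idle --> Loading
    if (r.status == ExecStat::Loading)  planExecution(r);   // Loading --> Planning
    if (r.status == ExecStat::Planning) contractNetwork(r); // Planning --> Executing
  }
  return r.status;
}

int main() {
  Request r;
  while (progress(r) != ExecStat::Completed) {} // poll until done, as the parameterless sync() does
  std::cout << "completed\n";
  return 0;
}
```

Note how the chained ifs (rather than else-ifs) let one call fall through several transitions when a stage completes immediately, which is exactly why execute() can hand back anything from Idle to Executing.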
src/runtime/executor/cuquantum/cuquantum_executor.hpp (+12 −4)

 /** ExaTN: Tensor Runtime: Tensor network executor: NVIDIA cuQuantum
-REVISION: 2021/12/29
+REVISION: 2021/12/30

 Copyright (C) 2018-2021 Dmitry Lyakh
 Copyright (C) 2018-2021 Oak Ridge National Laboratory (UT-Battelle)

@@ -20,6 +20,7 @@ Rationale:
 #include <vector>
 #include <functional>
+#include "linear_memory.hpp"
 #include "tensor_network_queue.hpp"

 namespace talsh{

@@ -57,17 +58,22 @@ public:
-     If wait = TRUE, waits until completion, otherwise just tests the progress.
      Returns the current status of the tensor network execution. **/
  TensorNetworkQueue::ExecStat sync(const TensorOpExecHandle exec_handle,
-                                   int * error_code, bool wait = true);
+                                   int * error_code);

  /** Synchronizes execution of all submitted tensor networks to completion. **/
- bool sync();
+ void sync();

 protected:

  static constexpr float WORKSPACE_FRACTION = 0.2;
  static constexpr std::size_t MEM_ALIGNMENT = 256;

+ void parseTensorNetwork(std::shared_ptr<TensorNetworkReq> tn_req);
+ void loadTensors(std::shared_ptr<TensorNetworkReq> tn_req);
+ void planExecution(std::shared_ptr<TensorNetworkReq> tn_req);
+ void contractTensorNetwork(std::shared_ptr<TensorNetworkReq> tn_req);
+ void testCompletion(std::shared_ptr<TensorNetworkReq> tn_req);

  struct DeviceAttr{
   void * buffer_ptr = nullptr;
   std::size_t buffer_size = 0;

@@ -80,6 +86,8 @@ protected:
  std::unordered_map<TensorOpExecHandle,std::shared_ptr<TensorNetworkReq>> active_networks_;
  /** Attributes of all GPUs available to the current process **/
  std::vector<std::pair<int,DeviceAttr>> gpu_attr_; //{gpu_id, gpu_attributes}
+ /** Moving-window linear memory pool (in GPU RAM) **/
+ std::vector<LinearMemoryPool> mem_pool_;
  /** Tensor data access function **/
  TensorImplFunc tensor_data_access_func_; //numerics::Tensor --> {tensor_body_ptr, size_in_bytes}
 };

src/runtime/executor/cuquantum/linear_memory.hpp (+4 −2)

 /** ExaTN: Tensor Runtime: Tensor network executor: Linear memory allocator
-REVISION: 2021/12/29
+REVISION: 2021/12/30

 Copyright (C) 2018-2021 Dmitry Lyakh
 Copyright (C) 2018-2021 Oak Ridge National Laboratory (UT-Battelle)

 Rationale:
-Linear memory moving window:
+Linear memory moving window (----->):
 (a) front >= back:
 ____________________________________

@@ -29,6 +29,8 @@ Rationale:
 #ifndef EXATN_RUNTIME_LINEAR_MEMORY_HPP_
 #define EXATN_RUNTIME_LINEAR_MEMORY_HPP_

+#include "errors.hpp"
+
 class LinearMemoryPool {

 public:

src/runtime/executor/cuquantum/tensor_network_queue.hpp (+3 −2)

 /** ExaTN: Tensor Runtime: Tensor network executor: Execution queue
-REVISION: 2021/12/27
+REVISION: 2021/12/30

 Copyright (C) 2018-2021 Dmitry Lyakh
 Copyright (C) 2018-2021 Oak Ridge National Laboratory (UT-Battelle)

@@ -36,7 +36,8 @@ public:
  enum class ExecStat {
   None,      //no execution status
   Idle,      //submitted but execution has not yet started
-  Preparing, //preparation for execution has started (loading data, planning)
+  Loading,   //started loading data
+  Planning,  //preparation for execution has started (planning)
   Executing, //actual execution (numerical computation) has started
   Completed  //execution completed
  };
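linear_memory.hpp describes a moving window in which a front pointer advances on allocation and a back pointer chases it on release. A sketch of one way such a pool could work, assuming FIFO release and no wrap-around (the WindowPool class and its method names are illustrative, not LinearMemoryPool's actual interface):

```cpp
#include <cassert>
#include <cstddef>

// A minimal moving-window linear allocator in the spirit of LinearMemoryPool:
// acquire() bumps the front pointer; release() advances the back pointer in
// FIFO order, so the live window slides through the buffer.
class WindowPool {
public:
  WindowPool(void * base, std::size_t size, std::size_t align)
    : base_(static_cast<char*>(base)), size_(size), align_(align),
      front_(base_), back_(base_) {}

  void * acquire(std::size_t bytes) {
    bytes += (align_ - bytes % align_) % align_;        // round request up to the alignment
    if (front_ + bytes > base_ + size_) return nullptr; // out of window (no wrap in this sketch)
    void * ptr = front_;
    front_ += bytes;
    return ptr;
  }

  void release(void * ptr, std::size_t bytes) { // FIFO: oldest allocation is freed first
    assert(ptr == back_);
    bytes += (align_ - bytes % align_) % align_;
    back_ += bytes;
    if (back_ == front_) back_ = front_ = base_; // window empty: reset to the base
  }

private:
  char * base_;
  std::size_t size_, align_;
  char * front_; // next free byte
  char * back_;  // oldest live byte
};

int main() {
  alignas(256) static char arena[4096];
  WindowPool pool(arena, sizeof(arena), 256);
  void * a = pool.acquire(100); // rounded up to 256 bytes
  void * b = pool.acquire(300); // rounded up to 512 bytes
  pool.release(a, 100);         // back pointer slides past a
  pool.release(b, 300);         // window empty: both pointers reset
  return (a && b) ? 0 : 1;
}
```

This release order matches the executor's use case: tensor networks are contracted roughly in submission order, so their GPU memory segments can be retired in the same order the window allocated them.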
src/runtime/executor/graph_executors/lazy/graph_executor_lazy.cpp (+3 −3)

 /** ExaTN:: Tensor Runtime: Tensor graph executor: Lazy
-REVISION: 2021/12/29
+REVISION: 2021/12/30

 Copyright (C) 2018-2021 Dmitry Lyakh
 Copyright (C) 2018-2021 Oak Ridge National Laboratory (UT-Battelle)

@@ -290,7 +290,7 @@ void LazyGraphExecutor::execute(TensorNetworkQueue & tensor_network_queue) {
  int error_code = 0;
  const auto current = tensor_network_queue.getCurrent();
  const auto exec_handle = current->second;
- auto exec_stat = cuquantum_executor_->sync(exec_handle,&error_code,false); //this call will progress tensor network execution
+ auto exec_stat = cuquantum_executor_->sync(exec_handle,&error_code); //this call will progress tensor network execution
  assert(error_code == 0);
  if(exec_stat == TensorNetworkQueue::ExecStat::None){
   exec_stat = cuquantum_executor_->execute(current->first,exec_handle);

@@ -310,7 +310,7 @@ void LazyGraphExecutor::execute(TensorNetworkQueue & tensor_network_queue) {
    }
   }
  }
- synced = cuquantum_executor_->sync(); assert(synced);
+ cuquantum_executor_->sync();
 #else
  assert(tensor_network_queue.isEmpty());
 #endif
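The lazy graph executor first calls the two-argument sync() to progress a network that may already be in flight; a returned status of None means the handle is unknown to the cuQuantum executor, so the network is then submitted via execute(). A toy sketch of that handshake (the Executor type and its stub bodies below are invented for illustration and fake the completion test):

```cpp
#include <cassert>
#include <iostream>

enum class ExecStat { None, Idle, Loading, Planning, Executing, Completed };

// Hypothetical stand-in for CuQuantumExecutor, stubbed just enough to show
// the sync-then-execute handshake used by LazyGraphExecutor::execute():
struct Executor {
  ExecStat status = ExecStat::None;
  ExecStat sync(unsigned long /*handle*/, int * error_code) {
    *error_code = 0;
    if (status == ExecStat::Executing) status = ExecStat::Completed; // fake completion test
    return status; // None means this handle was never submitted
  }
  ExecStat execute(int /*network_id*/, unsigned long /*handle*/) {
    return status = ExecStat::Executing; // fake first submission
  }
};

int main() {
  Executor ex;
  int error_code = 0;
  const unsigned long handle = 42; // hypothetical execution handle
  auto stat = ex.sync(handle, &error_code); // progresses a prior submission, if any
  assert(error_code == 0);
  if (stat == ExecStat::None) stat = ex.execute(/*network_id=*/1, handle);
  while (stat != ExecStat::Completed) stat = ex.sync(handle, &error_code); // poll to completion
  std::cout << "network completed\n";
  return 0;
}
```

Dropping the old `wait` flag and the bool return from the parameterless sync() simplifies this call site: every sync is now a non-blocking progress step, and draining the queue is the executor's own responsibility.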