ORNL Quantum Computing Institute / exatn

Commit 0c74a510
Authored Jan 03, 2022 by Dmitry I. Lyakh

Implemented CuQuantumExecutor::loadTensors

Signed-off-by: Dmitry I. Lyakh <quant4me@gmail.com>

Parent: ee550dac
Changes: 4 files
src/runtime/executor/cuquantum/cuquantum_executor.cu
 /** ExaTN: Tensor Runtime: Tensor network executor: NVIDIA cuQuantum
-REVISION: 2021/12/30
+REVISION: 2022/01/03

-Copyright (C) 2018-2021 Dmitry Lyakh
-Copyright (C) 2018-2021 Oak Ridge National Laboratory (UT-Battelle)
+Copyright (C) 2018-2022 Dmitry Lyakh
+Copyright (C) 2018-2022 Oak Ridge National Laboratory (UT-Battelle)

 Rationale:
...
...
@@ -48,15 +48,15 @@ struct TensorDescriptor {
  std::size_t volume = 0;      //tensor body volume
  std::size_t size = 0;        //tensor body size (bytes)
  void * src_ptr = nullptr;    //non-owning pointer to the tensor body source image
- std::vector<void*> dst_ptr;  //non-owning pointer to the tensor body dest image (for all GPU)
+ std::vector<void*> dst_ptr;  //non-owning pointer to the tensor body destination image (on each GPU)
 };

 struct TensorNetworkReq {
  TensorNetworkQueue::ExecStat exec_status = TensorNetworkQueue::ExecStat::None; //tensor network execution status
  std::shared_ptr<numerics::TensorNetwork> network;                   //tensor network specification
- std::unordered_map<numerics::TensorHashType,TensorDescriptor> tensor_descriptors; //tensor descriptors (shape, volume, data type, body)
- std::unordered_map<unsigned int,std::vector<int32_t>> tensor_modes; //indices associated with tensor dimensions (key is the original tensor id)
- std::unordered_map<int32_t,int64_t> mode_extents;                   //extent of each registered tensor mode
+ std::unordered_map<numerics::TensorHashType,TensorDescriptor> tensor_descriptors; //tensor descriptors (shape, volume, data type, body)
+ std::unordered_map<unsigned int,std::vector<int32_t>> tensor_modes; //indices associated with tensor dimensions (key = original tensor id)
+ std::unordered_map<int32_t,int64_t> mode_extents;                   //extent of each registered tensor mode
  int32_t * num_modes_in = nullptr;
  int64_t ** extents_in = nullptr;
  int64_t ** strides_in = nullptr;
...
...
@@ -67,7 +67,7 @@ struct TensorNetworkReq {
  int64_t * strides_out = nullptr;
  int32_t * modes_out = nullptr;
  uint32_t alignment_out;
- std::vector<void*> memory_window_ptr; //end of the GPU memory segment allocated for the tensors
+ std::vector<void*> memory_window_ptr; //end of the GPU memory segment allocated for the tensors (on each GPU)
  cutensornetNetworkDescriptor_t net_descriptor;
  cutensornetContractionOptimizerConfig_t opt_config;
  cutensornetContractionOptimizerInfo_t opt_info;
...
...
@@ -75,6 +75,20 @@ struct TensorNetworkReq {
  cudaDataType_t data_type;
  cutensornetComputeType_t compute_type;
  cudaStream_t stream;
+
+ ~TensorNetworkReq() {
+  cudaStreamSynchronize(stream);
+  cudaStreamDestroy(stream);
+  cutensornetDestroyNetworkDescriptor(net_descriptor);
+  if(modes_out != nullptr) delete [] modes_out;
+  if(strides_out != nullptr) delete [] strides_out;
+  if(extents_out != nullptr) delete [] extents_out;
+  if(alignments_in != nullptr) delete [] alignments_in;
+  if(modes_in != nullptr) delete [] modes_in;
+  if(strides_in != nullptr) delete [] strides_in;
+  if(extents_in != nullptr) delete [] extents_in;
+  if(num_modes_in != nullptr) delete [] num_modes_in;
+ }
 };
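The destructor added above follows the usual CUDA cleanup order: drain the stream before destroying it, then release the library descriptor and the host-side arrays. A minimal, self-contained sketch of that sync-before-destroy pattern, reduced to just a CUDA stream in a hypothetical RAII wrapper (ScopedCudaStream is not part of ExaTN):

// Hypothetical standalone sketch (not ExaTN code): an RAII holder that synchronizes a
// CUDA stream before destroying it, mirroring what ~TensorNetworkReq does above.
#include <cuda_runtime.h>
#include <cassert>

struct ScopedCudaStream {
 cudaStream_t stream = nullptr;
 ScopedCudaStream() {
  const cudaError_t err = cudaStreamCreate(&stream);
  assert(err == cudaSuccess); (void)err;
 }
 ~ScopedCudaStream() {
  cudaStreamSynchronize(stream); // wait for pending asynchronous work first
  cudaStreamDestroy(stream);     // only then release the handle
 }
 ScopedCudaStream(const ScopedCudaStream &) = delete;
 ScopedCudaStream & operator=(const ScopedCudaStream &) = delete;
};

int main() {
 ScopedCudaStream s; // cleaned up automatically (after synchronization) when it leaves scope
 return 0;
}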
...
...
@@ -181,8 +195,7 @@ TensorNetworkQueue::ExecStat CuQuantumExecutor::sync(const TensorOpExecHandle ex
  }
  exec_stat = tn_req->exec_status;
  tn_req.reset();
- if(exec_stat == TensorNetworkQueue::ExecStat::Completed) active_networks_.erase(iter);
+ if(exec_stat == TensorNetworkQueue::ExecStat::Completed) active_networks_.erase(iter);
 }
 return exec_stat;
}
...
...
@@ -216,6 +229,21 @@ cudaDataType_t getCudaDataType(const TensorElementType elem_type)
 }

+cutensornetComputeType_t getCutensorComputeType(const TensorElementType elem_type)
+{
+ cutensornetComputeType_t cutensor_data_type;
+ switch(elem_type){
+  case TensorElementType::REAL32: cutensor_data_type = CUTENSORNET_COMPUTE_32F; break;
+  case TensorElementType::REAL64: cutensor_data_type = CUTENSORNET_COMPUTE_64F; break;
+  case TensorElementType::COMPLEX32: cutensor_data_type = CUTENSORNET_COMPUTE_32F; break;
+  case TensorElementType::COMPLEX64: cutensor_data_type = CUTENSORNET_COMPUTE_64F; break;
+  default:
+   assert(false);
+ }
+ return cutensor_data_type;
+}
+
 void CuQuantumExecutor::parseTensorNetwork(std::shared_ptr<TensorNetworkReq> tn_req)
 {
  const auto & net = *(tn_req->network);
...
...
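Note that getCutensorComputeType folds the complex element types onto the real compute enums (CUTENSORNET_COMPUTE_32F/64F); the complex nature of the data is presumably carried by the cudaDataType_t returned by getCudaDataType, which is paired with it later in this diff. A small standalone illustration of that pairing for complex-double tensors (illustrative only; CUDA_C_64F as the storage type for COMPLEX64 is an assumption, since getCudaDataType itself is not shown here):

// Standalone illustration (not ExaTN code): storage type vs. compute type for a
// complex-double tensor network, as parseTensorNetwork pairs them further below.
#include <library_types.h> // cudaDataType_t (CUDA_C_64F, ...)
#include <cutensornet.h>   // cutensornetComputeType_t (CUTENSORNET_COMPUTE_64F, ...)
#include <cstdio>

int main() {
 cudaDataType_t data_type = CUDA_C_64F;                           // assumed storage type for COMPLEX64
 cutensornetComputeType_t compute_type = CUTENSORNET_COMPUTE_64F; // real double-precision compute enum
 std::printf("data_type=%d, compute_type=%d\n",
             static_cast<int>(data_type), static_cast<int>(compute_type));
 return 0;
}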
@@ -239,6 +267,11 @@ void CuQuantumExecutor::parseTensorNetwork(std::shared_ptr<TensorNetworkReq> tn_
  const auto tens_vol = tens.getTensor()->getVolume();
  const auto tens_rank = tens.getRank();
  const auto tens_type = tens.getElementType();
+ if(tens_type == TensorElementType::VOID){
+  std::cout << "#ERROR(exatn::runtime::CuQuantumExecutor): Network tensor #" << tens_id
+            << " has not been allocated typed storage yet!\n";
+  assert(false);
+ }
  const auto & tens_legs = tens.getTensorLegs();
  const auto & tens_dims = tens.getDimExtents();
...
...
@@ -249,7 +282,7 @@ void CuQuantumExecutor::parseTensorNetwork(std::shared_ptr<TensorNetworkReq> tn_
  for(unsigned int i = 0; i < tens_rank; ++i) descr.extents[i] = tens_dims[i];
  descr.data_type = getCudaDataType(tens_type);
  descr.volume = tens_vol;
- descr.src_ptr = tensor_data_access_func_(*(tens.getTensor()),DEV_HOST,0,&(descr.size));
+ descr.src_ptr = tensor_data_access_func_(*(tens.getTensor()),DEV_HOST,0,&(descr.size)); //`Assuming tensor body is on Host
  assert(descr.src_ptr != nullptr);
 }
...
...
@@ -279,19 +312,52 @@ void CuQuantumExecutor::parseTensorNetwork(std::shared_ptr<TensorNetworkReq> tn_
   }
  }

- HANDLE_CTN_ERROR(cutensornetCreateNetworkDescriptor(gpu_attr_[0].second.cutn_handle,num_input_tensors,
-                  tn_req->num_modes_in,tn_req->extents_in,tn_req->strides_in,tn_req->modes_in,tn_req->alignments_in,
-                  tn_req->num_modes_out,tn_req->extents_out,tn_req->strides_out,tn_req->modes_out,tn_req->alignment_out,
-                  tn_req->data_type,tn_req->compute_type,&(tn_req->net_descriptor)));
- HANDLE_CUDA_ERROR(cudaStreamCreate(&(tn_req->stream)));
+ const auto tens_elem_type = net.getTensorElementType();
+ tn_req->data_type = getCudaDataType(tens_elem_type);
+ tn_req->compute_type = getCutensorComputeType(tens_elem_type);
+
+ //Create a cuTensorNet network descriptor on one or all GPUs:
+ for(int gpu = 0; gpu < 1; ++gpu){ //`Only one GPU for now
+  const auto gpu_id = gpu_attr_[gpu].first;
+  HANDLE_CUDA_ERROR(cudaSetDevice(gpu_id));
+  HANDLE_CTN_ERROR(cutensornetCreateNetworkDescriptor(gpu_attr_[gpu].second.cutn_handle,num_input_tensors,
+                   tn_req->num_modes_in,tn_req->extents_in,tn_req->strides_in,tn_req->modes_in,tn_req->alignments_in,
+                   tn_req->num_modes_out,tn_req->extents_out,tn_req->strides_out,tn_req->modes_out,tn_req->alignment_out,
+                   tn_req->data_type,tn_req->compute_type,&(tn_req->net_descriptor)));
+  HANDLE_CUDA_ERROR(cudaStreamCreate(&(tn_req->stream)));
+ }
 return;
}

+void CuQuantumExecutor::loadTensors(std::shared_ptr<TensorNetworkReq> tn_req)
+{
+ //Load tensors to one or all GPUs:
+ for(int gpu = 0; gpu < 1; ++gpu){ //`Only one GPU for now
+  const auto gpu_id = gpu_attr_[gpu].first;
+  HANDLE_CUDA_ERROR(cudaSetDevice(gpu_id));
+  void * prev_front = mem_pool_[gpu].getFront();
+  bool success = true;
+  //Acquire device memory:
+  for(auto & descr: tn_req->tensor_descriptors){
+   void * dev_ptr = mem_pool_[gpu].acquireMemory(descr.second.size);
+   success = (dev_ptr != nullptr); if(!success) break;
+   descr.second.dst_ptr.emplace_back(dev_ptr);
+  }
+  if(success){
+   //Initiate data transfers:
+   for(auto & descr: tn_req->tensor_descriptors){
+    HANDLE_CUDA_ERROR(cudaMemcpyAsync(descr.second.dst_ptr.back(),descr.second.src_ptr,
+                      descr.second.size,cudaMemcpyDefault,tn_req->stream));
+   }
+   tn_req->exec_status = TensorNetworkQueue::ExecStat::Loading;
+  }else{
+   //Restore previous memory front:
+   mem_pool_[gpu].restorePreviousFront(prev_front);
+   break;
+  }
+ }
+ return;
+}
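loadTensors stages a network in two phases: it first tries to acquire device memory for every tensor from the GPU's moving-window pool, and only if all acquisitions succeed does it enqueue the asynchronous host-to-device copies on the request's stream and mark the request Loading; otherwise it rolls the pool front back so nothing leaks. A hypothetical, self-contained sketch of that acquire-or-roll-back pattern, reduced to raw CUDA with a toy bump allocator standing in for LinearMemoryPool:

// Hypothetical standalone sketch (not ExaTN code) of the staging pattern used by
// CuQuantumExecutor::loadTensors: acquire all device buffers first, then copy
// asynchronously, and roll the allocator back if any acquisition fails.
#include <cuda_runtime.h>
#include <cassert>
#include <cstddef>
#include <vector>

struct BumpPool {                               // toy stand-in for LinearMemoryPool
 void * base = nullptr; char * front = nullptr; char * end = nullptr;
 explicit BumpPool(std::size_t bytes) {
  const cudaError_t err = cudaMalloc(&base, bytes); assert(err == cudaSuccess); (void)err;
  front = static_cast<char*>(base); end = front + bytes;
 }
 ~BumpPool() { cudaFree(base); }
 void * acquireMemory(std::size_t bytes) {      // nullptr when the window is exhausted
  if (front + bytes > end) return nullptr;
  void * ptr = front; front += bytes; return ptr;
 }
 void * getFront() const { return front; }
 void restorePreviousFront(void * f) { front = static_cast<char*>(f); }
};

int main() {
 BumpPool pool(1 << 20);
 cudaStream_t stream; cudaStreamCreate(&stream);
 std::vector<std::vector<float>> host_tensors = {std::vector<float>(256, 1.0f),
                                                 std::vector<float>(512, 2.0f)};
 std::vector<void*> dev_ptrs;

 void * prev_front = pool.getFront();           // remember the front so a partial acquisition can be undone
 bool success = true;
 for (const auto & t: host_tensors) {           // phase 1: acquire device memory for every tensor
  void * dev_ptr = pool.acquireMemory(t.size() * sizeof(float));
  if (dev_ptr == nullptr) { success = false; break; }
  dev_ptrs.emplace_back(dev_ptr);
 }
 if (success) {                                 // phase 2: initiate the asynchronous transfers
  for (std::size_t i = 0; i < host_tensors.size(); ++i) {
   cudaMemcpyAsync(dev_ptrs[i], host_tensors[i].data(),
                   host_tensors[i].size() * sizeof(float), cudaMemcpyDefault, stream);
  }
  cudaStreamSynchronize(stream);                // the real executor instead tracks ExecStat::Loading
 } else {
  pool.restorePreviousFront(prev_front);        // undo the partial acquisition
 }
 cudaStreamDestroy(stream);
 return 0;
}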
...
...
src/runtime/executor/cuquantum/cuquantum_executor.hpp
 /** ExaTN: Tensor Runtime: Tensor network executor: NVIDIA cuQuantum
-REVISION: 2021/12/30
+REVISION: 2022/01/03

-Copyright (C) 2018-2021 Dmitry Lyakh
-Copyright (C) 2018-2021 Oak Ridge National Laboratory (UT-Battelle)
+Copyright (C) 2018-2022 Dmitry Lyakh
+Copyright (C) 2018-2022 Oak Ridge National Laboratory (UT-Battelle)

 Rationale:
 - ExaTN graph executor may accept whole tensor networks for execution
...
...
@@ -82,11 +82,11 @@ protected:
  void * cutn_handle; //cutensornetHandle_t = void*
 };

- /** Currently processed tensor networks **/
+ /** Currently processed (progressing) tensor networks **/
 std::unordered_map<TensorOpExecHandle,std::shared_ptr<TensorNetworkReq>> active_networks_;
 /** Attributes of all GPUs available to the current process **/
 std::vector<std::pair<int,DeviceAttr>> gpu_attr_; //{gpu_id, gpu_attributes}
- /** Moving-window linear memory pool (in GPU RAM) **/
+ /** Moving-window linear memory pools for all GPUs of the current process **/
 std::vector<LinearMemoryPool> mem_pool_;
 /** Tensor data access function **/
 TensorImplFunc tensor_data_access_func_; //numerics::Tensor --> {tensor_body_ptr, size_in_bytes}
...
...
src/runtime/executor/cuquantum/linear_memory.hpp
 /** ExaTN: Tensor Runtime: Tensor network executor: Linear memory allocator
-REVISION: 2021/12/30
+REVISION: 2022/01/03

-Copyright (C) 2018-2021 Dmitry Lyakh
-Copyright (C) 2018-2021 Oak Ridge National Laboratory (UT-Battelle)
+Copyright (C) 2018-2022 Dmitry Lyakh
+Copyright (C) 2018-2022 Oak Ridge National Laboratory (UT-Battelle)

 Rationale:
...
...
@@ -73,6 +73,11 @@ public:
  return;
 }

+ void restorePreviousFront(void * front) {
+  front_ = front;
+  return;
+ }
+
 void * getFront() const {
  return front_;
 }
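The new restorePreviousFront complements getFront: together they act as a checkpoint/rollback pair for the moving-window allocator, which is what loadTensors relies on when a network does not fit. A tiny host-only sketch of those semantics, using a hypothetical toy allocator rather than the actual LinearMemoryPool:

// Hypothetical, host-only sketch (not the ExaTN LinearMemoryPool): getFront() takes a
// checkpoint of the moving window and restorePreviousFront() rolls back every
// acquisition made after that checkpoint in one step.
#include <cassert>
#include <cstddef>
#include <vector>

class ToyLinearPool {
public:
 explicit ToyLinearPool(std::size_t bytes): buffer_(bytes), front_(buffer_.data()) {}
 void * acquireMemory(std::size_t bytes) {
  if (front_ + bytes > buffer_.data() + buffer_.size()) return nullptr;
  void * ptr = front_; front_ += bytes; return ptr;
 }
 void * getFront() const { return front_; }
 void restorePreviousFront(void * front) { front_ = static_cast<char*>(front); }
private:
 std::vector<char> buffer_;
 char * front_;
};

int main() {
 ToyLinearPool pool(1024);
 void * checkpoint = pool.getFront();   // remember the window position
 void * a = pool.acquireMemory(256);
 void * b = pool.acquireMemory(512);
 assert(a != nullptr && b != nullptr);
 pool.restorePreviousFront(checkpoint); // both acquisitions are released at once
 assert(pool.getFront() == checkpoint);
 return 0;
}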
...
...
src/runtime/executor/graph_executors/lazy/graph_executor_lazy.cpp
 /** ExaTN:: Tensor Runtime: Tensor graph executor: Lazy
-REVISION: 2021/12/30
+REVISION: 2022/01/03

-Copyright (C) 2018-2021 Dmitry Lyakh
-Copyright (C) 2018-2021 Oak Ridge National Laboratory (UT-Battelle)
+Copyright (C) 2018-2022 Dmitry Lyakh
+Copyright (C) 2018-2022 Oak Ridge National Laboratory (UT-Battelle)
 **/

 #include "graph_executor_lazy.hpp"
...
...
@@ -275,9 +275,9 @@ void LazyGraphExecutor::execute(TensorGraph & dag) {

 void LazyGraphExecutor::execute(TensorNetworkQueue & tensor_network_queue) {
-#ifdef CUQUANTUM
  std::cout << "#DEBUG(exatn::runtime::LazyGraphExecutor::execute): Started executing the tensor network queue via cuQuantum: "
            << tensor_network_queue.getSize() << " elements detected" << std::endl;
+#ifdef CUQUANTUM
  assert(node_executor_);
  //Synchronize the node executor:
  bool synced = node_executor_->sync(); assert(synced);
...
...
@@ -311,10 +311,10 @@ void LazyGraphExecutor::execute(TensorNetworkQueue & tensor_network_queue) {
   }
  }
  cuquantum_executor_->sync();
- std::cout << "#DEBUG(exatn::runtime::LazyGraphExecutor::execute): Finished executing the tensor network queue via cuQuantum\n";
 #else
  assert(tensor_network_queue.isEmpty());
 #endif
+ std::cout << "#DEBUG(exatn::runtime::LazyGraphExecutor::execute): Finished executing the tensor network queue via cuQuantum\n";
  return;
 }
...
...