Skip to content
GitLab
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Sign in
Toggle navigation
Menu
Open sidebar
olcf-acceptance-team
olcf4-acceptance-tests
Commits
7a5aba00
Commit
7a5aba00
authored
Aug 26, 2016
by
Joubert, Wayne
Browse files
Fixes to nuccor_kernels for better GPU performance.
parent
5d72d309
Changes
2
Hide whitespace changes
Inline
Side-by-side
Crest/nuccor_kernels/Source/nuccor_dgemm.F90
View file @
7a5aba00
...
...
@@ -237,7 +237,6 @@
!$acc enter data create(b)
!$acc enter data copyin(a2)
!$acc enter data copyin(a3)
!$acc wait(0)
!$acc host_data use_device(a2, a3, b)
#ifdef __PGI
call
cublasdgemm
(
'n'
,
'n'
,
m_
,
n_
,
k_
,
alpha
,
a2
,
m_
,
&
...
...
@@ -250,11 +249,7 @@
!$acc end host_data
!$acc exit data delete(a3)
!$acc exit data delete(a2)
! vvv TODO: FIX
! !$acc exit data
!$acc exit data copyout(b)
! ^^^ TODO: FIX
!$acc wait(0)
! !$acc exit data copyout(b)
endif
!----------------------------------------
...
...
@@ -267,7 +262,6 @@
!$acc enter data create(b)
!$acc enter data copyin(a2) async(2)
!$acc enter data copyin(a3) async(3)
!$acc wait(0)
!$acc wait(2)
!$acc wait(3)
!$acc host_data use_device(a2, a3, b)
...
...
@@ -282,11 +276,7 @@
!$acc end host_data
!$acc exit data delete(a3)
!$acc exit data delete(a2)
! vvv TODO: FIX
! !$acc exit data
!$acc exit data copyout(b)
! ^^^ TODO: FIX
!$acc wait(0)
! !$acc exit data copyout(b)
endif
!========================================
...
...
@@ -312,9 +302,8 @@
n_
=
n
!$acc enter data create(c)
!$acc enter data copyin(a1)
!$acc enter data present_or_copyin(b)
!
!$acc enter data present_or_copyin(b)
!$acc host_data use_device(a1, b, c)
!$acc wait(0)
#ifdef __PGI
call
cublasdgemm
(
't'
,
'n'
,
m_
,
n_
,
k_
,
alpha
,
a1
,
k_
,
&
b
,
k_
,
beta
,
c
,
m_
)
...
...
@@ -327,7 +316,6 @@
!$acc exit data delete(b)
!$acc exit data delete(a1)
!$acc exit data copyout(c)
!$acc wait(0)
endif
!----------------------------------------
...
...
@@ -338,11 +326,10 @@
n_
=
n
!$acc enter data create(c)
!$acc enter data copyin(a1) async(2)
!$acc enter data present_or_copyin(b) async(3)
!
!$acc enter data present_or_copyin(b) async(3)
!$acc host_data use_device(a1, b, c)
!$acc wait(0)
!$acc wait(2)
!$acc wait(3)
!
!$acc wait(3)
#ifdef __PGI
call
cublasdgemm
(
't'
,
'n'
,
m_
,
n_
,
k_
,
alpha
,
a1
,
k_
,
&
b
,
k_
,
beta
,
c
,
m_
)
...
...
@@ -355,7 +342,6 @@
!$acc exit data delete(b)
!$acc exit data delete(a1)
!$acc exit data copyout(c) async(1)
!$acc wait(0)
!$acc wait(1)
endif
...
...
@@ -383,8 +369,7 @@
n_
=
n
!$acc enter data copyin(a1) async(2)
!$acc enter data present_or_copyin(b) async(3)
!$acc enter data create(c)
!$acc wait (0)
! !$acc enter data create(c)
!$acc wait (2)
!$acc wait (3)
...
...
Crest/nuccor_kernels/Source/nuccor_dgemm.c
View file @
7a5aba00
...
...
@@ -225,7 +225,7 @@ void compute_product_gpu(Matrix* c, Matrix* a1, Matrix* a2, Matrix* a3,
#pragma acc enter data create(bp_data[:m*n])
#pragma acc enter data copyin(a2_data[:m*m])
#pragma acc enter data copyin(a3_data[:m*n])
#pragma acc wait(0)
//
#pragma acc wait(0)
#pragma acc host_data use_device(a2_data, a3_data, bp_data)
{
...
...
@@ -236,11 +236,8 @@ void compute_product_gpu(Matrix* c, Matrix* a1, Matrix* a2, Matrix* a3,
#pragma acc exit data delete(a3_data[:m*n])
#pragma acc exit data delete(a2_data[:m*m])
// vvv TODO: FIX
//#pragma acc exit data
#pragma acc exit data copyout(bp_data[:m*n])
// ^^^ TODO: FIX
#pragma acc wait(0)
//#pragma acc exit data copyout(bp_data[:m*n])
//#pragma acc wait(0)
}
/*------------------------------------*/
...
...
@@ -275,8 +272,8 @@ void compute_product_gpu(Matrix* c, Matrix* a1, Matrix* a2, Matrix* a3,
#pragma acc enter data create(c_data[:n*n])
#pragma acc enter data copyin(a1_data[:m*n])
#pragma acc enter data present_or_copyin(bp_data[:m*n])
#pragma acc wait(0)
//
#pragma acc enter data present_or_copyin(bp_data[:m*n])
//
#pragma acc wait(0)
#pragma acc host_data use_device(a1_data, bp_data, c_data)
{
...
...
@@ -288,7 +285,7 @@ void compute_product_gpu(Matrix* c, Matrix* a1, Matrix* a2, Matrix* a3,
#pragma acc exit data delete(bp_data[:m*n])
#pragma acc exit data delete(a1_data[:m*n])
#pragma acc exit data copyout(c_data[:n*n])
#pragma acc wait(0)
//
#pragma acc wait(0)
}
/*------------------------------------*/
...
...
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment