launchad · Rogers, David

Commit 5f593c56, authored Sep 09, 2020 by David M. Rogers (parent c0f8e598)

    Updates for large array with slurm.

Changes: 5 files (docker.sh, loadem.py, rescore.py, rescore.sh, rules.yaml)
docker.sh

```diff
 #!/bin/bash
 #SBATCH -p dock
-#SBATCH --nodes 1
+#SBATCH --nodes 2
 #SBATCH --cpus-per-task 2
 #SBATCH --gres gpu:1
 #SBATCH -J dock
 #SBATCH -o %x.%A_%a.out
-#SBATCH --array=1-1000
+#SBATCH --array=1-866
 # TODO: add date/time to output filename

 echo "Starting $SLURM_JOB_NAME - $SLURM_ARRAY_TASK_ID at" `date`

 source /apps/dock_env/env.sh
@@ -13,7 +15,7 @@ source /apps/dock_env/env.sh
 export OMP_NUM_THREADS=1
 DIR=/apps/launchad
 cd /dev/shm

-srun -n 1 -N 1 --gres=gpu:1 --cpus-per-task=2 --exclusive \
+srun -n 2 -N 2 --gres=gpu:1 --cpus-per-task=2 --exclusive \
     $DIR/loadem.py ccddc-controller $SLURM_JOB_NAME

 echo "Completed $SLURM_JOB_NAME - $SLURM_ARRAY_TASK_ID at" `date`
```
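The net effect of the two changes: each array element now spans two nodes, with `srun -n 2 -N 2` launching one `loadem.py` rank per node (one GPU and two CPUs each), while the array itself shrinks from 1000 to 866 elements. The ranks pull their actual work items from the `ccddc-controller` host (the Redis queue that `loadem.py` talks to, as the next file shows), so the array size only controls how many workers run, not which ligands they process.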
loadem.py

```diff
@@ -110,7 +110,8 @@ def requeue(assigned, host, db):
         item = r.spop(assigned)
         if item is None:
             break
-        r.smove(assigned, 'ready', item)
+        r.sadd('ready', item)
+        #r.smove(assigned, 'ready', item)
         print("%s %s re-queued %s." % (stamp(), assigned, item))
     else:
         raise IndexError("More than 10 items assigned to %s!" % assigned)
```
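The one-line change here fixes a lost-item bug (my reading of the Redis semantics, not something the commit message states): `SPOP` already removes the item from the `assigned` set, so the subsequent `SMOVE(assigned, 'ready', item)` finds nothing to move and returns 0, silently dropping the work item. `SADD('ready', item)` re-inserts the already-popped item instead. A minimal sketch, assuming a redis-py client `r`:

```python
# SPOP removes AND returns a random member of `assigned`.
item = r.spop(assigned)
if item is not None:
    # old: r.smove(assigned, 'ready', item)  -- a no-op, `item` is already gone
    r.sadd('ready', item)  # new: put the popped item back on the ready queue
```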
rescore.py
```diff
 #!/usr/bin/env python3

 from helpers import *
-import os
+import os, concurrent, subprocess
 import pandas as pd
 import numpy as np

 import oddt
 from oddt.scoring import descriptors
 from oddt.scoring.functions import RFScore
 from oddt.scoring.models.regressors import randomforest

 def fhash(x):
     return (48271*x) % 2147483647
 def ihash(y):
     return (1899818559*y) % 2147483647

 threads = 1
 batch_sz = 16
```
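Two asides on this header (observations, not part of the diff). First, `import concurrent` by itself does not make `concurrent.futures` available; unless another import (possibly the wildcard `from helpers import *`) pulls the submodule in, the executor calls below would raise `AttributeError`, so `import concurrent.futures` would be the more robust spelling. Second, the two hash helpers are mutually inverse permutations of the integers mod 2**31 - 1: 48271 is the classic MINSTD LCG multiplier and 1899818559 is its multiplicative inverse modulo the Mersenne prime 2147483647, which a quick check confirms:

```python
P = 2147483647                            # Mersenne prime 2**31 - 1
assert (48271 * 1899818559) % P == 1      # multiplicative inverses mod P
assert ihash(fhash(12345)) == 12345       # so ihash undoes fhash (for 0 < x < P)
```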
```diff
 def gsutil(cmd):
     args = ["gsutil", "-o", "GSUtil:parallel_process_count=1",
             "-o", "GSUtil:parallel_thread_count=%d" % threads,
             "-o", "GSUtil:state_dir=gsutil",
             "-m"] + cmd
     return subprocess.call(args)
```
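For reference, a call like the ones `process_inp` makes below expands to a single `gsutil -m cp` invocation with throttled parallelism (values shown for the default `threads = 1`; the bucket path uses an illustrative receptor id `x`):

```python
gsutil(['cp', 'gs://ccddc/x_docked/10344a.pq', './'])
# runs: gsutil -o GSUtil:parallel_process_count=1 \
#              -o GSUtil:parallel_thread_count=1 \
#              -o GSUtil:state_dir=gsutil \
#              -m cp gs://ccddc/x_docked/10344a.pq ./
```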
```diff
 def process_inp(r, name):
     n = ihash(int(name, 16))
     inp = "10344a.pq 11ad68.pq 132686.pq 16d551.pq d420e.pq 10f0d9.pq 1269f7.pq 1618c2.pq 1791e0.pq dfe9d.pq".split()
     #inp = [ "%x.pq" % fhash(n+i) for i in range(batch_sz) ]
     inp2 = ["gs://ccddc/%s_docked/" % r + i for i in inp]
     gsutil(['cp'] + inp2 + ['./'])
     #with concurrent.futures.ProcessPoolExecutor() as executor:
     with concurrent.futures.ThreadPoolExecutor(max_workers=threads) as executor:
         ans = executor.map(rescore, inp)
     return pd.concat(ans)
```
```diff
 receptor = None
 def main(argv):
-    assert len(argv) == 3, "Usage: %s <receptor.pdbqt> <ligs.pq>"
+    global receptor
+    assert len(argv) == 3, "Usage: %s <receptor id> <lig id>"

-    # set up descriptors
-    receptor = next(oddt.toolkit.readfile('pdbqt', argv[1]))
-    df = pd.read_parquet(argv[2])
-    os.remove(argv[2])
+    # signal job start
+    receptor = next(oddt.toolkit.readfile('pdbqt', argv[1] + '.pdbqt'))
+    result = process_inp(argv[1], argv[2])
+    result.to_parquet(argv[2] + '.pq', compression='snappy', engine='pyarrow')
```
```diff
 def get_descriptors(receptor, confs):
     cutoff = 12
     ligand_atomic_nums = [6, 7, 8, 9, 15, 16, 17, 35, 53]
     protein_atomic_nums = [6, 7, 8, 16]
```
```diff
@@ -29,41 +68,69 @@ def main(argv):
             'vina_hydrogen', 'vina_num_rotors']
     vina = oddt.scoring.descriptors.oddt_vina_descriptor(receptor, vina_scores=vina_scores)
-    descriptors_v1 = cc
-    descriptors_v2 = oddt.scoring.descriptors.close_contacts_descriptor(
-        receptor,
-        cutoff=np.array([0, 2, 4, 6, 8, 10, 12]),
-        protein_types=protein_atomic_nums,
-        ligand_types=ligand_atomic_nums)
+    # descriptors_v1 = cc
+    # descriptors_v2 = oddt.scoring.descriptors.close_contacts_descriptor(
+    #        receptor,
+    #        cutoff=np.array([0, 2, 4, 6, 8, 10, 12]),
+    #        protein_types=protein_atomic_nums,
+    #        ligand_types=ligand_atomic_nums)
     descriptors_v3 = oddt.scoring.ensemble_descriptor((vina, cc))
```
```diff
     # calculate descriptors individually
     desc_rfscore1 = []
     desc_rfscore2 = []
     desc_rfscore3 = []
     for x in df['conf']:
         if pd.isnan(x):
             desc_rfscore1.append(None)
             desc_rfscore2.append(None)
             desc_rfscore3.append(None)
             continue
         try:
             c = oddt.toolkit.readstring('pdbqt', x)
             desc_rfscore1.append(descriptors_v1.build(c))
             desc_rfscore2.append(descriptors_v2.build(c))
             desc_rfscore3.append(descriptors_v3.build(c))
         except Exception:
             desc_rfscore1.append(None)
             desc_rfscore2.append(None)
             desc_rfscore3.append(None)

     result = pd.DataFrame({
         'name': df['name'].astype(str),
         'desc_rfscore1': desc_rfscore1,
         'desc_rfscore2': desc_rfscore2,
         'desc_rfscore3': desc_rfscore3,
     })
     result.set_index('name')
     result.to_parquet(argv[2], compression='snappy', engine='pyarrow')

     return [descriptors_v3.build(oddt.toolkit.readstring('pdbqt', x)).reshape(-1)
             for x in confs]
```
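One pandas footnote on the block above (an observation about the code as committed, not a change in this diff): `result.set_index('name')` returns a new DataFrame and the return value is discarded, so the parquet file written on the next line keeps the default integer index. Indexing by name would require the reassignment:

```python
result = result.set_index('name')   # set_index does not modify in place by default
```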
```diff
 # load models
 models = [('rf3',   '/apps/data/RFScore_v3_pdbbind2016.pickle'),
           ('dude3', '/apps/data/RFScoreVS_v3_dude.pickle'),
           ('dock3', '/apps/data/RFScoreVS_v3_dock.pickle'),
           ('vina3', '/apps/data/RFScoreVS_v3_vina.pickle')]

 # parallel load all these pickles
 #with concurrent.futures.ProcessPoolExecutor(max_workers=threads) as executor:
 with concurrent.futures.ThreadPoolExecutor(max_workers=threads) as executor:
     models = dict(executor.map(
         lambda m: (m[0], RFScore.rfscore.load(m[1], version=3)), models))

 #models = dict(
 #    rf3 = RFScore.rfscore.load(
 #            '/apps/data/RFScore_v3_pdbbind2016.pickle', version=3)
 ## , vs_dude_v1 = RFScore.rfscore.load(
 ##            'RFScoreVS_v1_dude.pickle',version=1)
 ## , vs_dude_v2 = RFScore.rfscore.load(
 ##            'RFScoreVS_v2_dude.pickle',version=2)
 #  , vs_dude_v3 = RFScore.rfscore.load(
 #            '/apps/data/RFScoreVS_v3_dude.pickle',version=3)
 ## , vs_dock_v1 = RFScore.rfscore.load(
 ##            'RFScoreVS_v1_dock.pickle',version=1)
 ## , vs_dock_v2 = RFScore.rfscore.load(
 ##            'RFScoreVS_v2_dock.pickle',version=2)
 #  , vs_dock_v3 = RFScore.rfscore.load(
 #            '/apps/data/RFScoreVS_v3_dock.pickle',version=3)
 ## , vs_vina_v1 = RFScore.rfscore.load(
 ##            'RFScoreVS_v1_vina.pickle',version=1)
 ## , vs_vina_v2 = RFScore.rfscore.load(
 ##            'RFScoreVS_v2_vina.pickle',version=2)
 #  , vs_vina_v3 = RFScore.rfscore.load(
 #            '/apps/data/RFScoreVS_v3_vina.pickle',version=3)
 #)
```
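The ThreadPoolExecutor choice here is not incidental: the commented-out `ProcessPoolExecutor` variant would fail with this `lambda`, because process pools must pickle the callable they dispatch and lambdas are not picklable, while thread pools call it in-process. If a process pool were ever wanted (say, to unpickle the four models on separate cores), a module-level helper would be needed, along these lines (a sketch, not part of the commit):

```python
def _load_model(m):
    # Module-level, hence picklable and usable with ProcessPoolExecutor.
    name, path = m
    return name, RFScore.rfscore.load(path, version=3)
```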
```diff
 def rescore(inp):
     print("Rescoring %s" % inp)
     df = pd.read_parquet(inp)
     os.remove(inp)

     columns = ['rf3', 'dude3', 'dock3', 'vina3']
     dvs = get_descriptors(receptor, df['conf'].values)
     data = df['score']
     for c in columns:
         data[c] = models[c].model.predict(dvs)
     return data

 if __name__ == "__main__":
     import sys
```
rescore.sh (new file, mode 100644)

```bash
#!/bin/bash
#SBATCH -p rescore
#SBATCH --nodes 1
#SBATCH -n64
#SBATCH -J rescore
#SBATCH -o %x.%A_%a.out
#SBATCH --array=1-1

echo "Starting $SLURM_JOB_NAME - $SLURM_ARRAY_TASK_ID at" `date`

source /apps/dock_env/env.sh
export OMP_NUM_THREADS=1
eval "$(/apps/anaconda3/bin/conda shell.bash hook)"
conda activate rescore

DIR=/apps/launchad
cd /dev/shm

srun -n8 --cpus-per-task=8 -N1 $DIR/loadem.py ccddc-controller $SLURM_JOB_NAME

echo "Completed $SLURM_JOB_NAME - $SLURM_ARRAY_TASK_ID at" `date`
```
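Note how the numbers line up: the allocation requests 64 tasks on one node (`-n64`), and the `srun` line launches 8 `loadem.py` ranks with 8 CPUs apiece, so the single array element (the array is just `1-1`) consumes the whole 8 × 8 = 64-CPU allocation.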
rules.yaml

```diff
@@ -21,7 +21,20 @@ dock:
     rm -f *.xml
     rm -f *.dlg

+# uses output of rescore
+# 100k ligands per file
+# makes number of files 10k files
+combine:        # combine 10:1 again ~
+  []
+# Re-score ligand/receptor conf.
+# uses output of combine
+# ? rescore all 3 conf?
+#  - remove "far" ligands
+#  - combine "close" ligands
+# Note: this re-combines files 10:1
+#  creating output files that span a sequence
+# 10k ligands file, 100k files
 rescore:
   queue: rescore
   db: 1
```
```diff
@@ -29,9 +42,9 @@ rescore:
   out: ["{r}_scored/{n}.pq"]
   inp:
     - targets/{r}.tgz        # note: untarring is automatic
-    - "{r}_docked/{n}.pq"
+  #  - "{r}_docked/{n}.pq"
+  # ~1050 ligands,
   script: |
-    /apps/launchad/rescore.py *{r}*.pdbqt {n}.pq
+    /apps/launchad/rescore.py {r}.pdbqt {n}

 breakup:
   queue: rescore
```
```diff
@@ -44,22 +57,3 @@ breakup:
     mkdir -p ligs
     /apps/launchad/breakup.py -n 512 $((1+{n}*512)) docked.{n}.parquet ligs/%s.pq

-dock_test:
-  queue: dock
-  db: 4
-  params: [r, n]
-  out: ["{r}_docked/{n}.pq"]
-  inp:
-    - targets/{r}.tgz        # note: untarring is automatic
-    - ligs/{n}.pq
-  script: |
-    export OMP_NUM_THREADS=2
-    ls {r}.maps.fld >filelist
-    /apps/launchad/create_inp.py {n}.pq >>filelist
-    rm {n}.pq
-    autodock_gpu_64wi -filelist filelist \
-        -nrun 20 -autostop 1 -nev 3000000 >/dev/null
-    /apps/launchad/package_out.py filelist {n}.pq
-    rm -f *_*.pdbqt
-    rm -f *.xml
-    rm -f *.dlg
```
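Each rule in this file pairs a work queue and Redis db with `out`/`inp`/`script` templates over the declared `params` (`r` for a receptor id, `n` for a ligand-file index). A minimal sketch of how one entry might be expanded for a single work item, assuming ordinary `str.format`-style substitution (the actual expansion lives in launchad's own code, not in this commit, and the parameter values below are made up):

```python
# Hypothetical expansion of the rescore rule for one (r, n) work item.
rule = {
    "out": ["{r}_scored/{n}.pq"],
    "inp": ["targets/{r}.tgz", "{r}_docked/{n}.pq"],
    "script": "/apps/launchad/rescore.py {r}.pdbqt {n}\n",
}
item = {"r": "receptor1", "n": "1a2b"}   # assumed example values
expanded = {
    key: [t.format(**item) for t in val] if isinstance(val, list)
         else val.format(**item)
    for key, val in rule.items()
}
# expanded["script"] == "/apps/launchad/rescore.py receptor1.pdbqt 1a2b\n"
```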