launchad / commit 0842e46a
Authored Nov 07, 2020 by David M. Rogers
Parent: 81b842ab
Changed files: 4

Added alternate run modes.
create_inp.py

```diff
@@ -21,11 +21,13 @@ def main(argv):
     assert len(argv) == 2, "Usage: %s <ligs.pq>"
     df = pd.read_parquet(argv[1])
-    for lig in df.itertuples():
-        fname = lig[0] + '.pdbqt'
+    for i in range(len(df)):
+        name = df.iloc[i]['name']
+        conf = df.iloc[i]['conf']
+        fname = name + '.pdbqt'
         with open(fname, "w") as f:
-            f.write(fix(lig[1]))
-        print("%s\n%s" % (lig[0], fname))
+            f.write(fix(conf))
+        print("%s\n%s" % (name, fname))
 
 if __name__ == "__main__":
     import sys
```
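The rewritten loop reads ligands by column name instead of tuple position, so the input parquet needs explicit `name` and `conf` string columns, the schema that pack.py below produces. A minimal sketch of a compatible input file follows; the `fix()` helper is defined outside the visible hunk and is assumed here to normalize PDBQT text:

```python
# Minimal sketch: write a one-row table with the 'name'/'conf' schema
# that create_inp.py now expects, then unpack it with the script.
import pandas as pd

pd.DataFrame({'name': ['lig0001'],
              'conf': ['REMARK  example PDBQT contents\n']}
             ).to_parquet('ligs.pq')

# $ python create_inp.py ligs.pq    # writes lig0001.pdbqt
```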
pack.py (new file, mode 100644)

```python
import pandas as pd
import sys

names = []
confs = []
for name in sys.argv[1:]:
    u = name.split('.', 2)[0]
    names.append(u)
    with open(name) as f:
        confs.append(f.read())

z = pd.DataFrame(data={'name': names, 'conf': confs})
z.to_parquet('control.pq', compression='snappy', engine='pyarrow')
```
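pack.py is the inverse of create_inp.py: it gathers loose conformer files into a single control.pq, keying each row by the file name up to its first dot. A round-trip check, using two hypothetical PDBQT file names:

```python
# After e.g. `python pack.py lig0001.pdbqt lig0002.pdbqt` (hypothetical
# file names), the packed table carries one row per input file.
import pandas as pd

df = pd.read_parquet('control.pq')
print(df.columns.tolist())    # ['name', 'conf']
print(df['name'].tolist())    # ['lig0001', 'lig0002']
```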
rescore3.py (new file, mode 100755)

```python
#!/usr/bin/env python3

from helpers import *
import os, concurrent, subprocess
import pandas as pd
import numpy as np
from q2 import Event, Worker, WorkQueue, time
import oddt

def fhash(x):
    return (48271*x) % 2147483647

def ihash(y):
    return (1899818559*y) % 2147483647
```
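fhash is a Lehmer-style multiplicative hash modulo the Mersenne prime 2^31 - 1, and 1899818559 is the modular inverse of 48271 under that modulus, so ihash inverts fhash exactly. A quick check:

```python
# 48271 * 1899818559 == 1 (mod 2**31 - 1), so the two hashes cancel.
M = 2147483647
assert (48271 * 1899818559) % M == 1
x = 123456
assert ihash(fhash(x)) == x        # holds for any 0 <= x < M
```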
```python
threads = 33
batch_sz = 648

def gsutil(cmd):
    # Run gsutil with one process and `threads` parallel transfer threads.
    args = ["gsutil", "-o", "GSUtil:parallel_process_count=1",
                      "-o", "GSUtil:parallel_thread_count=%d" % threads,
                      "-o", "GSUtil:state_dir=gsutil",
                      "-m"] + cmd
    return subprocess.call(args)

def process_inp(r, name):
    #n = ihash( int(name, 16) )
    #inp = [ (n+i, "%x.pq" % fhash(n+i)) for i in range(batch_sz) ]
    # Read shard names from the second column of the list file.
    n = [l.split()[1] for l in open(name).read().split('\n')
             if len(l.split()) == 2]
    inp = [(-1, "%s.pq" % x) for x in n]
    inp2 = ["gs://ccddc/%s_docked/%s" % (r, i[1]) for i in inp]
    gsutil(['cp'] + inp2 + ['./'])

    # Pipeline: start -> LoadMol (x n_loaders) -> rf3 -> dude2 -> done.
    end = Event()
    start = WorkQueue(end, 1)
    out1 = WorkQueue(end)
    out2 = WorkQueue(end)
    done = WorkQueue(end)

    n_loaders = threads - 2
    loaders = []
    for i in range(n_loaders):
        loaders.append(LoadMol(start, out1))
        loaders[-1].r = r
        loaders[-1].start()

    rf3 = Scorer(out1, out2)
    rf3.name = "rf3"
    rf3.model = "/apps/data/RFScore_v3_pdbbind2016.pickle"
    rf3.version = 3
    rf3.start()

    dude2 = Scorer(out2, done)
    dude2.name = "vs_dude_v2"
    dude2.model = "/apps/data/RFScoreVS_v2_dude.pickle"
    dude2.version = 2
    dude2.start()

    for i in inp:
        start.put(i)
    start.fin()

    # Gather scored batches and write a single output parquet.
    ans = [df for df in done]
    if len(ans) > 0:
        ans = pd.concat(ans)
    else:
        ans = pd.DataFrame()
    ans.to_parquet(name + '.pq', compression='snappy', engine='pyarrow')

    end.set()
    return stop_procs(loaders + [rf3, dude2])
```
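process_inp wires a three-stage multiprocessing pipeline through the repo-local q2 module, which this commit does not include. The stand-in below is an assumption inferred purely from usage: Worker subclasses override setup() and fn(), WorkQueue links stages, and iterating a queue drains it until its producers call fin(). For brevity, this toy version handles only one producer and one consumer per queue; the real q2 presumably coordinates the fan-out to n_loaders workers and the fan-in from them, likely using the shared `end` Event.

```python
# ASSUMPTION: minimal stand-in for the repo-local q2 module, inferred
# from how rescore3.py calls it; the real implementation may differ.
import multiprocessing as mp
import time  # rescore3.py imports time via q2, so re-export it here

Event = mp.Event  # shared shutdown flag

class WorkQueue:
    """Queue linking two pipeline stages; iteration ends once nprod
    producers have each called fin()."""
    def __init__(self, end, nprod=1):
        self.end = end
        self.nprod = nprod
        self.q = mp.Queue()

    def put(self, item):
        self.q.put(item)

    def fin(self):
        self.q.put(None)          # one end-of-stream sentinel per producer

    def __iter__(self):
        finished = 0
        while finished < self.nprod:
            item = self.q.get()
            if item is None:
                finished += 1
            else:
                yield item

class Worker(mp.Process):
    """One pipeline stage: run setup() once, then map fn() over the
    input queue, forwarding results downstream."""
    def __init__(self, inp, out):
        super().__init__()
        self.inp = inp
        self.out = out

    def setup(self):
        pass

    def run(self):
        self.setup()
        for item in self.inp:
            self.out.put(self.fn(item))
        self.out.fin()
```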
```python
def main(argv):
    global threads
    global batch_sz
    # Optional "-n <count>" flag overrides the batch size (and thread count).
    if len(argv) >= 3 and argv[1] == "-n":
        batch_sz = int(argv[2])
        threads = batch_sz + 2
        del argv[1:3]
    assert len(argv) == 3, "Usage: %s <receptor id> <list file>"
    status = process_inp(argv[1], argv[2])
    print(status)
```
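The list-file format is never spelled out in the commit; from the parsing in process_inp, only lines with exactly two whitespace-separated fields count, and the second field names a shard fetched from gs://ccddc/<receptor>_docked/. A hypothetical example of that parse:

```python
# Hypothetical list-file contents, parsed the same way process_inp does.
listing = "1 a1b2c3\n2 d4e5f6\nmalformed line here\n"
shards = [l.split()[1] for l in listing.split('\n') if len(l.split()) == 2]
assert shards == ['a1b2c3', 'd4e5f6']   # fetched as a1b2c3.pq, d4e5f6.pq
```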
```python
class LoadMol(Worker):
    """ load a molecule and calculate its descriptors """
    def setup(self):
        t0 = time.time()
        from oddt.scoring import descriptors
        # set up descriptors
        receptor = next(oddt.toolkit.readfile('pdbqt', self.r + '.pdbqt'))
        cutoff = 12
        ligand_atomic_nums = [6, 7, 8, 9, 15, 16, 17, 35, 53]
        protein_atomic_nums = [6, 7, 8, 16]
        self.v2 = descriptors.close_contacts_descriptor(
                    receptor,
                    cutoff=np.array([0, 2, 4, 6, 8, 10, 12]),
                    protein_types=protein_atomic_nums,
                    ligand_types=ligand_atomic_nums)
        cc = descriptors.close_contacts_descriptor(
                    receptor,
                    cutoff=cutoff,
                    protein_types=protein_atomic_nums,
                    ligand_types=ligand_atomic_nums)
        #v1 = cc
        vina_scores = ['vina_gauss1', 'vina_gauss2', 'vina_repulsion',
                       'vina_hydrophobic', 'vina_hydrogen', 'vina_num_rotors']
        vina = descriptors.oddt_vina_descriptor(receptor, vina_scores=vina_scores)
        self.v3 = oddt.scoring.ensemble_descriptor((vina, cc))
        dt = time.time() - t0
        print("LoadMol setup done in %f seconds" % dt)

    def fn(self, i):
        n, inp = i
        try:
            df = pd.read_parquet(inp)
            os.remove(inp)
        except FileNotFoundError:
            print("Error: Input file %s is missing!" % inp)
            return pd.DataFrame()
        df['batch'] = n

        v2 = self.v2
        v3 = self.v3
        # Featurize each of the three stored conformers per ligand;
        # unparseable PDBQT strings become None and are skipped by Scorer.
        for x in ['', '2', '3']:
            confs = df['conf'+x]
            mols = []
            for c in confs:
                try:
                    m = oddt.toolkit.readstring('pdbqt', c)
                except Exception:
                    m = None
                mols.append(m)
            #mols = [ oddt.toolkit.readstring('pdbqt', c) for c in confs ]
            #df['vs_dude_v2'+x] = list(v2.build( mols ))
            #df['rf3'+x] = list(v3.build( mols ))
            df['vs_dude_v2'+x] = [v2.build(m).reshape(-1) if m is not None
                                  else None for m in mols]
            df['rf3'+x] = [v3.build(m).reshape(-1) if m is not None
                           else None for m in mols]
        #if 'Z1509820766_1_T1' in df['name']:
        #    print( mols[:10] )
        #    print( df.head() )
        return df.drop(columns=['conf', 'conf2', 'conf3'])

class Scorer(Worker):
    def setup(self):
        t0 = time.time()
        from oddt.scoring.functions import RFScore
        rfs = RFScore.rfscore.load(self.model, version=self.version)
        self.score = rfs.model.predict
        dt = time.time() - t0
        print("Completed setup of %s in %.3f seconds" % (self.name, dt))

    def fn(self, df):
        if len(df) == 0:
            return df
        for x in ['', '2', '3']:
            c = self.name + x
            #df[c] = self.score( list(df[c].values) )
            # Replace each feature vector with its predicted score,
            # leaving rows whose molecule failed to parse as None.
            v = list(df[c].dropna())
            if len(v) == 0:
                print("WARNING: Detected empty ligand file!")
                df.loc[df[c].notna(), c] = []
            else:
                df.loc[df[c].notna(), c] = self.score(v)
        return df
```
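The None handoff between LoadMol and Scorer is easy to miss: each descriptor column holds one feature vector per row, or None for a parse failure, and Scorer overwrites only the notna rows. A toy version of the pattern, with a sum standing in for the real rfs.model.predict:

```python
# Toy version of the notna-masked scoring used by Scorer.fn(); the
# sum() stands in for the real rfs.model.predict.
import numpy as np
import pandas as pd

df = pd.DataFrame({'rf3': [np.zeros(3), None, np.ones(3)]})
v = list(df['rf3'].dropna())                    # features for valid rows only
df.loc[df['rf3'].notna(), 'rf3'] = [x.sum() for x in v]
print(df['rf3'].tolist())                       # [0.0, None, 3.0]
```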
```python
def stop_procs(procs):
    end_time = time.time() + 200  # seconds (be sure they're done)
    num_terminated = 0
    num_failed = 0
    for proc in procs:
        join_secs = max(0.01, end_time - time.time())
        proc.join(join_secs)
    # terminate any procs that still have not exited.
    for proc in procs:
        if proc.is_alive():
            proc.terminate()
            num_terminated += 1
        else:
            exitcode = proc.exitcode
            if exitcode:
                num_failed += 1
    return "%d tasks complete: %d failed, %d terminated" % (
               len(procs), num_failed, num_terminated)

if __name__ == "__main__":
    import sys
    main(sys.argv)
```
small_docker.sh (new file, mode 100644)

```bash
#!/bin/bash
#SBATCH -p dock
#SBATCH --nodes 1
#SBATCH --cpus-per-task 2
#SBATCH --gres gpu:1
#SBATCH -J dock
#SBATCH -o %x.%A_%a.%j.out
#SBATCH --array=1-2

echo "Starting $SLURM_JOB_NAME - $SLURM_ARRAY_TASK_ID at" `date`

source /apps/dock_env/env.sh
export OMP_NUM_THREADS=1
DIR=/apps/launchad

cd /dev/shm
srun -n1 -N1 --gres=gpu:1 --cpus-per-task=2 --exclusive \
    $DIR/loadem.py ccddc-controller $SLURM_JOB_NAME

echo "Completed $SLURM_JOB_NAME - $SLURM_ARRAY_TASK_ID at" `date`
```
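This script is one of the alternate run modes named in the commit message: a two-task Slurm array that runs loadem.py out of /dev/shm with one GPU and two CPUs per task. Note that `$SLURM_JOB_NAME` (set to "dock" by the `-J` line) is forwarded to loadem.py alongside the controller host, so submitting with a different name, e.g. `sbatch -J otherjob small_docker.sh`, would presumably select a different workload on ccddc-controller; that interpretation is inferred from the arguments, not documented in this commit.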