Commit 0842e46a authored by David M. Rogers
Browse files

Added alternate run modes.

parent 81b842ab
......@@ -21,11 +21,13 @@ def main(argv):
assert len(argv) == 2, "Usage: %s <ligs.pq>"
df = pd.read_parquet(argv[1])
for lig in df.itertuples():
fname = lig[0] + '.pdbqt'
for i in range(len(df)):
name = df.iloc[i]['name']
conf = df.iloc[i]['conf']
fname = name + '.pdbqt'
with open(fname, "w") as f:
f.write( fix(lig[1]) )
print( "%s\n%s"%(lig[0], fname) )
f.write( fix(conf) )
print( "%s\n%s"%(name, fname) )
if __name__=="__main__":
import sys
......
import pandas as pd
import sys
# Build control.pq from the files named on the command line: one row per
# file, holding a ligand name and the file's full text.
paths = sys.argv[1:]

# The ligand name is whatever precedes the first '.' in the given path.
names = [p.split('.', 2)[0] for p in paths]

confs = []
for p in paths:
    with open(p) as handle:
        confs.append(handle.read())

table = pd.DataFrame({'name': names, 'conf': confs})
table.to_parquet('control.pq', engine='pyarrow', compression='snappy')
#!/usr/bin/env python3
from helpers import *
import os, concurrent, subprocess
import pandas as pd
import numpy as np
from q2 import Event, Worker, WorkQueue, time
import oddt
def fhash(x):
    """Map *x* to ``48271 * x`` reduced modulo 2147483647 (2**31 - 1).

    ihash() applies the inverse multiplier under the same modulus.
    """
    modulus = 2147483647
    multiplier = 48271
    return (multiplier * x) % modulus
def ihash(y):
    """Map *y* to ``1899818559 * y`` reduced modulo 2147483647 (2**31 - 1).

    1899818559 * 48271 is congruent to 1 under this modulus, so this undoes
    fhash() for values in ``range(2147483647)``.
    """
    modulus = 2147483647
    inverse_multiplier = 1899818559
    return (inverse_multiplier * y) % modulus
# Parallelism setting: passed to gsutil as parallel_thread_count, and
# process_inp starts (threads - 2) LoadMol workers.  main() overrides it
# when "-n" is given.
threads = 33
# Batch size; set from main()'s "-n" option (which also resets threads to
# batch_sz + 2).  Its only other use in this file is in the commented-out
# hashed-batch input mode inside process_inp.
batch_sz = 648
def gsutil(cmd):
    """Run ``gsutil`` with this script's fixed options followed by *cmd*.

    cmd -- list of gsutil arguments, e.g. ``['cp', src, dst]``.
    Returns the exit status reported by ``subprocess.call``.
    """
    options = (
        "GSUtil:parallel_process_count=1",
        "GSUtil:parallel_thread_count=%d" % threads,
        "GSUtil:state_dir=gsutil",
    )
    command = ["gsutil"]
    for opt in options:
        command += ["-o", opt]
    command.append("-m")
    return subprocess.call(command + cmd)
def process_inp(r, name):
    """Rescore the docked ligand batches listed in file *name* for receptor *r*.

    Steps:
      1. Read *name*; every line with exactly two whitespace-separated
         fields contributes its second field ``x``.
      2. Copy ``gs://ccddc/<r>_docked/<x>.pq`` for each ``x`` into the
         current directory with gsutil.
      3. Feed the local file names through a pipeline of workers:
         ``threads - 2`` LoadMol workers -> Scorer "rf3" -> Scorer
         "vs_dude_v2".
      4. Concatenate the resulting frames and write them to ``<name>.pq``.

    Returns the status string produced by stop_procs().

    NOTE(review): Event / WorkQueue come from the q2 module, which is not
    visible here; the queue wiring below is described from how the objects
    are passed around -- confirm against q2.
    """
    # Alternate (currently disabled) input mode: derive a batch of hashed
    # file names from a hex id instead of reading a list file.
    #n = ihash( int(name, 16) )
    #inp = [ (n+i, "%x.pq" % fhash(n+i)) for i in range(batch_sz) ]

    # Close the list file deterministically instead of leaving the handle
    # to the garbage collector.
    with open(name) as listing:
        rows = [line.split() for line in listing]
    n = [fields[1] for fields in rows if len(fields) == 2]

    # The first tuple element becomes the 'batch' column in LoadMol.fn;
    # -1 is used for every entry in this mode.
    inp = [(-1,"%s.pq"%x) for x in n]
    inp2 = [ "gs://ccddc/%s_docked/%s"%(r, i[1]) for i in inp ]
    gsutil(['cp'] + inp2 + ['./'])

    end = Event()
    start = WorkQueue(end, 1)
    out1 = WorkQueue(end)
    out2 = WorkQueue(end)
    done = WorkQueue(end)

    # Two of the `threads` slots are taken by the two Scorer workers below.
    n_loaders = threads - 2
    loaders = []
    for i in range(n_loaders):
        loaders.append( LoadMol(start, out1) )
        loaders[-1].r = r  # receptor id, read by LoadMol.setup
        loaders[-1].start()

    rf3 = Scorer(out1, out2)
    rf3.name = "rf3"
    rf3.model = "/apps/data/RFScore_v3_pdbbind2016.pickle"
    rf3.version = 3
    rf3.start()

    dude2 = Scorer(out2, done)
    dude2.name = "vs_dude_v2"
    dude2.model = "/apps/data/RFScoreVS_v2_dude.pickle"
    dude2.version = 2
    dude2.start()

    for i in inp:
        start.put(i)
    start.fin()  # presumably marks the end of input -- confirm in q2

    ans = [ df for df in done ]
    if len(ans) > 0:
        ans = pd.concat(ans)
    else:
        ans = pd.DataFrame()
    ans.to_parquet(name+'.pq', compression='snappy', engine='pyarrow')

    end.set()
    return stop_procs(loaders + [rf3, dude2])
def main(argv):
    """Command-line entry point.

    Expected arguments (after the program name in ``argv[0]``)::

        [-n <batch size>] <receptor id> <list file>

    ``-n N`` sets the module globals ``batch_sz = N`` and
    ``threads = N + 2``.  The receptor id and list file are handed to
    process_inp(), and the status string it returns is printed.

    Raises AssertionError carrying a usage message when the argument count
    is wrong, and ValueError when the ``-n`` value is not an integer.
    """
    global threads
    global batch_sz

    # Work on a copy so the caller's list (normally sys.argv) is left intact.
    args = list(argv)
    if len(args) >= 3 and args[1] == "-n":
        batch_sz = int(args[2])
        threads = batch_sz+2
        del args[1:3]

    # Raised explicitly (rather than via `assert`) so the check survives
    # `python -O`, and with the program name actually substituted.
    if len(args) != 3:
        prog = args[0] if args else "<script>"
        raise AssertionError("Usage: %s <receptor id> <list file>" % prog)

    status = process_inp(args[1], args[2])
    print(status)
class LoadMol(Worker):
    """ load a molecule and calculate its descriptors """

    def setup(self):
        """Create the descriptor generators ``self.v2`` and ``self.v3``.

        The receptor is read from ``<self.r>.pdbqt``; ``self.r`` has to be
        assigned on the instance before this runs (process_inp does so
        before calling start()).
        """
        started = time.time()
        from oddt.scoring import descriptors

        receptor = next(oddt.toolkit.readfile('pdbqt', self.r+'.pdbqt'))

        # Atom types (atomic numbers) shared by both close-contact
        # descriptors below.
        ligand_atomic_nums = [6, 7, 8, 9, 15, 16, 17, 35, 53]
        protein_atomic_nums = [6, 7, 8, 16]
        atom_types = dict(protein_types=protein_atomic_nums,
                          ligand_types=ligand_atomic_nums)

        # Features stored in the 'vs_dude_v2*' columns: close contacts with
        # an array of cutoffs.
        self.v2 = descriptors.close_contacts_descriptor(
            receptor,
            cutoff=np.array([0, 2, 4, 6, 8, 10, 12]),
            **atom_types)

        # Features stored in the 'rf3*' columns: the selected vina terms
        # combined with close contacts at a single cutoff of 12.
        single_cutoff = descriptors.close_contacts_descriptor(
            receptor,
            cutoff=12,
            **atom_types)
        vina = descriptors.oddt_vina_descriptor(
            receptor,
            vina_scores=['vina_gauss1',
                         'vina_gauss2',
                         'vina_repulsion',
                         'vina_hydrophobic',
                         'vina_hydrogen',
                         'vina_num_rotors'])
        self.v3 = oddt.scoring.ensemble_descriptor((vina, single_cutoff))

        elapsed = time.time() - started
        print("LoadMol setup done in %f seconds"%elapsed)

    def fn(self, i):
        """Load one parquet batch and replace its conformations by descriptors.

        i -- ``(batch number, parquet file name)`` tuple.

        The input file is deleted once it has been read.  For each of the
        columns 'conf', 'conf2' and 'conf3' the matching 'vs_dude_v2*' and
        'rf3*' columns receive one flattened descriptor vector per row, or
        None where the conformation could not be parsed.  The conformation
        columns are dropped from the returned frame.  A missing input file
        is reported on stdout and yields an empty DataFrame.
        """
        n, inp = i
        try:
            df = pd.read_parquet(inp)
            os.remove(inp)
        except FileNotFoundError:
            print("Error: Input file %s is missing!"%inp)
            return pd.DataFrame()
        df['batch'] = n

        def parse(text):
            # Best effort: an unreadable conformation becomes None rather
            # than aborting the whole batch.
            try:
                return oddt.toolkit.readstring('pdbqt', text)
            except Exception:
                return None

        def describe(generator, mol):
            # Descriptors are built one molecule at a time so that None
            # entries can be passed through untouched.
            return None if mol is None else generator.build( mol ).reshape(-1)

        close_contacts = self.v2
        vina_ensemble = self.v3
        for suffix in ('', '2', '3'):
            mols = [ parse(text) for text in df['conf'+suffix] ]
            df['vs_dude_v2'+suffix] = [ describe(close_contacts, m) for m in mols ]
            df['rf3'+suffix] = [ describe(vina_ensemble, m) for m in mols ]
        return df.drop(columns=['conf', 'conf2', 'conf3'])
class Scorer(Worker):
    """Replace descriptor columns by the predictions of an RFScore model.

    The attributes ``name`` (column prefix), ``model`` (path of the stored
    model) and ``version`` must be assigned on the instance before it is
    started; process_inp does this for both scorers it creates.
    """

    def setup(self):
        """Load the model and keep its ``predict`` function in ``self.score``."""
        started = time.time()
        from oddt.scoring.functions import RFScore
        loaded = RFScore.rfscore.load(self.model, version=self.version)
        self.score = loaded.model.predict
        elapsed = time.time() - started
        print("Completed setup of %s in %.3f seconds"%(self.name,elapsed))

    def fn(self, df):
        """Score the columns ``<name>``, ``<name>2`` and ``<name>3`` of *df*.

        Non-null entries of each column are passed to ``self.score`` in a
        single call and overwritten in place with the result; null entries
        stay as they are.  An empty frame is returned unchanged.
        """
        if len(df) == 0:
            return df
        for suffix in ('', '2', '3'):
            column = self.name + suffix
            features = list( df[column].dropna() )
            if not features:
                print("WARNING: Detected empty ligand file!")
                df.loc[df[column].notna(), column] = []
                continue
            df.loc[df[column].notna(), column] = self.score(features)
        return df
def stop_procs(procs):
    """Wait for *procs* to exit, terminate stragglers, and summarise.

    All joins share one 200 second deadline; once it has passed, each
    remaining join still waits at least 0.01 s.  Afterwards every process
    that is still alive is terminated, and every exited process with a
    truthy exit code is counted as failed.

    Returns "<total> tasks complete: <failed> failed, <terminated> terminated".
    """
    deadline = time.time() + 200 # seconds (be sure they're done)
    for proc in procs:
        proc.join(max(0.01, deadline - time.time()))

    terminated = 0
    failed = 0
    for proc in procs:
        if not proc.is_alive():
            if proc.exitcode:
                failed += 1
            continue
        # Still running after the deadline: stop it.
        proc.terminate()
        terminated += 1

    summary = (len(procs), failed, terminated)
    return "%d tasks complete: %d failed, %d terminated" % summary
# Script entry point: hand the full argument vector (program name included)
# to main() only when the file is executed directly, not when imported.
if __name__=="__main__":
    import sys
    main(sys.argv)
#!/bin/bash
#SBATCH -p dock
#SBATCH --nodes 1
#SBATCH --cpus-per-task 2
#SBATCH --gres gpu:1
#SBATCH -J dock
#SBATCH -o %x.%A_%a.%j.out
#SBATCH --array=1-2

# Slurm array job: each array task runs loadem.py once, passing it
# "ccddc-controller" and the job name as arguments.

echo "Starting $SLURM_JOB_NAME-$SLURM_ARRAY_TASK_ID at $(date)"

source /apps/dock_env/env.sh
export OMP_NUM_THREADS=1

DIR=/apps/launchad

# Run from /dev/shm; stop here if it is unavailable instead of silently
# running the job in whatever directory it was submitted from.
cd /dev/shm || exit 1

srun -n1 -N1 --gres=gpu:1 --cpus-per-task=2 --exclusive \
    "$DIR/loadem.py" ccddc-controller "$SLURM_JOB_NAME"

echo "Completed $SLURM_JOB_NAME-$SLURM_ARRAY_TASK_ID at $(date)"
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment