Commit c0f8e598 authored by David M. Rogers

Final updates for scale test.

parent c5e37c4a
@@ -4,14 +4,15 @@
#SBATCH -n64
#SBATCH -J breakup
#SBATCH -o %x.%A_%a.out
-#SBATCH --array=1-2
+#SBATCH --array=1-5
echo "Starting $SLURM_JOB_NAME-$SLURM_ARRAY_TASK_ID at" `date`
source /apps/dock_env/env.sh
export OMP_NUM_THREADS=1
DIR=/apps/launchad
cd /dev/shm
-srun -n64 -N1 --exclusive \
-    $DIR/loadem.py ccddc-controller $SLURM_JOB_NAME
+srun -n64 -N1 $DIR/loadem.py ccddc-controller $SLURM_JOB_NAME
echo "Completed $SLURM_JOB_NAME-$SLURM_ARRAY_TASK_ID at" `date`
#!/usr/bin/env python3
# Note: we need to prune the metadata on the ligands
# down to the stuff from ROOT .. TER
import re
import pandas as pd

expr = re.compile('ROOT\n.*?\nTER\n', re.DOTALL)

# essentially we have to strip all 'MODEL', 'USER' and 'ENDMDL' lines
def fix(s):
    #return s
    s = s.replace('\r', '')
    m = expr.search(s)
    if m is not None:
        return m[0]
    return s[s.index("ROOT"):]

def main(argv):
    assert len(argv) == 2, "Usage: %s <ligs.pq>" % argv[0]
    df = pd.read_parquet(argv[1])
    for lig in df.itertuples():
-       fname = lig.name + '.pdbqt'
+       fname = lig[0] + '.pdbqt'
        with open(fname, "w") as f:
-           f.write(lig.pdbqt)
-       print( "%s\n%s"%(lig.name, fname) )
+           f.write( fix(lig[1]) )
+       print( "%s\n%s"%(lig[0], fname) )

if __name__=="__main__":
    import sys
    main(sys.argv)
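The ROOT..TER pruning above is easiest to see on a concrete input. Here is a minimal sketch; the PDBQT text is invented for illustration, and only the regex and the fix() behavior come from the script above.

import re

expr = re.compile('ROOT\n.*?\nTER\n', re.DOTALL)

# A hypothetical docked-ligand record: MODEL/USER/ENDMDL wrapper lines
# plus the ROOT..TER block that create_inp.py wants to keep.
sample = ("MODEL 1\n"
          "USER  run info\n"
          "ROOT\n"
          "ATOM      1  C   LIG A   1       0.000   0.000   0.000\n"
          "TER\n"
          "ENDMDL\n")

m = expr.search(sample)
print(m[0])   # prints only the "ROOT\nATOM ...\nTER\n" block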
@@ -5,14 +5,15 @@
#SBATCH --gres gpu:1
#SBATCH -J dock
#SBATCH -o %x.%A_%a.out
-#SBATCH --array=1-2
+#SBATCH --array=1-1000
echo "Starting $SLURM_JOB_NAME-$SLURM_ARRAY_TASK_ID at" `date`
source /apps/dock_env/env.sh
-export OMP_NUM_THREADS=2
+export OMP_NUM_THREADS=1
DIR=/apps/launchad
cd /dev/shm
srun -n1 -N1 --gres=gpu:1 --cpus-per-task=2 --exclusive \
    $DIR/loadem.py ccddc-controller $SLURM_JOB_NAME
echo "Completed $SLURM_JOB_NAME-$SLURM_ARRAY_TASK_ID at" `date`
@@ -39,9 +39,9 @@ def xml_to_energy(f):

def grep_all(f, *keys):
    out = dict((k,[]) for k in keys)
    for line in f:
-       line = line.decode('utf8')
+       #line = line.decode('utf8')
        for k in keys:
-           if k in line:
+           if len(line) >= len(k) and line[:len(k)] == k:
                out[k].append(line)
    return out
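The rewritten test in grep_all changes the semantics from substring to prefix matching: a key that happens to appear mid-line no longer counts. A short sketch of the difference (the sample lines are made up):

key = "Run time"
mid = "USER  Run time of this job was 12.5 s"   # key appears mid-line
head = "Run time 12.5 sec"                       # line starts with the key

print(key in mid, key in head)    # True True   (old behavior: both match)
print(mid[:len(key)] == key)      # False       (new behavior: skipped)
print(head[:len(key)] == key)     # True
# line[:len(key)] == key is equivalent to line.startswith(key)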
@@ -64,6 +64,7 @@ def dlg_to_confs(lines):
    en = None
    for s in lines:
        conf.append( s[8:].strip() )
+       #print(s[8:].strip())
        # DOCKED: USER Estimated Free Energy of Binding = -7.85 kcal/mol
        if "Estimated Free Energy of Binding" in s:
            tok = s.replace('=','').split()
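For reference, the tokenization on the last line above splits the quoted DOCKED: USER line into whitespace-separated fields once '=' is stripped. The hunk ends before showing which token is used downstream, so the index below is an assumption based on the quoted line format:

s = "DOCKED: USER    Estimated Free Energy of Binding    =   -7.85 kcal/mol"
tok = s.replace('=','').split()
# tok == ['DOCKED:', 'USER', 'Estimated', ..., 'Binding', '-7.85', 'kcal/mol']
en = float(tok[-2])   # assumed index: the value just before the unit
print(en)             # -7.85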
@@ -9,9 +9,8 @@ base = Path(__file__).resolve().parent
rules = yaml.safe_load(open(base / 'rules.yaml'))
bucket = 'gs://ccddc'

-test = False
-testone = False
-hopper = False
+test = False
+hopper = True
conn_retries = 0
@@ -54,12 +53,20 @@ def get_item(host, db, assigned):
        return item
    return item.decode('utf8')

+def gsutil(cmd):
+    args = ["gsutil", "-o", "GSUtil:parallel_process_count=1",
+            "-o", "GSUtil:parallel_thread_count=1",
+            "-o", "GSUtil:state_dir=%s"%str(base / "gsutil")
+           ] + cmd
+    return subprocess.call( args )

def copyin(name, bucket = bucket, static=False):
    base = PurePosixPath(name).name
-   static = base[-4:] == '.tgz' # hack for tgz
+   static = (base[-4:] == '.tgz') # hack for tgz
    if static and Path(base).is_file():
        print("Skipping cached input %s"%base)
        return
-   ret = subprocess.call( ["gsutil", "cp", bucket + '/' + name, base] )
+   ret = gsutil( ["cp", bucket + '/' + name, base] )
    if ret: return ret
    if base[-4:] == '.tgz':
        ret = subprocess.call("tar xzf {0} && echo >{0}".format(base), shell=True)
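The new gsutil() wrapper pins gsutil to a single process and thread and points it at a per-rank state directory, so 64 ranks on one node do not each spawn a parallel-transfer pool or fight over a shared state dir. A usage sketch (the object path is hypothetical):

# Copy one input from the bucket; gsutil() returns the subprocess exit code.
ret = gsutil(["cp", "gs://ccddc/targets/receptorA.tgz", "receptorA.tgz"])
if ret != 0:
    print("gsutil cp failed with exit code %d" % ret)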
@@ -68,14 +75,15 @@ def copyin(name, bucket = bucket, static=False):

def moveout(name, bucket = bucket):
    if name[-1] == '/': # move whole directory
-       return subprocess.call( ["gsutil", "mv", name+"*", bucket + '/' + name] )
+       return gsutil( ["-m", "cp", name+"*", bucket + '/' + name] )
    loc = PurePosixPath(name).name
-   return subprocess.call( ["gsutil", "mv", loc, bucket + '/' + name] )
+   return gsutil( ["mv", loc, bucket + '/' + name] )

def cleanup(job):
    for inp in job['inp'] + job['out']:
-       if inp[:-4] == ".tgz": continue
-       subprocess.call( ["rm", "-fr", inp.format(**job)] )
+       loc = PurePosixPath(inp.format(**job)).name
+       if loc[-4:] == ".tgz": continue
+       subprocess.call( ["rm", "-fr", loc] )

def run_job(job, item):
    for p,x in zip(job['params'], item.split()):
@@ -103,10 +111,25 @@ def requeue(assigned, host, db):
            if item is None:
                break
            r.smove(assigned, 'ready', item)
+           print("%s %s re-queued %s."%(stamp(), assigned, item))
        else:
            raise IndexError("More than 10 items assigned to %s!"%assigned)
    run_redis(run, host, db)

+# Apparently, we need to get some things sorted first.
+def setup(rank):
+    print("%s Rank %s starting."%(stamp(),rank))
+    if Path(rank).exists():
+        subprocess.call( ["rm", "-fr", rank] )
+    os.mkdir(rank)
+    os.chdir(rank)
+    os.mkdir("gsutil")
+
+    import resource
+    s,h = resource.getrlimit(resource.RLIMIT_NPROC)
+    resource.setrlimit(resource.RLIMIT_NPROC, (h,h))
+    s,h = resource.getrlimit(resource.RLIMIT_NOFILE)
+    resource.setrlimit(resource.RLIMIT_NOFILE, (h,h))
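setup() ends by raising the soft RLIMIT_NPROC and RLIMIT_NOFILE limits to their hard ceilings, since every work item forks gsutil, tar, and autodock children. The same pattern in isolation (raising soft to hard needs no privilege):

import resource

for lim in (resource.RLIMIT_NPROC, resource.RLIMIT_NOFILE):
    soft, hard = resource.getrlimit(lim)
    resource.setrlimit(lim, (hard, hard))   # soft limit raised to the ceiling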
# usually run as:
# loadem.py localhost $SLURM_JOB_ID $SLURM_ARRAY_TASK_ID
def main(argv):
@@ -125,11 +148,7 @@ def main(argv):
    except KeyError:
        pass

-   print("%s Rank %s starting."%(stamp(),rank))
-   if Path(rank).exists():
-       subprocess.call( ["rm", "-fr", rank] )
-   os.mkdir(rank)
-   os.chdir(rank)
+   setup(rank)

    job = rules[jobname]
    db = int(job['db']) # assoc. redis db
@@ -174,8 +193,6 @@ def main(argv):
            consecutive_errors = 0
        n += 1
-       if testone:
-           break

    run_redis(lambda r: r.srem('hosts', assigned), host, db)
    print("%s Rank %s completed (%d items processed, "
# create the 'done' file listing completed decishards
find /gpfs/alpine/world-shared/bif128/docked/ -name 'p*.tgz' | sed -n -e 's/.*\/p\([0-9]*\)\.\([0-9]*\)\.tgz/\1 \2/p' >done
@@ -8,20 +8,37 @@ def get_rdb(host, db):
    import redis
    return redis.Redis(host=host, port=6379, password="Z1908840168_2_T1", db=db)

-def main(argv):
-    assert len(argv) == 3, "Usage: %s <server name> <db>"
-    r = get_rdb(argv[1], int(argv[2]))
-    todo = r.scard('errors')
+def redo_key(r, key):
    k = 0
    while True:
-       sh = r.spop('errors')
+       sh = r.spop(key)
        if sh is None:
            break
        k += r.sadd('ready', sh)
+   return k

+def main(argv):
+    assigned = False
+    if len(argv) >= 2 and argv[1] == "-a":
+        assigned = True
+        del argv[1]
+    assert len(argv) == 3, "Usage: %s [-a] <server name> <db>" % argv[0]
+    r = get_rdb(argv[1], int(argv[2]))
+    todo = r.scard('errors')
+    k = redo_key(r, 'errors')
    print("%d/%d items requeued"%(k,todo))
+
+    if not assigned:
+        return 0
+    k = 0
+    for srv in r.smembers('hosts'):
+        k += redo_key(r, srv)
+        r.srem('hosts', srv)
+    print("%d in-progress elements requeued"%(k))

if __name__=="__main__":
    import sys
    main(sys.argv)
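redo_key() drains one Redis set into 'ready' a member at a time; spop is atomic, so several copies of redo.py can run concurrently without requeueing an item twice. A round-trip sketch against a local, password-free Redis (an assumption for illustration; the production script connects to the controller with the password above):

import redis

r = redis.Redis(host="localhost", port=6379, db=0)
r.sadd('errors', 'p12.34', 'p56.78')   # two hypothetical failed shards
moved = redo_key(r, 'errors')          # pops each and re-adds it to 'ready'
print("%d items moved to 'ready'" % moved)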
@@ -9,15 +9,17 @@ dock:
    - ligs/{n}.pq
  script: |
    export OMP_NUM_THREADS=2
-   version="dwork v1.0"
-   log() {{ echo $(date +"%F %H:%M:%S.%N") "($version) {r}_docked/{n} $*" }}
-   log started
-   ls *{r}*.fld >filelist
+   ls {r}.maps.fld >filelist
    /apps/launchad/create_inp.py {n}.pq >>filelist
    rm {n}.pq
-   log completed file list
-   autodock_gpu_64wi -filelist filelist -nrun 20 -autostop 1 -nev 3000000 >/dev/null
+   AD=`which autodock_gpu_64wi`
+   #AD=/apps/launchad/autodock-Aug28
+   $AD -filelist filelist \
+       -nrun 20 -autostop 1 -nev 3000000 >/dev/null
    /apps/launchad/package_out.py filelist {n}.pq
    rm -f *_*.pdbqt
    rm -f *.xml
    rm -f *.dlg

# Re-score ligand/receptor conf.
rescore:
@@ -42,3 +44,22 @@ breakup:
    mkdir -p ligs
    /apps/launchad/breakup.py -n 512 $((1+{n}*512)) docked.{n}.parquet ligs/%s.pq

+dock_test:
+  queue: dock
+  db: 4
+  params: [r, n]
+  out: [ "{r}_docked/{n}.pq" ]
+  inp:
+    - targets/{r}.tgz # note: untarring is automatic
+    - ligs/{n}.pq
+  script: |
+    export OMP_NUM_THREADS=2
+    ls {r}.maps.fld >filelist
+    /apps/launchad/create_inp.py {n}.pq >>filelist
+    rm {n}.pq
+    autodock_gpu_64wi -filelist filelist \
+        -nrun 20 -autostop 1 -nev 3000000 >/dev/null
+    /apps/launchad/package_out.py filelist {n}.pq
+    rm -f *_*.pdbqt
+    rm -f *.xml
+    rm -f *.dlg
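Each rule's inp/out/script entries are templates: loadem.py's run_job zips job['params'] with the whitespace-split work item, then fills {r} and {n} via str.format, which is why rules.yaml writes literal shell braces as {{ }}. A sketch with a made-up work item:

params = ['r', 'n']
item = "receptorA 42"                      # hypothetical item from Redis
job = dict(zip(params, item.split()))
script = ("ls {r}.maps.fld >filelist\n"
          "/apps/launchad/create_inp.py {n}.pq >>filelist\n")
print(script.format(**job))
# -> ls receptorA.maps.fld >filelist
#    /apps/launchad/create_inp.py 42.pq >>filelist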