Commit e4219e45 authored by David M. Rogers's avatar David M. Rogers
Browse files

Update path to launchad.

parent 91f9dc30
TODO: - check output files of this run (ex:) TODO: periodic status updates on DB health
/gpfs/alpine/world-shared/bif128/docked/149875/g02n03/p2498.7.tgz
- run for 4 hrs on 90 nodes * v. 1.2
* Add check for repeat failures of docking.
* Current version slated to run.
* v. 1.1
* Change protein structure to 6WQF.
* v. 1.0
* Remove hopper limits to run complete docking.
* v. 0.6 * v. 0.6
* Added logfile into tgz outputs * Added logfile into tgz outputs
......
...@@ -76,7 +76,7 @@ def main(argv): ...@@ -76,7 +76,7 @@ def main(argv):
break break
ret = False ret = False
if not test: if not test:
cmd = ["bash", "/ccs/proj/bif128/analysis/reduce/run_ad.sh"] cmd = ["bash", "/ccs/proj/bif128/analysis/launchad/run_ad.sh"]
cmd.extend(shard.split()) cmd.extend(shard.split())
cmd[2] = "p" + cmd[2] cmd[2] = "p" + cmd[2]
ret = subprocess.call(cmd) # ex. bash run_ad.sh p3321 7 ret = subprocess.call(cmd) # ex. bash run_ad.sh p3321 7
......
import datetime as DT import datetime as DT
import sys import sys
dat = {'completed docking': [], states = ['completed docking',
'completed copyin': [], 'completed copyin',
'completed copyout': [], 'completed copyout',
'completed segment file list': [] 'completed segment file list',
} 'started',
'unknown' ]
dat = {}
def add_stat(a, b, t):
    """Record the sample ``t`` under the key ``(a, b)``.

    Samples accumulate in the module-level ``dat`` dict, which maps each
    ``(a, b)`` pair to the list of values recorded for it so far.
    """
    # setdefault creates the list the first time a pair is seen and
    # returns the existing list on every later call.
    dat.setdefault((a, b), []).append(t)
def get_state(line):
    """Return the first entry of ``states`` that occurs in ``line``.

    Entries are tried in the order they appear in the module-level
    ``states`` list; if none is a substring of ``line`` the result is
    ``'unknown'``.
    """
    return next((label for label in states if label in line), 'unknown')
def main(argv):
for fname in argv[1:]:
f = open(fname)
t0 = None
state0 = None
for line in f:
t1 = DT.datetime.strptime(line[:26], "%Y-%m-%d %H:%M:%S.%f")
state1 = get_state(line)
if t0 is not None:
dt = (t1-t0).total_seconds()
#print("%.3f %s"%(dt, line))
add_stat(state0, state1, dt)
t0 = t1
state0 = state1
f = open(sys.argv[1])
t0 = None
for line in f:
t1 = DT.datetime.strptime(line[:26], "%Y-%m-%d %H:%M:%S.%f")
if t0 is not None:
dt = (t1-t0).total_seconds()
#print("%.3f %s"%(dt, line))
for k, v in dat.items(): for k, v in dat.items():
if k in line: stats(k, v)
v.append(dt)
break
t0 = t1
def stats(k, sec): def stats(k, sec):
if len(sec) == 0: if len(sec) == 0:
return return
m = sum(sec)/len(sec) m = sum(sec)/len(sec)
v = sum((s-m)**2 for s in sec)/len(sec) v = sum((s-m)**2 for s in sec)/len(sec)
print("%d %s: min, max, avg, stddev = "%(len(sec),k)) print("%d %s -> %s: min, max, avg, stddev = "%(len(sec),k[0],k[1]))
print(min(sec), max(sec), m, v**0.5) print(min(sec), max(sec), m, v**0.5)
for k, v in dat.items(): if __name__=="__main__":
stats(k, v) main(sys.argv)
...@@ -8,14 +8,16 @@ def get_rdb(host): ...@@ -8,14 +8,16 @@ def get_rdb(host):
import redis import redis
return redis.Redis(host=host, port=6379, password="Z1908840168_2_T1", db=0) return redis.Redis(host=host, port=6379, password="Z1908840168_2_T1", db=0)
key = 'doing'
key = 'errors'
def main(argv): def main(argv):
assert len(argv) == 2, "Usage: %s <server name>" assert len(argv) == 2, "Usage: %s <server name>"
r = get_rdb(argv[1]) r = get_rdb(argv[1])
todo = r.scard('doing') todo = r.scard(key)
k = 0 k = 0
while True: while True:
sh = r.spop('doing') sh = r.spop(key)
if sh is None: if sh is None:
break break
k += r.sadd('shards', sh) k += r.sadd('shards', sh)
......
...@@ -17,7 +17,8 @@ if [ $# -ne 2 ]; then ...@@ -17,7 +17,8 @@ if [ $# -ne 2 ]; then
fi fi
shard_name=$1 shard_name=$1
seg=$2 seg=$2
sfile=`grep $shard_name.tar.gz /gpfs/alpine/bif128/world-shared/ligand_shards.txt` #sfile=`grep $shard_name.tar.gz /gpfs/alpine/bif128/world-shared/ligand_shards.txt`
sfile=`grep $shard_name.tar.gz /ccs/proj/bif128/analysis/launchad/fixed_shards.txt`
if [ ! -s "$sfile" ]; then if [ ! -s "$sfile" ]; then
echo "Missing $sfile" echo "Missing $sfile"
exit 1 exit 1
...@@ -79,7 +80,7 @@ log completed docking ...@@ -79,7 +80,7 @@ log completed docking
# copy-out function # copy-out function
#cd $WORK_DIR #cd $WORK_DIR
tar czf $shard_name.$seg.tgz ${seg}.log `awk '{printf("%s.xml\n%s.dlg\n",$0,$0);}' lignames.$seg` \ tar czf $shard_name.$seg.tgz ${seg}.log `awk '{printf("%s.xml\n%s.dlg\n",$0,$0);}' lignames.$seg` \
|| echo "Error tarring some files." || echo "Error while tarring $shard_name.$seg.tgz"
cp $shard_name.$seg.tgz $OUT_DIR cp $shard_name.$seg.tgz $OUT_DIR
#rm -fr $WORK_DIR #rm -fr $WORK_DIR
log completed copyout log completed copyout
......
#BSUB -nnodes 4600 #BSUB -nnodes 37
#BSUB -W 24:00 #BSUB -W 1:20
#BSUB -q batch #BSUB -q batch
#BSUB -P BIF128 #BSUB -P BIF128
#BSUB -J ADv1.0 #BSUB -J ADv1.3
#BSUB -o %J.out #BSUB -o %J.out
#BSUB -alloc_flags "NVME" #BSUB -alloc_flags "NVME"
...@@ -10,15 +10,17 @@ source /ccs/proj/bif128/venvs/env.sh ...@@ -10,15 +10,17 @@ source /ccs/proj/bif128/venvs/env.sh
PROJ=/gpfs/alpine/bif128/proj-shared/redis PROJ=/gpfs/alpine/bif128/proj-shared/redis
gpus=$(( (LSB_MAX_NUM_PROCESSORS-1)/7 )) gpus=$(( (LSB_MAX_NUM_PROCESSORS-1)/7 ))
echo "Starting $((gpus/6)) node run of ADv1.0 at " `date` echo "Starting $((gpus/6)) node run of ADv1.3 at " `date`
[ -s $PROJ/shards.rdb ] dbname=shards_6WQF
[ -s $PROJ/$dbname.rdb ]
REMAKE=$? REMAKE=$?
# REMAKE == 0 if file exists and has nonzero size # REMAKE == 0 if file exists and has nonzero size
redis-server shards.conf 2>&1 >$PROJ/"shard_"$LSB_JOBID.log & redis-server $dbname.conf 2>&1 >$PROJ/$dbname.$LSB_JOBID.log &
if [ $REMAKE -eq 1 ]; then if [ $REMAKE -eq 1 ]; then
# NOTE: this needs to catch redis.exceptions.BusyLoadingError
python setdb.py `hostname` /gpfs/alpine/bif128/world-shared/ligand_shards.txt python setdb.py `hostname` /gpfs/alpine/bif128/world-shared/ligand_shards.txt
fi fi
...@@ -52,4 +54,6 @@ if [ $memb -gt 0 ]; then ...@@ -52,4 +54,6 @@ if [ $memb -gt 0 ]; then
query smembers doing query smembers doing
fi fi
query shutdown
sleep 10
kill % kill %
...@@ -320,7 +320,7 @@ rdbcompression yes ...@@ -320,7 +320,7 @@ rdbcompression yes
rdbchecksum yes rdbchecksum yes
# The filename where to dump the DB # The filename where to dump the DB
dbfilename shards_6WQF.rdb dbfilename shards.rdb
# Remove RDB files used by replication in instances without persistence # Remove RDB files used by replication in instances without persistence
# enabled. By default this option is disabled, however there are environments # enabled. By default this option is disabled, however there are environments
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment