Commit c5e37c4a authored by David M. Rogers
Browse files

Bugfixes.

parent 416b9e6c
......@@ -4,14 +4,14 @@
#SBATCH -n64
#SBATCH -J breakup
#SBATCH -o %x.%A_%a.out
#SBATCH --array=1
#SBATCH --array=1-2
source /apps/dock_env/bin/activate
source /apps/dock_env/env.sh
export OMP_NUM_THREADS=1
DIR=/apps/launchad
cd /dev/shm
srun -n64 -N1 --exclusive \
$DIR/loadem.py ccddc-controller $SLURM_JOB_ID
$DIR/loadem.py ccddc-controller $SLURM_JOB_NAME
......@@ -7,12 +7,12 @@
#SBATCH -o %x.%A_%a.out
#SBATCH --array=1-2
source /apps/dock_env/bin/activate
source /apps/dock_env/env.sh
export OMP_NUM_THREADS=2
DIR=/apps/launchad
cd /dev/shm
srun -n1 -N1 --gres=gpu:1 --cpus-per-task=2 --exclusive \
$DIR/loadem.py ccddc-controller $SLURM_JOB_ID
$DIR/loadem.py ccddc-controller $SLURM_JOB_NAME
......@@ -62,7 +62,8 @@ def copyin(name, bucket = bucket, static=False):
ret = subprocess.call( ["gsutil", "cp", bucket + '/' + name, base] )
if ret: return ret
if base[-4:] == '.tgz':
ret = subprocess.call( ["tar", "xzf", base] )
ret = subprocess.call("tar xzf {0} && echo >{0}".format(base), shell=True)
if ret: return ret
return ret
def moveout(name, bucket = bucket):
......@@ -71,6 +72,11 @@ def moveout(name, bucket = bucket):
loc = PurePosixPath(name).name
return subprocess.call( ["gsutil", "mv", loc, bucket + '/' + name] )
def cleanup(job):
    """Remove a job's input and output paths once the job has run.

    Every entry of ``job['inp']`` and ``job['out']`` is a path template.
    Entries ending in ``.tgz`` are skipped; every other entry is expanded
    with ``str.format(**job)`` and removed with ``rm -fr``.

    Args:
        job: rule dictionary holding the ``'inp'`` and ``'out'`` lists plus
            the values their templates refer to.
    """
    for path in job['inp'] + job['out']:
        # Bug fix: the original compared inp[:-4] (everything *except* the
        # last four characters) with ".tgz", so archives were never skipped.
        # NOTE(review): presumably archives are kept so the placeholder that
        # copyin leaves after unpacking survives between items -- confirm.
        if path.endswith(".tgz"):
            continue
        # rm -fr is silent about paths that do not exist.
        subprocess.call(["rm", "-fr", path.format(**job)])
def run_job(job, item):
for p,x in zip(job['params'], item.split()):
job[p] = x
......@@ -115,7 +121,7 @@ def main(argv):
except KeyError:
rank = "x"
try:
rank = rank + "-" + os.environ['OMPI_COMM_WORLD_RANK']
rank = rank + "-" + os.environ['SLURM_PROCID']
except KeyError:
pass
......@@ -125,7 +131,7 @@ def main(argv):
os.mkdir(rank)
os.chdir(rank)
job = rules[job]
job = rules[jobname]
db = int(job['db']) # assoc. redis db
#username = os.environ['USER']
......@@ -146,6 +152,7 @@ def main(argv):
ret = False
if not test:
ret = run_job(job, item)
cleanup(job)
if ret:
def add_err(r):
......@@ -162,6 +169,7 @@ def main(argv):
if consecutive_errors >= 2:
time.sleep(60)
else:
print("%s Rank %s completed %s %s"%(stamp(),rank,jobname,item))
run_redis(lambda r: r.srem(assigned, item), host, db)
consecutive_errors = 0
......@@ -169,6 +177,7 @@ def main(argv):
if testone:
break
run_redis(lambda r: r.srem('hosts', assigned), host, db)
print("%s Rank %s completed (%d items processed, "
"%d errors, %d conn retries)."%(stamp(),rank,n,errors,conn_retries))
......
#!/usr/bin/env python3
# This script will re-queue all 'doing' entries back into 'shards'
# This script will re-queue all 'errors' entries back into 'ready'
# It is meant to be run by hand to recover from a failed run.
import re
def get_rdb(host, db=0):
    """Create a Redis client for the server on *host*.

    Args:
        host: server name or address; the connection uses port 6379.
        db: index of the Redis database to select.  Defaults to 0, the value
            that was hard-coded before this parameter existed, so callers
            passing only *host* keep working.

    Returns:
        A ``redis.Redis`` client for the requested database.
    """
    import redis

    # NOTE(review): the password is embedded in the source file; consider
    # reading it from the environment or a protected config file instead.
    return redis.Redis(host=host, port=6379, password="Z1908840168_2_T1", db=db)
def main(argv):
    """Move every member of the Redis set 'errors' into the set 'ready'.

    Args:
        argv: command line as ``[program, server name, db]``.

    Raises:
        SystemExit: with a usage message when the argument count is wrong.
        ValueError: when the db argument is not an integer.
    """
    # Explicit check instead of ``assert``: asserts are stripped under -O,
    # and the original message never had its %s placeholder filled in.
    if len(argv) != 3:
        prog = argv[0] if argv else "<script>"
        raise SystemExit("Usage: %s <server name> <db>" % prog)

    r = get_rdb(argv[1], int(argv[2]))

    # Size of the set before draining, used only for the summary line.
    todo = r.scard('errors')
    k = 0
    while True:
        sh = r.spop('errors')
        if sh is None:  # set is empty
            break
        # sadd returns the number of members actually added, so items that
        # are already present in 'ready' are not counted.
        k += r.sadd('ready', sh)
    print("%d/%d items requeued" % (k, todo))
if __name__=="__main__":
import sys
......
......@@ -7,7 +7,7 @@ dock:
inp:
- targets/{r}.tgz # note: untarring is automatic
- ligs/{n}.pq
script:
script: |
export OMP_NUM_THREADS=2
version="dwork v1.0"
log() {{ echo $(date +"%F %H:%M:%S.%N") "($version) {r}_docked/{n} $*" }}
......@@ -28,7 +28,7 @@ rescore:
inp:
- targets/{r}.tgz # note: untarring is automatic
- "{r}_docked/{n}.pq"
script:
script: |
/apps/launchad/rescore.py *{r}*.pdbqt {n}.pq
breakup:
......@@ -38,7 +38,7 @@ breakup:
out: [ ligs/ ]
inp:
- 6WQF_docked/docked.{n}.parquet
script:
mkdir ligs
script: |
mkdir -p ligs
/apps/launchad/breakup.py -n 512 $((1+{n}*512)) docked.{n}.parquet ligs/%s.pq
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment