Commit 7eaa8bc5 authored by David M. Rogers
Browse files

Actual test scripts used for job 148827.

parent 4125d09c
......@@ -8,7 +8,7 @@ testone = True
conn_retries = 0
def run_redis(fn):
def run_redis(host, fn):
global conn_retries
for i in range(120):
try:
......@@ -27,7 +27,7 @@ def run_redis(fn):
return u
def get_shard(host):
shard = run_redis(lambda r: r.spop('shards'))
shard = run_redis(host, lambda r: r.spop('shards'))
if shard is None:
return shard
return shard.decode('utf8')
......@@ -51,10 +51,11 @@ def main(argv):
if not test:
cmd = ["bash", "/ccs/proj/bif128/analysis/reduce/run_ad.sh"]
cmd.extend(shard.split())
cmd[2] = "p" + cmd[2]
ret = subprocess.call(cmd)
if ret:
ofile.write("%s ERR"%shard)
run_redis(lambda r: r.sadd('errors', shard))
run_redis(host, lambda r: r.sadd('errors', shard))
else:
ofile.write("%s OK\n"%shard)
n += 1
......@@ -68,7 +69,7 @@ def main(argv):
print("Host %04x completed (%d decishards processed)."%(rank,n))
print("Host %04x %d connection retries"%(rank,conn_retries))
ret = subprocess.call("rm -fr /mnt/bb/%s/%d"%(user,rank), shell=True)
ret = subprocess.call("rm -fr /mnt/bb/%s/%d"%(username, rank), shell=True)
if __name__=="__main__":
import sys
......
#BSUB -nnodes 60
#BSUB -nnodes 5
#BSUB -W 30
#BSUB -q debug
#BSUB -P BIF128
#BSUB -J ADv1
#BSUB -o %J.out
#BSUB -alloc_flags "NVME"
source /ccs/proj/bif128/venvs/env.sh
PROJ=/gpfs/alpine/bif128/proj-shared/redis
gpus=$(( (LSB_MAX_NUM_PROCESSORS-1)/7 ))
echo "Starting $((gpus/6)) node run at " `date`
echo "Starting $((gpus/6)) node run of v0.2 at " `date`
[ -s $PROJ/shards.rdb ]
REMAKE=$?
......@@ -29,6 +30,7 @@ done
echo "$memb initial members at " `date`
export OMP_NUM_THREADS=7
jsrun -X 0 \
-n $gpus -r6 -a1 -g1 -c7 -d cyclic -b packed:7 \
python loadem.py `hostname`
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment