Commit 6cfbbb89 authored by David M. Rogers
Browse files

Added timestamps to logs and set each rank to run 2 decishards.

parent d239f1b3
......@@ -2,12 +2,17 @@
import os, subprocess
import redis, time, random
from datetime import datetime
test = False
testone = True
testone = False
testtwo = True
conn_retries = 0
def stamp(version="v0.4"):
    """Return the current local time followed by a version tag.

    The result has the form ``"YYYY-MM-DD HH:MM:SS.ffffff <version>"``, so
    the timestamp occupies the first 26 characters of the string.

    version -- tag appended after the timestamp.  Defaults to the value
               that used to be hard-coded here, so plain ``stamp()`` calls
               produce the same text as before.
    """
    # Microsecond resolution (%f) keeps closely spaced log lines distinguishable.
    return datetime.now().strftime("%Y-%m-%d %H:%M:%S.%f") + " " + version
def run_redis(host, fn):
global conn_retries
for i in range(120):
......@@ -52,6 +57,7 @@ def main(argv):
ofile = open('%s/logs/%s/rank%04x.log'%(out_pre,jobid,rank), "w")
time.sleep(rank*0.0001) # 10k connections per second at startup
#print("%s: Host %04x requesting first shard."%(stamp(),rank)) # worried about hidden sync on file-write
n = 0
while True:
......@@ -65,23 +71,24 @@ def main(argv):
cmd[2] = "p" + cmd[2]
ret = subprocess.call(cmd)
if ret:
ofile.write("%s ERR\n"%shard)
ofile.write("%s %s ERR\n"%(stamp(),shard))
run_redis(host, lambda r: r.sadd('errors', shard))
else:
ofile.write("%s OK\n"%shard)
ofile.write("%s %s OK\n"%(stamp(),shard))
run_redis(host, lambda r: r.srem('doing', shard))
n += 1
if n%10 == 0: # 13k of these messages.
ofile.flush()
print("Host %04x processed %d decishards."%(rank,n))
print("%s Host %04x processed %d decishards."%(stamp(),rank,n))
if testone:
break
if testtwo:
testone = True
ofile.close()
print("Host %04x completed (%d decishards processed)."%(rank,n))
print("Host %04x %d connection retries"%(rank,conn_retries))
print("%s Host %04x completed (%d decishards processed, %d conn retries)."%(stamp(),rank,n,conn_retries))
ret = subprocess.call("rm -fr /mnt/bb/%s/%d"%(username, rank), shell=True)
if __name__=="__main__":
......
import datetime as DT
import sys
sec = []
dat = {'completed docking': [],
'completed copyin': [],
'completed copyout': [],
'completed segment file list': []
}
f = open(sys.argv[1])
t0 = None
......@@ -9,11 +13,20 @@ for line in f:
t1 = DT.datetime.strptime(line[:26], "%Y-%m-%d %H:%M:%S.%f")
if t0 is not None:
dt = (t1-t0).total_seconds()
print("%.3f %s"%(dt, line))
if "completed docking" in line:
sec.append(dt)
#print("%.3f %s"%(dt, line))
for k, v in dat.items():
if k in line:
v.append(dt)
break
t0 = t1
m = sum(sec)/len(sec)
v = sum((s-m)**2 for s in sec)/len(sec)
print(min(sec), max(sec), m, v**0.5)
def stats(k, sec):
    """Print summary statistics for the samples in ``sec`` under label ``k``.

    Two lines are written to stdout: a header carrying the sample count and
    the label, then the minimum, maximum, mean and population standard
    deviation.  Nothing is printed when ``sec`` is empty.
    """
    count = len(sec)
    if count == 0:
        # No samples for this label: skip it rather than divide by zero.
        return
    mean = sum(sec) / count
    # Population variance (divides by N, not N-1).
    variance = sum([(x - mean) ** 2 for x in sec]) / count
    header = "%d %s: min, max, avg, stddev = " % (count, k)
    print(header)
    print(min(sec), max(sec), mean, variance ** 0.5)
for k, v in dat.items():
stats(k, v)
......@@ -9,7 +9,7 @@
export OMP_NUM_THREADS=7
set -e
version="run_ad.sh v0.3"
version="run_ad.sh v0.4"
if [ $# -ne 2 ]; then
echo "Usage: $0 shard_name shard_segment"
......@@ -37,7 +37,7 @@ log() {
echo $(date +"%F %H:%M:%S.%N") "($version) $*" >>$OUT_DIR/$OMPI_COMM_WORLD_RANK.status
}
log started $shard $seg
log started $shard_name $seg
# prep directories
WORK_DIR=/mnt/bb/$USER/$OMPI_COMM_WORLD_RANK/$shard_name
......
#BSUB -nnodes 100
#BSUB -W 60
#BSUB -nnodes 922
#BSUB -W 120
#BSUB -q batch
#BSUB -P BIF128
#BSUB -J ADv0.3
#BSUB -J ADv0.4
#BSUB -o %J.out
#BSUB -alloc_flags "NVME"
......@@ -10,7 +10,7 @@ source /ccs/proj/bif128/venvs/env.sh
PROJ=/gpfs/alpine/bif128/proj-shared/redis
gpus=$(( (LSB_MAX_NUM_PROCESSORS-1)/7 ))
echo "Starting $((gpus/6)) node run of v0.3 at " `date`
echo "Starting $((gpus/6)) node run of v0.4 at " `date`
[ -s $PROJ/shards.rdb ]
REMAKE=$?
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment