Commit 9a5a5444 authored by Yin, Junqi's avatar Yin, Junqi

add per step timing on training

parent 92ecb4e6
#!/bin/bash
#BSUB -P stf011
#BSUB -W 1:00
#BSUB -nnodes 4
#BSUB -W 0:30
#BSUB -nnodes 3
#BSUB -alloc_flags gpumps
#BSUB -J chocosgd
#BSUB -o logs/chocosgd.o%J
......@@ -15,11 +15,13 @@ source choco_env.sh
TOPOLOGY=ring
NRANK=$((NNODES*6))
WORLD=$(python -c "print(','.join(['0']*$NRANK))")
WORLD=$(python -c "print(','.join(['0,1,2,3,4,5']*$NNODES))")
NOW=$(date '+%Y%m%d%H%M%S')
cp run.sh.tplt run.sh
sed -i "s/TODO_NRANK/$NRANK/" run.sh
sed -i "s/TODO_NRANK/$NNODES/" run.sh
sed -i "s/TODO_GPURANKS/$WORLD/" run.sh
sed -i "s/TODO_TOPOLOGY/$TOPOLOGY/" run.sh
sed -i "s/TODO_TIMESTAMP/$NOW/" run.sh
jsrun -n${NRANK} -a1 -g1 -c7 -r6 --smpiargs "-gpu" --bind=proportional-packed:7 --launch_distribution=packed ./run.sh
jsrun -n${NNODES} -a1 -g6 -c42 -r1 --smpiargs "-gpu" --bind=rs --launch_distribution=packed ./run.sh
......@@ -104,6 +104,7 @@ def display_training_stat(conf, scheduler, tracker, n_bits_to_transmit):
"local_index": scheduler.local_index,
"n_bits_to_transmit": n_bits_to_transmit / 8 / (2 ** 20),
**tracker(),
**conf.timer.per_step(),
},
tags={"split": "train"},
display=True,
......
......@@ -32,6 +32,7 @@ class Timer:
"""Reset the timer"""
self.totals = {} # Total time per label
self.first_time = {} # First occurrence of a label (start time)
self.interval_time = {} # Elapsed time (end - start) recorded for a label (step time)
self.last_time = {} # Last occurrence of a label (end time)
self.call_counts = {} # Number of times a label occurred
......@@ -52,6 +53,7 @@ class Timer:
# Update first and last occurrence of this label
if label not in self.first_time:
self.first_time[label] = start
self.interval_time[label] = end - start
self.last_time[label] = end
# Update the totals and call counts
......@@ -108,6 +110,14 @@ class Timer:
print("-------------------------------------------", file=buffer)
return buffer.getvalue()
def per_step(self):
    """Return the recorded per-step timing for every tracked label.

    Returns:
        dict: maps each label present in ``self.totals`` to its entry in
        ``self.interval_time``, inserted in sorted label order. The dict
        is empty when no label has been tracked yet.
    """
    # Iterating an empty ``totals`` already yields an empty dict, so no
    # explicit emptiness guard is needed.
    return {
        event_label: self.interval_time[event_label]
        for event_label in sorted(self.totals)
    }
def _cuda_sync(self):
"""Finish all asynchronous GPU computations to get correct timings"""
if self.cuda_available:
......
......@@ -65,7 +65,7 @@ python main.py \
--evaluate False \
--eval_freq 1 \
--summary_freq 100 \
--timestamp 1599681078_l2-0.0001_lr-0.1_epochs-300_batchsize-128_basebatchsize-64_num_mpi_process_6_n_sub_process-1_topology-ring_optim-parallel_choco-stepsize-0.5_comm_info-sign_ \
--timestamp TODO_TIMESTAMP \
--track_time True \
--track_detailed_time False \
--display_tracked_time True \
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment