Commit 3d5f6fe1 authored by Romero, Joshua

Adding some changes for benchy.

parent be5d1f2a
@@ -444,7 +444,7 @@ def get_pytorch_train_loader(
         pin_memory=True,
         collate_fn=partial(fast_collate, memory_format),
         drop_last=True,
-        persistent_workers=True,
+        persistent_workers=False, # see https://github.com/pytorch/pytorch/issues/48370
     )
     return (
@@ -498,7 +498,7 @@ def get_pytorch_val_loader(
         pin_memory=True,
         collate_fn=partial(fast_collate, memory_format),
         drop_last=False,
-        persistent_workers=True,
+        persistent_workers=False, # see https://github.com/pytorch/pytorch/issues/48370
     )
     return PrefetchedWrapper(val_loader, 0, num_classes, one_hot), len(val_loader)
...
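Note: disabling persistent_workers here works around the PyTorch issue linked in the comment. A minimal standalone sketch of the resulting loader settings, assuming only a working torch install (the random TensorDataset and shapes are illustrative stand-ins for the repo's ImageNet pipeline):

import torch
from torch.utils.data import DataLoader, TensorDataset

# Stand-in dataset; the real code builds an ImageNet dataset with fast_collate.
dataset = TensorDataset(torch.randn(64, 3, 224, 224),
                        torch.randint(0, 1000, (64,)))
loader = DataLoader(
    dataset,
    batch_size=16,
    num_workers=4,
    pin_memory=True,
    drop_last=True,
    persistent_workers=False,  # workaround, see pytorch/pytorch#48370
)
for images, labels in loader:
    pass  # a training step would consume each batch here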
@@ -331,6 +331,7 @@ def add_parser_arguments(parser, skip_arch=False):
         required=False,
         help="number of classes"
     )
+    parser.add_argument("--use-benchy", action="store_true", help="enable benchy")
 
 def prepare_for_training(args, model_args, model_arch):
@@ -562,6 +563,13 @@ def main(args, model_args, model_arch):
     model_and_loss, optimizer, lr_policy, scaler, train_loader, val_loader, logger, ema, model_ema, train_loader_len, \
         batch_size_multiplier, start_epoch = prepare_for_training(args, model_args, model_arch)
 
+    if args.use_benchy:
+        try:
+            from benchy.torch import BenchmarkGenericIteratorWrapper
+            train_loader = BenchmarkGenericIteratorWrapper(train_loader, args.batch_size)
+        except ImportError:
+            print("Requested to use benchy but could not find library. Ignoring...")
+
     if (args.dtLdTime):
         dtLdTime(
             train_loader,
...
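Note: the guard above makes benchy an optional dependency; if the import fails, training proceeds with the unwrapped loader. The same pattern as a standalone helper (the function name is hypothetical; the wrapper call mirrors the diff and assumes benchy is on PYTHONPATH):

def maybe_wrap_with_benchy(train_loader, batch_size, use_benchy):
    """Wrap a loader with benchy's iterator wrapper when requested and available."""
    if not use_benchy:
        return train_loader
    try:
        from benchy.torch import BenchmarkGenericIteratorWrapper
        return BenchmarkGenericIteratorWrapper(train_loader, batch_size)
    except ImportError:
        # benchy not installed: fall back to the plain loader rather than failing
        print("Requested to use benchy but could not find library. Ignoring...")
        return train_loader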
global:
  report_freq: 20
  exit_after_tests: True
  profiler_mode: 'single'
  json_prefix: 'benchy_imagenet'
  output_dir: '/gpfs/alpine/scratch/joshr/ven201/pytorch_tutorial/summit/logs'
IO:
  run_benchmark: True
  nbatches: 40
  ntrials: 2
  nwarmup: 1
synthetic:
  run_benchmark: True
  nbatches: 40
  ntrials: 2
  nwarmup: 1
full:
  run_benchmark: True
  nbatches: 40
  ntrials: 2
  nwarmup: 1
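Note: the IO, synthetic, and full sections enable benchy's three measurement modes (data loading alone, compute on synthetic data, and the full pipeline, respectively); each is configured here for 2 trials of 40 batches after 1 warmup. A quick sanity check that the file parses as intended (requires PyYAML; the path matches the BENCHY_CONFIG_FILE export in the batch script below):

import yaml

with open("benchy-conf.yaml") as f:
    conf = yaml.safe_load(f)
# 'True' parses to a Python bool, quoted values stay strings.
assert conf["global"]["report_freq"] == 20
for mode in ("IO", "synthetic", "full"):
    print(mode, conf[mode]["run_benchmark"], conf[mode]["nbatches"])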
#!/bin/bash
# Begin LSF directives
#BSUB -P ven201
#BSUB -J sc21
#BSUB -o logs/sc21.benchy.o%J
#BSUB -W 0:10
#BSUB -nnodes 1
#BSUB -alloc_flags "nvme smt4"
#BSUB -N
#BSUB -q debug
# End LSF directives and begin shell commands
nnodes=$(cat ${LSB_DJOB_HOSTFILE} | sort | uniq | grep -v login | grep -v batch | wc -l)
DATA_DIR=/gpfs/alpine/world-shared/stf011/junqi/choco_env/dl_code/data/ILSVRC
LOG_DIR=/gpfs/alpine/scratch/joshr/ven201/pytorch_tutorial/summit/logs
CODE_DIR=/gpfs/alpine/scratch/joshr/ven201/pytorch_tutorial/imagenet
source /gpfs/alpine/world-shared/stf011/atsaris/summit_env/monai/setup.sh
export PYTHONPATH=${HOME}/benchy:${PYTHONPATH}
export BENCHY_CONFIG_FILE=/gpfs/alpine/scratch/joshr/ven201/pytorch_tutorial/summit/benchy-conf.yaml
jsrun --smpiargs="-disable_gpu_hooks" -n${nnodes} -a6 -c42 -g6 -r1 --bind=proportional-packed:7 --launch_distribution=packed \
bash -c "\
source export_DDP_envvars.sh && \
python -u ${CODE_DIR}/main.py \
--arch resnet50 \
-j 8 \
-p 10 \
-b 128 \
--training-only \
--raport-file ${LOG_DIR}/benchmark.baseline.json \
--epochs 100 \
--no-checkpoints \
--data-backend pytorch \
--amp \
--memory-format nhwc \
--use-benchy \
${DATA_DIR}
"