Commit d440969a authored by Aristeidis Tsaris
Browse files

clean things up

parent efe645d1
This source diff could not be displayed because it is too large. You can view the blob instead.
---
# Benchmark configuration whose results use the prefix
# 'benchy_imagenet_0_noddp'.
# NOTE(review): the section nesting below is reconstructed from a listing in
# which every key sat at column 0 (making the per-section keys duplicates) --
# confirm it against the consumer's expected schema.
global:
  report_freq: 20
  exit_after_tests: true
  profiler_mode: 'single'
  json_prefix: 'benchy_imagenet_0_noddp'
  output_dir: 'ascent_logs'

# The three sections below carry the same four settings.
IO:
  run_benchmark: true
  nbatches: 40
  ntrials: 2
  nwarmup: 1

synthetic:
  run_benchmark: true
  nbatches: 40
  ntrials: 2
  nwarmup: 1

full:
  run_benchmark: true
  nbatches: 40
  ntrials: 2
  nwarmup: 1
---
# Benchmark configuration whose results use the prefix 'benchy_imagenet_0'.
# NOTE(review): the section nesting below is reconstructed from a listing in
# which every key sat at column 0 (making the per-section keys duplicates) --
# confirm it against the consumer's expected schema.
global:
  report_freq: 20
  exit_after_tests: true
  profiler_mode: 'single'
  json_prefix: 'benchy_imagenet_0'
  output_dir: 'ascent_logs'

# The three sections below carry the same four settings.
IO:
  run_benchmark: true
  nbatches: 40
  ntrials: 2
  nwarmup: 1

synthetic:
  run_benchmark: true
  nbatches: 40
  ntrials: 2
  nwarmup: 1

full:
  run_benchmark: true
  nbatches: 40
  ntrials: 2
  nwarmup: 1
---
# Benchmark configuration whose results use the prefix
# 'benchy_imagenet_1_noddp'.
# NOTE(review): the section nesting below is reconstructed from a listing in
# which every key sat at column 0 (making the per-section keys duplicates) --
# confirm it against the consumer's expected schema.
global:
  report_freq: 20
  exit_after_tests: true
  profiler_mode: 'single'
  json_prefix: 'benchy_imagenet_1_noddp'
  output_dir: 'ascent_logs'

# The three sections below carry the same four settings.
IO:
  run_benchmark: true
  nbatches: 40
  ntrials: 2
  nwarmup: 1

synthetic:
  run_benchmark: true
  nbatches: 40
  ntrials: 2
  nwarmup: 1

full:
  run_benchmark: true
  nbatches: 40
  ntrials: 2
  nwarmup: 1
---
# Benchmark configuration whose results use the prefix 'benchy_imagenet_1'.
# NOTE(review): the section nesting below is reconstructed from a listing in
# which every key sat at column 0 (making the per-section keys duplicates) --
# confirm it against the consumer's expected schema.
global:
  report_freq: 20
  exit_after_tests: true
  profiler_mode: 'single'
  json_prefix: 'benchy_imagenet_1'
  output_dir: 'ascent_logs'

# The three sections below carry the same four settings.
IO:
  run_benchmark: true
  nbatches: 40
  ntrials: 2
  nwarmup: 1

synthetic:
  run_benchmark: true
  nbatches: 40
  ntrials: 2
  nwarmup: 1

full:
  run_benchmark: true
  nbatches: 40
  ntrials: 2
  nwarmup: 1
---
# Benchmark configuration whose results use the prefix
# 'benchy_imagenet_2_noddp'.
# NOTE(review): the section nesting below is reconstructed from a listing in
# which every key sat at column 0 (making the per-section keys duplicates) --
# confirm it against the consumer's expected schema.
global:
  report_freq: 20
  exit_after_tests: true
  profiler_mode: 'single'
  json_prefix: 'benchy_imagenet_2_noddp'
  output_dir: 'ascent_logs'

# The three sections below carry the same four settings.
IO:
  run_benchmark: true
  nbatches: 40
  ntrials: 2
  nwarmup: 1

synthetic:
  run_benchmark: true
  nbatches: 40
  ntrials: 2
  nwarmup: 1

full:
  run_benchmark: true
  nbatches: 40
  ntrials: 2
  nwarmup: 1
---
# Benchmark configuration whose results use the prefix 'benchy_imagenet_2'.
# NOTE(review): the section nesting below is reconstructed from a listing in
# which every key sat at column 0 (making the per-section keys duplicates) --
# confirm it against the consumer's expected schema.
global:
  report_freq: 20
  exit_after_tests: true
  profiler_mode: 'single'
  json_prefix: 'benchy_imagenet_2'
  output_dir: 'ascent_logs'

# The three sections below carry the same four settings.
IO:
  run_benchmark: true
  nbatches: 40
  ntrials: 2
  nwarmup: 1

synthetic:
  run_benchmark: true
  nbatches: 40
  ntrials: 2
  nwarmup: 1

full:
  run_benchmark: true
  nbatches: 40
  ntrials: 2
  nwarmup: 1
---
# Benchmark configuration whose results use the prefix
# 'benchy_imagenet_3_noddp'.
# NOTE(review): the section nesting below is reconstructed from a listing in
# which every key sat at column 0 (making the per-section keys duplicates) --
# confirm it against the consumer's expected schema.
global:
  report_freq: 20
  exit_after_tests: true
  profiler_mode: 'single'
  json_prefix: 'benchy_imagenet_3_noddp'
  output_dir: 'ascent_logs'

# The three sections below carry the same four settings.
IO:
  run_benchmark: true
  nbatches: 40
  ntrials: 2
  nwarmup: 1

synthetic:
  run_benchmark: true
  nbatches: 40
  ntrials: 2
  nwarmup: 1

full:
  run_benchmark: true
  nbatches: 40
  ntrials: 2
  nwarmup: 1
---
# Benchmark configuration whose results use the prefix 'benchy_imagenet_3'.
# NOTE(review): the section nesting below is reconstructed from a listing in
# which every key sat at column 0 (making the per-section keys duplicates) --
# confirm it against the consumer's expected schema.
global:
  report_freq: 20
  exit_after_tests: true
  profiler_mode: 'single'
  json_prefix: 'benchy_imagenet_3'
  output_dir: 'ascent_logs'

# The three sections below carry the same four settings.
IO:
  run_benchmark: true
  nbatches: 40
  ntrials: 2
  nwarmup: 1

synthetic:
  run_benchmark: true
  nbatches: 40
  ntrials: 2
  nwarmup: 1

full:
  run_benchmark: true
  nbatches: 40
  ntrials: 2
  nwarmup: 1
---
# Benchmark configuration whose results use the prefix
# 'benchy_imagenet_4_noddp'.
# NOTE(review): the section nesting below is reconstructed from a listing in
# which every key sat at column 0 (making the per-section keys duplicates) --
# confirm it against the consumer's expected schema.
global:
  report_freq: 20
  exit_after_tests: true
  profiler_mode: 'single'
  json_prefix: 'benchy_imagenet_4_noddp'
  output_dir: 'ascent_logs'

# The three sections below carry the same four settings.
IO:
  run_benchmark: true
  nbatches: 40
  ntrials: 2
  nwarmup: 1

synthetic:
  run_benchmark: true
  nbatches: 40
  ntrials: 2
  nwarmup: 1

full:
  run_benchmark: true
  nbatches: 40
  ntrials: 2
  nwarmup: 1
---
# Benchmark configuration whose results use the prefix 'benchy_imagenet_4'.
# NOTE(review): the section nesting below is reconstructed from a listing in
# which every key sat at column 0 (making the per-section keys duplicates) --
# confirm it against the consumer's expected schema.
global:
  report_freq: 20
  exit_after_tests: true
  profiler_mode: 'single'
  json_prefix: 'benchy_imagenet_4'
  output_dir: 'ascent_logs'

# The three sections below carry the same four settings.
IO:
  run_benchmark: true
  nbatches: 40
  ntrials: 2
  nwarmup: 1

synthetic:
  run_benchmark: true
  nbatches: 40
  ntrials: 2
  nwarmup: 1

full:
  run_benchmark: true
  nbatches: 40
  ntrials: 2
  nwarmup: 1
#!/bin/bash
# Begin LSF directives
#BSUB -P gen166
#BSUB -J sc21
#BSUB -o logs/sc21.o%J
#BSUB -W 0:30
#BSUB -nnodes 1
#BSUB -alloc_flags "nvme smt4"
####BSUB -N
# End LSF directives and begin shell commands

# Launches ../imagenet/main.py (resnet50, --use-benchy) through jsrun twice:
# once with benchy-conf-0.yaml and once with benchy-conf-0-noddp.yaml plus
# --noDDP. A third dali-cpu launch sits after an unconditional `exit` and
# therefore never executes.

# Number of distinct hosts in the LSF host file, ignoring any line that
# mentions "login" or "batch".
nnodes=$(sort -u "${LSB_DJOB_HOSTFILE}" | grep -v -e login -e batch | wc -l)

DATA_DIR=/gpfs/wolf/gen166/proj-shared/atsaris/imagenet/data/
LOG_DIR=ascent_logs/

source /gpfs/wolf/gen166/proj-shared/atsaris/env/activate.sh

# NOTE(review): every launch below passes the same --raport-file path, so a
# later run presumably overwrites the earlier report -- confirm this is wanted.

echo "Starting bency"
export BENCHY_CONFIG_FILE=benchy_configs/benchy-conf-0.yaml
# Inside the double-quoted command string a backslash-newline is removed, so
# every option needs a space before its trailing backslash (the batch size
# used to rely on the next line's indentation to stay a separate word).
jsrun --smpiargs="-disable_gpu_hooks" -n"${nnodes}" -a1 -c42 -g1 -r1 \
    --bind=proportional-packed:7 --launch_distribution=packed \
    bash -c "\
    source export_DDP_envvars.sh && \
    python -u ../imagenet/main.py \
    --arch resnet50 \
    -j 8 \
    -p 10 \
    -b 128 \
    --training-only \
    --raport-file ${LOG_DIR}/benchmark.dataload.opt.0N.json \
    --epochs 100 \
    --no-checkpoints \
    --data-backend pytorch \
    --amp \
    --memory-format nhwc \
    --use-benchy \
    ${DATA_DIR}
    "

echo "Starting bency no DDP"
export BENCHY_CONFIG_FILE=benchy_configs/benchy-conf-0-noddp.yaml
jsrun --smpiargs="-disable_gpu_hooks" -n"${nnodes}" -a1 -c42 -g1 -r1 \
    --bind=proportional-packed:7 --launch_distribution=packed \
    bash -c "\
    source export_DDP_envvars.sh && \
    python -u ../imagenet/main.py \
    --arch resnet50 \
    -j 8 \
    -p 10 \
    -b 128 \
    --training-only \
    --raport-file ${LOG_DIR}/benchmark.dataload.opt.0N.json \
    --epochs 100 \
    --no-checkpoints \
    --data-backend pytorch \
    --amp \
    --memory-format nhwc \
    --use-benchy \
    --noDDP \
    ${DATA_DIR}
    "

# The script stops here; the dali-cpu launch below is unreachable until this
# `exit` is removed.
exit

echo "Starting bency dali"
export BENCHY_CONFIG_FILE=benchy_configs/benchy-conf-0-dali.yaml
jsrun --smpiargs="-disable_gpu_hooks" -n"${nnodes}" -a1 -c42 -g1 -r1 \
    --bind=proportional-packed:7 --launch_distribution=packed \
    bash -c "\
    source export_DDP_envvars.sh && \
    python -u ../imagenet/main.py \
    --arch resnet50 \
    -j 8 \
    -p 10 \
    -b 128 \
    --training-only \
    --raport-file ${LOG_DIR}/benchmark.dataload.opt.0N.json \
    --epochs 100 \
    --no-checkpoints \
    --data-backend dali-cpu \
    --amp \
    --memory-format nhwc \
    --use-benchy \
    ${DATA_DIR}
    "
#!/bin/bash
# Begin LSF directives
#BSUB -P gen166
#BSUB -J sc21
#BSUB -o logs/sc21.o%J
#BSUB -W 0:30
#BSUB -nnodes 1
#BSUB -alloc_flags "nvme smt4"
####BSUB -N
# End LSF directives and begin shell commands

# Launches ../imagenet/main.py (resnet50, --use-benchy) through jsrun twice:
# once with benchy-conf-1.yaml and once with benchy-conf-1-noddp.yaml plus
# --noDDP. A third dali-cpu launch sits after an unconditional `exit` and
# therefore never executes.

# Number of distinct hosts in the LSF host file, ignoring any line that
# mentions "login" or "batch".
nnodes=$(sort -u "${LSB_DJOB_HOSTFILE}" | grep -v -e login -e batch | wc -l)

DATA_DIR=/gpfs/wolf/gen166/proj-shared/atsaris/imagenet/data/
LOG_DIR=ascent_logs/

source /gpfs/wolf/gen166/proj-shared/atsaris/env/activate.sh

# NOTE(review): every launch below passes the same --raport-file path, so a
# later run presumably overwrites the earlier report -- confirm this is wanted.

echo "Starting bency"
export BENCHY_CONFIG_FILE=benchy_configs/benchy-conf-1.yaml
# Inside the double-quoted command string a backslash-newline is removed, so
# every option needs a space before its trailing backslash (the batch size
# used to rely on the next line's indentation to stay a separate word).
jsrun --smpiargs="-disable_gpu_hooks" -n"${nnodes}" -a5 -c42 -g5 -r1 \
    --bind=proportional-packed:7 --launch_distribution=packed \
    bash -c "\
    source export_DDP_envvars.sh && \
    python -u ../imagenet/main.py \
    --arch resnet50 \
    -j 8 \
    -p 10 \
    -b 128 \
    --training-only \
    --raport-file ${LOG_DIR}/benchmark.dataload.opt.1N.json \
    --epochs 100 \
    --no-checkpoints \
    --data-backend pytorch \
    --amp \
    --memory-format nhwc \
    --use-benchy \
    ${DATA_DIR}
    "

echo "Starting bency no DDP"
export BENCHY_CONFIG_FILE=benchy_configs/benchy-conf-1-noddp.yaml
jsrun --smpiargs="-disable_gpu_hooks" -n"${nnodes}" -a5 -c42 -g5 -r1 \
    --bind=proportional-packed:7 --launch_distribution=packed \
    bash -c "\
    source export_DDP_envvars.sh && \
    python -u ../imagenet/main.py \
    --arch resnet50 \
    -j 8 \
    -p 10 \
    -b 128 \
    --training-only \
    --raport-file ${LOG_DIR}/benchmark.dataload.opt.1N.json \
    --epochs 100 \
    --no-checkpoints \
    --data-backend pytorch \
    --amp \
    --memory-format nhwc \
    --use-benchy \
    --noDDP \
    ${DATA_DIR}
    "

# The script stops here; the dali-cpu launch below is unreachable until this
# `exit` is removed.
exit

echo "Starting bency dali"
export BENCHY_CONFIG_FILE=benchy_configs/benchy-conf-1-dali.yaml
jsrun --smpiargs="-disable_gpu_hooks" -n"${nnodes}" -a5 -c42 -g5 -r1 \
    --bind=proportional-packed:7 --launch_distribution=packed \
    bash -c "\
    source export_DDP_envvars.sh && \
    python -u ../imagenet/main.py \
    --arch resnet50 \
    -j 8 \
    -p 10 \
    -b 128 \
    --training-only \
    --raport-file ${LOG_DIR}/benchmark.dataload.opt.1N.json \
    --epochs 100 \
    --no-checkpoints \
    --data-backend dali-cpu \
    --amp \
    --memory-format nhwc \
    --use-benchy \
    ${DATA_DIR}
    "
#!/bin/bash
# Begin LSF directives
#BSUB -P gen166
#BSUB -J sc21
#BSUB -o logs/sc21.o%J
#BSUB -W 0:30
#BSUB -nnodes 2
#BSUB -alloc_flags "nvme smt4"
####BSUB -N
# End LSF directives and begin shell commands

# Launches ../imagenet/main.py (resnet50, --use-benchy) through jsrun twice:
# once with benchy-conf-2.yaml and once with benchy-conf-2-noddp.yaml plus
# --noDDP. A third dali-cpu launch sits after an unconditional `exit` and
# therefore never executes.

# Number of distinct hosts in the LSF host file, ignoring any line that
# mentions "login" or "batch".
nnodes=$(sort -u "${LSB_DJOB_HOSTFILE}" | grep -v -e login -e batch | wc -l)

DATA_DIR=/gpfs/wolf/gen166/proj-shared/atsaris/imagenet/data/
LOG_DIR=ascent_logs/

source /gpfs/wolf/gen166/proj-shared/atsaris/env/activate.sh

# NOTE(review): every launch below passes the same --raport-file path, so a
# later run presumably overwrites the earlier report -- confirm this is wanted.

echo "Starting bency"
export BENCHY_CONFIG_FILE=benchy_configs/benchy-conf-2.yaml
# Inside the double-quoted command string a backslash-newline is removed, so
# every option needs a space before its trailing backslash (the batch size
# used to rely on the next line's indentation to stay a separate word).
jsrun --smpiargs="-disable_gpu_hooks" -n"${nnodes}" -a5 -c42 -g5 -r1 \
    --bind=proportional-packed:7 --launch_distribution=packed \
    bash -c "\
    source export_DDP_envvars.sh && \
    python -u ../imagenet/main.py \
    --arch resnet50 \
    -j 8 \
    -p 10 \
    -b 128 \
    --training-only \
    --raport-file ${LOG_DIR}/benchmark.dataload.opt.2N.json \
    --epochs 100 \
    --no-checkpoints \
    --data-backend pytorch \
    --amp \
    --memory-format nhwc \
    --use-benchy \
    ${DATA_DIR}
    "

echo "Starting bency no DDP"
export BENCHY_CONFIG_FILE=benchy_configs/benchy-conf-2-noddp.yaml
jsrun --smpiargs="-disable_gpu_hooks" -n"${nnodes}" -a5 -c42 -g5 -r1 \
    --bind=proportional-packed:7 --launch_distribution=packed \
    bash -c "\
    source export_DDP_envvars.sh && \
    python -u ../imagenet/main.py \
    --arch resnet50 \
    -j 8 \
    -p 10 \
    -b 128 \
    --training-only \
    --raport-file ${LOG_DIR}/benchmark.dataload.opt.2N.json \
    --epochs 100 \
    --no-checkpoints \
    --data-backend pytorch \
    --amp \
    --memory-format nhwc \
    --use-benchy \
    --noDDP \
    ${DATA_DIR}
    "

# The script stops here; the dali-cpu launch below is unreachable until this
# `exit` is removed.
exit

echo "Starting bency dali"
export BENCHY_CONFIG_FILE=benchy_configs/benchy-conf-2-dali.yaml
jsrun --smpiargs="-disable_gpu_hooks" -n"${nnodes}" -a5 -c42 -g5 -r1 \
    --bind=proportional-packed:7 --launch_distribution=packed \
    bash -c "\
    source export_DDP_envvars.sh && \
    python -u ../imagenet/main.py \
    --arch resnet50 \
    -j 8 \
    -p 10 \
    -b 128 \
    --training-only \
    --raport-file ${LOG_DIR}/benchmark.dataload.opt.2N.json \
    --epochs 100 \
    --no-checkpoints \
    --data-backend dali-cpu \
    --amp \
    --memory-format nhwc \
    --use-benchy \
    ${DATA_DIR}
    "
#!/bin/bash
# Begin LSF directives
#BSUB -P gen166
#BSUB -J sc21
#BSUB -o logs/sc21.o%J
#BSUB -W 0:30
#BSUB -nnodes 3
#BSUB -alloc_flags "nvme smt4"
####BSUB -N
# End LSF directives and begin shell commands

# Launches ../imagenet/main.py (resnet50, --use-benchy) through jsrun twice:
# once with benchy-conf-3.yaml and once with benchy-conf-3-noddp.yaml plus
# --noDDP. A third dali-cpu launch sits after an unconditional `exit` and
# therefore never executes.

# Number of distinct hosts in the LSF host file, ignoring any line that
# mentions "login" or "batch".
nnodes=$(sort -u "${LSB_DJOB_HOSTFILE}" | grep -v -e login -e batch | wc -l)

DATA_DIR=/gpfs/wolf/gen166/proj-shared/atsaris/imagenet/data/
LOG_DIR=ascent_logs/

source /gpfs/wolf/gen166/proj-shared/atsaris/env/activate.sh

# NOTE(review): every launch below passes the same --raport-file path, so a
# later run presumably overwrites the earlier report -- confirm this is wanted.

echo "Starting bency"
export BENCHY_CONFIG_FILE=benchy_configs/benchy-conf-3.yaml
# Inside the double-quoted command string a backslash-newline is removed, so
# every option needs a space before its trailing backslash (the batch size
# used to rely on the next line's indentation to stay a separate word).
jsrun --smpiargs="-disable_gpu_hooks" -n"${nnodes}" -a5 -c42 -g5 -r1 \
    --bind=proportional-packed:7 --launch_distribution=packed \
    bash -c "\
    source export_DDP_envvars.sh && \
    python -u ../imagenet/main.py \
    --arch resnet50 \
    -j 8 \
    -p 10 \
    -b 128 \
    --training-only \
    --raport-file ${LOG_DIR}/benchmark.dataload.opt.3N.json \
    --epochs 100 \
    --no-checkpoints \
    --data-backend pytorch \
    --amp \
    --memory-format nhwc \
    --use-benchy \
    ${DATA_DIR}
    "

echo "Starting bency no DDP"
export BENCHY_CONFIG_FILE=benchy_configs/benchy-conf-3-noddp.yaml
jsrun --smpiargs="-disable_gpu_hooks" -n"${nnodes}" -a5 -c42 -g5 -r1 \
    --bind=proportional-packed:7 --launch_distribution=packed \
    bash -c "\
    source export_DDP_envvars.sh && \
    python -u ../imagenet/main.py \
    --arch resnet50 \
    -j 8 \
    -p 10 \
    -b 128 \
    --training-only \
    --raport-file ${LOG_DIR}/benchmark.dataload.opt.3N.json \
    --epochs 100 \
    --no-checkpoints \
    --data-backend pytorch \
    --amp \
    --memory-format nhwc \
    --use-benchy \
    --noDDP \
    ${DATA_DIR}
    "

# The script stops here; the dali-cpu launch below is unreachable until this
# `exit` is removed.
exit

echo "Starting bency dali"
export BENCHY_CONFIG_FILE=benchy_configs/benchy-conf-3-dali.yaml
jsrun --smpiargs="-disable_gpu_hooks" -n"${nnodes}" -a5 -c42 -g5 -r1 \
    --bind=proportional-packed:7 --launch_distribution=packed \
    bash -c "\
    source export_DDP_envvars.sh && \
    python -u ../imagenet/main.py \
    --arch resnet50 \
    -j 8 \
    -p 10 \
    -b 128 \
    --training-only \
    --raport-file ${LOG_DIR}/benchmark.dataload.opt.3N.json \
    --epochs 100 \
    --no-checkpoints \
    --data-backend dali-cpu \
    --amp \
    --memory-format nhwc \
    --use-benchy \
    ${DATA_DIR}
    "
#!/bin/bash
# Begin LSF directives
#BSUB -P gen166
#BSUB -J sc21
#BSUB -o logs/sc21.o%J
#BSUB -W 0:30
#BSUB -nnodes 4
#BSUB -alloc_flags "nvme smt4"
####BSUB -N
# End LSF directives and begin shell commands
nnodes=$(cat ${LSB_DJOB_HOSTFILE} | sort | uniq | grep -v login | grep -v batch | wc -l)
DATA_DIR=/gpfs/wolf/gen166/proj-shared/atsaris/imagenet/data/
LOG_DIR=ascent_logs/
source /gpfs/wolf/gen166/proj-shared/atsaris/env/activate.sh
echo "Starting bency"
export BENCHY_CONFIG_FILE=benchy_configs/benchy-conf-4.yaml
jsrun --smpiargs="-disable_gpu_hooks" -n${nnodes} -a5 -c42 -g5 -r1 \
--bind=proportional-packed:7 --launch_distribution=packed \
bash -c "\
source export_DDP_envvars.sh && \
python -u ../imagenet/main.py \
--arch resnet50 \
-j 8 \
-p 10 \