Skip to content
GitLab
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Sign in
Toggle navigation
Menu
Open sidebar
Tsaris, Aristeidis (aris)
pytorch_tutorial
Commits
d440969a
Commit
d440969a
authored
Oct 25, 2021
by
Aristeidis Tsaris
Browse files
clean things up
parent
efe645d1
Changes
30
Hide whitespace changes
Inline
Side-by-side
ascent/ascent_logs/parse_json_MultiGPU.ipynb
deleted
100644 → 0
View file @
efe645d1
This source diff could not be displayed because it is too large. You can
view the blob
instead.
ascent/benchy_configs/benchy-conf-0-noddp.yaml
deleted
100644 → 0
View file @
efe645d1
global:
  report_freq: 20
  exit_after_tests: True
  profiler_mode: 'single'
  json_prefix: 'benchy_imagenet_0_noddp'
  output_dir: 'ascent_logs'
IO:
  run_benchmark: True
  nbatches: 40
  ntrials: 2
  nwarmup: 1
synthetic:
  run_benchmark: True
  nbatches: 40
  ntrials: 2
  nwarmup: 1
full:
  run_benchmark: True
  nbatches: 40
  ntrials: 2
  nwarmup: 1
ascent/benchy_configs/benchy-conf-0.yaml
deleted
100644 → 0
View file @
efe645d1
global:
  report_freq: 20
  exit_after_tests: True
  profiler_mode: 'single'
  json_prefix: 'benchy_imagenet_0'
  output_dir: 'ascent_logs'
IO:
  run_benchmark: True
  nbatches: 40
  ntrials: 2
  nwarmup: 1
synthetic:
  run_benchmark: True
  nbatches: 40
  ntrials: 2
  nwarmup: 1
full:
  run_benchmark: True
  nbatches: 40
  ntrials: 2
  nwarmup: 1
ascent/benchy_configs/benchy-conf-1-noddp.yaml
deleted
100644 → 0
View file @
efe645d1
global:
  report_freq: 20
  exit_after_tests: True
  profiler_mode: 'single'
  json_prefix: 'benchy_imagenet_1_noddp'
  output_dir: 'ascent_logs'
IO:
  run_benchmark: True
  nbatches: 40
  ntrials: 2
  nwarmup: 1
synthetic:
  run_benchmark: True
  nbatches: 40
  ntrials: 2
  nwarmup: 1
full:
  run_benchmark: True
  nbatches: 40
  ntrials: 2
  nwarmup: 1
ascent/benchy_configs/benchy-conf-1.yaml
deleted
100644 → 0
View file @
efe645d1
global:
  report_freq: 20
  exit_after_tests: True
  profiler_mode: 'single'
  json_prefix: 'benchy_imagenet_1'
  output_dir: 'ascent_logs'
IO:
  run_benchmark: True
  nbatches: 40
  ntrials: 2
  nwarmup: 1
synthetic:
  run_benchmark: True
  nbatches: 40
  ntrials: 2
  nwarmup: 1
full:
  run_benchmark: True
  nbatches: 40
  ntrials: 2
  nwarmup: 1
ascent/benchy_configs/benchy-conf-2-noddp.yaml
deleted
100644 → 0
View file @
efe645d1
global:
  report_freq: 20
  exit_after_tests: True
  profiler_mode: 'single'
  json_prefix: 'benchy_imagenet_2_noddp'
  output_dir: 'ascent_logs'
IO:
  run_benchmark: True
  nbatches: 40
  ntrials: 2
  nwarmup: 1
synthetic:
  run_benchmark: True
  nbatches: 40
  ntrials: 2
  nwarmup: 1
full:
  run_benchmark: True
  nbatches: 40
  ntrials: 2
  nwarmup: 1
ascent/benchy_configs/benchy-conf-2.yaml
deleted
100644 → 0
View file @
efe645d1
global:
  report_freq: 20
  exit_after_tests: True
  profiler_mode: 'single'
  json_prefix: 'benchy_imagenet_2'
  output_dir: 'ascent_logs'
IO:
  run_benchmark: True
  nbatches: 40
  ntrials: 2
  nwarmup: 1
synthetic:
  run_benchmark: True
  nbatches: 40
  ntrials: 2
  nwarmup: 1
full:
  run_benchmark: True
  nbatches: 40
  ntrials: 2
  nwarmup: 1
ascent/benchy_configs/benchy-conf-3-noddp.yaml
deleted
100644 → 0
View file @
efe645d1
global:
  report_freq: 20
  exit_after_tests: True
  profiler_mode: 'single'
  json_prefix: 'benchy_imagenet_3_noddp'
  output_dir: 'ascent_logs'
IO:
  run_benchmark: True
  nbatches: 40
  ntrials: 2
  nwarmup: 1
synthetic:
  run_benchmark: True
  nbatches: 40
  ntrials: 2
  nwarmup: 1
full:
  run_benchmark: True
  nbatches: 40
  ntrials: 2
  nwarmup: 1
ascent/benchy_configs/benchy-conf-3.yaml
deleted
100644 → 0
View file @
efe645d1
global:
  report_freq: 20
  exit_after_tests: True
  profiler_mode: 'single'
  json_prefix: 'benchy_imagenet_3'
  output_dir: 'ascent_logs'
IO:
  run_benchmark: True
  nbatches: 40
  ntrials: 2
  nwarmup: 1
synthetic:
  run_benchmark: True
  nbatches: 40
  ntrials: 2
  nwarmup: 1
full:
  run_benchmark: True
  nbatches: 40
  ntrials: 2
  nwarmup: 1
ascent/benchy_configs/benchy-conf-4-noddp.yaml
deleted
100644 → 0
View file @
efe645d1
global:
  report_freq: 20
  exit_after_tests: True
  profiler_mode: 'single'
  json_prefix: 'benchy_imagenet_4_noddp'
  output_dir: 'ascent_logs'
IO:
  run_benchmark: True
  nbatches: 40
  ntrials: 2
  nwarmup: 1
synthetic:
  run_benchmark: True
  nbatches: 40
  ntrials: 2
  nwarmup: 1
full:
  run_benchmark: True
  nbatches: 40
  ntrials: 2
  nwarmup: 1
ascent/benchy_configs/benchy-conf-4.yaml
deleted
100644 → 0
View file @
efe645d1
global:
  report_freq: 20
  exit_after_tests: True
  profiler_mode: 'single'
  json_prefix: 'benchy_imagenet_4'
  output_dir: 'ascent_logs'
IO:
  run_benchmark: True
  nbatches: 40
  ntrials: 2
  nwarmup: 1
synthetic:
  run_benchmark: True
  nbatches: 40
  ntrials: 2
  nwarmup: 1
full:
  run_benchmark: True
  nbatches: 40
  ntrials: 2
  nwarmup: 1
ascent/sub_ips_0N.lsf
deleted
100755 → 0
View file @
efe645d1
#!/bin/bash
# Begin LSF directives
#BSUB -P gen166
#BSUB -J sc21
#BSUB -o logs/sc21.o%J
#BSUB -W 0:30
#BSUB -nnodes 1
#BSUB -alloc_flags "nvme smt4"
####BSUB -N
# End LSF directives and begin shell commands

# Benchmark launcher for ../imagenet/main.py (ResNet-50), configuration "0N".
#
# Launches main.py through jsrun twice: once with the default settings and
# once with --noDDP. A third variant using the dali-cpu data backend is kept
# but is opt-in: earlier revisions placed it after an unconditional `exit`,
# so it never ran.
#
# Required environment:
#   LSB_DJOB_HOSTFILE  host file of the job (expected to be provided by LSF)
# Optional environment:
#   RUN_DALI=1         also run the dali-cpu variant after the two default runs

# Index used in the benchy config names and in the report file name.
readonly run_tag=0
# Values passed to jsrun -a / -g for every run in this script.
readonly tasks_per_rs=1
readonly gpus_per_rs=1

#######################################
# Launch one benchmark variant of ../imagenet/main.py through jsrun.
# Globals:
#   nnodes, run_tag, tasks_per_rs, gpus_per_rs, LOG_DIR, DATA_DIR (read)
#   BENCHY_CONFIG_FILE (exported)
# Arguments:
#   $1 - banner line echoed before the run
#   $2 - path exported as BENCHY_CONFIG_FILE
#   $3 - value for main.py --data-backend
#   $4 - optional extra main.py flag, placed just before the data directory
# Returns:
#   exit status of jsrun
#######################################
run_benchmark() {
  local banner=$1 config=$2 backend=$3 extra=${4:-}
  local cmd

  # The command is expanded here, by the outer shell, and handed to
  # `bash -c` as one string; all interpolated values are constants
  # defined in this script.
  # NOTE(review): every variant writes the same --raport-file path, so a
  # later run replaces the report of an earlier one. Left unchanged in case
  # downstream tooling relies on this name - confirm.
  cmd="source export_DDP_envvars.sh && \
python -u ../imagenet/main.py \
--arch resnet50 \
-j 8 \
-p 10 \
-b 128 \
--training-only \
--raport-file ${LOG_DIR}/benchmark.dataload.opt.${run_tag}N.json \
--epochs 100 \
--no-checkpoints \
--data-backend ${backend} \
--amp \
--memory-format nhwc \
--use-benchy \
${extra} \
${DATA_DIR}"

  echo "${banner}"
  export BENCHY_CONFIG_FILE="${config}"
  jsrun --smpiargs="-disable_gpu_hooks" -n "${nnodes}" \
    -a"${tasks_per_rs}" -c42 -g"${gpus_per_rs}" -r1 \
    --bind=proportional-packed:7 --launch_distribution=packed \
    bash -c "${cmd}"
}

# Without the host file the node count below would be computed from stdin,
# so stop with a clear message instead.
: "${LSB_DJOB_HOSTFILE:?LSB_DJOB_HOSTFILE is not set; run this script under LSF (bsub)}"

# Number of distinct host-file entries that contain neither "login" nor "batch".
nnodes=$(sort -u -- "${LSB_DJOB_HOSTFILE}" | grep -c -v -e login -e batch)

DATA_DIR=/gpfs/wolf/gen166/proj-shared/atsaris/imagenet/data/
LOG_DIR=ascent_logs/

source /gpfs/wolf/gen166/proj-shared/atsaris/env/activate.sh

run_benchmark "Starting bency" \
  "benchy_configs/benchy-conf-${run_tag}.yaml" pytorch

run_benchmark "Starting bency no DDP" \
  "benchy_configs/benchy-conf-${run_tag}-noddp.yaml" pytorch --noDDP
noddp_status=$?

# Default behaviour matches the previous unconditional `exit`: stop here and
# report the status of the no-DDP run.
if [[ "${RUN_DALI:-0}" != "1" ]]; then
  exit "${noddp_status}"
fi

# NOTE(review): the dali benchy config referenced here is not among the files
# visible next to this script - confirm it exists before enabling RUN_DALI.
run_benchmark "Starting bency dali" \
  "benchy_configs/benchy-conf-${run_tag}-dali.yaml" dali-cpu
ascent/sub_ips_1N.lsf
deleted
100755 → 0
View file @
efe645d1
#!/bin/bash
# Begin LSF directives
#BSUB -P gen166
#BSUB -J sc21
#BSUB -o logs/sc21.o%J
#BSUB -W 0:30
#BSUB -nnodes 1
#BSUB -alloc_flags "nvme smt4"
####BSUB -N
# End LSF directives and begin shell commands

# Benchmark launcher for ../imagenet/main.py (ResNet-50), configuration "1N".
#
# Launches main.py through jsrun twice: once with the default settings and
# once with --noDDP. A third variant using the dali-cpu data backend is kept
# but is opt-in: earlier revisions placed it after an unconditional `exit`,
# so it never ran.
#
# Required environment:
#   LSB_DJOB_HOSTFILE  host file of the job (expected to be provided by LSF)
# Optional environment:
#   RUN_DALI=1         also run the dali-cpu variant after the two default runs

# Index used in the benchy config names and in the report file name.
readonly run_tag=1
# Values passed to jsrun -a / -g for every run in this script.
readonly tasks_per_rs=5
readonly gpus_per_rs=5

#######################################
# Launch one benchmark variant of ../imagenet/main.py through jsrun.
# Globals:
#   nnodes, run_tag, tasks_per_rs, gpus_per_rs, LOG_DIR, DATA_DIR (read)
#   BENCHY_CONFIG_FILE (exported)
# Arguments:
#   $1 - banner line echoed before the run
#   $2 - path exported as BENCHY_CONFIG_FILE
#   $3 - value for main.py --data-backend
#   $4 - optional extra main.py flag, placed just before the data directory
# Returns:
#   exit status of jsrun
#######################################
run_benchmark() {
  local banner=$1 config=$2 backend=$3 extra=${4:-}
  local cmd

  # The command is expanded here, by the outer shell, and handed to
  # `bash -c` as one string; all interpolated values are constants
  # defined in this script.
  # NOTE(review): every variant writes the same --raport-file path, so a
  # later run replaces the report of an earlier one. Left unchanged in case
  # downstream tooling relies on this name - confirm.
  cmd="source export_DDP_envvars.sh && \
python -u ../imagenet/main.py \
--arch resnet50 \
-j 8 \
-p 10 \
-b 128 \
--training-only \
--raport-file ${LOG_DIR}/benchmark.dataload.opt.${run_tag}N.json \
--epochs 100 \
--no-checkpoints \
--data-backend ${backend} \
--amp \
--memory-format nhwc \
--use-benchy \
${extra} \
${DATA_DIR}"

  echo "${banner}"
  export BENCHY_CONFIG_FILE="${config}"
  jsrun --smpiargs="-disable_gpu_hooks" -n "${nnodes}" \
    -a"${tasks_per_rs}" -c42 -g"${gpus_per_rs}" -r1 \
    --bind=proportional-packed:7 --launch_distribution=packed \
    bash -c "${cmd}"
}

# Without the host file the node count below would be computed from stdin,
# so stop with a clear message instead.
: "${LSB_DJOB_HOSTFILE:?LSB_DJOB_HOSTFILE is not set; run this script under LSF (bsub)}"

# Number of distinct host-file entries that contain neither "login" nor "batch".
nnodes=$(sort -u -- "${LSB_DJOB_HOSTFILE}" | grep -c -v -e login -e batch)

DATA_DIR=/gpfs/wolf/gen166/proj-shared/atsaris/imagenet/data/
LOG_DIR=ascent_logs/

source /gpfs/wolf/gen166/proj-shared/atsaris/env/activate.sh

run_benchmark "Starting bency" \
  "benchy_configs/benchy-conf-${run_tag}.yaml" pytorch

run_benchmark "Starting bency no DDP" \
  "benchy_configs/benchy-conf-${run_tag}-noddp.yaml" pytorch --noDDP
noddp_status=$?

# Default behaviour matches the previous unconditional `exit`: stop here and
# report the status of the no-DDP run.
if [[ "${RUN_DALI:-0}" != "1" ]]; then
  exit "${noddp_status}"
fi

# NOTE(review): the dali benchy config referenced here is not among the files
# visible next to this script - confirm it exists before enabling RUN_DALI.
run_benchmark "Starting bency dali" \
  "benchy_configs/benchy-conf-${run_tag}-dali.yaml" dali-cpu
ascent/sub_ips_2N.lsf
deleted
100755 → 0
View file @
efe645d1
#!/bin/bash
# Begin LSF directives
#BSUB -P gen166
#BSUB -J sc21
#BSUB -o logs/sc21.o%J
#BSUB -W 0:30
#BSUB -nnodes 2
#BSUB -alloc_flags "nvme smt4"
####BSUB -N
# End LSF directives and begin shell commands

# Benchmark launcher for ../imagenet/main.py (ResNet-50), configuration "2N".
#
# Launches main.py through jsrun twice: once with the default settings and
# once with --noDDP. A third variant using the dali-cpu data backend is kept
# but is opt-in: earlier revisions placed it after an unconditional `exit`,
# so it never ran.
#
# Required environment:
#   LSB_DJOB_HOSTFILE  host file of the job (expected to be provided by LSF)
# Optional environment:
#   RUN_DALI=1         also run the dali-cpu variant after the two default runs

# Index used in the benchy config names and in the report file name.
readonly run_tag=2
# Values passed to jsrun -a / -g for every run in this script.
readonly tasks_per_rs=5
readonly gpus_per_rs=5

#######################################
# Launch one benchmark variant of ../imagenet/main.py through jsrun.
# Globals:
#   nnodes, run_tag, tasks_per_rs, gpus_per_rs, LOG_DIR, DATA_DIR (read)
#   BENCHY_CONFIG_FILE (exported)
# Arguments:
#   $1 - banner line echoed before the run
#   $2 - path exported as BENCHY_CONFIG_FILE
#   $3 - value for main.py --data-backend
#   $4 - optional extra main.py flag, placed just before the data directory
# Returns:
#   exit status of jsrun
#######################################
run_benchmark() {
  local banner=$1 config=$2 backend=$3 extra=${4:-}
  local cmd

  # The command is expanded here, by the outer shell, and handed to
  # `bash -c` as one string; all interpolated values are constants
  # defined in this script.
  # NOTE(review): every variant writes the same --raport-file path, so a
  # later run replaces the report of an earlier one. Left unchanged in case
  # downstream tooling relies on this name - confirm.
  cmd="source export_DDP_envvars.sh && \
python -u ../imagenet/main.py \
--arch resnet50 \
-j 8 \
-p 10 \
-b 128 \
--training-only \
--raport-file ${LOG_DIR}/benchmark.dataload.opt.${run_tag}N.json \
--epochs 100 \
--no-checkpoints \
--data-backend ${backend} \
--amp \
--memory-format nhwc \
--use-benchy \
${extra} \
${DATA_DIR}"

  echo "${banner}"
  export BENCHY_CONFIG_FILE="${config}"
  jsrun --smpiargs="-disable_gpu_hooks" -n "${nnodes}" \
    -a"${tasks_per_rs}" -c42 -g"${gpus_per_rs}" -r1 \
    --bind=proportional-packed:7 --launch_distribution=packed \
    bash -c "${cmd}"
}

# Without the host file the node count below would be computed from stdin,
# so stop with a clear message instead.
: "${LSB_DJOB_HOSTFILE:?LSB_DJOB_HOSTFILE is not set; run this script under LSF (bsub)}"

# Number of distinct host-file entries that contain neither "login" nor "batch".
nnodes=$(sort -u -- "${LSB_DJOB_HOSTFILE}" | grep -c -v -e login -e batch)

DATA_DIR=/gpfs/wolf/gen166/proj-shared/atsaris/imagenet/data/
LOG_DIR=ascent_logs/

source /gpfs/wolf/gen166/proj-shared/atsaris/env/activate.sh

run_benchmark "Starting bency" \
  "benchy_configs/benchy-conf-${run_tag}.yaml" pytorch

run_benchmark "Starting bency no DDP" \
  "benchy_configs/benchy-conf-${run_tag}-noddp.yaml" pytorch --noDDP
noddp_status=$?

# Default behaviour matches the previous unconditional `exit`: stop here and
# report the status of the no-DDP run.
if [[ "${RUN_DALI:-0}" != "1" ]]; then
  exit "${noddp_status}"
fi

# NOTE(review): the dali benchy config referenced here is not among the files
# visible next to this script - confirm it exists before enabling RUN_DALI.
run_benchmark "Starting bency dali" \
  "benchy_configs/benchy-conf-${run_tag}-dali.yaml" dali-cpu
ascent/sub_ips_3N.lsf
deleted
100755 → 0
View file @
efe645d1
#!/bin/bash
# Begin LSF directives
#BSUB -P gen166
#BSUB -J sc21
#BSUB -o logs/sc21.o%J
#BSUB -W 0:30
#BSUB -nnodes 3
#BSUB -alloc_flags "nvme smt4"
####BSUB -N
# End LSF directives and begin shell commands

# Benchmark launcher for ../imagenet/main.py (ResNet-50), configuration "3N".
#
# Launches main.py through jsrun twice: once with the default settings and
# once with --noDDP. A third variant using the dali-cpu data backend is kept
# but is opt-in: earlier revisions placed it after an unconditional `exit`,
# so it never ran.
#
# Required environment:
#   LSB_DJOB_HOSTFILE  host file of the job (expected to be provided by LSF)
# Optional environment:
#   RUN_DALI=1         also run the dali-cpu variant after the two default runs

# Index used in the benchy config names and in the report file name.
readonly run_tag=3
# Values passed to jsrun -a / -g for every run in this script.
readonly tasks_per_rs=5
readonly gpus_per_rs=5

#######################################
# Launch one benchmark variant of ../imagenet/main.py through jsrun.
# Globals:
#   nnodes, run_tag, tasks_per_rs, gpus_per_rs, LOG_DIR, DATA_DIR (read)
#   BENCHY_CONFIG_FILE (exported)
# Arguments:
#   $1 - banner line echoed before the run
#   $2 - path exported as BENCHY_CONFIG_FILE
#   $3 - value for main.py --data-backend
#   $4 - optional extra main.py flag, placed just before the data directory
# Returns:
#   exit status of jsrun
#######################################
run_benchmark() {
  local banner=$1 config=$2 backend=$3 extra=${4:-}
  local cmd

  # The command is expanded here, by the outer shell, and handed to
  # `bash -c` as one string; all interpolated values are constants
  # defined in this script.
  # NOTE(review): every variant writes the same --raport-file path, so a
  # later run replaces the report of an earlier one. Left unchanged in case
  # downstream tooling relies on this name - confirm.
  cmd="source export_DDP_envvars.sh && \
python -u ../imagenet/main.py \
--arch resnet50 \
-j 8 \
-p 10 \
-b 128 \
--training-only \
--raport-file ${LOG_DIR}/benchmark.dataload.opt.${run_tag}N.json \
--epochs 100 \
--no-checkpoints \
--data-backend ${backend} \
--amp \
--memory-format nhwc \
--use-benchy \
${extra} \
${DATA_DIR}"

  echo "${banner}"
  export BENCHY_CONFIG_FILE="${config}"
  jsrun --smpiargs="-disable_gpu_hooks" -n "${nnodes}" \
    -a"${tasks_per_rs}" -c42 -g"${gpus_per_rs}" -r1 \
    --bind=proportional-packed:7 --launch_distribution=packed \
    bash -c "${cmd}"
}

# Without the host file the node count below would be computed from stdin,
# so stop with a clear message instead.
: "${LSB_DJOB_HOSTFILE:?LSB_DJOB_HOSTFILE is not set; run this script under LSF (bsub)}"

# Number of distinct host-file entries that contain neither "login" nor "batch".
nnodes=$(sort -u -- "${LSB_DJOB_HOSTFILE}" | grep -c -v -e login -e batch)

DATA_DIR=/gpfs/wolf/gen166/proj-shared/atsaris/imagenet/data/
LOG_DIR=ascent_logs/

source /gpfs/wolf/gen166/proj-shared/atsaris/env/activate.sh

run_benchmark "Starting bency" \
  "benchy_configs/benchy-conf-${run_tag}.yaml" pytorch

run_benchmark "Starting bency no DDP" \
  "benchy_configs/benchy-conf-${run_tag}-noddp.yaml" pytorch --noDDP
noddp_status=$?

# Default behaviour matches the previous unconditional `exit`: stop here and
# report the status of the no-DDP run.
if [[ "${RUN_DALI:-0}" != "1" ]]; then
  exit "${noddp_status}"
fi

# NOTE(review): the dali benchy config referenced here is not among the files
# visible next to this script - confirm it exists before enabling RUN_DALI.
run_benchmark "Starting bency dali" \
  "benchy_configs/benchy-conf-${run_tag}-dali.yaml" dali-cpu
ascent/sub_ips_4N.lsf
deleted
100755 → 0
View file @
efe645d1
#!/bin/bash
# Begin LSF directives
#BSUB -P gen166
#BSUB -J sc21
#BSUB -o logs/sc21.o%J
#BSUB -W 0:30
#BSUB -nnodes 4
#BSUB -alloc_flags "nvme smt4"
####BSUB -N
# End LSF directives and begin shell commands
nnodes
=
$(
cat
${
LSB_DJOB_HOSTFILE
}
|
sort
|
uniq
|
grep
-v
login |
grep
-v
batch |
wc
-l
)
DATA_DIR
=
/gpfs/wolf/gen166/proj-shared/atsaris/imagenet/data/
LOG_DIR
=
ascent_logs/
source
/gpfs/wolf/gen166/proj-shared/atsaris/env/activate.sh
echo
"Starting bency"
export
BENCHY_CONFIG_FILE
=
benchy_configs/benchy-conf-4.yaml