Skip to content
GitLab
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Sign in
Toggle navigation
Menu
Open sidebar
Tsaris, Aristeidis (aris)
pytorch_tutorial
Commits
5a104e99
Commit
5a104e99
authored
Oct 05, 2021
by
Aristeidis Tsaris
Browse files
adding more plots
parent
be5d1f2a
Changes
6
Hide whitespace changes
Inline
Side-by-side
ascent/ascent_logs/parse_json_MultiGPU.ipynb
View file @
5a104e99
This source diff could not be displayed because it is too large. You can
view the blob
instead.
ascent/sub_test_1.lsf
deleted
100755 → 0
View file @
be5d1f2a
#!/bin/bash
# LSF batch script: runs the ResNet-50 training benchmark in
# ../imagenet/main.py with the "pytorch" data backend, one task and one GPU
# per resource set, and writes the report to
# ${LOG_DIR}/benchmark.dataload.opt.1GPU.json.
# NOTE(review): relative paths (logs/, export_DDP_envvars.sh, ../imagenet/)
# presumably resolve against the directory the job is submitted from - confirm.
# Begin LSF directives
#BSUB -P gen166
#BSUB -J sc21
#BSUB -o logs/sc21.o%J
#BSUB -W 0:30
#BSUB -nnodes 1
#BSUB -alloc_flags "nvme smt4"
####BSUB -N
# End LSF directives and begin shell commands

# Fail fast outside an LSF job: without the host file the node count below
# cannot be computed.
: "${LSB_DJOB_HOSTFILE:?LSB_DJOB_HOSTFILE is not set; submit this script with bsub}"

# Count the distinct hosts listed in the job's host file, leaving out every
# entry whose name contains "login" or "batch".
# LC_ALL=C keeps the de-duplication byte-wise, like the plain `sort | uniq`.
nnodes=$(LC_ALL=C sort -u -- "${LSB_DJOB_HOSTFILE}" | grep -c -v -e login -e batch)

DATA_DIR=/gpfs/wolf/gen166/proj-shared/atsaris/imagenet/data/
LOG_DIR=/gpfs/wolf/gen166/proj-shared/atsaris/pytorch_tutorial/ascent/ascent_logs/

source /gpfs/wolf/gen166/proj-shared/atsaris/env/activate.sh

# jsrun layout: ${nnodes} resource sets, one per host (-r1), each with
# 1 task (-a1), 42 cores (-c42) and 1 GPU (-g1).
# The double-quoted bash -c string is expanded by this shell, so ${LOG_DIR}
# and ${DATA_DIR} are substituted before the inner bash starts; each
# backslash-newline inside the quotes is removed, giving one command line.
jsrun --smpiargs="-disable_gpu_hooks" -n"${nnodes}" -a1 -c42 -g1 -r1 \
    --bind=proportional-packed:7 --launch_distribution=packed \
    bash -c "\
    source export_DDP_envvars.sh && \
    python -u ../imagenet/main.py \
    --arch resnet50 \
    -j 1 \
    -p 10 \
    -b 128 \
    --training-only \
    --raport-file ${LOG_DIR}/benchmark.dataload.opt.1GPU.json \
    --epochs 1 \
    --prof 100 \
    --no-checkpoints \
    --data-backend pytorch \
    --amp \
    --memory-format nhwc \
    --dtLdTime \
    ${DATA_DIR}"
ascent/sub_test_
5
.lsf
→
ascent/sub_test_
1N
.lsf
View file @
5a104e99
...
...
@@ -21,11 +21,11 @@ jsrun --smpiargs="-disable_gpu_hooks" -n${nnodes} -a5 -c42 -g5 -r1 \
source export_DDP_envvars.sh &&
\
python -u ../imagenet/main.py
\
--arch resnet50
\
-j
1
\
-j
28
\
-p 10
\
-b 128
\
--training-only
\
--raport-file
${
LOG_DIR
}
/benchmark.dataload.opt.
5GPU
.json
\
--raport-file
${
LOG_DIR
}
/benchmark.dataload.opt.
1N
.json
\
--epochs 1
\
--prof 100
\
--no-checkpoints
\
...
...
@@ -35,3 +35,44 @@ jsrun --smpiargs="-disable_gpu_hooks" -n${nnodes} -a5 -c42 -g5 -r1 \
--dtLdTime
\
${
DATA_DIR
}
"
# Synthetic-data run: launches ../imagenet/main.py with
# `--data-backend syntetic` (spelling is the literal value passed to main.py)
# and writes the report to ${LOG_DIR}/benchmark.syntetic.opt.1N.json.
# jsrun layout: ${nnodes} resource sets, one per host (-r1), each with
# 5 tasks (-a5), 42 cores (-c42) and 5 GPUs (-g5).
# nnodes, LOG_DIR and DATA_DIR are expected to be set earlier in the script
# (not visible in this hunk); they are expanded by this shell because the
# bash -c argument is double-quoted.
jsrun --smpiargs="-disable_gpu_hooks" -n"${nnodes}" -a5 -c42 -g5 -r1 \
    --bind=proportional-packed:7 --launch_distribution=packed \
    bash -c "\
    source export_DDP_envvars.sh && \
    python -u ../imagenet/main.py \
    --arch resnet50 \
    -j 28 \
    -p 10 \
    -b 128 \
    --training-only \
    --raport-file ${LOG_DIR}/benchmark.syntetic.opt.1N.json \
    --epochs 1 \
    --prof 100 \
    --no-checkpoints \
    --data-backend syntetic \
    --amp \
    --memory-format nhwc \
    ${DATA_DIR}"
# Synthetic-data run with `--noDDP`: launches ../imagenet/main.py with
# `--data-backend syntetic` (spelling is the literal value passed to main.py)
# and writes the report to ${LOG_DIR}/benchmark.syntetic.noddp.opt.1N.json.
# jsrun layout: ${nnodes} resource sets, one per host (-r1), each with
# 5 tasks (-a5), 42 cores (-c42) and 5 GPUs (-g5).
# nnodes, LOG_DIR and DATA_DIR are expected to be set earlier in the script
# (not visible in this hunk); they are expanded by this shell because the
# bash -c argument is double-quoted.
jsrun --smpiargs="-disable_gpu_hooks" -n"${nnodes}" -a5 -c42 -g5 -r1 \
    --bind=proportional-packed:7 --launch_distribution=packed \
    bash -c "\
    source export_DDP_envvars.sh && \
    python -u ../imagenet/main.py \
    --arch resnet50 \
    -j 28 \
    -p 10 \
    -b 128 \
    --training-only \
    --raport-file ${LOG_DIR}/benchmark.syntetic.noddp.opt.1N.json \
    --epochs 1 \
    --prof 100 \
    --no-checkpoints \
    --data-backend syntetic \
    --amp \
    --memory-format nhwc \
    --noDDP \
    ${DATA_DIR}"
ascent/sub_test_
10
.lsf
→
ascent/sub_test_
2N
.lsf
View file @
5a104e99
...
...
@@ -21,11 +21,11 @@ jsrun --smpiargs="-disable_gpu_hooks" -n${nnodes} -a5 -c42 -g5 -r1 \
source export_DDP_envvars.sh &&
\
python -u ../imagenet/main.py
\
--arch resnet50
\
-j
1
\
-j
28
\
-p 10
\
-b 128
\
--training-only
\
--raport-file
${
LOG_DIR
}
/benchmark.dataload.opt.
10GPU
.json
\
--raport-file
${
LOG_DIR
}
/benchmark.dataload.opt.
2N
.json
\
--epochs 1
\
--prof 100
\
--no-checkpoints
\
...
...
@@ -35,3 +35,44 @@ jsrun --smpiargs="-disable_gpu_hooks" -n${nnodes} -a5 -c42 -g5 -r1 \
--dtLdTime
\
${
DATA_DIR
}
"
# Synthetic-data run: launches ../imagenet/main.py with
# `--data-backend syntetic` (spelling is the literal value passed to main.py)
# and writes the report to ${LOG_DIR}/benchmark.syntetic.opt.2N.json.
# jsrun layout: ${nnodes} resource sets, one per host (-r1), each with
# 5 tasks (-a5), 42 cores (-c42) and 5 GPUs (-g5).
# nnodes, LOG_DIR and DATA_DIR are expected to be set earlier in the script
# (not visible in this hunk); they are expanded by this shell because the
# bash -c argument is double-quoted.
jsrun --smpiargs="-disable_gpu_hooks" -n"${nnodes}" -a5 -c42 -g5 -r1 \
    --bind=proportional-packed:7 --launch_distribution=packed \
    bash -c "\
    source export_DDP_envvars.sh && \
    python -u ../imagenet/main.py \
    --arch resnet50 \
    -j 28 \
    -p 10 \
    -b 128 \
    --training-only \
    --raport-file ${LOG_DIR}/benchmark.syntetic.opt.2N.json \
    --epochs 1 \
    --prof 100 \
    --no-checkpoints \
    --data-backend syntetic \
    --amp \
    --memory-format nhwc \
    ${DATA_DIR}"
# Synthetic-data run with `--noDDP`: launches ../imagenet/main.py with
# `--data-backend syntetic` (spelling is the literal value passed to main.py)
# and writes the report to ${LOG_DIR}/benchmark.syntetic.noddp.opt.2N.json.
# jsrun layout: ${nnodes} resource sets, one per host (-r1), each with
# 5 tasks (-a5), 42 cores (-c42) and 5 GPUs (-g5).
# nnodes, LOG_DIR and DATA_DIR are expected to be set earlier in the script
# (not visible in this hunk); they are expanded by this shell because the
# bash -c argument is double-quoted.
jsrun --smpiargs="-disable_gpu_hooks" -n"${nnodes}" -a5 -c42 -g5 -r1 \
    --bind=proportional-packed:7 --launch_distribution=packed \
    bash -c "\
    source export_DDP_envvars.sh && \
    python -u ../imagenet/main.py \
    --arch resnet50 \
    -j 28 \
    -p 10 \
    -b 128 \
    --training-only \
    --raport-file ${LOG_DIR}/benchmark.syntetic.noddp.opt.2N.json \
    --epochs 1 \
    --prof 100 \
    --no-checkpoints \
    --data-backend syntetic \
    --amp \
    --memory-format nhwc \
    --noDDP \
    ${DATA_DIR}"
ascent/sub_test_
15
.lsf
→
ascent/sub_test_
3N
.lsf
View file @
5a104e99
...
...
@@ -21,11 +21,11 @@ jsrun --smpiargs="-disable_gpu_hooks" -n${nnodes} -a5 -c42 -g5 -r1 \
source export_DDP_envvars.sh &&
\
python -u ../imagenet/main.py
\
--arch resnet50
\
-j
1
\
-j
28
\
-p 10
\
-b 128
\
--training-only
\
--raport-file
${
LOG_DIR
}
/benchmark.dataload.opt.
15GPU
.json
\
--raport-file
${
LOG_DIR
}
/benchmark.dataload.opt.
3N
.json
\
--epochs 1
\
--prof 100
\
--no-checkpoints
\
...
...
@@ -35,3 +35,44 @@ jsrun --smpiargs="-disable_gpu_hooks" -n${nnodes} -a5 -c42 -g5 -r1 \
--dtLdTime
\
${
DATA_DIR
}
"
# Synthetic-data run: launches ../imagenet/main.py with
# `--data-backend syntetic` (spelling is the literal value passed to main.py)
# and writes the report to ${LOG_DIR}/benchmark.syntetic.opt.3N.json.
# jsrun layout: ${nnodes} resource sets, one per host (-r1), each with
# 5 tasks (-a5), 42 cores (-c42) and 5 GPUs (-g5).
# nnodes, LOG_DIR and DATA_DIR are expected to be set earlier in the script
# (not visible in this hunk); they are expanded by this shell because the
# bash -c argument is double-quoted.
jsrun --smpiargs="-disable_gpu_hooks" -n"${nnodes}" -a5 -c42 -g5 -r1 \
    --bind=proportional-packed:7 --launch_distribution=packed \
    bash -c "\
    source export_DDP_envvars.sh && \
    python -u ../imagenet/main.py \
    --arch resnet50 \
    -j 28 \
    -p 10 \
    -b 128 \
    --training-only \
    --raport-file ${LOG_DIR}/benchmark.syntetic.opt.3N.json \
    --epochs 1 \
    --prof 100 \
    --no-checkpoints \
    --data-backend syntetic \
    --amp \
    --memory-format nhwc \
    ${DATA_DIR}"
# Synthetic-data run with `--noDDP`: launches ../imagenet/main.py with
# `--data-backend syntetic` (spelling is the literal value passed to main.py)
# and writes the report to ${LOG_DIR}/benchmark.syntetic.noddp.opt.3N.json.
# jsrun layout: ${nnodes} resource sets, one per host (-r1), each with
# 5 tasks (-a5), 42 cores (-c42) and 5 GPUs (-g5).
# nnodes, LOG_DIR and DATA_DIR are expected to be set earlier in the script
# (not visible in this hunk); they are expanded by this shell because the
# bash -c argument is double-quoted.
jsrun --smpiargs="-disable_gpu_hooks" -n"${nnodes}" -a5 -c42 -g5 -r1 \
    --bind=proportional-packed:7 --launch_distribution=packed \
    bash -c "\
    source export_DDP_envvars.sh && \
    python -u ../imagenet/main.py \
    --arch resnet50 \
    -j 28 \
    -p 10 \
    -b 128 \
    --training-only \
    --raport-file ${LOG_DIR}/benchmark.syntetic.noddp.opt.3N.json \
    --epochs 1 \
    --prof 100 \
    --no-checkpoints \
    --data-backend syntetic \
    --amp \
    --memory-format nhwc \
    --noDDP \
    ${DATA_DIR}"
ascent/sub_test_
20
.lsf
→
ascent/sub_test_
4N
.lsf
View file @
5a104e99
...
...
@@ -21,11 +21,11 @@ jsrun --smpiargs="-disable_gpu_hooks" -n${nnodes} -a5 -c42 -g5 -r1 \
source export_DDP_envvars.sh &&
\
python -u ../imagenet/main.py
\
--arch resnet50
\
-j
1
\
-j
28
\
-p 10
\
-b 128
\
--training-only
\
--raport-file
${
LOG_DIR
}
/benchmark.dataload.opt.
20GPU
.json
\
--raport-file
${
LOG_DIR
}
/benchmark.dataload.opt.
4N
.json
\
--epochs 1
\
--prof 100
\
--no-checkpoints
\
...
...
@@ -35,3 +35,44 @@ jsrun --smpiargs="-disable_gpu_hooks" -n${nnodes} -a5 -c42 -g5 -r1 \
--dtLdTime
\
${
DATA_DIR
}
"
# Synthetic-data run: launches ../imagenet/main.py with
# `--data-backend syntetic` (spelling is the literal value passed to main.py)
# and writes the report to ${LOG_DIR}/benchmark.syntetic.opt.4N.json.
# jsrun layout: ${nnodes} resource sets, one per host (-r1), each with
# 5 tasks (-a5), 42 cores (-c42) and 5 GPUs (-g5).
# nnodes, LOG_DIR and DATA_DIR are expected to be set earlier in the script
# (not visible in this hunk); they are expanded by this shell because the
# bash -c argument is double-quoted.
jsrun --smpiargs="-disable_gpu_hooks" -n"${nnodes}" -a5 -c42 -g5 -r1 \
    --bind=proportional-packed:7 --launch_distribution=packed \
    bash -c "\
    source export_DDP_envvars.sh && \
    python -u ../imagenet/main.py \
    --arch resnet50 \
    -j 28 \
    -p 10 \
    -b 128 \
    --training-only \
    --raport-file ${LOG_DIR}/benchmark.syntetic.opt.4N.json \
    --epochs 1 \
    --prof 100 \
    --no-checkpoints \
    --data-backend syntetic \
    --amp \
    --memory-format nhwc \
    ${DATA_DIR}"
# Synthetic-data run with `--noDDP`: launches ../imagenet/main.py with
# `--data-backend syntetic` (spelling is the literal value passed to main.py)
# and writes the report to ${LOG_DIR}/benchmark.syntetic.noddp.opt.4N.json.
# jsrun layout: ${nnodes} resource sets, one per host (-r1), each with
# 5 tasks (-a5), 42 cores (-c42) and 5 GPUs (-g5).
# nnodes, LOG_DIR and DATA_DIR are expected to be set earlier in the script
# (not visible in this hunk); they are expanded by this shell because the
# bash -c argument is double-quoted.
jsrun --smpiargs="-disable_gpu_hooks" -n"${nnodes}" -a5 -c42 -g5 -r1 \
    --bind=proportional-packed:7 --launch_distribution=packed \
    bash -c "\
    source export_DDP_envvars.sh && \
    python -u ../imagenet/main.py \
    --arch resnet50 \
    -j 28 \
    -p 10 \
    -b 128 \
    --training-only \
    --raport-file ${LOG_DIR}/benchmark.syntetic.noddp.opt.4N.json \
    --epochs 1 \
    --prof 100 \
    --no-checkpoints \
    --data-backend syntetic \
    --amp \
    --memory-format nhwc \
    --noDDP \
    ${DATA_DIR}"
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment