sub_ips_0N.lsf 1.25 KB
Newer Older
Aristeidis Tsaris's avatar
Aristeidis Tsaris committed
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
#!/bin/bash
# Begin LSF directives
#BSUB -P gen166
#BSUB -J sc21
#BSUB -o logs/sc21.o%J
#BSUB -W 1:00
#BSUB -nnodes 1
#BSUB -alloc_flags "nvme smt4"
####BSUB -N
# End LSF directives and begin shell commands

#######################################
# Count the distinct hosts listed in a host file, skipping any entry whose
# name contains "login" or "batch".
# Arguments: $1 - path to a host file with one host name per line
# Outputs:   the number of distinct remaining hosts, on stdout
#######################################
count_compute_nodes() {
  local hostfile=$1
  # The path is quoted so that an unset/empty value makes sort fail loudly
  # instead of falling back to reading stdin. LC_ALL=C keeps the
  # de-duplication byte-wise. grep -c prints the count directly (it still
  # prints "0" when no line is left).
  LC_ALL=C sort -u -- "${hostfile}" | grep -c -v -e login -e batch
}

# Number of hosts in the job's host file (LSB_DJOB_HOSTFILE) that are not
# login/batch entries; passed to jsrun as the -n value further down.
nnodes=$(count_compute_nodes "${LSB_DJOB_HOSTFILE:-}")
if (( ${nnodes:-0} < 1 )); then
  # Non-fatal on purpose: the launches below are still attempted, but the
  # log now states why they are likely to be rejected.
  echo "WARNING: no compute nodes found via LSB_DJOB_HOSTFILE='${LSB_DJOB_HOSTFILE:-}'" >&2
fi

# Locations of the training data and of the training code.
DATA_DIR=/gpfs/wolf/gen166/proj-shared/atsaris/imagenet/data
CODE_DIR=/gpfs/wolf/gen166/proj-shared/atsaris/pytorch_tutorial/imagenet_simple

# Load the project environment into this shell before launching anything.
source /gpfs/wolf/gen166/proj-shared/atsaris/env/activate.sh

# Run 1: example1.py with benchy enabled, using the "0-8w" benchy config.
echo "Starting bency"
# Relative path; presumably resolved against the job's working directory by
# whatever reads BENCHY_CONFIG_FILE — confirm.
export BENCHY_CONFIG_FILE=benchy_configs/benchy-conf-0-8w.yaml

# Launcher options, kept in an array so each one sits on its own line.
# Per the jsrun short options: -n resource sets, -a tasks per resource set,
# -c cores per resource set, -g GPUs per resource set, -r resource sets per
# host.
jsrun_opts=(
  --smpiargs="-disable_gpu_hooks"
  -n"${nnodes}" -a1 -c42 -g1 -r1
  --bind=proportional-packed:7
  --launch_distribution=packed
)

# Command line executed by `bash -c` on each launched task, assembled one
# argument group at a time.
train_cmd="python -u ${CODE_DIR}/example1.py"
train_cmd+=" --train-dir ${DATA_DIR}/train"
train_cmd+=" --epochs 100"
train_cmd+=" --batch-size 32"
train_cmd+=" --use-benchy"

jsrun "${jsrun_opts[@]}" bash -c "${train_cmd}"

# Run 2: same training script with benchy enabled, but with the "noddp"
# benchy config and the extra --noddp flag.
echo "Starting bency no DDP"
# Relative path; presumably resolved against the job's working directory by
# whatever reads BENCHY_CONFIG_FILE — confirm.
export BENCHY_CONFIG_FILE=benchy_configs/benchy-conf-0-8w-noddp.yaml

# Launcher options, kept in an array so each one sits on its own line.
# Per the jsrun short options: -n resource sets, -a tasks per resource set,
# -c cores per resource set, -g GPUs per resource set, -r resource sets per
# host.
jsrun_opts=(
  --smpiargs="-disable_gpu_hooks"
  -n"${nnodes}" -a1 -c42 -g1 -r1
  --bind=proportional-packed:7
  --launch_distribution=packed
)

# Command line executed by `bash -c` on each launched task, assembled one
# argument group at a time.
train_cmd="python -u ${CODE_DIR}/example1.py"
train_cmd+=" --train-dir ${DATA_DIR}/train"
train_cmd+=" --epochs 100"
train_cmd+=" --batch-size 32"
train_cmd+=" --use-benchy"
train_cmd+=" --noddp"

jsrun "${jsrun_opts[@]}" bash -c "${train_cmd}"