Loading torch_experiments/boston.lsf +2 −2 Original line number Diff line number Diff line Loading @@ -2,7 +2,7 @@ #BSUB -P MED107 #BSUB -W 2:00 #BSUB -nnodes 6 #BSUB -nnodes 40 #BSUB -J boston #BSUB -o output/boston.o #BSUB -e output/boston.e Loading @@ -11,7 +11,7 @@ #BSUB -rn # this must match the -nnodes argument above NNODES=6 NNODES=40 set -ex Loading torch_experiments/iris.lsf 0 → 100644 +44 −0 Original line number Diff line number Diff line #!/bin/bash #BSUB -P MED107 #BSUB -W 2:00 #BSUB -nnodes 40 #BSUB -J iris #BSUB -o output/iris.o #BSUB -e output/iris.e #BSUB -q batch #BSUB -rn # this must match the -nnodes argument above NNODES=40 set -ex PROJDIR=$PROJWORK/med107 USERDIR=$PROJDIR/$USER REPODIR=$USERDIR/bottleneck_nngp JOBSCRIPTDIR=$REPODIR/torch_experiments # set up env . /sw/summit/init/profile module load ibm-wml-ce conda activate $USERDIR/condaenvs/torch jsrun -n$((NNODES*6)) -a1 -g1 -c7 -r6 \ -E LD_LIBRARY_PATH \ --bind=proportional-packed:7 --launch_distribution=packed \ $JOBSCRIPTDIR/mapper.sh \ $(which python) $JOBSCRIPTDIR/main.py \ --dataset=iris \ --depths=1,%depth% \ --widths=%width% \ --vb=0.1 \ --vw=1.0 \ --vn=0.1 \ --train_samples=100 \ --test_samples=1000 \ --lr=1e-1 \ --iters=2000 \ --gpu=0 \ --manual_grad \ torch_experiments/mapper.sh +15 −4 Original line number Diff line number Diff line Loading @@ -2,8 +2,13 @@ set -e # post-bottleneck depths and bottleneck widths depths=(10 20 30 40 50 60 70 80 90) widths=(2 8 32 128) depths=(1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20) widths=(1 2 4 8 16 32 64 128 256 512 1024 0) # number of models num_depths=${#depths[@]} num_widths=${#widths[@]} let num_archs=num_depths*num_widths # get argument CMD=$* Loading @@ -16,9 +21,15 @@ LOCALRANK=$OMPI_COMM_WORLD_LOCAL_RANK RANK=$OMPI_COMM_WORLD_RANK JOB=$LSB_JOBID # exit if rank larger than number of architectures if [ $RANK -ge $num_archs ] then exit 0 fi # calculate architecture given rank DEPTH=${depths[$((RANK/4))]} 
WIDTH=${widths[$((RANK%4))]} DEPTH=${depths[$((RANK/num_widths))]} WIDTH=${widths[$((RANK%num_widths))]} # substitute into command CMD=${CMD//%depth%/$DEPTH} Loading torch_experiments/rings.lsf 0 → 100644 +44 −0 Original line number Diff line number Diff line #!/bin/bash #BSUB -P MED107 #BSUB -W 2:00 #BSUB -nnodes 40 #BSUB -J rings #BSUB -o output/rings.o #BSUB -e output/rings.e #BSUB -q batch #BSUB -rn # this must match the -nnodes argument above NNODES=40 set -ex PROJDIR=$PROJWORK/med107 USERDIR=$PROJDIR/$USER REPODIR=$USERDIR/bottleneck_nngp JOBSCRIPTDIR=$REPODIR/torch_experiments # set up env . /sw/summit/init/profile module load ibm-wml-ce conda activate $USERDIR/condaenvs/torch jsrun -n$((NNODES*6)) -a1 -g1 -c7 -r6 \ -E LD_LIBRARY_PATH \ --bind=proportional-packed:7 --launch_distribution=packed \ $JOBSCRIPTDIR/mapper.sh \ $(which python) $JOBSCRIPTDIR/main.py \ --dataset=rings \ --depths=1,%depth% \ --widths=%width% \ --vb=0.1 \ --vw=1.0 \ --vn=0.1 \ --train_samples=100 \ --test_samples=1000 \ --lr=1e-1 \ --iters=250 \ --gpu=0 \ --manual_grad \ Loading
torch_experiments/boston.lsf +2 −2 Original line number Diff line number Diff line Loading @@ -2,7 +2,7 @@ #BSUB -P MED107 #BSUB -W 2:00 #BSUB -nnodes 6 #BSUB -nnodes 40 #BSUB -J boston #BSUB -o output/boston.o #BSUB -e output/boston.e Loading @@ -11,7 +11,7 @@ #BSUB -rn # this must match the -nnodes argument above NNODES=6 NNODES=40 set -ex Loading
#!/bin/bash
#BSUB -P MED107
#BSUB -W 2:00
#BSUB -nnodes 40
#BSUB -J iris
#BSUB -o output/iris.o
#BSUB -e output/iris.e
#BSUB -q batch
#BSUB -rn

# torch_experiments/iris.lsf
#
# LSF batch script that runs main.py on the "iris" dataset via jsrun.
# Each task is launched through mapper.sh, which receives the python command
# line as its arguments. The %depth% token is a placeholder that mapper.sh
# replaces with a per-rank value; %width% is presumably handled the same way
# (verify against mapper.sh).
#
# Required environment: PROJWORK (project work area root) and USER.

# this must match the -nnodes argument above
# (40 nodes x 6 resource sets = 240 tasks, which lines up with the
# 20 depths x 12 widths grid mapper.sh enumerated when this was written)
NNODES=40

set -ex

# Abort with a clear message rather than silently resolving to /med107.
PROJDIR=${PROJWORK:?PROJWORK must be set}/med107
USERDIR=$PROJDIR/$USER
REPODIR=$USERDIR/bottleneck_nngp
JOBSCRIPTDIR=$REPODIR/torch_experiments

# set up env
. /sw/summit/init/profile
module load ibm-wml-ce
conda activate "$USERDIR/condaenvs/torch"

# Resolve the interpreter in a standalone assignment so `set -e` stops the
# job if python is not on PATH (a failing substitution inside the jsrun
# argument list would go unnoticed and drop an argument).
PYTHON=$(command -v python)

# -n: total resource sets, -r6: resource sets per node, and per resource set
# -a1: one task, -g1: one GPU, -c7: seven cores.
# -E forwards LD_LIBRARY_PATH to the launched tasks.
jsrun -n"$((NNODES * 6))" -a1 -g1 -c7 -r6 \
  -E LD_LIBRARY_PATH \
  --bind=proportional-packed:7 --launch_distribution=packed \
  "$JOBSCRIPTDIR/mapper.sh" \
  "$PYTHON" "$JOBSCRIPTDIR/main.py" \
  --dataset=iris \
  --depths=1,%depth% \
  --widths=%width% \
  --vb=0.1 \
  --vw=1.0 \
  --vn=0.1 \
  --train_samples=100 \
  --test_samples=1000 \
  --lr=1e-1 \
  --iters=2000 \
  --gpu=0 \
  --manual_grad
torch_experiments/mapper.sh +15 −4 Original line number Diff line number Diff line Loading @@ -2,8 +2,13 @@ set -e # post-bottleneck depths and bottleneck widths depths=(10 20 30 40 50 60 70 80 90) widths=(2 8 32 128) depths=(1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20) widths=(1 2 4 8 16 32 64 128 256 512 1024 0) # number of models num_depths=${#depths[@]} num_widths=${#widths[@]} let num_archs=num_depths*num_widths # get argument CMD=$* Loading @@ -16,9 +21,15 @@ LOCALRANK=$OMPI_COMM_WORLD_LOCAL_RANK RANK=$OMPI_COMM_WORLD_RANK JOB=$LSB_JOBID # exit if rank larger than number of architectures if [ $RANK -ge $num_archs ] then exit 0 fi # calculate architecture given rank DEPTH=${depths[$((RANK/4))]} WIDTH=${widths[$((RANK%4))]} DEPTH=${depths[$((RANK/num_widths))]} WIDTH=${widths[$((RANK%num_widths))]} # substitute into command CMD=${CMD//%depth%/$DEPTH} Loading
#!/bin/bash
#BSUB -P MED107
#BSUB -W 2:00
#BSUB -nnodes 40
#BSUB -J rings
#BSUB -o output/rings.o
#BSUB -e output/rings.e
#BSUB -q batch
#BSUB -rn

# torch_experiments/rings.lsf
#
# LSF batch script that runs main.py on the "rings" dataset via jsrun.
# Each task is launched through mapper.sh, which receives the python command
# line as its arguments. The %depth% token is a placeholder that mapper.sh
# replaces with a per-rank value; %width% is presumably handled the same way
# (verify against mapper.sh).
#
# Required environment: PROJWORK (project work area root) and USER.

# this must match the -nnodes argument above
# (40 nodes x 6 resource sets = 240 tasks, which lines up with the
# 20 depths x 12 widths grid mapper.sh enumerated when this was written)
NNODES=40

set -ex

# Abort with a clear message rather than silently resolving to /med107.
PROJDIR=${PROJWORK:?PROJWORK must be set}/med107
USERDIR=$PROJDIR/$USER
REPODIR=$USERDIR/bottleneck_nngp
JOBSCRIPTDIR=$REPODIR/torch_experiments

# set up env
. /sw/summit/init/profile
module load ibm-wml-ce
conda activate "$USERDIR/condaenvs/torch"

# Resolve the interpreter in a standalone assignment so `set -e` stops the
# job if python is not on PATH (a failing substitution inside the jsrun
# argument list would go unnoticed and drop an argument).
PYTHON=$(command -v python)

# -n: total resource sets, -r6: resource sets per node, and per resource set
# -a1: one task, -g1: one GPU, -c7: seven cores.
# -E forwards LD_LIBRARY_PATH to the launched tasks.
jsrun -n"$((NNODES * 6))" -a1 -g1 -c7 -r6 \
  -E LD_LIBRARY_PATH \
  --bind=proportional-packed:7 --launch_distribution=packed \
  "$JOBSCRIPTDIR/mapper.sh" \
  "$PYTHON" "$JOBSCRIPTDIR/main.py" \
  --dataset=rings \
  --depths=1,%depth% \
  --widths=%width% \
  --vb=0.1 \
  --vw=1.0 \
  --vn=0.1 \
  --train_samples=100 \
  --test_samples=1000 \
  --lr=1e-1 \
  --iters=250 \
  --gpu=0 \
  --manual_grad