Update job scripts. (735fe337) · Commits · Agrawal, Devanshu / bottleneck_nngp

torch_experiments/boston.lsf

+2 −2

Original line number	Diff line number	Diff line
		@@ -2,7 +2,7 @@

		#BSUB -P MED107
		#BSUB -W 2:00
		#BSUB -nnodes 6
		#BSUB -nnodes 40
		#BSUB -J boston
		#BSUB -o output/boston.o
		#BSUB -e output/boston.e
		@@ -11,7 +11,7 @@
		#BSUB -rn

		# this must match the -nnodes argument above
		NNODES=6
		NNODES=40

		set -ex

0 → 100644

+44 −0

Original line number	Diff line number	Diff line
		#!/bin/bash

		#BSUB -P MED107
		#BSUB -W 2:00
		#BSUB -nnodes 40
		#BSUB -J iris
		#BSUB -o output/iris.o
		#BSUB -e output/iris.e

		#BSUB -q batch
		#BSUB -rn

		# Batch job: run main.py on the iris dataset, with every task launched
		# through mapper.sh. mapper.sh replaces the %depth% placeholder below
		# (and presumably %width% -- confirm in mapper.sh) with values chosen
		# from the task's rank.

		# this must match the -nnodes argument above
		NNODES=40

		set -ex

		# Fail fast instead of silently building paths rooted at "/med107".
		: "${PROJWORK:?PROJWORK must be set}"
		: "${USER:?USER must be set}"

		PROJDIR="$PROJWORK/med107"
		USERDIR="$PROJDIR/$USER"
		REPODIR="$USERDIR/bottleneck_nngp"
		JOBSCRIPTDIR="$REPODIR/torch_experiments"

		# set up env
		. /sw/summit/init/profile
		module load ibm-wml-ce
		conda activate "$USERDIR/condaenvs/torch"

		# Resolve the interpreter of the activated environment up front, so a
		# failed lookup aborts the job (set -e) instead of handing jsrun a
		# command line with the interpreter missing.
		PYTHON="$(command -v python)"

		# NNODES*6 resource sets in total, 6 per node (-r6), each with one task
		# (-a1), one GPU (-g1) and 7 cores (-c7).
		# The last argument deliberately has no trailing backslash: a dangling
		# continuation would swallow any line later appended to this script.
		jsrun -n"$((NNODES*6))" -a1 -g1 -c7 -r6 \
		  -E LD_LIBRARY_PATH \
		  --bind=proportional-packed:7 --launch_distribution=packed \
		  "$JOBSCRIPTDIR/mapper.sh" \
		  "$PYTHON" "$JOBSCRIPTDIR/main.py" \
		  --dataset=iris \
		  --depths=1,%depth% \
		  --widths=%width% \
		  --vb=0.1 \
		  --vw=1.0 \
		  --vn=0.1 \
		  --train_samples=100 \
		  --test_samples=1000 \
		  --lr=1e-1 \
		  --iters=2000 \
		  --gpu=0 \
		  --manual_grad

+15 −4

Original line number	Diff line number	Diff line
		@@ -2,8 +2,13 @@
		set -e

		# post-bottleneck depths and bottleneck widths
		# Each array is assigned twice in this view; the second assignment of each
		# (20 depths, 12 widths) replaces the first (9 depths, 4 widths).
		# NOTE(review): the meaning of the trailing width 0 is not visible here --
		# presumably a special case handled by the launched program; confirm.
		depths=(10 20 30 40 50 60 70 80 90)
		widths=(2 8 32 128)
		depths=(1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20)
		widths=(1 2 4 8 16 32 64 128 256 512 1024 0)

		# number of models
		# One model per (depth, width) pair. Arithmetic expansion is used rather
		# than `let`, whose exit status is 1 whenever the result is 0 and would
		# therefore abort the script under `set -e` before any diagnostics.
		num_depths=${#depths[@]}
		num_widths=${#widths[@]}
		num_archs=$((num_depths * num_widths))

		# get argument
		CMD=$*
		@@ -16,9 +21,15 @@ LOCALRANK=$OMPI_COMM_WORLD_LOCAL_RANK
		RANK=$OMPI_COMM_WORLD_RANK
		JOB=$LSB_JOBID

		# refuse to continue without a usable rank: an empty or non-numeric RANK
		# would otherwise fall through the comparison below and be treated as
		# rank 0 by the arithmetic that selects the architecture
		if ! [[ "$RANK" =~ ^(0|[1-9][0-9]*)$ ]]
		then
		  echo "mapper: RANK='$RANK' is not a non-negative integer" >&2
		  exit 1
		fi

		# exit if rank larger than number of architectures
		if (( RANK >= num_archs ))
		then
		  exit 0
		fi

		# calculate architecture given rank
		# Ranks enumerate (depth, width) pairs with the width index varying
		# fastest: depth index = RANK / num_widths, width index = RANK % num_widths.
		# The first DEPTH/WIDTH pair uses a hard-coded 4 and is immediately
		# overwritten by the second pair, which uses num_widths instead.
		DEPTH=${depths[$((RANK/4))]}
		WIDTH=${widths[$((RANK%4))]}
		DEPTH=${depths[$((RANK/num_widths))]}
		WIDTH=${widths[$((RANK%num_widths))]}

		# substitute into command
		CMD=${CMD//%depth%/$DEPTH}

0 → 100644

+44 −0

Original line number	Diff line number	Diff line
		#!/bin/bash

		#BSUB -P MED107
		#BSUB -W 2:00
		#BSUB -nnodes 40
		#BSUB -J rings
		#BSUB -o output/rings.o
		#BSUB -e output/rings.e

		#BSUB -q batch
		#BSUB -rn

		# Batch job: run main.py on the rings dataset, with every task launched
		# through mapper.sh, which is handed the full python command line and its
		# %depth% / %width% placeholders as arguments.
		# NOTE(review): the substitution of those placeholders happens in
		# mapper.sh, not here -- confirm both are handled there.

		# this must match the -nnodes argument above
		NNODES=40

		set -ex

		# Fail fast instead of silently building paths rooted at "/med107".
		: "${PROJWORK:?PROJWORK must be set}"
		: "${USER:?USER must be set}"

		PROJDIR="$PROJWORK/med107"
		USERDIR="$PROJDIR/$USER"
		REPODIR="$USERDIR/bottleneck_nngp"
		JOBSCRIPTDIR="$REPODIR/torch_experiments"

		# set up env
		. /sw/summit/init/profile
		module load ibm-wml-ce
		conda activate "$USERDIR/condaenvs/torch"

		# Resolve the interpreter of the activated environment up front, so a
		# failed lookup aborts the job (set -e) instead of handing jsrun a
		# command line with the interpreter missing.
		PYTHON="$(command -v python)"

		# NNODES*6 resource sets in total, 6 per node (-r6), each with one task
		# (-a1), one GPU (-g1) and 7 cores (-c7).
		# The last argument deliberately has no trailing backslash: a dangling
		# continuation would swallow any line later appended to this script.
		jsrun -n"$((NNODES*6))" -a1 -g1 -c7 -r6 \
		  -E LD_LIBRARY_PATH \
		  --bind=proportional-packed:7 --launch_distribution=packed \
		  "$JOBSCRIPTDIR/mapper.sh" \
		  "$PYTHON" "$JOBSCRIPTDIR/main.py" \
		  --dataset=rings \
		  --depths=1,%depth% \
		  --widths=%width% \
		  --vb=0.1 \
		  --vw=1.0 \
		  --vn=0.1 \
		  --train_samples=100 \
		  --test_samples=1000 \
		  --lr=1e-1 \
		  --iters=250 \
		  --gpu=0 \
		  --manual_grad