Commit 1f45adcc authored Apr 16, 2020 by Mohammad

latest

parent ca19a8b5

examples/pretrain_bert_distributed.sh

+1 −0

Original line number	Diff line number	Diff line
		@@ -32,6 +32,7 @@ python -m torch.distributed.launch $DISTRIBUTED_ARGS \
		--distributed-backend nccl \
		--lr 0.0001 \
		--lr-decay-style linear \
		+ --min-lr 1.0e-5 \
		--lr-decay-iters 990000 \
		--weight-decay 1e-2 \
		--clip-grad 1.0 \

examples/pretrain_gpt2_distributed.sh

+1 −0

Original line number	Diff line number	Diff line
		@@ -36,6 +36,7 @@ python -m torch.distributed.launch $DISTRIBUTED_ARGS \
		--distributed-backend nccl \
		--lr 0.00015 \
		--lr-decay-style cosine \
		+ --min-lr 1.0e-5 \
		--weight-decay 1e-2 \
		--clip-grad 1.0 \
		--warmup .01 \