Loading README.md +6 −6 Original line number Diff line number Diff line Loading @@ -156,7 +156,7 @@ OUTPUT_ARGS="--log-interval 10 \ --save-interval 500 \ --eval-interval 100 \ --eval-iters 10 \ --checkpoint-activations" --activations-checkpoint-method uniform" python pretrain_bert.py \ $BERT_ARGS \ Loading Loading @@ -345,7 +345,7 @@ python pretrain_ict.py \ --max-position-embeddings 256 \ --ict-head-size 128 \ --train-iters 100000 \ --checkpoint-activations \ --activations-checkpoint-method uniform \ --bert-load /path/to/pretrained_bert \ --load checkpoints \ --save checkpoints \ Loading Loading @@ -375,7 +375,7 @@ python tools/create_doc_index.py \ --ict-head-size 128 \ --num-attention-heads 12 \ --batch-size 128 \ --checkpoint-activations \ --activations-checkpoint-method uniform \ --seq-length 256 \ --max-position-embeddings 256 \ --ict-load /path/to/pretrained_ict \ Loading Loading @@ -482,7 +482,7 @@ python tasks/main.py \ --merge-file $MERGE_FILE \ --load $CHECKPOINT_PATH \ --micro-batch-size 8 \ --checkpoint-activations \ --activations-checkpoint-method uniform \ --log-interval 10 \ --no-load-optim \ --no-load-rng Loading Loading @@ -512,7 +512,7 @@ python tasks/main.py \ --merge-file $MERGE_FILE \ --load $CHECKPOINT_PATH \ --micro-batch-size 8 \ --checkpoint-activations \ --activations-checkpoint-method uniform \ --log-interval 10 \ --no-load-optim \ --no-load-rng Loading Loading @@ -542,7 +542,7 @@ COMMON_TASK_ARGS="--num-layers 24 \ COMMON_TASK_ARGS_EXT="--train-data $TRAIN_DATA \ --valid-data $VALID_DATA \ --pretrained-checkpoint $PRETRAINED_CHECKPOINT \ --checkpoint-activations \ --activations-checkpoint-method uniform \ --save-interval 10000 \ --save $CHECKPOINT_PATH \ --log-interval 100 \ Loading examples/evaluate_retriever_nq.sh +1 −1 Original line number Diff line number Diff line Loading @@ -20,7 +20,7 @@ python tasks/main.py \ --num-attention-heads 12 \ --tensor-model-parallel-size 1 \ --micro-batch-size 128 \ --checkpoint-activations \ --activations-checkpoint-method uniform \ --seq-length 512 \ --max-position-embeddings 512 \ --load ${CHECKPOINT_PATH} \ Loading examples/evaluate_zeroshot_gpt.sh +1 −1 Original line number Diff line number Diff line Loading @@ -29,7 +29,7 @@ python -m torch.distributed.launch $DISTRIBUTED_ARGS ./tasks/main.py \ --hidden-size 1024 \ --num-attention-heads 16 \ --batch-size 8 \ --checkpoint-activations \ --activations-checkpoint-method uniform \ --seq-length 1024 \ --max-position-embeddings 1024 \ --log-interval 10 \ Loading examples/finetune_mnli_distributed.sh +1 −1 Original line number Diff line number Diff line Loading @@ -29,7 +29,7 @@ python -m torch.distributed.launch $DISTRIBUTED_ARGS ./tasks/main.py \ --hidden-size 1024 \ --num-attention-heads 16 \ --micro-batch-size 8 \ --checkpoint-activations \ --activations-checkpoint-method uniform \ --lr 5.0e-5 \ --lr-decay-style linear \ --lr-warmup-fraction 0.065 \ Loading examples/finetune_race_distributed.sh +1 −1 Original line number Diff line number Diff line Loading @@ -29,7 +29,7 @@ python -m torch.distributed.launch $DISTRIBUTED_ARGS ./tasks/main.py \ --hidden-size 1024 \ --num-attention-heads 16 \ --micro-batch-size 4 \ --checkpoint-activations \ --activations-checkpoint-method uniform \ --lr 1.0e-5 \ --lr-decay-style linear \ --lr-warmup-fraction 0.06 \ Loading Loading
README.md +6 −6 Original line number Diff line number Diff line Loading @@ -156,7 +156,7 @@ OUTPUT_ARGS="--log-interval 10 \ --save-interval 500 \ --eval-interval 100 \ --eval-iters 10 \ --checkpoint-activations" --activations-checkpoint-method uniform" python pretrain_bert.py \ $BERT_ARGS \ Loading Loading @@ -345,7 +345,7 @@ python pretrain_ict.py \ --max-position-embeddings 256 \ --ict-head-size 128 \ --train-iters 100000 \ --checkpoint-activations \ --activations-checkpoint-method uniform \ --bert-load /path/to/pretrained_bert \ --load checkpoints \ --save checkpoints \ Loading Loading @@ -375,7 +375,7 @@ python tools/create_doc_index.py \ --ict-head-size 128 \ --num-attention-heads 12 \ --batch-size 128 \ --checkpoint-activations \ --activations-checkpoint-method uniform \ --seq-length 256 \ --max-position-embeddings 256 \ --ict-load /path/to/pretrained_ict \ Loading Loading @@ -482,7 +482,7 @@ python tasks/main.py \ --merge-file $MERGE_FILE \ --load $CHECKPOINT_PATH \ --micro-batch-size 8 \ --checkpoint-activations \ --activations-checkpoint-method uniform \ --log-interval 10 \ --no-load-optim \ --no-load-rng Loading Loading @@ -512,7 +512,7 @@ python tasks/main.py \ --merge-file $MERGE_FILE \ --load $CHECKPOINT_PATH \ --micro-batch-size 8 \ --checkpoint-activations \ --activations-checkpoint-method uniform \ --log-interval 10 \ --no-load-optim \ --no-load-rng Loading Loading @@ -542,7 +542,7 @@ COMMON_TASK_ARGS="--num-layers 24 \ COMMON_TASK_ARGS_EXT="--train-data $TRAIN_DATA \ --valid-data $VALID_DATA \ --pretrained-checkpoint $PRETRAINED_CHECKPOINT \ --checkpoint-activations \ --activations-checkpoint-method uniform \ --save-interval 10000 \ --save $CHECKPOINT_PATH \ --log-interval 100 \ Loading
examples/evaluate_retriever_nq.sh +1 −1 Original line number Diff line number Diff line Loading @@ -20,7 +20,7 @@ python tasks/main.py \ --num-attention-heads 12 \ --tensor-model-parallel-size 1 \ --micro-batch-size 128 \ --checkpoint-activations \ --activations-checkpoint-method uniform \ --seq-length 512 \ --max-position-embeddings 512 \ --load ${CHECKPOINT_PATH} \ Loading
examples/evaluate_zeroshot_gpt.sh +1 −1 Original line number Diff line number Diff line Loading @@ -29,7 +29,7 @@ python -m torch.distributed.launch $DISTRIBUTED_ARGS ./tasks/main.py \ --hidden-size 1024 \ --num-attention-heads 16 \ --batch-size 8 \ --checkpoint-activations \ --activations-checkpoint-method uniform \ --seq-length 1024 \ --max-position-embeddings 1024 \ --log-interval 10 \ Loading
examples/finetune_mnli_distributed.sh +1 −1 Original line number Diff line number Diff line Loading @@ -29,7 +29,7 @@ python -m torch.distributed.launch $DISTRIBUTED_ARGS ./tasks/main.py \ --hidden-size 1024 \ --num-attention-heads 16 \ --micro-batch-size 8 \ --checkpoint-activations \ --activations-checkpoint-method uniform \ --lr 5.0e-5 \ --lr-decay-style linear \ --lr-warmup-fraction 0.065 \ Loading
examples/finetune_race_distributed.sh +1 −1 Original line number Diff line number Diff line Loading @@ -29,7 +29,7 @@ python -m torch.distributed.launch $DISTRIBUTED_ARGS ./tasks/main.py \ --hidden-size 1024 \ --num-attention-heads 16 \ --micro-batch-size 4 \ --checkpoint-activations \ --activations-checkpoint-method uniform \ --lr 1.0e-5 \ --lr-decay-style linear \ --lr-warmup-fraction 0.06 \ Loading