Loading examples/evaluate_zeroshot_gpt2.sh +2 −2 Original line number Diff line number Diff line Loading @@ -30,8 +30,8 @@ python -m torch.distributed.launch $DISTRIBUTED_ARGS ./tasks/main.py \ --num-attention-heads 16 \ --batch-size 8 \ --checkpoint-activations \ --seq-length 512 \ --max-position-embeddings 512 \ --seq-length 1024 \ --max-position-embeddings 1024 \ --log-interval 10 \ --fp16 \ --no-load-optim \ Loading Loading
examples/evaluate_zeroshot_gpt2.sh +2 −2 Original line number Diff line number Diff line Loading @@ -30,8 +30,8 @@ python -m torch.distributed.launch $DISTRIBUTED_ARGS ./tasks/main.py \ --num-attention-heads 16 \ --batch-size 8 \ --checkpoint-activations \ --seq-length 512 \ --max-position-embeddings 512 \ --seq-length 1024 \ --max-position-embeddings 1024 \ --log-interval 10 \ --fp16 \ --no-load-optim \ Loading