megatron/arguments.py (+2 −2)

@@ -173,7 +173,7 @@ def _add_initialization_args(parser):
 def _add_learning_rate_args(parser):
     group = parser.add_argument_group(title='learning rate')
-    group.add_argument('--lr', type=float, required=True,
+    group.add_argument('--lr', type=float, default=None,
                        help='Initial learning rate. Depending on decay style '
                        'and initial warmup, the learing rate at each '
                        'iteration would be different.')

@@ -297,7 +297,7 @@ def _add_data_args(parser):
                        ' validation, and test split. For example the split '
                        '`90,5,5` will use 90% of data for training, 5% for '
                        'validation and 5% for test.')
-    group.add_argument('--vocab-file', type=str, required=True,
+    group.add_argument('--vocab-file', type=str, default=None,
                        help='Path to the vocab file.')
     group.add_argument('--merge-file', type=str, default=None,
                        help='Path to the BPE merge file.')

tasks/run_gpt2_eval.py → scripts/run_gpt2_eval.py (+0 −0): file moved.

tasks/main.py (+1 −1)

@@ -30,7 +30,7 @@ def get_tasks_args(parser):
     group.add_argument('--task', type=str, required=True,
                        help='Task name.')
-    group.add_argument('--epochs', type=int, required=True,
+    group.add_argument('--epochs', type=int, default=None,
                        help='Number of finetunning epochs. Zero results in '
                        'evaluation only.')
     group.add_argument('--pretrained-checkpoint', type=str, default=None,

tasks/zeroshot_gpt2/detokenizer.py (+0 −1)

@@ -77,7 +77,6 @@ _DETOKENIZERS = {
 def get_detokenizer(path):
     for key in _DETOKENIZERS.keys():
         if key in path:
-            print(key)
             return _DETOKENIZERS[key]
megatron/arguments.py (+2 −2)

@@ -173,7 +173,7 @@ def _add_initialization_args(parser):
 def _add_learning_rate_args(parser):
     group = parser.add_argument_group(title='learning rate')
-    group.add_argument('--lr', type=float, required=True,
+    group.add_argument('--lr', type=float, default=None,
                        help='Initial learning rate. Depending on decay style '
                        'and initial warmup, the learing rate at each '
                        'iteration would be different.')

@@ -297,7 +297,7 @@ def _add_data_args(parser):
                        ' validation, and test split. For example the split '
                        '`90,5,5` will use 90% of data for training, 5% for '
                        'validation and 5% for test.')
-    group.add_argument('--vocab-file', type=str, required=True,
+    group.add_argument('--vocab-file', type=str, default=None,
                        help='Path to the vocab file.')
     group.add_argument('--merge-file', type=str, default=None,
                        help='Path to the BPE merge file.')
tasks/main.py (+1 −1)

@@ -30,7 +30,7 @@ def get_tasks_args(parser):
     group.add_argument('--task', type=str, required=True,
                        help='Task name.')
-    group.add_argument('--epochs', type=int, required=True,
+    group.add_argument('--epochs', type=int, default=None,
                        help='Number of finetunning epochs. Zero results in '
                        'evaluation only.')
     group.add_argument('--pretrained-checkpoint', type=str, default=None,
tasks/zeroshot_gpt2/detokenizer.py (+0 −1)

@@ -77,7 +77,6 @@ _DETOKENIZERS = {
 def get_detokenizer(path):
     for key in _DETOKENIZERS.keys():
         if key in path:
-            print(key)
             return _DETOKENIZERS[key]