Loading megatron/utils.py +2 −3 Original line number Diff line number Diff line Loading @@ -275,7 +275,6 @@ def vocab_size_with_padding(num_tokens, args): after = num_tokens multiple = args.make_vocab_size_divisible_by * \ mpu.get_model_parallel_world_size() if multiple > 0: while (after % multiple) != 0: after += 1 print_rank_0('> padded vocab (size: {}) with {} dummy ' Loading Loading
megatron/utils.py +2 −3 Original line number Diff line number Diff line Loading @@ -275,7 +275,6 @@ def vocab_size_with_padding(num_tokens, args): after = num_tokens multiple = args.make_vocab_size_divisible_by * \ mpu.get_model_parallel_world_size() if multiple > 0: while (after % multiple) != 0: after += 1 print_rank_0('> padded vocab (size: {}) with {} dummy ' Loading