Loading megatron/arguments.py +0 −3 Original line number Diff line number Diff line Loading @@ -136,9 +136,6 @@ def parse_args(extra_args_provider=None, defaults={}, if args.bf16: assert not args.fp16 args.params_dtype = torch.bfloat16 # Jitting fusion is not supported for bfloat for now assert not args.bias_gelu_fusion assert not args.bias_dropout_fusion if args.rank == 0: print('using {} for parameters ...'.format(args.params_dtype), Loading Loading
megatron/arguments.py +0 −3 Original line number Diff line number Diff line Loading @@ -136,9 +136,6 @@ def parse_args(extra_args_provider=None, defaults={}, if args.bf16: assert not args.fp16 args.params_dtype = torch.bfloat16 # Jitting fusion is not supported for bfloat for now assert not args.bias_gelu_fusion assert not args.bias_dropout_fusion if args.rank == 0: print('using {} for parameters ...'.format(args.params_dtype), Loading