Loading megatron/training.py +1 −2 Original line number Diff line number Diff line Loading @@ -354,7 +354,6 @@ def train_step(forward_step_func, data_iterator, if args.DDP_impl == 'local' and args.use_contiguous_buffers_in_ddp: for partition in model: partition.zero_grad_buffer() else: optimizer.zero_grad() forward_backward_func = get_forward_backward_func() Loading Loading
megatron/training.py +1 −2 Original line number Diff line number Diff line Loading @@ -354,7 +354,6 @@ def train_step(forward_step_func, data_iterator, if args.DDP_impl == 'local' and args.use_contiguous_buffers_in_ddp: for partition in model: partition.zero_grad_buffer() else: optimizer.zero_grad() forward_backward_func = get_forward_backward_func() Loading