Loading megatron/mpu/mappings.py +1 −1 Original line number Diff line number Diff line Loading @@ -90,7 +90,7 @@ class _CopyToModelParallelRegion(torch.autograd.Function): class _ReduceFromModelParallelRegion(torch.autograd.Function): """All-redcue the input from the model parallel region.""" """All-reduce the input from the model parallel region.""" @staticmethod def symbolic(graph, input_): Loading Loading
megatron/mpu/mappings.py +1 −1 Original line number Diff line number Diff line Loading @@ -90,7 +90,7 @@ class _CopyToModelParallelRegion(torch.autograd.Function): class _ReduceFromModelParallelRegion(torch.autograd.Function): """All-redcue the input from the model parallel region.""" """All-reduce the input from the model parallel region.""" @staticmethod def symbolic(graph, input_): Loading