Loading megatron/mpu/mappings.py +1 −1 Original line number Diff line number Diff line Loading @@ -20,7 +20,7 @@ from .utils import split_tensor_along_last_dim def _reduce(input_): """All-reduce the the input tensor across model parallel group.""" """All-reduce the input tensor across model parallel group.""" # Bypass the function if we are using only 1 GPU. if get_tensor_model_parallel_world_size()==1: Loading Loading
megatron/mpu/mappings.py +1 −1 Original line number Diff line number Diff line Loading @@ -20,7 +20,7 @@ from .utils import split_tensor_along_last_dim def _reduce(input_): """All-reduce the the input tensor across model parallel group.""" """All-reduce the input tensor across model parallel group.""" # Bypass the function if we are using only 1 GPU. if get_tensor_model_parallel_world_size()==1: Loading