megatron/model/distributed.py  +0 −4

@@ -31,10 +31,6 @@ class DistributedDataParallel(MegatronModule):
         self.module = module
         self.data_parallel_group = mpu.get_data_parallel_group()
-        src_rank = mpu.get_model_parallel_rank()
-        for p in self.module.parameters():
-            if torch.is_tensor(p):
-                dist.broadcast(p, src_rank, group=self.data_parallel_group)
 
         def allreduce_params(reduce_after=True, no_scale=False, fp32_allreduce=False):
             if(self.needs_reduction):
 
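For context on what the deleted lines did: at construction time, this custom DistributedDataParallel broadcast every parameter tensor from a source rank to all other ranks in the data-parallel group, so that replicas start from identical weights. Below is a minimal, hedged sketch of that pattern in present-day PyTorch; the helper name broadcast_params is hypothetical, and it assumes torch.distributed has already been initialized via dist.init_process_group.

import torch.nn as nn
import torch.distributed as dist


def broadcast_params(module: nn.Module, src_rank: int, group=None) -> None:
    """Replicate ``module``'s parameters from ``src_rank`` to every other
    rank in ``group`` -- the same pattern as the loop deleted above.

    Assumes ``dist.init_process_group(...)`` has already been called.
    ``src_rank`` must be a *global* rank, because that is how
    ``torch.distributed.broadcast`` interprets its ``src`` argument.
    """
    for p in module.parameters():
        # nn.Parameter is a Tensor subclass, so the original
        # ``torch.is_tensor(p)`` guard was always true and is dropped here.
        dist.broadcast(p.data, src_rank, group=group)

One detail worth noting: the deleted code passed mpu.get_model_parallel_rank() (a rank within the model-parallel group) as src while broadcasting over the data-parallel group, yet torch.distributed.broadcast expects src to be a global rank. Whether that mismatch, or the broadcast simply being redundant with initialization elsewhere, motivated the removal is not stated in the diff itself.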