megatron/__init__.py  (+2 −2)

@@ -30,7 +30,7 @@
from .global_vars import get_tokenizer
from .global_vars import get_tensorboard_writer
from .global_vars import get_adlr_autoresume
from .global_vars import get_timers
from .initialize import initialize_megatron

def print_rank_0(message):
    """If distributed is initialized print only on rank 0."""
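The docstring on print_rank_0 describes the usual rank-0 logging pattern. A minimal sketch of that pattern, assuming the standard torch.distributed API and not necessarily the exact body in this file, looks like:

```python
import torch

def print_rank_0(message):
    """If distributed is initialized, print only on rank 0."""
    if torch.distributed.is_initialized():
        # In a multi-process job, only the global rank-0 process emits the message.
        if torch.distributed.get_rank() == 0:
            print(message, flush=True)
    else:
        # Single-process (non-distributed) runs always print.
        print(message, flush=True)
```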
megatron/initialize.py  (+0 −5)

@@ -93,11 +93,6 @@ def _initialize_distributed():
                   'skipping initialization ...', flush=True)
         args.rank = torch.distributed.get_rank()
         args.world_size = torch.distributed.get_world_size()
-        if device_count > 0:
-            device = torch.cuda.current_device()
-            local_rank = args.rank % device_count
-            assert local_rank == device, \
-                'expected local-rank to be the same as rank % device-count.'
     else:
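The five removed lines enforced a one-to-one mapping between a process's global rank and its local CUDA device when torch.distributed was already initialized. A small standalone sketch of that check, written as a hypothetical helper (check_rank_device_mapping is not part of the original code), is:

```python
import torch

def check_rank_device_mapping(rank: int) -> None:
    """Re-create the removed assertion: the CUDA device torch selected for
    this process must equal the global rank modulo the visible GPU count."""
    device_count = torch.cuda.device_count()
    if device_count > 0:
        device = torch.cuda.current_device()
        local_rank = rank % device_count
        assert local_rank == device, \
            'expected local-rank to be the same as rank % device-count.'
```

Dropping this check presumably accommodates launch setups where the device a rank uses is not derived from rank % device_count, for example when each process is pinned to a single GPU through CUDA_VISIBLE_DEVICES.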