Loading
megatron/arguments.py +3 −0
Original line number Diff line number Diff line
Loading
@@ -116,6 +116,9 @@ def parse_args(extra_args_provider=None, defaults={},
                  args.global_batch_size), flush=True)
    assert args.global_batch_size > 0
    if args.num_layers_per_virtual_pipeline_stage is not None:
        assert args.pipeline_model_parallel_size > 2, \
            'pipeline-model-parallel size should be greater than 2 with ' \
            'interleaved schedule'
        assert args.num_layers % args.num_layers_per_virtual_pipeline_stage == 0, \
            'number of layers is not divisible by number of layers per virtual ' \
            'pipeline stage'
Loading
Loading
megatron/arguments.py +3 −0
Original line number Diff line number Diff line
Loading
@@ -116,6 +116,9 @@ def parse_args(extra_args_provider=None, defaults={},
                  args.global_batch_size), flush=True)
    assert args.global_batch_size > 0
    if args.num_layers_per_virtual_pipeline_stage is not None:
        assert args.pipeline_model_parallel_size > 2, \
            'pipeline-model-parallel size should be greater than 2 with ' \
            'interleaved schedule'
        assert args.num_layers % args.num_layers_per_virtual_pipeline_stage == 0, \
            'number of layers is not divisible by number of layers per virtual ' \
            'pipeline stage'
Loading