add comments for is_pipeline_first_stage and is_pipeline_last_stage. Add... (59dea876) · Commits · candle / Megatron-LM

megatron/training.py

+6 −2

Original line number	Diff line number	Diff line
		@@ -96,9 +96,12 @@ def pretrain(train_valid_test_dataset_provider,
		initialize_megatron(extra_args_provider=extra_args_provider,
		args_defaults=args_defaults)


		# Adjust the startup time so it reflects the largest value.
		# This will be closer to what scheduler will see (outside of
		# image ... launches.
		# XW: _TRAIN_START_TIME is measured at the very beginning of the program; it is declared as a global variable here.
		# XW: The reduce operation should be MAX instead of MIN here
		global _TRAIN_START_TIME
		start_time_tensor = torch.cuda.DoubleTensor([_TRAIN_START_TIME])
		torch.distributed.all_reduce(start_time_tensor,
		@@ -217,8 +220,9 @@ def get_model(model_provider_func, model_type=ModelType.encoder_or_decoder, wrap
		this_model.model_type = model_type
		model.append(this_model)
		else:
		pre_process = mpu.is_pipeline_first_stage()
		post_process = mpu.is_pipeline_last_stage()
		pre_process = mpu.is_pipeline_first_stage()  # check whether this GPU holds the first sub-model of the pipeline
		post_process = mpu.is_pipeline_last_stage()  # check whether this GPU holds the last sub-model of the pipeline

		add_encoder = True
		add_decoder = True
		if model_type == ModelType.encoder_and_decoder: