# Setup embedding group (to exchange gradients between
# first and last stages).
if len(ranks) > 1:
    # For GPUs in the same pipeline model parallel group, the embedding
    # group is the two GPUs at the first sub-model and the last sub-model.
    embedding_ranks = [ranks[0], ranks[-1]]
    # Only the GPUs assigned with the first pipeline sub-model.
    position_embedding_ranks = [ranks[0]]