Commit 7a0710ec authored by Mostofa Patwary's avatar Mostofa Patwary
Browse files

before cleaning the comments

parent 4a09bb3c
Loading
Loading
Loading
Loading
+0 −6
Original line number Diff line number Diff line
@@ -20,12 +20,6 @@ def make_attention_mask(source_block, target_block):
    # (source_length, target_length)
    return mask

def make_history_mask(block):
    """Build a causal (lower-triangular) self-attention mask for *block*.

    Position ``i`` is allowed to attend to every position ``j`` with
    ``j <= i`` (itself and everything before it), never to the future.

    Args:
        block: array-like whose ``shape[0]`` gives the sequence length;
            the element values themselves are not read.

    Returns:
        ``np.ndarray`` of shape ``(length, length)`` and dtype ``int64``,
        with 1 where attention is permitted and 0 elsewhere.
    """
    seq_len = block.shape[0]
    positions = np.arange(seq_len)
    # Broadcast a column of row indices against a row of column indices:
    # entry (i, j) is True exactly when i >= j, i.e. j is not in the future.
    causal = positions[:, None] >= positions[None, :]
    return causal.astype(np.int64)

def get_one_epoch_dataloader(dataset, micro_batch_size=None):
    """Specifically one epoch to be used in an indexing job."""
+0 −2
Original line number Diff line number Diff line
@@ -25,8 +25,6 @@ from torch.utils.data import Dataset

from megatron import print_rank_0, get_args
from megatron.data.biencoder_dataset_utils import make_attention_mask
from megatron.data.biencoder_dataset_utils import make_history_mask


def build_token_types_from_context_list(ctx_list, tokenizer, max_seq_length):
    ctx_id_list, ctx_types_list = [], []