Commit 1eccd462 authored by Mohammad Shoeybi
Browse files

Merge branch 'dataset-utils-lint' into 'staging'

Lint megatron/data/dataset_utils.py

See merge request ADLR/megatron-lm!42
parents 63262827 840759b8
Loading
Loading
Loading
Loading
+172 −171
Original line number Diff line number Diff line
@@ -132,6 +132,7 @@ def truncate_segments(tokens_a, tokens_b, len_a, len_b, max_num_tokens, np_rng):
            tokens.pop()
    return True


def create_tokens_and_tokentypes(tokens_a, tokens_b, cls_id, sep_id):
    """Merge segments A and B, add [CLS] and [SEP] and build tokentypes."""