Commit c6882114 authored by Mohammad's avatar Mohammad
Browse files

added runtime compilation for helpers

parent 8134313d
Loading
Loading
Loading
Loading
+3 −0
Original line number Diff line number Diff line
@@ -246,6 +246,9 @@ def get_samples_mapping_(indexed_dataset,
        start_time = time.time()
        print_rank_0(' > building sapmles index mapping for {} ...'.format(
            name))
        # First compile and then import.
        from megatron.data.dataset_utils import compile_helper
        compile_helper()
        from megatron.data import helpers
        samples_mapping = helpers.build_mapping(
            indexed_dataset.doc_idx,
+9 −0
Original line number Diff line number Diff line
@@ -18,6 +18,15 @@ import collections
import numpy as np


def compile_helper():
    """Compile the helper module at runtime.

    Runs ``make -C <dir>`` where ``<dir>`` is the directory containing
    this file, so the Makefile located next to this module is used.
    Make sure this is invoked on a single process.

    Raises:
        FileNotFoundError: if the ``make`` executable cannot be found
            (raised by ``subprocess.run``).
        RuntimeError: if ``make`` exits with a non-zero status.
    """
    import os
    import subprocess
    path = os.path.abspath(os.path.dirname(__file__))
    result = subprocess.run(['make', '-C', path])
    # Fail loudly here instead of letting the caller hit an obscure
    # import error for the helper module that was never built.
    if result.returncode != 0:
        raise RuntimeError(
            'compiling helpers failed: `make -C {}` exited with '
            'status {}'.format(path, result.returncode))


def build_training_sample(sample,
                          target_seq_length, max_seq_length,
                          vocab_id_list, vocab_id_to_token_dict,
+3 −0
Original line number Diff line number Diff line
@@ -178,6 +178,9 @@ def _build_index_mappings(name, data_prefix, documents, sizes,
            # sample-idx.
            start_time = time.time()
            # Use C++ implementation for speed.
            # First compile and then import.
            from megatron.data.dataset_utils import compile_helper
            compile_helper()
            from megatron.data import helpers
            assert doc_idx.dtype == np.int32
            assert sizes.dtype == np.int32