Commit d928dd51 authored by Mohammad Shoeybi
Browse files

Merge branch 'staging_dataset_error_reporting' into 'staging'

Better error reporting when a dataset doesn't exist.

See merge request ADLR/megatron-lm!63
parents c6b5c137 f2779b18
Loading
Loading
Loading
Loading
+6 −1
Original line number Diff line number Diff line
@@ -43,7 +43,8 @@ def infer_dataset_impl(path):
            else:
                return None
    else:
        print(f"Dataset path does not exist: {path}")
        print(f"Dataset does not exist: {path}")
        print("Path should be a basename that both .idx and .bin can be appended to get full filenames.")
        return None


@@ -55,6 +56,10 @@ def make_builder(out_file, impl, vocab_size=None):


def make_dataset(path, impl, skip_warmup=False):
    if not IndexedDataset.exists(path):
        print(f"Dataset does not exist: {path}")
        print("Path should be a basename that both .idx and .bin can be appended to get full filenames.")
        return None
    if impl == 'infer':
        impl = infer_dataset_impl(path)
    if impl == 'lazy' and IndexedDataset.exists(path):