Loading megatron/data/indexed_dataset.py +6 −1 Original line number Diff line number Diff line Loading @@ -43,7 +43,8 @@ def infer_dataset_impl(path): else: return None else: print(f"Dataset path does not exist: {path}") print(f"Dataset does not exist: {path}") print("Path should be a basename that both .idx and .bin can be appended to get full filenames.") return None Loading @@ -55,6 +56,10 @@ def make_builder(out_file, impl, vocab_size=None): def make_dataset(path, impl, skip_warmup=False): if not IndexedDataset.exists(path): print(f"Dataset does not exist: {path}") print("Path should be a basename that both .idx and .bin can be appended to get full filenames.") return None if impl == 'infer': impl = infer_dataset_impl(path) if impl == 'lazy' and IndexedDataset.exists(path): Loading Loading
megatron/data/indexed_dataset.py +6 −1 Original line number Diff line number Diff line Loading @@ -43,7 +43,8 @@ def infer_dataset_impl(path): else: return None else: print(f"Dataset path does not exist: {path}") print(f"Dataset does not exist: {path}") print("Path should be a basename that both .idx and .bin can be appended to get full filenames.") return None Loading @@ -55,6 +56,10 @@ def make_builder(out_file, impl, vocab_size=None): def make_dataset(path, impl, skip_warmup=False): if not IndexedDataset.exists(path): print(f"Dataset does not exist: {path}") print("Path should be a basename that both .idx and .bin can be appended to get full filenames.") return None if impl == 'infer': impl = infer_dataset_impl(path) if impl == 'lazy' and IndexedDataset.exists(path): Loading