Loading megatron/data/bert_dataset.py +2 −1 Original line number Diff line number Diff line Loading @@ -77,7 +77,8 @@ class BertDataset(torch.utils.data.Dataset): sample = [self.indexed_dataset[i] for i in range(start_idx, end_idx)] # Note that this rng state should be numpy and not python since # python randint is inclusive whereas the numpy one is exclusive. np_rng = np.random.RandomState(seed=(self.seed + idx)) # We % 2**32 since numpy requres the seed to be between 0 and 2**32 - 1 np_rng = np.random.RandomState(seed=((self.seed + idx) % 2**32)) return build_training_sample(sample, seq_length, self.max_seq_length, # needed for padding self.vocab_id_list, Loading Loading
megatron/data/bert_dataset.py +2 −1 Original line number Diff line number Diff line Loading @@ -77,7 +77,8 @@ class BertDataset(torch.utils.data.Dataset): sample = [self.indexed_dataset[i] for i in range(start_idx, end_idx)] # Note that this rng state should be numpy and not python since # python randint is inclusive whereas the numpy one is exclusive. np_rng = np.random.RandomState(seed=(self.seed + idx)) # We % 2**32 since numpy requres the seed to be between 0 and 2**32 - 1 np_rng = np.random.RandomState(seed=((self.seed + idx) % 2**32)) return build_training_sample(sample, seq_length, self.max_seq_length, # needed for padding self.vocab_id_list, Loading