Commit 37475717 authored by Jared Casper
Browse files

Merge branch 'numpy_seed' into 'main'

Ensure numpy random seed is within range.

See merge request ADLR/megatron-lm!271
parents 002cde6b 1c4c360f
Loading
Loading
Loading
Loading
+2 −1
Original line number Diff line number Diff line
@@ -77,7 +77,8 @@ class BertDataset(torch.utils.data.Dataset):
        sample = [self.indexed_dataset[i] for i in range(start_idx, end_idx)]
        # Note that this rng state should be numpy and not python since
        # python randint is inclusive whereas the numpy one is exclusive.
        np_rng = np.random.RandomState(seed=(self.seed + idx))
        # We % 2**32 since numpy requires the seed to be between 0 and 2**32 - 1
        np_rng = np.random.RandomState(seed=((self.seed + idx) % 2**32))
        return build_training_sample(sample, seq_length,
                                     self.max_seq_length,  # needed for padding
                                     self.vocab_id_list,