Loading LICENSE +1 −1 Original line number Diff line number Diff line Loading @@ -28,7 +28,7 @@ The following applies to all files unless otherwise noted: -- This repository also contains code from huggingface, Google Research, This repository also contains code from Hugging Face Inc., Google Research, and Facebook (from their Fairseq project). Files from these organizations have notices at the top of each file. Below are licenses used in those files, as indicated. Loading megatron/data/dataset_utils.py +4 −0 Original line number Diff line number Diff line Loading @@ -14,6 +14,10 @@ # limitations under the License. # Most of the code here has been copied from: # https://github.com/google-research/albert/blob/master/create_pretraining_data.py # with some modifications. import collections import numpy as np Loading megatron/tokenizer/bert_tokenization.py +1 −0 Original line number Diff line number Diff line Loading @@ -12,6 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. """Tokenization classes.""" from __future__ import absolute_import Loading Loading
LICENSE +1 −1 Original line number Diff line number Diff line Loading @@ -28,7 +28,7 @@ The following applies to all files unless otherwise noted: -- This repository also contains code from huggingface, Google Research, This repository also contains code from Hugging Face Inc., Google Research, and Facebook (from their Fairseq project). Files from these organizations have notices at the top of each file. Below are licenses used in those files, as indicated. Loading
megatron/data/dataset_utils.py +4 −0 Original line number Diff line number Diff line Loading @@ -14,6 +14,10 @@ # limitations under the License. # Most of the code here has been copied from: # https://github.com/google-research/albert/blob/master/create_pretraining_data.py # with some modifications. import collections import numpy as np Loading
megatron/tokenizer/bert_tokenization.py +1 −0 Original line number Diff line number Diff line Loading @@ -12,6 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. """Tokenization classes.""" from __future__ import absolute_import Loading