Commit 5897a790 authored by Mohammad Shoeybi

Merge branch 'pip_support' into 'master'

pip_supports

See merge request ADLR/megatron-lm!75
parents e9eef962 faa462fa
.gitignore +5 −0
__pycache__
+
+# Distribution / packaging
+build/
+dist/
+*.egg-info/
\ No newline at end of file

MANIFEST.in (new file, 0 → 100644) +2 −0
include megatron/data/Makefile
include megatron/data/helpers.cpp
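
MANIFEST.in ensures the two non-Python sources needed to build the dataset helpers (the C++ extension and its Makefile) are bundled into the source distribution. A minimal sanity check one could run after building an sdist is sketched below; the tarball name is an assumption based on the package name and version introduced in this commit:
<pre>
# Hypothetical check: build the sdist first (e.g. `python setup.py sdist`),
# then confirm the declared files actually landed in the archive.
import tarfile

with tarfile.open('dist/megatron-lm-1.0.tar.gz') as sdist:  # name assumed
    names = sdist.getnames()

assert any(n.endswith('megatron/data/Makefile') for n in names)
assert any(n.endswith('megatron/data/helpers.cpp') for n in names)
print('sdist contains the data helper sources')
</pre>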
README.md +6 −1
@@ -34,7 +34,12 @@ For BERT training, we swapped the position of the layer normalization and the re
# Setup
We officially support only python 3.6, pytorch 1.5, cuda 10, and nccl 2.6 versions and above.

-To use this repo please install the latest supported versions of PyTorch with GPU support. We strongly recommend using one of [NGC's recent PyTorch containers](https://ngc.nvidia.com/catalog/containers/nvidia:pytorch) (the latest compatible version at time of publication can be pulled with `docker pull nvcr.io/nvidia/pytorch:20.03-py3`). Data preprocessing requires [NLTK](https://www.nltk.org/install.html), though this is not required for training, evaluation or downstream tasks.
+To use this repo please install the latest supported versions of PyTorch with GPU support and NVIDIA [APEX](https://github.com/NVIDIA/apex#quick-start). We strongly recommend using one of [NGC's recent PyTorch containers](https://ngc.nvidia.com/catalog/containers/nvidia:pytorch) (the latest compatible version at time of publication can be pulled with `docker pull nvcr.io/nvidia/pytorch:20.03-py3`). Data preprocessing requires [NLTK](https://www.nltk.org/install.html), though this is not required for training, evaluation or downstream tasks.

+To use megatron you can either clone the repo or install it via pip (make sure python3-dev is installed):
+<pre>
+pip install megatron-lm
+</pre>
+
<a id="downloading-checkpoints"></a>
## Downloading Checkpoints
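
A note on naming: `megatron-lm` is the pip distribution name, while the import name is `megatron`. A quick post-install check; the version and package name strings come from the new package_info.py added in this commit:
<pre>
# After `pip install megatron-lm`, the package is imported as `megatron`.
import megatron

print(megatron.__version__)       # '1.0' per package_info.py in this commit
print(megatron.__package_name__)  # 'megatron-lm'
</pre>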
megatron/__init__.py +11 −1
@@ -13,7 +13,16 @@
# See the License for the specific language governing permissions and
# limitations under the License.

-import torch
+from .package_info import (
+    __description__,
+    __contact_names__,
+    __url__,
+    __download_url__,
+    __keywords__,
+    __license__,
+    __package_name__,
+    __version__,
+)

from .global_vars import get_args
from .global_vars import get_tokenizer
@@ -21,6 +30,7 @@ from .global_vars import get_tensorboard_writer
from .global_vars import get_adlr_autoresume
from .global_vars import get_timers

+import torch

def print_rank_0(message):
    """If distributed is initialized print only on rank 0."""
megatron/package_info.py (new file) +30 −0
# coding=utf-8
# Copyright (c) 2020, NVIDIA CORPORATION.  All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

MAJOR = 1
MINOR = 0

# Use the following formatting: (major, minor)
VERSION = (MAJOR, MINOR)

__version__ = '.'.join(map(str, VERSION))
__package_name__ = 'megatron-lm'
__contact_names__ = 'NVIDIA INC'
__url__ = 'https://github.com/NVIDIA/Megatron-LM'
__download_url__ = 'https://github.com/NVIDIA/Megatron-LM/releases'
__description__ = 'Megatron-LM: Training Multi-Billion Parameter Language Models Using Model Parallelism.'
__license__ = 'See https://github.com/NVIDIA/Megatron-LM/blob/master/LICENSE'
__keywords__ = 'deep learning, Megatron, gpu, NLP, nvidia, pytorch, torch, language'
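
These dunder fields are the standard inputs to a setuptools `setup()` call. The actual setup.py is not shown in this excerpt; under the assumption that one accompanies this merge, a minimal sketch of how it would consume package_info.py:
<pre>
# Hypothetical setup.py sketch; the real one is not part of this excerpt.
from setuptools import setup, find_packages

from megatron.package_info import (
    __description__,
    __contact_names__,
    __url__,
    __download_url__,
    __keywords__,
    __license__,
    __package_name__,
    __version__,
)

setup(
    name=__package_name__,
    version=__version__,
    description=__description__,
    author=__contact_names__,
    url=__url__,
    download_url=__download_url__,
    keywords=__keywords__,
    license=__license__,
    packages=find_packages(),
)
</pre>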