data_utils/__init__.py → megatron/data_utils/__init__.py +0 −0 (file moved, no content changes)
configure_data.py +2 −2

```diff
@@ -17,9 +17,9 @@
 import copy
 import torch
 
-import data_utils
-import mpu
+from megatron import data_utils
+from megatron import mpu
 
 
 class DataConfig:
```
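The pattern is the same in every file below: imports of the old top-level modules (data_utils, mpu, utils, fp16, model, learning_rates) become package-qualified imports through megatron. A minimal sketch of why the new form resolves, assuming mpu is moved under megatron/ as a subpackage in the same way as data_utils (the diff only shows the data_utils/__init__.py move explicitly):

```python
# Assumed layout after the move (sketch, not confirmed by this diff beyond
# the data_utils/__init__.py entry):
#
#   megatron/
#       __init__.py
#       mpu/__init__.py
#       data_utils/__init__.py
#
# With that layout, "from <package> import <subpackage>" works without any
# re-exports in megatron/__init__.py:
from megatron import data_utils   # imports megatron/data_utils/__init__.py
from megatron import mpu          # imports megatron/mpu/__init__.py
```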
evaluate_gpt2.py +14 −14

```diff
@@ -25,21 +25,21 @@ import torch
 
 from arguments import get_args
 from configure_data import configure_data
-from fp16 import FP16_Module
-from fp16 import FP16_Optimizer
-from learning_rates import AnnealingLR
-from model import GPT2Model
-from model import gpt2_get_params_for_weight_decay_optimization
-from model import DistributedDataParallel as DDP
-import mpu
+from megatron.fp16 import FP16_Module
+from megatron.fp16 import FP16_Optimizer
+from megatron.learning_rates import AnnealingLR
+from megatron.model import GPT2Model
+from megatron.model import gpt2_get_params_for_weight_decay_optimization
+from megatron.model import DistributedDataParallel as DDP
+from megatron import mpu
 from apex.optimizers import FusedAdam as Adam
-from utils import Timers
-from utils import load_checkpoint
-from utils import report_memory
-from utils import print_params_min_max_norm
-from utils import print_rank_0
+from megatron.utils import Timers
+from megatron.utils import load_checkpoint
+from megatron.utils import report_memory
+from megatron.utils import print_params_min_max_norm
+from megatron.utils import print_rank_0
 
-from data_utils import make_tokenizer
+from megatron.data_utils import make_tokenizer
 from detokenizer import *
@@ -539,7 +539,7 @@ def main():
         model = GPT2LMHeadModel.from_pretrained('gpt2', cache_dir='gpt2_weights').cuda()
     else:
         if args.load_openai:
-            from utils import move_weights
+            from megatron.utils import move_weights
             model_path = args.load
             args.load = None
             model = setup_model(args)
```
generate_samples.py +8 −8

```diff
@@ -25,20 +25,20 @@ import torch.nn.functional as F
 import argparse
 import time
 
 from arguments import get_args
-from utils import Timers
+from megatron.utils import Timers
 from pretrain_gpt2 import initialize_distributed
 from pretrain_gpt2 import set_random_seed
 from pretrain_gpt2 import get_train_val_test_data
 from pretrain_gpt2 import get_masks_and_position_ids
-from utils import load_checkpoint
-from data_utils import make_tokenizer
+from megatron.utils import load_checkpoint
+from megatron.data_utils import make_tokenizer
 from configure_data import configure_data
-import mpu
+from megatron import mpu
 
-from fp16 import FP16_Module
-from model import GPT2Model
-from model import DistributedDataParallel as DDP
-from utils import print_rank_0
+from megatron.fp16 import FP16_Module
+from megatron.model import GPT2Model
+from megatron.model import DistributedDataParallel as DDP
+from megatron.utils import print_rank_0
 
 def get_model(args):
     """Build the model."""
```
gpt2_data_loader.py +3 −3

```diff
@@ -21,9 +21,9 @@ import torch
 from torch.multiprocessing import Lock
 from torch.utils.data import Dataset
 
-import mpu
-from data_utils.samplers import DistributedBatchSampler
-from data_utils.tokenization_gpt2 import GPT2Tokenizer
+from megatron import mpu
+from megatron.data_utils.samplers import DistributedBatchSampler
+from megatron.data_utils.tokenization_gpt2 import GPT2Tokenizer
 
 
 def make_gpt2_dataloaders(args):
```
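Taken together, every import path touched here becomes package-qualified. A quick import check, as a sketch only (it assumes the repository root is on PYTHONPATH or the megatron package is otherwise importable, and that the repo's dependencies such as PyTorch and apex are installed):

```python
# Smoke test for the new package-qualified import paths used in this change.
from megatron import mpu
from megatron.utils import Timers, load_checkpoint, print_rank_0
from megatron.data_utils import make_tokenizer
from megatron.data_utils.samplers import DistributedBatchSampler
from megatron.data_utils.tokenization_gpt2 import GPT2Tokenizer
from megatron.fp16 import FP16_Module, FP16_Optimizer
from megatron.learning_rates import AnnealingLR
from megatron.model import GPT2Model, DistributedDataParallel as DDP

print("megatron package imports resolve")
```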