Commit c882ac61 authored by Jared Casper

Merge branch 'megatron_package' into 'master'

created megatron package

See merge request ADLR/megatron-lm!3
parents 4947002d b886b7bb
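The diffs below mechanically rewrite top-level imports (data_utils, mpu, fp16, model, utils, learning_rates) as imports from the new megatron package. As a quick check that the new layout resolves, a hypothetical smoke test, not part of this merge, assuming the repo root containing the megatron/ directory is on PYTHONPATH and that the torch/apex dependencies are installed:

# smoke_test_imports.py -- hypothetical sketch, for illustration only
from megatron import data_utils          # was: import data_utils
from megatron import mpu                 # was: import mpu
from megatron.fp16 import FP16_Module    # was: from fp16 import FP16_Module
from megatron.model import GPT2Model     # was: from model import GPT2Model
from megatron.utils import print_rank_0  # was: from utils import print_rank_0

print("megatron package imports resolved")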
+2 −2
@@ -17,9 +17,9 @@
 
 import copy
 import torch
-import data_utils
-import mpu
+from megatron import data_utils
+from megatron import mpu
 
 class DataConfig:
+14 −14
@@ -25,21 +25,21 @@ import torch
 
 from arguments import get_args
 from configure_data import configure_data
-from fp16 import FP16_Module
-from fp16 import FP16_Optimizer
-from learning_rates import AnnealingLR
-from model import GPT2Model
-from model import gpt2_get_params_for_weight_decay_optimization
-from model import DistributedDataParallel as DDP
-import mpu
+from megatron.fp16 import FP16_Module
+from megatron.fp16 import FP16_Optimizer
+from megatron.learning_rates import AnnealingLR
+from megatron.model import GPT2Model
+from megatron.model import gpt2_get_params_for_weight_decay_optimization
+from megatron.model import DistributedDataParallel as DDP
+from megatron import mpu
 from apex.optimizers import FusedAdam as Adam
-from utils import Timers
-from utils import load_checkpoint
-from utils import report_memory
-from utils import print_params_min_max_norm
-from utils import print_rank_0
+from megatron.utils import Timers
+from megatron.utils import load_checkpoint
+from megatron.utils import report_memory
+from megatron.utils import print_params_min_max_norm
+from megatron.utils import print_rank_0
 
-from data_utils import make_tokenizer
+from megatron.data_utils import make_tokenizer
 
 from detokenizer import *

@@ -539,7 +539,7 @@ def main():
             model = GPT2LMHeadModel.from_pretrained('gpt2', cache_dir='gpt2_weights').cuda()
     else:
         if args.load_openai:
-            from utils import move_weights
+            from megatron.utils import move_weights
             model_path = args.load
             args.load = None
             model = setup_model(args)
+8 −8
@@ -25,20 +25,20 @@ import torch.nn.functional as F
 import argparse
 import time
 from arguments import get_args
-from utils import Timers
+from megatron.utils import Timers
 from pretrain_gpt2 import initialize_distributed
 from pretrain_gpt2 import set_random_seed
 from pretrain_gpt2 import get_train_val_test_data
 from pretrain_gpt2 import get_masks_and_position_ids
-from utils import load_checkpoint
-from data_utils import make_tokenizer
+from megatron.utils import load_checkpoint
+from megatron.data_utils import make_tokenizer
 from configure_data import configure_data
-import mpu
+from megatron import mpu
 
-from fp16 import FP16_Module
-from model import GPT2Model
-from model import DistributedDataParallel as DDP
-from utils import print_rank_0
+from megatron.fp16 import FP16_Module
+from megatron.model import GPT2Model
+from megatron.model import DistributedDataParallel as DDP
+from megatron.utils import print_rank_0
 
 def get_model(args):
     """Build the model."""
+3 −3
@@ -21,9 +21,9 @@ import torch
 from torch.multiprocessing import Lock
 from torch.utils.data import Dataset
 
-import mpu
-from data_utils.samplers import DistributedBatchSampler
-from data_utils.tokenization_gpt2 import GPT2Tokenizer
+from megatron import mpu
+from megatron.data_utils.samplers import DistributedBatchSampler
+from megatron.data_utils.tokenization_gpt2 import GPT2Tokenizer
 
 
 def make_gpt2_dataloaders(args):
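For the package-qualified imports above to resolve outside the repo root, the megatron directory has to be importable. A minimal, hypothetical setup.py along these lines would do it; this merge request does not show one, so this is a sketch only, assuming a standard setuptools layout:

# setup.py -- hypothetical sketch, not part of this merge
from setuptools import setup, find_packages

setup(
    name="megatron-lm",  # placeholder distribution name
    version="0.1",       # placeholder version
    # pick up the megatron package and everything under it
    # (data_utils, fp16, model, mpu, utils, ...)
    packages=find_packages(include=["megatron", "megatron.*"]),
)

Installed via "pip install -e .", the rewritten imports such as "from megatron import mpu" would then work from any working directory.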