Commit 1aa66511 authored by Wang, Xiao
Browse files

add comment for get_language_model

parent 8522b801
Loading
Loading
Loading
Loading
+1 −1
Original line number Diff line number Diff line
@@ -138,11 +138,11 @@ class BertModel(MegatronModule):
        self.parallel_output = parallel_output
        self.pre_process = pre_process
        self.post_process = post_process

        init_method = init_method_normal(args.init_method_std)
        scaled_init_method = scaled_init_method_normal(args.init_method_std,
                                                       args.num_layers)

        #XW: get_language_model initializes embeddings and Parallel Transformer
        self.language_model, self._language_model_key = get_language_model(
            num_tokentypes=num_tokentypes,
            add_pooler=self.add_binary_head,