Commit e515f026 authored by hwijeen
Browse files

fix typo

parent 90e0a0dd
Loading
Loading
Loading
Loading
+1 −3
Original line number Diff line number Diff line
@@ -53,8 +53,7 @@ class ParallelMLP(MegatronModule):

    MLP will take the input with h hidden state, project it to 4*h
    hidden dimension, perform nonlinear transformation, and project the
-    state back into h hidden dimension. At the end, dropout is also
-    applied.
+    state back into h hidden dimension.
    """

    def __init__(self, init_method, output_layer_init_method):
@@ -84,7 +83,6 @@ class ParallelMLP(MegatronModule):
            init_method=output_layer_init_method,
            skip_bias_add=True)


    def forward(self, hidden_states):

        # [s, b, 4hp]
+4 −4
Original line number Diff line number Diff line
@@ -316,8 +316,8 @@ class RowParallelLinear(torch.nn.Module):
        keep_master_weight_for_test: This was added for testing and should be
                                     set to False. It returns the master weights
                                     used for initialization.
-        skip_bias_add: This was added to enable performance optimations where bias
-                       can be fused with other elementwise operations. we skip 
+        skip_bias_add: This was added to enable performance optimization where bias
+                       can be fused with other elementwise operations. We skip
                       adding bias but instead return it.
    """