Commit a6b75c61 authored by Zhang, Chen

add model n_params tracking

parent a69bdf7b
+4 −2
@@ -39,14 +39,16 @@ def objective(trial: optuna.Trial) -> float:
         "optimizer": trial.suggest_categorical("optimizer", ["Adam", "SGD"]),
         "loss": "Huber",
         "cache_dir": "data",
+        "experiment_name": "optuna_gpt",
     }
     # model parameters
     model_params = {
-        "d_model": trial.suggest_categorical("d_model", [16, 32, 64, 128, 256, 512]),
+        "d_model": trial.suggest_categorical("d_model", [16, 32, 64, 128, 256, 512, 1024]),
         "n_head": trial.suggest_categorical("n_head", [2, 4, 8, 16, 32]),
-        "num_encoder_layers": trial.suggest_categorical("num_encoder_layers", [2, 3, 4, 5, 6, 7, 8]),
+        "num_encoder_layers": trial.suggest_categorical("num_encoder_layers", [2, 4, 8, 16, 32]),
         "input_dim": 150,
         "output_dim": 13,
+        "to_log": trial.suggest_categorical("to_log", [True, False]),
     }
     # train the model
     try:
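For context, this objective function is what an Optuna study drives; the trailing try block presumably absorbs trials where the sampled combination is invalid (nn.TransformerEncoderLayer requires d_model to be divisible by n_head, which not every pairing above satisfies). A minimal sketch of running the search, with an assumed direction and an illustrative trial budget:

import optuna

# assumes objective returns a validation loss to minimize
study = optuna.create_study(direction="minimize")
study.optimize(objective, n_trials=50)  # n_trials is an illustrative choice
print(study.best_params)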
+20 −1
 #!/usr/bin/env python3
 """Model definition for Transformer-based models."""
+import torch
 import torch.nn as nn
 from tgreft.nn.transformer import PositionalEncoding

@@ -9,9 +10,21 @@ class REFL_GPT(nn.Module):
 
     Parameters
     ----------
+    d_model : int
+        Dimensionality of the model.
+    nhead : int
+        Number of attention heads.
+    num_encoder_layers : int
+        Number of encoder layers.
+    input_dim : int
+        Input dimensionality.
+    output_dim : int
+        Output dimensionality.
+    to_log : bool, optional
+        Whether to convert input to log scale for uniform sensitivity, by default False
     """
 
-    def __init__(self, d_model, nhead, num_encoder_layers, input_dim, output_dim):
+    def __init__(self, d_model, nhead, num_encoder_layers, input_dim, output_dim, to_log: bool = False):
         super(REFL_GPT, self).__init__()
 
         self.embedding = nn.Linear(input_dim, d_model)
@@ -22,7 +35,13 @@ class REFL_GPT(nn.Module):
 
         self.decoder = nn.Linear(d_model, output_dim)
 
+        self.to_log = to_log
+
     def forward(self, src):
+        """Forward pass."""
+        if self.to_log:
+            src = torch.log(src)
+
         src = self.embedding(src)
         src = self.positional_encoding(src)
         output = self.transformer_encoder(src)
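A quick smoke test of the new to_log path (the shapes and sizes below are illustrative, not from the repo, and it assumes the tgreft PositionalEncoding accepts the tensor the embedding produces). Since torch.log yields -inf or NaN for values <= 0, inputs must be strictly positive whenever to_log=True:

import torch

model = REFL_GPT(d_model=32, nhead=4, num_encoder_layers=3,
                 input_dim=150, output_dim=13, to_log=True)  # 32 % 4 == 0
x = torch.rand(8, 150) + 1e-6  # strictly positive, as the log path requires
y = model(x)                   # src is log-scaled before the embedding layer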
+11 −1
@@ -64,6 +64,7 @@ def train(
     optimizer = train_params["optimizer"]
     loss = train_params["loss"]
     cache_dir = train_params["cache_dir"]
+    experiment_name = train_params.get("experiment_name", "Train_REFL_GPT")
 
     # parse model parameters
     d_model = model_params["d_model"]
@@ -71,6 +72,7 @@ def train(
     num_encoder_layers = model_params["num_encoder_layers"]
     input_dim = model_params["input_dim"]
     output_dim = model_params["output_dim"]
+    to_log = model_params.get("to_log", False)
 
     # set run name
     run_name = f"gpt_d{d_model}_h{n_head}_l{num_encoder_layers}"
@@ -103,12 +105,17 @@ def train(
         num_encoder_layers=num_encoder_layers,
         input_dim=input_dim,
         output_dim=output_dim,
+        to_log=to_log,
     ).to(device)
     # check if need to load pretrained model
     model_name = "model_gpt.pt"
     if load_pretrained and os.path.exists(model_name):
         logger.info("Loading pretrained model...")
         model.load_state_dict(torch.load("model_gpt.pt"))
+    # calculate the number of parameters in the model
+    n_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
+    # log the number of parameters
+    logger.info(f"Number of parameters: {n_params}")
 
     # prepare optimizer
     logger.info("Preparing optimizer...")
@@ -130,11 +137,12 @@ def train(
     # start training with mlflow logging
     best_loss = float("inf")
     logger.info("Start training...")
-    mlflow.set_experiment("Train_REFL_GPT")
+    mlflow.set_experiment(experiment_name)
     with mlflow.start_run(run_name=run_name):
         # log parameters
         mlflow.log_params(train_params)
         mlflow.log_params(model_params)
+        mlflow.log_param("n_params", n_params)
         # log training script
         mlflow.log_artifact(__file__, "training_script")

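This is the headline change of the commit: numel() counts the elements of each weight tensor and the requires_grad filter skips frozen weights, so n_params is the trainable parameter count that both the logger and MLflow record. A standalone illustration with a toy module:

import torch.nn as nn

toy = nn.Linear(150, 13)  # weight: 150 * 13 elements, bias: 13
n_params = sum(p.numel() for p in toy.parameters() if p.requires_grad)
print(n_params)  # 1963 = 150 * 13 + 13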
@@ -217,6 +225,7 @@ if __name__ == "__main__":
         "optimizer": "adam",
         "loss": "mse",
         "cache_dir": "data",
+        "experiment_name": "Train_REFL_GPT",
     }
 
     model_params = {
@@ -225,6 +234,7 @@ if __name__ == "__main__":
         "num_encoder_layers": 3,
         "input_dim": 150,
         "output_dim": 13,
+        "to_log": True,
     }
 
     train(
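The call is truncated in this view. A hypothetical invocation, assuming train accepts the two dicts as keyword arguments (the full signature, e.g. a load_pretrained flag, is not shown here):

train(
    train_params=train_params,
    model_params=model_params,
)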