scripts/optuna_gpt.py (+4 −2)

@@ -39,14 +39,16 @@ def objective(trial: optuna.Trial) -> float:
         "optimizer": trial.suggest_categorical("optimizer", ["Adam", "SGD"]),
         "loss": "Huber",
         "cache_dir": "data",
+        "experiment_name": "optuna_gpt",
     }
     # model parameters
     model_params = {
-        "d_model": trial.suggest_categorical("d_model", [16, 32, 64, 128, 256, 512]),
+        "d_model": trial.suggest_categorical("d_model", [16, 32, 64, 128, 256, 512, 1024]),
         "n_head": trial.suggest_categorical("n_head", [2, 4, 8, 16, 32]),
-        "num_encoder_layers": trial.suggest_categorical("num_encoder_layers", [2, 3, 4, 5, 6, 7, 8]),
+        "num_encoder_layers": trial.suggest_categorical("num_encoder_layers", [2, 4, 8, 16, 32]),
         "input_dim": 150,
         "output_dim": 13,
+        "to_log": trial.suggest_categorical("to_log", [True, False]),
     }
     # train the model
     try:
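For context on how these suggest_categorical calls are consumed, here is a minimal, hypothetical driver sketch; the study settings, trial count, and the pruning logic are illustrative, not taken from this PR:

    import optuna

    def objective(trial: optuna.Trial) -> float:
        # hypothetical stand-in for the real objective: sample the same
        # categorical search space, train, and return the validation loss
        d_model = trial.suggest_categorical("d_model", [16, 32, 64, 128, 256, 512, 1024])
        n_head = trial.suggest_categorical("n_head", [2, 4, 8, 16, 32])
        if d_model % n_head != 0:
            # invalid transformer config; prune instead of crashing the study
            raise optuna.TrialPruned()
        val_loss = 0.0  # placeholder for the real training loop
        return val_loss

    study = optuna.create_study(direction="minimize")
    study.optimize(objective, n_trials=50)

Because d_model and n_head are sampled independently, combinations where d_model is not divisible by n_head would fail inside PyTorch's nn.MultiheadAttention (it requires embed_dim to be divisible by num_heads); the try/except around the training call in the script presumably absorbs those trials.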
src/tgreft/models/refl_gpt.py (+20 −1)

@@ -1,6 +1,7 @@
 #!/usr/bin/env python3
 """Model definition for Transformer-based models."""
 
+import torch
 import torch.nn as nn
 
 from tgreft.nn.transformer import PositionalEncoding
@@ -9,9 +10,21 @@ class REFL_GPT(nn.Module):
 
     Parameters
     ----------
+    d_model : int
+        Dimensionality of the model.
+    nhead : int
+        Number of attention heads.
+    num_encoder_layers : int
+        Number of encoder layers.
+    input_dim : int
+        Input dimensionality.
+    output_dim : int
+        Output dimensionality.
+    to_log : bool, optional
+        Whether to convert input to log scale for uniform sensitivity, by default False
     """
 
-    def __init__(self, d_model, nhead, num_encoder_layers, input_dim, output_dim):
+    def __init__(self, d_model, nhead, num_encoder_layers, input_dim, output_dim, to_log: bool = False):
         super(REFL_GPT, self).__init__()
 
         self.embedding = nn.Linear(input_dim, d_model)
@@ -22,7 +35,13 @@ class REFL_GPT(nn.Module):
         self.decoder = nn.Linear(d_model, output_dim)
 
+        self.to_log = to_log
+
     def forward(self, src):
         """Forward pass."""
+        if self.to_log:
+            src = torch.log(src)
+
         src = self.embedding(src)
         src = self.positional_encoding(src)
         output = self.transformer_encoder(src)
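A minimal usage sketch of the new flag follows. The batch/sequence layout is an assumption here (it depends on what tgreft's PositionalEncoding expects), but the caveat is general: torch.log yields -inf for zeros and nan for negatives, so to_log=True presumes strictly positive inputs:

    import torch
    from tgreft.models.refl_gpt import REFL_GPT

    model = REFL_GPT(
        d_model=64, nhead=4, num_encoder_layers=3,
        input_dim=150, output_dim=13, to_log=True,
    )
    # assumed layout: (batch, seq_len, input_dim); values kept strictly
    # positive, since torch.log(0) == -inf and torch.log of negatives is nan
    src = torch.rand(8, 1, 150) + 1e-6
    out = model(src)  # expected shape: (8, 1, 13)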
src/tgreft/train/train_gpt.py (+11 −1)

@@ -64,6 +64,7 @@ def train(
     optimizer = train_params["optimizer"]
     loss = train_params["loss"]
     cache_dir = train_params["cache_dir"]
+    experiment_name = train_params.get("experiment_name", "Train_REFL_GPT")
 
     # parse model parameters
     d_model = model_params["d_model"]
@@ -71,6 +72,7 @@ def train(
     num_encoder_layers = model_params["num_encoder_layers"]
     input_dim = model_params["input_dim"]
     output_dim = model_params["output_dim"]
+    to_log = model_params.get("to_log", False)
 
     # set run name
     run_name = f"gpt_d{d_model}_h{n_head}_l{num_encoder_layers}"
@@ -103,12 +105,17 @@ def train(
         num_encoder_layers=num_encoder_layers,
         input_dim=input_dim,
         output_dim=output_dim,
+        to_log=to_log,
     ).to(device)
 
     # check if need to load pretrained model
     model_name = "model_gpt.pt"
     if load_pretrained and os.path.exists(model_name):
         logger.info("Loading pretrained model...")
         model.load_state_dict(torch.load("model_gpt.pt"))
 
+    # calculate the number of parameters in the model
+    n_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
+    # log the number of parameters
+    logger.info(f"Number of parameters: {n_params}")
+
     # prepare optimizer
     logger.info("Preparing optimizer...")
@@ -130,11 +137,12 @@ def train(
     # start training with mlflow logging
     best_loss = float("inf")
     logger.info("Start training...")
-    mlflow.set_experiment("Train_REFL_GPT")
+    mlflow.set_experiment(experiment_name)
     with mlflow.start_run(run_name=run_name):
         # log parameters
         mlflow.log_params(train_params)
         mlflow.log_params(model_params)
+        mlflow.log_param("n_params", n_params)
 
         # log training script
         mlflow.log_artifact(__file__, "training_script")
@@ -217,6 +225,7 @@ if __name__ == "__main__":
         "optimizer": "adam",
         "loss": "mse",
         "cache_dir": "data",
+        "experiment_name": "Train_REFL_GPT",
     }
 
     model_params = {
@@ -225,6 +234,7 @@ if __name__ == "__main__":
         "num_encoder_layers": 3,
         "input_dim": 150,
         "output_dim": 13,
+        "to_log": True,
     }
 
     train(
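The parameter count added here uses the standard PyTorch idiom; a self-contained illustration on a toy module (not the project's model), for readers unfamiliar with it:

    import torch.nn as nn

    toy = nn.Linear(150, 64)
    # numel() counts elements per tensor; requires_grad filters out frozen weights
    n_params = sum(p.numel() for p in toy.parameters() if p.requires_grad)
    print(n_params)  # 150 * 64 weights + 64 biases = 9664

Logging n_params as an mlflow param alongside train_params and model_params makes runs comparable by model capacity, which matters once Optuna starts varying d_model and num_encoder_layers across trials.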