scripts/optuna_gpt.py (+7 −6)

@@ -1,5 +1,6 @@
 #!/usr/bin/env python3
 """Hyperparameter tuning for GPT based feature extractor."""
+import mlflow
 import os
 import logging
 import torch
@@ -29,10 +30,10 @@ def objective(trial: optuna.Trial) -> float:
     # training parameters
     train_params = {
         "cuda_id": 0,
-        "n_epochs": 500,
-        "n_training": 1_000_000,
+        "n_epochs": 30, # should be sufficient for hyperparameter tuning
+        "n_training": 1_500_000,
         "error": 0.07,
-        "batch_size": trial.suggest_categorical("batch_size", [128, 256, 512, 1024]),
+        "batch_size": trial.suggest_categorical("batch_size", [64, 128, 256, 512, 1024, 2048]),
         "learning_rate": trial.suggest_float("learning_rate", 1e-5, 1e-2, log=True),
         "weight_decay": trial.suggest_float("weight_decay", 1e-8, 1e-4, log=True),
         "optimizer": trial.suggest_categorical("optimizer", ["Adam", "SGD"]),
@@ -41,9 +42,9 @@ def objective(trial: optuna.Trial) -> float:
     }
     # model parameters
     model_params = {
-        "d_model": trial.suggest_categorical("d_model", [32, 64, 128, 256, 512]),
-        "n_head": trial.suggest_categorical("n_head", [1, 2, 4, 8, 16, 32]),
-        "num_encoder_layers": trial.suggest_categorical("num_encoder_layers", [1, 2, 3, 4, 5, 6, 7, 8]),
+        "d_model": trial.suggest_categorical("d_model", [16, 32, 64, 128, 256, 512]),
+        "n_head": trial.suggest_categorical("n_head", [2, 4, 8, 16, 32]),
+        "num_encoder_layers": trial.suggest_categorical("num_encoder_layers", [2, 3, 4, 5, 6, 7, 8]),
         "input_dim": 150,
         "output_dim": 13,
     }
scripts/optuna_gpt.py (+7 −6)

@@ -1,5 +1,6 @@
 #!/usr/bin/env python3
 """Hyperparameter tuning for GPT based feature extractor."""
+import mlflow
 import os
 import logging
 import torch
@@ -29,10 +30,10 @@ def objective(trial: optuna.Trial) -> float:
     # training parameters
     train_params = {
         "cuda_id": 0,
-        "n_epochs": 500,
-        "n_training": 1_000_000,
+        "n_epochs": 30, # should be sufficient for hyperparameter tuning
+        "n_training": 1_500_000,
         "error": 0.07,
-        "batch_size": trial.suggest_categorical("batch_size", [128, 256, 512, 1024]),
+        "batch_size": trial.suggest_categorical("batch_size", [64, 128, 256, 512, 1024, 2048]),
         "learning_rate": trial.suggest_float("learning_rate", 1e-5, 1e-2, log=True),
         "weight_decay": trial.suggest_float("weight_decay", 1e-8, 1e-4, log=True),
         "optimizer": trial.suggest_categorical("optimizer", ["Adam", "SGD"]),
@@ -41,9 +42,9 @@ def objective(trial: optuna.Trial) -> float:
     }
     # model parameters
     model_params = {
-        "d_model": trial.suggest_categorical("d_model", [32, 64, 128, 256, 512]),
-        "n_head": trial.suggest_categorical("n_head", [1, 2, 4, 8, 16, 32]),
-        "num_encoder_layers": trial.suggest_categorical("num_encoder_layers", [1, 2, 3, 4, 5, 6, 7, 8]),
+        "d_model": trial.suggest_categorical("d_model", [16, 32, 64, 128, 256, 512]),
+        "n_head": trial.suggest_categorical("n_head", [2, 4, 8, 16, 32]),
+        "num_encoder_layers": trial.suggest_categorical("num_encoder_layers", [2, 3, 4, 5, 6, 7, 8]),
         "input_dim": 150,
         "output_dim": 13,
     }