Commit 21d4a7ab authored by Zhang, Chen
Browse files

adjust range

parent 78c8a6af
Loading
Loading
Loading
Loading
+7 −6
Original line number Diff line number Diff line
#!/usr/bin/env python3
"""Hyperparameter tuning for GPT based feature extractor."""
import mlflow
import os
import logging
import torch
@@ -29,10 +30,10 @@ def objective(trial: optuna.Trial) -> float:
     # training parameters
     train_params = {
         "cuda_id": 0,
-        "n_epochs": 500,
-        "n_training": 1_000_000,
+        "n_epochs": 30,  # should be sufficient for hyperparameter tuning
+        "n_training": 1_500_000,
         "error": 0.07,
-        "batch_size": trial.suggest_categorical("batch_size", [128, 256, 512, 1024]),
+        "batch_size": trial.suggest_categorical("batch_size", [64, 128, 256, 512, 1024, 2048]),
         "learning_rate": trial.suggest_float("learning_rate", 1e-5, 1e-2, log=True),
         "weight_decay": trial.suggest_float("weight_decay", 1e-8, 1e-4, log=True),
         "optimizer": trial.suggest_categorical("optimizer", ["Adam", "SGD"]),
@@ -41,9 +42,9 @@ def objective(trial: optuna.Trial) -> float:
     }
     # model parameters
     model_params = {
-        "d_model": trial.suggest_categorical("d_model", [32, 64, 128, 256, 512]),
-        "n_head": trial.suggest_categorical("n_head", [1, 2, 4, 8, 16, 32]),
-        "num_encoder_layers": trial.suggest_categorical("num_encoder_layers", [1, 2, 3, 4, 5, 6, 7, 8]),
+        "d_model": trial.suggest_categorical("d_model", [16, 32, 64, 128, 256, 512]),
+        "n_head": trial.suggest_categorical("n_head", [2, 4, 8, 16, 32]),
+        "num_encoder_layers": trial.suggest_categorical("num_encoder_layers", [2, 3, 4, 5, 6, 7, 8]),
         "input_dim": 150,
         "output_dim": 13,
     }