Commit 0d40c194 authored by Zhang, Chen's avatar Zhang, Chen
Browse files

new default training params

parent 4d15f4b1
Loading
Loading
Loading
Loading
+11 −11
Original line number Diff line number Diff line
@@ -3,28 +3,28 @@
from tgreft.train.train_gpt import train

if __name__ == "__main__":
    model_params = {
        "d_model": 1024,
        "n_head": 8,
        "num_encoder_layers": 4,
        "input_dim": 150,
        "output_dim": 17,
        "to_log": True,
    }
        
    train_params = {
        "cuda_id": 0,
        "n_epochs": 50,  # seems like 50 is the point where training and validation loss diverge
        "n_training": 1_500_000,
        "error": 0.07,
        "batch_size": 200,
        "batch_size": 250,
        "learning_rate": 0.0057929655918116715,
        "weight_decay": 7.198921885462489e-07,
        "optimizer": "SGD",
        "loss": "huber",
        "cache_dir": "data",
        "experiment_name": "Train_REFL_GPT",
        "run_name": "gpt_d1024_h4_l4",
    }

    model_params = {
        "d_model": 1024,
        "n_head": 4,
        "num_encoder_layers": 4,
        "input_dim": 150,
        "output_dim": 17,
        "to_log": True,
        "run_name": "gpt_d1024_h8_l4_newlog",
    }

    train(
+1 −1
Original line number Diff line number Diff line
@@ -84,7 +84,7 @@ def generate_data(
    # generate the reference parameters
    parameters_ref = np.column_stack(
        [
            np.random.uniform(3.5, 6.5, n_dataset),  # electolyte_sld,
            np.random.uniform(5.0, 7.0, n_dataset),  # electolyte_sld,
            np.random.uniform(5, 120, n_dataset),  # electolyte_roughness,

            np.random.uniform(-5.0, 6.5, n_dataset),  # sei_sld,