Loading scripts/train_gpt.py +11 −11 Original line number Diff line number Diff line Loading @@ -3,28 +3,28 @@ from tgreft.train.train_gpt import train if __name__ == "__main__": model_params = { "d_model": 1024, "n_head": 8, "num_encoder_layers": 4, "input_dim": 150, "output_dim": 17, "to_log": True, } train_params = { "cuda_id": 0, "n_epochs": 50, # seems like 50 is the point where training and validation loss diverge "n_training": 1_500_000, "error": 0.07, "batch_size": 200, "batch_size": 250, "learning_rate": 0.0057929655918116715, "weight_decay": 7.198921885462489e-07, "optimizer": "SGD", "loss": "huber", "cache_dir": "data", "experiment_name": "Train_REFL_GPT", "run_name": "gpt_d1024_h4_l4", } model_params = { "d_model": 1024, "n_head": 4, "num_encoder_layers": 4, "input_dim": 150, "output_dim": 17, "to_log": True, "run_name": "gpt_d1024_h8_l4_newlog", } train( Loading src/tgreft/utils/data/data_loader.py +1 −1 Original line number Diff line number Diff line Loading @@ -84,7 +84,7 @@ def generate_data( # generate the reference parameters parameters_ref = np.column_stack( [ np.random.uniform(3.5, 6.5, n_dataset), # electolyte_sld, np.random.uniform(5.0, 7.0, n_dataset), # electolyte_sld, np.random.uniform(5, 120, n_dataset), # electolyte_roughness, np.random.uniform(-5.0, 6.5, n_dataset), # sei_sld, Loading Loading
scripts/train_gpt.py +11 −11 Original line number Diff line number Diff line Loading @@ -3,28 +3,28 @@ from tgreft.train.train_gpt import train if __name__ == "__main__": model_params = { "d_model": 1024, "n_head": 8, "num_encoder_layers": 4, "input_dim": 150, "output_dim": 17, "to_log": True, } train_params = { "cuda_id": 0, "n_epochs": 50, # seems like 50 is the point where training and validation loss diverge "n_training": 1_500_000, "error": 0.07, "batch_size": 200, "batch_size": 250, "learning_rate": 0.0057929655918116715, "weight_decay": 7.198921885462489e-07, "optimizer": "SGD", "loss": "huber", "cache_dir": "data", "experiment_name": "Train_REFL_GPT", "run_name": "gpt_d1024_h4_l4", } model_params = { "d_model": 1024, "n_head": 4, "num_encoder_layers": 4, "input_dim": 150, "output_dim": 17, "to_log": True, "run_name": "gpt_d1024_h8_l4_newlog", } train( Loading
src/tgreft/utils/data/data_loader.py +1 −1 Original line number Diff line number Diff line Loading @@ -84,7 +84,7 @@ def generate_data( # generate the reference parameters parameters_ref = np.column_stack( [ np.random.uniform(3.5, 6.5, n_dataset), # electolyte_sld, np.random.uniform(5.0, 7.0, n_dataset), # electolyte_sld, np.random.uniform(5, 120, n_dataset), # electolyte_roughness, np.random.uniform(-5.0, 6.5, n_dataset), # sei_sld, Loading