Loading src/tgreft/train/train_gpt.py +3 −1 Original line number Diff line number Diff line Loading @@ -65,6 +65,7 @@ def train( loss = train_params["loss"] cache_dir = train_params["cache_dir"] experiment_name = train_params.get("experiment_name", "Train_REFL_GPT") run_name = train_params.get("run_name", None) # parse model parameters d_model = model_params["d_model"] Loading @@ -75,6 +76,7 @@ def train( to_log = model_params.get("to_log", False) # set run name if run_name is None: run_name = f"gpt_d{d_model}_h{n_head}_l{num_encoder_layers}" # prepare data Loading Loading
src/tgreft/train/train_gpt.py +3 −1 Original line number Diff line number Diff line Loading @@ -65,6 +65,7 @@ def train( loss = train_params["loss"] cache_dir = train_params["cache_dir"] experiment_name = train_params.get("experiment_name", "Train_REFL_GPT") run_name = train_params.get("run_name", None) # parse model parameters d_model = model_params["d_model"] Loading @@ -75,6 +76,7 @@ def train( to_log = model_params.get("to_log", False) # set run name if run_name is None: run_name = f"gpt_d{d_model}_h{n_head}_l{num_encoder_layers}" # prepare data Loading