{ "best_metric": 0.4386209168112411, "best_model_checkpoint": "distilbert-base-uncased-finetuned-cola/run-0/checkpoint-3207", "epoch": 3.0, "eval_steps": 500, "global_step": 3207, "is_hyper_param_search": true, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.47, "grad_norm": 5.35684061050415, "learning_rate": 2.775809181053764e-06, "loss": 0.5956, "step": 500 }, { "epoch": 0.94, "grad_norm": 11.188376426696777, "learning_rate": 2.263099690648562e-06, "loss": 0.5375, "step": 1000 }, { "epoch": 1.0, "eval_loss": 0.5379385948181152, "eval_matthews_correlation": 0.29871168199754417, "eval_runtime": 0.7649, "eval_samples_per_second": 1363.614, "eval_steps_per_second": 86.288, "step": 1069 }, { "epoch": 1.4, "grad_norm": 10.391807556152344, "learning_rate": 1.7503902002433598e-06, "loss": 0.4937, "step": 1500 }, { "epoch": 1.87, "grad_norm": 19.357559204101562, "learning_rate": 1.2376807098381578e-06, "loss": 0.4738, "step": 2000 }, { "epoch": 2.0, "eval_loss": 0.526983916759491, "eval_matthews_correlation": 0.41870797137315424, "eval_runtime": 0.7468, "eval_samples_per_second": 1396.551, "eval_steps_per_second": 88.372, "step": 2138 }, { "epoch": 2.34, "grad_norm": 11.473833084106445, "learning_rate": 7.249712194329557e-07, "loss": 0.4364, "step": 2500 }, { "epoch": 2.81, "grad_norm": 12.574313163757324, "learning_rate": 2.1226172902775366e-07, "loss": 0.4349, "step": 3000 }, { "epoch": 3.0, "eval_loss": 0.5435938239097595, "eval_matthews_correlation": 0.4386209168112411, "eval_runtime": 0.7548, "eval_samples_per_second": 1381.78, "eval_steps_per_second": 87.438, "step": 3207 } ], "logging_steps": 500, "max_steps": 3207, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 500, "total_flos": 113055491519748.0, "train_batch_size": 8, "trial_name": null, "trial_params": { "learning_rate": 3.288518671458966e-06, "num_train_epochs": 3, "per_device_train_batch_size": 8, "seed": 24 } }