{ "best_metric": 0.9126, "best_model_checkpoint": "../../checkpoint/imdb/roberta-large/checkpoint-14858", "epoch": 20.0, "eval_steps": 500, "global_step": 15640, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "eval_accuracy": 0.89904, "eval_loss": 0.2406211644411087, "eval_runtime": 103.5694, "eval_samples_per_second": 241.384, "eval_steps_per_second": 0.946, "step": 782 }, { "epoch": 1.28, "learning_rate": 4.680306905370844e-05, "loss": 0.3075, "step": 1000 }, { "epoch": 2.0, "eval_accuracy": 0.8962, "eval_loss": 0.2538779377937317, "eval_runtime": 102.8426, "eval_samples_per_second": 243.09, "eval_steps_per_second": 0.953, "step": 1564 }, { "epoch": 2.56, "learning_rate": 4.360613810741688e-05, "loss": 0.2171, "step": 2000 }, { "epoch": 3.0, "eval_accuracy": 0.90308, "eval_loss": 0.2649693489074707, "eval_runtime": 102.9465, "eval_samples_per_second": 242.844, "eval_steps_per_second": 0.952, "step": 2346 }, { "epoch": 3.84, "learning_rate": 4.040920716112532e-05, "loss": 0.1697, "step": 3000 }, { "epoch": 4.0, "eval_accuracy": 0.89728, "eval_loss": 0.3427022695541382, "eval_runtime": 103.0024, "eval_samples_per_second": 242.713, "eval_steps_per_second": 0.951, "step": 3128 }, { "epoch": 5.0, "eval_accuracy": 0.90308, "eval_loss": 0.3241328001022339, "eval_runtime": 102.9184, "eval_samples_per_second": 242.911, "eval_steps_per_second": 0.952, "step": 3910 }, { "epoch": 5.12, "learning_rate": 3.721227621483376e-05, "loss": 0.1339, "step": 4000 }, { "epoch": 6.0, "eval_accuracy": 0.90492, "eval_loss": 0.41408097743988037, "eval_runtime": 102.8464, "eval_samples_per_second": 243.081, "eval_steps_per_second": 0.953, "step": 4692 }, { "epoch": 6.39, "learning_rate": 3.40153452685422e-05, "loss": 0.1038, "step": 5000 }, { "epoch": 7.0, "eval_accuracy": 0.8946, "eval_loss": 0.45717746019363403, "eval_runtime": 102.3334, "eval_samples_per_second": 244.299, "eval_steps_per_second": 0.958, "step": 5474 }, { "epoch": 7.67, "learning_rate": 3.081841432225064e-05, "loss": 0.0922, "step": 6000 }, { "epoch": 8.0, "eval_accuracy": 0.9054, "eval_loss": 0.4153657555580139, "eval_runtime": 102.2979, "eval_samples_per_second": 244.384, "eval_steps_per_second": 0.958, "step": 6256 }, { "epoch": 8.95, "learning_rate": 2.7621483375959077e-05, "loss": 0.0676, "step": 7000 }, { "epoch": 9.0, "eval_accuracy": 0.89824, "eval_loss": 0.5019603371620178, "eval_runtime": 102.1481, "eval_samples_per_second": 244.743, "eval_steps_per_second": 0.959, "step": 7038 }, { "epoch": 10.0, "eval_accuracy": 0.90708, "eval_loss": 0.507008969783783, "eval_runtime": 102.4086, "eval_samples_per_second": 244.12, "eval_steps_per_second": 0.957, "step": 7820 }, { "epoch": 10.23, "learning_rate": 2.442455242966752e-05, "loss": 0.0568, "step": 8000 }, { "epoch": 11.0, "eval_accuracy": 0.90668, "eval_loss": 0.48255667090415955, "eval_runtime": 102.2755, "eval_samples_per_second": 244.438, "eval_steps_per_second": 0.958, "step": 8602 }, { "epoch": 11.51, "learning_rate": 2.122762148337596e-05, "loss": 0.0443, "step": 9000 }, { "epoch": 12.0, "eval_accuracy": 0.90856, "eval_loss": 0.5103762745857239, "eval_runtime": 102.4293, "eval_samples_per_second": 244.071, "eval_steps_per_second": 0.957, "step": 9384 }, { "epoch": 12.79, "learning_rate": 1.80306905370844e-05, "loss": 0.0313, "step": 10000 }, { "epoch": 13.0, "eval_accuracy": 0.90884, "eval_loss": 0.545563817024231, "eval_runtime": 101.9339, "eval_samples_per_second": 245.257, "eval_steps_per_second": 0.961, "step": 10166 }, { "epoch": 14.0, "eval_accuracy": 0.90776, "eval_loss": 0.47395065426826477, "eval_runtime": 102.2186, "eval_samples_per_second": 244.574, "eval_steps_per_second": 0.959, "step": 10948 }, { "epoch": 14.07, "learning_rate": 1.483375959079284e-05, "loss": 0.0245, "step": 11000 }, { "epoch": 15.0, "eval_accuracy": 0.90712, "eval_loss": 0.4977429211139679, "eval_runtime": 102.2005, "eval_samples_per_second": 244.617, "eval_steps_per_second": 0.959, "step": 11730 }, { "epoch": 15.35, "learning_rate": 1.163682864450128e-05, "loss": 0.0227, "step": 12000 }, { "epoch": 16.0, "eval_accuracy": 0.90984, "eval_loss": 0.5135776400566101, "eval_runtime": 102.5324, "eval_samples_per_second": 243.825, "eval_steps_per_second": 0.956, "step": 12512 }, { "epoch": 16.62, "learning_rate": 8.439897698209718e-06, "loss": 0.0175, "step": 13000 }, { "epoch": 17.0, "eval_accuracy": 0.91076, "eval_loss": 0.5130705833435059, "eval_runtime": 102.0766, "eval_samples_per_second": 244.914, "eval_steps_per_second": 0.96, "step": 13294 }, { "epoch": 17.9, "learning_rate": 5.242966751918159e-06, "loss": 0.0173, "step": 14000 }, { "epoch": 18.0, "eval_accuracy": 0.91092, "eval_loss": 0.5370119214057922, "eval_runtime": 102.316, "eval_samples_per_second": 244.341, "eval_steps_per_second": 0.958, "step": 14076 }, { "epoch": 19.0, "eval_accuracy": 0.9126, "eval_loss": 0.5343945026397705, "eval_runtime": 102.4054, "eval_samples_per_second": 244.128, "eval_steps_per_second": 0.957, "step": 14858 }, { "epoch": 19.18, "learning_rate": 2.0460358056265987e-06, "loss": 0.0152, "step": 15000 }, { "epoch": 20.0, "eval_accuracy": 0.91212, "eval_loss": 0.5329343676567078, "eval_runtime": 102.1338, "eval_samples_per_second": 244.777, "eval_steps_per_second": 0.96, "step": 15640 }, { "epoch": 20.0, "step": 15640, "total_flos": 1.16491420416e+17, "train_loss": 0.08507246678442601, "train_runtime": 7726.1299, "train_samples_per_second": 64.715, "train_steps_per_second": 2.024 } ], "logging_steps": 1000, "max_steps": 15640, "num_input_tokens_seen": 0, "num_train_epochs": 20, "save_steps": 500, "total_flos": 1.16491420416e+17, "train_batch_size": 32, "trial_name": null, "trial_params": null }