{ "best_metric": 0.6909549832344055, "best_model_checkpoint": "hBERTv1_new_pretrain_w_init_48_rte/checkpoint-180", "epoch": 14.0, "global_step": 280, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "learning_rate": 3.9200000000000004e-05, "loss": 0.712, "step": 20 }, { "epoch": 1.0, "eval_accuracy": 0.4729241877256318, "eval_loss": 0.7158167958259583, "eval_runtime": 0.5268, "eval_samples_per_second": 525.771, "eval_steps_per_second": 5.694, "step": 20 }, { "epoch": 2.0, "learning_rate": 3.8400000000000005e-05, "loss": 0.7094, "step": 40 }, { "epoch": 2.0, "eval_accuracy": 0.4729241877256318, "eval_loss": 0.6957778930664062, "eval_runtime": 0.5296, "eval_samples_per_second": 523.049, "eval_steps_per_second": 5.665, "step": 40 }, { "epoch": 3.0, "learning_rate": 3.76e-05, "loss": 0.7025, "step": 60 }, { "epoch": 3.0, "eval_accuracy": 0.4729241877256318, "eval_loss": 0.7007981538772583, "eval_runtime": 0.5287, "eval_samples_per_second": 523.935, "eval_steps_per_second": 5.674, "step": 60 }, { "epoch": 4.0, "learning_rate": 3.680000000000001e-05, "loss": 0.705, "step": 80 }, { "epoch": 4.0, "eval_accuracy": 0.5270758122743683, "eval_loss": 0.691913902759552, "eval_runtime": 0.5289, "eval_samples_per_second": 523.717, "eval_steps_per_second": 5.672, "step": 80 }, { "epoch": 5.0, "learning_rate": 3.6e-05, "loss": 0.7023, "step": 100 }, { "epoch": 5.0, "eval_accuracy": 0.5270758122743683, "eval_loss": 0.6959893703460693, "eval_runtime": 0.53, "eval_samples_per_second": 522.685, "eval_steps_per_second": 5.661, "step": 100 }, { "epoch": 6.0, "learning_rate": 3.52e-05, "loss": 0.7002, "step": 120 }, { "epoch": 6.0, "eval_accuracy": 0.4729241877256318, "eval_loss": 0.7094991207122803, "eval_runtime": 0.5279, "eval_samples_per_second": 524.689, "eval_steps_per_second": 5.683, "step": 120 }, { "epoch": 7.0, "learning_rate": 3.44e-05, "loss": 0.7071, "step": 140 }, { "epoch": 7.0, "eval_accuracy": 0.4729241877256318, "eval_loss": 0.704041600227356, "eval_runtime": 0.5311, "eval_samples_per_second": 521.544, "eval_steps_per_second": 5.648, "step": 140 }, { "epoch": 8.0, "learning_rate": 3.3600000000000004e-05, "loss": 0.6982, "step": 160 }, { "epoch": 8.0, "eval_accuracy": 0.5270758122743683, "eval_loss": 0.6917728781700134, "eval_runtime": 0.5338, "eval_samples_per_second": 518.933, "eval_steps_per_second": 5.62, "step": 160 }, { "epoch": 9.0, "learning_rate": 3.28e-05, "loss": 0.7025, "step": 180 }, { "epoch": 9.0, "eval_accuracy": 0.5270758122743683, "eval_loss": 0.6909549832344055, "eval_runtime": 0.5302, "eval_samples_per_second": 522.469, "eval_steps_per_second": 5.659, "step": 180 }, { "epoch": 10.0, "learning_rate": 3.2000000000000005e-05, "loss": 0.6965, "step": 200 }, { "epoch": 10.0, "eval_accuracy": 0.4620938628158845, "eval_loss": 0.6983726024627686, "eval_runtime": 0.5283, "eval_samples_per_second": 524.331, "eval_steps_per_second": 5.679, "step": 200 }, { "epoch": 11.0, "learning_rate": 3.1200000000000006e-05, "loss": 0.6814, "step": 220 }, { "epoch": 11.0, "eval_accuracy": 0.49458483754512633, "eval_loss": 0.7635096907615662, "eval_runtime": 0.5287, "eval_samples_per_second": 523.93, "eval_steps_per_second": 5.674, "step": 220 }, { "epoch": 12.0, "learning_rate": 3.0400000000000004e-05, "loss": 0.6616, "step": 240 }, { "epoch": 12.0, "eval_accuracy": 0.5270758122743683, "eval_loss": 0.6918363571166992, "eval_runtime": 0.5312, "eval_samples_per_second": 521.504, "eval_steps_per_second": 5.648, "step": 240 }, { "epoch": 13.0, "learning_rate": 2.96e-05, "loss": 0.6658, "step": 260 }, { "epoch": 13.0, "eval_accuracy": 0.5306859205776173, "eval_loss": 0.7622052431106567, "eval_runtime": 0.5281, "eval_samples_per_second": 524.555, "eval_steps_per_second": 5.681, "step": 260 }, { "epoch": 14.0, "learning_rate": 2.8800000000000002e-05, "loss": 0.6316, "step": 280 }, { "epoch": 14.0, "eval_accuracy": 0.5090252707581228, "eval_loss": 0.8002101182937622, "eval_runtime": 0.5288, "eval_samples_per_second": 523.798, "eval_steps_per_second": 5.673, "step": 280 }, { "epoch": 14.0, "step": 280, "total_flos": 5162653433462784.0, "train_loss": 0.6911461523600987, "train_runtime": 271.0754, "train_samples_per_second": 459.282, "train_steps_per_second": 3.689 } ], "max_steps": 1000, "num_train_epochs": 50, "total_flos": 5162653433462784.0, "trial_name": null, "trial_params": null }