{ "best_metric": 0.6895588636398315, "best_model_checkpoint": "hBERTv1_new_pretrain_rte/checkpoint-140", "epoch": 12.0, "global_step": 240, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "learning_rate": 3.9200000000000004e-05, "loss": 0.7407, "step": 20 }, { "epoch": 1.0, "eval_accuracy": 0.4729241877256318, "eval_loss": 0.7002341151237488, "eval_runtime": 0.5049, "eval_samples_per_second": 548.575, "eval_steps_per_second": 5.941, "step": 20 }, { "epoch": 2.0, "learning_rate": 3.8400000000000005e-05, "loss": 0.7061, "step": 40 }, { "epoch": 2.0, "eval_accuracy": 0.4729241877256318, "eval_loss": 0.7245458960533142, "eval_runtime": 0.508, "eval_samples_per_second": 545.223, "eval_steps_per_second": 5.905, "step": 40 }, { "epoch": 3.0, "learning_rate": 3.76e-05, "loss": 0.7102, "step": 60 }, { "epoch": 3.0, "eval_accuracy": 0.5270758122743683, "eval_loss": 0.6948612332344055, "eval_runtime": 0.504, "eval_samples_per_second": 549.561, "eval_steps_per_second": 5.952, "step": 60 }, { "epoch": 4.0, "learning_rate": 3.680000000000001e-05, "loss": 0.703, "step": 80 }, { "epoch": 4.0, "eval_accuracy": 0.4729241877256318, "eval_loss": 0.6950586438179016, "eval_runtime": 0.5087, "eval_samples_per_second": 544.51, "eval_steps_per_second": 5.897, "step": 80 }, { "epoch": 5.0, "learning_rate": 3.6e-05, "loss": 0.7097, "step": 100 }, { "epoch": 5.0, "eval_accuracy": 0.4729241877256318, "eval_loss": 0.6973713636398315, "eval_runtime": 0.5068, "eval_samples_per_second": 546.522, "eval_steps_per_second": 5.919, "step": 100 }, { "epoch": 6.0, "learning_rate": 3.52e-05, "loss": 0.7006, "step": 120 }, { "epoch": 6.0, "eval_accuracy": 0.4729241877256318, "eval_loss": 0.7052543759346008, "eval_runtime": 0.5068, "eval_samples_per_second": 546.614, "eval_steps_per_second": 5.92, "step": 120 }, { "epoch": 7.0, "learning_rate": 3.44e-05, "loss": 0.6986, "step": 140 }, { "epoch": 7.0, "eval_accuracy": 0.5306859205776173, "eval_loss": 0.6895588636398315, "eval_runtime": 0.5057, "eval_samples_per_second": 547.783, "eval_steps_per_second": 5.933, "step": 140 }, { "epoch": 8.0, "learning_rate": 3.3600000000000004e-05, "loss": 0.6935, "step": 160 }, { "epoch": 8.0, "eval_accuracy": 0.4729241877256318, "eval_loss": 0.7711036205291748, "eval_runtime": 0.5097, "eval_samples_per_second": 543.49, "eval_steps_per_second": 5.886, "step": 160 }, { "epoch": 9.0, "learning_rate": 3.28e-05, "loss": 0.6109, "step": 180 }, { "epoch": 9.0, "eval_accuracy": 0.4981949458483754, "eval_loss": 0.8443405032157898, "eval_runtime": 0.5053, "eval_samples_per_second": 548.178, "eval_steps_per_second": 5.937, "step": 180 }, { "epoch": 10.0, "learning_rate": 3.2000000000000005e-05, "loss": 0.469, "step": 200 }, { "epoch": 10.0, "eval_accuracy": 0.5126353790613718, "eval_loss": 1.0369467735290527, "eval_runtime": 0.5078, "eval_samples_per_second": 545.529, "eval_steps_per_second": 5.908, "step": 200 }, { "epoch": 11.0, "learning_rate": 3.1200000000000006e-05, "loss": 0.3028, "step": 220 }, { "epoch": 11.0, "eval_accuracy": 0.5234657039711191, "eval_loss": 1.1621102094650269, "eval_runtime": 0.512, "eval_samples_per_second": 541.059, "eval_steps_per_second": 5.86, "step": 220 }, { "epoch": 12.0, "learning_rate": 3.0400000000000004e-05, "loss": 0.2155, "step": 240 }, { "epoch": 12.0, "eval_accuracy": 0.5379061371841155, "eval_loss": 1.2095954418182373, "eval_runtime": 0.5057, "eval_samples_per_second": 547.72, "eval_steps_per_second": 5.932, "step": 240 }, { "epoch": 12.0, "step": 240, "total_flos": 4425131514396672.0, "train_loss": 0.6050535062948863, "train_runtime": 211.7637, "train_samples_per_second": 587.919, "train_steps_per_second": 4.722 } ], "max_steps": 1000, "num_train_epochs": 50, "total_flos": 4425131514396672.0, "trial_name": null, "trial_params": null }