|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 9.659090909090908, |
|
"global_step": 8500, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 1.8863636363636366e-05, |
|
"loss": 0.9603, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.917317101584343, |
|
"eval_f1": 0.780907047467936, |
|
"eval_loss": 0.3360126316547394, |
|
"eval_precision": 0.7684100962789487, |
|
"eval_recall": 0.7938172043010753, |
|
"eval_runtime": 4.4585, |
|
"eval_samples_per_second": 211.507, |
|
"eval_steps_per_second": 42.391, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"learning_rate": 1.772727272727273e-05, |
|
"loss": 0.4145, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"learning_rate": 1.6590909090909094e-05, |
|
"loss": 0.2846, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.940004659832246, |
|
"eval_f1": 0.8470136913817051, |
|
"eval_loss": 0.23408983647823334, |
|
"eval_precision": 0.8338325302773799, |
|
"eval_recall": 0.8606182795698925, |
|
"eval_runtime": 5.0803, |
|
"eval_samples_per_second": 185.62, |
|
"eval_steps_per_second": 37.203, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 2.27, |
|
"learning_rate": 1.5454545454545454e-05, |
|
"loss": 0.2114, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 2.84, |
|
"learning_rate": 1.431818181818182e-05, |
|
"loss": 0.1634, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.9476351351351351, |
|
"eval_f1": 0.8742690058479533, |
|
"eval_loss": 0.20282697677612305, |
|
"eval_precision": 0.8646161934805467, |
|
"eval_recall": 0.8841397849462366, |
|
"eval_runtime": 3.3011, |
|
"eval_samples_per_second": 285.658, |
|
"eval_steps_per_second": 57.253, |
|
"step": 2640 |
|
}, |
|
{ |
|
"epoch": 3.41, |
|
"learning_rate": 1.3181818181818183e-05, |
|
"loss": 0.1243, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 3.98, |
|
"learning_rate": 1.2045454545454547e-05, |
|
"loss": 0.1158, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.9500232991612302, |
|
"eval_f1": 0.8811803278688525, |
|
"eval_loss": 0.1980859786272049, |
|
"eval_precision": 0.8603072983354674, |
|
"eval_recall": 0.9030913978494624, |
|
"eval_runtime": 4.9698, |
|
"eval_samples_per_second": 189.745, |
|
"eval_steps_per_second": 38.03, |
|
"step": 3520 |
|
}, |
|
{ |
|
"epoch": 4.55, |
|
"learning_rate": 1.0909090909090909e-05, |
|
"loss": 0.0808, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.9508970177073626, |
|
"eval_f1": 0.8861842105263158, |
|
"eval_loss": 0.19827169179916382, |
|
"eval_precision": 0.8679123711340206, |
|
"eval_recall": 0.905241935483871, |
|
"eval_runtime": 4.2259, |
|
"eval_samples_per_second": 223.15, |
|
"eval_steps_per_second": 44.725, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 5.11, |
|
"learning_rate": 9.772727272727273e-06, |
|
"loss": 0.0809, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 5.68, |
|
"learning_rate": 8.636363636363637e-06, |
|
"loss": 0.0679, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.952673578751165, |
|
"eval_f1": 0.89337822671156, |
|
"eval_loss": 0.1982535719871521, |
|
"eval_precision": 0.8779032048786817, |
|
"eval_recall": 0.9094086021505376, |
|
"eval_runtime": 4.536, |
|
"eval_samples_per_second": 207.892, |
|
"eval_steps_per_second": 41.667, |
|
"step": 5280 |
|
}, |
|
{ |
|
"epoch": 6.25, |
|
"learning_rate": 7.500000000000001e-06, |
|
"loss": 0.0582, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 6.82, |
|
"learning_rate": 6.363636363636364e-06, |
|
"loss": 0.0468, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_accuracy": 0.9559354613233924, |
|
"eval_f1": 0.9001193792280144, |
|
"eval_loss": 0.20358432829380035, |
|
"eval_precision": 0.8884524744697565, |
|
"eval_recall": 0.9120967741935484, |
|
"eval_runtime": 4.9544, |
|
"eval_samples_per_second": 190.337, |
|
"eval_steps_per_second": 38.148, |
|
"step": 6160 |
|
}, |
|
{ |
|
"epoch": 7.39, |
|
"learning_rate": 5.2272727272727274e-06, |
|
"loss": 0.0436, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 7.95, |
|
"learning_rate": 4.0909090909090915e-06, |
|
"loss": 0.0403, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.955527726001864, |
|
"eval_f1": 0.8995240613432047, |
|
"eval_loss": 0.20606616139411926, |
|
"eval_precision": 0.8850156087408949, |
|
"eval_recall": 0.9145161290322581, |
|
"eval_runtime": 3.9742, |
|
"eval_samples_per_second": 237.282, |
|
"eval_steps_per_second": 47.557, |
|
"step": 7040 |
|
}, |
|
{ |
|
"epoch": 8.52, |
|
"learning_rate": 2.954545454545455e-06, |
|
"loss": 0.0336, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_accuracy": 0.9554694780987885, |
|
"eval_f1": 0.9019062748212867, |
|
"eval_loss": 0.21168170869350433, |
|
"eval_precision": 0.8884976525821596, |
|
"eval_recall": 0.915725806451613, |
|
"eval_runtime": 2.986, |
|
"eval_samples_per_second": 315.802, |
|
"eval_steps_per_second": 63.294, |
|
"step": 7920 |
|
}, |
|
{ |
|
"epoch": 9.09, |
|
"learning_rate": 1.8181818181818183e-06, |
|
"loss": 0.0328, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 9.66, |
|
"learning_rate": 6.818181818181818e-07, |
|
"loss": 0.0312, |
|
"step": 8500 |
|
} |
|
], |
|
"max_steps": 8800, |
|
"num_train_epochs": 10, |
|
"total_flos": 408432261397380.0, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|