|
{ |
|
"best_metric": 1.9327729940414429, |
|
"best_model_checkpoint": "./results/checkpoint-322", |
|
"epoch": 22.0, |
|
"eval_steps": 500, |
|
"global_step": 7084, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.2501650165016502, |
|
"eval_loss": 1.9327729940414429, |
|
"eval_precision": 0.21039270770334143, |
|
"eval_recall": 0.2501650165016502, |
|
"eval_runtime": 105.6438, |
|
"eval_samples_per_second": 86.044, |
|
"eval_steps_per_second": 0.435, |
|
"step": 322 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"learning_rate": 9.689440993788821e-05, |
|
"loss": 1.5264, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.342024202420242, |
|
"eval_loss": 2.632329225540161, |
|
"eval_precision": 0.35262421551132295, |
|
"eval_recall": 0.342024202420242, |
|
"eval_runtime": 103.0666, |
|
"eval_samples_per_second": 88.195, |
|
"eval_steps_per_second": 0.446, |
|
"step": 644 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.3278327832783278, |
|
"eval_loss": 2.9007134437561035, |
|
"eval_precision": 0.34602885917564774, |
|
"eval_recall": 0.3278327832783278, |
|
"eval_runtime": 102.8287, |
|
"eval_samples_per_second": 88.399, |
|
"eval_steps_per_second": 0.447, |
|
"step": 966 |
|
}, |
|
{ |
|
"epoch": 3.11, |
|
"learning_rate": 9.37888198757764e-05, |
|
"loss": 0.6172, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.3423542354235424, |
|
"eval_loss": 3.5746607780456543, |
|
"eval_precision": 0.3512951146039761, |
|
"eval_recall": 0.3423542354235424, |
|
"eval_runtime": 101.7857, |
|
"eval_samples_per_second": 89.305, |
|
"eval_steps_per_second": 0.452, |
|
"step": 1288 |
|
}, |
|
{ |
|
"epoch": 4.66, |
|
"learning_rate": 9.068322981366461e-05, |
|
"loss": 0.227, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.3298129812981298, |
|
"eval_loss": 4.564810276031494, |
|
"eval_precision": 0.3549699546948268, |
|
"eval_recall": 0.3298129812981298, |
|
"eval_runtime": 101.7692, |
|
"eval_samples_per_second": 89.32, |
|
"eval_steps_per_second": 0.452, |
|
"step": 1610 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.3282728272827283, |
|
"eval_loss": 4.280569553375244, |
|
"eval_precision": 0.3536771477074885, |
|
"eval_recall": 0.3282728272827283, |
|
"eval_runtime": 103.0369, |
|
"eval_samples_per_second": 88.221, |
|
"eval_steps_per_second": 0.446, |
|
"step": 1932 |
|
}, |
|
{ |
|
"epoch": 6.21, |
|
"learning_rate": 8.757763975155279e-05, |
|
"loss": 0.118, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_accuracy": 0.3460946094609461, |
|
"eval_loss": 4.623283386230469, |
|
"eval_precision": 0.344094580634609, |
|
"eval_recall": 0.3460946094609461, |
|
"eval_runtime": 89.9963, |
|
"eval_samples_per_second": 101.004, |
|
"eval_steps_per_second": 0.511, |
|
"step": 2254 |
|
}, |
|
{ |
|
"epoch": 7.76, |
|
"learning_rate": 8.4472049689441e-05, |
|
"loss": 0.0775, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.3316831683168317, |
|
"eval_loss": 4.871355056762695, |
|
"eval_precision": 0.3530027915318833, |
|
"eval_recall": 0.3316831683168317, |
|
"eval_runtime": 90.9139, |
|
"eval_samples_per_second": 99.985, |
|
"eval_steps_per_second": 0.506, |
|
"step": 2576 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_accuracy": 0.34026402640264025, |
|
"eval_loss": 4.957677364349365, |
|
"eval_precision": 0.33301623037601086, |
|
"eval_recall": 0.34026402640264025, |
|
"eval_runtime": 100.8674, |
|
"eval_samples_per_second": 90.118, |
|
"eval_steps_per_second": 0.456, |
|
"step": 2898 |
|
}, |
|
{ |
|
"epoch": 9.32, |
|
"learning_rate": 8.136645962732919e-05, |
|
"loss": 0.056, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_accuracy": 0.33641364136413643, |
|
"eval_loss": 5.228715419769287, |
|
"eval_precision": 0.35135134065339246, |
|
"eval_recall": 0.33641364136413643, |
|
"eval_runtime": 101.1595, |
|
"eval_samples_per_second": 89.858, |
|
"eval_steps_per_second": 0.455, |
|
"step": 3220 |
|
}, |
|
{ |
|
"epoch": 10.87, |
|
"learning_rate": 7.82608695652174e-05, |
|
"loss": 0.0418, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_accuracy": 0.3397139713971397, |
|
"eval_loss": 5.417286396026611, |
|
"eval_precision": 0.36541577660239133, |
|
"eval_recall": 0.3397139713971397, |
|
"eval_runtime": 100.0336, |
|
"eval_samples_per_second": 90.869, |
|
"eval_steps_per_second": 0.46, |
|
"step": 3542 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_accuracy": 0.3288228822882288, |
|
"eval_loss": 4.508035659790039, |
|
"eval_precision": 0.3497733496975655, |
|
"eval_recall": 0.3288228822882288, |
|
"eval_runtime": 98.9066, |
|
"eval_samples_per_second": 91.905, |
|
"eval_steps_per_second": 0.465, |
|
"step": 3864 |
|
}, |
|
{ |
|
"epoch": 12.42, |
|
"learning_rate": 7.515527950310559e-05, |
|
"loss": 0.0329, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_accuracy": 0.33575357535753575, |
|
"eval_loss": 5.399806022644043, |
|
"eval_precision": 0.343976788298361, |
|
"eval_recall": 0.33575357535753575, |
|
"eval_runtime": 99.618, |
|
"eval_samples_per_second": 91.249, |
|
"eval_steps_per_second": 0.462, |
|
"step": 4186 |
|
}, |
|
{ |
|
"epoch": 13.98, |
|
"learning_rate": 7.20496894409938e-05, |
|
"loss": 0.0255, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_accuracy": 0.3254125412541254, |
|
"eval_loss": 5.722477436065674, |
|
"eval_precision": 0.37649139562875183, |
|
"eval_recall": 0.3254125412541254, |
|
"eval_runtime": 99.1531, |
|
"eval_samples_per_second": 91.676, |
|
"eval_steps_per_second": 0.464, |
|
"step": 4508 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_accuracy": 0.33707370737073705, |
|
"eval_loss": 5.8459153175354, |
|
"eval_precision": 0.3366756679639873, |
|
"eval_recall": 0.33707370737073705, |
|
"eval_runtime": 100.8652, |
|
"eval_samples_per_second": 90.12, |
|
"eval_steps_per_second": 0.456, |
|
"step": 4830 |
|
}, |
|
{ |
|
"epoch": 15.53, |
|
"learning_rate": 6.894409937888199e-05, |
|
"loss": 0.0243, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_accuracy": 0.35401540154015404, |
|
"eval_loss": 5.645482540130615, |
|
"eval_precision": 0.3631233149571772, |
|
"eval_recall": 0.35401540154015404, |
|
"eval_runtime": 99.6631, |
|
"eval_samples_per_second": 91.207, |
|
"eval_steps_per_second": 0.462, |
|
"step": 5152 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"eval_accuracy": 0.3341034103410341, |
|
"eval_loss": 5.483065605163574, |
|
"eval_precision": 0.352613586631227, |
|
"eval_recall": 0.3341034103410341, |
|
"eval_runtime": 98.2468, |
|
"eval_samples_per_second": 92.522, |
|
"eval_steps_per_second": 0.468, |
|
"step": 5474 |
|
}, |
|
{ |
|
"epoch": 17.08, |
|
"learning_rate": 6.58385093167702e-05, |
|
"loss": 0.0173, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"eval_accuracy": 0.3547854785478548, |
|
"eval_loss": 5.973876476287842, |
|
"eval_precision": 0.37074034210656404, |
|
"eval_recall": 0.3547854785478548, |
|
"eval_runtime": 99.9918, |
|
"eval_samples_per_second": 90.907, |
|
"eval_steps_per_second": 0.46, |
|
"step": 5796 |
|
}, |
|
{ |
|
"epoch": 18.63, |
|
"learning_rate": 6.273291925465838e-05, |
|
"loss": 0.017, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"eval_accuracy": 0.3471947194719472, |
|
"eval_loss": 5.270116329193115, |
|
"eval_precision": 0.3540165595958133, |
|
"eval_recall": 0.3471947194719472, |
|
"eval_runtime": 98.3519, |
|
"eval_samples_per_second": 92.423, |
|
"eval_steps_per_second": 0.468, |
|
"step": 6118 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_accuracy": 0.34994499449944994, |
|
"eval_loss": 6.121872901916504, |
|
"eval_precision": 0.3608309273966228, |
|
"eval_recall": 0.34994499449944994, |
|
"eval_runtime": 98.8185, |
|
"eval_samples_per_second": 91.987, |
|
"eval_steps_per_second": 0.465, |
|
"step": 6440 |
|
}, |
|
{ |
|
"epoch": 20.19, |
|
"learning_rate": 5.962732919254659e-05, |
|
"loss": 0.0152, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 21.0, |
|
"eval_accuracy": 0.3448844884488449, |
|
"eval_loss": 6.195789337158203, |
|
"eval_precision": 0.3597641027560754, |
|
"eval_recall": 0.3448844884488449, |
|
"eval_runtime": 98.2972, |
|
"eval_samples_per_second": 92.475, |
|
"eval_steps_per_second": 0.468, |
|
"step": 6762 |
|
}, |
|
{ |
|
"epoch": 21.74, |
|
"learning_rate": 5.652173913043478e-05, |
|
"loss": 0.011, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 22.0, |
|
"eval_accuracy": 0.35687568756875687, |
|
"eval_loss": 5.533013820648193, |
|
"eval_precision": 0.3680252789908563, |
|
"eval_recall": 0.35687568756875687, |
|
"eval_runtime": 97.9019, |
|
"eval_samples_per_second": 92.848, |
|
"eval_steps_per_second": 0.47, |
|
"step": 7084 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 16100, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 50, |
|
"save_steps": 500, |
|
"total_flos": 3.727239090605691e+17, |
|
"train_batch_size": 200, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|