multilingual_bias_estimation / trainer_state.json
DaniilOr's picture
Upload 12 files
abfa868 verified
{
"best_metric": 1.9327729940414429,
"best_model_checkpoint": "./results/checkpoint-322",
"epoch": 22.0,
"eval_steps": 500,
"global_step": 7084,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 1.0,
"eval_accuracy": 0.2501650165016502,
"eval_loss": 1.9327729940414429,
"eval_precision": 0.21039270770334143,
"eval_recall": 0.2501650165016502,
"eval_runtime": 105.6438,
"eval_samples_per_second": 86.044,
"eval_steps_per_second": 0.435,
"step": 322
},
{
"epoch": 1.55,
"learning_rate": 9.689440993788821e-05,
"loss": 1.5264,
"step": 500
},
{
"epoch": 2.0,
"eval_accuracy": 0.342024202420242,
"eval_loss": 2.632329225540161,
"eval_precision": 0.35262421551132295,
"eval_recall": 0.342024202420242,
"eval_runtime": 103.0666,
"eval_samples_per_second": 88.195,
"eval_steps_per_second": 0.446,
"step": 644
},
{
"epoch": 3.0,
"eval_accuracy": 0.3278327832783278,
"eval_loss": 2.9007134437561035,
"eval_precision": 0.34602885917564774,
"eval_recall": 0.3278327832783278,
"eval_runtime": 102.8287,
"eval_samples_per_second": 88.399,
"eval_steps_per_second": 0.447,
"step": 966
},
{
"epoch": 3.11,
"learning_rate": 9.37888198757764e-05,
"loss": 0.6172,
"step": 1000
},
{
"epoch": 4.0,
"eval_accuracy": 0.3423542354235424,
"eval_loss": 3.5746607780456543,
"eval_precision": 0.3512951146039761,
"eval_recall": 0.3423542354235424,
"eval_runtime": 101.7857,
"eval_samples_per_second": 89.305,
"eval_steps_per_second": 0.452,
"step": 1288
},
{
"epoch": 4.66,
"learning_rate": 9.068322981366461e-05,
"loss": 0.227,
"step": 1500
},
{
"epoch": 5.0,
"eval_accuracy": 0.3298129812981298,
"eval_loss": 4.564810276031494,
"eval_precision": 0.3549699546948268,
"eval_recall": 0.3298129812981298,
"eval_runtime": 101.7692,
"eval_samples_per_second": 89.32,
"eval_steps_per_second": 0.452,
"step": 1610
},
{
"epoch": 6.0,
"eval_accuracy": 0.3282728272827283,
"eval_loss": 4.280569553375244,
"eval_precision": 0.3536771477074885,
"eval_recall": 0.3282728272827283,
"eval_runtime": 103.0369,
"eval_samples_per_second": 88.221,
"eval_steps_per_second": 0.446,
"step": 1932
},
{
"epoch": 6.21,
"learning_rate": 8.757763975155279e-05,
"loss": 0.118,
"step": 2000
},
{
"epoch": 7.0,
"eval_accuracy": 0.3460946094609461,
"eval_loss": 4.623283386230469,
"eval_precision": 0.344094580634609,
"eval_recall": 0.3460946094609461,
"eval_runtime": 89.9963,
"eval_samples_per_second": 101.004,
"eval_steps_per_second": 0.511,
"step": 2254
},
{
"epoch": 7.76,
"learning_rate": 8.4472049689441e-05,
"loss": 0.0775,
"step": 2500
},
{
"epoch": 8.0,
"eval_accuracy": 0.3316831683168317,
"eval_loss": 4.871355056762695,
"eval_precision": 0.3530027915318833,
"eval_recall": 0.3316831683168317,
"eval_runtime": 90.9139,
"eval_samples_per_second": 99.985,
"eval_steps_per_second": 0.506,
"step": 2576
},
{
"epoch": 9.0,
"eval_accuracy": 0.34026402640264025,
"eval_loss": 4.957677364349365,
"eval_precision": 0.33301623037601086,
"eval_recall": 0.34026402640264025,
"eval_runtime": 100.8674,
"eval_samples_per_second": 90.118,
"eval_steps_per_second": 0.456,
"step": 2898
},
{
"epoch": 9.32,
"learning_rate": 8.136645962732919e-05,
"loss": 0.056,
"step": 3000
},
{
"epoch": 10.0,
"eval_accuracy": 0.33641364136413643,
"eval_loss": 5.228715419769287,
"eval_precision": 0.35135134065339246,
"eval_recall": 0.33641364136413643,
"eval_runtime": 101.1595,
"eval_samples_per_second": 89.858,
"eval_steps_per_second": 0.455,
"step": 3220
},
{
"epoch": 10.87,
"learning_rate": 7.82608695652174e-05,
"loss": 0.0418,
"step": 3500
},
{
"epoch": 11.0,
"eval_accuracy": 0.3397139713971397,
"eval_loss": 5.417286396026611,
"eval_precision": 0.36541577660239133,
"eval_recall": 0.3397139713971397,
"eval_runtime": 100.0336,
"eval_samples_per_second": 90.869,
"eval_steps_per_second": 0.46,
"step": 3542
},
{
"epoch": 12.0,
"eval_accuracy": 0.3288228822882288,
"eval_loss": 4.508035659790039,
"eval_precision": 0.3497733496975655,
"eval_recall": 0.3288228822882288,
"eval_runtime": 98.9066,
"eval_samples_per_second": 91.905,
"eval_steps_per_second": 0.465,
"step": 3864
},
{
"epoch": 12.42,
"learning_rate": 7.515527950310559e-05,
"loss": 0.0329,
"step": 4000
},
{
"epoch": 13.0,
"eval_accuracy": 0.33575357535753575,
"eval_loss": 5.399806022644043,
"eval_precision": 0.343976788298361,
"eval_recall": 0.33575357535753575,
"eval_runtime": 99.618,
"eval_samples_per_second": 91.249,
"eval_steps_per_second": 0.462,
"step": 4186
},
{
"epoch": 13.98,
"learning_rate": 7.20496894409938e-05,
"loss": 0.0255,
"step": 4500
},
{
"epoch": 14.0,
"eval_accuracy": 0.3254125412541254,
"eval_loss": 5.722477436065674,
"eval_precision": 0.37649139562875183,
"eval_recall": 0.3254125412541254,
"eval_runtime": 99.1531,
"eval_samples_per_second": 91.676,
"eval_steps_per_second": 0.464,
"step": 4508
},
{
"epoch": 15.0,
"eval_accuracy": 0.33707370737073705,
"eval_loss": 5.8459153175354,
"eval_precision": 0.3366756679639873,
"eval_recall": 0.33707370737073705,
"eval_runtime": 100.8652,
"eval_samples_per_second": 90.12,
"eval_steps_per_second": 0.456,
"step": 4830
},
{
"epoch": 15.53,
"learning_rate": 6.894409937888199e-05,
"loss": 0.0243,
"step": 5000
},
{
"epoch": 16.0,
"eval_accuracy": 0.35401540154015404,
"eval_loss": 5.645482540130615,
"eval_precision": 0.3631233149571772,
"eval_recall": 0.35401540154015404,
"eval_runtime": 99.6631,
"eval_samples_per_second": 91.207,
"eval_steps_per_second": 0.462,
"step": 5152
},
{
"epoch": 17.0,
"eval_accuracy": 0.3341034103410341,
"eval_loss": 5.483065605163574,
"eval_precision": 0.352613586631227,
"eval_recall": 0.3341034103410341,
"eval_runtime": 98.2468,
"eval_samples_per_second": 92.522,
"eval_steps_per_second": 0.468,
"step": 5474
},
{
"epoch": 17.08,
"learning_rate": 6.58385093167702e-05,
"loss": 0.0173,
"step": 5500
},
{
"epoch": 18.0,
"eval_accuracy": 0.3547854785478548,
"eval_loss": 5.973876476287842,
"eval_precision": 0.37074034210656404,
"eval_recall": 0.3547854785478548,
"eval_runtime": 99.9918,
"eval_samples_per_second": 90.907,
"eval_steps_per_second": 0.46,
"step": 5796
},
{
"epoch": 18.63,
"learning_rate": 6.273291925465838e-05,
"loss": 0.017,
"step": 6000
},
{
"epoch": 19.0,
"eval_accuracy": 0.3471947194719472,
"eval_loss": 5.270116329193115,
"eval_precision": 0.3540165595958133,
"eval_recall": 0.3471947194719472,
"eval_runtime": 98.3519,
"eval_samples_per_second": 92.423,
"eval_steps_per_second": 0.468,
"step": 6118
},
{
"epoch": 20.0,
"eval_accuracy": 0.34994499449944994,
"eval_loss": 6.121872901916504,
"eval_precision": 0.3608309273966228,
"eval_recall": 0.34994499449944994,
"eval_runtime": 98.8185,
"eval_samples_per_second": 91.987,
"eval_steps_per_second": 0.465,
"step": 6440
},
{
"epoch": 20.19,
"learning_rate": 5.962732919254659e-05,
"loss": 0.0152,
"step": 6500
},
{
"epoch": 21.0,
"eval_accuracy": 0.3448844884488449,
"eval_loss": 6.195789337158203,
"eval_precision": 0.3597641027560754,
"eval_recall": 0.3448844884488449,
"eval_runtime": 98.2972,
"eval_samples_per_second": 92.475,
"eval_steps_per_second": 0.468,
"step": 6762
},
{
"epoch": 21.74,
"learning_rate": 5.652173913043478e-05,
"loss": 0.011,
"step": 7000
},
{
"epoch": 22.0,
"eval_accuracy": 0.35687568756875687,
"eval_loss": 5.533013820648193,
"eval_precision": 0.3680252789908563,
"eval_recall": 0.35687568756875687,
"eval_runtime": 97.9019,
"eval_samples_per_second": 92.848,
"eval_steps_per_second": 0.47,
"step": 7084
}
],
"logging_steps": 500,
"max_steps": 16100,
"num_input_tokens_seen": 0,
"num_train_epochs": 50,
"save_steps": 500,
"total_flos": 3.727239090605691e+17,
"train_batch_size": 200,
"trial_name": null,
"trial_params": null
}