text-translit-detector-ru / trainer_state.json
alexue4's picture
End of training
750214d
raw
history blame
35.8 kB
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 40.0,
"eval_steps": 500,
"global_step": 106560,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0,
"learning_rate": 9.384384384384385e-09,
"loss": 30.0191,
"step": 1
},
{
"epoch": 0.2,
"learning_rate": 5.001876876876877e-06,
"loss": 28.4983,
"step": 533
},
{
"epoch": 0.4,
"learning_rate": 1.0003753753753754e-05,
"loss": 21.5164,
"step": 1066
},
{
"epoch": 0.6,
"learning_rate": 1.5005630630630632e-05,
"loss": 7.132,
"step": 1599
},
{
"epoch": 0.8,
"learning_rate": 2.000750750750751e-05,
"loss": 0.8541,
"step": 2132
},
{
"epoch": 1.0,
"eval_loss": 0.3403850197792053,
"eval_max_distance": 1,
"eval_mean_distance": 0,
"eval_runtime": 1.1154,
"eval_samples_per_second": 320.968,
"eval_steps_per_second": 21.517,
"step": 2664
},
{
"epoch": 1.0,
"learning_rate": 2.500938438438439e-05,
"loss": 0.3348,
"step": 2665
},
{
"epoch": 1.2,
"learning_rate": 3.0011261261261263e-05,
"loss": 0.5279,
"step": 3198
},
{
"epoch": 1.4,
"learning_rate": 3.501313813813814e-05,
"loss": 0.2294,
"step": 3731
},
{
"epoch": 1.6,
"learning_rate": 4.001501501501502e-05,
"loss": 0.0605,
"step": 4264
},
{
"epoch": 1.8,
"learning_rate": 4.5016891891891895e-05,
"loss": 0.0451,
"step": 4797
},
{
"epoch": 2.0,
"eval_loss": 0.060470160096883774,
"eval_max_distance": 1,
"eval_mean_distance": 0,
"eval_runtime": 0.9182,
"eval_samples_per_second": 389.888,
"eval_steps_per_second": 26.138,
"step": 5328
},
{
"epoch": 2.0,
"learning_rate": 5.001876876876878e-05,
"loss": 0.0605,
"step": 5330
},
{
"epoch": 2.2,
"learning_rate": 5.502064564564565e-05,
"loss": 0.025,
"step": 5863
},
{
"epoch": 2.4,
"learning_rate": 6.0022522522522526e-05,
"loss": 0.0193,
"step": 6396
},
{
"epoch": 2.6,
"learning_rate": 6.502439939939941e-05,
"loss": 0.0153,
"step": 6929
},
{
"epoch": 2.8,
"learning_rate": 7.002627627627628e-05,
"loss": 0.0112,
"step": 7462
},
{
"epoch": 3.0,
"eval_loss": 0.04112406447529793,
"eval_max_distance": 1,
"eval_mean_distance": 0,
"eval_runtime": 0.9146,
"eval_samples_per_second": 391.418,
"eval_steps_per_second": 26.24,
"step": 7992
},
{
"epoch": 3.0,
"learning_rate": 7.502815315315315e-05,
"loss": 0.0109,
"step": 7995
},
{
"epoch": 3.2,
"learning_rate": 8.003003003003004e-05,
"loss": 0.0095,
"step": 8528
},
{
"epoch": 3.4,
"learning_rate": 8.50319069069069e-05,
"loss": 0.012,
"step": 9061
},
{
"epoch": 3.6,
"learning_rate": 9.003378378378379e-05,
"loss": 0.0223,
"step": 9594
},
{
"epoch": 3.8,
"learning_rate": 9.503566066066066e-05,
"loss": 0.0068,
"step": 10127
},
{
"epoch": 4.0,
"eval_loss": 0.020507752895355225,
"eval_max_distance": 1,
"eval_mean_distance": 0,
"eval_runtime": 0.9253,
"eval_samples_per_second": 386.912,
"eval_steps_per_second": 25.938,
"step": 10656
},
{
"epoch": 4.0,
"learning_rate": 9.999582916249583e-05,
"loss": 0.0077,
"step": 10660
},
{
"epoch": 4.2,
"learning_rate": 9.944006506506507e-05,
"loss": 0.0058,
"step": 11193
},
{
"epoch": 4.4,
"learning_rate": 9.88843009676343e-05,
"loss": 0.0072,
"step": 11726
},
{
"epoch": 4.6,
"learning_rate": 9.832853687020355e-05,
"loss": 0.0097,
"step": 12259
},
{
"epoch": 4.8,
"learning_rate": 9.777277277277279e-05,
"loss": 0.007,
"step": 12792
},
{
"epoch": 5.0,
"eval_loss": 0.02420434169471264,
"eval_max_distance": 1,
"eval_mean_distance": 0,
"eval_runtime": 0.9141,
"eval_samples_per_second": 391.658,
"eval_steps_per_second": 26.256,
"step": 13320
},
{
"epoch": 5.0,
"learning_rate": 9.721700867534201e-05,
"loss": 0.0049,
"step": 13325
},
{
"epoch": 5.2,
"learning_rate": 9.666124457791124e-05,
"loss": 0.005,
"step": 13858
},
{
"epoch": 5.4,
"learning_rate": 9.610548048048048e-05,
"loss": 0.0035,
"step": 14391
},
{
"epoch": 5.6,
"learning_rate": 9.554971638304973e-05,
"loss": 0.0061,
"step": 14924
},
{
"epoch": 5.8,
"learning_rate": 9.499395228561896e-05,
"loss": 0.0022,
"step": 15457
},
{
"epoch": 6.0,
"eval_loss": 0.027173461392521858,
"eval_max_distance": 1,
"eval_mean_distance": 0,
"eval_runtime": 0.9101,
"eval_samples_per_second": 393.378,
"eval_steps_per_second": 26.372,
"step": 15984
},
{
"epoch": 6.0,
"learning_rate": 9.44381881881882e-05,
"loss": 0.0048,
"step": 15990
},
{
"epoch": 6.2,
"learning_rate": 9.388242409075743e-05,
"loss": 0.0053,
"step": 16523
},
{
"epoch": 6.4,
"learning_rate": 9.332665999332665e-05,
"loss": 0.005,
"step": 17056
},
{
"epoch": 6.6,
"learning_rate": 9.27708958958959e-05,
"loss": 0.0034,
"step": 17589
},
{
"epoch": 6.8,
"learning_rate": 9.221513179846514e-05,
"loss": 0.0054,
"step": 18122
},
{
"epoch": 7.0,
"eval_loss": 0.008011276833713055,
"eval_max_distance": 1,
"eval_mean_distance": 0,
"eval_runtime": 0.9187,
"eval_samples_per_second": 389.697,
"eval_steps_per_second": 26.125,
"step": 18648
},
{
"epoch": 7.0,
"learning_rate": 9.165936770103437e-05,
"loss": 0.0036,
"step": 18655
},
{
"epoch": 7.2,
"learning_rate": 9.110360360360361e-05,
"loss": 0.002,
"step": 19188
},
{
"epoch": 7.4,
"learning_rate": 9.054783950617284e-05,
"loss": 0.0029,
"step": 19721
},
{
"epoch": 7.6,
"learning_rate": 8.999207540874208e-05,
"loss": 0.0031,
"step": 20254
},
{
"epoch": 7.8,
"learning_rate": 8.943631131131131e-05,
"loss": 0.0036,
"step": 20787
},
{
"epoch": 8.0,
"eval_loss": 0.025212394073605537,
"eval_max_distance": 1,
"eval_mean_distance": 0,
"eval_runtime": 0.9136,
"eval_samples_per_second": 391.872,
"eval_steps_per_second": 26.271,
"step": 21312
},
{
"epoch": 8.0,
"learning_rate": 8.888054721388055e-05,
"loss": 0.0022,
"step": 21320
},
{
"epoch": 8.2,
"learning_rate": 8.832478311644978e-05,
"loss": 0.0021,
"step": 21853
},
{
"epoch": 8.4,
"learning_rate": 8.776901901901903e-05,
"loss": 0.0016,
"step": 22386
},
{
"epoch": 8.6,
"learning_rate": 8.721325492158827e-05,
"loss": 0.0021,
"step": 22919
},
{
"epoch": 8.8,
"learning_rate": 8.665749082415749e-05,
"loss": 0.0039,
"step": 23452
},
{
"epoch": 9.0,
"eval_loss": 0.020978303626179695,
"eval_max_distance": 1,
"eval_mean_distance": 0,
"eval_runtime": 0.9126,
"eval_samples_per_second": 392.285,
"eval_steps_per_second": 26.298,
"step": 23976
},
{
"epoch": 9.0,
"learning_rate": 8.610172672672672e-05,
"loss": 0.0032,
"step": 23985
},
{
"epoch": 9.2,
"learning_rate": 8.554596262929596e-05,
"loss": 0.0027,
"step": 24518
},
{
"epoch": 9.4,
"learning_rate": 8.499019853186521e-05,
"loss": 0.0022,
"step": 25051
},
{
"epoch": 9.6,
"learning_rate": 8.443443443443444e-05,
"loss": 0.002,
"step": 25584
},
{
"epoch": 9.8,
"learning_rate": 8.387867033700368e-05,
"loss": 0.0026,
"step": 26117
},
{
"epoch": 10.0,
"eval_loss": 0.017031751573085785,
"eval_max_distance": 1,
"eval_mean_distance": 0,
"eval_runtime": 0.9145,
"eval_samples_per_second": 391.481,
"eval_steps_per_second": 26.245,
"step": 26640
},
{
"epoch": 10.0,
"learning_rate": 8.332290623957291e-05,
"loss": 0.0014,
"step": 26650
},
{
"epoch": 10.2,
"learning_rate": 8.276714214214215e-05,
"loss": 0.0016,
"step": 27183
},
{
"epoch": 10.4,
"learning_rate": 8.221137804471138e-05,
"loss": 0.0045,
"step": 27716
},
{
"epoch": 10.6,
"learning_rate": 8.165561394728062e-05,
"loss": 0.0017,
"step": 28249
},
{
"epoch": 10.8,
"learning_rate": 8.109984984984985e-05,
"loss": 0.0026,
"step": 28782
},
{
"epoch": 11.0,
"eval_loss": 0.004335461650043726,
"eval_max_distance": 1,
"eval_mean_distance": 0,
"eval_runtime": 0.9104,
"eval_samples_per_second": 393.245,
"eval_steps_per_second": 26.363,
"step": 29304
},
{
"epoch": 11.0,
"learning_rate": 8.054408575241909e-05,
"loss": 0.0016,
"step": 29315
},
{
"epoch": 11.2,
"learning_rate": 7.998832165498832e-05,
"loss": 0.0006,
"step": 29848
},
{
"epoch": 11.4,
"learning_rate": 7.943255755755756e-05,
"loss": 0.0011,
"step": 30381
},
{
"epoch": 11.6,
"learning_rate": 7.88767934601268e-05,
"loss": 0.0009,
"step": 30914
},
{
"epoch": 11.8,
"learning_rate": 7.832102936269603e-05,
"loss": 0.0029,
"step": 31447
},
{
"epoch": 12.0,
"eval_loss": 0.013542454689741135,
"eval_max_distance": 1,
"eval_mean_distance": 0,
"eval_runtime": 0.9125,
"eval_samples_per_second": 392.323,
"eval_steps_per_second": 26.301,
"step": 31968
},
{
"epoch": 12.0,
"learning_rate": 7.776526526526526e-05,
"loss": 0.0029,
"step": 31980
},
{
"epoch": 12.2,
"learning_rate": 7.720950116783451e-05,
"loss": 0.0008,
"step": 32513
},
{
"epoch": 12.4,
"learning_rate": 7.665373707040375e-05,
"loss": 0.0027,
"step": 33046
},
{
"epoch": 12.6,
"learning_rate": 7.609797297297297e-05,
"loss": 0.0016,
"step": 33579
},
{
"epoch": 12.8,
"learning_rate": 7.55422088755422e-05,
"loss": 0.0011,
"step": 34112
},
{
"epoch": 13.0,
"eval_loss": 0.03128792718052864,
"eval_max_distance": 1,
"eval_mean_distance": 0,
"eval_runtime": 0.9117,
"eval_samples_per_second": 392.662,
"eval_steps_per_second": 26.324,
"step": 34632
},
{
"epoch": 13.0,
"learning_rate": 7.498644477811145e-05,
"loss": 0.0007,
"step": 34645
},
{
"epoch": 13.2,
"learning_rate": 7.443068068068069e-05,
"loss": 0.0014,
"step": 35178
},
{
"epoch": 13.41,
"learning_rate": 7.387491658324992e-05,
"loss": 0.0067,
"step": 35711
},
{
"epoch": 13.61,
"learning_rate": 7.331915248581916e-05,
"loss": 0.0008,
"step": 36244
},
{
"epoch": 13.81,
"learning_rate": 7.27633883883884e-05,
"loss": 0.0017,
"step": 36777
},
{
"epoch": 14.0,
"eval_loss": 0.03530227765440941,
"eval_max_distance": 1,
"eval_mean_distance": 0,
"eval_runtime": 0.9178,
"eval_samples_per_second": 390.083,
"eval_steps_per_second": 26.151,
"step": 37296
},
{
"epoch": 14.01,
"learning_rate": 7.220762429095763e-05,
"loss": 0.001,
"step": 37310
},
{
"epoch": 14.21,
"learning_rate": 7.165186019352686e-05,
"loss": 0.001,
"step": 37843
},
{
"epoch": 14.41,
"learning_rate": 7.10960960960961e-05,
"loss": 0.0012,
"step": 38376
},
{
"epoch": 14.61,
"learning_rate": 7.054033199866533e-05,
"loss": 0.0011,
"step": 38909
},
{
"epoch": 14.81,
"learning_rate": 6.998456790123458e-05,
"loss": 0.0014,
"step": 39442
},
{
"epoch": 15.0,
"eval_loss": 0.011675473302602768,
"eval_max_distance": 1,
"eval_mean_distance": 0,
"eval_runtime": 0.916,
"eval_samples_per_second": 390.823,
"eval_steps_per_second": 26.2,
"step": 39960
},
{
"epoch": 15.01,
"learning_rate": 6.94288038038038e-05,
"loss": 0.001,
"step": 39975
},
{
"epoch": 15.21,
"learning_rate": 6.887303970637304e-05,
"loss": 0.0003,
"step": 40508
},
{
"epoch": 15.41,
"learning_rate": 6.831727560894227e-05,
"loss": 0.002,
"step": 41041
},
{
"epoch": 15.61,
"learning_rate": 6.776151151151151e-05,
"loss": 0.001,
"step": 41574
},
{
"epoch": 15.81,
"learning_rate": 6.720574741408076e-05,
"loss": 0.0014,
"step": 42107
},
{
"epoch": 16.0,
"eval_loss": 0.0139808664098382,
"eval_max_distance": 1,
"eval_mean_distance": 0,
"eval_runtime": 0.9131,
"eval_samples_per_second": 392.074,
"eval_steps_per_second": 26.284,
"step": 42624
},
{
"epoch": 16.01,
"learning_rate": 6.664998331665e-05,
"loss": 0.0007,
"step": 42640
},
{
"epoch": 16.21,
"learning_rate": 6.609421921921923e-05,
"loss": 0.0004,
"step": 43173
},
{
"epoch": 16.41,
"learning_rate": 6.553845512178845e-05,
"loss": 0.0006,
"step": 43706
},
{
"epoch": 16.61,
"learning_rate": 6.498269102435769e-05,
"loss": 0.0017,
"step": 44239
},
{
"epoch": 16.81,
"learning_rate": 6.442692692692693e-05,
"loss": 0.0013,
"step": 44772
},
{
"epoch": 17.0,
"eval_loss": 0.022025227546691895,
"eval_max_distance": 1,
"eval_mean_distance": 0,
"eval_runtime": 0.9176,
"eval_samples_per_second": 390.127,
"eval_steps_per_second": 26.154,
"step": 45288
},
{
"epoch": 17.01,
"learning_rate": 6.387116282949617e-05,
"loss": 0.0013,
"step": 45305
},
{
"epoch": 17.21,
"learning_rate": 6.33153987320654e-05,
"loss": 0.0003,
"step": 45838
},
{
"epoch": 17.41,
"learning_rate": 6.275963463463464e-05,
"loss": 0.0011,
"step": 46371
},
{
"epoch": 17.61,
"learning_rate": 6.220387053720387e-05,
"loss": 0.0004,
"step": 46904
},
{
"epoch": 17.81,
"learning_rate": 6.164810643977311e-05,
"loss": 0.0009,
"step": 47437
},
{
"epoch": 18.0,
"eval_loss": 0.024678541347384453,
"eval_max_distance": 1,
"eval_mean_distance": 0,
"eval_runtime": 0.908,
"eval_samples_per_second": 394.28,
"eval_steps_per_second": 26.432,
"step": 47952
},
{
"epoch": 18.01,
"learning_rate": 6.109234234234234e-05,
"loss": 0.0023,
"step": 47970
},
{
"epoch": 18.21,
"learning_rate": 6.053657824491158e-05,
"loss": 0.0007,
"step": 48503
},
{
"epoch": 18.41,
"learning_rate": 5.9980814147480815e-05,
"loss": 0.0015,
"step": 49036
},
{
"epoch": 18.61,
"learning_rate": 5.9425050050050057e-05,
"loss": 0.0004,
"step": 49569
},
{
"epoch": 18.81,
"learning_rate": 5.886928595261929e-05,
"loss": 0.0017,
"step": 50102
},
{
"epoch": 19.0,
"eval_loss": 0.03220739960670471,
"eval_max_distance": 1,
"eval_mean_distance": 0,
"eval_runtime": 0.9151,
"eval_samples_per_second": 391.2,
"eval_steps_per_second": 26.226,
"step": 50616
},
{
"epoch": 19.01,
"learning_rate": 5.831352185518853e-05,
"loss": 0.0011,
"step": 50635
},
{
"epoch": 19.21,
"learning_rate": 5.7757757757757755e-05,
"loss": 0.0009,
"step": 51168
},
{
"epoch": 19.41,
"learning_rate": 5.720199366032699e-05,
"loss": 0.0005,
"step": 51701
},
{
"epoch": 19.61,
"learning_rate": 5.664622956289624e-05,
"loss": 0.0011,
"step": 52234
},
{
"epoch": 19.81,
"learning_rate": 5.609046546546547e-05,
"loss": 0.0022,
"step": 52767
},
{
"epoch": 20.0,
"eval_loss": 0.0314439982175827,
"eval_max_distance": 1,
"eval_mean_distance": 0,
"eval_runtime": 0.9074,
"eval_samples_per_second": 394.526,
"eval_steps_per_second": 26.449,
"step": 53280
},
{
"epoch": 20.01,
"learning_rate": 5.55347013680347e-05,
"loss": 0.001,
"step": 53300
},
{
"epoch": 20.21,
"learning_rate": 5.497893727060394e-05,
"loss": 0.001,
"step": 53833
},
{
"epoch": 20.41,
"learning_rate": 5.442317317317318e-05,
"loss": 0.0,
"step": 54366
},
{
"epoch": 20.61,
"learning_rate": 5.3867409075742415e-05,
"loss": 0.0015,
"step": 54899
},
{
"epoch": 20.81,
"learning_rate": 5.331164497831165e-05,
"loss": 0.0006,
"step": 55432
},
{
"epoch": 21.0,
"eval_loss": 0.030524656176567078,
"eval_max_distance": 1,
"eval_mean_distance": 0,
"eval_runtime": 0.9097,
"eval_samples_per_second": 393.555,
"eval_steps_per_second": 26.384,
"step": 55944
},
{
"epoch": 21.01,
"learning_rate": 5.275588088088088e-05,
"loss": 0.0005,
"step": 55965
},
{
"epoch": 21.21,
"learning_rate": 5.220011678345011e-05,
"loss": 0.0006,
"step": 56498
},
{
"epoch": 21.41,
"learning_rate": 5.164435268601936e-05,
"loss": 0.0012,
"step": 57031
},
{
"epoch": 21.61,
"learning_rate": 5.108858858858859e-05,
"loss": 0.0005,
"step": 57564
},
{
"epoch": 21.81,
"learning_rate": 5.0532824491157825e-05,
"loss": 0.001,
"step": 58097
},
{
"epoch": 22.0,
"eval_loss": 0.029209736734628677,
"eval_max_distance": 1,
"eval_mean_distance": 0,
"eval_runtime": 0.9114,
"eval_samples_per_second": 392.814,
"eval_steps_per_second": 26.334,
"step": 58608
},
{
"epoch": 22.01,
"learning_rate": 4.997706039372707e-05,
"loss": 0.0007,
"step": 58630
},
{
"epoch": 22.21,
"learning_rate": 4.94212962962963e-05,
"loss": 0.0011,
"step": 59163
},
{
"epoch": 22.41,
"learning_rate": 4.886553219886553e-05,
"loss": 0.0004,
"step": 59696
},
{
"epoch": 22.61,
"learning_rate": 4.830976810143477e-05,
"loss": 0.001,
"step": 60229
},
{
"epoch": 22.81,
"learning_rate": 4.775400400400401e-05,
"loss": 0.0008,
"step": 60762
},
{
"epoch": 23.0,
"eval_loss": 0.03728558123111725,
"eval_max_distance": 1,
"eval_mean_distance": 0,
"eval_runtime": 0.9095,
"eval_samples_per_second": 393.613,
"eval_steps_per_second": 26.387,
"step": 61272
},
{
"epoch": 23.01,
"learning_rate": 4.719823990657324e-05,
"loss": 0.0001,
"step": 61295
},
{
"epoch": 23.21,
"learning_rate": 4.664247580914248e-05,
"loss": 0.0006,
"step": 61828
},
{
"epoch": 23.41,
"learning_rate": 4.608671171171172e-05,
"loss": 0.0002,
"step": 62361
},
{
"epoch": 23.61,
"learning_rate": 4.553094761428095e-05,
"loss": 0.0009,
"step": 62894
},
{
"epoch": 23.81,
"learning_rate": 4.497518351685018e-05,
"loss": 0.0008,
"step": 63427
},
{
"epoch": 24.0,
"eval_loss": 0.030942877754569054,
"eval_max_distance": 1,
"eval_mean_distance": 0,
"eval_runtime": 0.9149,
"eval_samples_per_second": 391.282,
"eval_steps_per_second": 26.231,
"step": 63936
},
{
"epoch": 24.01,
"learning_rate": 4.4419419419419425e-05,
"loss": 0.0003,
"step": 63960
},
{
"epoch": 24.21,
"learning_rate": 4.386365532198865e-05,
"loss": 0.0007,
"step": 64493
},
{
"epoch": 24.41,
"learning_rate": 4.3307891224557895e-05,
"loss": 0.0004,
"step": 65026
},
{
"epoch": 24.61,
"learning_rate": 4.275212712712713e-05,
"loss": 0.0001,
"step": 65559
},
{
"epoch": 24.81,
"learning_rate": 4.2196363029696365e-05,
"loss": 0.0008,
"step": 66092
},
{
"epoch": 25.0,
"eval_loss": 0.038451410830020905,
"eval_max_distance": 1,
"eval_mean_distance": 0,
"eval_runtime": 0.9087,
"eval_samples_per_second": 393.969,
"eval_steps_per_second": 26.411,
"step": 66600
},
{
"epoch": 25.01,
"learning_rate": 4.16405989322656e-05,
"loss": 0.0006,
"step": 66625
},
{
"epoch": 25.21,
"learning_rate": 4.1084834834834836e-05,
"loss": 0.0,
"step": 67158
},
{
"epoch": 25.41,
"learning_rate": 4.052907073740407e-05,
"loss": 0.0013,
"step": 67691
},
{
"epoch": 25.61,
"learning_rate": 3.9973306639973306e-05,
"loss": 0.0001,
"step": 68224
},
{
"epoch": 25.81,
"learning_rate": 3.941754254254255e-05,
"loss": 0.0014,
"step": 68757
},
{
"epoch": 26.0,
"eval_loss": 0.0133729362860322,
"eval_max_distance": 1,
"eval_mean_distance": 0,
"eval_runtime": 0.9209,
"eval_samples_per_second": 388.755,
"eval_steps_per_second": 26.062,
"step": 69264
},
{
"epoch": 26.01,
"learning_rate": 3.8861778445111776e-05,
"loss": 0.0001,
"step": 69290
},
{
"epoch": 26.21,
"learning_rate": 3.830601434768102e-05,
"loss": 0.0008,
"step": 69823
},
{
"epoch": 26.41,
"learning_rate": 3.775025025025025e-05,
"loss": 0.0008,
"step": 70356
},
{
"epoch": 26.61,
"learning_rate": 3.719448615281949e-05,
"loss": 0.0002,
"step": 70889
},
{
"epoch": 26.81,
"learning_rate": 3.663872205538872e-05,
"loss": 0.0004,
"step": 71422
},
{
"epoch": 27.0,
"eval_loss": 0.023867754265666008,
"eval_max_distance": 1,
"eval_mean_distance": 0,
"eval_runtime": 0.9071,
"eval_samples_per_second": 394.647,
"eval_steps_per_second": 26.457,
"step": 71928
},
{
"epoch": 27.01,
"learning_rate": 3.608295795795796e-05,
"loss": 0.0009,
"step": 71955
},
{
"epoch": 27.21,
"learning_rate": 3.55271938605272e-05,
"loss": 0.0005,
"step": 72488
},
{
"epoch": 27.41,
"learning_rate": 3.497142976309643e-05,
"loss": 0.0007,
"step": 73021
},
{
"epoch": 27.61,
"learning_rate": 3.441566566566567e-05,
"loss": 0.0001,
"step": 73554
},
{
"epoch": 27.81,
"learning_rate": 3.3859901568234906e-05,
"loss": 0.0011,
"step": 74087
},
{
"epoch": 28.0,
"eval_loss": 0.01642591878771782,
"eval_max_distance": 1,
"eval_mean_distance": 0,
"eval_runtime": 0.9124,
"eval_samples_per_second": 392.371,
"eval_steps_per_second": 26.304,
"step": 74592
},
{
"epoch": 28.01,
"learning_rate": 3.3304137470804134e-05,
"loss": 0.0006,
"step": 74620
},
{
"epoch": 28.21,
"learning_rate": 3.2748373373373376e-05,
"loss": 0.0008,
"step": 75153
},
{
"epoch": 28.41,
"learning_rate": 3.219260927594261e-05,
"loss": 0.0005,
"step": 75686
},
{
"epoch": 28.61,
"learning_rate": 3.1636845178511846e-05,
"loss": 0.0002,
"step": 76219
},
{
"epoch": 28.81,
"learning_rate": 3.108108108108108e-05,
"loss": 0.0002,
"step": 76752
},
{
"epoch": 29.0,
"eval_loss": 0.018625039607286453,
"eval_max_distance": 1,
"eval_mean_distance": 0,
"eval_runtime": 0.9144,
"eval_samples_per_second": 391.524,
"eval_steps_per_second": 26.247,
"step": 77256
},
{
"epoch": 29.01,
"learning_rate": 3.052531698365032e-05,
"loss": 0.0003,
"step": 77285
},
{
"epoch": 29.21,
"learning_rate": 2.9969552886219555e-05,
"loss": 0.0002,
"step": 77818
},
{
"epoch": 29.41,
"learning_rate": 2.9413788788788787e-05,
"loss": 0.0001,
"step": 78351
},
{
"epoch": 29.61,
"learning_rate": 2.8858024691358025e-05,
"loss": 0.0013,
"step": 78884
},
{
"epoch": 29.81,
"learning_rate": 2.830226059392726e-05,
"loss": 0.0001,
"step": 79417
},
{
"epoch": 30.0,
"eval_loss": 0.029812639579176903,
"eval_max_distance": 1,
"eval_mean_distance": 0,
"eval_runtime": 0.9075,
"eval_samples_per_second": 394.481,
"eval_steps_per_second": 26.446,
"step": 79920
},
{
"epoch": 30.01,
"learning_rate": 2.77464964964965e-05,
"loss": 0.0012,
"step": 79950
},
{
"epoch": 30.21,
"learning_rate": 2.7190732399065734e-05,
"loss": 0.0003,
"step": 80483
},
{
"epoch": 30.41,
"learning_rate": 2.6634968301634972e-05,
"loss": 0.0001,
"step": 81016
},
{
"epoch": 30.61,
"learning_rate": 2.6079204204204204e-05,
"loss": 0.0004,
"step": 81549
},
{
"epoch": 30.81,
"learning_rate": 2.5523440106773443e-05,
"loss": 0.0008,
"step": 82082
},
{
"epoch": 31.0,
"eval_loss": 0.027695728465914726,
"eval_max_distance": 1,
"eval_mean_distance": 0,
"eval_runtime": 0.9096,
"eval_samples_per_second": 393.574,
"eval_steps_per_second": 26.385,
"step": 82584
},
{
"epoch": 31.01,
"learning_rate": 2.4967676009342678e-05,
"loss": 0.0008,
"step": 82615
},
{
"epoch": 31.21,
"learning_rate": 2.4411911911911913e-05,
"loss": 0.0003,
"step": 83148
},
{
"epoch": 31.41,
"learning_rate": 2.385614781448115e-05,
"loss": 0.0003,
"step": 83681
},
{
"epoch": 31.61,
"learning_rate": 2.3300383717050383e-05,
"loss": 0.0002,
"step": 84214
},
{
"epoch": 31.81,
"learning_rate": 2.2744619619619618e-05,
"loss": 0.0003,
"step": 84747
},
{
"epoch": 32.0,
"eval_loss": 0.03773302584886551,
"eval_max_distance": 1,
"eval_mean_distance": 0,
"eval_runtime": 0.9096,
"eval_samples_per_second": 393.585,
"eval_steps_per_second": 26.386,
"step": 85248
},
{
"epoch": 32.01,
"learning_rate": 2.2188855522188857e-05,
"loss": 0.0007,
"step": 85280
},
{
"epoch": 32.21,
"learning_rate": 2.1633091424758092e-05,
"loss": 0.001,
"step": 85813
},
{
"epoch": 32.41,
"learning_rate": 2.107732732732733e-05,
"loss": 0.0002,
"step": 86346
},
{
"epoch": 32.61,
"learning_rate": 2.0521563229896565e-05,
"loss": 0.0002,
"step": 86879
},
{
"epoch": 32.81,
"learning_rate": 1.99657991324658e-05,
"loss": 0.0003,
"step": 87412
},
{
"epoch": 33.0,
"eval_loss": 0.03536462038755417,
"eval_max_distance": 1,
"eval_mean_distance": 0,
"eval_runtime": 0.9089,
"eval_samples_per_second": 393.87,
"eval_steps_per_second": 26.405,
"step": 87912
},
{
"epoch": 33.01,
"learning_rate": 1.9410035035035036e-05,
"loss": 0.0006,
"step": 87945
},
{
"epoch": 33.21,
"learning_rate": 1.885427093760427e-05,
"loss": 0.0003,
"step": 88478
},
{
"epoch": 33.41,
"learning_rate": 1.8298506840173506e-05,
"loss": 0.0003,
"step": 89011
},
{
"epoch": 33.61,
"learning_rate": 1.7742742742742744e-05,
"loss": 0.0006,
"step": 89544
},
{
"epoch": 33.81,
"learning_rate": 1.718697864531198e-05,
"loss": 0.0007,
"step": 90077
},
{
"epoch": 34.0,
"eval_loss": 0.05854496732354164,
"eval_max_distance": 1,
"eval_mean_distance": 0,
"eval_runtime": 0.9101,
"eval_samples_per_second": 393.356,
"eval_steps_per_second": 26.37,
"step": 90576
},
{
"epoch": 34.01,
"learning_rate": 1.6631214547881215e-05,
"loss": 0.0002,
"step": 90610
},
{
"epoch": 34.21,
"learning_rate": 1.6075450450450453e-05,
"loss": 0.0006,
"step": 91143
},
{
"epoch": 34.41,
"learning_rate": 1.5519686353019688e-05,
"loss": 0.0003,
"step": 91676
},
{
"epoch": 34.61,
"learning_rate": 1.4963922255588922e-05,
"loss": 0.0006,
"step": 92209
},
{
"epoch": 34.81,
"learning_rate": 1.4408158158158158e-05,
"loss": 0.0005,
"step": 92742
},
{
"epoch": 35.0,
"eval_loss": 0.05680559575557709,
"eval_max_distance": 1,
"eval_mean_distance": 0,
"eval_runtime": 0.9087,
"eval_samples_per_second": 393.956,
"eval_steps_per_second": 26.41,
"step": 93240
},
{
"epoch": 35.01,
"learning_rate": 1.3852394060727395e-05,
"loss": 0.0,
"step": 93275
},
{
"epoch": 35.21,
"learning_rate": 1.329662996329663e-05,
"loss": 0.0005,
"step": 93808
},
{
"epoch": 35.41,
"learning_rate": 1.2740865865865867e-05,
"loss": 0.0002,
"step": 94341
},
{
"epoch": 35.61,
"learning_rate": 1.2185101768435102e-05,
"loss": 0.0007,
"step": 94874
},
{
"epoch": 35.81,
"learning_rate": 1.1629337671004337e-05,
"loss": 0.0001,
"step": 95407
},
{
"epoch": 36.0,
"eval_loss": 0.05670797452330589,
"eval_max_distance": 1,
"eval_mean_distance": 0,
"eval_runtime": 0.9196,
"eval_samples_per_second": 389.279,
"eval_steps_per_second": 26.097,
"step": 95904
},
{
"epoch": 36.01,
"learning_rate": 1.1073573573573574e-05,
"loss": 0.0003,
"step": 95940
},
{
"epoch": 36.21,
"learning_rate": 1.0517809476142811e-05,
"loss": 0.0004,
"step": 96473
},
{
"epoch": 36.41,
"learning_rate": 9.962045378712046e-06,
"loss": 0.0002,
"step": 97006
},
{
"epoch": 36.61,
"learning_rate": 9.406281281281281e-06,
"loss": 0.0006,
"step": 97539
},
{
"epoch": 36.81,
"learning_rate": 8.850517183850518e-06,
"loss": 0.0009,
"step": 98072
},
{
"epoch": 37.0,
"eval_loss": 0.060491062700748444,
"eval_max_distance": 1,
"eval_mean_distance": 0,
"eval_runtime": 0.9347,
"eval_samples_per_second": 383.022,
"eval_steps_per_second": 25.677,
"step": 98568
},
{
"epoch": 37.01,
"learning_rate": 8.294753086419753e-06,
"loss": 0.0002,
"step": 98605
},
{
"epoch": 37.21,
"learning_rate": 7.738988988988988e-06,
"loss": 0.001,
"step": 99138
},
{
"epoch": 37.41,
"learning_rate": 7.183224891558225e-06,
"loss": 0.0003,
"step": 99671
},
{
"epoch": 37.61,
"learning_rate": 6.627460794127462e-06,
"loss": 0.0001,
"step": 100204
},
{
"epoch": 37.81,
"learning_rate": 6.071696696696697e-06,
"loss": 0.0002,
"step": 100737
},
{
"epoch": 38.0,
"eval_loss": 0.06128498166799545,
"eval_max_distance": 1,
"eval_mean_distance": 0,
"eval_runtime": 0.9116,
"eval_samples_per_second": 392.708,
"eval_steps_per_second": 26.327,
"step": 101232
},
{
"epoch": 38.01,
"learning_rate": 5.515932599265933e-06,
"loss": 0.0002,
"step": 101270
},
{
"epoch": 38.21,
"learning_rate": 4.960168501835169e-06,
"loss": 0.0002,
"step": 101803
},
{
"epoch": 38.41,
"learning_rate": 4.404404404404405e-06,
"loss": 0.0001,
"step": 102336
},
{
"epoch": 38.61,
"learning_rate": 3.848640306973641e-06,
"loss": 0.0006,
"step": 102869
},
{
"epoch": 38.81,
"learning_rate": 3.2928762095428764e-06,
"loss": 0.0002,
"step": 103402
},
{
"epoch": 39.0,
"eval_loss": 0.05631242319941521,
"eval_max_distance": 1,
"eval_mean_distance": 0,
"eval_runtime": 0.9146,
"eval_samples_per_second": 391.433,
"eval_steps_per_second": 26.241,
"step": 103896
},
{
"epoch": 39.01,
"learning_rate": 2.7371121121121123e-06,
"loss": 0.0004,
"step": 103935
},
{
"epoch": 39.21,
"learning_rate": 2.1813480146813483e-06,
"loss": 0.0004,
"step": 104468
},
{
"epoch": 39.41,
"learning_rate": 1.625583917250584e-06,
"loss": 0.0001,
"step": 105001
},
{
"epoch": 39.61,
"learning_rate": 1.0698198198198198e-06,
"loss": 0.0013,
"step": 105534
},
{
"epoch": 39.81,
"learning_rate": 5.140557223890558e-07,
"loss": 0.0002,
"step": 106067
},
{
"epoch": 40.0,
"eval_loss": 0.06320372968912125,
"eval_max_distance": 1,
"eval_mean_distance": 0,
"eval_runtime": 0.9155,
"eval_samples_per_second": 391.055,
"eval_steps_per_second": 26.216,
"step": 106560
},
{
"epoch": 40.0,
"step": 106560,
"total_flos": 6.005678715251712e+16,
"train_loss": 0.2983123329788039,
"train_runtime": 9788.1362,
"train_samples_per_second": 163.251,
"train_steps_per_second": 10.887
}
],
"logging_steps": 533,
"max_steps": 106560,
"num_train_epochs": 40,
"save_steps": 1066,
"total_flos": 6.005678715251712e+16,
"trial_name": null,
"trial_params": null
}