xls-r-greek-cretan / trainer_state.json
ctsoukala's picture
Upload 12 files
bf041c5 verified
raw
history blame
12.6 kB
{
"best_metric": 0.31333859510655093,
"best_model_checkpoint": "xls-r-greek-cretan/checkpoint-3894",
"epoch": 35.0,
"eval_steps": 500,
"global_step": 4130,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 1.0,
"eval_cer": 0.9992756772417789,
"eval_loss": 3.404292583465576,
"eval_runtime": 6.277,
"eval_samples_per_second": 37.597,
"eval_steps_per_second": 4.779,
"eval_wer": 1.0,
"step": 118
},
{
"epoch": 1.69,
"learning_rate": 0.00011999999999999999,
"loss": 5.627,
"step": 200
},
{
"epoch": 2.0,
"eval_cer": 0.8620889468347096,
"eval_loss": 2.5985116958618164,
"eval_runtime": 6.1017,
"eval_samples_per_second": 38.678,
"eval_steps_per_second": 4.917,
"eval_wer": 0.9952644041041832,
"step": 236
},
{
"epoch": 3.0,
"eval_cer": 0.17398232652469942,
"eval_loss": 0.6798810958862305,
"eval_runtime": 5.9946,
"eval_samples_per_second": 39.369,
"eval_steps_per_second": 5.004,
"eval_wer": 0.5603788476716653,
"step": 354
},
{
"epoch": 3.39,
"learning_rate": 0.00023999999999999998,
"loss": 1.9128,
"step": 400
},
{
"epoch": 4.0,
"eval_cer": 0.15094886281326958,
"eval_loss": 0.5401029586791992,
"eval_runtime": 5.974,
"eval_samples_per_second": 39.504,
"eval_steps_per_second": 5.022,
"eval_wer": 0.5027624309392266,
"step": 472
},
{
"epoch": 5.0,
"eval_cer": 0.13906996957844414,
"eval_loss": 0.49666497111320496,
"eval_runtime": 5.4532,
"eval_samples_per_second": 43.277,
"eval_steps_per_second": 5.501,
"eval_wer": 0.48066298342541436,
"step": 590
},
{
"epoch": 5.08,
"learning_rate": 0.0002917355371900826,
"loss": 1.029,
"step": 600
},
{
"epoch": 6.0,
"eval_cer": 0.1282051282051282,
"eval_loss": 0.5171706080436707,
"eval_runtime": 5.3095,
"eval_samples_per_second": 44.449,
"eval_steps_per_second": 5.65,
"eval_wer": 0.4451460142067877,
"step": 708
},
{
"epoch": 6.78,
"learning_rate": 0.0002752066115702479,
"loss": 0.8154,
"step": 800
},
{
"epoch": 7.0,
"eval_cer": 0.13066782558307982,
"eval_loss": 0.5484737753868103,
"eval_runtime": 6.0637,
"eval_samples_per_second": 38.92,
"eval_steps_per_second": 4.947,
"eval_wer": 0.43804262036306235,
"step": 826
},
{
"epoch": 8.0,
"eval_cer": 0.11299435028248588,
"eval_loss": 0.455568790435791,
"eval_runtime": 5.7805,
"eval_samples_per_second": 40.827,
"eval_steps_per_second": 5.19,
"eval_wer": 0.40331491712707185,
"step": 944
},
{
"epoch": 8.47,
"learning_rate": 0.00025867768595041324,
"loss": 0.68,
"step": 1000
},
{
"epoch": 9.0,
"eval_cer": 0.11806460959003331,
"eval_loss": 0.47916799783706665,
"eval_runtime": 5.4838,
"eval_samples_per_second": 43.036,
"eval_steps_per_second": 5.471,
"eval_wer": 0.4112075769534333,
"step": 1062
},
{
"epoch": 10.0,
"eval_cer": 0.10922787193973635,
"eval_loss": 0.43157267570495605,
"eval_runtime": 5.6241,
"eval_samples_per_second": 41.962,
"eval_steps_per_second": 5.334,
"eval_wer": 0.39068666140489344,
"step": 1180
},
{
"epoch": 10.17,
"learning_rate": 0.0002421487603305785,
"loss": 0.5853,
"step": 1200
},
{
"epoch": 11.0,
"eval_cer": 0.10705490366507316,
"eval_loss": 0.46444636583328247,
"eval_runtime": 5.4324,
"eval_samples_per_second": 43.443,
"eval_steps_per_second": 5.522,
"eval_wer": 0.36937647987371747,
"step": 1298
},
{
"epoch": 11.86,
"learning_rate": 0.00022561983471074378,
"loss": 0.534,
"step": 1400
},
{
"epoch": 12.0,
"eval_cer": 0.11313921483413009,
"eval_loss": 0.5147783756256104,
"eval_runtime": 5.7969,
"eval_samples_per_second": 40.711,
"eval_steps_per_second": 5.175,
"eval_wer": 0.3764798737174428,
"step": 1416
},
{
"epoch": 13.0,
"eval_cer": 0.10893814283644793,
"eval_loss": 0.49593451619148254,
"eval_runtime": 5.8062,
"eval_samples_per_second": 40.647,
"eval_steps_per_second": 5.167,
"eval_wer": 0.3772691397000789,
"step": 1534
},
{
"epoch": 13.56,
"learning_rate": 0.0002090909090909091,
"loss": 0.4826,
"step": 1600
},
{
"epoch": 14.0,
"eval_cer": 0.10850354918151528,
"eval_loss": 0.48721909523010254,
"eval_runtime": 5.799,
"eval_samples_per_second": 40.697,
"eval_steps_per_second": 5.173,
"eval_wer": 0.3788476716653512,
"step": 1652
},
{
"epoch": 15.0,
"eval_cer": 0.10256410256410256,
"eval_loss": 0.45876312255859375,
"eval_runtime": 5.8742,
"eval_samples_per_second": 40.176,
"eval_steps_per_second": 5.107,
"eval_wer": 0.36306235201262826,
"step": 1770
},
{
"epoch": 15.25,
"learning_rate": 0.00019256198347107438,
"loss": 0.4211,
"step": 1800
},
{
"epoch": 16.0,
"eval_cer": 0.10155005070259307,
"eval_loss": 0.4505126476287842,
"eval_runtime": 5.2544,
"eval_samples_per_second": 44.915,
"eval_steps_per_second": 5.71,
"eval_wer": 0.3606945540647198,
"step": 1888
},
{
"epoch": 16.95,
"learning_rate": 0.00017603305785123967,
"loss": 0.396,
"step": 2000
},
{
"epoch": 17.0,
"eval_cer": 0.09923221787628567,
"eval_loss": 0.46898314356803894,
"eval_runtime": 5.9134,
"eval_samples_per_second": 39.91,
"eval_steps_per_second": 5.073,
"eval_wer": 0.3606945540647198,
"step": 2006
},
{
"epoch": 18.0,
"eval_cer": 0.09937708242792989,
"eval_loss": 0.45210039615631104,
"eval_runtime": 6.1856,
"eval_samples_per_second": 38.153,
"eval_steps_per_second": 4.85,
"eval_wer": 0.3638516179952644,
"step": 2124
},
{
"epoch": 18.64,
"learning_rate": 0.00015950413223140495,
"loss": 0.3619,
"step": 2200
},
{
"epoch": 19.0,
"eval_cer": 0.09937708242792989,
"eval_loss": 0.4832761585712433,
"eval_runtime": 6.2572,
"eval_samples_per_second": 37.716,
"eval_steps_per_second": 4.794,
"eval_wer": 0.35438042620363064,
"step": 2242
},
{
"epoch": 20.0,
"eval_cer": 0.09097493843256556,
"eval_loss": 0.44832131266593933,
"eval_runtime": 5.7413,
"eval_samples_per_second": 41.106,
"eval_steps_per_second": 5.225,
"eval_wer": 0.3346487766377269,
"step": 2360
},
{
"epoch": 20.34,
"learning_rate": 0.00014297520661157024,
"loss": 0.33,
"step": 2400
},
{
"epoch": 21.0,
"eval_cer": 0.09865275966970882,
"eval_loss": 0.4619905650615692,
"eval_runtime": 5.4696,
"eval_samples_per_second": 43.148,
"eval_steps_per_second": 5.485,
"eval_wer": 0.3425414364640884,
"step": 2478
},
{
"epoch": 22.0,
"eval_cer": 0.09532087498189193,
"eval_loss": 0.4783581793308258,
"eval_runtime": 5.7494,
"eval_samples_per_second": 41.048,
"eval_steps_per_second": 5.218,
"eval_wer": 0.33859510655090763,
"step": 2596
},
{
"epoch": 22.03,
"learning_rate": 0.00012644628099173552,
"loss": 0.3199,
"step": 2600
},
{
"epoch": 23.0,
"eval_cer": 0.09633492684340142,
"eval_loss": 0.5089753866195679,
"eval_runtime": 6.2898,
"eval_samples_per_second": 37.521,
"eval_steps_per_second": 4.77,
"eval_wer": 0.34333070244672453,
"step": 2714
},
{
"epoch": 23.73,
"learning_rate": 0.00010991735537190081,
"loss": 0.2793,
"step": 2800
},
{
"epoch": 24.0,
"eval_cer": 0.0948862813269593,
"eval_loss": 0.5048560500144958,
"eval_runtime": 6.0494,
"eval_samples_per_second": 39.012,
"eval_steps_per_second": 4.959,
"eval_wer": 0.33859510655090763,
"step": 2832
},
{
"epoch": 25.0,
"eval_cer": 0.09198899029407503,
"eval_loss": 0.5005324482917786,
"eval_runtime": 6.5065,
"eval_samples_per_second": 36.271,
"eval_steps_per_second": 4.611,
"eval_wer": 0.324388318863457,
"step": 2950
},
{
"epoch": 25.42,
"learning_rate": 9.338842975206611e-05,
"loss": 0.2769,
"step": 3000
},
{
"epoch": 26.0,
"eval_cer": 0.08851224105461393,
"eval_loss": 0.503860354423523,
"eval_runtime": 5.4619,
"eval_samples_per_second": 43.208,
"eval_steps_per_second": 5.493,
"eval_wer": 0.3267561168113654,
"step": 3068
},
{
"epoch": 27.0,
"eval_cer": 0.0938722294654498,
"eval_loss": 0.5316025614738464,
"eval_runtime": 5.994,
"eval_samples_per_second": 39.373,
"eval_steps_per_second": 5.005,
"eval_wer": 0.32991318074191,
"step": 3186
},
{
"epoch": 27.12,
"learning_rate": 7.68595041322314e-05,
"loss": 0.2329,
"step": 3200
},
{
"epoch": 28.0,
"eval_cer": 0.09256844850065189,
"eval_loss": 0.5238826274871826,
"eval_runtime": 5.6891,
"eval_samples_per_second": 41.483,
"eval_steps_per_second": 5.273,
"eval_wer": 0.3314917127071823,
"step": 3304
},
{
"epoch": 28.81,
"learning_rate": 6.033057851239669e-05,
"loss": 0.2327,
"step": 3400
},
{
"epoch": 29.0,
"eval_cer": 0.09184412574243082,
"eval_loss": 0.4961460530757904,
"eval_runtime": 5.7688,
"eval_samples_per_second": 40.91,
"eval_steps_per_second": 5.2,
"eval_wer": 0.3228097868981847,
"step": 3422
},
{
"epoch": 30.0,
"eval_cer": 0.09068520932927712,
"eval_loss": 0.4970700740814209,
"eval_runtime": 6.2622,
"eval_samples_per_second": 37.686,
"eval_steps_per_second": 4.791,
"eval_wer": 0.3267561168113654,
"step": 3540
},
{
"epoch": 30.51,
"learning_rate": 4.380165289256198e-05,
"loss": 0.2087,
"step": 3600
},
{
"epoch": 31.0,
"eval_cer": 0.09083007388092133,
"eval_loss": 0.5133862495422363,
"eval_runtime": 5.6526,
"eval_samples_per_second": 41.751,
"eval_steps_per_second": 5.307,
"eval_wer": 0.3259668508287293,
"step": 3658
},
{
"epoch": 32.0,
"eval_cer": 0.08851224105461393,
"eval_loss": 0.522043764591217,
"eval_runtime": 5.6904,
"eval_samples_per_second": 41.474,
"eval_steps_per_second": 5.272,
"eval_wer": 0.3212312549329124,
"step": 3776
},
{
"epoch": 32.2,
"learning_rate": 2.727272727272727e-05,
"loss": 0.1856,
"step": 3800
},
{
"epoch": 33.0,
"eval_cer": 0.08836737650296972,
"eval_loss": 0.5135778188705444,
"eval_runtime": 6.0356,
"eval_samples_per_second": 39.101,
"eval_steps_per_second": 4.971,
"eval_wer": 0.31333859510655093,
"step": 3894
},
{
"epoch": 33.9,
"learning_rate": 1.0743801652892562e-05,
"loss": 0.1803,
"step": 4000
},
{
"epoch": 34.0,
"eval_cer": 0.08749818919310445,
"eval_loss": 0.5257639288902283,
"eval_runtime": 5.8083,
"eval_samples_per_second": 40.631,
"eval_steps_per_second": 5.165,
"eval_wer": 0.31333859510655093,
"step": 4012
},
{
"epoch": 35.0,
"eval_cer": 0.08764305374474866,
"eval_loss": 0.5268439650535583,
"eval_runtime": 6.0191,
"eval_samples_per_second": 39.208,
"eval_steps_per_second": 4.984,
"eval_wer": 0.3164956590370955,
"step": 4130
}
],
"logging_steps": 200,
"max_steps": 4130,
"num_input_tokens_seen": 0,
"num_train_epochs": 35,
"save_steps": 500,
"total_flos": 4.2609001975048284e+18,
"train_batch_size": 8,
"trial_name": null,
"trial_params": null
}