{ "best_metric": 0.31333859510655093, "best_model_checkpoint": "xls-r-greek-cretan/checkpoint-3894", "epoch": 35.0, "eval_steps": 500, "global_step": 4130, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "eval_cer": 0.9992756772417789, "eval_loss": 3.404292583465576, "eval_runtime": 6.277, "eval_samples_per_second": 37.597, "eval_steps_per_second": 4.779, "eval_wer": 1.0, "step": 118 }, { "epoch": 1.69, "learning_rate": 0.00011999999999999999, "loss": 5.627, "step": 200 }, { "epoch": 2.0, "eval_cer": 0.8620889468347096, "eval_loss": 2.5985116958618164, "eval_runtime": 6.1017, "eval_samples_per_second": 38.678, "eval_steps_per_second": 4.917, "eval_wer": 0.9952644041041832, "step": 236 }, { "epoch": 3.0, "eval_cer": 0.17398232652469942, "eval_loss": 0.6798810958862305, "eval_runtime": 5.9946, "eval_samples_per_second": 39.369, "eval_steps_per_second": 5.004, "eval_wer": 0.5603788476716653, "step": 354 }, { "epoch": 3.39, "learning_rate": 0.00023999999999999998, "loss": 1.9128, "step": 400 }, { "epoch": 4.0, "eval_cer": 0.15094886281326958, "eval_loss": 0.5401029586791992, "eval_runtime": 5.974, "eval_samples_per_second": 39.504, "eval_steps_per_second": 5.022, "eval_wer": 0.5027624309392266, "step": 472 }, { "epoch": 5.0, "eval_cer": 0.13906996957844414, "eval_loss": 0.49666497111320496, "eval_runtime": 5.4532, "eval_samples_per_second": 43.277, "eval_steps_per_second": 5.501, "eval_wer": 0.48066298342541436, "step": 590 }, { "epoch": 5.08, "learning_rate": 0.0002917355371900826, "loss": 1.029, "step": 600 }, { "epoch": 6.0, "eval_cer": 0.1282051282051282, "eval_loss": 0.5171706080436707, "eval_runtime": 5.3095, "eval_samples_per_second": 44.449, "eval_steps_per_second": 5.65, "eval_wer": 0.4451460142067877, "step": 708 }, { "epoch": 6.78, "learning_rate": 0.0002752066115702479, "loss": 0.8154, "step": 800 }, { "epoch": 7.0, "eval_cer": 0.13066782558307982, "eval_loss": 0.5484737753868103, "eval_runtime": 6.0637, "eval_samples_per_second": 38.92, "eval_steps_per_second": 4.947, "eval_wer": 0.43804262036306235, "step": 826 }, { "epoch": 8.0, "eval_cer": 0.11299435028248588, "eval_loss": 0.455568790435791, "eval_runtime": 5.7805, "eval_samples_per_second": 40.827, "eval_steps_per_second": 5.19, "eval_wer": 0.40331491712707185, "step": 944 }, { "epoch": 8.47, "learning_rate": 0.00025867768595041324, "loss": 0.68, "step": 1000 }, { "epoch": 9.0, "eval_cer": 0.11806460959003331, "eval_loss": 0.47916799783706665, "eval_runtime": 5.4838, "eval_samples_per_second": 43.036, "eval_steps_per_second": 5.471, "eval_wer": 0.4112075769534333, "step": 1062 }, { "epoch": 10.0, "eval_cer": 0.10922787193973635, "eval_loss": 0.43157267570495605, "eval_runtime": 5.6241, "eval_samples_per_second": 41.962, "eval_steps_per_second": 5.334, "eval_wer": 0.39068666140489344, "step": 1180 }, { "epoch": 10.17, "learning_rate": 0.0002421487603305785, "loss": 0.5853, "step": 1200 }, { "epoch": 11.0, "eval_cer": 0.10705490366507316, "eval_loss": 0.46444636583328247, "eval_runtime": 5.4324, "eval_samples_per_second": 43.443, "eval_steps_per_second": 5.522, "eval_wer": 0.36937647987371747, "step": 1298 }, { "epoch": 11.86, "learning_rate": 0.00022561983471074378, "loss": 0.534, "step": 1400 }, { "epoch": 12.0, "eval_cer": 0.11313921483413009, "eval_loss": 0.5147783756256104, "eval_runtime": 5.7969, "eval_samples_per_second": 40.711, "eval_steps_per_second": 5.175, "eval_wer": 0.3764798737174428, "step": 1416 }, { "epoch": 13.0, "eval_cer": 0.10893814283644793, "eval_loss": 0.49593451619148254, "eval_runtime": 5.8062, "eval_samples_per_second": 40.647, "eval_steps_per_second": 5.167, "eval_wer": 0.3772691397000789, "step": 1534 }, { "epoch": 13.56, "learning_rate": 0.0002090909090909091, "loss": 0.4826, "step": 1600 }, { "epoch": 14.0, "eval_cer": 0.10850354918151528, "eval_loss": 0.48721909523010254, "eval_runtime": 5.799, "eval_samples_per_second": 40.697, "eval_steps_per_second": 5.173, "eval_wer": 0.3788476716653512, "step": 1652 }, { "epoch": 15.0, "eval_cer": 0.10256410256410256, "eval_loss": 0.45876312255859375, "eval_runtime": 5.8742, "eval_samples_per_second": 40.176, "eval_steps_per_second": 5.107, "eval_wer": 0.36306235201262826, "step": 1770 }, { "epoch": 15.25, "learning_rate": 0.00019256198347107438, "loss": 0.4211, "step": 1800 }, { "epoch": 16.0, "eval_cer": 0.10155005070259307, "eval_loss": 0.4505126476287842, "eval_runtime": 5.2544, "eval_samples_per_second": 44.915, "eval_steps_per_second": 5.71, "eval_wer": 0.3606945540647198, "step": 1888 }, { "epoch": 16.95, "learning_rate": 0.00017603305785123967, "loss": 0.396, "step": 2000 }, { "epoch": 17.0, "eval_cer": 0.09923221787628567, "eval_loss": 0.46898314356803894, "eval_runtime": 5.9134, "eval_samples_per_second": 39.91, "eval_steps_per_second": 5.073, "eval_wer": 0.3606945540647198, "step": 2006 }, { "epoch": 18.0, "eval_cer": 0.09937708242792989, "eval_loss": 0.45210039615631104, "eval_runtime": 6.1856, "eval_samples_per_second": 38.153, "eval_steps_per_second": 4.85, "eval_wer": 0.3638516179952644, "step": 2124 }, { "epoch": 18.64, "learning_rate": 0.00015950413223140495, "loss": 0.3619, "step": 2200 }, { "epoch": 19.0, "eval_cer": 0.09937708242792989, "eval_loss": 0.4832761585712433, "eval_runtime": 6.2572, "eval_samples_per_second": 37.716, "eval_steps_per_second": 4.794, "eval_wer": 0.35438042620363064, "step": 2242 }, { "epoch": 20.0, "eval_cer": 0.09097493843256556, "eval_loss": 0.44832131266593933, "eval_runtime": 5.7413, "eval_samples_per_second": 41.106, "eval_steps_per_second": 5.225, "eval_wer": 0.3346487766377269, "step": 2360 }, { "epoch": 20.34, "learning_rate": 0.00014297520661157024, "loss": 0.33, "step": 2400 }, { "epoch": 21.0, "eval_cer": 0.09865275966970882, "eval_loss": 0.4619905650615692, "eval_runtime": 5.4696, "eval_samples_per_second": 43.148, "eval_steps_per_second": 5.485, "eval_wer": 0.3425414364640884, "step": 2478 }, { "epoch": 22.0, "eval_cer": 0.09532087498189193, "eval_loss": 0.4783581793308258, "eval_runtime": 5.7494, "eval_samples_per_second": 41.048, "eval_steps_per_second": 5.218, "eval_wer": 0.33859510655090763, "step": 2596 }, { "epoch": 22.03, "learning_rate": 0.00012644628099173552, "loss": 0.3199, "step": 2600 }, { "epoch": 23.0, "eval_cer": 0.09633492684340142, "eval_loss": 0.5089753866195679, "eval_runtime": 6.2898, "eval_samples_per_second": 37.521, "eval_steps_per_second": 4.77, "eval_wer": 0.34333070244672453, "step": 2714 }, { "epoch": 23.73, "learning_rate": 0.00010991735537190081, "loss": 0.2793, "step": 2800 }, { "epoch": 24.0, "eval_cer": 0.0948862813269593, "eval_loss": 0.5048560500144958, "eval_runtime": 6.0494, "eval_samples_per_second": 39.012, "eval_steps_per_second": 4.959, "eval_wer": 0.33859510655090763, "step": 2832 }, { "epoch": 25.0, "eval_cer": 0.09198899029407503, "eval_loss": 0.5005324482917786, "eval_runtime": 6.5065, "eval_samples_per_second": 36.271, "eval_steps_per_second": 4.611, "eval_wer": 0.324388318863457, "step": 2950 }, { "epoch": 25.42, "learning_rate": 9.338842975206611e-05, "loss": 0.2769, "step": 3000 }, { "epoch": 26.0, "eval_cer": 0.08851224105461393, "eval_loss": 0.503860354423523, "eval_runtime": 5.4619, "eval_samples_per_second": 43.208, "eval_steps_per_second": 5.493, "eval_wer": 0.3267561168113654, "step": 3068 }, { "epoch": 27.0, "eval_cer": 0.0938722294654498, "eval_loss": 0.5316025614738464, "eval_runtime": 5.994, "eval_samples_per_second": 39.373, "eval_steps_per_second": 5.005, "eval_wer": 0.32991318074191, "step": 3186 }, { "epoch": 27.12, "learning_rate": 7.68595041322314e-05, "loss": 0.2329, "step": 3200 }, { "epoch": 28.0, "eval_cer": 0.09256844850065189, "eval_loss": 0.5238826274871826, "eval_runtime": 5.6891, "eval_samples_per_second": 41.483, "eval_steps_per_second": 5.273, "eval_wer": 0.3314917127071823, "step": 3304 }, { "epoch": 28.81, "learning_rate": 6.033057851239669e-05, "loss": 0.2327, "step": 3400 }, { "epoch": 29.0, "eval_cer": 0.09184412574243082, "eval_loss": 0.4961460530757904, "eval_runtime": 5.7688, "eval_samples_per_second": 40.91, "eval_steps_per_second": 5.2, "eval_wer": 0.3228097868981847, "step": 3422 }, { "epoch": 30.0, "eval_cer": 0.09068520932927712, "eval_loss": 0.4970700740814209, "eval_runtime": 6.2622, "eval_samples_per_second": 37.686, "eval_steps_per_second": 4.791, "eval_wer": 0.3267561168113654, "step": 3540 }, { "epoch": 30.51, "learning_rate": 4.380165289256198e-05, "loss": 0.2087, "step": 3600 }, { "epoch": 31.0, "eval_cer": 0.09083007388092133, "eval_loss": 0.5133862495422363, "eval_runtime": 5.6526, "eval_samples_per_second": 41.751, "eval_steps_per_second": 5.307, "eval_wer": 0.3259668508287293, "step": 3658 }, { "epoch": 32.0, "eval_cer": 0.08851224105461393, "eval_loss": 0.522043764591217, "eval_runtime": 5.6904, "eval_samples_per_second": 41.474, "eval_steps_per_second": 5.272, "eval_wer": 0.3212312549329124, "step": 3776 }, { "epoch": 32.2, "learning_rate": 2.727272727272727e-05, "loss": 0.1856, "step": 3800 }, { "epoch": 33.0, "eval_cer": 0.08836737650296972, "eval_loss": 0.5135778188705444, "eval_runtime": 6.0356, "eval_samples_per_second": 39.101, "eval_steps_per_second": 4.971, "eval_wer": 0.31333859510655093, "step": 3894 }, { "epoch": 33.9, "learning_rate": 1.0743801652892562e-05, "loss": 0.1803, "step": 4000 }, { "epoch": 34.0, "eval_cer": 0.08749818919310445, "eval_loss": 0.5257639288902283, "eval_runtime": 5.8083, "eval_samples_per_second": 40.631, "eval_steps_per_second": 5.165, "eval_wer": 0.31333859510655093, "step": 4012 }, { "epoch": 35.0, "eval_cer": 0.08764305374474866, "eval_loss": 0.5268439650535583, "eval_runtime": 6.0191, "eval_samples_per_second": 39.208, "eval_steps_per_second": 4.984, "eval_wer": 0.3164956590370955, "step": 4130 } ], "logging_steps": 200, "max_steps": 4130, "num_input_tokens_seen": 0, "num_train_epochs": 35, "save_steps": 500, "total_flos": 4.2609001975048284e+18, "train_batch_size": 8, "trial_name": null, "trial_params": null }