{ "best_metric": null, "best_model_checkpoint": null, "epoch": 29.75128998968008, "global_step": 14400, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.83, "learning_rate": 0.00023999999999999998, "loss": 24.132, "step": 400 }, { "epoch": 0.83, "eval_cer": 1.0, "eval_loss": 4.829305171966553, "eval_runtime": 666.1596, "eval_samples_per_second": 3.82, "step": 400 }, { "epoch": 1.65, "learning_rate": 0.0002935805991440799, "loss": 4.7165, "step": 800 }, { "epoch": 1.65, "eval_cer": 1.0, "eval_loss": 4.607272148132324, "eval_runtime": 670.8713, "eval_samples_per_second": 3.794, "step": 800 }, { "epoch": 2.48, "learning_rate": 0.00028502139800285305, "loss": 2.9637, "step": 1200 }, { "epoch": 2.48, "eval_cer": 0.34719755926062756, "eval_loss": 1.3734972476959229, "eval_runtime": 671.6914, "eval_samples_per_second": 3.789, "step": 1200 }, { "epoch": 3.31, "learning_rate": 0.0002764621968616262, "loss": 1.3373, "step": 1600 }, { "epoch": 3.31, "eval_cer": 0.26771859991180924, "eval_loss": 0.9831737875938416, "eval_runtime": 669.2245, "eval_samples_per_second": 3.803, "step": 1600 }, { "epoch": 4.13, "learning_rate": 0.0002679029957203994, "loss": 1.0378, "step": 2000 }, { "epoch": 4.13, "eval_cer": 0.24304901738788448, "eval_loss": 0.8199232220649719, "eval_runtime": 670.0042, "eval_samples_per_second": 3.798, "step": 2000 }, { "epoch": 4.96, "learning_rate": 0.00025934379457917263, "loss": 0.8747, "step": 2400 }, { "epoch": 4.96, "eval_cer": 0.21944010773576136, "eval_loss": 0.7384957075119019, "eval_runtime": 673.0546, "eval_samples_per_second": 3.781, "step": 2400 }, { "epoch": 5.78, "learning_rate": 0.0002507845934379458, "loss": 0.7648, "step": 2800 }, { "epoch": 5.78, "eval_cer": 0.21221799806933703, "eval_loss": 0.7131306529045105, "eval_runtime": 671.747, "eval_samples_per_second": 3.789, "step": 2800 }, { "epoch": 6.61, "learning_rate": 0.00024222539229671895, "loss": 0.6863, "step": 3200 }, { "epoch": 6.61, "eval_cer": 0.19983553611650717, "eval_loss": 0.6777553558349609, "eval_runtime": 679.4426, "eval_samples_per_second": 3.746, "step": 3200 }, { "epoch": 7.44, "learning_rate": 0.00023366619115549212, "loss": 0.6269, "step": 3600 }, { "epoch": 7.44, "eval_cer": 0.18854950005362953, "eval_loss": 0.6489213705062866, "eval_runtime": 682.8113, "eval_samples_per_second": 3.727, "step": 3600 }, { "epoch": 8.26, "learning_rate": 0.00022510699001426532, "loss": 0.5893, "step": 4000 }, { "epoch": 8.26, "eval_cer": 0.19089728157885327, "eval_loss": 0.6407034993171692, "eval_runtime": 687.6153, "eval_samples_per_second": 3.701, "step": 4000 }, { "epoch": 9.09, "learning_rate": 0.0002165477888730385, "loss": 0.5301, "step": 4400 }, { "epoch": 9.09, "eval_cer": 0.1817445089322957, "eval_loss": 0.6261754035949707, "eval_runtime": 681.4189, "eval_samples_per_second": 3.735, "step": 4400 }, { "epoch": 9.92, "learning_rate": 0.0002079885877318117, "loss": 0.4909, "step": 4800 }, { "epoch": 9.92, "eval_cer": 0.17955165715239127, "eval_loss": 0.623394250869751, "eval_runtime": 672.8551, "eval_samples_per_second": 3.782, "step": 4800 }, { "epoch": 10.74, "learning_rate": 0.00019942938659058487, "loss": 0.4602, "step": 5200 }, { "epoch": 10.74, "eval_cer": 0.17870550238949337, "eval_loss": 0.6308777928352356, "eval_runtime": 673.8601, "eval_samples_per_second": 3.777, "step": 5200 }, { "epoch": 11.57, "learning_rate": 0.00019087018544935804, "loss": 0.4244, "step": 5600 }, { "epoch": 11.57, "eval_cer": 0.17706086355456507, "eval_loss": 0.6224568486213684, "eval_runtime": 677.416, "eval_samples_per_second": 3.757, "step": 5600 }, { "epoch": 12.4, "learning_rate": 0.0001823109843081312, "loss": 0.3936, "step": 6000 }, { "epoch": 12.4, "eval_cer": 0.17660799199132393, "eval_loss": 0.6143100261688232, "eval_runtime": 683.1583, "eval_samples_per_second": 3.725, "step": 6000 }, { "epoch": 13.22, "learning_rate": 0.0001737517831669044, "loss": 0.3792, "step": 6400 }, { "epoch": 13.22, "eval_cer": 0.17075641468733985, "eval_loss": 0.627468466758728, "eval_runtime": 676.725, "eval_samples_per_second": 3.761, "step": 6400 }, { "epoch": 14.05, "learning_rate": 0.00016519258202567759, "loss": 0.3555, "step": 6800 }, { "epoch": 14.05, "eval_cer": 0.16945738836120083, "eval_loss": 0.6296263933181763, "eval_runtime": 678.0284, "eval_samples_per_second": 3.754, "step": 6800 }, { "epoch": 14.88, "learning_rate": 0.00015663338088445076, "loss": 0.3282, "step": 7200 }, { "epoch": 14.88, "eval_cer": 0.16861123359830293, "eval_loss": 0.6353692412376404, "eval_runtime": 677.9049, "eval_samples_per_second": 3.754, "step": 7200 }, { "epoch": 15.7, "learning_rate": 0.00014807417974322396, "loss": 0.3105, "step": 7600 }, { "epoch": 15.7, "eval_cer": 0.16284308000333694, "eval_loss": 0.6332650780677795, "eval_runtime": 678.3344, "eval_samples_per_second": 3.752, "step": 7600 }, { "epoch": 16.53, "learning_rate": 0.00013951497860199713, "loss": 0.2899, "step": 8000 }, { "epoch": 16.53, "eval_cer": 0.16626345207307916, "eval_loss": 0.6411857604980469, "eval_runtime": 680.0199, "eval_samples_per_second": 3.743, "step": 8000 }, { "epoch": 17.36, "learning_rate": 0.0001309557774607703, "loss": 0.2769, "step": 8400 }, { "epoch": 17.36, "eval_cer": 0.1635343050209155, "eval_loss": 0.629165768623352, "eval_runtime": 679.3365, "eval_samples_per_second": 3.746, "step": 8400 }, { "epoch": 18.18, "learning_rate": 0.0001223965763195435, "loss": 0.2685, "step": 8800 }, { "epoch": 18.18, "eval_cer": 0.16531003825572943, "eval_loss": 0.6566583514213562, "eval_runtime": 677.0871, "eval_samples_per_second": 3.759, "step": 8800 }, { "epoch": 19.01, "learning_rate": 0.00011383737517831669, "loss": 0.2481, "step": 9200 }, { "epoch": 19.01, "eval_cer": 0.16478566065618705, "eval_loss": 0.652852475643158, "eval_runtime": 678.2412, "eval_samples_per_second": 3.752, "step": 9200 }, { "epoch": 19.83, "learning_rate": 0.00010527817403708987, "loss": 0.2355, "step": 9600 }, { "epoch": 19.83, "eval_cer": 0.1621041842948909, "eval_loss": 0.6581872701644897, "eval_runtime": 679.7908, "eval_samples_per_second": 3.744, "step": 9600 }, { "epoch": 20.66, "learning_rate": 9.671897289586304e-05, "loss": 0.2266, "step": 10000 }, { "epoch": 20.66, "eval_cer": 0.1623544554219452, "eval_loss": 0.6539607644081116, "eval_runtime": 679.8539, "eval_samples_per_second": 3.743, "step": 10000 }, { "epoch": 21.49, "learning_rate": 8.815977175463624e-05, "loss": 0.2183, "step": 10400 }, { "epoch": 21.49, "eval_cer": 0.1604952984781132, "eval_loss": 0.6687005758285522, "eval_runtime": 678.9594, "eval_samples_per_second": 3.748, "step": 10400 }, { "epoch": 22.31, "learning_rate": 7.960057061340941e-05, "loss": 0.2089, "step": 10800 }, { "epoch": 22.31, "eval_cer": 0.16017352131475765, "eval_loss": 0.6894858479499817, "eval_runtime": 681.5459, "eval_samples_per_second": 3.734, "step": 10800 }, { "epoch": 23.14, "learning_rate": 7.10413694721826e-05, "loss": 0.2016, "step": 11200 }, { "epoch": 23.14, "eval_cer": 0.1619850075677222, "eval_loss": 0.6835413575172424, "eval_runtime": 682.6349, "eval_samples_per_second": 3.728, "step": 11200 }, { "epoch": 23.97, "learning_rate": 6.248216833095577e-05, "loss": 0.1904, "step": 11600 }, { "epoch": 23.97, "eval_cer": 0.16073365193245062, "eval_loss": 0.6923198103904724, "eval_runtime": 681.4566, "eval_samples_per_second": 3.735, "step": 11600 }, { "epoch": 24.79, "learning_rate": 5.392296718972896e-05, "loss": 0.1865, "step": 12000 }, { "epoch": 24.79, "eval_cer": 0.15916051913382354, "eval_loss": 0.6866386532783508, "eval_runtime": 680.6783, "eval_samples_per_second": 3.739, "step": 12000 }, { "epoch": 25.62, "learning_rate": 4.536376604850214e-05, "loss": 0.1754, "step": 12400 }, { "epoch": 25.62, "eval_cer": 0.15712259709923845, "eval_loss": 0.693188488483429, "eval_runtime": 682.8906, "eval_samples_per_second": 3.727, "step": 12400 }, { "epoch": 26.45, "learning_rate": 3.680456490727532e-05, "loss": 0.1739, "step": 12800 }, { "epoch": 26.45, "eval_cer": 0.15602617120928625, "eval_loss": 0.6990784406661987, "eval_runtime": 682.7736, "eval_samples_per_second": 3.727, "step": 12800 }, { "epoch": 27.27, "learning_rate": 2.82453637660485e-05, "loss": 0.1657, "step": 13200 }, { "epoch": 27.27, "eval_cer": 0.1563002776817743, "eval_loss": 0.7062773108482361, "eval_runtime": 681.4615, "eval_samples_per_second": 3.735, "step": 13200 }, { "epoch": 28.1, "learning_rate": 1.968616262482168e-05, "loss": 0.1584, "step": 13600 }, { "epoch": 28.1, "eval_cer": 0.15688424364490103, "eval_loss": 0.7029954791069031, "eval_runtime": 682.9133, "eval_samples_per_second": 3.727, "step": 13600 }, { "epoch": 28.92, "learning_rate": 1.1126961483594864e-05, "loss": 0.1625, "step": 14000 }, { "epoch": 28.92, "eval_cer": 0.1568604082994673, "eval_loss": 0.6991938352584839, "eval_runtime": 683.4936, "eval_samples_per_second": 3.724, "step": 14000 }, { "epoch": 29.75, "learning_rate": 2.5677603423680457e-06, "loss": 0.1582, "step": 14400 }, { "epoch": 29.75, "eval_cer": 0.156514795790678, "eval_loss": 0.7013677358627319, "eval_runtime": 682.2827, "eval_samples_per_second": 3.73, "step": 14400 } ], "max_steps": 14520, "num_train_epochs": 30, "total_flos": 1.668287739327801e+20, "trial_name": null, "trial_params": null }