|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 29.75128998968008, |
|
"global_step": 14400, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 0.00023999999999999998, |
|
"loss": 24.132, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"eval_cer": 1.0, |
|
"eval_loss": 4.829305171966553, |
|
"eval_runtime": 666.1596, |
|
"eval_samples_per_second": 3.82, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"learning_rate": 0.0002935805991440799, |
|
"loss": 4.7165, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"eval_cer": 1.0, |
|
"eval_loss": 4.607272148132324, |
|
"eval_runtime": 670.8713, |
|
"eval_samples_per_second": 3.794, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 2.48, |
|
"learning_rate": 0.00028502139800285305, |
|
"loss": 2.9637, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 2.48, |
|
"eval_cer": 0.34719755926062756, |
|
"eval_loss": 1.3734972476959229, |
|
"eval_runtime": 671.6914, |
|
"eval_samples_per_second": 3.789, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 3.31, |
|
"learning_rate": 0.0002764621968616262, |
|
"loss": 1.3373, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 3.31, |
|
"eval_cer": 0.26771859991180924, |
|
"eval_loss": 0.9831737875938416, |
|
"eval_runtime": 669.2245, |
|
"eval_samples_per_second": 3.803, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 4.13, |
|
"learning_rate": 0.0002679029957203994, |
|
"loss": 1.0378, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 4.13, |
|
"eval_cer": 0.24304901738788448, |
|
"eval_loss": 0.8199232220649719, |
|
"eval_runtime": 670.0042, |
|
"eval_samples_per_second": 3.798, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 4.96, |
|
"learning_rate": 0.00025934379457917263, |
|
"loss": 0.8747, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 4.96, |
|
"eval_cer": 0.21944010773576136, |
|
"eval_loss": 0.7384957075119019, |
|
"eval_runtime": 673.0546, |
|
"eval_samples_per_second": 3.781, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 5.78, |
|
"learning_rate": 0.0002507845934379458, |
|
"loss": 0.7648, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 5.78, |
|
"eval_cer": 0.21221799806933703, |
|
"eval_loss": 0.7131306529045105, |
|
"eval_runtime": 671.747, |
|
"eval_samples_per_second": 3.789, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 6.61, |
|
"learning_rate": 0.00024222539229671895, |
|
"loss": 0.6863, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 6.61, |
|
"eval_cer": 0.19983553611650717, |
|
"eval_loss": 0.6777553558349609, |
|
"eval_runtime": 679.4426, |
|
"eval_samples_per_second": 3.746, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 7.44, |
|
"learning_rate": 0.00023366619115549212, |
|
"loss": 0.6269, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 7.44, |
|
"eval_cer": 0.18854950005362953, |
|
"eval_loss": 0.6489213705062866, |
|
"eval_runtime": 682.8113, |
|
"eval_samples_per_second": 3.727, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 8.26, |
|
"learning_rate": 0.00022510699001426532, |
|
"loss": 0.5893, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 8.26, |
|
"eval_cer": 0.19089728157885327, |
|
"eval_loss": 0.6407034993171692, |
|
"eval_runtime": 687.6153, |
|
"eval_samples_per_second": 3.701, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 9.09, |
|
"learning_rate": 0.0002165477888730385, |
|
"loss": 0.5301, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 9.09, |
|
"eval_cer": 0.1817445089322957, |
|
"eval_loss": 0.6261754035949707, |
|
"eval_runtime": 681.4189, |
|
"eval_samples_per_second": 3.735, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 9.92, |
|
"learning_rate": 0.0002079885877318117, |
|
"loss": 0.4909, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 9.92, |
|
"eval_cer": 0.17955165715239127, |
|
"eval_loss": 0.623394250869751, |
|
"eval_runtime": 672.8551, |
|
"eval_samples_per_second": 3.782, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 10.74, |
|
"learning_rate": 0.00019942938659058487, |
|
"loss": 0.4602, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 10.74, |
|
"eval_cer": 0.17870550238949337, |
|
"eval_loss": 0.6308777928352356, |
|
"eval_runtime": 673.8601, |
|
"eval_samples_per_second": 3.777, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 11.57, |
|
"learning_rate": 0.00019087018544935804, |
|
"loss": 0.4244, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 11.57, |
|
"eval_cer": 0.17706086355456507, |
|
"eval_loss": 0.6224568486213684, |
|
"eval_runtime": 677.416, |
|
"eval_samples_per_second": 3.757, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 12.4, |
|
"learning_rate": 0.0001823109843081312, |
|
"loss": 0.3936, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 12.4, |
|
"eval_cer": 0.17660799199132393, |
|
"eval_loss": 0.6143100261688232, |
|
"eval_runtime": 683.1583, |
|
"eval_samples_per_second": 3.725, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 13.22, |
|
"learning_rate": 0.0001737517831669044, |
|
"loss": 0.3792, |
|
"step": 6400 |
|
}, |
|
{ |
|
"epoch": 13.22, |
|
"eval_cer": 0.17075641468733985, |
|
"eval_loss": 0.627468466758728, |
|
"eval_runtime": 676.725, |
|
"eval_samples_per_second": 3.761, |
|
"step": 6400 |
|
}, |
|
{ |
|
"epoch": 14.05, |
|
"learning_rate": 0.00016519258202567759, |
|
"loss": 0.3555, |
|
"step": 6800 |
|
}, |
|
{ |
|
"epoch": 14.05, |
|
"eval_cer": 0.16945738836120083, |
|
"eval_loss": 0.6296263933181763, |
|
"eval_runtime": 678.0284, |
|
"eval_samples_per_second": 3.754, |
|
"step": 6800 |
|
}, |
|
{ |
|
"epoch": 14.88, |
|
"learning_rate": 0.00015663338088445076, |
|
"loss": 0.3282, |
|
"step": 7200 |
|
}, |
|
{ |
|
"epoch": 14.88, |
|
"eval_cer": 0.16861123359830293, |
|
"eval_loss": 0.6353692412376404, |
|
"eval_runtime": 677.9049, |
|
"eval_samples_per_second": 3.754, |
|
"step": 7200 |
|
}, |
|
{ |
|
"epoch": 15.7, |
|
"learning_rate": 0.00014807417974322396, |
|
"loss": 0.3105, |
|
"step": 7600 |
|
}, |
|
{ |
|
"epoch": 15.7, |
|
"eval_cer": 0.16284308000333694, |
|
"eval_loss": 0.6332650780677795, |
|
"eval_runtime": 678.3344, |
|
"eval_samples_per_second": 3.752, |
|
"step": 7600 |
|
}, |
|
{ |
|
"epoch": 16.53, |
|
"learning_rate": 0.00013951497860199713, |
|
"loss": 0.2899, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 16.53, |
|
"eval_cer": 0.16626345207307916, |
|
"eval_loss": 0.6411857604980469, |
|
"eval_runtime": 680.0199, |
|
"eval_samples_per_second": 3.743, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 17.36, |
|
"learning_rate": 0.0001309557774607703, |
|
"loss": 0.2769, |
|
"step": 8400 |
|
}, |
|
{ |
|
"epoch": 17.36, |
|
"eval_cer": 0.1635343050209155, |
|
"eval_loss": 0.629165768623352, |
|
"eval_runtime": 679.3365, |
|
"eval_samples_per_second": 3.746, |
|
"step": 8400 |
|
}, |
|
{ |
|
"epoch": 18.18, |
|
"learning_rate": 0.0001223965763195435, |
|
"loss": 0.2685, |
|
"step": 8800 |
|
}, |
|
{ |
|
"epoch": 18.18, |
|
"eval_cer": 0.16531003825572943, |
|
"eval_loss": 0.6566583514213562, |
|
"eval_runtime": 677.0871, |
|
"eval_samples_per_second": 3.759, |
|
"step": 8800 |
|
}, |
|
{ |
|
"epoch": 19.01, |
|
"learning_rate": 0.00011383737517831669, |
|
"loss": 0.2481, |
|
"step": 9200 |
|
}, |
|
{ |
|
"epoch": 19.01, |
|
"eval_cer": 0.16478566065618705, |
|
"eval_loss": 0.652852475643158, |
|
"eval_runtime": 678.2412, |
|
"eval_samples_per_second": 3.752, |
|
"step": 9200 |
|
}, |
|
{ |
|
"epoch": 19.83, |
|
"learning_rate": 0.00010527817403708987, |
|
"loss": 0.2355, |
|
"step": 9600 |
|
}, |
|
{ |
|
"epoch": 19.83, |
|
"eval_cer": 0.1621041842948909, |
|
"eval_loss": 0.6581872701644897, |
|
"eval_runtime": 679.7908, |
|
"eval_samples_per_second": 3.744, |
|
"step": 9600 |
|
}, |
|
{ |
|
"epoch": 20.66, |
|
"learning_rate": 9.671897289586304e-05, |
|
"loss": 0.2266, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 20.66, |
|
"eval_cer": 0.1623544554219452, |
|
"eval_loss": 0.6539607644081116, |
|
"eval_runtime": 679.8539, |
|
"eval_samples_per_second": 3.743, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 21.49, |
|
"learning_rate": 8.815977175463624e-05, |
|
"loss": 0.2183, |
|
"step": 10400 |
|
}, |
|
{ |
|
"epoch": 21.49, |
|
"eval_cer": 0.1604952984781132, |
|
"eval_loss": 0.6687005758285522, |
|
"eval_runtime": 678.9594, |
|
"eval_samples_per_second": 3.748, |
|
"step": 10400 |
|
}, |
|
{ |
|
"epoch": 22.31, |
|
"learning_rate": 7.960057061340941e-05, |
|
"loss": 0.2089, |
|
"step": 10800 |
|
}, |
|
{ |
|
"epoch": 22.31, |
|
"eval_cer": 0.16017352131475765, |
|
"eval_loss": 0.6894858479499817, |
|
"eval_runtime": 681.5459, |
|
"eval_samples_per_second": 3.734, |
|
"step": 10800 |
|
}, |
|
{ |
|
"epoch": 23.14, |
|
"learning_rate": 7.10413694721826e-05, |
|
"loss": 0.2016, |
|
"step": 11200 |
|
}, |
|
{ |
|
"epoch": 23.14, |
|
"eval_cer": 0.1619850075677222, |
|
"eval_loss": 0.6835413575172424, |
|
"eval_runtime": 682.6349, |
|
"eval_samples_per_second": 3.728, |
|
"step": 11200 |
|
}, |
|
{ |
|
"epoch": 23.97, |
|
"learning_rate": 6.248216833095577e-05, |
|
"loss": 0.1904, |
|
"step": 11600 |
|
}, |
|
{ |
|
"epoch": 23.97, |
|
"eval_cer": 0.16073365193245062, |
|
"eval_loss": 0.6923198103904724, |
|
"eval_runtime": 681.4566, |
|
"eval_samples_per_second": 3.735, |
|
"step": 11600 |
|
}, |
|
{ |
|
"epoch": 24.79, |
|
"learning_rate": 5.392296718972896e-05, |
|
"loss": 0.1865, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 24.79, |
|
"eval_cer": 0.15916051913382354, |
|
"eval_loss": 0.6866386532783508, |
|
"eval_runtime": 680.6783, |
|
"eval_samples_per_second": 3.739, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 25.62, |
|
"learning_rate": 4.536376604850214e-05, |
|
"loss": 0.1754, |
|
"step": 12400 |
|
}, |
|
{ |
|
"epoch": 25.62, |
|
"eval_cer": 0.15712259709923845, |
|
"eval_loss": 0.693188488483429, |
|
"eval_runtime": 682.8906, |
|
"eval_samples_per_second": 3.727, |
|
"step": 12400 |
|
}, |
|
{ |
|
"epoch": 26.45, |
|
"learning_rate": 3.680456490727532e-05, |
|
"loss": 0.1739, |
|
"step": 12800 |
|
}, |
|
{ |
|
"epoch": 26.45, |
|
"eval_cer": 0.15602617120928625, |
|
"eval_loss": 0.6990784406661987, |
|
"eval_runtime": 682.7736, |
|
"eval_samples_per_second": 3.727, |
|
"step": 12800 |
|
}, |
|
{ |
|
"epoch": 27.27, |
|
"learning_rate": 2.82453637660485e-05, |
|
"loss": 0.1657, |
|
"step": 13200 |
|
}, |
|
{ |
|
"epoch": 27.27, |
|
"eval_cer": 0.1563002776817743, |
|
"eval_loss": 0.7062773108482361, |
|
"eval_runtime": 681.4615, |
|
"eval_samples_per_second": 3.735, |
|
"step": 13200 |
|
}, |
|
{ |
|
"epoch": 28.1, |
|
"learning_rate": 1.968616262482168e-05, |
|
"loss": 0.1584, |
|
"step": 13600 |
|
}, |
|
{ |
|
"epoch": 28.1, |
|
"eval_cer": 0.15688424364490103, |
|
"eval_loss": 0.7029954791069031, |
|
"eval_runtime": 682.9133, |
|
"eval_samples_per_second": 3.727, |
|
"step": 13600 |
|
}, |
|
{ |
|
"epoch": 28.92, |
|
"learning_rate": 1.1126961483594864e-05, |
|
"loss": 0.1625, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 28.92, |
|
"eval_cer": 0.1568604082994673, |
|
"eval_loss": 0.6991938352584839, |
|
"eval_runtime": 683.4936, |
|
"eval_samples_per_second": 3.724, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 29.75, |
|
"learning_rate": 2.5677603423680457e-06, |
|
"loss": 0.1582, |
|
"step": 14400 |
|
}, |
|
{ |
|
"epoch": 29.75, |
|
"eval_cer": 0.156514795790678, |
|
"eval_loss": 0.7013677358627319, |
|
"eval_runtime": 682.2827, |
|
"eval_samples_per_second": 3.73, |
|
"step": 14400 |
|
} |
|
], |
|
"max_steps": 14520, |
|
"num_train_epochs": 30, |
|
"total_flos": 1.668287739327801e+20, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|