|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 29.998955067920583, |
|
"global_step": 14340, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 0.000148875, |
|
"loss": 6.9674, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"eval_loss": 3.427687168121338, |
|
"eval_runtime": 218.9995, |
|
"eval_samples_per_second": 15.53, |
|
"eval_steps_per_second": 0.973, |
|
"eval_wer": 1.0, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 1.67, |
|
"learning_rate": 0.000298875, |
|
"loss": 2.566, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 1.67, |
|
"eval_loss": 1.0569312572479248, |
|
"eval_runtime": 218.6402, |
|
"eval_samples_per_second": 15.555, |
|
"eval_steps_per_second": 0.974, |
|
"eval_wer": 0.7833242711949475, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 2.51, |
|
"learning_rate": 0.0002912038404726735, |
|
"loss": 1.0118, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 2.51, |
|
"eval_loss": 0.6927362680435181, |
|
"eval_runtime": 220.9263, |
|
"eval_samples_per_second": 15.394, |
|
"eval_steps_per_second": 0.964, |
|
"eval_wer": 0.5602283658873227, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 3.35, |
|
"learning_rate": 0.000282341211225997, |
|
"loss": 0.7536, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 3.35, |
|
"eval_loss": 0.5688415765762329, |
|
"eval_runtime": 210.7589, |
|
"eval_samples_per_second": 16.137, |
|
"eval_steps_per_second": 1.011, |
|
"eval_wer": 0.5082395750554185, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 4.18, |
|
"learning_rate": 0.00027347858197932054, |
|
"loss": 0.6251, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 4.18, |
|
"eval_loss": 0.5367330312728882, |
|
"eval_runtime": 211.3794, |
|
"eval_samples_per_second": 16.09, |
|
"eval_steps_per_second": 1.008, |
|
"eval_wer": 0.46095612530846125, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 5.02, |
|
"learning_rate": 0.00026461595273264397, |
|
"loss": 0.5453, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 5.02, |
|
"eval_loss": 0.47355735301971436, |
|
"eval_runtime": 211.7978, |
|
"eval_samples_per_second": 16.058, |
|
"eval_steps_per_second": 1.006, |
|
"eval_wer": 0.4430758291856623, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 5.86, |
|
"learning_rate": 0.0002557533234859675, |
|
"loss": 0.4779, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 5.86, |
|
"eval_loss": 0.4465107023715973, |
|
"eval_runtime": 211.1636, |
|
"eval_samples_per_second": 16.106, |
|
"eval_steps_per_second": 1.009, |
|
"eval_wer": 0.4200719394370321, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 6.69, |
|
"learning_rate": 0.00024689069423929094, |
|
"loss": 0.4458, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 6.69, |
|
"eval_loss": 0.4270594120025635, |
|
"eval_runtime": 210.6558, |
|
"eval_samples_per_second": 16.145, |
|
"eval_steps_per_second": 1.011, |
|
"eval_wer": 0.4025262453469405, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 7.53, |
|
"learning_rate": 0.00023802806499261444, |
|
"loss": 0.4036, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 7.53, |
|
"eval_loss": 0.4414581060409546, |
|
"eval_runtime": 212.0272, |
|
"eval_samples_per_second": 16.04, |
|
"eval_steps_per_second": 1.005, |
|
"eval_wer": 0.3957505541846167, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 8.37, |
|
"learning_rate": 0.00022916543574593795, |
|
"loss": 0.377, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 8.37, |
|
"eval_loss": 0.4214448034763336, |
|
"eval_runtime": 210.8625, |
|
"eval_samples_per_second": 16.129, |
|
"eval_steps_per_second": 1.01, |
|
"eval_wer": 0.39071061106696225, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 9.2, |
|
"learning_rate": 0.0002203028064992614, |
|
"loss": 0.347, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 9.2, |
|
"eval_loss": 0.42906612157821655, |
|
"eval_runtime": 212.1547, |
|
"eval_samples_per_second": 16.031, |
|
"eval_steps_per_second": 1.004, |
|
"eval_wer": 0.38757371701033083, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 10.04, |
|
"learning_rate": 0.00021144017725258492, |
|
"loss": 0.3322, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 10.04, |
|
"eval_loss": 0.4414941370487213, |
|
"eval_runtime": 210.3342, |
|
"eval_samples_per_second": 16.17, |
|
"eval_steps_per_second": 1.013, |
|
"eval_wer": 0.3664728763227237, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 10.88, |
|
"learning_rate": 0.00020257754800590837, |
|
"loss": 0.311, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 10.88, |
|
"eval_loss": 0.40909305214881897, |
|
"eval_runtime": 211.7535, |
|
"eval_samples_per_second": 16.061, |
|
"eval_steps_per_second": 1.006, |
|
"eval_wer": 0.37214019825170436, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 11.71, |
|
"learning_rate": 0.00019371491875923188, |
|
"loss": 0.2956, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 11.71, |
|
"eval_loss": 0.46582677960395813, |
|
"eval_runtime": 210.74, |
|
"eval_samples_per_second": 16.138, |
|
"eval_steps_per_second": 1.011, |
|
"eval_wer": 0.3568530678823874, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 12.55, |
|
"learning_rate": 0.0001848522895125554, |
|
"loss": 0.2811, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 12.55, |
|
"eval_loss": 0.44131794571876526, |
|
"eval_runtime": 210.7883, |
|
"eval_samples_per_second": 16.135, |
|
"eval_steps_per_second": 1.01, |
|
"eval_wer": 0.3576268350830231, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 13.39, |
|
"learning_rate": 0.00017598966026587885, |
|
"loss": 0.2732, |
|
"step": 6400 |
|
}, |
|
{ |
|
"epoch": 13.39, |
|
"eval_loss": 0.48606938123703003, |
|
"eval_runtime": 210.0412, |
|
"eval_samples_per_second": 16.192, |
|
"eval_steps_per_second": 1.014, |
|
"eval_wer": 0.3552846208540717, |
|
"step": 6400 |
|
}, |
|
{ |
|
"epoch": 14.23, |
|
"learning_rate": 0.00016712703101920236, |
|
"loss": 0.2672, |
|
"step": 6800 |
|
}, |
|
{ |
|
"epoch": 14.23, |
|
"eval_loss": 0.4633455276489258, |
|
"eval_runtime": 212.5303, |
|
"eval_samples_per_second": 16.002, |
|
"eval_steps_per_second": 1.002, |
|
"eval_wer": 0.3534861349282697, |
|
"step": 6800 |
|
}, |
|
{ |
|
"epoch": 15.06, |
|
"learning_rate": 0.00015826440177252584, |
|
"loss": 0.2497, |
|
"step": 7200 |
|
}, |
|
{ |
|
"epoch": 15.06, |
|
"eval_loss": 0.468420147895813, |
|
"eval_runtime": 210.9196, |
|
"eval_samples_per_second": 16.125, |
|
"eval_steps_per_second": 1.01, |
|
"eval_wer": 0.3575850098289347, |
|
"step": 7200 |
|
}, |
|
{ |
|
"epoch": 15.9, |
|
"learning_rate": 0.00014940177252584932, |
|
"loss": 0.2334, |
|
"step": 7600 |
|
}, |
|
{ |
|
"epoch": 15.9, |
|
"eval_loss": 0.4702986776828766, |
|
"eval_runtime": 211.8705, |
|
"eval_samples_per_second": 16.052, |
|
"eval_steps_per_second": 1.005, |
|
"eval_wer": 0.34524655987285124, |
|
"step": 7600 |
|
}, |
|
{ |
|
"epoch": 16.74, |
|
"learning_rate": 0.0001405391432791728, |
|
"loss": 0.2324, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 16.74, |
|
"eval_loss": 0.4267388880252838, |
|
"eval_runtime": 211.5039, |
|
"eval_samples_per_second": 16.08, |
|
"eval_steps_per_second": 1.007, |
|
"eval_wer": 0.35070475553138986, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 17.57, |
|
"learning_rate": 0.0001316765140324963, |
|
"loss": 0.2166, |
|
"step": 8400 |
|
}, |
|
{ |
|
"epoch": 17.57, |
|
"eval_loss": 0.4422346353530884, |
|
"eval_runtime": 213.9215, |
|
"eval_samples_per_second": 15.898, |
|
"eval_steps_per_second": 0.996, |
|
"eval_wer": 0.3400393157388431, |
|
"step": 8400 |
|
}, |
|
{ |
|
"epoch": 18.41, |
|
"learning_rate": 0.00012281388478581977, |
|
"loss": 0.2116, |
|
"step": 8800 |
|
}, |
|
{ |
|
"epoch": 18.41, |
|
"eval_loss": 0.4669197201728821, |
|
"eval_runtime": 213.3022, |
|
"eval_samples_per_second": 15.945, |
|
"eval_steps_per_second": 0.999, |
|
"eval_wer": 0.33359822660922667, |
|
"step": 8800 |
|
}, |
|
{ |
|
"epoch": 19.25, |
|
"learning_rate": 0.00011395125553914327, |
|
"loss": 0.2055, |
|
"step": 9200 |
|
}, |
|
{ |
|
"epoch": 19.25, |
|
"eval_loss": 0.46659788489341736, |
|
"eval_runtime": 209.6264, |
|
"eval_samples_per_second": 16.224, |
|
"eval_steps_per_second": 1.016, |
|
"eval_wer": 0.3343510811828182, |
|
"step": 9200 |
|
}, |
|
{ |
|
"epoch": 20.08, |
|
"learning_rate": 0.00010508862629246675, |
|
"loss": 0.2, |
|
"step": 9600 |
|
}, |
|
{ |
|
"epoch": 20.08, |
|
"eval_loss": 0.4791451096534729, |
|
"eval_runtime": 212.2536, |
|
"eval_samples_per_second": 16.023, |
|
"eval_steps_per_second": 1.004, |
|
"eval_wer": 0.3353967125350287, |
|
"step": 9600 |
|
}, |
|
{ |
|
"epoch": 20.92, |
|
"learning_rate": 9.622599704579024e-05, |
|
"loss": 0.1851, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 20.92, |
|
"eval_loss": 0.4670654237270355, |
|
"eval_runtime": 209.9735, |
|
"eval_samples_per_second": 16.197, |
|
"eval_steps_per_second": 1.014, |
|
"eval_wer": 0.33182065331046884, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 21.76, |
|
"learning_rate": 8.736336779911373e-05, |
|
"loss": 0.1768, |
|
"step": 10400 |
|
}, |
|
{ |
|
"epoch": 21.76, |
|
"eval_loss": 0.48615434765815735, |
|
"eval_runtime": 211.2759, |
|
"eval_samples_per_second": 16.097, |
|
"eval_steps_per_second": 1.008, |
|
"eval_wer": 0.33194612907273413, |
|
"step": 10400 |
|
}, |
|
{ |
|
"epoch": 22.59, |
|
"learning_rate": 7.850073855243721e-05, |
|
"loss": 0.1759, |
|
"step": 10800 |
|
}, |
|
{ |
|
"epoch": 22.59, |
|
"eval_loss": 0.4796726107597351, |
|
"eval_runtime": 210.8467, |
|
"eval_samples_per_second": 16.13, |
|
"eval_steps_per_second": 1.01, |
|
"eval_wer": 0.32908109916767747, |
|
"step": 10800 |
|
}, |
|
{ |
|
"epoch": 23.43, |
|
"learning_rate": 6.96381093057607e-05, |
|
"loss": 0.1697, |
|
"step": 11200 |
|
}, |
|
{ |
|
"epoch": 23.43, |
|
"eval_loss": 0.5016443133354187, |
|
"eval_runtime": 210.1285, |
|
"eval_samples_per_second": 16.185, |
|
"eval_steps_per_second": 1.014, |
|
"eval_wer": 0.32728261324187546, |
|
"step": 11200 |
|
}, |
|
{ |
|
"epoch": 24.27, |
|
"learning_rate": 6.077548005908419e-05, |
|
"loss": 0.162, |
|
"step": 11600 |
|
}, |
|
{ |
|
"epoch": 24.27, |
|
"eval_loss": 0.4838166832923889, |
|
"eval_runtime": 210.5105, |
|
"eval_samples_per_second": 16.156, |
|
"eval_steps_per_second": 1.012, |
|
"eval_wer": 0.32224267012422103, |
|
"step": 11600 |
|
}, |
|
{ |
|
"epoch": 25.1, |
|
"learning_rate": 5.191285081240768e-05, |
|
"loss": 0.1552, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 25.1, |
|
"eval_loss": 0.4953179657459259, |
|
"eval_runtime": 214.2232, |
|
"eval_samples_per_second": 15.876, |
|
"eval_steps_per_second": 0.994, |
|
"eval_wer": 0.3248985737588356, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 25.94, |
|
"learning_rate": 4.3050221565731165e-05, |
|
"loss": 0.1505, |
|
"step": 12400 |
|
}, |
|
{ |
|
"epoch": 25.94, |
|
"eval_loss": 0.5147430300712585, |
|
"eval_runtime": 213.2541, |
|
"eval_samples_per_second": 15.948, |
|
"eval_steps_per_second": 0.999, |
|
"eval_wer": 0.320444184198419, |
|
"step": 12400 |
|
}, |
|
{ |
|
"epoch": 26.78, |
|
"learning_rate": 3.418759231905465e-05, |
|
"loss": 0.1505, |
|
"step": 12800 |
|
}, |
|
{ |
|
"epoch": 26.78, |
|
"eval_loss": 0.5216009616851807, |
|
"eval_runtime": 214.5736, |
|
"eval_samples_per_second": 15.85, |
|
"eval_steps_per_second": 0.993, |
|
"eval_wer": 0.32184533021038103, |
|
"step": 12800 |
|
}, |
|
{ |
|
"epoch": 27.61, |
|
"learning_rate": 2.534711964549483e-05, |
|
"loss": 0.1441, |
|
"step": 13200 |
|
}, |
|
{ |
|
"epoch": 27.61, |
|
"eval_loss": 0.5204435586929321, |
|
"eval_runtime": 210.4984, |
|
"eval_samples_per_second": 16.157, |
|
"eval_steps_per_second": 1.012, |
|
"eval_wer": 0.32084152411225897, |
|
"step": 13200 |
|
}, |
|
{ |
|
"epoch": 28.45, |
|
"learning_rate": 1.6506646971935004e-05, |
|
"loss": 0.1432, |
|
"step": 13600 |
|
}, |
|
{ |
|
"epoch": 28.45, |
|
"eval_loss": 0.5269867777824402, |
|
"eval_runtime": 210.8697, |
|
"eval_samples_per_second": 16.128, |
|
"eval_steps_per_second": 1.01, |
|
"eval_wer": 0.31854113513739596, |
|
"step": 13600 |
|
}, |
|
{ |
|
"epoch": 29.29, |
|
"learning_rate": 7.644017725258493e-06, |
|
"loss": 0.1379, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 29.29, |
|
"eval_loss": 0.5423755049705505, |
|
"eval_runtime": 212.2394, |
|
"eval_samples_per_second": 16.024, |
|
"eval_steps_per_second": 1.004, |
|
"eval_wer": 0.3176209795474508, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 30.0, |
|
"step": 14340, |
|
"total_flos": 2.519939618068477e+20, |
|
"train_loss": 0.5493880579162342, |
|
"train_runtime": 54395.2991, |
|
"train_samples_per_second": 16.879, |
|
"train_steps_per_second": 0.264 |
|
} |
|
], |
|
"max_steps": 14340, |
|
"num_train_epochs": 30, |
|
"total_flos": 2.519939618068477e+20, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|