{ "best_metric": 0.39242170690891703, "best_model_checkpoint": "w2v-bert-2.0-CV_Fleurs-lg-5hrs-v4/checkpoint-11868", "epoch": 46.0, "eval_steps": 500, "global_step": 11868, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "grad_norm": 11.246596336364746, "learning_rate": 5.1200000000000004e-05, "loss": 1.9959, "step": 258 }, { "epoch": 1.0, "eval_cer": 0.13379445701894616, "eval_loss": 0.5475470423698425, "eval_runtime": 29.6601, "eval_samples_per_second": 27.849, "eval_steps_per_second": 6.979, "eval_wer": 0.6078173559646187, "step": 258 }, { "epoch": 2.0, "grad_norm": 1.1338785886764526, "learning_rate": 9.994466403162056e-05, "loss": 0.5447, "step": 516 }, { "epoch": 2.0, "eval_cer": 0.1309612371199291, "eval_loss": 0.5649892687797546, "eval_runtime": 29.1014, "eval_samples_per_second": 28.384, "eval_steps_per_second": 7.113, "eval_wer": 0.5978962467128854, "step": 516 }, { "epoch": 3.0, "grad_norm": 5.585188388824463, "learning_rate": 9.892490118577076e-05, "loss": 0.4574, "step": 774 }, { "epoch": 3.0, "eval_cer": 0.11218917678342487, "eval_loss": 0.45441317558288574, "eval_runtime": 29.5398, "eval_samples_per_second": 27.962, "eval_steps_per_second": 7.007, "eval_wer": 0.5168539325842697, "step": 774 }, { "epoch": 4.0, "grad_norm": 3.5034706592559814, "learning_rate": 9.790513833992096e-05, "loss": 0.3712, "step": 1032 }, { "epoch": 4.0, "eval_cer": 0.10859621076623562, "eval_loss": 0.4465671181678772, "eval_runtime": 29.2068, "eval_samples_per_second": 28.281, "eval_steps_per_second": 7.087, "eval_wer": 0.5049007889074827, "step": 1032 }, { "epoch": 5.0, "grad_norm": 17.74321174621582, "learning_rate": 9.688932806324111e-05, "loss": 0.3187, "step": 1290 }, { "epoch": 5.0, "eval_cer": 0.10036562781937036, "eval_loss": 0.44379866123199463, "eval_runtime": 29.231, "eval_samples_per_second": 28.258, "eval_steps_per_second": 7.082, "eval_wer": 0.4769304327038011, "step": 1290 }, { "epoch": 6.0, "grad_norm": 10.85033893585205, "learning_rate": 9.586956521739131e-05, "loss": 0.276, "step": 1548 }, { "epoch": 6.0, "eval_cer": 0.1056996786907042, "eval_loss": 0.4188626706600189, "eval_runtime": 29.2483, "eval_samples_per_second": 28.241, "eval_steps_per_second": 7.077, "eval_wer": 0.4794405928759264, "step": 1548 }, { "epoch": 7.0, "grad_norm": 1.633737564086914, "learning_rate": 9.484980237154151e-05, "loss": 0.235, "step": 1806 }, { "epoch": 7.0, "eval_cer": 0.1004447680400133, "eval_loss": 0.4695667326450348, "eval_runtime": 29.6156, "eval_samples_per_second": 27.891, "eval_steps_per_second": 6.99, "eval_wer": 0.4796796557494621, "step": 1806 }, { "epoch": 8.0, "grad_norm": 7.645702838897705, "learning_rate": 9.383003952569171e-05, "loss": 0.2086, "step": 2064 }, { "epoch": 8.0, "eval_cer": 0.09632947656658067, "eval_loss": 0.4218059182167053, "eval_runtime": 29.7338, "eval_samples_per_second": 27.78, "eval_steps_per_second": 6.962, "eval_wer": 0.4603155629930672, "step": 2064 }, { "epoch": 9.0, "grad_norm": 5.886650562286377, "learning_rate": 9.281027667984191e-05, "loss": 0.1883, "step": 2322 }, { "epoch": 9.0, "eval_cer": 0.09757989205273904, "eval_loss": 0.4250072240829468, "eval_runtime": 29.3849, "eval_samples_per_second": 28.11, "eval_steps_per_second": 7.044, "eval_wer": 0.45206789385608415, "step": 2322 }, { "epoch": 10.0, "grad_norm": 1.1357783079147339, "learning_rate": 9.179051383399209e-05, "loss": 0.1671, "step": 2580 }, { "epoch": 10.0, "eval_cer": 0.0972633311701673, "eval_loss": 0.45072105526924133, "eval_runtime": 29.0789, "eval_samples_per_second": 28.405, "eval_steps_per_second": 7.119, "eval_wer": 0.458642122878317, "step": 2580 }, { "epoch": 11.0, "grad_norm": 0.13285361230373383, "learning_rate": 9.077075098814229e-05, "loss": 0.1466, "step": 2838 }, { "epoch": 11.0, "eval_cer": 0.09696259833172415, "eval_loss": 0.4737579822540283, "eval_runtime": 29.2438, "eval_samples_per_second": 28.245, "eval_steps_per_second": 7.078, "eval_wer": 0.4634233803490318, "step": 2838 }, { "epoch": 12.0, "grad_norm": 18.049510955810547, "learning_rate": 8.975098814229249e-05, "loss": 0.1313, "step": 3096 }, { "epoch": 12.0, "eval_cer": 0.09357539688820653, "eval_loss": 0.46005183458328247, "eval_runtime": 29.8062, "eval_samples_per_second": 27.712, "eval_steps_per_second": 6.945, "eval_wer": 0.4410710016734401, "step": 3096 }, { "epoch": 13.0, "grad_norm": 1.1974396705627441, "learning_rate": 8.873517786561266e-05, "loss": 0.1181, "step": 3354 }, { "epoch": 13.0, "eval_cer": 0.08908023235568781, "eval_loss": 0.47361627221107483, "eval_runtime": 29.1477, "eval_samples_per_second": 28.338, "eval_steps_per_second": 7.102, "eval_wer": 0.4217069089170452, "step": 3354 }, { "epoch": 14.0, "grad_norm": 1.3642749786376953, "learning_rate": 8.771541501976284e-05, "loss": 0.1026, "step": 3612 }, { "epoch": 14.0, "eval_cer": 0.09121701831304706, "eval_loss": 0.4470472037792206, "eval_runtime": 29.2097, "eval_samples_per_second": 28.278, "eval_steps_per_second": 7.087, "eval_wer": 0.42780301219220657, "step": 3612 }, { "epoch": 15.0, "grad_norm": 50.79623031616211, "learning_rate": 8.669565217391304e-05, "loss": 0.092, "step": 3870 }, { "epoch": 15.0, "eval_cer": 0.09560138653666567, "eval_loss": 0.4730050265789032, "eval_runtime": 29.5306, "eval_samples_per_second": 27.971, "eval_steps_per_second": 7.01, "eval_wer": 0.4619890031078174, "step": 3870 }, { "epoch": 16.0, "grad_norm": 0.18782581388950348, "learning_rate": 8.567588932806324e-05, "loss": 0.083, "step": 4128 }, { "epoch": 16.0, "eval_cer": 0.09370202124123522, "eval_loss": 0.533858597278595, "eval_runtime": 30.1467, "eval_samples_per_second": 27.399, "eval_steps_per_second": 6.866, "eval_wer": 0.44824288787951233, "step": 4128 }, { "epoch": 17.0, "grad_norm": 0.19636400043964386, "learning_rate": 8.465612648221344e-05, "loss": 0.0744, "step": 4386 }, { "epoch": 17.0, "eval_cer": 0.09436679909463587, "eval_loss": 0.485524982213974, "eval_runtime": 30.0117, "eval_samples_per_second": 27.523, "eval_steps_per_second": 6.897, "eval_wer": 0.4508725794884055, "step": 4386 }, { "epoch": 18.0, "grad_norm": 3.5025405883789062, "learning_rate": 8.363636363636364e-05, "loss": 0.0697, "step": 4644 }, { "epoch": 18.0, "eval_cer": 0.09033064784184619, "eval_loss": 0.5221356153488159, "eval_runtime": 29.9898, "eval_samples_per_second": 27.543, "eval_steps_per_second": 6.902, "eval_wer": 0.43748505857040404, "step": 4644 }, { "epoch": 19.0, "grad_norm": 1.851936936378479, "learning_rate": 8.261660079051384e-05, "loss": 0.0602, "step": 4902 }, { "epoch": 19.0, "eval_cer": 0.08944427737064531, "eval_loss": 0.51482093334198, "eval_runtime": 29.3263, "eval_samples_per_second": 28.166, "eval_steps_per_second": 7.059, "eval_wer": 0.42708582357159935, "step": 4902 }, { "epoch": 20.0, "grad_norm": 0.04486146196722984, "learning_rate": 8.159683794466404e-05, "loss": 0.056, "step": 5160 }, { "epoch": 20.0, "eval_cer": 0.08983997847385998, "eval_loss": 0.5517579317092896, "eval_runtime": 29.8422, "eval_samples_per_second": 27.679, "eval_steps_per_second": 6.936, "eval_wer": 0.4312694238584748, "step": 5160 }, { "epoch": 21.0, "grad_norm": 0.09858173131942749, "learning_rate": 8.057707509881424e-05, "loss": 0.05, "step": 5418 }, { "epoch": 21.0, "eval_cer": 0.09120119026891847, "eval_loss": 0.5373868346214294, "eval_runtime": 29.6954, "eval_samples_per_second": 27.816, "eval_steps_per_second": 6.971, "eval_wer": 0.431030360984939, "step": 5418 }, { "epoch": 22.0, "grad_norm": 0.7042592167854309, "learning_rate": 7.955731225296444e-05, "loss": 0.0464, "step": 5676 }, { "epoch": 22.0, "eval_cer": 0.08990329065037433, "eval_loss": 0.5166798233985901, "eval_runtime": 29.6186, "eval_samples_per_second": 27.888, "eval_steps_per_second": 6.989, "eval_wer": 0.42648816638776, "step": 5676 }, { "epoch": 23.0, "grad_norm": 8.724380493164062, "learning_rate": 7.853754940711463e-05, "loss": 0.0453, "step": 5934 }, { "epoch": 23.0, "eval_cer": 0.08813054970797259, "eval_loss": 0.5781718492507935, "eval_runtime": 29.9397, "eval_samples_per_second": 27.589, "eval_steps_per_second": 6.914, "eval_wer": 0.4226631604111881, "step": 5934 }, { "epoch": 24.0, "grad_norm": 0.4805084764957428, "learning_rate": 7.751778656126482e-05, "loss": 0.0412, "step": 6192 }, { "epoch": 24.0, "eval_cer": 0.09294227512306304, "eval_loss": 0.5274640321731567, "eval_runtime": 29.0914, "eval_samples_per_second": 28.393, "eval_steps_per_second": 7.116, "eval_wer": 0.43533349270858235, "step": 6192 }, { "epoch": 25.0, "grad_norm": 0.1090458482503891, "learning_rate": 7.649802371541502e-05, "loss": 0.0369, "step": 6450 }, { "epoch": 25.0, "eval_cer": 0.09186596812231912, "eval_loss": 0.6112269759178162, "eval_runtime": 29.3026, "eval_samples_per_second": 28.189, "eval_steps_per_second": 7.064, "eval_wer": 0.4233803490317954, "step": 6450 }, { "epoch": 26.0, "grad_norm": 0.0822739452123642, "learning_rate": 7.547826086956522e-05, "loss": 0.0339, "step": 6708 }, { "epoch": 26.0, "eval_cer": 0.09094794156286108, "eval_loss": 0.6159156560897827, "eval_runtime": 29.9499, "eval_samples_per_second": 27.579, "eval_steps_per_second": 6.912, "eval_wer": 0.4164475256992589, "step": 6708 }, { "epoch": 27.0, "grad_norm": 0.11275064945220947, "learning_rate": 7.445849802371542e-05, "loss": 0.0316, "step": 6966 }, { "epoch": 27.0, "eval_cer": 0.08453758369078333, "eval_loss": 0.5938315987586975, "eval_runtime": 29.2749, "eval_samples_per_second": 28.215, "eval_steps_per_second": 7.071, "eval_wer": 0.4031795362180253, "step": 6966 }, { "epoch": 28.0, "grad_norm": 0.01683381013572216, "learning_rate": 7.343873517786562e-05, "loss": 0.0263, "step": 7224 }, { "epoch": 28.0, "eval_cer": 0.08710172683961442, "eval_loss": 0.5883095860481262, "eval_runtime": 29.7516, "eval_samples_per_second": 27.763, "eval_steps_per_second": 6.958, "eval_wer": 0.4093951709299546, "step": 7224 }, { "epoch": 29.0, "grad_norm": 0.0025164771359413862, "learning_rate": 7.242292490118577e-05, "loss": 0.0268, "step": 7482 }, { "epoch": 29.0, "eval_cer": 0.0871492109720002, "eval_loss": 0.6013171076774597, "eval_runtime": 29.0617, "eval_samples_per_second": 28.422, "eval_steps_per_second": 7.123, "eval_wer": 0.41477408558450873, "step": 7482 }, { "epoch": 30.0, "grad_norm": 0.012442449107766151, "learning_rate": 7.140316205533597e-05, "loss": 0.0269, "step": 7740 }, { "epoch": 30.0, "eval_cer": 0.09121701831304706, "eval_loss": 0.6137306690216064, "eval_runtime": 29.4462, "eval_samples_per_second": 28.051, "eval_steps_per_second": 7.03, "eval_wer": 0.42182644035381306, "step": 7740 }, { "epoch": 31.0, "grad_norm": 2.4109809398651123, "learning_rate": 7.038339920948617e-05, "loss": 0.0234, "step": 7998 }, { "epoch": 31.0, "eval_cer": 0.08730749141328606, "eval_loss": 0.5840467214584351, "eval_runtime": 29.7315, "eval_samples_per_second": 27.782, "eval_steps_per_second": 6.962, "eval_wer": 0.40987329667702604, "step": 7998 }, { "epoch": 32.0, "grad_norm": 1.8709559440612793, "learning_rate": 6.936363636363637e-05, "loss": 0.0229, "step": 8256 }, { "epoch": 32.0, "eval_cer": 0.08613621614777062, "eval_loss": 0.628635823726654, "eval_runtime": 29.901, "eval_samples_per_second": 27.624, "eval_steps_per_second": 6.923, "eval_wer": 0.4041357877121683, "step": 8256 }, { "epoch": 33.0, "grad_norm": 21.66620445251465, "learning_rate": 6.834387351778657e-05, "loss": 0.0205, "step": 8514 }, { "epoch": 33.0, "eval_cer": 0.0859146235299704, "eval_loss": 0.5922638773918152, "eval_runtime": 29.394, "eval_samples_per_second": 28.101, "eval_steps_per_second": 7.042, "eval_wer": 0.3968443700693282, "step": 8514 }, { "epoch": 34.0, "grad_norm": 0.023612063378095627, "learning_rate": 6.732411067193675e-05, "loss": 0.0196, "step": 8772 }, { "epoch": 34.0, "eval_cer": 0.08950758954715966, "eval_loss": 0.6187863945960999, "eval_runtime": 29.6115, "eval_samples_per_second": 27.895, "eval_steps_per_second": 6.991, "eval_wer": 0.4049725077695434, "step": 8772 }, { "epoch": 35.0, "grad_norm": 0.011560407467186451, "learning_rate": 6.630434782608695e-05, "loss": 0.0191, "step": 9030 }, { "epoch": 35.0, "eval_cer": 0.08724417923677172, "eval_loss": 0.6254743933677673, "eval_runtime": 30.6227, "eval_samples_per_second": 26.973, "eval_steps_per_second": 6.76, "eval_wer": 0.4148936170212766, "step": 9030 }, { "epoch": 36.0, "grad_norm": 0.30145859718322754, "learning_rate": 6.528458498023716e-05, "loss": 0.0185, "step": 9288 }, { "epoch": 36.0, "eval_cer": 0.08858956298770161, "eval_loss": 0.5937672853469849, "eval_runtime": 29.5915, "eval_samples_per_second": 27.913, "eval_steps_per_second": 6.995, "eval_wer": 0.41035142242409756, "step": 9288 }, { "epoch": 37.0, "grad_norm": 0.013502486981451511, "learning_rate": 6.426482213438736e-05, "loss": 0.0163, "step": 9546 }, { "epoch": 37.0, "eval_cer": 0.08642112094208519, "eval_loss": 0.6004359126091003, "eval_runtime": 29.3597, "eval_samples_per_second": 28.134, "eval_steps_per_second": 7.05, "eval_wer": 0.4076021993784365, "step": 9546 }, { "epoch": 38.0, "grad_norm": 4.638584136962891, "learning_rate": 6.324505928853756e-05, "loss": 0.0171, "step": 9804 }, { "epoch": 38.0, "eval_cer": 0.08646860507447095, "eval_loss": 0.6485220193862915, "eval_runtime": 29.8389, "eval_samples_per_second": 27.682, "eval_steps_per_second": 6.937, "eval_wer": 0.4010279703562037, "step": 9804 }, { "epoch": 39.0, "grad_norm": 0.03485848009586334, "learning_rate": 6.222529644268775e-05, "loss": 0.0163, "step": 10062 }, { "epoch": 39.0, "eval_cer": 0.08507573719115528, "eval_loss": 0.6360179781913757, "eval_runtime": 29.5268, "eval_samples_per_second": 27.975, "eval_steps_per_second": 7.011, "eval_wer": 0.40353813052832893, "step": 10062 }, { "epoch": 40.0, "grad_norm": 0.004591576289385557, "learning_rate": 6.120553359683795e-05, "loss": 0.0144, "step": 10320 }, { "epoch": 40.0, "eval_cer": 0.0878773010019152, "eval_loss": 0.6229643821716309, "eval_runtime": 29.6603, "eval_samples_per_second": 27.849, "eval_steps_per_second": 6.979, "eval_wer": 0.41071001673440116, "step": 10320 }, { "epoch": 41.0, "grad_norm": 0.006813856773078442, "learning_rate": 6.0185770750988146e-05, "loss": 0.0133, "step": 10578 }, { "epoch": 41.0, "eval_cer": 0.08781398882540084, "eval_loss": 0.6123417615890503, "eval_runtime": 29.6582, "eval_samples_per_second": 27.851, "eval_steps_per_second": 6.98, "eval_wer": 0.4066459478842936, "step": 10578 }, { "epoch": 42.0, "grad_norm": 0.0054159704595804214, "learning_rate": 5.9166007905138345e-05, "loss": 0.0131, "step": 10836 }, { "epoch": 42.0, "eval_cer": 0.08721252314851453, "eval_loss": 0.6532118916511536, "eval_runtime": 29.9909, "eval_samples_per_second": 27.542, "eval_steps_per_second": 6.902, "eval_wer": 0.40365766196509684, "step": 10836 }, { "epoch": 43.0, "grad_norm": 0.07446209341287613, "learning_rate": 5.814624505928854e-05, "loss": 0.0122, "step": 11094 }, { "epoch": 43.0, "eval_cer": 0.0865160892068567, "eval_loss": 0.6612946391105652, "eval_runtime": 29.2687, "eval_samples_per_second": 28.221, "eval_steps_per_second": 7.072, "eval_wer": 0.40640688501075783, "step": 11094 }, { "epoch": 44.0, "grad_norm": 5.483983993530273, "learning_rate": 5.712648221343874e-05, "loss": 0.0127, "step": 11352 }, { "epoch": 44.0, "eval_cer": 0.08488580066161225, "eval_loss": 0.627931535243988, "eval_runtime": 30.3672, "eval_samples_per_second": 27.2, "eval_steps_per_second": 6.817, "eval_wer": 0.40234281616065026, "step": 11352 }, { "epoch": 45.0, "grad_norm": 0.0012587299570441246, "learning_rate": 5.6106719367588936e-05, "loss": 0.0115, "step": 11610 }, { "epoch": 45.0, "eval_cer": 0.08453758369078333, "eval_loss": 0.6949998140335083, "eval_runtime": 29.5908, "eval_samples_per_second": 27.914, "eval_steps_per_second": 6.995, "eval_wer": 0.3993545302414535, "step": 11610 }, { "epoch": 46.0, "grad_norm": 0.0007135935593396425, "learning_rate": 5.508695652173913e-05, "loss": 0.01, "step": 11868 }, { "epoch": 46.0, "eval_cer": 0.08490162870574083, "eval_loss": 0.7085043787956238, "eval_runtime": 29.5898, "eval_samples_per_second": 27.915, "eval_steps_per_second": 6.996, "eval_wer": 0.39242170690891703, "step": 11868 } ], "logging_steps": 500, "max_steps": 25800, "num_input_tokens_seen": 0, "num_train_epochs": 100, "save_steps": 500, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 20, "early_stopping_threshold": 0.001 }, "attributes": { "early_stopping_patience_counter": 0 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 2.51304932301838e+19, "train_batch_size": 8, "trial_name": null, "trial_params": null }