{ "best_metric": 0.4343813359737396, "best_model_checkpoint": "/scratch/skscla001/speech/results/mms-1b-bigcgen-male-5hrs-model/checkpoint-2100", "epoch": 7.763975155279503, "eval_steps": 100, "global_step": 2500, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.3105590062111801, "grad_norm": 7.037683486938477, "learning_rate": 0.00028799999999999995, "loss": 12.4451, "step": 100 }, { "epoch": 0.3105590062111801, "eval_loss": 1.20354163646698, "eval_runtime": 31.8229, "eval_samples_per_second": 13.858, "eval_steps_per_second": 3.488, "eval_wer": 0.8328925222409232, "step": 100 }, { "epoch": 0.6211180124223602, "grad_norm": 3.7343695163726807, "learning_rate": 0.00029698744769874474, "loss": 1.6342, "step": 200 }, { "epoch": 0.6211180124223602, "eval_loss": 0.6174936890602112, "eval_runtime": 31.7812, "eval_samples_per_second": 13.876, "eval_steps_per_second": 3.493, "eval_wer": 0.5758595816301996, "step": 200 }, { "epoch": 0.9316770186335404, "grad_norm": 4.2912797927856445, "learning_rate": 0.0002938493723849372, "loss": 1.5403, "step": 300 }, { "epoch": 0.9316770186335404, "eval_loss": 0.5695027709007263, "eval_runtime": 31.6004, "eval_samples_per_second": 13.956, "eval_steps_per_second": 3.513, "eval_wer": 0.5534984371243087, "step": 300 }, { "epoch": 1.2422360248447206, "grad_norm": 1.983912467956543, "learning_rate": 0.00029071129707112966, "loss": 1.3778, "step": 400 }, { "epoch": 1.2422360248447206, "eval_loss": 0.5523655414581299, "eval_runtime": 31.7017, "eval_samples_per_second": 13.911, "eval_steps_per_second": 3.501, "eval_wer": 0.5359461408992546, "step": 400 }, { "epoch": 1.5527950310559007, "grad_norm": 1.4964157342910767, "learning_rate": 0.00028757322175732215, "loss": 1.4572, "step": 500 }, { "epoch": 1.5527950310559007, "eval_loss": 0.5301775336265564, "eval_runtime": 32.0399, "eval_samples_per_second": 13.764, "eval_steps_per_second": 3.464, "eval_wer": 0.5171916326039914, "step": 500 }, { "epoch": 1.8633540372670807, "grad_norm": 3.8727850914001465, "learning_rate": 0.0002844665271966527, "loss": 1.4042, "step": 600 }, { "epoch": 1.8633540372670807, "eval_loss": 0.5179265737533569, "eval_runtime": 31.8425, "eval_samples_per_second": 13.849, "eval_steps_per_second": 3.486, "eval_wer": 0.5265688867516229, "step": 600 }, { "epoch": 2.1739130434782608, "grad_norm": 2.505019187927246, "learning_rate": 0.0002813284518828452, "loss": 1.4053, "step": 700 }, { "epoch": 2.1739130434782608, "eval_loss": 0.5029146671295166, "eval_runtime": 31.795, "eval_samples_per_second": 13.87, "eval_steps_per_second": 3.491, "eval_wer": 0.5143063236354893, "step": 700 }, { "epoch": 2.4844720496894412, "grad_norm": 2.7380497455596924, "learning_rate": 0.0002781903765690376, "loss": 1.2782, "step": 800 }, { "epoch": 2.4844720496894412, "eval_loss": 0.4700861871242523, "eval_runtime": 32.0844, "eval_samples_per_second": 13.745, "eval_steps_per_second": 3.46, "eval_wer": 0.4864150036066362, "step": 800 }, { "epoch": 2.795031055900621, "grad_norm": 1.6105161905288696, "learning_rate": 0.0002750523012552301, "loss": 1.2541, "step": 900 }, { "epoch": 2.795031055900621, "eval_loss": 0.4585192799568176, "eval_runtime": 32.0616, "eval_samples_per_second": 13.755, "eval_steps_per_second": 3.462, "eval_wer": 0.48665544602067806, "step": 900 }, { "epoch": 3.1055900621118013, "grad_norm": 3.0428338050842285, "learning_rate": 0.0002719142259414226, "loss": 1.1672, "step": 1000 }, { "epoch": 3.1055900621118013, "eval_loss": 0.47279417514801025, "eval_runtime": 31.7171, "eval_samples_per_second": 13.904, "eval_steps_per_second": 3.5, "eval_wer": 0.4861745611925944, "step": 1000 }, { "epoch": 3.4161490683229814, "grad_norm": 1.6724671125411987, "learning_rate": 0.00026877615062761507, "loss": 1.1205, "step": 1100 }, { "epoch": 3.4161490683229814, "eval_loss": 0.4558369219303131, "eval_runtime": 31.7372, "eval_samples_per_second": 13.895, "eval_steps_per_second": 3.497, "eval_wer": 0.4794421735994229, "step": 1100 }, { "epoch": 3.7267080745341614, "grad_norm": 4.677685260772705, "learning_rate": 0.0002656380753138075, "loss": 1.1699, "step": 1200 }, { "epoch": 3.7267080745341614, "eval_loss": 0.4520164430141449, "eval_runtime": 31.9065, "eval_samples_per_second": 13.822, "eval_steps_per_second": 3.479, "eval_wer": 0.4811252704977158, "step": 1200 }, { "epoch": 4.037267080745342, "grad_norm": 15.624258995056152, "learning_rate": 0.0002625, "loss": 1.2418, "step": 1300 }, { "epoch": 4.037267080745342, "eval_loss": 0.4494916796684265, "eval_runtime": 32.133, "eval_samples_per_second": 13.724, "eval_steps_per_second": 3.454, "eval_wer": 0.4751142101466699, "step": 1300 }, { "epoch": 4.3478260869565215, "grad_norm": 1.0827780961990356, "learning_rate": 0.00025936192468619247, "loss": 1.071, "step": 1400 }, { "epoch": 4.3478260869565215, "eval_loss": 0.4487316310405731, "eval_runtime": 31.845, "eval_samples_per_second": 13.848, "eval_steps_per_second": 3.486, "eval_wer": 0.47367155566241886, "step": 1400 }, { "epoch": 4.658385093167702, "grad_norm": 2.8134868144989014, "learning_rate": 0.0002562238493723849, "loss": 1.078, "step": 1500 }, { "epoch": 4.658385093167702, "eval_loss": 0.4445899724960327, "eval_runtime": 31.7402, "eval_samples_per_second": 13.894, "eval_steps_per_second": 3.497, "eval_wer": 0.4760759798028372, "step": 1500 }, { "epoch": 4.9689440993788825, "grad_norm": 3.0363829135894775, "learning_rate": 0.0002530857740585774, "loss": 1.2474, "step": 1600 }, { "epoch": 4.9689440993788825, "eval_loss": 0.4437084496021271, "eval_runtime": 32.0173, "eval_samples_per_second": 13.774, "eval_steps_per_second": 3.467, "eval_wer": 0.46261120461649435, "step": 1600 }, { "epoch": 5.279503105590062, "grad_norm": 1.907182216644287, "learning_rate": 0.0002499476987447699, "loss": 1.1127, "step": 1700 }, { "epoch": 5.279503105590062, "eval_loss": 0.43802064657211304, "eval_runtime": 32.2846, "eval_samples_per_second": 13.66, "eval_steps_per_second": 3.438, "eval_wer": 0.46573695599903825, "step": 1700 }, { "epoch": 5.590062111801243, "grad_norm": 4.59730339050293, "learning_rate": 0.0002468096234309623, "loss": 1.1761, "step": 1800 }, { "epoch": 5.590062111801243, "eval_loss": 0.44803521037101746, "eval_runtime": 31.8849, "eval_samples_per_second": 13.831, "eval_steps_per_second": 3.481, "eval_wer": 0.4674200528973311, "step": 1800 }, { "epoch": 5.900621118012422, "grad_norm": 2.9201653003692627, "learning_rate": 0.0002436715481171548, "loss": 1.0997, "step": 1900 }, { "epoch": 5.900621118012422, "eval_loss": 0.4469524919986725, "eval_runtime": 31.9803, "eval_samples_per_second": 13.79, "eval_steps_per_second": 3.471, "eval_wer": 0.4652560711709546, "step": 1900 }, { "epoch": 6.211180124223603, "grad_norm": 2.5859551429748535, "learning_rate": 0.00024053347280334725, "loss": 1.1203, "step": 2000 }, { "epoch": 6.211180124223603, "eval_loss": 0.44207584857940674, "eval_runtime": 32.0266, "eval_samples_per_second": 13.77, "eval_steps_per_second": 3.466, "eval_wer": 0.46140899254628515, "step": 2000 }, { "epoch": 6.521739130434782, "grad_norm": 2.423842191696167, "learning_rate": 0.00023739539748953974, "loss": 1.0749, "step": 2100 }, { "epoch": 6.521739130434782, "eval_loss": 0.4343813359737396, "eval_runtime": 32.257, "eval_samples_per_second": 13.671, "eval_steps_per_second": 3.441, "eval_wer": 0.4505890839144025, "step": 2100 }, { "epoch": 6.832298136645963, "grad_norm": 8.447519302368164, "learning_rate": 0.0002342573221757322, "loss": 1.1156, "step": 2200 }, { "epoch": 6.832298136645963, "eval_loss": 0.43543142080307007, "eval_runtime": 31.9424, "eval_samples_per_second": 13.806, "eval_steps_per_second": 3.475, "eval_wer": 0.45106996874248617, "step": 2200 }, { "epoch": 7.142857142857143, "grad_norm": 2.308629274368286, "learning_rate": 0.00023111924686192465, "loss": 1.0404, "step": 2300 }, { "epoch": 7.142857142857143, "eval_loss": 0.43639999628067017, "eval_runtime": 32.0384, "eval_samples_per_second": 13.765, "eval_steps_per_second": 3.465, "eval_wer": 0.45347439288290453, "step": 2300 }, { "epoch": 7.453416149068323, "grad_norm": 1.2582736015319824, "learning_rate": 0.00022798117154811714, "loss": 1.1081, "step": 2400 }, { "epoch": 7.453416149068323, "eval_loss": 0.43768569827079773, "eval_runtime": 31.9331, "eval_samples_per_second": 13.81, "eval_steps_per_second": 3.476, "eval_wer": 0.45155085357056984, "step": 2400 }, { "epoch": 7.763975155279503, "grad_norm": 21.993574142456055, "learning_rate": 0.0002248430962343096, "loss": 1.0535, "step": 2500 }, { "epoch": 7.763975155279503, "eval_loss": 0.44070157408714294, "eval_runtime": 32.2891, "eval_samples_per_second": 13.658, "eval_steps_per_second": 3.438, "eval_wer": 0.4520317383986535, "step": 2500 }, { "epoch": 7.763975155279503, "step": 2500, "total_flos": 1.301164200639248e+19, "train_loss": 1.6717456512451172, "train_runtime": 3684.5977, "train_samples_per_second": 20.949, "train_steps_per_second": 2.622 } ], "logging_steps": 100, "max_steps": 9660, "num_input_tokens_seen": 0, "num_train_epochs": 30, "save_steps": 400, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 4, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 3 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 1.301164200639248e+19, "train_batch_size": 4, "trial_name": null, "trial_params": null }