{ "best_metric": 0.4401148855686188, "best_model_checkpoint": "/scratch/skscla001/speech/results/mms-1b-bigcgen-male-30hrs-model/checkpoint-1700", "epoch": 1.0869790318405383, "eval_steps": 100, "global_step": 2100, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.05177323323841574, "grad_norm": 5.1029253005981445, "learning_rate": 0.000285, "loss": 14.8177, "step": 100 }, { "epoch": 0.05177323323841574, "eval_loss": 0.9883018732070923, "eval_runtime": 30.7186, "eval_samples_per_second": 14.356, "eval_steps_per_second": 3.613, "eval_wer": 0.8360182736234671, "step": 100 }, { "epoch": 0.10354646647683148, "grad_norm": 3.1198737621307373, "learning_rate": 0.0002995071762061214, "loss": 1.9066, "step": 200 }, { "epoch": 0.10354646647683148, "eval_loss": 0.6110880374908447, "eval_runtime": 30.676, "eval_samples_per_second": 14.376, "eval_steps_per_second": 3.618, "eval_wer": 0.5657610002404424, "step": 200 }, { "epoch": 0.15531969971524723, "grad_norm": 2.1564688682556152, "learning_rate": 0.00029898841431782807, "loss": 1.6259, "step": 300 }, { "epoch": 0.15531969971524723, "eval_loss": 0.5918548107147217, "eval_runtime": 30.4361, "eval_samples_per_second": 14.489, "eval_steps_per_second": 3.647, "eval_wer": 0.5599903823034383, "step": 300 }, { "epoch": 0.20709293295366296, "grad_norm": 2.736656665802002, "learning_rate": 0.0002984696524295348, "loss": 1.5344, "step": 400 }, { "epoch": 0.20709293295366296, "eval_loss": 0.5518040657043457, "eval_runtime": 30.3097, "eval_samples_per_second": 14.55, "eval_steps_per_second": 3.662, "eval_wer": 0.5453233950468863, "step": 400 }, { "epoch": 0.2588661661920787, "grad_norm": 7.7691874504089355, "learning_rate": 0.00029795089054124156, "loss": 1.5788, "step": 500 }, { "epoch": 0.2588661661920787, "eval_loss": 0.5321589112281799, "eval_runtime": 30.6676, "eval_samples_per_second": 14.38, "eval_steps_per_second": 3.619, "eval_wer": 0.5417167588362587, "step": 500 }, { "epoch": 0.31063939943049446, "grad_norm": 2.1344313621520996, "learning_rate": 0.00029743212865294825, "loss": 1.3816, "step": 600 }, { "epoch": 0.31063939943049446, "eval_loss": 0.48904091119766235, "eval_runtime": 30.416, "eval_samples_per_second": 14.499, "eval_steps_per_second": 3.649, "eval_wer": 0.5234431353690792, "step": 600 }, { "epoch": 0.36241263266891016, "grad_norm": 8.843093872070312, "learning_rate": 0.000296913366764655, "loss": 1.3242, "step": 700 }, { "epoch": 0.36241263266891016, "eval_loss": 0.47984835505485535, "eval_runtime": 30.416, "eval_samples_per_second": 14.499, "eval_steps_per_second": 3.649, "eval_wer": 0.5032459725895648, "step": 700 }, { "epoch": 0.4141858659073259, "grad_norm": 2.9696879386901855, "learning_rate": 0.00029639460487636173, "loss": 1.301, "step": 800 }, { "epoch": 0.4141858659073259, "eval_loss": 0.4812551736831665, "eval_runtime": 30.5011, "eval_samples_per_second": 14.458, "eval_steps_per_second": 3.639, "eval_wer": 0.5159894205337822, "step": 800 }, { "epoch": 0.46595909914574163, "grad_norm": 3.925156593322754, "learning_rate": 0.0002958758429880684, "loss": 1.1288, "step": 900 }, { "epoch": 0.46595909914574163, "eval_loss": 0.463159441947937, "eval_runtime": 30.6981, "eval_samples_per_second": 14.366, "eval_steps_per_second": 3.616, "eval_wer": 0.4895407549891801, "step": 900 }, { "epoch": 0.5177323323841574, "grad_norm": 5.512630462646484, "learning_rate": 0.00029535708109977517, "loss": 1.2779, "step": 1000 }, { "epoch": 0.5177323323841574, "eval_loss": 0.46061971783638, "eval_runtime": 30.6206, "eval_samples_per_second": 14.402, "eval_steps_per_second": 3.625, "eval_wer": 0.48545323395046885, "step": 1000 }, { "epoch": 0.5695055656225732, "grad_norm": 2.1363961696624756, "learning_rate": 0.0002948383192114819, "loss": 1.172, "step": 1100 }, { "epoch": 0.5695055656225732, "eval_loss": 0.45322486758232117, "eval_runtime": 30.4814, "eval_samples_per_second": 14.468, "eval_steps_per_second": 3.642, "eval_wer": 0.48352969463813417, "step": 1100 }, { "epoch": 0.6212787988609889, "grad_norm": 3.3401312828063965, "learning_rate": 0.0002943195573231886, "loss": 1.2388, "step": 1200 }, { "epoch": 0.6212787988609889, "eval_loss": 0.4610104262828827, "eval_runtime": 30.4897, "eval_samples_per_second": 14.464, "eval_steps_per_second": 3.641, "eval_wer": 0.4832892522240923, "step": 1200 }, { "epoch": 0.6730520320994046, "grad_norm": 2.449324369430542, "learning_rate": 0.00029380079543489535, "loss": 1.2582, "step": 1300 }, { "epoch": 0.6730520320994046, "eval_loss": 0.4502464532852173, "eval_runtime": 30.8643, "eval_samples_per_second": 14.288, "eval_steps_per_second": 3.596, "eval_wer": 0.4666987256552056, "step": 1300 }, { "epoch": 0.7248252653378203, "grad_norm": 1.9429669380187988, "learning_rate": 0.0002932820335466021, "loss": 1.1046, "step": 1400 }, { "epoch": 0.7248252653378203, "eval_loss": 0.46078842878341675, "eval_runtime": 30.6937, "eval_samples_per_second": 14.368, "eval_steps_per_second": 3.616, "eval_wer": 0.4695840346237076, "step": 1400 }, { "epoch": 0.7765984985762361, "grad_norm": 4.626898765563965, "learning_rate": 0.00029276327165830883, "loss": 1.2732, "step": 1500 }, { "epoch": 0.7765984985762361, "eval_loss": 0.4449756443500519, "eval_runtime": 30.6809, "eval_samples_per_second": 14.374, "eval_steps_per_second": 3.618, "eval_wer": 0.4695840346237076, "step": 1500 }, { "epoch": 0.8283717318146518, "grad_norm": 1.7688357830047607, "learning_rate": 0.0002922445097700155, "loss": 1.2323, "step": 1600 }, { "epoch": 0.8283717318146518, "eval_loss": 0.44945722818374634, "eval_runtime": 30.5188, "eval_samples_per_second": 14.45, "eval_steps_per_second": 3.637, "eval_wer": 0.463092089444578, "step": 1600 }, { "epoch": 0.8801449650530676, "grad_norm": 3.03289794921875, "learning_rate": 0.00029172574788172227, "loss": 1.282, "step": 1700 }, { "epoch": 0.8801449650530676, "eval_loss": 0.4401148855686188, "eval_runtime": 30.7912, "eval_samples_per_second": 14.322, "eval_steps_per_second": 3.605, "eval_wer": 0.4604472228901178, "step": 1700 }, { "epoch": 0.9319181982914833, "grad_norm": 2.1416518688201904, "learning_rate": 0.000291206985993429, "loss": 1.1099, "step": 1800 }, { "epoch": 0.9319181982914833, "eval_loss": 0.4439827501773834, "eval_runtime": 30.5981, "eval_samples_per_second": 14.413, "eval_steps_per_second": 3.628, "eval_wer": 0.46116855013224334, "step": 1800 }, { "epoch": 0.983691431529899, "grad_norm": 3.6136231422424316, "learning_rate": 0.0002906882241051357, "loss": 1.1463, "step": 1900 }, { "epoch": 0.983691431529899, "eval_loss": 0.44165703654289246, "eval_runtime": 30.6823, "eval_samples_per_second": 14.373, "eval_steps_per_second": 3.618, "eval_wer": 0.4688627073815821, "step": 1900 }, { "epoch": 1.0352057986021228, "grad_norm": 2.6243932247161865, "learning_rate": 0.00029016946221684244, "loss": 1.12, "step": 2000 }, { "epoch": 1.0352057986021228, "eval_loss": 0.44080641865730286, "eval_runtime": 30.5757, "eval_samples_per_second": 14.423, "eval_steps_per_second": 3.63, "eval_wer": 0.45491704736715555, "step": 2000 }, { "epoch": 1.0869790318405383, "grad_norm": 3.724260091781616, "learning_rate": 0.0002896507003285492, "loss": 1.1037, "step": 2100 }, { "epoch": 1.0869790318405383, "eval_loss": 0.4417917728424072, "eval_runtime": 30.9792, "eval_samples_per_second": 14.235, "eval_steps_per_second": 3.583, "eval_wer": 0.45131041115652804, "step": 2100 }, { "epoch": 1.0869790318405383, "step": 2100, "total_flos": 1.1010996680275614e+19, "train_loss": 1.948471661522275, "train_runtime": 3045.3624, "train_samples_per_second": 152.199, "train_steps_per_second": 19.022 } ], "logging_steps": 100, "max_steps": 57930, "num_input_tokens_seen": 0, "num_train_epochs": 30, "save_steps": 400, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 4, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 3 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 1.1010996680275614e+19, "train_batch_size": 4, "trial_name": null, "trial_params": null }