mms-1b-toigen-male-model / trainer_state.json
csikasote's picture
End of training
f644efa verified
{
"best_metric": 0.4265601933002472,
"best_model_checkpoint": "/scratch/skscla001/speech/results/mms-1b-toigen-male-model/checkpoint-2600",
"epoch": 15.151515151515152,
"eval_steps": 100,
"global_step": 3000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.5050505050505051,
"grad_norm": 7.066163539886475,
"learning_rate": 0.00028799999999999995,
"loss": 8.0084,
"step": 100
},
{
"epoch": 0.5050505050505051,
"eval_loss": 3.745828628540039,
"eval_runtime": 18.1656,
"eval_samples_per_second": 10.68,
"eval_steps_per_second": 2.697,
"eval_wer": 0.9983853606027987,
"step": 100
},
{
"epoch": 1.0101010101010102,
"grad_norm": 3.717050552368164,
"learning_rate": 0.0002950684931506849,
"loss": 2.8136,
"step": 200
},
{
"epoch": 1.0101010101010102,
"eval_loss": 1.0028560161590576,
"eval_runtime": 18.1412,
"eval_samples_per_second": 10.694,
"eval_steps_per_second": 2.701,
"eval_wer": 0.7421959095801938,
"step": 200
},
{
"epoch": 1.5151515151515151,
"grad_norm": 2.0190610885620117,
"learning_rate": 0.00028993150684931507,
"loss": 0.9647,
"step": 300
},
{
"epoch": 1.5151515151515151,
"eval_loss": 0.5869513154029846,
"eval_runtime": 18.1111,
"eval_samples_per_second": 10.712,
"eval_steps_per_second": 2.706,
"eval_wer": 0.527448869752422,
"step": 300
},
{
"epoch": 2.0202020202020203,
"grad_norm": 1.3322362899780273,
"learning_rate": 0.0002847945205479452,
"loss": 0.8539,
"step": 400
},
{
"epoch": 2.0202020202020203,
"eval_loss": 0.5461000800132751,
"eval_runtime": 18.1018,
"eval_samples_per_second": 10.717,
"eval_steps_per_second": 2.707,
"eval_wer": 0.5048439181916039,
"step": 400
},
{
"epoch": 2.525252525252525,
"grad_norm": 2.1069583892822266,
"learning_rate": 0.0002796575342465753,
"loss": 0.7525,
"step": 500
},
{
"epoch": 2.525252525252525,
"eval_loss": 0.5256428122520447,
"eval_runtime": 18.1961,
"eval_samples_per_second": 10.662,
"eval_steps_per_second": 2.693,
"eval_wer": 0.49892357373519913,
"step": 500
},
{
"epoch": 3.0303030303030303,
"grad_norm": 2.1016767024993896,
"learning_rate": 0.0002745205479452055,
"loss": 0.7307,
"step": 600
},
{
"epoch": 3.0303030303030303,
"eval_loss": 0.5101361274719238,
"eval_runtime": 18.1622,
"eval_samples_per_second": 10.682,
"eval_steps_per_second": 2.698,
"eval_wer": 0.48708288482238965,
"step": 600
},
{
"epoch": 3.5353535353535355,
"grad_norm": 1.1222645044326782,
"learning_rate": 0.0002693835616438356,
"loss": 0.6997,
"step": 700
},
{
"epoch": 3.5353535353535355,
"eval_loss": 0.5032119750976562,
"eval_runtime": 18.0324,
"eval_samples_per_second": 10.758,
"eval_steps_per_second": 2.717,
"eval_wer": 0.468783638320775,
"step": 700
},
{
"epoch": 4.040404040404041,
"grad_norm": 1.4940961599349976,
"learning_rate": 0.0002642465753424657,
"loss": 0.6882,
"step": 800
},
{
"epoch": 4.040404040404041,
"eval_loss": 0.4878801703453064,
"eval_runtime": 18.1093,
"eval_samples_per_second": 10.713,
"eval_steps_per_second": 2.706,
"eval_wer": 0.4736275565123789,
"step": 800
},
{
"epoch": 4.545454545454545,
"grad_norm": 0.9596569538116455,
"learning_rate": 0.0002591095890410959,
"loss": 0.651,
"step": 900
},
{
"epoch": 4.545454545454545,
"eval_loss": 0.4788007438182831,
"eval_runtime": 18.2582,
"eval_samples_per_second": 10.625,
"eval_steps_per_second": 2.684,
"eval_wer": 0.45586652314316467,
"step": 900
},
{
"epoch": 5.05050505050505,
"grad_norm": 6.235934734344482,
"learning_rate": 0.000253972602739726,
"loss": 0.6623,
"step": 1000
},
{
"epoch": 5.05050505050505,
"eval_loss": 0.4798834025859833,
"eval_runtime": 18.2341,
"eval_samples_per_second": 10.639,
"eval_steps_per_second": 2.687,
"eval_wer": 0.4526372443487621,
"step": 1000
},
{
"epoch": 5.555555555555555,
"grad_norm": 1.2491483688354492,
"learning_rate": 0.00024883561643835613,
"loss": 0.6339,
"step": 1100
},
{
"epoch": 5.555555555555555,
"eval_loss": 0.467692494392395,
"eval_runtime": 18.2104,
"eval_samples_per_second": 10.653,
"eval_steps_per_second": 2.691,
"eval_wer": 0.4418729817007535,
"step": 1100
},
{
"epoch": 6.0606060606060606,
"grad_norm": 1.8275915384292603,
"learning_rate": 0.0002436986301369863,
"loss": 0.6424,
"step": 1200
},
{
"epoch": 6.0606060606060606,
"eval_loss": 0.46495306491851807,
"eval_runtime": 18.1846,
"eval_samples_per_second": 10.668,
"eval_steps_per_second": 2.695,
"eval_wer": 0.4429494079655544,
"step": 1200
},
{
"epoch": 6.565656565656566,
"grad_norm": 12.825818061828613,
"learning_rate": 0.00023856164383561642,
"loss": 0.6365,
"step": 1300
},
{
"epoch": 6.565656565656566,
"eval_loss": 0.47455188632011414,
"eval_runtime": 18.2175,
"eval_samples_per_second": 10.649,
"eval_steps_per_second": 2.69,
"eval_wer": 0.4461786867599569,
"step": 1300
},
{
"epoch": 7.070707070707071,
"grad_norm": 0.8040870428085327,
"learning_rate": 0.00023342465753424657,
"loss": 0.556,
"step": 1400
},
{
"epoch": 7.070707070707071,
"eval_loss": 0.45120927691459656,
"eval_runtime": 18.2585,
"eval_samples_per_second": 10.625,
"eval_steps_per_second": 2.684,
"eval_wer": 0.4381054897739505,
"step": 1400
},
{
"epoch": 7.575757575757576,
"grad_norm": 1.3401854038238525,
"learning_rate": 0.00022828767123287669,
"loss": 0.5969,
"step": 1500
},
{
"epoch": 7.575757575757576,
"eval_loss": 0.4596523940563202,
"eval_runtime": 18.1217,
"eval_samples_per_second": 10.705,
"eval_steps_per_second": 2.704,
"eval_wer": 0.4413347685683531,
"step": 1500
},
{
"epoch": 8.080808080808081,
"grad_norm": 1.6330450773239136,
"learning_rate": 0.00022315068493150683,
"loss": 0.5772,
"step": 1600
},
{
"epoch": 8.080808080808081,
"eval_loss": 0.4455384612083435,
"eval_runtime": 18.0777,
"eval_samples_per_second": 10.731,
"eval_steps_per_second": 2.711,
"eval_wer": 0.4284176533907427,
"step": 1600
},
{
"epoch": 8.585858585858587,
"grad_norm": 2.6647837162017822,
"learning_rate": 0.00021801369863013698,
"loss": 0.5695,
"step": 1700
},
{
"epoch": 8.585858585858587,
"eval_loss": 0.45646098256111145,
"eval_runtime": 18.2232,
"eval_samples_per_second": 10.646,
"eval_steps_per_second": 2.689,
"eval_wer": 0.4268030139935414,
"step": 1700
},
{
"epoch": 9.090909090909092,
"grad_norm": 0.6165424585342407,
"learning_rate": 0.0002129280821917808,
"loss": 0.5752,
"step": 1800
},
{
"epoch": 9.090909090909092,
"eval_loss": 0.44141411781311035,
"eval_runtime": 18.1955,
"eval_samples_per_second": 10.662,
"eval_steps_per_second": 2.693,
"eval_wer": 0.418729817007535,
"step": 1800
},
{
"epoch": 9.595959595959595,
"grad_norm": 1.7799068689346313,
"learning_rate": 0.00020779109589041093,
"loss": 0.5734,
"step": 1900
},
{
"epoch": 9.595959595959595,
"eval_loss": 0.4450058937072754,
"eval_runtime": 18.1771,
"eval_samples_per_second": 10.673,
"eval_steps_per_second": 2.696,
"eval_wer": 0.4085037674919268,
"step": 1900
},
{
"epoch": 10.1010101010101,
"grad_norm": 4.881310939788818,
"learning_rate": 0.00020265410958904108,
"loss": 0.5465,
"step": 2000
},
{
"epoch": 10.1010101010101,
"eval_loss": 0.43734970688819885,
"eval_runtime": 18.1427,
"eval_samples_per_second": 10.693,
"eval_steps_per_second": 2.701,
"eval_wer": 0.4155005382131324,
"step": 2000
},
{
"epoch": 10.606060606060606,
"grad_norm": 4.940012454986572,
"learning_rate": 0.00019751712328767122,
"loss": 0.5553,
"step": 2100
},
{
"epoch": 10.606060606060606,
"eval_loss": 0.45197948813438416,
"eval_runtime": 18.2849,
"eval_samples_per_second": 10.61,
"eval_steps_per_second": 2.68,
"eval_wer": 0.4241119483315393,
"step": 2100
},
{
"epoch": 11.11111111111111,
"grad_norm": 1.0321776866912842,
"learning_rate": 0.00019238013698630134,
"loss": 0.5289,
"step": 2200
},
{
"epoch": 11.11111111111111,
"eval_loss": 0.430617094039917,
"eval_runtime": 18.3569,
"eval_samples_per_second": 10.568,
"eval_steps_per_second": 2.669,
"eval_wer": 0.4085037674919268,
"step": 2200
},
{
"epoch": 11.616161616161616,
"grad_norm": 1.051830768585205,
"learning_rate": 0.0001872431506849315,
"loss": 0.5122,
"step": 2300
},
{
"epoch": 11.616161616161616,
"eval_loss": 0.4372076392173767,
"eval_runtime": 18.1559,
"eval_samples_per_second": 10.685,
"eval_steps_per_second": 2.699,
"eval_wer": 0.4015069967707212,
"step": 2300
},
{
"epoch": 12.121212121212121,
"grad_norm": 1.2487713098526,
"learning_rate": 0.0001821061643835616,
"loss": 0.5659,
"step": 2400
},
{
"epoch": 12.121212121212121,
"eval_loss": 0.4407643973827362,
"eval_runtime": 18.201,
"eval_samples_per_second": 10.659,
"eval_steps_per_second": 2.692,
"eval_wer": 0.4009687836383208,
"step": 2400
},
{
"epoch": 12.626262626262626,
"grad_norm": 0.9535735845565796,
"learning_rate": 0.00017696917808219175,
"loss": 0.5007,
"step": 2500
},
{
"epoch": 12.626262626262626,
"eval_loss": 0.4274228811264038,
"eval_runtime": 18.2686,
"eval_samples_per_second": 10.619,
"eval_steps_per_second": 2.682,
"eval_wer": 0.3982777179763186,
"step": 2500
},
{
"epoch": 13.131313131313131,
"grad_norm": 0.9828107357025146,
"learning_rate": 0.00017188356164383559,
"loss": 0.5366,
"step": 2600
},
{
"epoch": 13.131313131313131,
"eval_loss": 0.4265601933002472,
"eval_runtime": 18.3638,
"eval_samples_per_second": 10.564,
"eval_steps_per_second": 2.668,
"eval_wer": 0.4025834230355221,
"step": 2600
},
{
"epoch": 13.636363636363637,
"grad_norm": 1.9459073543548584,
"learning_rate": 0.00016674657534246576,
"loss": 0.5068,
"step": 2700
},
{
"epoch": 13.636363636363637,
"eval_loss": 0.43655750155448914,
"eval_runtime": 18.2672,
"eval_samples_per_second": 10.62,
"eval_steps_per_second": 2.682,
"eval_wer": 0.3961248654467169,
"step": 2700
},
{
"epoch": 14.141414141414142,
"grad_norm": 1.0564210414886475,
"learning_rate": 0.00016160958904109588,
"loss": 0.507,
"step": 2800
},
{
"epoch": 14.141414141414142,
"eval_loss": 0.4359418749809265,
"eval_runtime": 18.2049,
"eval_samples_per_second": 10.656,
"eval_steps_per_second": 2.692,
"eval_wer": 0.39720129171151775,
"step": 2800
},
{
"epoch": 14.646464646464647,
"grad_norm": 0.9299136400222778,
"learning_rate": 0.00015647260273972602,
"loss": 0.5031,
"step": 2900
},
{
"epoch": 14.646464646464647,
"eval_loss": 0.4333823323249817,
"eval_runtime": 18.3722,
"eval_samples_per_second": 10.559,
"eval_steps_per_second": 2.667,
"eval_wer": 0.3966630785791173,
"step": 2900
},
{
"epoch": 15.151515151515152,
"grad_norm": 2.234483242034912,
"learning_rate": 0.00015133561643835617,
"loss": 0.4949,
"step": 3000
},
{
"epoch": 15.151515151515152,
"eval_loss": 0.43481922149658203,
"eval_runtime": 18.3081,
"eval_samples_per_second": 10.596,
"eval_steps_per_second": 2.676,
"eval_wer": 0.39881593110871905,
"step": 3000
},
{
"epoch": 15.151515151515152,
"step": 3000,
"total_flos": 1.0546657971478737e+19,
"train_loss": 0.9314683748881022,
"train_runtime": 2759.24,
"train_samples_per_second": 8.611,
"train_steps_per_second": 2.153
}
],
"logging_steps": 100,
"max_steps": 5940,
"num_input_tokens_seen": 0,
"num_train_epochs": 30,
"save_steps": 400,
"stateful_callbacks": {
"EarlyStoppingCallback": {
"args": {
"early_stopping_patience": 4,
"early_stopping_threshold": 0.0
},
"attributes": {
"early_stopping_patience_counter": 2
}
},
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 1.0546657971478737e+19,
"train_batch_size": 4,
"trial_name": null,
"trial_params": null
}