whisper-medium-vaani-telugu / trainer_state.json
{
"best_metric": 53.673860298812414,
"best_model_checkpoint": "./whisper-medium-te/checkpoint-2000",
"epoch": 2.971768202080238,
"eval_steps": 1000,
"global_step": 2000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.03714710252600297,
"grad_norm": 9.450862884521484,
"learning_rate": 4.6000000000000004e-07,
"loss": 1.1494,
"step": 25
},
{
"epoch": 0.07429420505200594,
"grad_norm": 5.792855262756348,
"learning_rate": 9.600000000000001e-07,
"loss": 0.8665,
"step": 50
},
{
"epoch": 0.11144130757800892,
"grad_norm": 5.076374530792236,
"learning_rate": 1.46e-06,
"loss": 0.6443,
"step": 75
},
{
"epoch": 0.1485884101040119,
"grad_norm": 4.388276100158691,
"learning_rate": 1.9600000000000003e-06,
"loss": 0.5233,
"step": 100
},
{
"epoch": 0.18573551263001487,
"grad_norm": 4.2827301025390625,
"learning_rate": 2.46e-06,
"loss": 0.4417,
"step": 125
},
{
"epoch": 0.22288261515601784,
"grad_norm": 4.33107328414917,
"learning_rate": 2.96e-06,
"loss": 0.3846,
"step": 150
},
{
"epoch": 0.2600297176820208,
"grad_norm": 5.062885284423828,
"learning_rate": 3.46e-06,
"loss": 0.3456,
"step": 175
},
{
"epoch": 0.2971768202080238,
"grad_norm": 3.433645725250244,
"learning_rate": 3.96e-06,
"loss": 0.3304,
"step": 200
},
{
"epoch": 0.3343239227340267,
"grad_norm": 4.322815418243408,
"learning_rate": 4.4600000000000005e-06,
"loss": 0.3323,
"step": 225
},
{
"epoch": 0.37147102526002973,
"grad_norm": 4.137588024139404,
"learning_rate": 4.960000000000001e-06,
"loss": 0.3087,
"step": 250
},
{
"epoch": 0.4086181277860327,
"grad_norm": 3.188852071762085,
"learning_rate": 5.460000000000001e-06,
"loss": 0.2858,
"step": 275
},
{
"epoch": 0.4457652303120357,
"grad_norm": 3.801391839981079,
"learning_rate": 5.9600000000000005e-06,
"loss": 0.2804,
"step": 300
},
{
"epoch": 0.48291233283803864,
"grad_norm": 3.4735641479492188,
"learning_rate": 6.460000000000001e-06,
"loss": 0.2746,
"step": 325
},
{
"epoch": 0.5200594353640416,
"grad_norm": 4.813545227050781,
"learning_rate": 6.96e-06,
"loss": 0.278,
"step": 350
},
{
"epoch": 0.5572065378900446,
"grad_norm": 3.2244017124176025,
"learning_rate": 7.4600000000000006e-06,
"loss": 0.2655,
"step": 375
},
{
"epoch": 0.5943536404160475,
"grad_norm": 3.476280689239502,
"learning_rate": 7.960000000000002e-06,
"loss": 0.2571,
"step": 400
},
{
"epoch": 0.6315007429420505,
"grad_norm": 3.5403590202331543,
"learning_rate": 8.46e-06,
"loss": 0.2522,
"step": 425
},
{
"epoch": 0.6686478454680534,
"grad_norm": 4.525882244110107,
"learning_rate": 8.96e-06,
"loss": 0.2473,
"step": 450
},
{
"epoch": 0.7057949479940565,
"grad_norm": 4.107092380523682,
"learning_rate": 9.460000000000001e-06,
"loss": 0.2519,
"step": 475
},
{
"epoch": 0.7429420505200595,
"grad_norm": 3.5582709312438965,
"learning_rate": 9.960000000000001e-06,
"loss": 0.2508,
"step": 500
},
{
"epoch": 0.7800891530460624,
"grad_norm": 2.6124589443206787,
"learning_rate": 9.984137931034483e-06,
"loss": 0.2437,
"step": 525
},
{
"epoch": 0.8172362555720654,
"grad_norm": 3.1363003253936768,
"learning_rate": 9.96689655172414e-06,
"loss": 0.241,
"step": 550
},
{
"epoch": 0.8543833580980683,
"grad_norm": 2.3347408771514893,
"learning_rate": 9.949655172413793e-06,
"loss": 0.2369,
"step": 575
},
{
"epoch": 0.8915304606240714,
"grad_norm": 2.609605312347412,
"learning_rate": 9.93241379310345e-06,
"loss": 0.2204,
"step": 600
},
{
"epoch": 0.9286775631500743,
"grad_norm": 3.050915479660034,
"learning_rate": 9.915172413793104e-06,
"loss": 0.2183,
"step": 625
},
{
"epoch": 0.9658246656760773,
"grad_norm": 2.9425406455993652,
"learning_rate": 9.897931034482759e-06,
"loss": 0.2321,
"step": 650
},
{
"epoch": 1.0029717682020802,
"grad_norm": 3.074657917022705,
"learning_rate": 9.880689655172414e-06,
"loss": 0.2241,
"step": 675
},
{
"epoch": 1.0401188707280833,
"grad_norm": 2.8157992362976074,
"learning_rate": 9.86344827586207e-06,
"loss": 0.1898,
"step": 700
},
{
"epoch": 1.0772659732540861,
"grad_norm": 2.4690845012664795,
"learning_rate": 9.846206896551725e-06,
"loss": 0.1857,
"step": 725
},
{
"epoch": 1.1144130757800892,
"grad_norm": 2.4655447006225586,
"learning_rate": 9.82896551724138e-06,
"loss": 0.1743,
"step": 750
},
{
"epoch": 1.151560178306092,
"grad_norm": 2.103703498840332,
"learning_rate": 9.811724137931035e-06,
"loss": 0.1825,
"step": 775
},
{
"epoch": 1.188707280832095,
"grad_norm": 2.727170467376709,
"learning_rate": 9.79448275862069e-06,
"loss": 0.1861,
"step": 800
},
{
"epoch": 1.2258543833580982,
"grad_norm": 2.1180167198181152,
"learning_rate": 9.777241379310347e-06,
"loss": 0.179,
"step": 825
},
{
"epoch": 1.263001485884101,
"grad_norm": 2.076005697250366,
"learning_rate": 9.760000000000001e-06,
"loss": 0.1854,
"step": 850
},
{
"epoch": 1.300148588410104,
"grad_norm": 1.8460164070129395,
"learning_rate": 9.742758620689656e-06,
"loss": 0.1831,
"step": 875
},
{
"epoch": 1.3372956909361071,
"grad_norm": 4.341026306152344,
"learning_rate": 9.725517241379311e-06,
"loss": 0.177,
"step": 900
},
{
"epoch": 1.37444279346211,
"grad_norm": 2.86643123626709,
"learning_rate": 9.708275862068966e-06,
"loss": 0.1853,
"step": 925
},
{
"epoch": 1.4115898959881128,
"grad_norm": 2.4118528366088867,
"learning_rate": 9.691034482758621e-06,
"loss": 0.1741,
"step": 950
},
{
"epoch": 1.4487369985141159,
"grad_norm": 2.506206512451172,
"learning_rate": 9.673793103448277e-06,
"loss": 0.1713,
"step": 975
},
{
"epoch": 1.485884101040119,
"grad_norm": 2.32373309135437,
"learning_rate": 9.65655172413793e-06,
"loss": 0.1662,
"step": 1000
},
{
"epoch": 1.485884101040119,
"eval_loss": 0.2365463376045227,
"eval_runtime": 3136.2768,
"eval_samples_per_second": 0.863,
"eval_steps_per_second": 0.108,
"eval_wer": 55.854935512705914,
"step": 1000
},
{
"epoch": 1.5230312035661218,
"grad_norm": 2.9691216945648193,
"learning_rate": 9.639310344827587e-06,
"loss": 0.1792,
"step": 1025
},
{
"epoch": 1.5601783060921248,
"grad_norm": 2.158869981765747,
"learning_rate": 9.622068965517242e-06,
"loss": 0.1654,
"step": 1050
},
{
"epoch": 1.5973254086181279,
"grad_norm": 2.132694721221924,
"learning_rate": 9.604827586206897e-06,
"loss": 0.1805,
"step": 1075
},
{
"epoch": 1.6344725111441307,
"grad_norm": 2.4110560417175293,
"learning_rate": 9.587586206896554e-06,
"loss": 0.1688,
"step": 1100
},
{
"epoch": 1.6716196136701336,
"grad_norm": 3.177272319793701,
"learning_rate": 9.570344827586208e-06,
"loss": 0.1746,
"step": 1125
},
{
"epoch": 1.7087667161961368,
"grad_norm": 2.058962821960449,
"learning_rate": 9.553103448275863e-06,
"loss": 0.1695,
"step": 1150
},
{
"epoch": 1.7459138187221397,
"grad_norm": 2.6047332286834717,
"learning_rate": 9.535862068965518e-06,
"loss": 0.1767,
"step": 1175
},
{
"epoch": 1.7830609212481425,
"grad_norm": 2.676912546157837,
"learning_rate": 9.518620689655173e-06,
"loss": 0.186,
"step": 1200
},
{
"epoch": 1.8202080237741456,
"grad_norm": 2.346376895904541,
"learning_rate": 9.501379310344828e-06,
"loss": 0.1832,
"step": 1225
},
{
"epoch": 1.8573551263001487,
"grad_norm": 2.172449827194214,
"learning_rate": 9.484137931034484e-06,
"loss": 0.1791,
"step": 1250
},
{
"epoch": 1.8945022288261515,
"grad_norm": 2.105217933654785,
"learning_rate": 9.46689655172414e-06,
"loss": 0.1771,
"step": 1275
},
{
"epoch": 1.9316493313521546,
"grad_norm": 2.7176296710968018,
"learning_rate": 9.449655172413794e-06,
"loss": 0.1711,
"step": 1300
},
{
"epoch": 1.9687964338781576,
"grad_norm": 2.633023262023926,
"learning_rate": 9.432413793103449e-06,
"loss": 0.1717,
"step": 1325
},
{
"epoch": 2.0059435364041605,
"grad_norm": 1.6349059343338013,
"learning_rate": 9.415172413793104e-06,
"loss": 0.1649,
"step": 1350
},
{
"epoch": 2.0430906389301633,
"grad_norm": 2.0442230701446533,
"learning_rate": 9.397931034482759e-06,
"loss": 0.1204,
"step": 1375
},
{
"epoch": 2.0802377414561666,
"grad_norm": 1.8747773170471191,
"learning_rate": 9.380689655172415e-06,
"loss": 0.1214,
"step": 1400
},
{
"epoch": 2.1173848439821694,
"grad_norm": 2.2875783443450928,
"learning_rate": 9.363448275862069e-06,
"loss": 0.1344,
"step": 1425
},
{
"epoch": 2.1545319465081723,
"grad_norm": 1.9838837385177612,
"learning_rate": 9.346206896551725e-06,
"loss": 0.1199,
"step": 1450
},
{
"epoch": 2.1916790490341755,
"grad_norm": 2.1067237854003906,
"learning_rate": 9.32896551724138e-06,
"loss": 0.1181,
"step": 1475
},
{
"epoch": 2.2288261515601784,
"grad_norm": 2.6312592029571533,
"learning_rate": 9.311724137931035e-06,
"loss": 0.1256,
"step": 1500
},
{
"epoch": 2.265973254086181,
"grad_norm": 1.992762804031372,
"learning_rate": 9.294482758620691e-06,
"loss": 0.1272,
"step": 1525
},
{
"epoch": 2.303120356612184,
"grad_norm": 2.416465997695923,
"learning_rate": 9.277241379310346e-06,
"loss": 0.1305,
"step": 1550
},
{
"epoch": 2.3402674591381873,
"grad_norm": 1.7433266639709473,
"learning_rate": 9.260000000000001e-06,
"loss": 0.1262,
"step": 1575
},
{
"epoch": 2.37741456166419,
"grad_norm": 1.751561164855957,
"learning_rate": 9.242758620689656e-06,
"loss": 0.1216,
"step": 1600
},
{
"epoch": 2.414561664190193,
"grad_norm": 2.123859405517578,
"learning_rate": 9.225517241379311e-06,
"loss": 0.1366,
"step": 1625
},
{
"epoch": 2.4517087667161963,
"grad_norm": 1.9784679412841797,
"learning_rate": 9.208275862068966e-06,
"loss": 0.1265,
"step": 1650
},
{
"epoch": 2.488855869242199,
"grad_norm": 2.4382882118225098,
"learning_rate": 9.191034482758622e-06,
"loss": 0.1228,
"step": 1675
},
{
"epoch": 2.526002971768202,
"grad_norm": 2.2365972995758057,
"learning_rate": 9.173793103448277e-06,
"loss": 0.1326,
"step": 1700
},
{
"epoch": 2.563150074294205,
"grad_norm": 1.7844051122665405,
"learning_rate": 9.156551724137932e-06,
"loss": 0.1234,
"step": 1725
},
{
"epoch": 2.600297176820208,
"grad_norm": 1.6666020154953003,
"learning_rate": 9.139310344827587e-06,
"loss": 0.1343,
"step": 1750
},
{
"epoch": 2.637444279346211,
"grad_norm": 2.0560336112976074,
"learning_rate": 9.122068965517242e-06,
"loss": 0.1235,
"step": 1775
},
{
"epoch": 2.6745913818722142,
"grad_norm": 3.1412951946258545,
"learning_rate": 9.104827586206897e-06,
"loss": 0.1274,
"step": 1800
},
{
"epoch": 2.711738484398217,
"grad_norm": 1.8942031860351562,
"learning_rate": 9.087586206896553e-06,
"loss": 0.1285,
"step": 1825
},
{
"epoch": 2.74888558692422,
"grad_norm": 1.8972444534301758,
"learning_rate": 9.070344827586206e-06,
"loss": 0.1335,
"step": 1850
},
{
"epoch": 2.7860326894502228,
"grad_norm": 1.9524787664413452,
"learning_rate": 9.053103448275863e-06,
"loss": 0.1209,
"step": 1875
},
{
"epoch": 2.8231797919762256,
"grad_norm": 1.6888232231140137,
"learning_rate": 9.035862068965518e-06,
"loss": 0.1361,
"step": 1900
},
{
"epoch": 2.860326894502229,
"grad_norm": 2.2144405841827393,
"learning_rate": 9.018620689655173e-06,
"loss": 0.134,
"step": 1925
},
{
"epoch": 2.8974739970282317,
"grad_norm": 2.0730419158935547,
"learning_rate": 9.00137931034483e-06,
"loss": 0.1282,
"step": 1950
},
{
"epoch": 2.934621099554235,
"grad_norm": 1.705673098564148,
"learning_rate": 8.984137931034484e-06,
"loss": 0.1206,
"step": 1975
},
{
"epoch": 2.971768202080238,
"grad_norm": 1.9598541259765625,
"learning_rate": 8.966896551724139e-06,
"loss": 0.1253,
"step": 2000
},
{
"epoch": 2.971768202080238,
"eval_loss": 0.21802061796188354,
"eval_runtime": 3105.4135,
"eval_samples_per_second": 0.872,
"eval_steps_per_second": 0.109,
"eval_wer": 53.673860298812414,
"step": 2000
}
],
"logging_steps": 25,
"max_steps": 15000,
"num_input_tokens_seen": 0,
"num_train_epochs": 23,
"save_steps": 1000,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 3.265527462100992e+19,
"train_batch_size": 16,
"trial_name": null,
"trial_params": null
}
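
The log above records a training loss every 25 steps ("logging_steps") and an evaluation pass, with loss and word error rate, every 1000 steps ("eval_steps"); eval WER drops from 55.85 at step 1000 to 53.67 at step 2000, which is why checkpoint-2000 is stored as "best_model_checkpoint". Below is a minimal sketch, not part of the original upload, of how a file like this can be inspected, assuming it has been saved locally as trainer_state.json; the field names are taken directly from the JSON above and only the Python standard library is used.

```python
import json

# Load the trainer state dumped by the Hugging Face Trainer
# (assumed to be saved locally as "trainer_state.json").
with open("trainer_state.json", "r", encoding="utf-8") as f:
    state = json.load(f)

# "best_metric" is the eval WER of the best checkpoint recorded so far.
print(f"best eval WER: {state['best_metric']:.2f} "
      f"(checkpoint: {state['best_model_checkpoint']})")

# Training entries carry "loss"; evaluation entries carry "eval_loss"/"eval_wer".
for entry in state["log_history"]:
    if "eval_wer" in entry:
        print(f"step {entry['step']:>5}: "
              f"eval_loss={entry['eval_loss']:.4f}, "
              f"eval_wer={entry['eval_wer']:.2f}")
```

The learning-rate column also shows the schedule implied by the configuration: a warm-up toward roughly 1e-5 over the first 500 steps, followed by a linear decay aimed at "max_steps" = 15000.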