|
{
  "best_metric": 53.673860298812414,
  "best_model_checkpoint": "./whisper-medium-te/checkpoint-2000",
  "epoch": 2.971768202080238,
  "eval_steps": 1000,
  "global_step": 2000,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.03714710252600297,
      "grad_norm": 9.450862884521484,
      "learning_rate": 4.6000000000000004e-07,
      "loss": 1.1494,
      "step": 25
    },
    {
      "epoch": 0.07429420505200594,
      "grad_norm": 5.792855262756348,
      "learning_rate": 9.600000000000001e-07,
      "loss": 0.8665,
      "step": 50
    },
    {
      "epoch": 0.11144130757800892,
      "grad_norm": 5.076374530792236,
      "learning_rate": 1.46e-06,
      "loss": 0.6443,
      "step": 75
    },
    {
      "epoch": 0.1485884101040119,
      "grad_norm": 4.388276100158691,
      "learning_rate": 1.9600000000000003e-06,
      "loss": 0.5233,
      "step": 100
    },
    {
      "epoch": 0.18573551263001487,
      "grad_norm": 4.2827301025390625,
      "learning_rate": 2.46e-06,
      "loss": 0.4417,
      "step": 125
    },
    {
      "epoch": 0.22288261515601784,
      "grad_norm": 4.33107328414917,
      "learning_rate": 2.96e-06,
      "loss": 0.3846,
      "step": 150
    },
    {
      "epoch": 0.2600297176820208,
      "grad_norm": 5.062885284423828,
      "learning_rate": 3.46e-06,
      "loss": 0.3456,
      "step": 175
    },
    {
      "epoch": 0.2971768202080238,
      "grad_norm": 3.433645725250244,
      "learning_rate": 3.96e-06,
      "loss": 0.3304,
      "step": 200
    },
    {
      "epoch": 0.3343239227340267,
      "grad_norm": 4.322815418243408,
      "learning_rate": 4.4600000000000005e-06,
      "loss": 0.3323,
      "step": 225
    },
    {
      "epoch": 0.37147102526002973,
      "grad_norm": 4.137588024139404,
      "learning_rate": 4.960000000000001e-06,
      "loss": 0.3087,
      "step": 250
    },
    {
      "epoch": 0.4086181277860327,
      "grad_norm": 3.188852071762085,
      "learning_rate": 5.460000000000001e-06,
      "loss": 0.2858,
      "step": 275
    },
    {
      "epoch": 0.4457652303120357,
      "grad_norm": 3.801391839981079,
      "learning_rate": 5.9600000000000005e-06,
      "loss": 0.2804,
      "step": 300
    },
    {
      "epoch": 0.48291233283803864,
      "grad_norm": 3.4735641479492188,
      "learning_rate": 6.460000000000001e-06,
      "loss": 0.2746,
      "step": 325
    },
    {
      "epoch": 0.5200594353640416,
      "grad_norm": 4.813545227050781,
      "learning_rate": 6.96e-06,
      "loss": 0.278,
      "step": 350
    },
    {
      "epoch": 0.5572065378900446,
      "grad_norm": 3.2244017124176025,
      "learning_rate": 7.4600000000000006e-06,
      "loss": 0.2655,
      "step": 375
    },
    {
      "epoch": 0.5943536404160475,
      "grad_norm": 3.476280689239502,
      "learning_rate": 7.960000000000002e-06,
      "loss": 0.2571,
      "step": 400
    },
    {
      "epoch": 0.6315007429420505,
      "grad_norm": 3.5403590202331543,
      "learning_rate": 8.46e-06,
      "loss": 0.2522,
      "step": 425
    },
    {
      "epoch": 0.6686478454680534,
      "grad_norm": 4.525882244110107,
      "learning_rate": 8.96e-06,
      "loss": 0.2473,
      "step": 450
    },
    {
      "epoch": 0.7057949479940565,
      "grad_norm": 4.107092380523682,
      "learning_rate": 9.460000000000001e-06,
      "loss": 0.2519,
      "step": 475
    },
    {
      "epoch": 0.7429420505200595,
      "grad_norm": 3.5582709312438965,
      "learning_rate": 9.960000000000001e-06,
      "loss": 0.2508,
      "step": 500
    },
    {
      "epoch": 0.7800891530460624,
      "grad_norm": 2.6124589443206787,
      "learning_rate": 9.984137931034483e-06,
      "loss": 0.2437,
      "step": 525
    },
    {
      "epoch": 0.8172362555720654,
      "grad_norm": 3.1363003253936768,
      "learning_rate": 9.96689655172414e-06,
      "loss": 0.241,
      "step": 550
    },
    {
      "epoch": 0.8543833580980683,
      "grad_norm": 2.3347408771514893,
      "learning_rate": 9.949655172413793e-06,
      "loss": 0.2369,
      "step": 575
    },
    {
      "epoch": 0.8915304606240714,
      "grad_norm": 2.609605312347412,
      "learning_rate": 9.93241379310345e-06,
      "loss": 0.2204,
      "step": 600
    },
    {
      "epoch": 0.9286775631500743,
      "grad_norm": 3.050915479660034,
      "learning_rate": 9.915172413793104e-06,
      "loss": 0.2183,
      "step": 625
    },
    {
      "epoch": 0.9658246656760773,
      "grad_norm": 2.9425406455993652,
      "learning_rate": 9.897931034482759e-06,
      "loss": 0.2321,
      "step": 650
    },
    {
      "epoch": 1.0029717682020802,
      "grad_norm": 3.074657917022705,
      "learning_rate": 9.880689655172414e-06,
      "loss": 0.2241,
      "step": 675
    },
    {
      "epoch": 1.0401188707280833,
      "grad_norm": 2.8157992362976074,
      "learning_rate": 9.86344827586207e-06,
      "loss": 0.1898,
      "step": 700
    },
    {
      "epoch": 1.0772659732540861,
      "grad_norm": 2.4690845012664795,
      "learning_rate": 9.846206896551725e-06,
      "loss": 0.1857,
      "step": 725
    },
    {
      "epoch": 1.1144130757800892,
      "grad_norm": 2.4655447006225586,
      "learning_rate": 9.82896551724138e-06,
      "loss": 0.1743,
      "step": 750
    },
    {
      "epoch": 1.151560178306092,
      "grad_norm": 2.103703498840332,
      "learning_rate": 9.811724137931035e-06,
      "loss": 0.1825,
      "step": 775
    },
    {
      "epoch": 1.188707280832095,
      "grad_norm": 2.727170467376709,
      "learning_rate": 9.79448275862069e-06,
      "loss": 0.1861,
      "step": 800
    },
    {
      "epoch": 1.2258543833580982,
      "grad_norm": 2.1180167198181152,
      "learning_rate": 9.777241379310347e-06,
      "loss": 0.179,
      "step": 825
    },
    {
      "epoch": 1.263001485884101,
      "grad_norm": 2.076005697250366,
      "learning_rate": 9.760000000000001e-06,
      "loss": 0.1854,
      "step": 850
    },
    {
      "epoch": 1.300148588410104,
      "grad_norm": 1.8460164070129395,
      "learning_rate": 9.742758620689656e-06,
      "loss": 0.1831,
      "step": 875
    },
    {
      "epoch": 1.3372956909361071,
      "grad_norm": 4.341026306152344,
      "learning_rate": 9.725517241379311e-06,
      "loss": 0.177,
      "step": 900
    },
    {
      "epoch": 1.37444279346211,
      "grad_norm": 2.86643123626709,
      "learning_rate": 9.708275862068966e-06,
      "loss": 0.1853,
      "step": 925
    },
    {
      "epoch": 1.4115898959881128,
      "grad_norm": 2.4118528366088867,
      "learning_rate": 9.691034482758621e-06,
      "loss": 0.1741,
      "step": 950
    },
    {
      "epoch": 1.4487369985141159,
      "grad_norm": 2.506206512451172,
      "learning_rate": 9.673793103448277e-06,
      "loss": 0.1713,
      "step": 975
    },
    {
      "epoch": 1.485884101040119,
      "grad_norm": 2.32373309135437,
      "learning_rate": 9.65655172413793e-06,
      "loss": 0.1662,
      "step": 1000
    },
    {
      "epoch": 1.485884101040119,
      "eval_loss": 0.2365463376045227,
      "eval_runtime": 3136.2768,
      "eval_samples_per_second": 0.863,
      "eval_steps_per_second": 0.108,
      "eval_wer": 55.854935512705914,
      "step": 1000
    },
    {
      "epoch": 1.5230312035661218,
      "grad_norm": 2.9691216945648193,
      "learning_rate": 9.639310344827587e-06,
      "loss": 0.1792,
      "step": 1025
    },
    {
      "epoch": 1.5601783060921248,
      "grad_norm": 2.158869981765747,
      "learning_rate": 9.622068965517242e-06,
      "loss": 0.1654,
      "step": 1050
    },
    {
      "epoch": 1.5973254086181279,
      "grad_norm": 2.132694721221924,
      "learning_rate": 9.604827586206897e-06,
      "loss": 0.1805,
      "step": 1075
    },
    {
      "epoch": 1.6344725111441307,
      "grad_norm": 2.4110560417175293,
      "learning_rate": 9.587586206896554e-06,
      "loss": 0.1688,
      "step": 1100
    },
    {
      "epoch": 1.6716196136701336,
      "grad_norm": 3.177272319793701,
      "learning_rate": 9.570344827586208e-06,
      "loss": 0.1746,
      "step": 1125
    },
    {
      "epoch": 1.7087667161961368,
      "grad_norm": 2.058962821960449,
      "learning_rate": 9.553103448275863e-06,
      "loss": 0.1695,
      "step": 1150
    },
    {
      "epoch": 1.7459138187221397,
      "grad_norm": 2.6047332286834717,
      "learning_rate": 9.535862068965518e-06,
      "loss": 0.1767,
      "step": 1175
    },
    {
      "epoch": 1.7830609212481425,
      "grad_norm": 2.676912546157837,
      "learning_rate": 9.518620689655173e-06,
      "loss": 0.186,
      "step": 1200
    },
    {
      "epoch": 1.8202080237741456,
      "grad_norm": 2.346376895904541,
      "learning_rate": 9.501379310344828e-06,
      "loss": 0.1832,
      "step": 1225
    },
    {
      "epoch": 1.8573551263001487,
      "grad_norm": 2.172449827194214,
      "learning_rate": 9.484137931034484e-06,
      "loss": 0.1791,
      "step": 1250
    },
    {
      "epoch": 1.8945022288261515,
      "grad_norm": 2.105217933654785,
      "learning_rate": 9.46689655172414e-06,
      "loss": 0.1771,
      "step": 1275
    },
    {
      "epoch": 1.9316493313521546,
      "grad_norm": 2.7176296710968018,
      "learning_rate": 9.449655172413794e-06,
      "loss": 0.1711,
      "step": 1300
    },
    {
      "epoch": 1.9687964338781576,
      "grad_norm": 2.633023262023926,
      "learning_rate": 9.432413793103449e-06,
      "loss": 0.1717,
      "step": 1325
    },
    {
      "epoch": 2.0059435364041605,
      "grad_norm": 1.6349059343338013,
      "learning_rate": 9.415172413793104e-06,
      "loss": 0.1649,
      "step": 1350
    },
    {
      "epoch": 2.0430906389301633,
      "grad_norm": 2.0442230701446533,
      "learning_rate": 9.397931034482759e-06,
      "loss": 0.1204,
      "step": 1375
    },
    {
      "epoch": 2.0802377414561666,
      "grad_norm": 1.8747773170471191,
      "learning_rate": 9.380689655172415e-06,
      "loss": 0.1214,
      "step": 1400
    },
    {
      "epoch": 2.1173848439821694,
      "grad_norm": 2.2875783443450928,
      "learning_rate": 9.363448275862069e-06,
      "loss": 0.1344,
      "step": 1425
    },
    {
      "epoch": 2.1545319465081723,
      "grad_norm": 1.9838837385177612,
      "learning_rate": 9.346206896551725e-06,
      "loss": 0.1199,
      "step": 1450
    },
    {
      "epoch": 2.1916790490341755,
      "grad_norm": 2.1067237854003906,
      "learning_rate": 9.32896551724138e-06,
      "loss": 0.1181,
      "step": 1475
    },
    {
      "epoch": 2.2288261515601784,
      "grad_norm": 2.6312592029571533,
      "learning_rate": 9.311724137931035e-06,
      "loss": 0.1256,
      "step": 1500
    },
    {
      "epoch": 2.265973254086181,
      "grad_norm": 1.992762804031372,
      "learning_rate": 9.294482758620691e-06,
      "loss": 0.1272,
      "step": 1525
    },
    {
      "epoch": 2.303120356612184,
      "grad_norm": 2.416465997695923,
      "learning_rate": 9.277241379310346e-06,
      "loss": 0.1305,
      "step": 1550
    },
    {
      "epoch": 2.3402674591381873,
      "grad_norm": 1.7433266639709473,
      "learning_rate": 9.260000000000001e-06,
      "loss": 0.1262,
      "step": 1575
    },
    {
      "epoch": 2.37741456166419,
      "grad_norm": 1.751561164855957,
      "learning_rate": 9.242758620689656e-06,
      "loss": 0.1216,
      "step": 1600
    },
    {
      "epoch": 2.414561664190193,
      "grad_norm": 2.123859405517578,
      "learning_rate": 9.225517241379311e-06,
      "loss": 0.1366,
      "step": 1625
    },
    {
      "epoch": 2.4517087667161963,
      "grad_norm": 1.9784679412841797,
      "learning_rate": 9.208275862068966e-06,
      "loss": 0.1265,
      "step": 1650
    },
    {
      "epoch": 2.488855869242199,
      "grad_norm": 2.4382882118225098,
      "learning_rate": 9.191034482758622e-06,
      "loss": 0.1228,
      "step": 1675
    },
    {
      "epoch": 2.526002971768202,
      "grad_norm": 2.2365972995758057,
      "learning_rate": 9.173793103448277e-06,
      "loss": 0.1326,
      "step": 1700
    },
    {
      "epoch": 2.563150074294205,
      "grad_norm": 1.7844051122665405,
      "learning_rate": 9.156551724137932e-06,
      "loss": 0.1234,
      "step": 1725
    },
    {
      "epoch": 2.600297176820208,
      "grad_norm": 1.6666020154953003,
      "learning_rate": 9.139310344827587e-06,
      "loss": 0.1343,
      "step": 1750
    },
    {
      "epoch": 2.637444279346211,
      "grad_norm": 2.0560336112976074,
      "learning_rate": 9.122068965517242e-06,
      "loss": 0.1235,
      "step": 1775
    },
    {
      "epoch": 2.6745913818722142,
      "grad_norm": 3.1412951946258545,
      "learning_rate": 9.104827586206897e-06,
      "loss": 0.1274,
      "step": 1800
    },
    {
      "epoch": 2.711738484398217,
      "grad_norm": 1.8942031860351562,
      "learning_rate": 9.087586206896553e-06,
      "loss": 0.1285,
      "step": 1825
    },
    {
      "epoch": 2.74888558692422,
      "grad_norm": 1.8972444534301758,
      "learning_rate": 9.070344827586206e-06,
      "loss": 0.1335,
      "step": 1850
    },
    {
      "epoch": 2.7860326894502228,
      "grad_norm": 1.9524787664413452,
      "learning_rate": 9.053103448275863e-06,
      "loss": 0.1209,
      "step": 1875
    },
    {
      "epoch": 2.8231797919762256,
      "grad_norm": 1.6888232231140137,
      "learning_rate": 9.035862068965518e-06,
      "loss": 0.1361,
      "step": 1900
    },
    {
      "epoch": 2.860326894502229,
      "grad_norm": 2.2144405841827393,
      "learning_rate": 9.018620689655173e-06,
      "loss": 0.134,
      "step": 1925
    },
    {
      "epoch": 2.8974739970282317,
      "grad_norm": 2.0730419158935547,
      "learning_rate": 9.00137931034483e-06,
      "loss": 0.1282,
      "step": 1950
    },
    {
      "epoch": 2.934621099554235,
      "grad_norm": 1.705673098564148,
      "learning_rate": 8.984137931034484e-06,
      "loss": 0.1206,
      "step": 1975
    },
    {
      "epoch": 2.971768202080238,
      "grad_norm": 1.9598541259765625,
      "learning_rate": 8.966896551724139e-06,
      "loss": 0.1253,
      "step": 2000
    },
    {
      "epoch": 2.971768202080238,
      "eval_loss": 0.21802061796188354,
      "eval_runtime": 3105.4135,
      "eval_samples_per_second": 0.872,
      "eval_steps_per_second": 0.109,
      "eval_wer": 53.673860298812414,
      "step": 2000
    }
  ],
  "logging_steps": 25,
  "max_steps": 15000,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 23,
  "save_steps": 1000,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 3.265527462100992e+19,
  "train_batch_size": 16,
  "trial_name": null,
  "trial_params": null
}