|
{ |
|
"best_metric": 32.56401305446938, |
|
"best_model_checkpoint": "./whisper-tiny-hi/checkpoint-15000", |
|
"epoch": 2.5942580421999306, |
|
"eval_steps": 1000, |
|
"global_step": 15000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.004323763403666551, |
|
"grad_norm": 16.286670684814453, |
|
"learning_rate": 4.6000000000000004e-07, |
|
"loss": 2.1388, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.008647526807333102, |
|
"grad_norm": 12.285518646240234, |
|
"learning_rate": 9.600000000000001e-07, |
|
"loss": 2.0213, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.012971290210999653, |
|
"grad_norm": 12.98936653137207, |
|
"learning_rate": 1.46e-06, |
|
"loss": 1.852, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.017295053614666205, |
|
"grad_norm": 11.324748992919922, |
|
"learning_rate": 1.9600000000000003e-06, |
|
"loss": 1.6847, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.021618817018332757, |
|
"grad_norm": 10.082378387451172, |
|
"learning_rate": 2.46e-06, |
|
"loss": 1.4744, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.025942580421999307, |
|
"grad_norm": 10.503169059753418, |
|
"learning_rate": 2.96e-06, |
|
"loss": 1.3716, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.03026634382566586, |
|
"grad_norm": 10.596274375915527, |
|
"learning_rate": 3.46e-06, |
|
"loss": 1.2115, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 0.03459010722933241, |
|
"grad_norm": 8.612921714782715, |
|
"learning_rate": 3.96e-06, |
|
"loss": 1.0883, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.03891387063299896, |
|
"grad_norm": 9.459671020507812, |
|
"learning_rate": 4.4600000000000005e-06, |
|
"loss": 0.9957, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 0.043237634036665515, |
|
"grad_norm": 9.334480285644531, |
|
"learning_rate": 4.960000000000001e-06, |
|
"loss": 0.8869, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.04756139744033207, |
|
"grad_norm": 8.94874095916748, |
|
"learning_rate": 5.460000000000001e-06, |
|
"loss": 0.8336, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 0.051885160843998614, |
|
"grad_norm": 7.839649677276611, |
|
"learning_rate": 5.9600000000000005e-06, |
|
"loss": 0.8094, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.05620892424766517, |
|
"grad_norm": 11.369429588317871, |
|
"learning_rate": 6.460000000000001e-06, |
|
"loss": 0.76, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 0.06053268765133172, |
|
"grad_norm": 7.519564628601074, |
|
"learning_rate": 6.96e-06, |
|
"loss": 0.7319, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.06485645105499827, |
|
"grad_norm": 10.504302024841309, |
|
"learning_rate": 7.4600000000000006e-06, |
|
"loss": 0.6995, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 0.06918021445866482, |
|
"grad_norm": 8.440424919128418, |
|
"learning_rate": 7.960000000000002e-06, |
|
"loss": 0.6782, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.07350397786233137, |
|
"grad_norm": 9.647106170654297, |
|
"learning_rate": 8.46e-06, |
|
"loss": 0.6475, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 0.07782774126599792, |
|
"grad_norm": 9.011175155639648, |
|
"learning_rate": 8.96e-06, |
|
"loss": 0.6515, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.08215150466966448, |
|
"grad_norm": 7.202056884765625, |
|
"learning_rate": 9.460000000000001e-06, |
|
"loss": 0.6173, |
|
"step": 475 |
|
}, |
|
{ |
|
"epoch": 0.08647526807333103, |
|
"grad_norm": 8.170417785644531, |
|
"learning_rate": 9.960000000000001e-06, |
|
"loss": 0.5954, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.09079903147699758, |
|
"grad_norm": 7.681850910186768, |
|
"learning_rate": 9.984137931034483e-06, |
|
"loss": 0.5821, |
|
"step": 525 |
|
}, |
|
{ |
|
"epoch": 0.09512279488066414, |
|
"grad_norm": 6.318211078643799, |
|
"learning_rate": 9.96689655172414e-06, |
|
"loss": 0.5575, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.09944655828433069, |
|
"grad_norm": 7.533044815063477, |
|
"learning_rate": 9.949655172413793e-06, |
|
"loss": 0.5778, |
|
"step": 575 |
|
}, |
|
{ |
|
"epoch": 0.10377032168799723, |
|
"grad_norm": 9.38764476776123, |
|
"learning_rate": 9.93241379310345e-06, |
|
"loss": 0.5735, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.10809408509166378, |
|
"grad_norm": 6.581768035888672, |
|
"learning_rate": 9.915172413793104e-06, |
|
"loss": 0.5358, |
|
"step": 625 |
|
}, |
|
{ |
|
"epoch": 0.11241784849533033, |
|
"grad_norm": 8.789082527160645, |
|
"learning_rate": 9.897931034482759e-06, |
|
"loss": 0.5619, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.11674161189899689, |
|
"grad_norm": 8.615039825439453, |
|
"learning_rate": 9.880689655172414e-06, |
|
"loss": 0.5211, |
|
"step": 675 |
|
}, |
|
{ |
|
"epoch": 0.12106537530266344, |
|
"grad_norm": 8.99618911743164, |
|
"learning_rate": 9.86344827586207e-06, |
|
"loss": 0.5039, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.12538913870632998, |
|
"grad_norm": 7.325024127960205, |
|
"learning_rate": 9.846206896551725e-06, |
|
"loss": 0.4967, |
|
"step": 725 |
|
}, |
|
{ |
|
"epoch": 0.12971290210999653, |
|
"grad_norm": 7.267101287841797, |
|
"learning_rate": 9.82896551724138e-06, |
|
"loss": 0.4913, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.13403666551366308, |
|
"grad_norm": 8.017245292663574, |
|
"learning_rate": 9.811724137931035e-06, |
|
"loss": 0.4841, |
|
"step": 775 |
|
}, |
|
{ |
|
"epoch": 0.13836042891732964, |
|
"grad_norm": 6.565851211547852, |
|
"learning_rate": 9.79448275862069e-06, |
|
"loss": 0.4826, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.1426841923209962, |
|
"grad_norm": 9.583740234375, |
|
"learning_rate": 9.777241379310347e-06, |
|
"loss": 0.4966, |
|
"step": 825 |
|
}, |
|
{ |
|
"epoch": 0.14700795572466274, |
|
"grad_norm": 6.968817234039307, |
|
"learning_rate": 9.760000000000001e-06, |
|
"loss": 0.4736, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.1513317191283293, |
|
"grad_norm": 6.600325107574463, |
|
"learning_rate": 9.742758620689656e-06, |
|
"loss": 0.5066, |
|
"step": 875 |
|
}, |
|
{ |
|
"epoch": 0.15565548253199585, |
|
"grad_norm": 7.23114538192749, |
|
"learning_rate": 9.725517241379311e-06, |
|
"loss": 0.4672, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.1599792459356624, |
|
"grad_norm": 9.536112785339355, |
|
"learning_rate": 9.708275862068966e-06, |
|
"loss": 0.4724, |
|
"step": 925 |
|
}, |
|
{ |
|
"epoch": 0.16430300933932895, |
|
"grad_norm": 8.753514289855957, |
|
"learning_rate": 9.691034482758621e-06, |
|
"loss": 0.458, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.1686267727429955, |
|
"grad_norm": 9.407349586486816, |
|
"learning_rate": 9.673793103448277e-06, |
|
"loss": 0.4598, |
|
"step": 975 |
|
}, |
|
{ |
|
"epoch": 0.17295053614666206, |
|
"grad_norm": 6.315821647644043, |
|
"learning_rate": 9.65655172413793e-06, |
|
"loss": 0.4332, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.17295053614666206, |
|
"eval_loss": 0.4459797739982605, |
|
"eval_runtime": 5597.1729, |
|
"eval_samples_per_second": 4.132, |
|
"eval_steps_per_second": 0.517, |
|
"eval_wer": 49.49288587647923, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.1772742995503286, |
|
"grad_norm": 6.963139533996582, |
|
"learning_rate": 9.639310344827587e-06, |
|
"loss": 0.4519, |
|
"step": 1025 |
|
}, |
|
{ |
|
"epoch": 0.18159806295399517, |
|
"grad_norm": 6.76003360748291, |
|
"learning_rate": 9.622068965517242e-06, |
|
"loss": 0.4627, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 0.18592182635766172, |
|
"grad_norm": 6.6258416175842285, |
|
"learning_rate": 9.604827586206897e-06, |
|
"loss": 0.4318, |
|
"step": 1075 |
|
}, |
|
{ |
|
"epoch": 0.19024558976132827, |
|
"grad_norm": 6.607818603515625, |
|
"learning_rate": 9.587586206896554e-06, |
|
"loss": 0.4362, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.19456935316499482, |
|
"grad_norm": 6.816850662231445, |
|
"learning_rate": 9.570344827586208e-06, |
|
"loss": 0.4379, |
|
"step": 1125 |
|
}, |
|
{ |
|
"epoch": 0.19889311656866138, |
|
"grad_norm": 7.295541763305664, |
|
"learning_rate": 9.553103448275863e-06, |
|
"loss": 0.4327, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 0.2032168799723279, |
|
"grad_norm": 5.751529693603516, |
|
"learning_rate": 9.535862068965518e-06, |
|
"loss": 0.434, |
|
"step": 1175 |
|
}, |
|
{ |
|
"epoch": 0.20754064337599445, |
|
"grad_norm": 7.614989280700684, |
|
"learning_rate": 9.518620689655173e-06, |
|
"loss": 0.4214, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.211864406779661, |
|
"grad_norm": 7.232028961181641, |
|
"learning_rate": 9.501379310344828e-06, |
|
"loss": 0.4246, |
|
"step": 1225 |
|
}, |
|
{ |
|
"epoch": 0.21618817018332756, |
|
"grad_norm": 5.579376697540283, |
|
"learning_rate": 9.484137931034484e-06, |
|
"loss": 0.4118, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.2205119335869941, |
|
"grad_norm": 6.2615580558776855, |
|
"learning_rate": 9.46689655172414e-06, |
|
"loss": 0.4286, |
|
"step": 1275 |
|
}, |
|
{ |
|
"epoch": 0.22483569699066067, |
|
"grad_norm": 6.8341240882873535, |
|
"learning_rate": 9.449655172413794e-06, |
|
"loss": 0.411, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.22915946039432722, |
|
"grad_norm": 5.939593315124512, |
|
"learning_rate": 9.432413793103449e-06, |
|
"loss": 0.4254, |
|
"step": 1325 |
|
}, |
|
{ |
|
"epoch": 0.23348322379799377, |
|
"grad_norm": 6.054545879364014, |
|
"learning_rate": 9.415172413793104e-06, |
|
"loss": 0.4094, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 0.23780698720166032, |
|
"grad_norm": 6.636862277984619, |
|
"learning_rate": 9.397931034482759e-06, |
|
"loss": 0.4256, |
|
"step": 1375 |
|
}, |
|
{ |
|
"epoch": 0.24213075060532688, |
|
"grad_norm": 6.010420322418213, |
|
"learning_rate": 9.380689655172415e-06, |
|
"loss": 0.4267, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.24645451400899343, |
|
"grad_norm": 8.598302841186523, |
|
"learning_rate": 9.363448275862069e-06, |
|
"loss": 0.4223, |
|
"step": 1425 |
|
}, |
|
{ |
|
"epoch": 0.25077827741265996, |
|
"grad_norm": 6.458983421325684, |
|
"learning_rate": 9.346206896551725e-06, |
|
"loss": 0.4123, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 0.25510204081632654, |
|
"grad_norm": 6.562103748321533, |
|
"learning_rate": 9.32896551724138e-06, |
|
"loss": 0.4, |
|
"step": 1475 |
|
}, |
|
{ |
|
"epoch": 0.25942580421999306, |
|
"grad_norm": 7.095609188079834, |
|
"learning_rate": 9.311724137931035e-06, |
|
"loss": 0.4121, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.26374956762365964, |
|
"grad_norm": 6.2590789794921875, |
|
"learning_rate": 9.294482758620691e-06, |
|
"loss": 0.395, |
|
"step": 1525 |
|
}, |
|
{ |
|
"epoch": 0.26807333102732617, |
|
"grad_norm": 7.436612606048584, |
|
"learning_rate": 9.277241379310346e-06, |
|
"loss": 0.4271, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 0.27239709443099275, |
|
"grad_norm": 6.528996467590332, |
|
"learning_rate": 9.260000000000001e-06, |
|
"loss": 0.3954, |
|
"step": 1575 |
|
}, |
|
{ |
|
"epoch": 0.2767208578346593, |
|
"grad_norm": 7.369086265563965, |
|
"learning_rate": 9.242758620689656e-06, |
|
"loss": 0.388, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.28104462123832585, |
|
"grad_norm": 5.275778770446777, |
|
"learning_rate": 9.225517241379311e-06, |
|
"loss": 0.3789, |
|
"step": 1625 |
|
}, |
|
{ |
|
"epoch": 0.2853683846419924, |
|
"grad_norm": 5.707178115844727, |
|
"learning_rate": 9.208275862068966e-06, |
|
"loss": 0.3748, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 0.28969214804565896, |
|
"grad_norm": 6.311554908752441, |
|
"learning_rate": 9.191034482758622e-06, |
|
"loss": 0.399, |
|
"step": 1675 |
|
}, |
|
{ |
|
"epoch": 0.2940159114493255, |
|
"grad_norm": 5.5442633628845215, |
|
"learning_rate": 9.173793103448277e-06, |
|
"loss": 0.3857, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.29833967485299207, |
|
"grad_norm": 5.790552616119385, |
|
"learning_rate": 9.156551724137932e-06, |
|
"loss": 0.3914, |
|
"step": 1725 |
|
}, |
|
{ |
|
"epoch": 0.3026634382566586, |
|
"grad_norm": 6.97217321395874, |
|
"learning_rate": 9.139310344827587e-06, |
|
"loss": 0.3832, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 0.30698720166032517, |
|
"grad_norm": 7.6458353996276855, |
|
"learning_rate": 9.122068965517242e-06, |
|
"loss": 0.3863, |
|
"step": 1775 |
|
}, |
|
{ |
|
"epoch": 0.3113109650639917, |
|
"grad_norm": 6.375527381896973, |
|
"learning_rate": 9.104827586206897e-06, |
|
"loss": 0.3717, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.3156347284676583, |
|
"grad_norm": 7.621360778808594, |
|
"learning_rate": 9.087586206896553e-06, |
|
"loss": 0.4053, |
|
"step": 1825 |
|
}, |
|
{ |
|
"epoch": 0.3199584918713248, |
|
"grad_norm": 5.717822551727295, |
|
"learning_rate": 9.070344827586206e-06, |
|
"loss": 0.375, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 0.3242822552749913, |
|
"grad_norm": 6.005116939544678, |
|
"learning_rate": 9.053103448275863e-06, |
|
"loss": 0.407, |
|
"step": 1875 |
|
}, |
|
{ |
|
"epoch": 0.3286060186786579, |
|
"grad_norm": 6.100674629211426, |
|
"learning_rate": 9.035862068965518e-06, |
|
"loss": 0.383, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 0.33292978208232443, |
|
"grad_norm": 5.4287495613098145, |
|
"learning_rate": 9.018620689655173e-06, |
|
"loss": 0.3685, |
|
"step": 1925 |
|
}, |
|
{ |
|
"epoch": 0.337253545485991, |
|
"grad_norm": 6.621429920196533, |
|
"learning_rate": 9.00137931034483e-06, |
|
"loss": 0.3858, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 0.34157730888965754, |
|
"grad_norm": 7.438424110412598, |
|
"learning_rate": 8.984137931034484e-06, |
|
"loss": 0.3874, |
|
"step": 1975 |
|
}, |
|
{ |
|
"epoch": 0.3459010722933241, |
|
"grad_norm": 7.171720027923584, |
|
"learning_rate": 8.966896551724139e-06, |
|
"loss": 0.3796, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.3459010722933241, |
|
"eval_loss": 0.3686262369155884, |
|
"eval_runtime": 5442.7156, |
|
"eval_samples_per_second": 4.249, |
|
"eval_steps_per_second": 0.531, |
|
"eval_wer": 43.32270451494399, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.35022483569699064, |
|
"grad_norm": 5.1768059730529785, |
|
"learning_rate": 8.949655172413794e-06, |
|
"loss": 0.3639, |
|
"step": 2025 |
|
}, |
|
{ |
|
"epoch": 0.3545485991006572, |
|
"grad_norm": 6.131287574768066, |
|
"learning_rate": 8.932413793103449e-06, |
|
"loss": 0.3684, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 0.35887236250432375, |
|
"grad_norm": 6.503818988800049, |
|
"learning_rate": 8.915172413793104e-06, |
|
"loss": 0.3804, |
|
"step": 2075 |
|
}, |
|
{ |
|
"epoch": 0.36319612590799033, |
|
"grad_norm": 5.307014465332031, |
|
"learning_rate": 8.89793103448276e-06, |
|
"loss": 0.3778, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 0.36751988931165686, |
|
"grad_norm": 4.741936683654785, |
|
"learning_rate": 8.880689655172415e-06, |
|
"loss": 0.3642, |
|
"step": 2125 |
|
}, |
|
{ |
|
"epoch": 0.37184365271532344, |
|
"grad_norm": 5.130821228027344, |
|
"learning_rate": 8.864137931034484e-06, |
|
"loss": 0.3771, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 0.37616741611898996, |
|
"grad_norm": 6.177253723144531, |
|
"learning_rate": 8.846896551724138e-06, |
|
"loss": 0.3645, |
|
"step": 2175 |
|
}, |
|
{ |
|
"epoch": 0.38049117952265654, |
|
"grad_norm": 5.380115509033203, |
|
"learning_rate": 8.829655172413793e-06, |
|
"loss": 0.3637, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 0.38481494292632307, |
|
"grad_norm": 7.195498943328857, |
|
"learning_rate": 8.81241379310345e-06, |
|
"loss": 0.3628, |
|
"step": 2225 |
|
}, |
|
{ |
|
"epoch": 0.38913870632998965, |
|
"grad_norm": 6.12623929977417, |
|
"learning_rate": 8.795172413793103e-06, |
|
"loss": 0.3847, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 0.3934624697336562, |
|
"grad_norm": 6.666962623596191, |
|
"learning_rate": 8.77793103448276e-06, |
|
"loss": 0.3776, |
|
"step": 2275 |
|
}, |
|
{ |
|
"epoch": 0.39778623313732275, |
|
"grad_norm": 6.3228325843811035, |
|
"learning_rate": 8.760689655172415e-06, |
|
"loss": 0.3603, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 0.4021099965409893, |
|
"grad_norm": 5.662914276123047, |
|
"learning_rate": 8.74344827586207e-06, |
|
"loss": 0.341, |
|
"step": 2325 |
|
}, |
|
{ |
|
"epoch": 0.4064337599446558, |
|
"grad_norm": 6.619643211364746, |
|
"learning_rate": 8.726206896551724e-06, |
|
"loss": 0.3513, |
|
"step": 2350 |
|
}, |
|
{ |
|
"epoch": 0.4107575233483224, |
|
"grad_norm": 5.331205368041992, |
|
"learning_rate": 8.70896551724138e-06, |
|
"loss": 0.3531, |
|
"step": 2375 |
|
}, |
|
{ |
|
"epoch": 0.4150812867519889, |
|
"grad_norm": 6.8525824546813965, |
|
"learning_rate": 8.691724137931034e-06, |
|
"loss": 0.358, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 0.4194050501556555, |
|
"grad_norm": 6.635438442230225, |
|
"learning_rate": 8.67448275862069e-06, |
|
"loss": 0.3756, |
|
"step": 2425 |
|
}, |
|
{ |
|
"epoch": 0.423728813559322, |
|
"grad_norm": 5.737441062927246, |
|
"learning_rate": 8.657241379310345e-06, |
|
"loss": 0.3502, |
|
"step": 2450 |
|
}, |
|
{ |
|
"epoch": 0.4280525769629886, |
|
"grad_norm": 6.25765323638916, |
|
"learning_rate": 8.64e-06, |
|
"loss": 0.3632, |
|
"step": 2475 |
|
}, |
|
{ |
|
"epoch": 0.4323763403666551, |
|
"grad_norm": 6.037199020385742, |
|
"learning_rate": 8.622758620689657e-06, |
|
"loss": 0.3622, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.4367001037703217, |
|
"grad_norm": 6.072882175445557, |
|
"learning_rate": 8.605517241379312e-06, |
|
"loss": 0.3559, |
|
"step": 2525 |
|
}, |
|
{ |
|
"epoch": 0.4410238671739882, |
|
"grad_norm": 6.014054298400879, |
|
"learning_rate": 8.588275862068967e-06, |
|
"loss": 0.376, |
|
"step": 2550 |
|
}, |
|
{ |
|
"epoch": 0.4453476305776548, |
|
"grad_norm": 6.040060043334961, |
|
"learning_rate": 8.571034482758621e-06, |
|
"loss": 0.3749, |
|
"step": 2575 |
|
}, |
|
{ |
|
"epoch": 0.44967139398132133, |
|
"grad_norm": 5.293423652648926, |
|
"learning_rate": 8.553793103448276e-06, |
|
"loss": 0.3617, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 0.4539951573849879, |
|
"grad_norm": 6.393920421600342, |
|
"learning_rate": 8.536551724137931e-06, |
|
"loss": 0.3506, |
|
"step": 2625 |
|
}, |
|
{ |
|
"epoch": 0.45831892078865444, |
|
"grad_norm": 5.98794412612915, |
|
"learning_rate": 8.519310344827588e-06, |
|
"loss": 0.3523, |
|
"step": 2650 |
|
}, |
|
{ |
|
"epoch": 0.462642684192321, |
|
"grad_norm": 5.70527458190918, |
|
"learning_rate": 8.502068965517241e-06, |
|
"loss": 0.3622, |
|
"step": 2675 |
|
}, |
|
{ |
|
"epoch": 0.46696644759598754, |
|
"grad_norm": 6.078751087188721, |
|
"learning_rate": 8.484827586206898e-06, |
|
"loss": 0.3403, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 0.4712902109996541, |
|
"grad_norm": 6.641058444976807, |
|
"learning_rate": 8.467586206896552e-06, |
|
"loss": 0.3506, |
|
"step": 2725 |
|
}, |
|
{ |
|
"epoch": 0.47561397440332065, |
|
"grad_norm": 6.066605091094971, |
|
"learning_rate": 8.450344827586207e-06, |
|
"loss": 0.3773, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 0.4799377378069872, |
|
"grad_norm": 6.622874736785889, |
|
"learning_rate": 8.433103448275862e-06, |
|
"loss": 0.3435, |
|
"step": 2775 |
|
}, |
|
{ |
|
"epoch": 0.48426150121065376, |
|
"grad_norm": 5.157226085662842, |
|
"learning_rate": 8.415862068965519e-06, |
|
"loss": 0.3586, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 0.4885852646143203, |
|
"grad_norm": 5.92050838470459, |
|
"learning_rate": 8.398620689655174e-06, |
|
"loss": 0.352, |
|
"step": 2825 |
|
}, |
|
{ |
|
"epoch": 0.49290902801798686, |
|
"grad_norm": 5.522551536560059, |
|
"learning_rate": 8.381379310344828e-06, |
|
"loss": 0.3447, |
|
"step": 2850 |
|
}, |
|
{ |
|
"epoch": 0.4972327914216534, |
|
"grad_norm": 6.284322738647461, |
|
"learning_rate": 8.364137931034483e-06, |
|
"loss": 0.3477, |
|
"step": 2875 |
|
}, |
|
{ |
|
"epoch": 0.5015565548253199, |
|
"grad_norm": 6.7316179275512695, |
|
"learning_rate": 8.346896551724138e-06, |
|
"loss": 0.3402, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 0.5058803182289865, |
|
"grad_norm": 5.808598518371582, |
|
"learning_rate": 8.329655172413795e-06, |
|
"loss": 0.3332, |
|
"step": 2925 |
|
}, |
|
{ |
|
"epoch": 0.5102040816326531, |
|
"grad_norm": 5.109462261199951, |
|
"learning_rate": 8.31241379310345e-06, |
|
"loss": 0.3465, |
|
"step": 2950 |
|
}, |
|
{ |
|
"epoch": 0.5145278450363197, |
|
"grad_norm": 6.471071720123291, |
|
"learning_rate": 8.295172413793104e-06, |
|
"loss": 0.3501, |
|
"step": 2975 |
|
}, |
|
{ |
|
"epoch": 0.5188516084399861, |
|
"grad_norm": 7.2887139320373535, |
|
"learning_rate": 8.27793103448276e-06, |
|
"loss": 0.3414, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.5188516084399861, |
|
"eval_loss": 0.33114978671073914, |
|
"eval_runtime": 5410.7097, |
|
"eval_samples_per_second": 4.274, |
|
"eval_steps_per_second": 0.534, |
|
"eval_wer": 39.80441724633165, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.5231753718436527, |
|
"grad_norm": 5.811481475830078, |
|
"learning_rate": 8.260689655172414e-06, |
|
"loss": 0.3286, |
|
"step": 3025 |
|
}, |
|
{ |
|
"epoch": 0.5274991352473193, |
|
"grad_norm": 5.4608869552612305, |
|
"learning_rate": 8.243448275862069e-06, |
|
"loss": 0.3255, |
|
"step": 3050 |
|
}, |
|
{ |
|
"epoch": 0.5318228986509859, |
|
"grad_norm": 6.3298797607421875, |
|
"learning_rate": 8.226206896551726e-06, |
|
"loss": 0.3299, |
|
"step": 3075 |
|
}, |
|
{ |
|
"epoch": 0.5361466620546523, |
|
"grad_norm": 5.198216915130615, |
|
"learning_rate": 8.208965517241379e-06, |
|
"loss": 0.3463, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 0.5404704254583189, |
|
"grad_norm": 4.894079685211182, |
|
"learning_rate": 8.191724137931035e-06, |
|
"loss": 0.3362, |
|
"step": 3125 |
|
}, |
|
{ |
|
"epoch": 0.5447941888619855, |
|
"grad_norm": 5.518843650817871, |
|
"learning_rate": 8.17448275862069e-06, |
|
"loss": 0.3592, |
|
"step": 3150 |
|
}, |
|
{ |
|
"epoch": 0.5491179522656521, |
|
"grad_norm": 5.926825046539307, |
|
"learning_rate": 8.157241379310345e-06, |
|
"loss": 0.3402, |
|
"step": 3175 |
|
}, |
|
{ |
|
"epoch": 0.5534417156693185, |
|
"grad_norm": 5.385416507720947, |
|
"learning_rate": 8.14e-06, |
|
"loss": 0.3308, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 0.5577654790729851, |
|
"grad_norm": 4.563920497894287, |
|
"learning_rate": 8.122758620689657e-06, |
|
"loss": 0.3512, |
|
"step": 3225 |
|
}, |
|
{ |
|
"epoch": 0.5620892424766517, |
|
"grad_norm": 5.665447235107422, |
|
"learning_rate": 8.105517241379311e-06, |
|
"loss": 0.3256, |
|
"step": 3250 |
|
}, |
|
{ |
|
"epoch": 0.5664130058803182, |
|
"grad_norm": 6.340756416320801, |
|
"learning_rate": 8.088275862068966e-06, |
|
"loss": 0.3264, |
|
"step": 3275 |
|
}, |
|
{ |
|
"epoch": 0.5707367692839848, |
|
"grad_norm": 5.052750587463379, |
|
"learning_rate": 8.071034482758621e-06, |
|
"loss": 0.3144, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 0.5750605326876513, |
|
"grad_norm": 6.486083984375, |
|
"learning_rate": 8.053793103448276e-06, |
|
"loss": 0.3306, |
|
"step": 3325 |
|
}, |
|
{ |
|
"epoch": 0.5793842960913179, |
|
"grad_norm": 6.098331451416016, |
|
"learning_rate": 8.036551724137933e-06, |
|
"loss": 0.3363, |
|
"step": 3350 |
|
}, |
|
{ |
|
"epoch": 0.5837080594949844, |
|
"grad_norm": 5.329631328582764, |
|
"learning_rate": 8.019310344827587e-06, |
|
"loss": 0.3245, |
|
"step": 3375 |
|
}, |
|
{ |
|
"epoch": 0.588031822898651, |
|
"grad_norm": 5.355101108551025, |
|
"learning_rate": 8.002068965517242e-06, |
|
"loss": 0.3213, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 0.5923555863023175, |
|
"grad_norm": 6.330533504486084, |
|
"learning_rate": 7.984827586206897e-06, |
|
"loss": 0.338, |
|
"step": 3425 |
|
}, |
|
{ |
|
"epoch": 0.5966793497059841, |
|
"grad_norm": 5.799704551696777, |
|
"learning_rate": 7.967586206896552e-06, |
|
"loss": 0.3348, |
|
"step": 3450 |
|
}, |
|
{ |
|
"epoch": 0.6010031131096506, |
|
"grad_norm": 6.250985145568848, |
|
"learning_rate": 7.950344827586207e-06, |
|
"loss": 0.3275, |
|
"step": 3475 |
|
}, |
|
{ |
|
"epoch": 0.6053268765133172, |
|
"grad_norm": 5.608683109283447, |
|
"learning_rate": 7.933103448275864e-06, |
|
"loss": 0.3264, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.6096506399169838, |
|
"grad_norm": 4.862841606140137, |
|
"learning_rate": 7.915862068965517e-06, |
|
"loss": 0.3352, |
|
"step": 3525 |
|
}, |
|
{ |
|
"epoch": 0.6139744033206503, |
|
"grad_norm": 6.654427528381348, |
|
"learning_rate": 7.898620689655173e-06, |
|
"loss": 0.337, |
|
"step": 3550 |
|
}, |
|
{ |
|
"epoch": 0.6182981667243168, |
|
"grad_norm": 4.726236820220947, |
|
"learning_rate": 7.881379310344828e-06, |
|
"loss": 0.34, |
|
"step": 3575 |
|
}, |
|
{ |
|
"epoch": 0.6226219301279834, |
|
"grad_norm": 5.243809223175049, |
|
"learning_rate": 7.864137931034483e-06, |
|
"loss": 0.3181, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 0.62694569353165, |
|
"grad_norm": 5.479759216308594, |
|
"learning_rate": 7.84689655172414e-06, |
|
"loss": 0.3297, |
|
"step": 3625 |
|
}, |
|
{ |
|
"epoch": 0.6312694569353166, |
|
"grad_norm": 5.421146392822266, |
|
"learning_rate": 7.829655172413794e-06, |
|
"loss": 0.3221, |
|
"step": 3650 |
|
}, |
|
{ |
|
"epoch": 0.635593220338983, |
|
"grad_norm": 5.118608474731445, |
|
"learning_rate": 7.81241379310345e-06, |
|
"loss": 0.3081, |
|
"step": 3675 |
|
}, |
|
{ |
|
"epoch": 0.6399169837426496, |
|
"grad_norm": 5.862517356872559, |
|
"learning_rate": 7.795172413793104e-06, |
|
"loss": 0.3128, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 0.6442407471463162, |
|
"grad_norm": 5.084294319152832, |
|
"learning_rate": 7.777931034482759e-06, |
|
"loss": 0.3245, |
|
"step": 3725 |
|
}, |
|
{ |
|
"epoch": 0.6485645105499827, |
|
"grad_norm": 5.693409442901611, |
|
"learning_rate": 7.760689655172414e-06, |
|
"loss": 0.3167, |
|
"step": 3750 |
|
}, |
|
{ |
|
"epoch": 0.6528882739536492, |
|
"grad_norm": 4.5151286125183105, |
|
"learning_rate": 7.74344827586207e-06, |
|
"loss": 0.3278, |
|
"step": 3775 |
|
}, |
|
{ |
|
"epoch": 0.6572120373573158, |
|
"grad_norm": 6.0459442138671875, |
|
"learning_rate": 7.726206896551725e-06, |
|
"loss": 0.3403, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 0.6615358007609824, |
|
"grad_norm": 5.351093292236328, |
|
"learning_rate": 7.70896551724138e-06, |
|
"loss": 0.311, |
|
"step": 3825 |
|
}, |
|
{ |
|
"epoch": 0.6658595641646489, |
|
"grad_norm": 5.750779151916504, |
|
"learning_rate": 7.691724137931035e-06, |
|
"loss": 0.3112, |
|
"step": 3850 |
|
}, |
|
{ |
|
"epoch": 0.6701833275683154, |
|
"grad_norm": 5.5272216796875, |
|
"learning_rate": 7.67448275862069e-06, |
|
"loss": 0.32, |
|
"step": 3875 |
|
}, |
|
{ |
|
"epoch": 0.674507090971982, |
|
"grad_norm": 5.890946865081787, |
|
"learning_rate": 7.657241379310345e-06, |
|
"loss": 0.3434, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 0.6788308543756486, |
|
"grad_norm": 6.403041839599609, |
|
"learning_rate": 7.640000000000001e-06, |
|
"loss": 0.3194, |
|
"step": 3925 |
|
}, |
|
{ |
|
"epoch": 0.6831546177793151, |
|
"grad_norm": 4.705821990966797, |
|
"learning_rate": 7.622758620689655e-06, |
|
"loss": 0.3038, |
|
"step": 3950 |
|
}, |
|
{ |
|
"epoch": 0.6874783811829817, |
|
"grad_norm": 4.945014953613281, |
|
"learning_rate": 7.605517241379311e-06, |
|
"loss": 0.3414, |
|
"step": 3975 |
|
}, |
|
{ |
|
"epoch": 0.6918021445866482, |
|
"grad_norm": 6.610292434692383, |
|
"learning_rate": 7.588275862068966e-06, |
|
"loss": 0.3374, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.6918021445866482, |
|
"eval_loss": 0.312212198972702, |
|
"eval_runtime": 5649.9248, |
|
"eval_samples_per_second": 4.093, |
|
"eval_steps_per_second": 0.512, |
|
"eval_wer": 38.40712982074705, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.6961259079903148, |
|
"grad_norm": 5.513612270355225, |
|
"learning_rate": 7.571034482758622e-06, |
|
"loss": 0.3368, |
|
"step": 4025 |
|
}, |
|
{ |
|
"epoch": 0.7004496713939813, |
|
"grad_norm": 4.718435764312744, |
|
"learning_rate": 7.553793103448277e-06, |
|
"loss": 0.291, |
|
"step": 4050 |
|
}, |
|
{ |
|
"epoch": 0.7047734347976479, |
|
"grad_norm": 5.8622965812683105, |
|
"learning_rate": 7.5365517241379315e-06, |
|
"loss": 0.2975, |
|
"step": 4075 |
|
}, |
|
{ |
|
"epoch": 0.7090971982013144, |
|
"grad_norm": 4.535427570343018, |
|
"learning_rate": 7.519310344827587e-06, |
|
"loss": 0.2925, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 0.713420961604981, |
|
"grad_norm": 5.352747440338135, |
|
"learning_rate": 7.502068965517242e-06, |
|
"loss": 0.3202, |
|
"step": 4125 |
|
}, |
|
{ |
|
"epoch": 0.7177447250086475, |
|
"grad_norm": 4.6951446533203125, |
|
"learning_rate": 7.484827586206898e-06, |
|
"loss": 0.3239, |
|
"step": 4150 |
|
}, |
|
{ |
|
"epoch": 0.7220684884123141, |
|
"grad_norm": 5.972590923309326, |
|
"learning_rate": 7.467586206896552e-06, |
|
"loss": 0.3203, |
|
"step": 4175 |
|
}, |
|
{ |
|
"epoch": 0.7263922518159807, |
|
"grad_norm": 5.971949577331543, |
|
"learning_rate": 7.4503448275862075e-06, |
|
"loss": 0.3207, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 0.7307160152196471, |
|
"grad_norm": 5.5973334312438965, |
|
"learning_rate": 7.433103448275862e-06, |
|
"loss": 0.3248, |
|
"step": 4225 |
|
}, |
|
{ |
|
"epoch": 0.7350397786233137, |
|
"grad_norm": 5.686942100524902, |
|
"learning_rate": 7.415862068965518e-06, |
|
"loss": 0.3349, |
|
"step": 4250 |
|
}, |
|
{ |
|
"epoch": 0.7393635420269803, |
|
"grad_norm": 4.615256309509277, |
|
"learning_rate": 7.398620689655173e-06, |
|
"loss": 0.3259, |
|
"step": 4275 |
|
}, |
|
{ |
|
"epoch": 0.7436873054306469, |
|
"grad_norm": 7.120574951171875, |
|
"learning_rate": 7.381379310344829e-06, |
|
"loss": 0.3444, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 0.7480110688343133, |
|
"grad_norm": 5.114111423492432, |
|
"learning_rate": 7.364137931034483e-06, |
|
"loss": 0.315, |
|
"step": 4325 |
|
}, |
|
{ |
|
"epoch": 0.7523348322379799, |
|
"grad_norm": 5.820385456085205, |
|
"learning_rate": 7.346896551724138e-06, |
|
"loss": 0.3149, |
|
"step": 4350 |
|
}, |
|
{ |
|
"epoch": 0.7566585956416465, |
|
"grad_norm": 5.231871128082275, |
|
"learning_rate": 7.330344827586208e-06, |
|
"loss": 0.3251, |
|
"step": 4375 |
|
}, |
|
{ |
|
"epoch": 0.7609823590453131, |
|
"grad_norm": 5.228817462921143, |
|
"learning_rate": 7.313103448275863e-06, |
|
"loss": 0.3162, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 0.7653061224489796, |
|
"grad_norm": 4.916074752807617, |
|
"learning_rate": 7.2958620689655175e-06, |
|
"loss": 0.304, |
|
"step": 4425 |
|
}, |
|
{ |
|
"epoch": 0.7696298858526461, |
|
"grad_norm": 5.931058883666992, |
|
"learning_rate": 7.278620689655172e-06, |
|
"loss": 0.3304, |
|
"step": 4450 |
|
}, |
|
{ |
|
"epoch": 0.7739536492563127, |
|
"grad_norm": 5.873823165893555, |
|
"learning_rate": 7.261379310344828e-06, |
|
"loss": 0.3398, |
|
"step": 4475 |
|
}, |
|
{ |
|
"epoch": 0.7782774126599793, |
|
"grad_norm": 5.279876708984375, |
|
"learning_rate": 7.244137931034483e-06, |
|
"loss": 0.3064, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 0.7826011760636458, |
|
"grad_norm": 4.812004089355469, |
|
"learning_rate": 7.226896551724139e-06, |
|
"loss": 0.3063, |
|
"step": 4525 |
|
}, |
|
{ |
|
"epoch": 0.7869249394673123, |
|
"grad_norm": 6.433504581451416, |
|
"learning_rate": 7.2096551724137944e-06, |
|
"loss": 0.3224, |
|
"step": 4550 |
|
}, |
|
{ |
|
"epoch": 0.7912487028709789, |
|
"grad_norm": 5.206650257110596, |
|
"learning_rate": 7.1924137931034485e-06, |
|
"loss": 0.3247, |
|
"step": 4575 |
|
}, |
|
{ |
|
"epoch": 0.7955724662746455, |
|
"grad_norm": 5.398522853851318, |
|
"learning_rate": 7.175172413793104e-06, |
|
"loss": 0.3017, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 0.799896229678312, |
|
"grad_norm": 4.88048791885376, |
|
"learning_rate": 7.157931034482759e-06, |
|
"loss": 0.3218, |
|
"step": 4625 |
|
}, |
|
{ |
|
"epoch": 0.8042199930819786, |
|
"grad_norm": 4.6434407234191895, |
|
"learning_rate": 7.140689655172415e-06, |
|
"loss": 0.3134, |
|
"step": 4650 |
|
}, |
|
{ |
|
"epoch": 0.8085437564856451, |
|
"grad_norm": 5.189647674560547, |
|
"learning_rate": 7.12344827586207e-06, |
|
"loss": 0.3068, |
|
"step": 4675 |
|
}, |
|
{ |
|
"epoch": 0.8128675198893116, |
|
"grad_norm": 5.023260116577148, |
|
"learning_rate": 7.106206896551725e-06, |
|
"loss": 0.2977, |
|
"step": 4700 |
|
}, |
|
{ |
|
"epoch": 0.8171912832929782, |
|
"grad_norm": 5.623253345489502, |
|
"learning_rate": 7.088965517241379e-06, |
|
"loss": 0.3083, |
|
"step": 4725 |
|
}, |
|
{ |
|
"epoch": 0.8215150466966448, |
|
"grad_norm": 5.7590789794921875, |
|
"learning_rate": 7.071724137931035e-06, |
|
"loss": 0.3113, |
|
"step": 4750 |
|
}, |
|
{ |
|
"epoch": 0.8258388101003113, |
|
"grad_norm": 5.757692813873291, |
|
"learning_rate": 7.05448275862069e-06, |
|
"loss": 0.3204, |
|
"step": 4775 |
|
}, |
|
{ |
|
"epoch": 0.8301625735039778, |
|
"grad_norm": 4.696317195892334, |
|
"learning_rate": 7.037241379310346e-06, |
|
"loss": 0.2966, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 0.8344863369076444, |
|
"grad_norm": 5.004659175872803, |
|
"learning_rate": 7.0200000000000006e-06, |
|
"loss": 0.3017, |
|
"step": 4825 |
|
}, |
|
{ |
|
"epoch": 0.838810100311311, |
|
"grad_norm": 5.880341529846191, |
|
"learning_rate": 7.002758620689655e-06, |
|
"loss": 0.3177, |
|
"step": 4850 |
|
}, |
|
{ |
|
"epoch": 0.8431338637149776, |
|
"grad_norm": 5.527527809143066, |
|
"learning_rate": 6.98551724137931e-06, |
|
"loss": 0.3056, |
|
"step": 4875 |
|
}, |
|
{ |
|
"epoch": 0.847457627118644, |
|
"grad_norm": 5.48380708694458, |
|
"learning_rate": 6.968275862068966e-06, |
|
"loss": 0.2952, |
|
"step": 4900 |
|
}, |
|
{ |
|
"epoch": 0.8517813905223106, |
|
"grad_norm": 3.9720652103424072, |
|
"learning_rate": 6.951034482758622e-06, |
|
"loss": 0.2961, |
|
"step": 4925 |
|
}, |
|
{ |
|
"epoch": 0.8561051539259772, |
|
"grad_norm": 5.183377265930176, |
|
"learning_rate": 6.933793103448277e-06, |
|
"loss": 0.3051, |
|
"step": 4950 |
|
}, |
|
{ |
|
"epoch": 0.8604289173296438, |
|
"grad_norm": 5.809779644012451, |
|
"learning_rate": 6.916551724137932e-06, |
|
"loss": 0.3191, |
|
"step": 4975 |
|
}, |
|
{ |
|
"epoch": 0.8647526807333102, |
|
"grad_norm": 5.579798698425293, |
|
"learning_rate": 6.899310344827586e-06, |
|
"loss": 0.3185, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.8647526807333102, |
|
"eval_loss": 0.29861804842948914, |
|
"eval_runtime": 5403.1228, |
|
"eval_samples_per_second": 4.28, |
|
"eval_steps_per_second": 0.535, |
|
"eval_wer": 37.68833742945034, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.8690764441369768, |
|
"grad_norm": 3.995452880859375, |
|
"learning_rate": 6.882068965517242e-06, |
|
"loss": 0.3095, |
|
"step": 5025 |
|
}, |
|
{ |
|
"epoch": 0.8734002075406434, |
|
"grad_norm": 5.617555618286133, |
|
"learning_rate": 6.864827586206897e-06, |
|
"loss": 0.3139, |
|
"step": 5050 |
|
}, |
|
{ |
|
"epoch": 0.8777239709443099, |
|
"grad_norm": 5.809163570404053, |
|
"learning_rate": 6.847586206896553e-06, |
|
"loss": 0.3209, |
|
"step": 5075 |
|
}, |
|
{ |
|
"epoch": 0.8820477343479765, |
|
"grad_norm": 6.078767776489258, |
|
"learning_rate": 6.8303448275862075e-06, |
|
"loss": 0.3019, |
|
"step": 5100 |
|
}, |
|
{ |
|
"epoch": 0.886371497751643, |
|
"grad_norm": 5.000216007232666, |
|
"learning_rate": 6.813103448275863e-06, |
|
"loss": 0.3195, |
|
"step": 5125 |
|
}, |
|
{ |
|
"epoch": 0.8906952611553096, |
|
"grad_norm": 4.086789131164551, |
|
"learning_rate": 6.795862068965517e-06, |
|
"loss": 0.2903, |
|
"step": 5150 |
|
}, |
|
{ |
|
"epoch": 0.8950190245589761, |
|
"grad_norm": 6.727631568908691, |
|
"learning_rate": 6.778620689655173e-06, |
|
"loss": 0.3091, |
|
"step": 5175 |
|
}, |
|
{ |
|
"epoch": 0.8993427879626427, |
|
"grad_norm": 5.265336513519287, |
|
"learning_rate": 6.761379310344828e-06, |
|
"loss": 0.309, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 0.9036665513663092, |
|
"grad_norm": 4.668389320373535, |
|
"learning_rate": 6.7441379310344836e-06, |
|
"loss": 0.2931, |
|
"step": 5225 |
|
}, |
|
{ |
|
"epoch": 0.9079903147699758, |
|
"grad_norm": 5.374257564544678, |
|
"learning_rate": 6.7268965517241384e-06, |
|
"loss": 0.2978, |
|
"step": 5250 |
|
}, |
|
{ |
|
"epoch": 0.9123140781736423, |
|
"grad_norm": 4.311838150024414, |
|
"learning_rate": 6.709655172413793e-06, |
|
"loss": 0.2889, |
|
"step": 5275 |
|
}, |
|
{ |
|
"epoch": 0.9166378415773089, |
|
"grad_norm": 4.521867275238037, |
|
"learning_rate": 6.692413793103448e-06, |
|
"loss": 0.3059, |
|
"step": 5300 |
|
}, |
|
{ |
|
"epoch": 0.9209616049809755, |
|
"grad_norm": 5.439430236816406, |
|
"learning_rate": 6.675172413793104e-06, |
|
"loss": 0.3191, |
|
"step": 5325 |
|
}, |
|
{ |
|
"epoch": 0.925285368384642, |
|
"grad_norm": 6.107820987701416, |
|
"learning_rate": 6.65793103448276e-06, |
|
"loss": 0.3078, |
|
"step": 5350 |
|
}, |
|
{ |
|
"epoch": 0.9296091317883085, |
|
"grad_norm": 5.370652675628662, |
|
"learning_rate": 6.6406896551724145e-06, |
|
"loss": 0.3103, |
|
"step": 5375 |
|
}, |
|
{ |
|
"epoch": 0.9339328951919751, |
|
"grad_norm": 6.220107555389404, |
|
"learning_rate": 6.62344827586207e-06, |
|
"loss": 0.3074, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 0.9382566585956417, |
|
"grad_norm": 6.766858100891113, |
|
"learning_rate": 6.606206896551724e-06, |
|
"loss": 0.2855, |
|
"step": 5425 |
|
}, |
|
{ |
|
"epoch": 0.9425804219993082, |
|
"grad_norm": 5.859302043914795, |
|
"learning_rate": 6.58896551724138e-06, |
|
"loss": 0.311, |
|
"step": 5450 |
|
}, |
|
{ |
|
"epoch": 0.9469041854029747, |
|
"grad_norm": 5.242767810821533, |
|
"learning_rate": 6.571724137931035e-06, |
|
"loss": 0.3057, |
|
"step": 5475 |
|
}, |
|
{ |
|
"epoch": 0.9512279488066413, |
|
"grad_norm": 5.731853008270264, |
|
"learning_rate": 6.5544827586206905e-06, |
|
"loss": 0.3083, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 0.9555517122103079, |
|
"grad_norm": 5.8285040855407715, |
|
"learning_rate": 6.537241379310345e-06, |
|
"loss": 0.2989, |
|
"step": 5525 |
|
}, |
|
{ |
|
"epoch": 0.9598754756139743, |
|
"grad_norm": 6.33150053024292, |
|
"learning_rate": 6.520000000000001e-06, |
|
"loss": 0.2985, |
|
"step": 5550 |
|
}, |
|
{ |
|
"epoch": 0.9641992390176409, |
|
"grad_norm": 4.892938613891602, |
|
"learning_rate": 6.502758620689655e-06, |
|
"loss": 0.3094, |
|
"step": 5575 |
|
}, |
|
{ |
|
"epoch": 0.9685230024213075, |
|
"grad_norm": 4.8297648429870605, |
|
"learning_rate": 6.485517241379311e-06, |
|
"loss": 0.3145, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 0.9728467658249741, |
|
"grad_norm": 5.7645111083984375, |
|
"learning_rate": 6.468275862068966e-06, |
|
"loss": 0.3013, |
|
"step": 5625 |
|
}, |
|
{ |
|
"epoch": 0.9771705292286406, |
|
"grad_norm": 4.90895414352417, |
|
"learning_rate": 6.4510344827586214e-06, |
|
"loss": 0.3054, |
|
"step": 5650 |
|
}, |
|
{ |
|
"epoch": 0.9814942926323071, |
|
"grad_norm": 5.324610710144043, |
|
"learning_rate": 6.433793103448276e-06, |
|
"loss": 0.3134, |
|
"step": 5675 |
|
}, |
|
{ |
|
"epoch": 0.9858180560359737, |
|
"grad_norm": 4.568467140197754, |
|
"learning_rate": 6.416551724137931e-06, |
|
"loss": 0.2974, |
|
"step": 5700 |
|
}, |
|
{ |
|
"epoch": 0.9901418194396403, |
|
"grad_norm": 5.177453994750977, |
|
"learning_rate": 6.399310344827587e-06, |
|
"loss": 0.3156, |
|
"step": 5725 |
|
}, |
|
{ |
|
"epoch": 0.9944655828433068, |
|
"grad_norm": 6.221437454223633, |
|
"learning_rate": 6.382068965517242e-06, |
|
"loss": 0.3109, |
|
"step": 5750 |
|
}, |
|
{ |
|
"epoch": 0.9987893462469734, |
|
"grad_norm": 5.171840667724609, |
|
"learning_rate": 6.3648275862068975e-06, |
|
"loss": 0.315, |
|
"step": 5775 |
|
}, |
|
{ |
|
"epoch": 1.0031131096506398, |
|
"grad_norm": 5.1609907150268555, |
|
"learning_rate": 6.347586206896552e-06, |
|
"loss": 0.2943, |
|
"step": 5800 |
|
}, |
|
{ |
|
"epoch": 1.0074368730543064, |
|
"grad_norm": 5.415836811065674, |
|
"learning_rate": 6.330344827586208e-06, |
|
"loss": 0.2833, |
|
"step": 5825 |
|
}, |
|
{ |
|
"epoch": 1.011760636457973, |
|
"grad_norm": 5.87399435043335, |
|
"learning_rate": 6.313103448275862e-06, |
|
"loss": 0.2806, |
|
"step": 5850 |
|
}, |
|
{ |
|
"epoch": 1.0160843998616396, |
|
"grad_norm": 6.694667816162109, |
|
"learning_rate": 6.295862068965518e-06, |
|
"loss": 0.2844, |
|
"step": 5875 |
|
}, |
|
{ |
|
"epoch": 1.0204081632653061, |
|
"grad_norm": 6.607420444488525, |
|
"learning_rate": 6.278620689655173e-06, |
|
"loss": 0.3028, |
|
"step": 5900 |
|
}, |
|
{ |
|
"epoch": 1.0247319266689727, |
|
"grad_norm": 5.000158309936523, |
|
"learning_rate": 6.261379310344828e-06, |
|
"loss": 0.287, |
|
"step": 5925 |
|
}, |
|
{ |
|
"epoch": 1.0290556900726393, |
|
"grad_norm": 4.624439239501953, |
|
"learning_rate": 6.244137931034483e-06, |
|
"loss": 0.2848, |
|
"step": 5950 |
|
}, |
|
{ |
|
"epoch": 1.0333794534763059, |
|
"grad_norm": 4.626707553863525, |
|
"learning_rate": 6.226896551724139e-06, |
|
"loss": 0.2767, |
|
"step": 5975 |
|
}, |
|
{ |
|
"epoch": 1.0377032168799722, |
|
"grad_norm": 6.180722236633301, |
|
"learning_rate": 6.209655172413793e-06, |
|
"loss": 0.2898, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 1.0377032168799722, |
|
"eval_loss": 0.28753867745399475, |
|
"eval_runtime": 5032.2441, |
|
"eval_samples_per_second": 4.596, |
|
"eval_steps_per_second": 0.574, |
|
"eval_wer": 35.51532588144612, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 1.0420269802836388, |
|
"grad_norm": 5.8476176261901855, |
|
"learning_rate": 6.192413793103449e-06, |
|
"loss": 0.2917, |
|
"step": 6025 |
|
}, |
|
{ |
|
"epoch": 1.0463507436873054, |
|
"grad_norm": 4.507814407348633, |
|
"learning_rate": 6.175172413793104e-06, |
|
"loss": 0.2886, |
|
"step": 6050 |
|
}, |
|
{ |
|
"epoch": 1.050674507090972, |
|
"grad_norm": 5.021352767944336, |
|
"learning_rate": 6.157931034482759e-06, |
|
"loss": 0.2953, |
|
"step": 6075 |
|
}, |
|
{ |
|
"epoch": 1.0549982704946386, |
|
"grad_norm": 4.907651901245117, |
|
"learning_rate": 6.140689655172414e-06, |
|
"loss": 0.28, |
|
"step": 6100 |
|
}, |
|
{ |
|
"epoch": 1.0593220338983051, |
|
"grad_norm": 4.90705680847168, |
|
"learning_rate": 6.123448275862069e-06, |
|
"loss": 0.2906, |
|
"step": 6125 |
|
}, |
|
{ |
|
"epoch": 1.0636457973019717, |
|
"grad_norm": 5.755337715148926, |
|
"learning_rate": 6.106206896551725e-06, |
|
"loss": 0.2886, |
|
"step": 6150 |
|
}, |
|
{ |
|
"epoch": 1.067969560705638, |
|
"grad_norm": 3.9755401611328125, |
|
"learning_rate": 6.08896551724138e-06, |
|
"loss": 0.283, |
|
"step": 6175 |
|
}, |
|
{ |
|
"epoch": 1.0722933241093047, |
|
"grad_norm": 4.78421688079834, |
|
"learning_rate": 6.071724137931035e-06, |
|
"loss": 0.3046, |
|
"step": 6200 |
|
}, |
|
{ |
|
"epoch": 1.0766170875129712, |
|
"grad_norm": 5.140844345092773, |
|
"learning_rate": 6.05448275862069e-06, |
|
"loss": 0.2558, |
|
"step": 6225 |
|
}, |
|
{ |
|
"epoch": 1.0809408509166378, |
|
"grad_norm": 4.85251522064209, |
|
"learning_rate": 6.037241379310346e-06, |
|
"loss": 0.2634, |
|
"step": 6250 |
|
}, |
|
{ |
|
"epoch": 1.0852646143203044, |
|
"grad_norm": 5.412033557891846, |
|
"learning_rate": 6.02e-06, |
|
"loss": 0.267, |
|
"step": 6275 |
|
}, |
|
{ |
|
"epoch": 1.089588377723971, |
|
"grad_norm": 4.728926658630371, |
|
"learning_rate": 6.002758620689656e-06, |
|
"loss": 0.2644, |
|
"step": 6300 |
|
}, |
|
{ |
|
"epoch": 1.0939121411276376, |
|
"grad_norm": 4.522409915924072, |
|
"learning_rate": 5.9855172413793105e-06, |
|
"loss": 0.2853, |
|
"step": 6325 |
|
}, |
|
{ |
|
"epoch": 1.098235904531304, |
|
"grad_norm": 4.903046607971191, |
|
"learning_rate": 5.968275862068966e-06, |
|
"loss": 0.2836, |
|
"step": 6350 |
|
}, |
|
{ |
|
"epoch": 1.1025596679349705, |
|
"grad_norm": 5.107143878936768, |
|
"learning_rate": 5.951724137931036e-06, |
|
"loss": 0.2775, |
|
"step": 6375 |
|
}, |
|
{ |
|
"epoch": 1.106883431338637, |
|
"grad_norm": 4.579159259796143, |
|
"learning_rate": 5.93448275862069e-06, |
|
"loss": 0.2855, |
|
"step": 6400 |
|
}, |
|
{ |
|
"epoch": 1.1112071947423037, |
|
"grad_norm": 5.995217323303223, |
|
"learning_rate": 5.917241379310345e-06, |
|
"loss": 0.2759, |
|
"step": 6425 |
|
}, |
|
{ |
|
"epoch": 1.1155309581459703, |
|
"grad_norm": 4.0890350341796875, |
|
"learning_rate": 5.9e-06, |
|
"loss": 0.2806, |
|
"step": 6450 |
|
}, |
|
{ |
|
"epoch": 1.1198547215496368, |
|
"grad_norm": 4.915022850036621, |
|
"learning_rate": 5.882758620689656e-06, |
|
"loss": 0.2787, |
|
"step": 6475 |
|
}, |
|
{ |
|
"epoch": 1.1241784849533034, |
|
"grad_norm": 5.1101579666137695, |
|
"learning_rate": 5.865517241379311e-06, |
|
"loss": 0.2725, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 1.12850224835697, |
|
"grad_norm": 5.38516902923584, |
|
"learning_rate": 5.848275862068966e-06, |
|
"loss": 0.2921, |
|
"step": 6525 |
|
}, |
|
{ |
|
"epoch": 1.1328260117606366, |
|
"grad_norm": 5.741593360900879, |
|
"learning_rate": 5.831034482758621e-06, |
|
"loss": 0.2747, |
|
"step": 6550 |
|
}, |
|
{ |
|
"epoch": 1.137149775164303, |
|
"grad_norm": 4.1872053146362305, |
|
"learning_rate": 5.813793103448276e-06, |
|
"loss": 0.2992, |
|
"step": 6575 |
|
}, |
|
{ |
|
"epoch": 1.1414735385679695, |
|
"grad_norm": 5.309129238128662, |
|
"learning_rate": 5.796551724137931e-06, |
|
"loss": 0.2776, |
|
"step": 6600 |
|
}, |
|
{ |
|
"epoch": 1.145797301971636, |
|
"grad_norm": 4.281647682189941, |
|
"learning_rate": 5.779310344827587e-06, |
|
"loss": 0.2961, |
|
"step": 6625 |
|
}, |
|
{ |
|
"epoch": 1.1501210653753027, |
|
"grad_norm": 5.709052085876465, |
|
"learning_rate": 5.762068965517243e-06, |
|
"loss": 0.2934, |
|
"step": 6650 |
|
}, |
|
{ |
|
"epoch": 1.1544448287789693, |
|
"grad_norm": 5.323013782501221, |
|
"learning_rate": 5.744827586206897e-06, |
|
"loss": 0.2814, |
|
"step": 6675 |
|
}, |
|
{ |
|
"epoch": 1.1587685921826358, |
|
"grad_norm": 4.633469104766846, |
|
"learning_rate": 5.727586206896552e-06, |
|
"loss": 0.2805, |
|
"step": 6700 |
|
}, |
|
{ |
|
"epoch": 1.1630923555863024, |
|
"grad_norm": 5.769145488739014, |
|
"learning_rate": 5.710344827586207e-06, |
|
"loss": 0.2909, |
|
"step": 6725 |
|
}, |
|
{ |
|
"epoch": 1.1674161189899688, |
|
"grad_norm": 4.406848907470703, |
|
"learning_rate": 5.693103448275863e-06, |
|
"loss": 0.2717, |
|
"step": 6750 |
|
}, |
|
{ |
|
"epoch": 1.1717398823936354, |
|
"grad_norm": 5.0518269538879395, |
|
"learning_rate": 5.675862068965518e-06, |
|
"loss": 0.2822, |
|
"step": 6775 |
|
}, |
|
{ |
|
"epoch": 1.176063645797302, |
|
"grad_norm": 5.016385078430176, |
|
"learning_rate": 5.6586206896551735e-06, |
|
"loss": 0.2574, |
|
"step": 6800 |
|
}, |
|
{ |
|
"epoch": 1.1803874092009685, |
|
"grad_norm": 6.169381618499756, |
|
"learning_rate": 5.6413793103448275e-06, |
|
"loss": 0.2934, |
|
"step": 6825 |
|
}, |
|
{ |
|
"epoch": 1.184711172604635, |
|
"grad_norm": 4.3018879890441895, |
|
"learning_rate": 5.624137931034483e-06, |
|
"loss": 0.2707, |
|
"step": 6850 |
|
}, |
|
{ |
|
"epoch": 1.1890349360083017, |
|
"grad_norm": 5.160860538482666, |
|
"learning_rate": 5.606896551724138e-06, |
|
"loss": 0.2674, |
|
"step": 6875 |
|
}, |
|
{ |
|
"epoch": 1.1933586994119683, |
|
"grad_norm": 4.624250888824463, |
|
"learning_rate": 5.589655172413794e-06, |
|
"loss": 0.2896, |
|
"step": 6900 |
|
}, |
|
{ |
|
"epoch": 1.1976824628156346, |
|
"grad_norm": 4.681013584136963, |
|
"learning_rate": 5.572413793103449e-06, |
|
"loss": 0.2827, |
|
"step": 6925 |
|
}, |
|
{ |
|
"epoch": 1.2020062262193012, |
|
"grad_norm": 5.956193447113037, |
|
"learning_rate": 5.555172413793104e-06, |
|
"loss": 0.2655, |
|
"step": 6950 |
|
}, |
|
{ |
|
"epoch": 1.2063299896229678, |
|
"grad_norm": 5.1831278800964355, |
|
"learning_rate": 5.5379310344827585e-06, |
|
"loss": 0.2847, |
|
"step": 6975 |
|
}, |
|
{ |
|
"epoch": 1.2106537530266344, |
|
"grad_norm": 5.0179595947265625, |
|
"learning_rate": 5.520689655172414e-06, |
|
"loss": 0.2787, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 1.2106537530266344, |
|
"eval_loss": 0.28157490491867065, |
|
"eval_runtime": 5064.7694, |
|
"eval_samples_per_second": 4.566, |
|
"eval_steps_per_second": 0.571, |
|
"eval_wer": 35.47549709835601, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 1.214977516430301, |
|
"grad_norm": 3.484829902648926, |
|
"learning_rate": 5.503448275862069e-06, |
|
"loss": 0.2671, |
|
"step": 7025 |
|
}, |
|
{ |
|
"epoch": 1.2193012798339675, |
|
"grad_norm": 5.8819169998168945, |
|
"learning_rate": 5.486206896551725e-06, |
|
"loss": 0.2879, |
|
"step": 7050 |
|
}, |
|
{ |
|
"epoch": 1.223625043237634, |
|
"grad_norm": 4.751934051513672, |
|
"learning_rate": 5.4689655172413805e-06, |
|
"loss": 0.2732, |
|
"step": 7075 |
|
}, |
|
{ |
|
"epoch": 1.2279488066413007, |
|
"grad_norm": 6.112524032592773, |
|
"learning_rate": 5.4517241379310345e-06, |
|
"loss": 0.281, |
|
"step": 7100 |
|
}, |
|
{ |
|
"epoch": 1.2322725700449673, |
|
"grad_norm": 5.031464099884033, |
|
"learning_rate": 5.43448275862069e-06, |
|
"loss": 0.2796, |
|
"step": 7125 |
|
}, |
|
{ |
|
"epoch": 1.2365963334486336, |
|
"grad_norm": 4.925386905670166, |
|
"learning_rate": 5.417241379310345e-06, |
|
"loss": 0.2772, |
|
"step": 7150 |
|
}, |
|
{ |
|
"epoch": 1.2409200968523002, |
|
"grad_norm": 4.831676483154297, |
|
"learning_rate": 5.400000000000001e-06, |
|
"loss": 0.2754, |
|
"step": 7175 |
|
}, |
|
{ |
|
"epoch": 1.2452438602559668, |
|
"grad_norm": 5.259826183319092, |
|
"learning_rate": 5.382758620689656e-06, |
|
"loss": 0.2807, |
|
"step": 7200 |
|
}, |
|
{ |
|
"epoch": 1.2495676236596334, |
|
"grad_norm": 5.998917102813721, |
|
"learning_rate": 5.365517241379311e-06, |
|
"loss": 0.264, |
|
"step": 7225 |
|
}, |
|
{ |
|
"epoch": 1.2538913870633, |
|
"grad_norm": 4.769485950469971, |
|
"learning_rate": 5.3482758620689654e-06, |
|
"loss": 0.2742, |
|
"step": 7250 |
|
}, |
|
{ |
|
"epoch": 1.2582151504669665, |
|
"grad_norm": 4.706541538238525, |
|
"learning_rate": 5.331034482758621e-06, |
|
"loss": 0.2644, |
|
"step": 7275 |
|
}, |
|
{ |
|
"epoch": 1.262538913870633, |
|
"grad_norm": 4.766952991485596, |
|
"learning_rate": 5.313793103448276e-06, |
|
"loss": 0.2863, |
|
"step": 7300 |
|
}, |
|
{ |
|
"epoch": 1.2668626772742995, |
|
"grad_norm": 4.333870887756348, |
|
"learning_rate": 5.296551724137932e-06, |
|
"loss": 0.275, |
|
"step": 7325 |
|
}, |
|
{ |
|
"epoch": 1.271186440677966, |
|
"grad_norm": 5.625321388244629, |
|
"learning_rate": 5.279310344827587e-06, |
|
"loss": 0.2775, |
|
"step": 7350 |
|
}, |
|
{ |
|
"epoch": 1.2755102040816326, |
|
"grad_norm": 4.249492645263672, |
|
"learning_rate": 5.2620689655172415e-06, |
|
"loss": 0.259, |
|
"step": 7375 |
|
}, |
|
{ |
|
"epoch": 1.2798339674852992, |
|
"grad_norm": 6.244980812072754, |
|
"learning_rate": 5.244827586206896e-06, |
|
"loss": 0.2656, |
|
"step": 7400 |
|
}, |
|
{ |
|
"epoch": 1.2841577308889658, |
|
"grad_norm": 4.6464056968688965, |
|
"learning_rate": 5.227586206896552e-06, |
|
"loss": 0.2644, |
|
"step": 7425 |
|
}, |
|
{ |
|
"epoch": 1.2884814942926324, |
|
"grad_norm": 4.944653511047363, |
|
"learning_rate": 5.210344827586208e-06, |
|
"loss": 0.282, |
|
"step": 7450 |
|
}, |
|
{ |
|
"epoch": 1.292805257696299, |
|
"grad_norm": 6.058406352996826, |
|
"learning_rate": 5.193103448275863e-06, |
|
"loss": 0.2717, |
|
"step": 7475 |
|
}, |
|
{ |
|
"epoch": 1.2971290210999653, |
|
"grad_norm": 5.668537139892578, |
|
"learning_rate": 5.175862068965518e-06, |
|
"loss": 0.2731, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 1.3014527845036319, |
|
"grad_norm": 4.982054710388184, |
|
"learning_rate": 5.158620689655172e-06, |
|
"loss": 0.2579, |
|
"step": 7525 |
|
}, |
|
{ |
|
"epoch": 1.3057765479072985, |
|
"grad_norm": 4.244757175445557, |
|
"learning_rate": 5.141379310344828e-06, |
|
"loss": 0.2684, |
|
"step": 7550 |
|
}, |
|
{ |
|
"epoch": 1.310100311310965, |
|
"grad_norm": 4.842339515686035, |
|
"learning_rate": 5.124137931034483e-06, |
|
"loss": 0.2795, |
|
"step": 7575 |
|
}, |
|
{ |
|
"epoch": 1.3144240747146316, |
|
"grad_norm": 4.516208171844482, |
|
"learning_rate": 5.106896551724139e-06, |
|
"loss": 0.2724, |
|
"step": 7600 |
|
}, |
|
{ |
|
"epoch": 1.3187478381182982, |
|
"grad_norm": 4.063875675201416, |
|
"learning_rate": 5.0896551724137936e-06, |
|
"loss": 0.2816, |
|
"step": 7625 |
|
}, |
|
{ |
|
"epoch": 1.3230716015219648, |
|
"grad_norm": 4.514278888702393, |
|
"learning_rate": 5.072413793103449e-06, |
|
"loss": 0.2664, |
|
"step": 7650 |
|
}, |
|
{ |
|
"epoch": 1.3273953649256311, |
|
"grad_norm": 5.441287994384766, |
|
"learning_rate": 5.055172413793103e-06, |
|
"loss": 0.2656, |
|
"step": 7675 |
|
}, |
|
{ |
|
"epoch": 1.331719128329298, |
|
"grad_norm": 4.973991870880127, |
|
"learning_rate": 5.037931034482759e-06, |
|
"loss": 0.2751, |
|
"step": 7700 |
|
}, |
|
{ |
|
"epoch": 1.3360428917329643, |
|
"grad_norm": 5.129386901855469, |
|
"learning_rate": 5.020689655172414e-06, |
|
"loss": 0.2644, |
|
"step": 7725 |
|
}, |
|
{ |
|
"epoch": 1.340366655136631, |
|
"grad_norm": 5.174379348754883, |
|
"learning_rate": 5.00344827586207e-06, |
|
"loss": 0.2638, |
|
"step": 7750 |
|
}, |
|
{ |
|
"epoch": 1.3446904185402975, |
|
"grad_norm": 3.6351191997528076, |
|
"learning_rate": 4.9862068965517245e-06, |
|
"loss": 0.2712, |
|
"step": 7775 |
|
}, |
|
{ |
|
"epoch": 1.349014181943964, |
|
"grad_norm": 5.315875053405762, |
|
"learning_rate": 4.968965517241379e-06, |
|
"loss": 0.2663, |
|
"step": 7800 |
|
}, |
|
{ |
|
"epoch": 1.3533379453476306, |
|
"grad_norm": 5.3713202476501465, |
|
"learning_rate": 4.951724137931035e-06, |
|
"loss": 0.2867, |
|
"step": 7825 |
|
}, |
|
{ |
|
"epoch": 1.357661708751297, |
|
"grad_norm": 4.494086265563965, |
|
"learning_rate": 4.93448275862069e-06, |
|
"loss": 0.2633, |
|
"step": 7850 |
|
}, |
|
{ |
|
"epoch": 1.3619854721549638, |
|
"grad_norm": 4.476729869842529, |
|
"learning_rate": 4.917241379310345e-06, |
|
"loss": 0.2646, |
|
"step": 7875 |
|
}, |
|
{ |
|
"epoch": 1.3663092355586302, |
|
"grad_norm": 4.904832363128662, |
|
"learning_rate": 4.9000000000000005e-06, |
|
"loss": 0.2543, |
|
"step": 7900 |
|
}, |
|
{ |
|
"epoch": 1.3706329989622967, |
|
"grad_norm": 6.566470623016357, |
|
"learning_rate": 4.882758620689655e-06, |
|
"loss": 0.2778, |
|
"step": 7925 |
|
}, |
|
{ |
|
"epoch": 1.3749567623659633, |
|
"grad_norm": 7.662662982940674, |
|
"learning_rate": 4.86551724137931e-06, |
|
"loss": 0.2749, |
|
"step": 7950 |
|
}, |
|
{ |
|
"epoch": 1.37928052576963, |
|
"grad_norm": 4.465984344482422, |
|
"learning_rate": 4.848275862068966e-06, |
|
"loss": 0.2497, |
|
"step": 7975 |
|
}, |
|
{ |
|
"epoch": 1.3836042891732965, |
|
"grad_norm": 4.482520580291748, |
|
"learning_rate": 4.831034482758621e-06, |
|
"loss": 0.2856, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 1.3836042891732965, |
|
"eval_loss": 0.27571046352386475, |
|
"eval_runtime": 5061.2899, |
|
"eval_samples_per_second": 4.569, |
|
"eval_steps_per_second": 0.571, |
|
"eval_wer": 35.15030679877327, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 1.387928052576963, |
|
"grad_norm": 4.781071662902832, |
|
"learning_rate": 4.813793103448276e-06, |
|
"loss": 0.2509, |
|
"step": 8025 |
|
}, |
|
{ |
|
"epoch": 1.3922518159806296, |
|
"grad_norm": 5.495136260986328, |
|
"learning_rate": 4.7965517241379314e-06, |
|
"loss": 0.2804, |
|
"step": 8050 |
|
}, |
|
{ |
|
"epoch": 1.396575579384296, |
|
"grad_norm": 5.323382377624512, |
|
"learning_rate": 4.779310344827587e-06, |
|
"loss": 0.2662, |
|
"step": 8075 |
|
}, |
|
{ |
|
"epoch": 1.4008993427879626, |
|
"grad_norm": 4.909964084625244, |
|
"learning_rate": 4.762068965517242e-06, |
|
"loss": 0.2815, |
|
"step": 8100 |
|
}, |
|
{ |
|
"epoch": 1.4052231061916292, |
|
"grad_norm": 4.8298420906066895, |
|
"learning_rate": 4.744827586206897e-06, |
|
"loss": 0.2605, |
|
"step": 8125 |
|
}, |
|
{ |
|
"epoch": 1.4095468695952957, |
|
"grad_norm": 5.043326377868652, |
|
"learning_rate": 4.727586206896553e-06, |
|
"loss": 0.2805, |
|
"step": 8150 |
|
}, |
|
{ |
|
"epoch": 1.4138706329989623, |
|
"grad_norm": 4.897740364074707, |
|
"learning_rate": 4.7103448275862075e-06, |
|
"loss": 0.2724, |
|
"step": 8175 |
|
}, |
|
{ |
|
"epoch": 1.418194396402629, |
|
"grad_norm": 4.258267402648926, |
|
"learning_rate": 4.693103448275862e-06, |
|
"loss": 0.2559, |
|
"step": 8200 |
|
}, |
|
{ |
|
"epoch": 1.4225181598062955, |
|
"grad_norm": 4.238452911376953, |
|
"learning_rate": 4.675862068965517e-06, |
|
"loss": 0.2633, |
|
"step": 8225 |
|
}, |
|
{ |
|
"epoch": 1.4268419232099618, |
|
"grad_norm": 5.237506866455078, |
|
"learning_rate": 4.658620689655173e-06, |
|
"loss": 0.2695, |
|
"step": 8250 |
|
}, |
|
{ |
|
"epoch": 1.4311656866136286, |
|
"grad_norm": 7.0735182762146, |
|
"learning_rate": 4.641379310344828e-06, |
|
"loss": 0.2792, |
|
"step": 8275 |
|
}, |
|
{ |
|
"epoch": 1.435489450017295, |
|
"grad_norm": 5.525311470031738, |
|
"learning_rate": 4.624137931034483e-06, |
|
"loss": 0.2861, |
|
"step": 8300 |
|
}, |
|
{ |
|
"epoch": 1.4398132134209616, |
|
"grad_norm": 5.706710338592529, |
|
"learning_rate": 4.606896551724138e-06, |
|
"loss": 0.2616, |
|
"step": 8325 |
|
}, |
|
{ |
|
"epoch": 1.4441369768246282, |
|
"grad_norm": 4.5631794929504395, |
|
"learning_rate": 4.589655172413793e-06, |
|
"loss": 0.2528, |
|
"step": 8350 |
|
}, |
|
{ |
|
"epoch": 1.4484607402282947, |
|
"grad_norm": 4.35201358795166, |
|
"learning_rate": 4.572413793103448e-06, |
|
"loss": 0.2669, |
|
"step": 8375 |
|
}, |
|
{ |
|
"epoch": 1.4527845036319613, |
|
"grad_norm": 5.7140793800354, |
|
"learning_rate": 4.555172413793104e-06, |
|
"loss": 0.275, |
|
"step": 8400 |
|
}, |
|
{ |
|
"epoch": 1.4571082670356277, |
|
"grad_norm": 6.433730602264404, |
|
"learning_rate": 4.537931034482759e-06, |
|
"loss": 0.2718, |
|
"step": 8425 |
|
}, |
|
{ |
|
"epoch": 1.4614320304392945, |
|
"grad_norm": 4.385723114013672, |
|
"learning_rate": 4.521379310344828e-06, |
|
"loss": 0.2751, |
|
"step": 8450 |
|
}, |
|
{ |
|
"epoch": 1.4657557938429608, |
|
"grad_norm": 4.835318565368652, |
|
"learning_rate": 4.504137931034483e-06, |
|
"loss": 0.273, |
|
"step": 8475 |
|
}, |
|
{ |
|
"epoch": 1.4700795572466274, |
|
"grad_norm": 5.070703506469727, |
|
"learning_rate": 4.486896551724138e-06, |
|
"loss": 0.2659, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 1.474403320650294, |
|
"grad_norm": 4.764357089996338, |
|
"learning_rate": 4.4696551724137936e-06, |
|
"loss": 0.2614, |
|
"step": 8525 |
|
}, |
|
{ |
|
"epoch": 1.4787270840539606, |
|
"grad_norm": 4.433307647705078, |
|
"learning_rate": 4.452413793103449e-06, |
|
"loss": 0.2773, |
|
"step": 8550 |
|
}, |
|
{ |
|
"epoch": 1.4830508474576272, |
|
"grad_norm": 5.220912456512451, |
|
"learning_rate": 4.435172413793104e-06, |
|
"loss": 0.2739, |
|
"step": 8575 |
|
}, |
|
{ |
|
"epoch": 1.4873746108612937, |
|
"grad_norm": 4.9389753341674805, |
|
"learning_rate": 4.417931034482759e-06, |
|
"loss": 0.2801, |
|
"step": 8600 |
|
}, |
|
{ |
|
"epoch": 1.4916983742649603, |
|
"grad_norm": 5.52213191986084, |
|
"learning_rate": 4.400689655172414e-06, |
|
"loss": 0.2713, |
|
"step": 8625 |
|
}, |
|
{ |
|
"epoch": 1.4960221376686267, |
|
"grad_norm": 4.186790943145752, |
|
"learning_rate": 4.38344827586207e-06, |
|
"loss": 0.2702, |
|
"step": 8650 |
|
}, |
|
{ |
|
"epoch": 1.5003459010722935, |
|
"grad_norm": 5.3168535232543945, |
|
"learning_rate": 4.3662068965517245e-06, |
|
"loss": 0.2744, |
|
"step": 8675 |
|
}, |
|
{ |
|
"epoch": 1.5046696644759598, |
|
"grad_norm": 5.070104122161865, |
|
"learning_rate": 4.348965517241379e-06, |
|
"loss": 0.282, |
|
"step": 8700 |
|
}, |
|
{ |
|
"epoch": 1.5089934278796264, |
|
"grad_norm": 4.920200347900391, |
|
"learning_rate": 4.331724137931035e-06, |
|
"loss": 0.262, |
|
"step": 8725 |
|
}, |
|
{ |
|
"epoch": 1.513317191283293, |
|
"grad_norm": 4.704413414001465, |
|
"learning_rate": 4.31448275862069e-06, |
|
"loss": 0.2539, |
|
"step": 8750 |
|
}, |
|
{ |
|
"epoch": 1.5176409546869594, |
|
"grad_norm": 5.695456027984619, |
|
"learning_rate": 4.297241379310345e-06, |
|
"loss": 0.2843, |
|
"step": 8775 |
|
}, |
|
{ |
|
"epoch": 1.5219647180906262, |
|
"grad_norm": 5.497166633605957, |
|
"learning_rate": 4.2800000000000005e-06, |
|
"loss": 0.2813, |
|
"step": 8800 |
|
}, |
|
{ |
|
"epoch": 1.5262884814942925, |
|
"grad_norm": 6.1521100997924805, |
|
"learning_rate": 4.262758620689655e-06, |
|
"loss": 0.2835, |
|
"step": 8825 |
|
}, |
|
{ |
|
"epoch": 1.5306122448979593, |
|
"grad_norm": 4.628076076507568, |
|
"learning_rate": 4.24551724137931e-06, |
|
"loss": 0.2685, |
|
"step": 8850 |
|
}, |
|
{ |
|
"epoch": 1.5349360083016257, |
|
"grad_norm": 6.371509552001953, |
|
"learning_rate": 4.228275862068966e-06, |
|
"loss": 0.2596, |
|
"step": 8875 |
|
}, |
|
{ |
|
"epoch": 1.5392597717052923, |
|
"grad_norm": 6.203851699829102, |
|
"learning_rate": 4.211034482758621e-06, |
|
"loss": 0.2801, |
|
"step": 8900 |
|
}, |
|
{ |
|
"epoch": 1.5435835351089588, |
|
"grad_norm": 5.120136260986328, |
|
"learning_rate": 4.193793103448276e-06, |
|
"loss": 0.2749, |
|
"step": 8925 |
|
}, |
|
{ |
|
"epoch": 1.5479072985126254, |
|
"grad_norm": 4.296701908111572, |
|
"learning_rate": 4.1765517241379314e-06, |
|
"loss": 0.268, |
|
"step": 8950 |
|
}, |
|
{ |
|
"epoch": 1.552231061916292, |
|
"grad_norm": 4.204896926879883, |
|
"learning_rate": 4.159310344827587e-06, |
|
"loss": 0.2731, |
|
"step": 8975 |
|
}, |
|
{ |
|
"epoch": 1.5565548253199584, |
|
"grad_norm": 4.787944793701172, |
|
"learning_rate": 4.142068965517242e-06, |
|
"loss": 0.2672, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 1.5565548253199584, |
|
"eval_loss": 0.27039873600006104, |
|
"eval_runtime": 5069.0399, |
|
"eval_samples_per_second": 4.562, |
|
"eval_steps_per_second": 0.57, |
|
"eval_wer": 34.11897560369892, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 1.5608785887236252, |
|
"grad_norm": 5.445296764373779, |
|
"learning_rate": 4.124827586206897e-06, |
|
"loss": 0.256, |
|
"step": 9025 |
|
}, |
|
{ |
|
"epoch": 1.5652023521272915, |
|
"grad_norm": 6.453240871429443, |
|
"learning_rate": 4.107586206896552e-06, |
|
"loss": 0.2559, |
|
"step": 9050 |
|
}, |
|
{ |
|
"epoch": 1.569526115530958, |
|
"grad_norm": 6.632162570953369, |
|
"learning_rate": 4.0903448275862075e-06, |
|
"loss": 0.287, |
|
"step": 9075 |
|
}, |
|
{ |
|
"epoch": 1.5738498789346247, |
|
"grad_norm": 5.117345809936523, |
|
"learning_rate": 4.073103448275862e-06, |
|
"loss": 0.2669, |
|
"step": 9100 |
|
}, |
|
{ |
|
"epoch": 1.5781736423382913, |
|
"grad_norm": 4.2106804847717285, |
|
"learning_rate": 4.055862068965517e-06, |
|
"loss": 0.2566, |
|
"step": 9125 |
|
}, |
|
{ |
|
"epoch": 1.5824974057419579, |
|
"grad_norm": 4.997186660766602, |
|
"learning_rate": 4.038620689655173e-06, |
|
"loss": 0.2718, |
|
"step": 9150 |
|
}, |
|
{ |
|
"epoch": 1.5868211691456242, |
|
"grad_norm": 6.009954452514648, |
|
"learning_rate": 4.021379310344828e-06, |
|
"loss": 0.2759, |
|
"step": 9175 |
|
}, |
|
{ |
|
"epoch": 1.591144932549291, |
|
"grad_norm": 4.32164192199707, |
|
"learning_rate": 4.004137931034483e-06, |
|
"loss": 0.2437, |
|
"step": 9200 |
|
}, |
|
{ |
|
"epoch": 1.5954686959529574, |
|
"grad_norm": 4.823526382446289, |
|
"learning_rate": 3.986896551724138e-06, |
|
"loss": 0.2632, |
|
"step": 9225 |
|
}, |
|
{ |
|
"epoch": 1.599792459356624, |
|
"grad_norm": 4.981450080871582, |
|
"learning_rate": 3.969655172413793e-06, |
|
"loss": 0.263, |
|
"step": 9250 |
|
}, |
|
{ |
|
"epoch": 1.6041162227602905, |
|
"grad_norm": 4.784339427947998, |
|
"learning_rate": 3.952413793103448e-06, |
|
"loss": 0.2763, |
|
"step": 9275 |
|
}, |
|
{ |
|
"epoch": 1.6084399861639571, |
|
"grad_norm": 4.33436393737793, |
|
"learning_rate": 3.935172413793104e-06, |
|
"loss": 0.2595, |
|
"step": 9300 |
|
}, |
|
{ |
|
"epoch": 1.6127637495676237, |
|
"grad_norm": 5.4285688400268555, |
|
"learning_rate": 3.917931034482759e-06, |
|
"loss": 0.2783, |
|
"step": 9325 |
|
}, |
|
{ |
|
"epoch": 1.61708751297129, |
|
"grad_norm": 5.2749786376953125, |
|
"learning_rate": 3.9006896551724144e-06, |
|
"loss": 0.2497, |
|
"step": 9350 |
|
}, |
|
{ |
|
"epoch": 1.6214112763749569, |
|
"grad_norm": 4.453002452850342, |
|
"learning_rate": 3.883448275862069e-06, |
|
"loss": 0.2749, |
|
"step": 9375 |
|
}, |
|
{ |
|
"epoch": 1.6257350397786232, |
|
"grad_norm": 4.48176383972168, |
|
"learning_rate": 3.866206896551725e-06, |
|
"loss": 0.2867, |
|
"step": 9400 |
|
}, |
|
{ |
|
"epoch": 1.63005880318229, |
|
"grad_norm": 4.524690628051758, |
|
"learning_rate": 3.84896551724138e-06, |
|
"loss": 0.2569, |
|
"step": 9425 |
|
}, |
|
{ |
|
"epoch": 1.6343825665859564, |
|
"grad_norm": 5.824161529541016, |
|
"learning_rate": 3.831724137931035e-06, |
|
"loss": 0.2774, |
|
"step": 9450 |
|
}, |
|
{ |
|
"epoch": 1.638706329989623, |
|
"grad_norm": 4.404734134674072, |
|
"learning_rate": 3.81448275862069e-06, |
|
"loss": 0.2761, |
|
"step": 9475 |
|
}, |
|
{ |
|
"epoch": 1.6430300933932895, |
|
"grad_norm": 4.873973369598389, |
|
"learning_rate": 3.7972413793103454e-06, |
|
"loss": 0.2595, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 1.6473538567969561, |
|
"grad_norm": 5.397193908691406, |
|
"learning_rate": 3.7800000000000002e-06, |
|
"loss": 0.2654, |
|
"step": 9525 |
|
}, |
|
{ |
|
"epoch": 1.6516776202006227, |
|
"grad_norm": 5.546463966369629, |
|
"learning_rate": 3.7627586206896555e-06, |
|
"loss": 0.2579, |
|
"step": 9550 |
|
}, |
|
{ |
|
"epoch": 1.656001383604289, |
|
"grad_norm": 4.908322334289551, |
|
"learning_rate": 3.745517241379311e-06, |
|
"loss": 0.2644, |
|
"step": 9575 |
|
}, |
|
{ |
|
"epoch": 1.6603251470079559, |
|
"grad_norm": 4.3278985023498535, |
|
"learning_rate": 3.7282758620689657e-06, |
|
"loss": 0.2522, |
|
"step": 9600 |
|
}, |
|
{ |
|
"epoch": 1.6646489104116222, |
|
"grad_norm": 3.946744918823242, |
|
"learning_rate": 3.711034482758621e-06, |
|
"loss": 0.2612, |
|
"step": 9625 |
|
}, |
|
{ |
|
"epoch": 1.6689726738152888, |
|
"grad_norm": 4.440819263458252, |
|
"learning_rate": 3.693793103448276e-06, |
|
"loss": 0.2743, |
|
"step": 9650 |
|
}, |
|
{ |
|
"epoch": 1.6732964372189554, |
|
"grad_norm": 4.908570766448975, |
|
"learning_rate": 3.676551724137931e-06, |
|
"loss": 0.2556, |
|
"step": 9675 |
|
}, |
|
{ |
|
"epoch": 1.677620200622622, |
|
"grad_norm": 6.872124195098877, |
|
"learning_rate": 3.6593103448275864e-06, |
|
"loss": 0.2755, |
|
"step": 9700 |
|
}, |
|
{ |
|
"epoch": 1.6819439640262885, |
|
"grad_norm": 5.950398921966553, |
|
"learning_rate": 3.6420689655172413e-06, |
|
"loss": 0.258, |
|
"step": 9725 |
|
}, |
|
{ |
|
"epoch": 1.686267727429955, |
|
"grad_norm": 5.322935581207275, |
|
"learning_rate": 3.624827586206897e-06, |
|
"loss": 0.2603, |
|
"step": 9750 |
|
}, |
|
{ |
|
"epoch": 1.6905914908336217, |
|
"grad_norm": 4.969674587249756, |
|
"learning_rate": 3.6075862068965523e-06, |
|
"loss": 0.2719, |
|
"step": 9775 |
|
}, |
|
{ |
|
"epoch": 1.694915254237288, |
|
"grad_norm": 4.605515003204346, |
|
"learning_rate": 3.590344827586207e-06, |
|
"loss": 0.2731, |
|
"step": 9800 |
|
}, |
|
{ |
|
"epoch": 1.6992390176409546, |
|
"grad_norm": 4.371634006500244, |
|
"learning_rate": 3.5731034482758625e-06, |
|
"loss": 0.2581, |
|
"step": 9825 |
|
}, |
|
{ |
|
"epoch": 1.7035627810446212, |
|
"grad_norm": 5.099359512329102, |
|
"learning_rate": 3.5558620689655178e-06, |
|
"loss": 0.2702, |
|
"step": 9850 |
|
}, |
|
{ |
|
"epoch": 1.7078865444482878, |
|
"grad_norm": 5.02194881439209, |
|
"learning_rate": 3.5386206896551726e-06, |
|
"loss": 0.2575, |
|
"step": 9875 |
|
}, |
|
{ |
|
"epoch": 1.7122103078519544, |
|
"grad_norm": 5.109201431274414, |
|
"learning_rate": 3.521379310344828e-06, |
|
"loss": 0.2452, |
|
"step": 9900 |
|
}, |
|
{ |
|
"epoch": 1.7165340712556207, |
|
"grad_norm": 4.52341890335083, |
|
"learning_rate": 3.5041379310344832e-06, |
|
"loss": 0.2708, |
|
"step": 9925 |
|
}, |
|
{ |
|
"epoch": 1.7208578346592875, |
|
"grad_norm": 4.951146602630615, |
|
"learning_rate": 3.486896551724138e-06, |
|
"loss": 0.2612, |
|
"step": 9950 |
|
}, |
|
{ |
|
"epoch": 1.725181598062954, |
|
"grad_norm": 5.593024730682373, |
|
"learning_rate": 3.4696551724137934e-06, |
|
"loss": 0.2716, |
|
"step": 9975 |
|
}, |
|
{ |
|
"epoch": 1.7295053614666207, |
|
"grad_norm": 5.451072692871094, |
|
"learning_rate": 3.4524137931034487e-06, |
|
"loss": 0.2623, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 1.7295053614666207, |
|
"eval_loss": 0.26585614681243896, |
|
"eval_runtime": 5142.3561, |
|
"eval_samples_per_second": 4.497, |
|
"eval_steps_per_second": 0.562, |
|
"eval_wer": 33.59979570177145, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 1.733829124870287, |
|
"grad_norm": 4.803134441375732, |
|
"learning_rate": 3.4351724137931036e-06, |
|
"loss": 0.2573, |
|
"step": 10025 |
|
}, |
|
{ |
|
"epoch": 1.7381528882739536, |
|
"grad_norm": 4.306927680969238, |
|
"learning_rate": 3.417931034482759e-06, |
|
"loss": 0.2617, |
|
"step": 10050 |
|
}, |
|
{ |
|
"epoch": 1.7424766516776202, |
|
"grad_norm": 5.380494594573975, |
|
"learning_rate": 3.4006896551724137e-06, |
|
"loss": 0.2653, |
|
"step": 10075 |
|
}, |
|
{ |
|
"epoch": 1.7468004150812868, |
|
"grad_norm": 4.847281455993652, |
|
"learning_rate": 3.383448275862069e-06, |
|
"loss": 0.267, |
|
"step": 10100 |
|
}, |
|
{ |
|
"epoch": 1.7511241784849534, |
|
"grad_norm": 4.344338893890381, |
|
"learning_rate": 3.3662068965517243e-06, |
|
"loss": 0.2612, |
|
"step": 10125 |
|
}, |
|
{ |
|
"epoch": 1.7554479418886197, |
|
"grad_norm": 5.100605487823486, |
|
"learning_rate": 3.34896551724138e-06, |
|
"loss": 0.2624, |
|
"step": 10150 |
|
}, |
|
{ |
|
"epoch": 1.7597717052922865, |
|
"grad_norm": 6.067707061767578, |
|
"learning_rate": 3.331724137931035e-06, |
|
"loss": 0.2797, |
|
"step": 10175 |
|
}, |
|
{ |
|
"epoch": 1.764095468695953, |
|
"grad_norm": 4.3519392013549805, |
|
"learning_rate": 3.31448275862069e-06, |
|
"loss": 0.2607, |
|
"step": 10200 |
|
}, |
|
{ |
|
"epoch": 1.7684192320996195, |
|
"grad_norm": 4.383536338806152, |
|
"learning_rate": 3.297241379310345e-06, |
|
"loss": 0.2516, |
|
"step": 10225 |
|
}, |
|
{ |
|
"epoch": 1.772742995503286, |
|
"grad_norm": 5.012591361999512, |
|
"learning_rate": 3.2800000000000004e-06, |
|
"loss": 0.2724, |
|
"step": 10250 |
|
}, |
|
{ |
|
"epoch": 1.7770667589069526, |
|
"grad_norm": 4.1687774658203125, |
|
"learning_rate": 3.2627586206896557e-06, |
|
"loss": 0.2698, |
|
"step": 10275 |
|
}, |
|
{ |
|
"epoch": 1.7813905223106192, |
|
"grad_norm": 5.5503034591674805, |
|
"learning_rate": 3.2455172413793105e-06, |
|
"loss": 0.2652, |
|
"step": 10300 |
|
}, |
|
{ |
|
"epoch": 1.7857142857142856, |
|
"grad_norm": 5.016941547393799, |
|
"learning_rate": 3.228275862068966e-06, |
|
"loss": 0.248, |
|
"step": 10325 |
|
}, |
|
{ |
|
"epoch": 1.7900380491179524, |
|
"grad_norm": 4.316959381103516, |
|
"learning_rate": 3.211034482758621e-06, |
|
"loss": 0.2598, |
|
"step": 10350 |
|
}, |
|
{ |
|
"epoch": 1.7943618125216187, |
|
"grad_norm": 6.067855358123779, |
|
"learning_rate": 3.193793103448276e-06, |
|
"loss": 0.2677, |
|
"step": 10375 |
|
}, |
|
{ |
|
"epoch": 1.7986855759252853, |
|
"grad_norm": 5.613093852996826, |
|
"learning_rate": 3.1765517241379313e-06, |
|
"loss": 0.2679, |
|
"step": 10400 |
|
}, |
|
{ |
|
"epoch": 1.803009339328952, |
|
"grad_norm": 4.065937519073486, |
|
"learning_rate": 3.1593103448275866e-06, |
|
"loss": 0.2634, |
|
"step": 10425 |
|
}, |
|
{ |
|
"epoch": 1.8073331027326185, |
|
"grad_norm": 5.0161638259887695, |
|
"learning_rate": 3.1427586206896555e-06, |
|
"loss": 0.2706, |
|
"step": 10450 |
|
}, |
|
{ |
|
"epoch": 1.811656866136285, |
|
"grad_norm": 6.777268886566162, |
|
"learning_rate": 3.1255172413793104e-06, |
|
"loss": 0.2756, |
|
"step": 10475 |
|
}, |
|
{ |
|
"epoch": 1.8159806295399514, |
|
"grad_norm": 6.550757884979248, |
|
"learning_rate": 3.1082758620689657e-06, |
|
"loss": 0.2704, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 1.8203043929436182, |
|
"grad_norm": 5.1796393394470215, |
|
"learning_rate": 3.091034482758621e-06, |
|
"loss": 0.2707, |
|
"step": 10525 |
|
}, |
|
{ |
|
"epoch": 1.8246281563472846, |
|
"grad_norm": 5.0789570808410645, |
|
"learning_rate": 3.073793103448276e-06, |
|
"loss": 0.2592, |
|
"step": 10550 |
|
}, |
|
{ |
|
"epoch": 1.8289519197509514, |
|
"grad_norm": 4.78272819519043, |
|
"learning_rate": 3.056551724137931e-06, |
|
"loss": 0.2616, |
|
"step": 10575 |
|
}, |
|
{ |
|
"epoch": 1.8332756831546178, |
|
"grad_norm": 4.711808681488037, |
|
"learning_rate": 3.0393103448275864e-06, |
|
"loss": 0.2729, |
|
"step": 10600 |
|
}, |
|
{ |
|
"epoch": 1.8375994465582843, |
|
"grad_norm": 4.821509838104248, |
|
"learning_rate": 3.0220689655172413e-06, |
|
"loss": 0.2522, |
|
"step": 10625 |
|
}, |
|
{ |
|
"epoch": 1.841923209961951, |
|
"grad_norm": 4.584725379943848, |
|
"learning_rate": 3.004827586206897e-06, |
|
"loss": 0.2639, |
|
"step": 10650 |
|
}, |
|
{ |
|
"epoch": 1.8462469733656173, |
|
"grad_norm": 6.850635051727295, |
|
"learning_rate": 2.9875862068965523e-06, |
|
"loss": 0.2658, |
|
"step": 10675 |
|
}, |
|
{ |
|
"epoch": 1.850570736769284, |
|
"grad_norm": 6.138273239135742, |
|
"learning_rate": 2.970344827586207e-06, |
|
"loss": 0.2735, |
|
"step": 10700 |
|
}, |
|
{ |
|
"epoch": 1.8548945001729504, |
|
"grad_norm": 5.5177507400512695, |
|
"learning_rate": 2.9531034482758625e-06, |
|
"loss": 0.2497, |
|
"step": 10725 |
|
}, |
|
{ |
|
"epoch": 1.8592182635766172, |
|
"grad_norm": 5.395568370819092, |
|
"learning_rate": 2.9358620689655178e-06, |
|
"loss": 0.2556, |
|
"step": 10750 |
|
}, |
|
{ |
|
"epoch": 1.8635420269802836, |
|
"grad_norm": 3.9228525161743164, |
|
"learning_rate": 2.9186206896551727e-06, |
|
"loss": 0.2697, |
|
"step": 10775 |
|
}, |
|
{ |
|
"epoch": 1.8678657903839502, |
|
"grad_norm": 3.7661633491516113, |
|
"learning_rate": 2.901379310344828e-06, |
|
"loss": 0.24, |
|
"step": 10800 |
|
}, |
|
{ |
|
"epoch": 1.8721895537876168, |
|
"grad_norm": 3.9159317016601562, |
|
"learning_rate": 2.884137931034483e-06, |
|
"loss": 0.2663, |
|
"step": 10825 |
|
}, |
|
{ |
|
"epoch": 1.8765133171912833, |
|
"grad_norm": 5.218613147735596, |
|
"learning_rate": 2.866896551724138e-06, |
|
"loss": 0.2539, |
|
"step": 10850 |
|
}, |
|
{ |
|
"epoch": 1.88083708059495, |
|
"grad_norm": 4.241786479949951, |
|
"learning_rate": 2.8496551724137934e-06, |
|
"loss": 0.2672, |
|
"step": 10875 |
|
}, |
|
{ |
|
"epoch": 1.8851608439986163, |
|
"grad_norm": 5.012778282165527, |
|
"learning_rate": 2.8324137931034483e-06, |
|
"loss": 0.2624, |
|
"step": 10900 |
|
}, |
|
{ |
|
"epoch": 1.889484607402283, |
|
"grad_norm": 5.514505863189697, |
|
"learning_rate": 2.8151724137931036e-06, |
|
"loss": 0.2635, |
|
"step": 10925 |
|
}, |
|
{ |
|
"epoch": 1.8938083708059494, |
|
"grad_norm": 4.5488667488098145, |
|
"learning_rate": 2.797931034482759e-06, |
|
"loss": 0.2456, |
|
"step": 10950 |
|
}, |
|
{ |
|
"epoch": 1.898132134209616, |
|
"grad_norm": 4.1149516105651855, |
|
"learning_rate": 2.7806896551724137e-06, |
|
"loss": 0.2644, |
|
"step": 10975 |
|
}, |
|
{ |
|
"epoch": 1.9024558976132826, |
|
"grad_norm": 5.599881649017334, |
|
"learning_rate": 2.763448275862069e-06, |
|
"loss": 0.2772, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 1.9024558976132826, |
|
"eval_loss": 0.263325035572052, |
|
"eval_runtime": 5127.6582, |
|
"eval_samples_per_second": 4.51, |
|
"eval_steps_per_second": 0.564, |
|
"eval_wer": 33.32895997675873, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 1.9067796610169492, |
|
"grad_norm": 5.128805637359619, |
|
"learning_rate": 2.7462068965517243e-06, |
|
"loss": 0.2599, |
|
"step": 11025 |
|
}, |
|
{ |
|
"epoch": 1.9111034244206158, |
|
"grad_norm": 6.218988418579102, |
|
"learning_rate": 2.728965517241379e-06, |
|
"loss": 0.271, |
|
"step": 11050 |
|
}, |
|
{ |
|
"epoch": 1.9154271878242821, |
|
"grad_norm": 4.934520721435547, |
|
"learning_rate": 2.711724137931035e-06, |
|
"loss": 0.2633, |
|
"step": 11075 |
|
}, |
|
{ |
|
"epoch": 1.919750951227949, |
|
"grad_norm": 5.033214092254639, |
|
"learning_rate": 2.69448275862069e-06, |
|
"loss": 0.2563, |
|
"step": 11100 |
|
}, |
|
{ |
|
"epoch": 1.9240747146316153, |
|
"grad_norm": 5.1451416015625, |
|
"learning_rate": 2.677241379310345e-06, |
|
"loss": 0.2525, |
|
"step": 11125 |
|
}, |
|
{ |
|
"epoch": 1.9283984780352819, |
|
"grad_norm": 5.570995330810547, |
|
"learning_rate": 2.6600000000000004e-06, |
|
"loss": 0.2688, |
|
"step": 11150 |
|
}, |
|
{ |
|
"epoch": 1.9327222414389484, |
|
"grad_norm": 3.9599545001983643, |
|
"learning_rate": 2.6427586206896557e-06, |
|
"loss": 0.2533, |
|
"step": 11175 |
|
}, |
|
{ |
|
"epoch": 1.937046004842615, |
|
"grad_norm": 4.358778476715088, |
|
"learning_rate": 2.6255172413793105e-06, |
|
"loss": 0.2596, |
|
"step": 11200 |
|
}, |
|
{ |
|
"epoch": 1.9413697682462816, |
|
"grad_norm": 4.855038166046143, |
|
"learning_rate": 2.608275862068966e-06, |
|
"loss": 0.2651, |
|
"step": 11225 |
|
}, |
|
{ |
|
"epoch": 1.945693531649948, |
|
"grad_norm": 6.604015350341797, |
|
"learning_rate": 2.5910344827586207e-06, |
|
"loss": 0.2601, |
|
"step": 11250 |
|
}, |
|
{ |
|
"epoch": 1.9500172950536148, |
|
"grad_norm": 5.335809230804443, |
|
"learning_rate": 2.573793103448276e-06, |
|
"loss": 0.2574, |
|
"step": 11275 |
|
}, |
|
{ |
|
"epoch": 1.9543410584572811, |
|
"grad_norm": 4.764793395996094, |
|
"learning_rate": 2.5565517241379313e-06, |
|
"loss": 0.2634, |
|
"step": 11300 |
|
}, |
|
{ |
|
"epoch": 1.958664821860948, |
|
"grad_norm": 5.247544765472412, |
|
"learning_rate": 2.539310344827586e-06, |
|
"loss": 0.2664, |
|
"step": 11325 |
|
}, |
|
{ |
|
"epoch": 1.9629885852646143, |
|
"grad_norm": 4.707311630249023, |
|
"learning_rate": 2.5220689655172414e-06, |
|
"loss": 0.2609, |
|
"step": 11350 |
|
}, |
|
{ |
|
"epoch": 1.9673123486682809, |
|
"grad_norm": 4.89914083480835, |
|
"learning_rate": 2.5048275862068967e-06, |
|
"loss": 0.2578, |
|
"step": 11375 |
|
}, |
|
{ |
|
"epoch": 1.9716361120719474, |
|
"grad_norm": 4.219529151916504, |
|
"learning_rate": 2.487586206896552e-06, |
|
"loss": 0.2609, |
|
"step": 11400 |
|
}, |
|
{ |
|
"epoch": 1.975959875475614, |
|
"grad_norm": 5.0991435050964355, |
|
"learning_rate": 2.4703448275862073e-06, |
|
"loss": 0.2511, |
|
"step": 11425 |
|
}, |
|
{ |
|
"epoch": 1.9802836388792806, |
|
"grad_norm": 4.904223918914795, |
|
"learning_rate": 2.453103448275862e-06, |
|
"loss": 0.257, |
|
"step": 11450 |
|
}, |
|
{ |
|
"epoch": 1.984607402282947, |
|
"grad_norm": 4.135773658752441, |
|
"learning_rate": 2.4358620689655175e-06, |
|
"loss": 0.2673, |
|
"step": 11475 |
|
}, |
|
{ |
|
"epoch": 1.9889311656866138, |
|
"grad_norm": 4.5873332023620605, |
|
"learning_rate": 2.4186206896551724e-06, |
|
"loss": 0.2598, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 1.9932549290902801, |
|
"grad_norm": 5.754453659057617, |
|
"learning_rate": 2.4013793103448277e-06, |
|
"loss": 0.2602, |
|
"step": 11525 |
|
}, |
|
{ |
|
"epoch": 1.9975786924939467, |
|
"grad_norm": 5.299346923828125, |
|
"learning_rate": 2.384137931034483e-06, |
|
"loss": 0.2629, |
|
"step": 11550 |
|
}, |
|
{ |
|
"epoch": 2.0019024558976133, |
|
"grad_norm": 4.596057415008545, |
|
"learning_rate": 2.3668965517241382e-06, |
|
"loss": 0.2577, |
|
"step": 11575 |
|
}, |
|
{ |
|
"epoch": 2.0062262193012796, |
|
"grad_norm": 4.527500629425049, |
|
"learning_rate": 2.3496551724137935e-06, |
|
"loss": 0.2519, |
|
"step": 11600 |
|
}, |
|
{ |
|
"epoch": 2.0105499827049464, |
|
"grad_norm": 4.149202823638916, |
|
"learning_rate": 2.3324137931034484e-06, |
|
"loss": 0.2485, |
|
"step": 11625 |
|
}, |
|
{ |
|
"epoch": 2.014873746108613, |
|
"grad_norm": 4.757724761962891, |
|
"learning_rate": 2.3151724137931037e-06, |
|
"loss": 0.2445, |
|
"step": 11650 |
|
}, |
|
{ |
|
"epoch": 2.0191975095122796, |
|
"grad_norm": 4.565730571746826, |
|
"learning_rate": 2.297931034482759e-06, |
|
"loss": 0.2376, |
|
"step": 11675 |
|
}, |
|
{ |
|
"epoch": 2.023521272915946, |
|
"grad_norm": 4.340237140655518, |
|
"learning_rate": 2.280689655172414e-06, |
|
"loss": 0.2617, |
|
"step": 11700 |
|
}, |
|
{ |
|
"epoch": 2.0278450363196128, |
|
"grad_norm": 5.368824005126953, |
|
"learning_rate": 2.263448275862069e-06, |
|
"loss": 0.2384, |
|
"step": 11725 |
|
}, |
|
{ |
|
"epoch": 2.032168799723279, |
|
"grad_norm": 4.97426176071167, |
|
"learning_rate": 2.246206896551724e-06, |
|
"loss": 0.2501, |
|
"step": 11750 |
|
}, |
|
{ |
|
"epoch": 2.0364925631269455, |
|
"grad_norm": 6.131598949432373, |
|
"learning_rate": 2.2289655172413797e-06, |
|
"loss": 0.2518, |
|
"step": 11775 |
|
}, |
|
{ |
|
"epoch": 2.0408163265306123, |
|
"grad_norm": 4.28442907333374, |
|
"learning_rate": 2.2117241379310346e-06, |
|
"loss": 0.2653, |
|
"step": 11800 |
|
}, |
|
{ |
|
"epoch": 2.0451400899342786, |
|
"grad_norm": 4.612010478973389, |
|
"learning_rate": 2.19448275862069e-06, |
|
"loss": 0.2447, |
|
"step": 11825 |
|
}, |
|
{ |
|
"epoch": 2.0494638533379455, |
|
"grad_norm": 4.856114387512207, |
|
"learning_rate": 2.177241379310345e-06, |
|
"loss": 0.2456, |
|
"step": 11850 |
|
}, |
|
{ |
|
"epoch": 2.053787616741612, |
|
"grad_norm": 5.475619792938232, |
|
"learning_rate": 2.16e-06, |
|
"loss": 0.2516, |
|
"step": 11875 |
|
}, |
|
{ |
|
"epoch": 2.0581113801452786, |
|
"grad_norm": 4.844070911407471, |
|
"learning_rate": 2.1427586206896554e-06, |
|
"loss": 0.2354, |
|
"step": 11900 |
|
}, |
|
{ |
|
"epoch": 2.062435143548945, |
|
"grad_norm": 4.885989189147949, |
|
"learning_rate": 2.1255172413793102e-06, |
|
"loss": 0.2427, |
|
"step": 11925 |
|
}, |
|
{ |
|
"epoch": 2.0667589069526118, |
|
"grad_norm": 5.934988975524902, |
|
"learning_rate": 2.1082758620689655e-06, |
|
"loss": 0.2599, |
|
"step": 11950 |
|
}, |
|
{ |
|
"epoch": 2.071082670356278, |
|
"grad_norm": 5.128411769866943, |
|
"learning_rate": 2.091034482758621e-06, |
|
"loss": 0.2457, |
|
"step": 11975 |
|
}, |
|
{ |
|
"epoch": 2.0754064337599445, |
|
"grad_norm": 4.521295547485352, |
|
"learning_rate": 2.073793103448276e-06, |
|
"loss": 0.2481, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 2.0754064337599445, |
|
"eval_loss": 0.2604476511478424, |
|
"eval_runtime": 5099.3278, |
|
"eval_samples_per_second": 4.535, |
|
"eval_steps_per_second": 0.567, |
|
"eval_wer": 32.88803191925535, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 2.0797301971636113, |
|
"grad_norm": 4.452366828918457, |
|
"learning_rate": 2.0565517241379314e-06, |
|
"loss": 0.2485, |
|
"step": 12025 |
|
}, |
|
{ |
|
"epoch": 2.0840539605672777, |
|
"grad_norm": 4.331884384155273, |
|
"learning_rate": 2.0393103448275863e-06, |
|
"loss": 0.2452, |
|
"step": 12050 |
|
}, |
|
{ |
|
"epoch": 2.0883777239709445, |
|
"grad_norm": 4.478641510009766, |
|
"learning_rate": 2.0220689655172416e-06, |
|
"loss": 0.2485, |
|
"step": 12075 |
|
}, |
|
{ |
|
"epoch": 2.092701487374611, |
|
"grad_norm": 4.945328235626221, |
|
"learning_rate": 2.004827586206897e-06, |
|
"loss": 0.2334, |
|
"step": 12100 |
|
}, |
|
{ |
|
"epoch": 2.0970252507782776, |
|
"grad_norm": 5.300661087036133, |
|
"learning_rate": 1.9875862068965517e-06, |
|
"loss": 0.2457, |
|
"step": 12125 |
|
}, |
|
{ |
|
"epoch": 2.101349014181944, |
|
"grad_norm": 5.748834133148193, |
|
"learning_rate": 1.970344827586207e-06, |
|
"loss": 0.2562, |
|
"step": 12150 |
|
}, |
|
{ |
|
"epoch": 2.1056727775856103, |
|
"grad_norm": 4.078002452850342, |
|
"learning_rate": 1.9531034482758623e-06, |
|
"loss": 0.2496, |
|
"step": 12175 |
|
}, |
|
{ |
|
"epoch": 2.109996540989277, |
|
"grad_norm": 4.0208258628845215, |
|
"learning_rate": 1.9358620689655176e-06, |
|
"loss": 0.2528, |
|
"step": 12200 |
|
}, |
|
{ |
|
"epoch": 2.1143203043929435, |
|
"grad_norm": 5.57705545425415, |
|
"learning_rate": 1.9186206896551725e-06, |
|
"loss": 0.239, |
|
"step": 12225 |
|
}, |
|
{ |
|
"epoch": 2.1186440677966103, |
|
"grad_norm": 4.870556831359863, |
|
"learning_rate": 1.9013793103448278e-06, |
|
"loss": 0.2438, |
|
"step": 12250 |
|
}, |
|
{ |
|
"epoch": 2.1229678312002767, |
|
"grad_norm": 5.422897815704346, |
|
"learning_rate": 1.8841379310344829e-06, |
|
"loss": 0.2332, |
|
"step": 12275 |
|
}, |
|
{ |
|
"epoch": 2.1272915946039435, |
|
"grad_norm": 5.2437052726745605, |
|
"learning_rate": 1.866896551724138e-06, |
|
"loss": 0.2505, |
|
"step": 12300 |
|
}, |
|
{ |
|
"epoch": 2.13161535800761, |
|
"grad_norm": 5.451413631439209, |
|
"learning_rate": 1.8496551724137932e-06, |
|
"loss": 0.249, |
|
"step": 12325 |
|
}, |
|
{ |
|
"epoch": 2.135939121411276, |
|
"grad_norm": 4.326362609863281, |
|
"learning_rate": 1.8324137931034483e-06, |
|
"loss": 0.2428, |
|
"step": 12350 |
|
}, |
|
{ |
|
"epoch": 2.140262884814943, |
|
"grad_norm": 5.135347366333008, |
|
"learning_rate": 1.8151724137931036e-06, |
|
"loss": 0.2514, |
|
"step": 12375 |
|
}, |
|
{ |
|
"epoch": 2.1445866482186093, |
|
"grad_norm": 4.476009368896484, |
|
"learning_rate": 1.797931034482759e-06, |
|
"loss": 0.2474, |
|
"step": 12400 |
|
}, |
|
{ |
|
"epoch": 2.148910411622276, |
|
"grad_norm": 4.706514358520508, |
|
"learning_rate": 1.780689655172414e-06, |
|
"loss": 0.2376, |
|
"step": 12425 |
|
}, |
|
{ |
|
"epoch": 2.1532341750259425, |
|
"grad_norm": 5.396040916442871, |
|
"learning_rate": 1.7641379310344827e-06, |
|
"loss": 0.2595, |
|
"step": 12450 |
|
}, |
|
{ |
|
"epoch": 2.1575579384296093, |
|
"grad_norm": 4.83308744430542, |
|
"learning_rate": 1.7468965517241382e-06, |
|
"loss": 0.2447, |
|
"step": 12475 |
|
}, |
|
{ |
|
"epoch": 2.1618817018332757, |
|
"grad_norm": 4.416563034057617, |
|
"learning_rate": 1.7296551724137933e-06, |
|
"loss": 0.2408, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 2.166205465236942, |
|
"grad_norm": 4.504754543304443, |
|
"learning_rate": 1.7124137931034484e-06, |
|
"loss": 0.2476, |
|
"step": 12525 |
|
}, |
|
{ |
|
"epoch": 2.170529228640609, |
|
"grad_norm": 5.582792282104492, |
|
"learning_rate": 1.6951724137931035e-06, |
|
"loss": 0.2334, |
|
"step": 12550 |
|
}, |
|
{ |
|
"epoch": 2.174852992044275, |
|
"grad_norm": 5.201076030731201, |
|
"learning_rate": 1.6779310344827588e-06, |
|
"loss": 0.2459, |
|
"step": 12575 |
|
}, |
|
{ |
|
"epoch": 2.179176755447942, |
|
"grad_norm": 4.038005828857422, |
|
"learning_rate": 1.6606896551724139e-06, |
|
"loss": 0.2518, |
|
"step": 12600 |
|
}, |
|
{ |
|
"epoch": 2.1835005188516083, |
|
"grad_norm": 4.951401233673096, |
|
"learning_rate": 1.643448275862069e-06, |
|
"loss": 0.2433, |
|
"step": 12625 |
|
}, |
|
{ |
|
"epoch": 2.187824282255275, |
|
"grad_norm": 4.419375896453857, |
|
"learning_rate": 1.6262068965517242e-06, |
|
"loss": 0.2427, |
|
"step": 12650 |
|
}, |
|
{ |
|
"epoch": 2.1921480456589415, |
|
"grad_norm": 4.835983753204346, |
|
"learning_rate": 1.6089655172413795e-06, |
|
"loss": 0.2348, |
|
"step": 12675 |
|
}, |
|
{ |
|
"epoch": 2.196471809062608, |
|
"grad_norm": 4.035714149475098, |
|
"learning_rate": 1.5917241379310346e-06, |
|
"loss": 0.2481, |
|
"step": 12700 |
|
}, |
|
{ |
|
"epoch": 2.2007955724662747, |
|
"grad_norm": 6.195335388183594, |
|
"learning_rate": 1.57448275862069e-06, |
|
"loss": 0.2559, |
|
"step": 12725 |
|
}, |
|
{ |
|
"epoch": 2.205119335869941, |
|
"grad_norm": 4.89235258102417, |
|
"learning_rate": 1.557241379310345e-06, |
|
"loss": 0.2288, |
|
"step": 12750 |
|
}, |
|
{ |
|
"epoch": 2.209443099273608, |
|
"grad_norm": 4.976417541503906, |
|
"learning_rate": 1.54e-06, |
|
"loss": 0.2412, |
|
"step": 12775 |
|
}, |
|
{ |
|
"epoch": 2.213766862677274, |
|
"grad_norm": 4.204568862915039, |
|
"learning_rate": 1.5227586206896552e-06, |
|
"loss": 0.2607, |
|
"step": 12800 |
|
}, |
|
{ |
|
"epoch": 2.218090626080941, |
|
"grad_norm": 4.752575397491455, |
|
"learning_rate": 1.5055172413793105e-06, |
|
"loss": 0.2328, |
|
"step": 12825 |
|
}, |
|
{ |
|
"epoch": 2.2224143894846073, |
|
"grad_norm": 5.109005451202393, |
|
"learning_rate": 1.4882758620689655e-06, |
|
"loss": 0.2472, |
|
"step": 12850 |
|
}, |
|
{ |
|
"epoch": 2.226738152888274, |
|
"grad_norm": 4.870945453643799, |
|
"learning_rate": 1.4710344827586208e-06, |
|
"loss": 0.2411, |
|
"step": 12875 |
|
}, |
|
{ |
|
"epoch": 2.2310619162919405, |
|
"grad_norm": 4.599150657653809, |
|
"learning_rate": 1.4537931034482761e-06, |
|
"loss": 0.2475, |
|
"step": 12900 |
|
}, |
|
{ |
|
"epoch": 2.235385679695607, |
|
"grad_norm": 5.0102057456970215, |
|
"learning_rate": 1.4365517241379312e-06, |
|
"loss": 0.2466, |
|
"step": 12925 |
|
}, |
|
{ |
|
"epoch": 2.2397094430992737, |
|
"grad_norm": 4.145412921905518, |
|
"learning_rate": 1.4193103448275863e-06, |
|
"loss": 0.2484, |
|
"step": 12950 |
|
}, |
|
{ |
|
"epoch": 2.24403320650294, |
|
"grad_norm": 3.842782735824585, |
|
"learning_rate": 1.4020689655172416e-06, |
|
"loss": 0.2357, |
|
"step": 12975 |
|
}, |
|
{ |
|
"epoch": 2.248356969906607, |
|
"grad_norm": 4.585821151733398, |
|
"learning_rate": 1.3848275862068967e-06, |
|
"loss": 0.2436, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 2.248356969906607, |
|
"eval_loss": 0.25910165905952454, |
|
"eval_runtime": 5129.3259, |
|
"eval_samples_per_second": 4.509, |
|
"eval_steps_per_second": 0.564, |
|
"eval_wer": 32.73105965648846, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 2.252680733310273, |
|
"grad_norm": 4.6802568435668945, |
|
"learning_rate": 1.3675862068965517e-06, |
|
"loss": 0.2348, |
|
"step": 13025 |
|
}, |
|
{ |
|
"epoch": 2.25700449671394, |
|
"grad_norm": 5.42781925201416, |
|
"learning_rate": 1.3503448275862068e-06, |
|
"loss": 0.2349, |
|
"step": 13050 |
|
}, |
|
{ |
|
"epoch": 2.2613282601176063, |
|
"grad_norm": 5.2979536056518555, |
|
"learning_rate": 1.3331034482758623e-06, |
|
"loss": 0.2421, |
|
"step": 13075 |
|
}, |
|
{ |
|
"epoch": 2.265652023521273, |
|
"grad_norm": 5.278492450714111, |
|
"learning_rate": 1.3158620689655174e-06, |
|
"loss": 0.2438, |
|
"step": 13100 |
|
}, |
|
{ |
|
"epoch": 2.2699757869249395, |
|
"grad_norm": 4.465871810913086, |
|
"learning_rate": 1.2986206896551725e-06, |
|
"loss": 0.2624, |
|
"step": 13125 |
|
}, |
|
{ |
|
"epoch": 2.274299550328606, |
|
"grad_norm": 4.552392959594727, |
|
"learning_rate": 1.2813793103448278e-06, |
|
"loss": 0.2708, |
|
"step": 13150 |
|
}, |
|
{ |
|
"epoch": 2.2786233137322727, |
|
"grad_norm": 4.50985050201416, |
|
"learning_rate": 1.2641379310344829e-06, |
|
"loss": 0.2437, |
|
"step": 13175 |
|
}, |
|
{ |
|
"epoch": 2.282947077135939, |
|
"grad_norm": 5.000662326812744, |
|
"learning_rate": 1.246896551724138e-06, |
|
"loss": 0.2499, |
|
"step": 13200 |
|
}, |
|
{ |
|
"epoch": 2.287270840539606, |
|
"grad_norm": 4.699297904968262, |
|
"learning_rate": 1.2296551724137932e-06, |
|
"loss": 0.2546, |
|
"step": 13225 |
|
}, |
|
{ |
|
"epoch": 2.291594603943272, |
|
"grad_norm": 4.824137210845947, |
|
"learning_rate": 1.2124137931034483e-06, |
|
"loss": 0.2566, |
|
"step": 13250 |
|
}, |
|
{ |
|
"epoch": 2.295918367346939, |
|
"grad_norm": 5.796444416046143, |
|
"learning_rate": 1.1951724137931036e-06, |
|
"loss": 0.254, |
|
"step": 13275 |
|
}, |
|
{ |
|
"epoch": 2.3002421307506054, |
|
"grad_norm": 4.286495208740234, |
|
"learning_rate": 1.1779310344827587e-06, |
|
"loss": 0.2258, |
|
"step": 13300 |
|
}, |
|
{ |
|
"epoch": 2.3045658941542717, |
|
"grad_norm": 4.737240314483643, |
|
"learning_rate": 1.160689655172414e-06, |
|
"loss": 0.2295, |
|
"step": 13325 |
|
}, |
|
{ |
|
"epoch": 2.3088896575579385, |
|
"grad_norm": 5.5657148361206055, |
|
"learning_rate": 1.143448275862069e-06, |
|
"loss": 0.2516, |
|
"step": 13350 |
|
}, |
|
{ |
|
"epoch": 2.313213420961605, |
|
"grad_norm": 6.122943878173828, |
|
"learning_rate": 1.1262068965517242e-06, |
|
"loss": 0.257, |
|
"step": 13375 |
|
}, |
|
{ |
|
"epoch": 2.3175371843652717, |
|
"grad_norm": 5.030111789703369, |
|
"learning_rate": 1.1089655172413795e-06, |
|
"loss": 0.2409, |
|
"step": 13400 |
|
}, |
|
{ |
|
"epoch": 2.321860947768938, |
|
"grad_norm": 4.784854412078857, |
|
"learning_rate": 1.0917241379310345e-06, |
|
"loss": 0.2371, |
|
"step": 13425 |
|
}, |
|
{ |
|
"epoch": 2.326184711172605, |
|
"grad_norm": 4.606825351715088, |
|
"learning_rate": 1.0744827586206898e-06, |
|
"loss": 0.2496, |
|
"step": 13450 |
|
}, |
|
{ |
|
"epoch": 2.330508474576271, |
|
"grad_norm": 5.06074333190918, |
|
"learning_rate": 1.057241379310345e-06, |
|
"loss": 0.2328, |
|
"step": 13475 |
|
}, |
|
{ |
|
"epoch": 2.3348322379799376, |
|
"grad_norm": 4.836973190307617, |
|
"learning_rate": 1.04e-06, |
|
"loss": 0.2485, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 2.3391560013836044, |
|
"grad_norm": 4.6793365478515625, |
|
"learning_rate": 1.0227586206896553e-06, |
|
"loss": 0.257, |
|
"step": 13525 |
|
}, |
|
{ |
|
"epoch": 2.3434797647872707, |
|
"grad_norm": 4.693976879119873, |
|
"learning_rate": 1.0055172413793104e-06, |
|
"loss": 0.2445, |
|
"step": 13550 |
|
}, |
|
{ |
|
"epoch": 2.3478035281909375, |
|
"grad_norm": 4.091955184936523, |
|
"learning_rate": 9.882758620689657e-07, |
|
"loss": 0.2425, |
|
"step": 13575 |
|
}, |
|
{ |
|
"epoch": 2.352127291594604, |
|
"grad_norm": 4.664127349853516, |
|
"learning_rate": 9.710344827586207e-07, |
|
"loss": 0.2414, |
|
"step": 13600 |
|
}, |
|
{ |
|
"epoch": 2.3564510549982707, |
|
"grad_norm": 4.556197166442871, |
|
"learning_rate": 9.53793103448276e-07, |
|
"loss": 0.2503, |
|
"step": 13625 |
|
}, |
|
{ |
|
"epoch": 2.360774818401937, |
|
"grad_norm": 4.94804573059082, |
|
"learning_rate": 9.365517241379311e-07, |
|
"loss": 0.2394, |
|
"step": 13650 |
|
}, |
|
{ |
|
"epoch": 2.3650985818056034, |
|
"grad_norm": 5.318421840667725, |
|
"learning_rate": 9.193103448275863e-07, |
|
"loss": 0.2515, |
|
"step": 13675 |
|
}, |
|
{ |
|
"epoch": 2.36942234520927, |
|
"grad_norm": 4.128868579864502, |
|
"learning_rate": 9.020689655172414e-07, |
|
"loss": 0.2494, |
|
"step": 13700 |
|
}, |
|
{ |
|
"epoch": 2.3737461086129366, |
|
"grad_norm": 4.886101245880127, |
|
"learning_rate": 8.848275862068967e-07, |
|
"loss": 0.2567, |
|
"step": 13725 |
|
}, |
|
{ |
|
"epoch": 2.3780698720166034, |
|
"grad_norm": 5.137601852416992, |
|
"learning_rate": 8.675862068965518e-07, |
|
"loss": 0.2529, |
|
"step": 13750 |
|
}, |
|
{ |
|
"epoch": 2.3823936354202697, |
|
"grad_norm": 5.788776397705078, |
|
"learning_rate": 8.50344827586207e-07, |
|
"loss": 0.2369, |
|
"step": 13775 |
|
}, |
|
{ |
|
"epoch": 2.3867173988239365, |
|
"grad_norm": 5.918336868286133, |
|
"learning_rate": 8.331034482758621e-07, |
|
"loss": 0.2505, |
|
"step": 13800 |
|
}, |
|
{ |
|
"epoch": 2.391041162227603, |
|
"grad_norm": 4.928556442260742, |
|
"learning_rate": 8.158620689655173e-07, |
|
"loss": 0.2406, |
|
"step": 13825 |
|
}, |
|
{ |
|
"epoch": 2.3953649256312692, |
|
"grad_norm": 5.406356334686279, |
|
"learning_rate": 7.986206896551725e-07, |
|
"loss": 0.2393, |
|
"step": 13850 |
|
}, |
|
{ |
|
"epoch": 2.399688689034936, |
|
"grad_norm": 4.0312018394470215, |
|
"learning_rate": 7.813793103448276e-07, |
|
"loss": 0.2369, |
|
"step": 13875 |
|
}, |
|
{ |
|
"epoch": 2.4040124524386024, |
|
"grad_norm": 5.446890830993652, |
|
"learning_rate": 7.641379310344828e-07, |
|
"loss": 0.2313, |
|
"step": 13900 |
|
}, |
|
{ |
|
"epoch": 2.408336215842269, |
|
"grad_norm": 5.433596611022949, |
|
"learning_rate": 7.468965517241381e-07, |
|
"loss": 0.2434, |
|
"step": 13925 |
|
}, |
|
{ |
|
"epoch": 2.4126599792459356, |
|
"grad_norm": 3.490582227706909, |
|
"learning_rate": 7.296551724137932e-07, |
|
"loss": 0.2268, |
|
"step": 13950 |
|
}, |
|
{ |
|
"epoch": 2.4169837426496024, |
|
"grad_norm": 4.078389644622803, |
|
"learning_rate": 7.124137931034484e-07, |
|
"loss": 0.2397, |
|
"step": 13975 |
|
}, |
|
{ |
|
"epoch": 2.4213075060532687, |
|
"grad_norm": 5.175432205200195, |
|
"learning_rate": 6.951724137931034e-07, |
|
"loss": 0.243, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 2.4213075060532687, |
|
"eval_loss": 0.2580419182777405, |
|
"eval_runtime": 5125.701, |
|
"eval_samples_per_second": 4.512, |
|
"eval_steps_per_second": 0.564, |
|
"eval_wer": 32.74277400445614, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 2.425631269456935, |
|
"grad_norm": 5.08500337600708, |
|
"learning_rate": 6.779310344827587e-07, |
|
"loss": 0.2431, |
|
"step": 14025 |
|
}, |
|
{ |
|
"epoch": 2.429955032860602, |
|
"grad_norm": 4.165167331695557, |
|
"learning_rate": 6.606896551724139e-07, |
|
"loss": 0.2549, |
|
"step": 14050 |
|
}, |
|
{ |
|
"epoch": 2.4342787962642682, |
|
"grad_norm": 4.766578197479248, |
|
"learning_rate": 6.43448275862069e-07, |
|
"loss": 0.2325, |
|
"step": 14075 |
|
}, |
|
{ |
|
"epoch": 2.438602559667935, |
|
"grad_norm": 4.852494716644287, |
|
"learning_rate": 6.262068965517242e-07, |
|
"loss": 0.2471, |
|
"step": 14100 |
|
}, |
|
{ |
|
"epoch": 2.4429263230716014, |
|
"grad_norm": 5.093419551849365, |
|
"learning_rate": 6.089655172413794e-07, |
|
"loss": 0.2543, |
|
"step": 14125 |
|
}, |
|
{ |
|
"epoch": 2.447250086475268, |
|
"grad_norm": 6.8064045906066895, |
|
"learning_rate": 5.917241379310346e-07, |
|
"loss": 0.2446, |
|
"step": 14150 |
|
}, |
|
{ |
|
"epoch": 2.4515738498789346, |
|
"grad_norm": 6.039092063903809, |
|
"learning_rate": 5.744827586206897e-07, |
|
"loss": 0.2437, |
|
"step": 14175 |
|
}, |
|
{ |
|
"epoch": 2.4558976132826014, |
|
"grad_norm": 4.570720672607422, |
|
"learning_rate": 5.572413793103449e-07, |
|
"loss": 0.2357, |
|
"step": 14200 |
|
}, |
|
{ |
|
"epoch": 2.4602213766862677, |
|
"grad_norm": 6.016234397888184, |
|
"learning_rate": 5.4e-07, |
|
"loss": 0.2389, |
|
"step": 14225 |
|
}, |
|
{ |
|
"epoch": 2.4645451400899345, |
|
"grad_norm": 5.1399407386779785, |
|
"learning_rate": 5.227586206896552e-07, |
|
"loss": 0.2551, |
|
"step": 14250 |
|
}, |
|
{ |
|
"epoch": 2.468868903493601, |
|
"grad_norm": 4.784331798553467, |
|
"learning_rate": 5.055172413793104e-07, |
|
"loss": 0.2292, |
|
"step": 14275 |
|
}, |
|
{ |
|
"epoch": 2.4731926668972672, |
|
"grad_norm": 4.694582462310791, |
|
"learning_rate": 4.882758620689656e-07, |
|
"loss": 0.238, |
|
"step": 14300 |
|
}, |
|
{ |
|
"epoch": 2.477516430300934, |
|
"grad_norm": 4.42811393737793, |
|
"learning_rate": 4.710344827586207e-07, |
|
"loss": 0.2471, |
|
"step": 14325 |
|
}, |
|
{ |
|
"epoch": 2.4818401937046004, |
|
"grad_norm": 5.3042731285095215, |
|
"learning_rate": 4.537931034482759e-07, |
|
"loss": 0.2469, |
|
"step": 14350 |
|
}, |
|
{ |
|
"epoch": 2.486163957108267, |
|
"grad_norm": 5.875729560852051, |
|
"learning_rate": 4.365517241379311e-07, |
|
"loss": 0.2438, |
|
"step": 14375 |
|
}, |
|
{ |
|
"epoch": 2.4904877205119336, |
|
"grad_norm": 4.108542442321777, |
|
"learning_rate": 4.193103448275863e-07, |
|
"loss": 0.2482, |
|
"step": 14400 |
|
}, |
|
{ |
|
"epoch": 2.4948114839156004, |
|
"grad_norm": 4.775691986083984, |
|
"learning_rate": 4.020689655172414e-07, |
|
"loss": 0.2374, |
|
"step": 14425 |
|
}, |
|
{ |
|
"epoch": 2.4991352473192667, |
|
"grad_norm": 4.536764621734619, |
|
"learning_rate": 3.848275862068966e-07, |
|
"loss": 0.2399, |
|
"step": 14450 |
|
}, |
|
{ |
|
"epoch": 2.503459010722933, |
|
"grad_norm": 3.924809694290161, |
|
"learning_rate": 3.6758620689655174e-07, |
|
"loss": 0.2518, |
|
"step": 14475 |
|
}, |
|
{ |
|
"epoch": 2.5077827741266, |
|
"grad_norm": 4.728981971740723, |
|
"learning_rate": 3.50344827586207e-07, |
|
"loss": 0.2688, |
|
"step": 14500 |
|
}, |
|
{ |
|
"epoch": 2.5121065375302662, |
|
"grad_norm": 4.350174903869629, |
|
"learning_rate": 3.331034482758621e-07, |
|
"loss": 0.2495, |
|
"step": 14525 |
|
}, |
|
{ |
|
"epoch": 2.516430300933933, |
|
"grad_norm": 5.987773418426514, |
|
"learning_rate": 3.158620689655173e-07, |
|
"loss": 0.2436, |
|
"step": 14550 |
|
}, |
|
{ |
|
"epoch": 2.5207540643375994, |
|
"grad_norm": 3.9468040466308594, |
|
"learning_rate": 2.9862068965517244e-07, |
|
"loss": 0.2241, |
|
"step": 14575 |
|
}, |
|
{ |
|
"epoch": 2.525077827741266, |
|
"grad_norm": 4.972886562347412, |
|
"learning_rate": 2.813793103448276e-07, |
|
"loss": 0.2383, |
|
"step": 14600 |
|
}, |
|
{ |
|
"epoch": 2.5294015911449326, |
|
"grad_norm": 5.958413600921631, |
|
"learning_rate": 2.6413793103448276e-07, |
|
"loss": 0.2435, |
|
"step": 14625 |
|
}, |
|
{ |
|
"epoch": 2.533725354548599, |
|
"grad_norm": 4.814051151275635, |
|
"learning_rate": 2.4689655172413795e-07, |
|
"loss": 0.2485, |
|
"step": 14650 |
|
}, |
|
{ |
|
"epoch": 2.5380491179522657, |
|
"grad_norm": 5.348843574523926, |
|
"learning_rate": 2.2965517241379313e-07, |
|
"loss": 0.2542, |
|
"step": 14675 |
|
}, |
|
{ |
|
"epoch": 2.542372881355932, |
|
"grad_norm": 4.570481300354004, |
|
"learning_rate": 2.124137931034483e-07, |
|
"loss": 0.2415, |
|
"step": 14700 |
|
}, |
|
{ |
|
"epoch": 2.546696644759599, |
|
"grad_norm": 4.454525470733643, |
|
"learning_rate": 1.9517241379310346e-07, |
|
"loss": 0.2356, |
|
"step": 14725 |
|
}, |
|
{ |
|
"epoch": 2.5510204081632653, |
|
"grad_norm": 6.421968460083008, |
|
"learning_rate": 1.7793103448275864e-07, |
|
"loss": 0.2421, |
|
"step": 14750 |
|
}, |
|
{ |
|
"epoch": 2.555344171566932, |
|
"grad_norm": 4.840851306915283, |
|
"learning_rate": 1.606896551724138e-07, |
|
"loss": 0.2446, |
|
"step": 14775 |
|
}, |
|
{ |
|
"epoch": 2.5596679349705984, |
|
"grad_norm": 5.429544925689697, |
|
"learning_rate": 1.4344827586206897e-07, |
|
"loss": 0.2477, |
|
"step": 14800 |
|
}, |
|
{ |
|
"epoch": 2.5639916983742648, |
|
"grad_norm": 5.538628101348877, |
|
"learning_rate": 1.2620689655172415e-07, |
|
"loss": 0.2475, |
|
"step": 14825 |
|
}, |
|
{ |
|
"epoch": 2.5683154617779316, |
|
"grad_norm": 4.517398357391357, |
|
"learning_rate": 1.0896551724137932e-07, |
|
"loss": 0.2448, |
|
"step": 14850 |
|
}, |
|
{ |
|
"epoch": 2.572639225181598, |
|
"grad_norm": 4.563333034515381, |
|
"learning_rate": 9.172413793103449e-08, |
|
"loss": 0.2316, |
|
"step": 14875 |
|
}, |
|
{ |
|
"epoch": 2.5769629885852647, |
|
"grad_norm": 4.876601696014404, |
|
"learning_rate": 7.448275862068966e-08, |
|
"loss": 0.2378, |
|
"step": 14900 |
|
}, |
|
{ |
|
"epoch": 2.581286751988931, |
|
"grad_norm": 4.932113170623779, |
|
"learning_rate": 5.724137931034483e-08, |
|
"loss": 0.2549, |
|
"step": 14925 |
|
}, |
|
{ |
|
"epoch": 2.585610515392598, |
|
"grad_norm": 5.311142921447754, |
|
"learning_rate": 4e-08, |
|
"loss": 0.2477, |
|
"step": 14950 |
|
}, |
|
{ |
|
"epoch": 2.5899342787962643, |
|
"grad_norm": 5.145079135894775, |
|
"learning_rate": 2.2758620689655175e-08, |
|
"loss": 0.2361, |
|
"step": 14975 |
|
}, |
|
{ |
|
"epoch": 2.5942580421999306, |
|
"grad_norm": 4.524856090545654, |
|
"learning_rate": 5.517241379310346e-09, |
|
"loss": 0.2344, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 2.5942580421999306, |
|
"eval_loss": 0.2568697929382324, |
|
"eval_runtime": 5134.1958, |
|
"eval_samples_per_second": 4.504, |
|
"eval_steps_per_second": 0.563, |
|
"eval_wer": 32.56401305446938, |
|
"step": 15000 |
|
} |
|
], |
|
"logging_steps": 25, |
|
"max_steps": 15000, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 3, |
|
"save_steps": 1000, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 5.90799081332736e+18, |
|
"train_batch_size": 16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|