|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 3.1595576619273302, |
|
"eval_steps": 1000, |
|
"global_step": 5000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.01579778830963665, |
|
"grad_norm": 39.78215789794922, |
|
"learning_rate": 4.6000000000000004e-07, |
|
"loss": 1.9397, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.0315955766192733, |
|
"grad_norm": 27.938426971435547, |
|
"learning_rate": 9.600000000000001e-07, |
|
"loss": 1.2937, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.04739336492890995, |
|
"grad_norm": 25.550430297851562, |
|
"learning_rate": 1.46e-06, |
|
"loss": 0.9786, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.0631911532385466, |
|
"grad_norm": 26.635549545288086, |
|
"learning_rate": 1.9600000000000003e-06, |
|
"loss": 0.8818, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.07898894154818326, |
|
"grad_norm": 25.53322410583496, |
|
"learning_rate": 2.46e-06, |
|
"loss": 0.8233, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.0947867298578199, |
|
"grad_norm": 26.468778610229492, |
|
"learning_rate": 2.96e-06, |
|
"loss": 0.7945, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.11058451816745656, |
|
"grad_norm": 22.71087074279785, |
|
"learning_rate": 3.46e-06, |
|
"loss": 0.752, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 0.1263823064770932, |
|
"grad_norm": 21.494964599609375, |
|
"learning_rate": 3.96e-06, |
|
"loss": 0.7655, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.14218009478672985, |
|
"grad_norm": 22.558645248413086, |
|
"learning_rate": 4.4600000000000005e-06, |
|
"loss": 0.72, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 0.1579778830963665, |
|
"grad_norm": 22.013551712036133, |
|
"learning_rate": 4.960000000000001e-06, |
|
"loss": 0.6961, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.17377567140600317, |
|
"grad_norm": 25.887876510620117, |
|
"learning_rate": 5.460000000000001e-06, |
|
"loss": 0.6897, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 0.1895734597156398, |
|
"grad_norm": 19.806230545043945, |
|
"learning_rate": 5.9600000000000005e-06, |
|
"loss": 0.6839, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.20537124802527645, |
|
"grad_norm": 21.767576217651367, |
|
"learning_rate": 6.460000000000001e-06, |
|
"loss": 0.6767, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 0.2211690363349131, |
|
"grad_norm": 19.838890075683594, |
|
"learning_rate": 6.96e-06, |
|
"loss": 0.6637, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.23696682464454977, |
|
"grad_norm": 22.119140625, |
|
"learning_rate": 7.4600000000000006e-06, |
|
"loss": 0.6811, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 0.2527646129541864, |
|
"grad_norm": 21.972688674926758, |
|
"learning_rate": 7.960000000000002e-06, |
|
"loss": 0.6843, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.2685624012638231, |
|
"grad_norm": 21.99839973449707, |
|
"learning_rate": 8.46e-06, |
|
"loss": 0.6487, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 0.2843601895734597, |
|
"grad_norm": 18.968303680419922, |
|
"learning_rate": 8.96e-06, |
|
"loss": 0.6528, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.3001579778830964, |
|
"grad_norm": 20.77776336669922, |
|
"learning_rate": 9.460000000000001e-06, |
|
"loss": 0.6346, |
|
"step": 475 |
|
}, |
|
{ |
|
"epoch": 0.315955766192733, |
|
"grad_norm": 17.947195053100586, |
|
"learning_rate": 9.960000000000001e-06, |
|
"loss": 0.6105, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.33175355450236965, |
|
"grad_norm": 17.2167911529541, |
|
"learning_rate": 9.94888888888889e-06, |
|
"loss": 0.6399, |
|
"step": 525 |
|
}, |
|
{ |
|
"epoch": 0.34755134281200634, |
|
"grad_norm": 19.390045166015625, |
|
"learning_rate": 9.893333333333334e-06, |
|
"loss": 0.5825, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.36334913112164297, |
|
"grad_norm": 17.195106506347656, |
|
"learning_rate": 9.837777777777778e-06, |
|
"loss": 0.5895, |
|
"step": 575 |
|
}, |
|
{ |
|
"epoch": 0.3791469194312796, |
|
"grad_norm": 18.29102325439453, |
|
"learning_rate": 9.782222222222222e-06, |
|
"loss": 0.621, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.3949447077409163, |
|
"grad_norm": 16.213546752929688, |
|
"learning_rate": 9.726666666666668e-06, |
|
"loss": 0.587, |
|
"step": 625 |
|
}, |
|
{ |
|
"epoch": 0.4107424960505529, |
|
"grad_norm": 15.86738109588623, |
|
"learning_rate": 9.671111111111112e-06, |
|
"loss": 0.5765, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.4265402843601896, |
|
"grad_norm": 17.446897506713867, |
|
"learning_rate": 9.617777777777778e-06, |
|
"loss": 0.5914, |
|
"step": 675 |
|
}, |
|
{ |
|
"epoch": 0.4423380726698262, |
|
"grad_norm": 15.107172966003418, |
|
"learning_rate": 9.562222222222223e-06, |
|
"loss": 0.5517, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.45813586097946285, |
|
"grad_norm": 16.07261848449707, |
|
"learning_rate": 9.506666666666667e-06, |
|
"loss": 0.5523, |
|
"step": 725 |
|
}, |
|
{ |
|
"epoch": 0.47393364928909953, |
|
"grad_norm": 15.2976655960083, |
|
"learning_rate": 9.451111111111112e-06, |
|
"loss": 0.5419, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.48973143759873616, |
|
"grad_norm": 16.560869216918945, |
|
"learning_rate": 9.395555555555556e-06, |
|
"loss": 0.5763, |
|
"step": 775 |
|
}, |
|
{ |
|
"epoch": 0.5055292259083728, |
|
"grad_norm": 18.58966064453125, |
|
"learning_rate": 9.340000000000002e-06, |
|
"loss": 0.5497, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.5213270142180095, |
|
"grad_norm": 16.940420150756836, |
|
"learning_rate": 9.284444444444444e-06, |
|
"loss": 0.5177, |
|
"step": 825 |
|
}, |
|
{ |
|
"epoch": 0.5371248025276462, |
|
"grad_norm": 12.964641571044922, |
|
"learning_rate": 9.22888888888889e-06, |
|
"loss": 0.5293, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.5529225908372828, |
|
"grad_norm": 16.428470611572266, |
|
"learning_rate": 9.173333333333334e-06, |
|
"loss": 0.5062, |
|
"step": 875 |
|
}, |
|
{ |
|
"epoch": 0.5687203791469194, |
|
"grad_norm": 17.921024322509766, |
|
"learning_rate": 9.117777777777778e-06, |
|
"loss": 0.5471, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.584518167456556, |
|
"grad_norm": 14.068251609802246, |
|
"learning_rate": 9.062222222222224e-06, |
|
"loss": 0.5072, |
|
"step": 925 |
|
}, |
|
{ |
|
"epoch": 0.6003159557661928, |
|
"grad_norm": 13.403594017028809, |
|
"learning_rate": 9.006666666666666e-06, |
|
"loss": 0.5051, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.6161137440758294, |
|
"grad_norm": 14.557646751403809, |
|
"learning_rate": 8.951111111111112e-06, |
|
"loss": 0.5391, |
|
"step": 975 |
|
}, |
|
{ |
|
"epoch": 0.631911532385466, |
|
"grad_norm": 15.385436058044434, |
|
"learning_rate": 8.895555555555556e-06, |
|
"loss": 0.5037, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.631911532385466, |
|
"eval_loss": 0.551193118095398, |
|
"eval_runtime": 1363.4745, |
|
"eval_samples_per_second": 2.861, |
|
"eval_steps_per_second": 0.179, |
|
"eval_wer": 0.38095238095238093, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.6477093206951027, |
|
"grad_norm": 13.815781593322754, |
|
"learning_rate": 8.84e-06, |
|
"loss": 0.4888, |
|
"step": 1025 |
|
}, |
|
{ |
|
"epoch": 0.6635071090047393, |
|
"grad_norm": 16.213642120361328, |
|
"learning_rate": 8.784444444444446e-06, |
|
"loss": 0.4865, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 0.6793048973143759, |
|
"grad_norm": 14.779074668884277, |
|
"learning_rate": 8.72888888888889e-06, |
|
"loss": 0.4963, |
|
"step": 1075 |
|
}, |
|
{ |
|
"epoch": 0.6951026856240127, |
|
"grad_norm": 13.794785499572754, |
|
"learning_rate": 8.673333333333334e-06, |
|
"loss": 0.482, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.7109004739336493, |
|
"grad_norm": 14.701066970825195, |
|
"learning_rate": 8.617777777777778e-06, |
|
"loss": 0.5027, |
|
"step": 1125 |
|
}, |
|
{ |
|
"epoch": 0.7266982622432859, |
|
"grad_norm": 14.537113189697266, |
|
"learning_rate": 8.562222222222224e-06, |
|
"loss": 0.4798, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 0.7424960505529226, |
|
"grad_norm": 14.886212348937988, |
|
"learning_rate": 8.506666666666668e-06, |
|
"loss": 0.4777, |
|
"step": 1175 |
|
}, |
|
{ |
|
"epoch": 0.7582938388625592, |
|
"grad_norm": 13.01016616821289, |
|
"learning_rate": 8.451111111111112e-06, |
|
"loss": 0.4736, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.7740916271721959, |
|
"grad_norm": 14.213628768920898, |
|
"learning_rate": 8.395555555555557e-06, |
|
"loss": 0.4808, |
|
"step": 1225 |
|
}, |
|
{ |
|
"epoch": 0.7898894154818326, |
|
"grad_norm": 14.541191101074219, |
|
"learning_rate": 8.34e-06, |
|
"loss": 0.5008, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.8056872037914692, |
|
"grad_norm": 14.092310905456543, |
|
"learning_rate": 8.284444444444446e-06, |
|
"loss": 0.4693, |
|
"step": 1275 |
|
}, |
|
{ |
|
"epoch": 0.8214849921011058, |
|
"grad_norm": 14.289324760437012, |
|
"learning_rate": 8.22888888888889e-06, |
|
"loss": 0.4418, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.8372827804107424, |
|
"grad_norm": 13.257657051086426, |
|
"learning_rate": 8.173333333333334e-06, |
|
"loss": 0.4643, |
|
"step": 1325 |
|
}, |
|
{ |
|
"epoch": 0.8530805687203792, |
|
"grad_norm": 15.05517864227295, |
|
"learning_rate": 8.11777777777778e-06, |
|
"loss": 0.4751, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 0.8688783570300158, |
|
"grad_norm": 13.352594375610352, |
|
"learning_rate": 8.062222222222222e-06, |
|
"loss": 0.442, |
|
"step": 1375 |
|
}, |
|
{ |
|
"epoch": 0.8846761453396524, |
|
"grad_norm": 12.487988471984863, |
|
"learning_rate": 8.006666666666667e-06, |
|
"loss": 0.4496, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.9004739336492891, |
|
"grad_norm": 13.963912963867188, |
|
"learning_rate": 7.951111111111111e-06, |
|
"loss": 0.4415, |
|
"step": 1425 |
|
}, |
|
{ |
|
"epoch": 0.9162717219589257, |
|
"grad_norm": 13.90829086303711, |
|
"learning_rate": 7.895555555555557e-06, |
|
"loss": 0.4349, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 0.9320695102685624, |
|
"grad_norm": 16.863481521606445, |
|
"learning_rate": 7.840000000000001e-06, |
|
"loss": 0.4722, |
|
"step": 1475 |
|
}, |
|
{ |
|
"epoch": 0.9478672985781991, |
|
"grad_norm": 13.991209983825684, |
|
"learning_rate": 7.784444444444445e-06, |
|
"loss": 0.4397, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.9636650868878357, |
|
"grad_norm": 12.423737525939941, |
|
"learning_rate": 7.72888888888889e-06, |
|
"loss": 0.423, |
|
"step": 1525 |
|
}, |
|
{ |
|
"epoch": 0.9794628751974723, |
|
"grad_norm": 13.574849128723145, |
|
"learning_rate": 7.673333333333333e-06, |
|
"loss": 0.4472, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 0.995260663507109, |
|
"grad_norm": 10.542879104614258, |
|
"learning_rate": 7.617777777777778e-06, |
|
"loss": 0.4319, |
|
"step": 1575 |
|
}, |
|
{ |
|
"epoch": 1.0110584518167456, |
|
"grad_norm": 12.750446319580078, |
|
"learning_rate": 7.562222222222223e-06, |
|
"loss": 0.3401, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 1.0268562401263823, |
|
"grad_norm": 10.037585258483887, |
|
"learning_rate": 7.506666666666668e-06, |
|
"loss": 0.3236, |
|
"step": 1625 |
|
}, |
|
{ |
|
"epoch": 1.042654028436019, |
|
"grad_norm": 12.519506454467773, |
|
"learning_rate": 7.451111111111111e-06, |
|
"loss": 0.3379, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 1.0584518167456556, |
|
"grad_norm": 11.666909217834473, |
|
"learning_rate": 7.395555555555556e-06, |
|
"loss": 0.3082, |
|
"step": 1675 |
|
}, |
|
{ |
|
"epoch": 1.0742496050552923, |
|
"grad_norm": 14.614953994750977, |
|
"learning_rate": 7.340000000000001e-06, |
|
"loss": 0.3272, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 1.0900473933649288, |
|
"grad_norm": 11.243821144104004, |
|
"learning_rate": 7.284444444444445e-06, |
|
"loss": 0.3081, |
|
"step": 1725 |
|
}, |
|
{ |
|
"epoch": 1.1058451816745656, |
|
"grad_norm": 10.896337509155273, |
|
"learning_rate": 7.22888888888889e-06, |
|
"loss": 0.3212, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 1.1216429699842023, |
|
"grad_norm": 11.159407615661621, |
|
"learning_rate": 7.173333333333335e-06, |
|
"loss": 0.3316, |
|
"step": 1775 |
|
}, |
|
{ |
|
"epoch": 1.1374407582938388, |
|
"grad_norm": 10.72888469696045, |
|
"learning_rate": 7.117777777777778e-06, |
|
"loss": 0.3465, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 1.1532385466034756, |
|
"grad_norm": 11.136720657348633, |
|
"learning_rate": 7.062222222222223e-06, |
|
"loss": 0.3219, |
|
"step": 1825 |
|
}, |
|
{ |
|
"epoch": 1.169036334913112, |
|
"grad_norm": 11.062825202941895, |
|
"learning_rate": 7.006666666666667e-06, |
|
"loss": 0.3234, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 1.1848341232227488, |
|
"grad_norm": 13.367472648620605, |
|
"learning_rate": 6.951111111111112e-06, |
|
"loss": 0.3215, |
|
"step": 1875 |
|
}, |
|
{ |
|
"epoch": 1.2006319115323856, |
|
"grad_norm": 12.034098625183105, |
|
"learning_rate": 6.8955555555555565e-06, |
|
"loss": 0.3195, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 1.216429699842022, |
|
"grad_norm": 12.74405574798584, |
|
"learning_rate": 6.8400000000000014e-06, |
|
"loss": 0.3105, |
|
"step": 1925 |
|
}, |
|
{ |
|
"epoch": 1.2322274881516588, |
|
"grad_norm": 14.234502792358398, |
|
"learning_rate": 6.784444444444445e-06, |
|
"loss": 0.3327, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 1.2480252764612954, |
|
"grad_norm": 12.721147537231445, |
|
"learning_rate": 6.7288888888888895e-06, |
|
"loss": 0.3405, |
|
"step": 1975 |
|
}, |
|
{ |
|
"epoch": 1.263823064770932, |
|
"grad_norm": 12.109272003173828, |
|
"learning_rate": 6.6733333333333335e-06, |
|
"loss": 0.3264, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 1.263823064770932, |
|
"eval_loss": 0.4630681872367859, |
|
"eval_runtime": 1327.3738, |
|
"eval_samples_per_second": 2.939, |
|
"eval_steps_per_second": 0.184, |
|
"eval_wer": 0.331425673717763, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 1.2796208530805688, |
|
"grad_norm": 10.781163215637207, |
|
"learning_rate": 6.617777777777778e-06, |
|
"loss": 0.3152, |
|
"step": 2025 |
|
}, |
|
{ |
|
"epoch": 1.2954186413902053, |
|
"grad_norm": 12.081149101257324, |
|
"learning_rate": 6.562222222222223e-06, |
|
"loss": 0.334, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 1.311216429699842, |
|
"grad_norm": 11.082480430603027, |
|
"learning_rate": 6.5066666666666665e-06, |
|
"loss": 0.3182, |
|
"step": 2075 |
|
}, |
|
{ |
|
"epoch": 1.3270142180094786, |
|
"grad_norm": 10.400703430175781, |
|
"learning_rate": 6.451111111111111e-06, |
|
"loss": 0.3209, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 1.3428120063191153, |
|
"grad_norm": 14.698405265808105, |
|
"learning_rate": 6.395555555555556e-06, |
|
"loss": 0.3358, |
|
"step": 2125 |
|
}, |
|
{ |
|
"epoch": 1.358609794628752, |
|
"grad_norm": 11.301855087280273, |
|
"learning_rate": 6.34e-06, |
|
"loss": 0.3274, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 1.3744075829383886, |
|
"grad_norm": 13.15268611907959, |
|
"learning_rate": 6.284444444444445e-06, |
|
"loss": 0.3091, |
|
"step": 2175 |
|
}, |
|
{ |
|
"epoch": 1.3902053712480253, |
|
"grad_norm": 10.712764739990234, |
|
"learning_rate": 6.22888888888889e-06, |
|
"loss": 0.3217, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 1.4060031595576619, |
|
"grad_norm": 9.865320205688477, |
|
"learning_rate": 6.173333333333333e-06, |
|
"loss": 0.3341, |
|
"step": 2225 |
|
}, |
|
{ |
|
"epoch": 1.4218009478672986, |
|
"grad_norm": 11.386091232299805, |
|
"learning_rate": 6.117777777777778e-06, |
|
"loss": 0.308, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 1.4375987361769353, |
|
"grad_norm": 10.972213745117188, |
|
"learning_rate": 6.062222222222223e-06, |
|
"loss": 0.3184, |
|
"step": 2275 |
|
}, |
|
{ |
|
"epoch": 1.4533965244865719, |
|
"grad_norm": 9.059267044067383, |
|
"learning_rate": 6.006666666666667e-06, |
|
"loss": 0.3285, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 1.4691943127962086, |
|
"grad_norm": 10.708878517150879, |
|
"learning_rate": 5.951111111111112e-06, |
|
"loss": 0.288, |
|
"step": 2325 |
|
}, |
|
{ |
|
"epoch": 1.4849921011058451, |
|
"grad_norm": 9.580412864685059, |
|
"learning_rate": 5.895555555555557e-06, |
|
"loss": 0.3133, |
|
"step": 2350 |
|
}, |
|
{ |
|
"epoch": 1.5007898894154819, |
|
"grad_norm": 13.611145973205566, |
|
"learning_rate": 5.84e-06, |
|
"loss": 0.3398, |
|
"step": 2375 |
|
}, |
|
{ |
|
"epoch": 1.5165876777251186, |
|
"grad_norm": 11.563826560974121, |
|
"learning_rate": 5.784444444444445e-06, |
|
"loss": 0.3187, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 1.5323854660347551, |
|
"grad_norm": 11.612549781799316, |
|
"learning_rate": 5.72888888888889e-06, |
|
"loss": 0.3169, |
|
"step": 2425 |
|
}, |
|
{ |
|
"epoch": 1.5481832543443916, |
|
"grad_norm": 9.463521003723145, |
|
"learning_rate": 5.673333333333334e-06, |
|
"loss": 0.3124, |
|
"step": 2450 |
|
}, |
|
{ |
|
"epoch": 1.5639810426540284, |
|
"grad_norm": 14.442208290100098, |
|
"learning_rate": 5.617777777777779e-06, |
|
"loss": 0.3202, |
|
"step": 2475 |
|
}, |
|
{ |
|
"epoch": 1.5797788309636651, |
|
"grad_norm": 11.377900123596191, |
|
"learning_rate": 5.562222222222222e-06, |
|
"loss": 0.317, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 1.5955766192733019, |
|
"grad_norm": 11.9774808883667, |
|
"learning_rate": 5.506666666666667e-06, |
|
"loss": 0.3188, |
|
"step": 2525 |
|
}, |
|
{ |
|
"epoch": 1.6113744075829384, |
|
"grad_norm": 11.492107391357422, |
|
"learning_rate": 5.451111111111112e-06, |
|
"loss": 0.3109, |
|
"step": 2550 |
|
}, |
|
{ |
|
"epoch": 1.627172195892575, |
|
"grad_norm": 12.987119674682617, |
|
"learning_rate": 5.3955555555555565e-06, |
|
"loss": 0.3127, |
|
"step": 2575 |
|
}, |
|
{ |
|
"epoch": 1.6429699842022116, |
|
"grad_norm": 11.403392791748047, |
|
"learning_rate": 5.3400000000000005e-06, |
|
"loss": 0.3007, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 1.6587677725118484, |
|
"grad_norm": 10.354674339294434, |
|
"learning_rate": 5.2844444444444454e-06, |
|
"loss": 0.3129, |
|
"step": 2625 |
|
}, |
|
{ |
|
"epoch": 1.674565560821485, |
|
"grad_norm": 9.936201095581055, |
|
"learning_rate": 5.228888888888889e-06, |
|
"loss": 0.3226, |
|
"step": 2650 |
|
}, |
|
{ |
|
"epoch": 1.6903633491311216, |
|
"grad_norm": 10.426243782043457, |
|
"learning_rate": 5.1733333333333335e-06, |
|
"loss": 0.3021, |
|
"step": 2675 |
|
}, |
|
{ |
|
"epoch": 1.7061611374407581, |
|
"grad_norm": 10.784858703613281, |
|
"learning_rate": 5.117777777777778e-06, |
|
"loss": 0.2923, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 1.7219589257503949, |
|
"grad_norm": 10.411828994750977, |
|
"learning_rate": 5.062222222222222e-06, |
|
"loss": 0.2893, |
|
"step": 2725 |
|
}, |
|
{ |
|
"epoch": 1.7377567140600316, |
|
"grad_norm": 12.027934074401855, |
|
"learning_rate": 5.006666666666667e-06, |
|
"loss": 0.3136, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 1.7535545023696684, |
|
"grad_norm": 10.067774772644043, |
|
"learning_rate": 4.951111111111111e-06, |
|
"loss": 0.296, |
|
"step": 2775 |
|
}, |
|
{ |
|
"epoch": 1.7693522906793049, |
|
"grad_norm": 10.396674156188965, |
|
"learning_rate": 4.895555555555556e-06, |
|
"loss": 0.2973, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 1.7851500789889414, |
|
"grad_norm": 9.719764709472656, |
|
"learning_rate": 4.84e-06, |
|
"loss": 0.2831, |
|
"step": 2825 |
|
}, |
|
{ |
|
"epoch": 1.8009478672985781, |
|
"grad_norm": 11.552470207214355, |
|
"learning_rate": 4.784444444444445e-06, |
|
"loss": 0.2802, |
|
"step": 2850 |
|
}, |
|
{ |
|
"epoch": 1.8167456556082149, |
|
"grad_norm": 10.932677268981934, |
|
"learning_rate": 4.728888888888889e-06, |
|
"loss": 0.3189, |
|
"step": 2875 |
|
}, |
|
{ |
|
"epoch": 1.8325434439178516, |
|
"grad_norm": 12.281967163085938, |
|
"learning_rate": 4.673333333333333e-06, |
|
"loss": 0.3014, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 1.8483412322274881, |
|
"grad_norm": 12.78361988067627, |
|
"learning_rate": 4.617777777777778e-06, |
|
"loss": 0.3265, |
|
"step": 2925 |
|
}, |
|
{ |
|
"epoch": 1.8641390205371247, |
|
"grad_norm": 11.523568153381348, |
|
"learning_rate": 4.562222222222222e-06, |
|
"loss": 0.3062, |
|
"step": 2950 |
|
}, |
|
{ |
|
"epoch": 1.8799368088467614, |
|
"grad_norm": 12.009855270385742, |
|
"learning_rate": 4.506666666666667e-06, |
|
"loss": 0.294, |
|
"step": 2975 |
|
}, |
|
{ |
|
"epoch": 1.8957345971563981, |
|
"grad_norm": 11.980591773986816, |
|
"learning_rate": 4.451111111111112e-06, |
|
"loss": 0.2997, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 1.8957345971563981, |
|
"eval_loss": 0.420254111289978, |
|
"eval_runtime": 1304.4467, |
|
"eval_samples_per_second": 2.991, |
|
"eval_steps_per_second": 0.187, |
|
"eval_wer": 0.30416304452815607, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 1.9115323854660349, |
|
"grad_norm": 9.556490898132324, |
|
"learning_rate": 4.395555555555556e-06, |
|
"loss": 0.3128, |
|
"step": 3025 |
|
}, |
|
{ |
|
"epoch": 1.9273301737756714, |
|
"grad_norm": 12.445517539978027, |
|
"learning_rate": 4.34e-06, |
|
"loss": 0.2898, |
|
"step": 3050 |
|
}, |
|
{ |
|
"epoch": 1.943127962085308, |
|
"grad_norm": 11.485321998596191, |
|
"learning_rate": 4.284444444444445e-06, |
|
"loss": 0.2922, |
|
"step": 3075 |
|
}, |
|
{ |
|
"epoch": 1.9589257503949447, |
|
"grad_norm": 11.148347854614258, |
|
"learning_rate": 4.228888888888889e-06, |
|
"loss": 0.2941, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 1.9747235387045814, |
|
"grad_norm": 8.813661575317383, |
|
"learning_rate": 4.173333333333334e-06, |
|
"loss": 0.2738, |
|
"step": 3125 |
|
}, |
|
{ |
|
"epoch": 1.9905213270142181, |
|
"grad_norm": 9.704155921936035, |
|
"learning_rate": 4.117777777777779e-06, |
|
"loss": 0.3064, |
|
"step": 3150 |
|
}, |
|
{ |
|
"epoch": 2.006319115323855, |
|
"grad_norm": 8.704689979553223, |
|
"learning_rate": 4.062222222222223e-06, |
|
"loss": 0.2478, |
|
"step": 3175 |
|
}, |
|
{ |
|
"epoch": 2.022116903633491, |
|
"grad_norm": 10.47323226928711, |
|
"learning_rate": 4.006666666666667e-06, |
|
"loss": 0.1972, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 2.037914691943128, |
|
"grad_norm": 9.401123046875, |
|
"learning_rate": 3.953333333333333e-06, |
|
"loss": 0.2048, |
|
"step": 3225 |
|
}, |
|
{ |
|
"epoch": 2.0537124802527646, |
|
"grad_norm": 8.968587875366211, |
|
"learning_rate": 3.897777777777778e-06, |
|
"loss": 0.2047, |
|
"step": 3250 |
|
}, |
|
{ |
|
"epoch": 2.0695102685624014, |
|
"grad_norm": 10.138334274291992, |
|
"learning_rate": 3.842222222222223e-06, |
|
"loss": 0.2041, |
|
"step": 3275 |
|
}, |
|
{ |
|
"epoch": 2.085308056872038, |
|
"grad_norm": 9.157357215881348, |
|
"learning_rate": 3.7866666666666667e-06, |
|
"loss": 0.1983, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 2.1011058451816744, |
|
"grad_norm": 11.541014671325684, |
|
"learning_rate": 3.7311111111111116e-06, |
|
"loss": 0.2102, |
|
"step": 3325 |
|
}, |
|
{ |
|
"epoch": 2.116903633491311, |
|
"grad_norm": 13.219682693481445, |
|
"learning_rate": 3.675555555555556e-06, |
|
"loss": 0.2032, |
|
"step": 3350 |
|
}, |
|
{ |
|
"epoch": 2.132701421800948, |
|
"grad_norm": 9.11551284790039, |
|
"learning_rate": 3.62e-06, |
|
"loss": 0.183, |
|
"step": 3375 |
|
}, |
|
{ |
|
"epoch": 2.1484992101105846, |
|
"grad_norm": 7.927098751068115, |
|
"learning_rate": 3.564444444444445e-06, |
|
"loss": 0.2022, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 2.1642969984202214, |
|
"grad_norm": 10.464715003967285, |
|
"learning_rate": 3.508888888888889e-06, |
|
"loss": 0.1877, |
|
"step": 3425 |
|
}, |
|
{ |
|
"epoch": 2.1800947867298577, |
|
"grad_norm": 8.424384117126465, |
|
"learning_rate": 3.4533333333333334e-06, |
|
"loss": 0.1879, |
|
"step": 3450 |
|
}, |
|
{ |
|
"epoch": 2.1958925750394944, |
|
"grad_norm": 7.617179870605469, |
|
"learning_rate": 3.3977777777777783e-06, |
|
"loss": 0.1973, |
|
"step": 3475 |
|
}, |
|
{ |
|
"epoch": 2.211690363349131, |
|
"grad_norm": 9.299885749816895, |
|
"learning_rate": 3.3422222222222224e-06, |
|
"loss": 0.1982, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 2.227488151658768, |
|
"grad_norm": 8.834092140197754, |
|
"learning_rate": 3.286666666666667e-06, |
|
"loss": 0.2003, |
|
"step": 3525 |
|
}, |
|
{ |
|
"epoch": 2.2432859399684046, |
|
"grad_norm": 8.07299518585205, |
|
"learning_rate": 3.2311111111111117e-06, |
|
"loss": 0.1971, |
|
"step": 3550 |
|
}, |
|
{ |
|
"epoch": 2.259083728278041, |
|
"grad_norm": 10.275826454162598, |
|
"learning_rate": 3.1755555555555557e-06, |
|
"loss": 0.1914, |
|
"step": 3575 |
|
}, |
|
{ |
|
"epoch": 2.2748815165876777, |
|
"grad_norm": 9.910749435424805, |
|
"learning_rate": 3.12e-06, |
|
"loss": 0.2067, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 2.2906793048973144, |
|
"grad_norm": 10.053370475769043, |
|
"learning_rate": 3.064444444444445e-06, |
|
"loss": 0.2129, |
|
"step": 3625 |
|
}, |
|
{ |
|
"epoch": 2.306477093206951, |
|
"grad_norm": 10.744956970214844, |
|
"learning_rate": 3.008888888888889e-06, |
|
"loss": 0.1984, |
|
"step": 3650 |
|
}, |
|
{ |
|
"epoch": 2.322274881516588, |
|
"grad_norm": 9.880094528198242, |
|
"learning_rate": 2.9533333333333336e-06, |
|
"loss": 0.1852, |
|
"step": 3675 |
|
}, |
|
{ |
|
"epoch": 2.338072669826224, |
|
"grad_norm": 10.811684608459473, |
|
"learning_rate": 2.8977777777777785e-06, |
|
"loss": 0.2019, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 2.353870458135861, |
|
"grad_norm": 12.169087409973145, |
|
"learning_rate": 2.8422222222222225e-06, |
|
"loss": 0.1944, |
|
"step": 3725 |
|
}, |
|
{ |
|
"epoch": 2.3696682464454977, |
|
"grad_norm": 10.1768217086792, |
|
"learning_rate": 2.786666666666667e-06, |
|
"loss": 0.2035, |
|
"step": 3750 |
|
}, |
|
{ |
|
"epoch": 2.3854660347551344, |
|
"grad_norm": 8.389801979064941, |
|
"learning_rate": 2.7311111111111114e-06, |
|
"loss": 0.2096, |
|
"step": 3775 |
|
}, |
|
{ |
|
"epoch": 2.401263823064771, |
|
"grad_norm": 9.511481285095215, |
|
"learning_rate": 2.675555555555556e-06, |
|
"loss": 0.1868, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 2.4170616113744074, |
|
"grad_norm": 10.304895401000977, |
|
"learning_rate": 2.6200000000000003e-06, |
|
"loss": 0.2023, |
|
"step": 3825 |
|
}, |
|
{ |
|
"epoch": 2.432859399684044, |
|
"grad_norm": 11.822694778442383, |
|
"learning_rate": 2.5644444444444444e-06, |
|
"loss": 0.1938, |
|
"step": 3850 |
|
}, |
|
{ |
|
"epoch": 2.448657187993681, |
|
"grad_norm": 10.087789535522461, |
|
"learning_rate": 2.5088888888888892e-06, |
|
"loss": 0.1987, |
|
"step": 3875 |
|
}, |
|
{ |
|
"epoch": 2.4644549763033177, |
|
"grad_norm": 8.504409790039062, |
|
"learning_rate": 2.4533333333333333e-06, |
|
"loss": 0.1845, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 2.4802527646129544, |
|
"grad_norm": 9.70301342010498, |
|
"learning_rate": 2.397777777777778e-06, |
|
"loss": 0.1832, |
|
"step": 3925 |
|
}, |
|
{ |
|
"epoch": 2.4960505529225907, |
|
"grad_norm": 9.534614562988281, |
|
"learning_rate": 2.342222222222222e-06, |
|
"loss": 0.182, |
|
"step": 3950 |
|
}, |
|
{ |
|
"epoch": 2.5118483412322274, |
|
"grad_norm": 8.997177124023438, |
|
"learning_rate": 2.2866666666666667e-06, |
|
"loss": 0.205, |
|
"step": 3975 |
|
}, |
|
{ |
|
"epoch": 2.527646129541864, |
|
"grad_norm": 9.527833938598633, |
|
"learning_rate": 2.2311111111111115e-06, |
|
"loss": 0.1851, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 2.527646129541864, |
|
"eval_loss": 0.41017213463783264, |
|
"eval_runtime": 1287.8118, |
|
"eval_samples_per_second": 3.029, |
|
"eval_steps_per_second": 0.189, |
|
"eval_wer": 0.2870182555780933, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 2.543443917851501, |
|
"grad_norm": 8.116750717163086, |
|
"learning_rate": 2.1755555555555556e-06, |
|
"loss": 0.1937, |
|
"step": 4025 |
|
}, |
|
{ |
|
"epoch": 2.5592417061611377, |
|
"grad_norm": 11.475099563598633, |
|
"learning_rate": 2.12e-06, |
|
"loss": 0.1795, |
|
"step": 4050 |
|
}, |
|
{ |
|
"epoch": 2.575039494470774, |
|
"grad_norm": 7.422430038452148, |
|
"learning_rate": 2.064444444444445e-06, |
|
"loss": 0.2029, |
|
"step": 4075 |
|
}, |
|
{ |
|
"epoch": 2.5908372827804107, |
|
"grad_norm": 8.631953239440918, |
|
"learning_rate": 2.008888888888889e-06, |
|
"loss": 0.1883, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 2.6066350710900474, |
|
"grad_norm": 9.813713073730469, |
|
"learning_rate": 1.9533333333333334e-06, |
|
"loss": 0.1901, |
|
"step": 4125 |
|
}, |
|
{ |
|
"epoch": 2.622432859399684, |
|
"grad_norm": 10.284896850585938, |
|
"learning_rate": 1.8977777777777779e-06, |
|
"loss": 0.183, |
|
"step": 4150 |
|
}, |
|
{ |
|
"epoch": 2.638230647709321, |
|
"grad_norm": 9.403543472290039, |
|
"learning_rate": 1.8422222222222225e-06, |
|
"loss": 0.1892, |
|
"step": 4175 |
|
}, |
|
{ |
|
"epoch": 2.654028436018957, |
|
"grad_norm": 9.446948051452637, |
|
"learning_rate": 1.7866666666666668e-06, |
|
"loss": 0.1886, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 2.669826224328594, |
|
"grad_norm": 10.583983421325684, |
|
"learning_rate": 1.7311111111111112e-06, |
|
"loss": 0.2211, |
|
"step": 4225 |
|
}, |
|
{ |
|
"epoch": 2.6856240126382307, |
|
"grad_norm": 10.528802871704102, |
|
"learning_rate": 1.675555555555556e-06, |
|
"loss": 0.1937, |
|
"step": 4250 |
|
}, |
|
{ |
|
"epoch": 2.7014218009478674, |
|
"grad_norm": 8.71202278137207, |
|
"learning_rate": 1.6200000000000002e-06, |
|
"loss": 0.1936, |
|
"step": 4275 |
|
}, |
|
{ |
|
"epoch": 2.717219589257504, |
|
"grad_norm": 8.44046401977539, |
|
"learning_rate": 1.5644444444444446e-06, |
|
"loss": 0.1994, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 2.7330173775671405, |
|
"grad_norm": 9.856565475463867, |
|
"learning_rate": 1.5088888888888889e-06, |
|
"loss": 0.1753, |
|
"step": 4325 |
|
}, |
|
{ |
|
"epoch": 2.748815165876777, |
|
"grad_norm": 9.842414855957031, |
|
"learning_rate": 1.4533333333333335e-06, |
|
"loss": 0.1815, |
|
"step": 4350 |
|
}, |
|
{ |
|
"epoch": 2.764612954186414, |
|
"grad_norm": 9.333725929260254, |
|
"learning_rate": 1.397777777777778e-06, |
|
"loss": 0.1851, |
|
"step": 4375 |
|
}, |
|
{ |
|
"epoch": 2.7804107424960507, |
|
"grad_norm": 9.97825813293457, |
|
"learning_rate": 1.3422222222222222e-06, |
|
"loss": 0.1815, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 2.7962085308056874, |
|
"grad_norm": 9.474321365356445, |
|
"learning_rate": 1.286666666666667e-06, |
|
"loss": 0.1792, |
|
"step": 4425 |
|
}, |
|
{ |
|
"epoch": 2.8120063191153237, |
|
"grad_norm": 8.71677303314209, |
|
"learning_rate": 1.2311111111111112e-06, |
|
"loss": 0.1921, |
|
"step": 4450 |
|
}, |
|
{ |
|
"epoch": 2.8278041074249605, |
|
"grad_norm": 9.69323444366455, |
|
"learning_rate": 1.1755555555555556e-06, |
|
"loss": 0.1778, |
|
"step": 4475 |
|
}, |
|
{ |
|
"epoch": 2.843601895734597, |
|
"grad_norm": 9.335270881652832, |
|
"learning_rate": 1.12e-06, |
|
"loss": 0.1783, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 2.859399684044234, |
|
"grad_norm": 8.661324501037598, |
|
"learning_rate": 1.0644444444444445e-06, |
|
"loss": 0.1689, |
|
"step": 4525 |
|
}, |
|
{ |
|
"epoch": 2.8751974723538707, |
|
"grad_norm": 9.32027530670166, |
|
"learning_rate": 1.008888888888889e-06, |
|
"loss": 0.1772, |
|
"step": 4550 |
|
}, |
|
{ |
|
"epoch": 2.890995260663507, |
|
"grad_norm": 8.178330421447754, |
|
"learning_rate": 9.533333333333335e-07, |
|
"loss": 0.1806, |
|
"step": 4575 |
|
}, |
|
{ |
|
"epoch": 2.9067930489731437, |
|
"grad_norm": 9.38011646270752, |
|
"learning_rate": 8.977777777777778e-07, |
|
"loss": 0.1753, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 2.9225908372827805, |
|
"grad_norm": 9.022958755493164, |
|
"learning_rate": 8.422222222222224e-07, |
|
"loss": 0.1813, |
|
"step": 4625 |
|
}, |
|
{ |
|
"epoch": 2.938388625592417, |
|
"grad_norm": 9.93110466003418, |
|
"learning_rate": 7.866666666666667e-07, |
|
"loss": 0.1825, |
|
"step": 4650 |
|
}, |
|
{ |
|
"epoch": 2.954186413902054, |
|
"grad_norm": 9.306452751159668, |
|
"learning_rate": 7.311111111111112e-07, |
|
"loss": 0.1943, |
|
"step": 4675 |
|
}, |
|
{ |
|
"epoch": 2.9699842022116902, |
|
"grad_norm": 7.849850177764893, |
|
"learning_rate": 6.755555555555555e-07, |
|
"loss": 0.1696, |
|
"step": 4700 |
|
}, |
|
{ |
|
"epoch": 2.985781990521327, |
|
"grad_norm": 10.920326232910156, |
|
"learning_rate": 6.200000000000001e-07, |
|
"loss": 0.1788, |
|
"step": 4725 |
|
}, |
|
{ |
|
"epoch": 3.0015797788309637, |
|
"grad_norm": 7.569627285003662, |
|
"learning_rate": 5.644444444444445e-07, |
|
"loss": 0.1698, |
|
"step": 4750 |
|
}, |
|
{ |
|
"epoch": 3.0173775671406005, |
|
"grad_norm": 8.557785987854004, |
|
"learning_rate": 5.088888888888889e-07, |
|
"loss": 0.1323, |
|
"step": 4775 |
|
}, |
|
{ |
|
"epoch": 3.0331753554502368, |
|
"grad_norm": 7.7341132164001465, |
|
"learning_rate": 4.533333333333334e-07, |
|
"loss": 0.1288, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 3.0489731437598735, |
|
"grad_norm": 6.455957889556885, |
|
"learning_rate": 3.9777777777777783e-07, |
|
"loss": 0.1253, |
|
"step": 4825 |
|
}, |
|
{ |
|
"epoch": 3.0647709320695102, |
|
"grad_norm": 9.151886940002441, |
|
"learning_rate": 3.422222222222223e-07, |
|
"loss": 0.1369, |
|
"step": 4850 |
|
}, |
|
{ |
|
"epoch": 3.080568720379147, |
|
"grad_norm": 8.11299991607666, |
|
"learning_rate": 2.866666666666667e-07, |
|
"loss": 0.1272, |
|
"step": 4875 |
|
}, |
|
{ |
|
"epoch": 3.0963665086887837, |
|
"grad_norm": 6.720188617706299, |
|
"learning_rate": 2.3111111111111112e-07, |
|
"loss": 0.1329, |
|
"step": 4900 |
|
}, |
|
{ |
|
"epoch": 3.11216429699842, |
|
"grad_norm": 7.401381969451904, |
|
"learning_rate": 1.7555555555555558e-07, |
|
"loss": 0.1257, |
|
"step": 4925 |
|
}, |
|
{ |
|
"epoch": 3.1279620853080567, |
|
"grad_norm": 7.703917026519775, |
|
"learning_rate": 1.2000000000000002e-07, |
|
"loss": 0.1224, |
|
"step": 4950 |
|
}, |
|
{ |
|
"epoch": 3.1437598736176935, |
|
"grad_norm": 7.16009521484375, |
|
"learning_rate": 6.444444444444445e-08, |
|
"loss": 0.1296, |
|
"step": 4975 |
|
}, |
|
{ |
|
"epoch": 3.1595576619273302, |
|
"grad_norm": 7.600194931030273, |
|
"learning_rate": 8.88888888888889e-09, |
|
"loss": 0.1313, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 3.1595576619273302, |
|
"eval_loss": 0.4071974754333496, |
|
"eval_runtime": 1289.8235, |
|
"eval_samples_per_second": 3.024, |
|
"eval_steps_per_second": 0.189, |
|
"eval_wer": 0.2801361924079977, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 3.1595576619273302, |
|
"step": 5000, |
|
"total_flos": 2.727921844224e+20, |
|
"train_loss": 0.3573408980369568, |
|
"train_runtime": 57234.8416, |
|
"train_samples_per_second": 2.796, |
|
"train_steps_per_second": 0.087 |
|
} |
|
], |
|
"logging_steps": 25, |
|
"max_steps": 5000, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 4, |
|
"save_steps": 1000, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 2.727921844224e+20, |
|
"train_batch_size": 16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|