Automatic Speech Recognition
Transformers
TensorBoard
Safetensors
Welsh
whisper
Generated from Trainer
DewiBrynJones's picture
End of training
0e8e70f verified
raw
history blame
36.7 kB
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 3.1595576619273302,
"eval_steps": 1000,
"global_step": 5000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.01579778830963665,
"grad_norm": 39.78215789794922,
"learning_rate": 4.6000000000000004e-07,
"loss": 1.9397,
"step": 25
},
{
"epoch": 0.0315955766192733,
"grad_norm": 27.938426971435547,
"learning_rate": 9.600000000000001e-07,
"loss": 1.2937,
"step": 50
},
{
"epoch": 0.04739336492890995,
"grad_norm": 25.550430297851562,
"learning_rate": 1.46e-06,
"loss": 0.9786,
"step": 75
},
{
"epoch": 0.0631911532385466,
"grad_norm": 26.635549545288086,
"learning_rate": 1.9600000000000003e-06,
"loss": 0.8818,
"step": 100
},
{
"epoch": 0.07898894154818326,
"grad_norm": 25.53322410583496,
"learning_rate": 2.46e-06,
"loss": 0.8233,
"step": 125
},
{
"epoch": 0.0947867298578199,
"grad_norm": 26.468778610229492,
"learning_rate": 2.96e-06,
"loss": 0.7945,
"step": 150
},
{
"epoch": 0.11058451816745656,
"grad_norm": 22.71087074279785,
"learning_rate": 3.46e-06,
"loss": 0.752,
"step": 175
},
{
"epoch": 0.1263823064770932,
"grad_norm": 21.494964599609375,
"learning_rate": 3.96e-06,
"loss": 0.7655,
"step": 200
},
{
"epoch": 0.14218009478672985,
"grad_norm": 22.558645248413086,
"learning_rate": 4.4600000000000005e-06,
"loss": 0.72,
"step": 225
},
{
"epoch": 0.1579778830963665,
"grad_norm": 22.013551712036133,
"learning_rate": 4.960000000000001e-06,
"loss": 0.6961,
"step": 250
},
{
"epoch": 0.17377567140600317,
"grad_norm": 25.887876510620117,
"learning_rate": 5.460000000000001e-06,
"loss": 0.6897,
"step": 275
},
{
"epoch": 0.1895734597156398,
"grad_norm": 19.806230545043945,
"learning_rate": 5.9600000000000005e-06,
"loss": 0.6839,
"step": 300
},
{
"epoch": 0.20537124802527645,
"grad_norm": 21.767576217651367,
"learning_rate": 6.460000000000001e-06,
"loss": 0.6767,
"step": 325
},
{
"epoch": 0.2211690363349131,
"grad_norm": 19.838890075683594,
"learning_rate": 6.96e-06,
"loss": 0.6637,
"step": 350
},
{
"epoch": 0.23696682464454977,
"grad_norm": 22.119140625,
"learning_rate": 7.4600000000000006e-06,
"loss": 0.6811,
"step": 375
},
{
"epoch": 0.2527646129541864,
"grad_norm": 21.972688674926758,
"learning_rate": 7.960000000000002e-06,
"loss": 0.6843,
"step": 400
},
{
"epoch": 0.2685624012638231,
"grad_norm": 21.99839973449707,
"learning_rate": 8.46e-06,
"loss": 0.6487,
"step": 425
},
{
"epoch": 0.2843601895734597,
"grad_norm": 18.968303680419922,
"learning_rate": 8.96e-06,
"loss": 0.6528,
"step": 450
},
{
"epoch": 0.3001579778830964,
"grad_norm": 20.77776336669922,
"learning_rate": 9.460000000000001e-06,
"loss": 0.6346,
"step": 475
},
{
"epoch": 0.315955766192733,
"grad_norm": 17.947195053100586,
"learning_rate": 9.960000000000001e-06,
"loss": 0.6105,
"step": 500
},
{
"epoch": 0.33175355450236965,
"grad_norm": 17.2167911529541,
"learning_rate": 9.94888888888889e-06,
"loss": 0.6399,
"step": 525
},
{
"epoch": 0.34755134281200634,
"grad_norm": 19.390045166015625,
"learning_rate": 9.893333333333334e-06,
"loss": 0.5825,
"step": 550
},
{
"epoch": 0.36334913112164297,
"grad_norm": 17.195106506347656,
"learning_rate": 9.837777777777778e-06,
"loss": 0.5895,
"step": 575
},
{
"epoch": 0.3791469194312796,
"grad_norm": 18.29102325439453,
"learning_rate": 9.782222222222222e-06,
"loss": 0.621,
"step": 600
},
{
"epoch": 0.3949447077409163,
"grad_norm": 16.213546752929688,
"learning_rate": 9.726666666666668e-06,
"loss": 0.587,
"step": 625
},
{
"epoch": 0.4107424960505529,
"grad_norm": 15.86738109588623,
"learning_rate": 9.671111111111112e-06,
"loss": 0.5765,
"step": 650
},
{
"epoch": 0.4265402843601896,
"grad_norm": 17.446897506713867,
"learning_rate": 9.617777777777778e-06,
"loss": 0.5914,
"step": 675
},
{
"epoch": 0.4423380726698262,
"grad_norm": 15.107172966003418,
"learning_rate": 9.562222222222223e-06,
"loss": 0.5517,
"step": 700
},
{
"epoch": 0.45813586097946285,
"grad_norm": 16.07261848449707,
"learning_rate": 9.506666666666667e-06,
"loss": 0.5523,
"step": 725
},
{
"epoch": 0.47393364928909953,
"grad_norm": 15.2976655960083,
"learning_rate": 9.451111111111112e-06,
"loss": 0.5419,
"step": 750
},
{
"epoch": 0.48973143759873616,
"grad_norm": 16.560869216918945,
"learning_rate": 9.395555555555556e-06,
"loss": 0.5763,
"step": 775
},
{
"epoch": 0.5055292259083728,
"grad_norm": 18.58966064453125,
"learning_rate": 9.340000000000002e-06,
"loss": 0.5497,
"step": 800
},
{
"epoch": 0.5213270142180095,
"grad_norm": 16.940420150756836,
"learning_rate": 9.284444444444444e-06,
"loss": 0.5177,
"step": 825
},
{
"epoch": 0.5371248025276462,
"grad_norm": 12.964641571044922,
"learning_rate": 9.22888888888889e-06,
"loss": 0.5293,
"step": 850
},
{
"epoch": 0.5529225908372828,
"grad_norm": 16.428470611572266,
"learning_rate": 9.173333333333334e-06,
"loss": 0.5062,
"step": 875
},
{
"epoch": 0.5687203791469194,
"grad_norm": 17.921024322509766,
"learning_rate": 9.117777777777778e-06,
"loss": 0.5471,
"step": 900
},
{
"epoch": 0.584518167456556,
"grad_norm": 14.068251609802246,
"learning_rate": 9.062222222222224e-06,
"loss": 0.5072,
"step": 925
},
{
"epoch": 0.6003159557661928,
"grad_norm": 13.403594017028809,
"learning_rate": 9.006666666666666e-06,
"loss": 0.5051,
"step": 950
},
{
"epoch": 0.6161137440758294,
"grad_norm": 14.557646751403809,
"learning_rate": 8.951111111111112e-06,
"loss": 0.5391,
"step": 975
},
{
"epoch": 0.631911532385466,
"grad_norm": 15.385436058044434,
"learning_rate": 8.895555555555556e-06,
"loss": 0.5037,
"step": 1000
},
{
"epoch": 0.631911532385466,
"eval_loss": 0.551193118095398,
"eval_runtime": 1363.4745,
"eval_samples_per_second": 2.861,
"eval_steps_per_second": 0.179,
"eval_wer": 0.38095238095238093,
"step": 1000
},
{
"epoch": 0.6477093206951027,
"grad_norm": 13.815781593322754,
"learning_rate": 8.84e-06,
"loss": 0.4888,
"step": 1025
},
{
"epoch": 0.6635071090047393,
"grad_norm": 16.213642120361328,
"learning_rate": 8.784444444444446e-06,
"loss": 0.4865,
"step": 1050
},
{
"epoch": 0.6793048973143759,
"grad_norm": 14.779074668884277,
"learning_rate": 8.72888888888889e-06,
"loss": 0.4963,
"step": 1075
},
{
"epoch": 0.6951026856240127,
"grad_norm": 13.794785499572754,
"learning_rate": 8.673333333333334e-06,
"loss": 0.482,
"step": 1100
},
{
"epoch": 0.7109004739336493,
"grad_norm": 14.701066970825195,
"learning_rate": 8.617777777777778e-06,
"loss": 0.5027,
"step": 1125
},
{
"epoch": 0.7266982622432859,
"grad_norm": 14.537113189697266,
"learning_rate": 8.562222222222224e-06,
"loss": 0.4798,
"step": 1150
},
{
"epoch": 0.7424960505529226,
"grad_norm": 14.886212348937988,
"learning_rate": 8.506666666666668e-06,
"loss": 0.4777,
"step": 1175
},
{
"epoch": 0.7582938388625592,
"grad_norm": 13.01016616821289,
"learning_rate": 8.451111111111112e-06,
"loss": 0.4736,
"step": 1200
},
{
"epoch": 0.7740916271721959,
"grad_norm": 14.213628768920898,
"learning_rate": 8.395555555555557e-06,
"loss": 0.4808,
"step": 1225
},
{
"epoch": 0.7898894154818326,
"grad_norm": 14.541191101074219,
"learning_rate": 8.34e-06,
"loss": 0.5008,
"step": 1250
},
{
"epoch": 0.8056872037914692,
"grad_norm": 14.092310905456543,
"learning_rate": 8.284444444444446e-06,
"loss": 0.4693,
"step": 1275
},
{
"epoch": 0.8214849921011058,
"grad_norm": 14.289324760437012,
"learning_rate": 8.22888888888889e-06,
"loss": 0.4418,
"step": 1300
},
{
"epoch": 0.8372827804107424,
"grad_norm": 13.257657051086426,
"learning_rate": 8.173333333333334e-06,
"loss": 0.4643,
"step": 1325
},
{
"epoch": 0.8530805687203792,
"grad_norm": 15.05517864227295,
"learning_rate": 8.11777777777778e-06,
"loss": 0.4751,
"step": 1350
},
{
"epoch": 0.8688783570300158,
"grad_norm": 13.352594375610352,
"learning_rate": 8.062222222222222e-06,
"loss": 0.442,
"step": 1375
},
{
"epoch": 0.8846761453396524,
"grad_norm": 12.487988471984863,
"learning_rate": 8.006666666666667e-06,
"loss": 0.4496,
"step": 1400
},
{
"epoch": 0.9004739336492891,
"grad_norm": 13.963912963867188,
"learning_rate": 7.951111111111111e-06,
"loss": 0.4415,
"step": 1425
},
{
"epoch": 0.9162717219589257,
"grad_norm": 13.90829086303711,
"learning_rate": 7.895555555555557e-06,
"loss": 0.4349,
"step": 1450
},
{
"epoch": 0.9320695102685624,
"grad_norm": 16.863481521606445,
"learning_rate": 7.840000000000001e-06,
"loss": 0.4722,
"step": 1475
},
{
"epoch": 0.9478672985781991,
"grad_norm": 13.991209983825684,
"learning_rate": 7.784444444444445e-06,
"loss": 0.4397,
"step": 1500
},
{
"epoch": 0.9636650868878357,
"grad_norm": 12.423737525939941,
"learning_rate": 7.72888888888889e-06,
"loss": 0.423,
"step": 1525
},
{
"epoch": 0.9794628751974723,
"grad_norm": 13.574849128723145,
"learning_rate": 7.673333333333333e-06,
"loss": 0.4472,
"step": 1550
},
{
"epoch": 0.995260663507109,
"grad_norm": 10.542879104614258,
"learning_rate": 7.617777777777778e-06,
"loss": 0.4319,
"step": 1575
},
{
"epoch": 1.0110584518167456,
"grad_norm": 12.750446319580078,
"learning_rate": 7.562222222222223e-06,
"loss": 0.3401,
"step": 1600
},
{
"epoch": 1.0268562401263823,
"grad_norm": 10.037585258483887,
"learning_rate": 7.506666666666668e-06,
"loss": 0.3236,
"step": 1625
},
{
"epoch": 1.042654028436019,
"grad_norm": 12.519506454467773,
"learning_rate": 7.451111111111111e-06,
"loss": 0.3379,
"step": 1650
},
{
"epoch": 1.0584518167456556,
"grad_norm": 11.666909217834473,
"learning_rate": 7.395555555555556e-06,
"loss": 0.3082,
"step": 1675
},
{
"epoch": 1.0742496050552923,
"grad_norm": 14.614953994750977,
"learning_rate": 7.340000000000001e-06,
"loss": 0.3272,
"step": 1700
},
{
"epoch": 1.0900473933649288,
"grad_norm": 11.243821144104004,
"learning_rate": 7.284444444444445e-06,
"loss": 0.3081,
"step": 1725
},
{
"epoch": 1.1058451816745656,
"grad_norm": 10.896337509155273,
"learning_rate": 7.22888888888889e-06,
"loss": 0.3212,
"step": 1750
},
{
"epoch": 1.1216429699842023,
"grad_norm": 11.159407615661621,
"learning_rate": 7.173333333333335e-06,
"loss": 0.3316,
"step": 1775
},
{
"epoch": 1.1374407582938388,
"grad_norm": 10.72888469696045,
"learning_rate": 7.117777777777778e-06,
"loss": 0.3465,
"step": 1800
},
{
"epoch": 1.1532385466034756,
"grad_norm": 11.136720657348633,
"learning_rate": 7.062222222222223e-06,
"loss": 0.3219,
"step": 1825
},
{
"epoch": 1.169036334913112,
"grad_norm": 11.062825202941895,
"learning_rate": 7.006666666666667e-06,
"loss": 0.3234,
"step": 1850
},
{
"epoch": 1.1848341232227488,
"grad_norm": 13.367472648620605,
"learning_rate": 6.951111111111112e-06,
"loss": 0.3215,
"step": 1875
},
{
"epoch": 1.2006319115323856,
"grad_norm": 12.034098625183105,
"learning_rate": 6.8955555555555565e-06,
"loss": 0.3195,
"step": 1900
},
{
"epoch": 1.216429699842022,
"grad_norm": 12.74405574798584,
"learning_rate": 6.8400000000000014e-06,
"loss": 0.3105,
"step": 1925
},
{
"epoch": 1.2322274881516588,
"grad_norm": 14.234502792358398,
"learning_rate": 6.784444444444445e-06,
"loss": 0.3327,
"step": 1950
},
{
"epoch": 1.2480252764612954,
"grad_norm": 12.721147537231445,
"learning_rate": 6.7288888888888895e-06,
"loss": 0.3405,
"step": 1975
},
{
"epoch": 1.263823064770932,
"grad_norm": 12.109272003173828,
"learning_rate": 6.6733333333333335e-06,
"loss": 0.3264,
"step": 2000
},
{
"epoch": 1.263823064770932,
"eval_loss": 0.4630681872367859,
"eval_runtime": 1327.3738,
"eval_samples_per_second": 2.939,
"eval_steps_per_second": 0.184,
"eval_wer": 0.331425673717763,
"step": 2000
},
{
"epoch": 1.2796208530805688,
"grad_norm": 10.781163215637207,
"learning_rate": 6.617777777777778e-06,
"loss": 0.3152,
"step": 2025
},
{
"epoch": 1.2954186413902053,
"grad_norm": 12.081149101257324,
"learning_rate": 6.562222222222223e-06,
"loss": 0.334,
"step": 2050
},
{
"epoch": 1.311216429699842,
"grad_norm": 11.082480430603027,
"learning_rate": 6.5066666666666665e-06,
"loss": 0.3182,
"step": 2075
},
{
"epoch": 1.3270142180094786,
"grad_norm": 10.400703430175781,
"learning_rate": 6.451111111111111e-06,
"loss": 0.3209,
"step": 2100
},
{
"epoch": 1.3428120063191153,
"grad_norm": 14.698405265808105,
"learning_rate": 6.395555555555556e-06,
"loss": 0.3358,
"step": 2125
},
{
"epoch": 1.358609794628752,
"grad_norm": 11.301855087280273,
"learning_rate": 6.34e-06,
"loss": 0.3274,
"step": 2150
},
{
"epoch": 1.3744075829383886,
"grad_norm": 13.15268611907959,
"learning_rate": 6.284444444444445e-06,
"loss": 0.3091,
"step": 2175
},
{
"epoch": 1.3902053712480253,
"grad_norm": 10.712764739990234,
"learning_rate": 6.22888888888889e-06,
"loss": 0.3217,
"step": 2200
},
{
"epoch": 1.4060031595576619,
"grad_norm": 9.865320205688477,
"learning_rate": 6.173333333333333e-06,
"loss": 0.3341,
"step": 2225
},
{
"epoch": 1.4218009478672986,
"grad_norm": 11.386091232299805,
"learning_rate": 6.117777777777778e-06,
"loss": 0.308,
"step": 2250
},
{
"epoch": 1.4375987361769353,
"grad_norm": 10.972213745117188,
"learning_rate": 6.062222222222223e-06,
"loss": 0.3184,
"step": 2275
},
{
"epoch": 1.4533965244865719,
"grad_norm": 9.059267044067383,
"learning_rate": 6.006666666666667e-06,
"loss": 0.3285,
"step": 2300
},
{
"epoch": 1.4691943127962086,
"grad_norm": 10.708878517150879,
"learning_rate": 5.951111111111112e-06,
"loss": 0.288,
"step": 2325
},
{
"epoch": 1.4849921011058451,
"grad_norm": 9.580412864685059,
"learning_rate": 5.895555555555557e-06,
"loss": 0.3133,
"step": 2350
},
{
"epoch": 1.5007898894154819,
"grad_norm": 13.611145973205566,
"learning_rate": 5.84e-06,
"loss": 0.3398,
"step": 2375
},
{
"epoch": 1.5165876777251186,
"grad_norm": 11.563826560974121,
"learning_rate": 5.784444444444445e-06,
"loss": 0.3187,
"step": 2400
},
{
"epoch": 1.5323854660347551,
"grad_norm": 11.612549781799316,
"learning_rate": 5.72888888888889e-06,
"loss": 0.3169,
"step": 2425
},
{
"epoch": 1.5481832543443916,
"grad_norm": 9.463521003723145,
"learning_rate": 5.673333333333334e-06,
"loss": 0.3124,
"step": 2450
},
{
"epoch": 1.5639810426540284,
"grad_norm": 14.442208290100098,
"learning_rate": 5.617777777777779e-06,
"loss": 0.3202,
"step": 2475
},
{
"epoch": 1.5797788309636651,
"grad_norm": 11.377900123596191,
"learning_rate": 5.562222222222222e-06,
"loss": 0.317,
"step": 2500
},
{
"epoch": 1.5955766192733019,
"grad_norm": 11.9774808883667,
"learning_rate": 5.506666666666667e-06,
"loss": 0.3188,
"step": 2525
},
{
"epoch": 1.6113744075829384,
"grad_norm": 11.492107391357422,
"learning_rate": 5.451111111111112e-06,
"loss": 0.3109,
"step": 2550
},
{
"epoch": 1.627172195892575,
"grad_norm": 12.987119674682617,
"learning_rate": 5.3955555555555565e-06,
"loss": 0.3127,
"step": 2575
},
{
"epoch": 1.6429699842022116,
"grad_norm": 11.403392791748047,
"learning_rate": 5.3400000000000005e-06,
"loss": 0.3007,
"step": 2600
},
{
"epoch": 1.6587677725118484,
"grad_norm": 10.354674339294434,
"learning_rate": 5.2844444444444454e-06,
"loss": 0.3129,
"step": 2625
},
{
"epoch": 1.674565560821485,
"grad_norm": 9.936201095581055,
"learning_rate": 5.228888888888889e-06,
"loss": 0.3226,
"step": 2650
},
{
"epoch": 1.6903633491311216,
"grad_norm": 10.426243782043457,
"learning_rate": 5.1733333333333335e-06,
"loss": 0.3021,
"step": 2675
},
{
"epoch": 1.7061611374407581,
"grad_norm": 10.784858703613281,
"learning_rate": 5.117777777777778e-06,
"loss": 0.2923,
"step": 2700
},
{
"epoch": 1.7219589257503949,
"grad_norm": 10.411828994750977,
"learning_rate": 5.062222222222222e-06,
"loss": 0.2893,
"step": 2725
},
{
"epoch": 1.7377567140600316,
"grad_norm": 12.027934074401855,
"learning_rate": 5.006666666666667e-06,
"loss": 0.3136,
"step": 2750
},
{
"epoch": 1.7535545023696684,
"grad_norm": 10.067774772644043,
"learning_rate": 4.951111111111111e-06,
"loss": 0.296,
"step": 2775
},
{
"epoch": 1.7693522906793049,
"grad_norm": 10.396674156188965,
"learning_rate": 4.895555555555556e-06,
"loss": 0.2973,
"step": 2800
},
{
"epoch": 1.7851500789889414,
"grad_norm": 9.719764709472656,
"learning_rate": 4.84e-06,
"loss": 0.2831,
"step": 2825
},
{
"epoch": 1.8009478672985781,
"grad_norm": 11.552470207214355,
"learning_rate": 4.784444444444445e-06,
"loss": 0.2802,
"step": 2850
},
{
"epoch": 1.8167456556082149,
"grad_norm": 10.932677268981934,
"learning_rate": 4.728888888888889e-06,
"loss": 0.3189,
"step": 2875
},
{
"epoch": 1.8325434439178516,
"grad_norm": 12.281967163085938,
"learning_rate": 4.673333333333333e-06,
"loss": 0.3014,
"step": 2900
},
{
"epoch": 1.8483412322274881,
"grad_norm": 12.78361988067627,
"learning_rate": 4.617777777777778e-06,
"loss": 0.3265,
"step": 2925
},
{
"epoch": 1.8641390205371247,
"grad_norm": 11.523568153381348,
"learning_rate": 4.562222222222222e-06,
"loss": 0.3062,
"step": 2950
},
{
"epoch": 1.8799368088467614,
"grad_norm": 12.009855270385742,
"learning_rate": 4.506666666666667e-06,
"loss": 0.294,
"step": 2975
},
{
"epoch": 1.8957345971563981,
"grad_norm": 11.980591773986816,
"learning_rate": 4.451111111111112e-06,
"loss": 0.2997,
"step": 3000
},
{
"epoch": 1.8957345971563981,
"eval_loss": 0.420254111289978,
"eval_runtime": 1304.4467,
"eval_samples_per_second": 2.991,
"eval_steps_per_second": 0.187,
"eval_wer": 0.30416304452815607,
"step": 3000
},
{
"epoch": 1.9115323854660349,
"grad_norm": 9.556490898132324,
"learning_rate": 4.395555555555556e-06,
"loss": 0.3128,
"step": 3025
},
{
"epoch": 1.9273301737756714,
"grad_norm": 12.445517539978027,
"learning_rate": 4.34e-06,
"loss": 0.2898,
"step": 3050
},
{
"epoch": 1.943127962085308,
"grad_norm": 11.485321998596191,
"learning_rate": 4.284444444444445e-06,
"loss": 0.2922,
"step": 3075
},
{
"epoch": 1.9589257503949447,
"grad_norm": 11.148347854614258,
"learning_rate": 4.228888888888889e-06,
"loss": 0.2941,
"step": 3100
},
{
"epoch": 1.9747235387045814,
"grad_norm": 8.813661575317383,
"learning_rate": 4.173333333333334e-06,
"loss": 0.2738,
"step": 3125
},
{
"epoch": 1.9905213270142181,
"grad_norm": 9.704155921936035,
"learning_rate": 4.117777777777779e-06,
"loss": 0.3064,
"step": 3150
},
{
"epoch": 2.006319115323855,
"grad_norm": 8.704689979553223,
"learning_rate": 4.062222222222223e-06,
"loss": 0.2478,
"step": 3175
},
{
"epoch": 2.022116903633491,
"grad_norm": 10.47323226928711,
"learning_rate": 4.006666666666667e-06,
"loss": 0.1972,
"step": 3200
},
{
"epoch": 2.037914691943128,
"grad_norm": 9.401123046875,
"learning_rate": 3.953333333333333e-06,
"loss": 0.2048,
"step": 3225
},
{
"epoch": 2.0537124802527646,
"grad_norm": 8.968587875366211,
"learning_rate": 3.897777777777778e-06,
"loss": 0.2047,
"step": 3250
},
{
"epoch": 2.0695102685624014,
"grad_norm": 10.138334274291992,
"learning_rate": 3.842222222222223e-06,
"loss": 0.2041,
"step": 3275
},
{
"epoch": 2.085308056872038,
"grad_norm": 9.157357215881348,
"learning_rate": 3.7866666666666667e-06,
"loss": 0.1983,
"step": 3300
},
{
"epoch": 2.1011058451816744,
"grad_norm": 11.541014671325684,
"learning_rate": 3.7311111111111116e-06,
"loss": 0.2102,
"step": 3325
},
{
"epoch": 2.116903633491311,
"grad_norm": 13.219682693481445,
"learning_rate": 3.675555555555556e-06,
"loss": 0.2032,
"step": 3350
},
{
"epoch": 2.132701421800948,
"grad_norm": 9.11551284790039,
"learning_rate": 3.62e-06,
"loss": 0.183,
"step": 3375
},
{
"epoch": 2.1484992101105846,
"grad_norm": 7.927098751068115,
"learning_rate": 3.564444444444445e-06,
"loss": 0.2022,
"step": 3400
},
{
"epoch": 2.1642969984202214,
"grad_norm": 10.464715003967285,
"learning_rate": 3.508888888888889e-06,
"loss": 0.1877,
"step": 3425
},
{
"epoch": 2.1800947867298577,
"grad_norm": 8.424384117126465,
"learning_rate": 3.4533333333333334e-06,
"loss": 0.1879,
"step": 3450
},
{
"epoch": 2.1958925750394944,
"grad_norm": 7.617179870605469,
"learning_rate": 3.3977777777777783e-06,
"loss": 0.1973,
"step": 3475
},
{
"epoch": 2.211690363349131,
"grad_norm": 9.299885749816895,
"learning_rate": 3.3422222222222224e-06,
"loss": 0.1982,
"step": 3500
},
{
"epoch": 2.227488151658768,
"grad_norm": 8.834092140197754,
"learning_rate": 3.286666666666667e-06,
"loss": 0.2003,
"step": 3525
},
{
"epoch": 2.2432859399684046,
"grad_norm": 8.07299518585205,
"learning_rate": 3.2311111111111117e-06,
"loss": 0.1971,
"step": 3550
},
{
"epoch": 2.259083728278041,
"grad_norm": 10.275826454162598,
"learning_rate": 3.1755555555555557e-06,
"loss": 0.1914,
"step": 3575
},
{
"epoch": 2.2748815165876777,
"grad_norm": 9.910749435424805,
"learning_rate": 3.12e-06,
"loss": 0.2067,
"step": 3600
},
{
"epoch": 2.2906793048973144,
"grad_norm": 10.053370475769043,
"learning_rate": 3.064444444444445e-06,
"loss": 0.2129,
"step": 3625
},
{
"epoch": 2.306477093206951,
"grad_norm": 10.744956970214844,
"learning_rate": 3.008888888888889e-06,
"loss": 0.1984,
"step": 3650
},
{
"epoch": 2.322274881516588,
"grad_norm": 9.880094528198242,
"learning_rate": 2.9533333333333336e-06,
"loss": 0.1852,
"step": 3675
},
{
"epoch": 2.338072669826224,
"grad_norm": 10.811684608459473,
"learning_rate": 2.8977777777777785e-06,
"loss": 0.2019,
"step": 3700
},
{
"epoch": 2.353870458135861,
"grad_norm": 12.169087409973145,
"learning_rate": 2.8422222222222225e-06,
"loss": 0.1944,
"step": 3725
},
{
"epoch": 2.3696682464454977,
"grad_norm": 10.1768217086792,
"learning_rate": 2.786666666666667e-06,
"loss": 0.2035,
"step": 3750
},
{
"epoch": 2.3854660347551344,
"grad_norm": 8.389801979064941,
"learning_rate": 2.7311111111111114e-06,
"loss": 0.2096,
"step": 3775
},
{
"epoch": 2.401263823064771,
"grad_norm": 9.511481285095215,
"learning_rate": 2.675555555555556e-06,
"loss": 0.1868,
"step": 3800
},
{
"epoch": 2.4170616113744074,
"grad_norm": 10.304895401000977,
"learning_rate": 2.6200000000000003e-06,
"loss": 0.2023,
"step": 3825
},
{
"epoch": 2.432859399684044,
"grad_norm": 11.822694778442383,
"learning_rate": 2.5644444444444444e-06,
"loss": 0.1938,
"step": 3850
},
{
"epoch": 2.448657187993681,
"grad_norm": 10.087789535522461,
"learning_rate": 2.5088888888888892e-06,
"loss": 0.1987,
"step": 3875
},
{
"epoch": 2.4644549763033177,
"grad_norm": 8.504409790039062,
"learning_rate": 2.4533333333333333e-06,
"loss": 0.1845,
"step": 3900
},
{
"epoch": 2.4802527646129544,
"grad_norm": 9.70301342010498,
"learning_rate": 2.397777777777778e-06,
"loss": 0.1832,
"step": 3925
},
{
"epoch": 2.4960505529225907,
"grad_norm": 9.534614562988281,
"learning_rate": 2.342222222222222e-06,
"loss": 0.182,
"step": 3950
},
{
"epoch": 2.5118483412322274,
"grad_norm": 8.997177124023438,
"learning_rate": 2.2866666666666667e-06,
"loss": 0.205,
"step": 3975
},
{
"epoch": 2.527646129541864,
"grad_norm": 9.527833938598633,
"learning_rate": 2.2311111111111115e-06,
"loss": 0.1851,
"step": 4000
},
{
"epoch": 2.527646129541864,
"eval_loss": 0.41017213463783264,
"eval_runtime": 1287.8118,
"eval_samples_per_second": 3.029,
"eval_steps_per_second": 0.189,
"eval_wer": 0.2870182555780933,
"step": 4000
},
{
"epoch": 2.543443917851501,
"grad_norm": 8.116750717163086,
"learning_rate": 2.1755555555555556e-06,
"loss": 0.1937,
"step": 4025
},
{
"epoch": 2.5592417061611377,
"grad_norm": 11.475099563598633,
"learning_rate": 2.12e-06,
"loss": 0.1795,
"step": 4050
},
{
"epoch": 2.575039494470774,
"grad_norm": 7.422430038452148,
"learning_rate": 2.064444444444445e-06,
"loss": 0.2029,
"step": 4075
},
{
"epoch": 2.5908372827804107,
"grad_norm": 8.631953239440918,
"learning_rate": 2.008888888888889e-06,
"loss": 0.1883,
"step": 4100
},
{
"epoch": 2.6066350710900474,
"grad_norm": 9.813713073730469,
"learning_rate": 1.9533333333333334e-06,
"loss": 0.1901,
"step": 4125
},
{
"epoch": 2.622432859399684,
"grad_norm": 10.284896850585938,
"learning_rate": 1.8977777777777779e-06,
"loss": 0.183,
"step": 4150
},
{
"epoch": 2.638230647709321,
"grad_norm": 9.403543472290039,
"learning_rate": 1.8422222222222225e-06,
"loss": 0.1892,
"step": 4175
},
{
"epoch": 2.654028436018957,
"grad_norm": 9.446948051452637,
"learning_rate": 1.7866666666666668e-06,
"loss": 0.1886,
"step": 4200
},
{
"epoch": 2.669826224328594,
"grad_norm": 10.583983421325684,
"learning_rate": 1.7311111111111112e-06,
"loss": 0.2211,
"step": 4225
},
{
"epoch": 2.6856240126382307,
"grad_norm": 10.528802871704102,
"learning_rate": 1.675555555555556e-06,
"loss": 0.1937,
"step": 4250
},
{
"epoch": 2.7014218009478674,
"grad_norm": 8.71202278137207,
"learning_rate": 1.6200000000000002e-06,
"loss": 0.1936,
"step": 4275
},
{
"epoch": 2.717219589257504,
"grad_norm": 8.44046401977539,
"learning_rate": 1.5644444444444446e-06,
"loss": 0.1994,
"step": 4300
},
{
"epoch": 2.7330173775671405,
"grad_norm": 9.856565475463867,
"learning_rate": 1.5088888888888889e-06,
"loss": 0.1753,
"step": 4325
},
{
"epoch": 2.748815165876777,
"grad_norm": 9.842414855957031,
"learning_rate": 1.4533333333333335e-06,
"loss": 0.1815,
"step": 4350
},
{
"epoch": 2.764612954186414,
"grad_norm": 9.333725929260254,
"learning_rate": 1.397777777777778e-06,
"loss": 0.1851,
"step": 4375
},
{
"epoch": 2.7804107424960507,
"grad_norm": 9.97825813293457,
"learning_rate": 1.3422222222222222e-06,
"loss": 0.1815,
"step": 4400
},
{
"epoch": 2.7962085308056874,
"grad_norm": 9.474321365356445,
"learning_rate": 1.286666666666667e-06,
"loss": 0.1792,
"step": 4425
},
{
"epoch": 2.8120063191153237,
"grad_norm": 8.71677303314209,
"learning_rate": 1.2311111111111112e-06,
"loss": 0.1921,
"step": 4450
},
{
"epoch": 2.8278041074249605,
"grad_norm": 9.69323444366455,
"learning_rate": 1.1755555555555556e-06,
"loss": 0.1778,
"step": 4475
},
{
"epoch": 2.843601895734597,
"grad_norm": 9.335270881652832,
"learning_rate": 1.12e-06,
"loss": 0.1783,
"step": 4500
},
{
"epoch": 2.859399684044234,
"grad_norm": 8.661324501037598,
"learning_rate": 1.0644444444444445e-06,
"loss": 0.1689,
"step": 4525
},
{
"epoch": 2.8751974723538707,
"grad_norm": 9.32027530670166,
"learning_rate": 1.008888888888889e-06,
"loss": 0.1772,
"step": 4550
},
{
"epoch": 2.890995260663507,
"grad_norm": 8.178330421447754,
"learning_rate": 9.533333333333335e-07,
"loss": 0.1806,
"step": 4575
},
{
"epoch": 2.9067930489731437,
"grad_norm": 9.38011646270752,
"learning_rate": 8.977777777777778e-07,
"loss": 0.1753,
"step": 4600
},
{
"epoch": 2.9225908372827805,
"grad_norm": 9.022958755493164,
"learning_rate": 8.422222222222224e-07,
"loss": 0.1813,
"step": 4625
},
{
"epoch": 2.938388625592417,
"grad_norm": 9.93110466003418,
"learning_rate": 7.866666666666667e-07,
"loss": 0.1825,
"step": 4650
},
{
"epoch": 2.954186413902054,
"grad_norm": 9.306452751159668,
"learning_rate": 7.311111111111112e-07,
"loss": 0.1943,
"step": 4675
},
{
"epoch": 2.9699842022116902,
"grad_norm": 7.849850177764893,
"learning_rate": 6.755555555555555e-07,
"loss": 0.1696,
"step": 4700
},
{
"epoch": 2.985781990521327,
"grad_norm": 10.920326232910156,
"learning_rate": 6.200000000000001e-07,
"loss": 0.1788,
"step": 4725
},
{
"epoch": 3.0015797788309637,
"grad_norm": 7.569627285003662,
"learning_rate": 5.644444444444445e-07,
"loss": 0.1698,
"step": 4750
},
{
"epoch": 3.0173775671406005,
"grad_norm": 8.557785987854004,
"learning_rate": 5.088888888888889e-07,
"loss": 0.1323,
"step": 4775
},
{
"epoch": 3.0331753554502368,
"grad_norm": 7.7341132164001465,
"learning_rate": 4.533333333333334e-07,
"loss": 0.1288,
"step": 4800
},
{
"epoch": 3.0489731437598735,
"grad_norm": 6.455957889556885,
"learning_rate": 3.9777777777777783e-07,
"loss": 0.1253,
"step": 4825
},
{
"epoch": 3.0647709320695102,
"grad_norm": 9.151886940002441,
"learning_rate": 3.422222222222223e-07,
"loss": 0.1369,
"step": 4850
},
{
"epoch": 3.080568720379147,
"grad_norm": 8.11299991607666,
"learning_rate": 2.866666666666667e-07,
"loss": 0.1272,
"step": 4875
},
{
"epoch": 3.0963665086887837,
"grad_norm": 6.720188617706299,
"learning_rate": 2.3111111111111112e-07,
"loss": 0.1329,
"step": 4900
},
{
"epoch": 3.11216429699842,
"grad_norm": 7.401381969451904,
"learning_rate": 1.7555555555555558e-07,
"loss": 0.1257,
"step": 4925
},
{
"epoch": 3.1279620853080567,
"grad_norm": 7.703917026519775,
"learning_rate": 1.2000000000000002e-07,
"loss": 0.1224,
"step": 4950
},
{
"epoch": 3.1437598736176935,
"grad_norm": 7.16009521484375,
"learning_rate": 6.444444444444445e-08,
"loss": 0.1296,
"step": 4975
},
{
"epoch": 3.1595576619273302,
"grad_norm": 7.600194931030273,
"learning_rate": 8.88888888888889e-09,
"loss": 0.1313,
"step": 5000
},
{
"epoch": 3.1595576619273302,
"eval_loss": 0.4071974754333496,
"eval_runtime": 1289.8235,
"eval_samples_per_second": 3.024,
"eval_steps_per_second": 0.189,
"eval_wer": 0.2801361924079977,
"step": 5000
},
{
"epoch": 3.1595576619273302,
"step": 5000,
"total_flos": 2.727921844224e+20,
"train_loss": 0.3573408980369568,
"train_runtime": 57234.8416,
"train_samples_per_second": 2.796,
"train_steps_per_second": 0.087
}
],
"logging_steps": 25,
"max_steps": 5000,
"num_input_tokens_seen": 0,
"num_train_epochs": 4,
"save_steps": 1000,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 2.727921844224e+20,
"train_batch_size": 16,
"trial_name": null,
"trial_params": null
}