xls-r-300m-sv-cv8 / trainer_state.json
patrickvonplaten's picture
End of training
bffec0e
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 49.998632010943915,
"global_step": 18250,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.27,
"learning_rate": 3.7125e-06,
"loss": 12.2128,
"step": 100
},
{
"epoch": 0.55,
"learning_rate": 7.4625e-06,
"loss": 6.1116,
"step": 200
},
{
"epoch": 0.82,
"learning_rate": 1.1212499999999998e-05,
"loss": 4.1396,
"step": 300
},
{
"epoch": 1.1,
"learning_rate": 1.49625e-05,
"loss": 3.6725,
"step": 400
},
{
"epoch": 1.37,
"learning_rate": 1.8712499999999997e-05,
"loss": 3.3224,
"step": 500
},
{
"epoch": 1.37,
"eval_loss": 3.335383415222168,
"eval_runtime": 191.0801,
"eval_samples_per_second": 25.345,
"eval_steps_per_second": 3.171,
"eval_wer": 1.0,
"step": 500
},
{
"epoch": 1.64,
"learning_rate": 2.2462499999999997e-05,
"loss": 3.1394,
"step": 600
},
{
"epoch": 1.92,
"learning_rate": 2.6212499999999997e-05,
"loss": 3.0669,
"step": 700
},
{
"epoch": 2.19,
"learning_rate": 2.99625e-05,
"loss": 3.0401,
"step": 800
},
{
"epoch": 2.47,
"learning_rate": 3.37125e-05,
"loss": 2.9719,
"step": 900
},
{
"epoch": 2.74,
"learning_rate": 3.7462499999999996e-05,
"loss": 2.9318,
"step": 1000
},
{
"epoch": 2.74,
"eval_loss": 2.93613600730896,
"eval_runtime": 191.7956,
"eval_samples_per_second": 25.251,
"eval_steps_per_second": 3.16,
"eval_wer": 1.0000283848992335,
"step": 1000
},
{
"epoch": 3.01,
"learning_rate": 4.12125e-05,
"loss": 2.9182,
"step": 1100
},
{
"epoch": 3.29,
"learning_rate": 4.4962499999999995e-05,
"loss": 2.8629,
"step": 1200
},
{
"epoch": 3.56,
"learning_rate": 4.871249999999999e-05,
"loss": 2.7891,
"step": 1300
},
{
"epoch": 3.83,
"learning_rate": 5.2462499999999994e-05,
"loss": 2.4819,
"step": 1400
},
{
"epoch": 4.11,
"learning_rate": 5.62125e-05,
"loss": 2.1371,
"step": 1500
},
{
"epoch": 4.11,
"eval_loss": 1.1156758069992065,
"eval_runtime": 192.2259,
"eval_samples_per_second": 25.194,
"eval_steps_per_second": 3.153,
"eval_wer": 0.8358501277320466,
"step": 1500
},
{
"epoch": 4.38,
"learning_rate": 5.9962499999999994e-05,
"loss": 1.9602,
"step": 1600
},
{
"epoch": 4.66,
"learning_rate": 6.37125e-05,
"loss": 1.8562,
"step": 1700
},
{
"epoch": 4.93,
"learning_rate": 6.746249999999999e-05,
"loss": 1.7867,
"step": 1800
},
{
"epoch": 5.21,
"learning_rate": 7.121249999999999e-05,
"loss": 1.7286,
"step": 1900
},
{
"epoch": 5.48,
"learning_rate": 7.49625e-05,
"loss": 1.6883,
"step": 2000
},
{
"epoch": 5.48,
"eval_loss": 0.6002867221832275,
"eval_runtime": 191.4975,
"eval_samples_per_second": 25.29,
"eval_steps_per_second": 3.165,
"eval_wer": 0.6314220834516038,
"step": 2000
},
{
"epoch": 5.75,
"learning_rate": 7.454307692307691e-05,
"loss": 1.6813,
"step": 2100
},
{
"epoch": 6.03,
"learning_rate": 7.408153846153846e-05,
"loss": 1.6522,
"step": 2200
},
{
"epoch": 6.3,
"learning_rate": 7.361999999999999e-05,
"loss": 1.6091,
"step": 2300
},
{
"epoch": 6.57,
"learning_rate": 7.316307692307692e-05,
"loss": 1.6031,
"step": 2400
},
{
"epoch": 6.85,
"learning_rate": 7.270153846153845e-05,
"loss": 1.5812,
"step": 2500
},
{
"epoch": 6.85,
"eval_loss": 0.4746354818344116,
"eval_runtime": 192.4568,
"eval_samples_per_second": 25.164,
"eval_steps_per_second": 3.149,
"eval_wer": 0.4725234175418677,
"step": 2500
},
{
"epoch": 7.12,
"learning_rate": 7.223999999999999e-05,
"loss": 1.5692,
"step": 2600
},
{
"epoch": 7.4,
"learning_rate": 7.177846153846153e-05,
"loss": 1.5295,
"step": 2700
},
{
"epoch": 7.67,
"learning_rate": 7.131692307692308e-05,
"loss": 1.538,
"step": 2800
},
{
"epoch": 7.94,
"learning_rate": 7.085538461538461e-05,
"loss": 1.5169,
"step": 2900
},
{
"epoch": 8.22,
"learning_rate": 7.039846153846153e-05,
"loss": 1.5145,
"step": 3000
},
{
"epoch": 8.22,
"eval_loss": 0.437589168548584,
"eval_runtime": 191.0945,
"eval_samples_per_second": 25.343,
"eval_steps_per_second": 3.171,
"eval_wer": 0.4736304286119784,
"step": 3000
},
{
"epoch": 8.49,
"learning_rate": 6.993692307692307e-05,
"loss": 1.4912,
"step": 3100
},
{
"epoch": 8.77,
"learning_rate": 6.94753846153846e-05,
"loss": 1.4809,
"step": 3200
},
{
"epoch": 9.04,
"learning_rate": 6.901384615384615e-05,
"loss": 1.4801,
"step": 3300
},
{
"epoch": 9.31,
"learning_rate": 6.855692307692307e-05,
"loss": 1.4658,
"step": 3400
},
{
"epoch": 9.59,
"learning_rate": 6.809538461538462e-05,
"loss": 1.4763,
"step": 3500
},
{
"epoch": 9.59,
"eval_loss": 0.40057530999183655,
"eval_runtime": 190.9848,
"eval_samples_per_second": 25.358,
"eval_steps_per_second": 3.173,
"eval_wer": 0.38626170877093385,
"step": 3500
},
{
"epoch": 9.86,
"learning_rate": 6.763384615384615e-05,
"loss": 1.4436,
"step": 3600
},
{
"epoch": 10.14,
"learning_rate": 6.717230769230768e-05,
"loss": 1.4412,
"step": 3700
},
{
"epoch": 10.41,
"learning_rate": 6.671076923076923e-05,
"loss": 1.4332,
"step": 3800
},
{
"epoch": 10.68,
"learning_rate": 6.624923076923076e-05,
"loss": 1.4142,
"step": 3900
},
{
"epoch": 10.96,
"learning_rate": 6.57876923076923e-05,
"loss": 1.4215,
"step": 4000
},
{
"epoch": 10.96,
"eval_loss": 0.3783426582813263,
"eval_runtime": 191.1153,
"eval_samples_per_second": 25.341,
"eval_steps_per_second": 3.171,
"eval_wer": 0.3628725518024411,
"step": 4000
},
{
"epoch": 11.23,
"learning_rate": 6.532615384615384e-05,
"loss": 1.3928,
"step": 4100
},
{
"epoch": 11.51,
"learning_rate": 6.486461538461538e-05,
"loss": 1.408,
"step": 4200
},
{
"epoch": 11.78,
"learning_rate": 6.440307692307692e-05,
"loss": 1.3892,
"step": 4300
},
{
"epoch": 12.05,
"learning_rate": 6.394153846153845e-05,
"loss": 1.4071,
"step": 4400
},
{
"epoch": 12.33,
"learning_rate": 6.348e-05,
"loss": 1.3638,
"step": 4500
},
{
"epoch": 12.33,
"eval_loss": 0.35546526312828064,
"eval_runtime": 191.7841,
"eval_samples_per_second": 25.252,
"eval_steps_per_second": 3.16,
"eval_wer": 0.3425205790519444,
"step": 4500
},
{
"epoch": 12.6,
"learning_rate": 6.301846153846153e-05,
"loss": 1.3788,
"step": 4600
},
{
"epoch": 12.88,
"learning_rate": 6.255692307692307e-05,
"loss": 1.3885,
"step": 4700
},
{
"epoch": 13.15,
"learning_rate": 6.20953846153846e-05,
"loss": 1.3793,
"step": 4800
},
{
"epoch": 13.42,
"learning_rate": 6.163384615384614e-05,
"loss": 1.3525,
"step": 4900
},
{
"epoch": 13.7,
"learning_rate": 6.117230769230768e-05,
"loss": 1.3561,
"step": 5000
},
{
"epoch": 13.7,
"eval_loss": 0.33404484391212463,
"eval_runtime": 191.5334,
"eval_samples_per_second": 25.285,
"eval_steps_per_second": 3.164,
"eval_wer": 0.3227646891853534,
"step": 5000
},
{
"epoch": 13.97,
"learning_rate": 6.0710769230769224e-05,
"loss": 1.3548,
"step": 5100
},
{
"epoch": 14.25,
"learning_rate": 6.024923076923076e-05,
"loss": 1.3317,
"step": 5200
},
{
"epoch": 14.52,
"learning_rate": 5.97876923076923e-05,
"loss": 1.3356,
"step": 5300
},
{
"epoch": 14.79,
"learning_rate": 5.9326153846153835e-05,
"loss": 1.3564,
"step": 5400
},
{
"epoch": 15.07,
"learning_rate": 5.886461538461538e-05,
"loss": 1.3406,
"step": 5500
},
{
"epoch": 15.07,
"eval_loss": 0.3372970223426819,
"eval_runtime": 191.1166,
"eval_samples_per_second": 25.341,
"eval_steps_per_second": 3.171,
"eval_wer": 0.32949191030371844,
"step": 5500
},
{
"epoch": 15.34,
"learning_rate": 5.840307692307692e-05,
"loss": 1.3158,
"step": 5600
},
{
"epoch": 15.62,
"learning_rate": 5.794153846153845e-05,
"loss": 1.3406,
"step": 5700
},
{
"epoch": 15.89,
"learning_rate": 5.747999999999999e-05,
"loss": 1.3312,
"step": 5800
},
{
"epoch": 16.16,
"learning_rate": 5.701846153846153e-05,
"loss": 1.3263,
"step": 5900
},
{
"epoch": 16.44,
"learning_rate": 5.655692307692308e-05,
"loss": 1.3055,
"step": 6000
},
{
"epoch": 16.44,
"eval_loss": 0.3432324528694153,
"eval_runtime": 190.6681,
"eval_samples_per_second": 25.4,
"eval_steps_per_second": 3.178,
"eval_wer": 0.3210332103321033,
"step": 6000
},
{
"epoch": 16.71,
"learning_rate": 5.609538461538461e-05,
"loss": 1.3106,
"step": 6100
},
{
"epoch": 16.98,
"learning_rate": 5.563384615384615e-05,
"loss": 1.299,
"step": 6200
},
{
"epoch": 17.26,
"learning_rate": 5.517230769230769e-05,
"loss": 1.2942,
"step": 6300
},
{
"epoch": 17.53,
"learning_rate": 5.471076923076922e-05,
"loss": 1.2824,
"step": 6400
},
{
"epoch": 17.81,
"learning_rate": 5.4249230769230766e-05,
"loss": 1.3048,
"step": 6500
},
{
"epoch": 17.81,
"eval_loss": 0.3281940519809723,
"eval_runtime": 190.4965,
"eval_samples_per_second": 25.423,
"eval_steps_per_second": 3.181,
"eval_wer": 0.3117797331819472,
"step": 6500
},
{
"epoch": 18.08,
"learning_rate": 5.3787692307692306e-05,
"loss": 1.2909,
"step": 6600
},
{
"epoch": 18.36,
"learning_rate": 5.332615384615384e-05,
"loss": 1.2934,
"step": 6700
},
{
"epoch": 18.63,
"learning_rate": 5.286461538461538e-05,
"loss": 1.2826,
"step": 6800
},
{
"epoch": 18.9,
"learning_rate": 5.240307692307692e-05,
"loss": 1.2683,
"step": 6900
},
{
"epoch": 19.18,
"learning_rate": 5.194153846153846e-05,
"loss": 1.2863,
"step": 7000
},
{
"epoch": 19.18,
"eval_loss": 0.32255104184150696,
"eval_runtime": 191.9877,
"eval_samples_per_second": 25.226,
"eval_steps_per_second": 3.156,
"eval_wer": 0.3018166335509509,
"step": 7000
},
{
"epoch": 19.45,
"learning_rate": 5.1479999999999995e-05,
"loss": 1.2779,
"step": 7100
},
{
"epoch": 19.73,
"learning_rate": 5.1018461538461534e-05,
"loss": 1.2452,
"step": 7200
},
{
"epoch": 20.0,
"learning_rate": 5.055692307692307e-05,
"loss": 1.2789,
"step": 7300
},
{
"epoch": 20.27,
"learning_rate": 5.009538461538461e-05,
"loss": 1.2589,
"step": 7400
},
{
"epoch": 20.55,
"learning_rate": 4.963846153846154e-05,
"loss": 1.2389,
"step": 7500
},
{
"epoch": 20.55,
"eval_loss": 0.3049624264240265,
"eval_runtime": 191.4537,
"eval_samples_per_second": 25.296,
"eval_steps_per_second": 3.165,
"eval_wer": 0.2985807550383196,
"step": 7500
},
{
"epoch": 20.82,
"learning_rate": 4.917692307692307e-05,
"loss": 1.2441,
"step": 7600
},
{
"epoch": 21.1,
"learning_rate": 4.8719999999999994e-05,
"loss": 1.2391,
"step": 7700
},
{
"epoch": 21.37,
"learning_rate": 4.825846153846154e-05,
"loss": 1.2227,
"step": 7800
},
{
"epoch": 21.64,
"learning_rate": 4.779692307692307e-05,
"loss": 1.2377,
"step": 7900
},
{
"epoch": 21.92,
"learning_rate": 4.734e-05,
"loss": 1.2361,
"step": 8000
},
{
"epoch": 21.92,
"eval_loss": 0.30481210350990295,
"eval_runtime": 190.3668,
"eval_samples_per_second": 25.44,
"eval_steps_per_second": 3.183,
"eval_wer": 0.29804144195288107,
"step": 8000
},
{
"epoch": 22.19,
"learning_rate": 4.687846153846153e-05,
"loss": 1.2605,
"step": 8100
},
{
"epoch": 22.47,
"learning_rate": 4.6416923076923076e-05,
"loss": 1.2185,
"step": 8200
},
{
"epoch": 22.74,
"learning_rate": 4.5955384615384615e-05,
"loss": 1.2321,
"step": 8300
},
{
"epoch": 23.01,
"learning_rate": 4.549384615384615e-05,
"loss": 1.2674,
"step": 8400
},
{
"epoch": 23.29,
"learning_rate": 4.503230769230769e-05,
"loss": 1.2263,
"step": 8500
},
{
"epoch": 23.29,
"eval_loss": 0.3011174499988556,
"eval_runtime": 190.5,
"eval_samples_per_second": 25.423,
"eval_steps_per_second": 3.181,
"eval_wer": 0.2977008231620778,
"step": 8500
},
{
"epoch": 23.56,
"learning_rate": 4.4570769230769226e-05,
"loss": 1.2177,
"step": 8600
},
{
"epoch": 23.83,
"learning_rate": 4.410923076923077e-05,
"loss": 1.2375,
"step": 8700
},
{
"epoch": 24.11,
"learning_rate": 4.3647692307692304e-05,
"loss": 1.2119,
"step": 8800
},
{
"epoch": 24.38,
"learning_rate": 4.3186153846153844e-05,
"loss": 1.221,
"step": 8900
},
{
"epoch": 24.66,
"learning_rate": 4.2724615384615376e-05,
"loss": 1.2225,
"step": 9000
},
{
"epoch": 24.66,
"eval_loss": 0.3017021119594574,
"eval_runtime": 191.4744,
"eval_samples_per_second": 25.293,
"eval_steps_per_second": 3.165,
"eval_wer": 0.2958841896111269,
"step": 9000
},
{
"epoch": 24.93,
"learning_rate": 4.2263076923076915e-05,
"loss": 1.2195,
"step": 9100
},
{
"epoch": 25.21,
"learning_rate": 4.180153846153846e-05,
"loss": 1.2156,
"step": 9200
},
{
"epoch": 25.48,
"learning_rate": 4.134e-05,
"loss": 1.1914,
"step": 9300
},
{
"epoch": 25.75,
"learning_rate": 4.087846153846153e-05,
"loss": 1.1992,
"step": 9400
},
{
"epoch": 26.03,
"learning_rate": 4.041692307692307e-05,
"loss": 1.2044,
"step": 9500
},
{
"epoch": 26.03,
"eval_loss": 0.29767629504203796,
"eval_runtime": 190.7997,
"eval_samples_per_second": 25.383,
"eval_steps_per_second": 3.176,
"eval_wer": 0.2782287822878229,
"step": 9500
},
{
"epoch": 26.3,
"learning_rate": 3.995538461538461e-05,
"loss": 1.1957,
"step": 9600
},
{
"epoch": 26.57,
"learning_rate": 3.949384615384615e-05,
"loss": 1.184,
"step": 9700
},
{
"epoch": 26.85,
"learning_rate": 3.903230769230769e-05,
"loss": 1.1873,
"step": 9800
},
{
"epoch": 27.12,
"learning_rate": 3.857076923076923e-05,
"loss": 1.1762,
"step": 9900
},
{
"epoch": 27.4,
"learning_rate": 3.810923076923076e-05,
"loss": 1.2017,
"step": 10000
},
{
"epoch": 27.4,
"eval_loss": 0.29657357931137085,
"eval_runtime": 191.6103,
"eval_samples_per_second": 25.275,
"eval_steps_per_second": 3.163,
"eval_wer": 0.27814362759012207,
"step": 10000
},
{
"epoch": 27.67,
"learning_rate": 3.76476923076923e-05,
"loss": 1.1837,
"step": 10100
},
{
"epoch": 27.94,
"learning_rate": 3.718615384615384e-05,
"loss": 1.1948,
"step": 10200
},
{
"epoch": 28.22,
"learning_rate": 3.6724615384615386e-05,
"loss": 1.1852,
"step": 10300
},
{
"epoch": 28.49,
"learning_rate": 3.626307692307692e-05,
"loss": 1.1681,
"step": 10400
},
{
"epoch": 28.77,
"learning_rate": 3.580153846153846e-05,
"loss": 1.1912,
"step": 10500
},
{
"epoch": 28.77,
"eval_loss": 0.2999463379383087,
"eval_runtime": 192.5233,
"eval_samples_per_second": 25.155,
"eval_steps_per_second": 3.148,
"eval_wer": 0.2785977859778598,
"step": 10500
},
{
"epoch": 29.04,
"learning_rate": 3.534e-05,
"loss": 1.1658,
"step": 10600
},
{
"epoch": 29.31,
"learning_rate": 3.4878461538461536e-05,
"loss": 1.1691,
"step": 10700
},
{
"epoch": 29.59,
"learning_rate": 3.4416923076923076e-05,
"loss": 1.1531,
"step": 10800
},
{
"epoch": 29.86,
"learning_rate": 3.3955384615384615e-05,
"loss": 1.1569,
"step": 10900
},
{
"epoch": 30.14,
"learning_rate": 3.349384615384615e-05,
"loss": 1.1658,
"step": 11000
},
{
"epoch": 30.14,
"eval_loss": 0.29912158846855164,
"eval_runtime": 192.0401,
"eval_samples_per_second": 25.219,
"eval_steps_per_second": 3.156,
"eval_wer": 0.2757309111552654,
"step": 11000
},
{
"epoch": 30.41,
"learning_rate": 3.303230769230769e-05,
"loss": 1.1565,
"step": 11100
},
{
"epoch": 30.68,
"learning_rate": 3.2570769230769226e-05,
"loss": 1.1556,
"step": 11200
},
{
"epoch": 30.96,
"learning_rate": 3.2109230769230765e-05,
"loss": 1.1538,
"step": 11300
},
{
"epoch": 31.23,
"learning_rate": 3.1647692307692304e-05,
"loss": 1.1531,
"step": 11400
},
{
"epoch": 31.51,
"learning_rate": 3.118615384615384e-05,
"loss": 1.148,
"step": 11500
},
{
"epoch": 31.51,
"eval_loss": 0.2915021479129791,
"eval_runtime": 191.8689,
"eval_samples_per_second": 25.241,
"eval_steps_per_second": 3.158,
"eval_wer": 0.268379222253761,
"step": 11500
},
{
"epoch": 31.78,
"learning_rate": 3.072461538461538e-05,
"loss": 1.1414,
"step": 11600
},
{
"epoch": 32.05,
"learning_rate": 3.026307692307692e-05,
"loss": 1.1364,
"step": 11700
},
{
"epoch": 32.33,
"learning_rate": 2.980153846153846e-05,
"loss": 1.1531,
"step": 11800
},
{
"epoch": 32.6,
"learning_rate": 2.9339999999999997e-05,
"loss": 1.1331,
"step": 11900
},
{
"epoch": 32.88,
"learning_rate": 2.8878461538461536e-05,
"loss": 1.1423,
"step": 12000
},
{
"epoch": 32.88,
"eval_loss": 0.29132989048957825,
"eval_runtime": 191.3127,
"eval_samples_per_second": 25.315,
"eval_steps_per_second": 3.168,
"eval_wer": 0.2642634118648879,
"step": 12000
},
{
"epoch": 33.15,
"learning_rate": 2.8416923076923075e-05,
"loss": 1.1527,
"step": 12100
},
{
"epoch": 33.42,
"learning_rate": 2.795538461538461e-05,
"loss": 1.1155,
"step": 12200
},
{
"epoch": 33.7,
"learning_rate": 2.7498461538461536e-05,
"loss": 1.1082,
"step": 12300
},
{
"epoch": 33.97,
"learning_rate": 2.703692307692307e-05,
"loss": 1.1172,
"step": 12400
},
{
"epoch": 34.25,
"learning_rate": 2.6575384615384614e-05,
"loss": 1.123,
"step": 12500
},
{
"epoch": 34.25,
"eval_loss": 0.2776812016963959,
"eval_runtime": 191.5218,
"eval_samples_per_second": 25.287,
"eval_steps_per_second": 3.164,
"eval_wer": 0.26304286119784276,
"step": 12500
},
{
"epoch": 34.52,
"learning_rate": 2.611384615384615e-05,
"loss": 1.1189,
"step": 12600
},
{
"epoch": 34.79,
"learning_rate": 2.5652307692307693e-05,
"loss": 1.1115,
"step": 12700
},
{
"epoch": 35.07,
"learning_rate": 2.519076923076923e-05,
"loss": 1.1182,
"step": 12800
},
{
"epoch": 35.34,
"learning_rate": 2.4729230769230764e-05,
"loss": 1.1172,
"step": 12900
},
{
"epoch": 35.62,
"learning_rate": 2.4267692307692307e-05,
"loss": 1.1297,
"step": 13000
},
{
"epoch": 35.62,
"eval_loss": 0.28734585642814636,
"eval_runtime": 191.3152,
"eval_samples_per_second": 25.314,
"eval_steps_per_second": 3.168,
"eval_wer": 0.26460403065569116,
"step": 13000
},
{
"epoch": 35.89,
"learning_rate": 2.3806153846153843e-05,
"loss": 1.1112,
"step": 13100
},
{
"epoch": 36.16,
"learning_rate": 2.3349230769230767e-05,
"loss": 1.1157,
"step": 13200
},
{
"epoch": 36.44,
"learning_rate": 2.2887692307692303e-05,
"loss": 1.0938,
"step": 13300
},
{
"epoch": 36.71,
"learning_rate": 2.2426153846153846e-05,
"loss": 1.1007,
"step": 13400
},
{
"epoch": 36.98,
"learning_rate": 2.1964615384615382e-05,
"loss": 1.0987,
"step": 13500
},
{
"epoch": 36.98,
"eval_loss": 0.28285086154937744,
"eval_runtime": 190.4602,
"eval_samples_per_second": 25.428,
"eval_steps_per_second": 3.182,
"eval_wer": 0.2619358501277321,
"step": 13500
},
{
"epoch": 37.26,
"learning_rate": 2.1503076923076924e-05,
"loss": 1.0967,
"step": 13600
},
{
"epoch": 37.53,
"learning_rate": 2.104153846153846e-05,
"loss": 1.0922,
"step": 13700
},
{
"epoch": 37.81,
"learning_rate": 2.0579999999999996e-05,
"loss": 1.0885,
"step": 13800
},
{
"epoch": 38.08,
"learning_rate": 2.011846153846154e-05,
"loss": 1.0979,
"step": 13900
},
{
"epoch": 38.36,
"learning_rate": 1.9656923076923075e-05,
"loss": 1.0873,
"step": 14000
},
{
"epoch": 38.36,
"eval_loss": 0.2864430546760559,
"eval_runtime": 191.1374,
"eval_samples_per_second": 25.338,
"eval_steps_per_second": 3.17,
"eval_wer": 0.26080045415838776,
"step": 14000
},
{
"epoch": 38.63,
"learning_rate": 1.9195384615384614e-05,
"loss": 1.0868,
"step": 14100
},
{
"epoch": 38.9,
"learning_rate": 1.8733846153846153e-05,
"loss": 1.0831,
"step": 14200
},
{
"epoch": 39.18,
"learning_rate": 1.8272307692307692e-05,
"loss": 1.0907,
"step": 14300
},
{
"epoch": 39.45,
"learning_rate": 1.781076923076923e-05,
"loss": 1.093,
"step": 14400
},
{
"epoch": 39.73,
"learning_rate": 1.7349230769230767e-05,
"loss": 1.0848,
"step": 14500
},
{
"epoch": 39.73,
"eval_loss": 0.2827305793762207,
"eval_runtime": 193.1681,
"eval_samples_per_second": 25.071,
"eval_steps_per_second": 3.137,
"eval_wer": 0.2576781152426909,
"step": 14500
},
{
"epoch": 40.0,
"learning_rate": 1.6887692307692307e-05,
"loss": 1.0913,
"step": 14600
},
{
"epoch": 40.27,
"learning_rate": 1.6426153846153846e-05,
"loss": 1.0764,
"step": 14700
},
{
"epoch": 40.55,
"learning_rate": 1.5964615384615385e-05,
"loss": 1.0668,
"step": 14800
},
{
"epoch": 40.82,
"learning_rate": 1.550307692307692e-05,
"loss": 1.0845,
"step": 14900
},
{
"epoch": 41.1,
"learning_rate": 1.5041538461538462e-05,
"loss": 1.0628,
"step": 15000
},
{
"epoch": 41.1,
"eval_loss": 0.28957271575927734,
"eval_runtime": 192.7832,
"eval_samples_per_second": 25.121,
"eval_steps_per_second": 3.143,
"eval_wer": 0.258103888731195,
"step": 15000
},
{
"epoch": 41.37,
"learning_rate": 1.4579999999999998e-05,
"loss": 1.0746,
"step": 15100
},
{
"epoch": 41.64,
"learning_rate": 1.4118461538461537e-05,
"loss": 1.0889,
"step": 15200
},
{
"epoch": 41.92,
"learning_rate": 1.3656923076923076e-05,
"loss": 1.0669,
"step": 15300
},
{
"epoch": 42.19,
"learning_rate": 1.3195384615384615e-05,
"loss": 1.069,
"step": 15400
},
{
"epoch": 42.47,
"learning_rate": 1.2733846153846153e-05,
"loss": 1.0815,
"step": 15500
},
{
"epoch": 42.47,
"eval_loss": 0.28144747018814087,
"eval_runtime": 192.799,
"eval_samples_per_second": 25.119,
"eval_steps_per_second": 3.143,
"eval_wer": 0.25608856088560883,
"step": 15500
},
{
"epoch": 42.74,
"learning_rate": 1.227230769230769e-05,
"loss": 1.0468,
"step": 15600
},
{
"epoch": 43.01,
"learning_rate": 1.1815384615384615e-05,
"loss": 1.073,
"step": 15700
},
{
"epoch": 43.29,
"learning_rate": 1.1353846153846154e-05,
"loss": 1.05,
"step": 15800
},
{
"epoch": 43.56,
"learning_rate": 1.089230769230769e-05,
"loss": 1.0562,
"step": 15900
},
{
"epoch": 43.83,
"learning_rate": 1.043076923076923e-05,
"loss": 1.0587,
"step": 16000
},
{
"epoch": 43.83,
"eval_loss": 0.27383875846862793,
"eval_runtime": 191.944,
"eval_samples_per_second": 25.231,
"eval_steps_per_second": 3.157,
"eval_wer": 0.25421515753619073,
"step": 16000
},
{
"epoch": 44.11,
"learning_rate": 9.969230769230769e-06,
"loss": 1.0479,
"step": 16100
},
{
"epoch": 44.38,
"learning_rate": 9.507692307692308e-06,
"loss": 1.0464,
"step": 16200
},
{
"epoch": 44.66,
"learning_rate": 9.046153846153845e-06,
"loss": 1.0515,
"step": 16300
},
{
"epoch": 44.93,
"learning_rate": 8.584615384615385e-06,
"loss": 1.0456,
"step": 16400
},
{
"epoch": 45.21,
"learning_rate": 8.123076923076922e-06,
"loss": 1.0709,
"step": 16500
},
{
"epoch": 45.21,
"eval_loss": 0.27852663397789,
"eval_runtime": 191.9755,
"eval_samples_per_second": 25.227,
"eval_steps_per_second": 3.157,
"eval_wer": 0.25784842463809254,
"step": 16500
},
{
"epoch": 45.48,
"learning_rate": 7.661538461538461e-06,
"loss": 1.0616,
"step": 16600
},
{
"epoch": 45.75,
"learning_rate": 7.2e-06,
"loss": 1.0707,
"step": 16700
},
{
"epoch": 46.03,
"learning_rate": 6.738461538461537e-06,
"loss": 1.037,
"step": 16800
},
{
"epoch": 46.3,
"learning_rate": 6.2769230769230764e-06,
"loss": 1.0475,
"step": 16900
},
{
"epoch": 46.57,
"learning_rate": 5.815384615384615e-06,
"loss": 1.0512,
"step": 17000
},
{
"epoch": 46.57,
"eval_loss": 0.27928975224494934,
"eval_runtime": 191.7381,
"eval_samples_per_second": 25.258,
"eval_steps_per_second": 3.161,
"eval_wer": 0.2539029236446211,
"step": 17000
},
{
"epoch": 46.85,
"learning_rate": 5.353846153846153e-06,
"loss": 1.0454,
"step": 17100
},
{
"epoch": 47.12,
"learning_rate": 4.892307692307692e-06,
"loss": 1.0505,
"step": 17200
},
{
"epoch": 47.4,
"learning_rate": 4.43076923076923e-06,
"loss": 1.0351,
"step": 17300
},
{
"epoch": 47.67,
"learning_rate": 3.969230769230768e-06,
"loss": 1.0591,
"step": 17400
},
{
"epoch": 47.94,
"learning_rate": 3.5076923076923076e-06,
"loss": 1.0396,
"step": 17500
},
{
"epoch": 47.94,
"eval_loss": 0.27876144647598267,
"eval_runtime": 191.8086,
"eval_samples_per_second": 25.249,
"eval_steps_per_second": 3.159,
"eval_wer": 0.25251206358217426,
"step": 17500
},
{
"epoch": 48.22,
"learning_rate": 3.046153846153846e-06,
"loss": 1.0482,
"step": 17600
},
{
"epoch": 48.49,
"learning_rate": 2.5846153846153844e-06,
"loss": 1.0433,
"step": 17700
},
{
"epoch": 48.77,
"learning_rate": 2.1230769230769228e-06,
"loss": 1.036,
"step": 17800
},
{
"epoch": 49.04,
"learning_rate": 1.6615384615384613e-06,
"loss": 1.0739,
"step": 17900
},
{
"epoch": 49.31,
"learning_rate": 1.2046153846153845e-06,
"loss": 1.0481,
"step": 18000
},
{
"epoch": 49.31,
"eval_loss": 0.27769750356674194,
"eval_runtime": 191.0585,
"eval_samples_per_second": 25.348,
"eval_steps_per_second": 3.172,
"eval_wer": 0.2534487652568833,
"step": 18000
},
{
"epoch": 49.59,
"learning_rate": 7.43076923076923e-07,
"loss": 1.0195,
"step": 18100
},
{
"epoch": 49.86,
"learning_rate": 2.8153846153846154e-07,
"loss": 1.0473,
"step": 18200
},
{
"epoch": 50.0,
"step": 18250,
"total_flos": 5.432980623924042e+19,
"train_loss": 1.449225697974636,
"train_runtime": 31196.8756,
"train_samples_per_second": 18.736,
"train_steps_per_second": 0.585
}
],
"max_steps": 18250,
"num_train_epochs": 50,
"total_flos": 5.432980623924042e+19,
"trial_name": null,
"trial_params": null
}