xls-r-300m-it-cv8-ds13 / trainer_state.json
masapasa's picture
End of training
a9dcb74
raw
history blame
8.17 kB
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 49.994535519125684,
"global_step": 4550,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 1.1,
"learning_rate": 3.7499999999999997e-06,
"loss": 14.8405,
"step": 100
},
{
"epoch": 2.2,
"learning_rate": 7.499999999999999e-06,
"loss": 6.8597,
"step": 200
},
{
"epoch": 3.3,
"learning_rate": 1.1249999999999999e-05,
"loss": 4.4132,
"step": 300
},
{
"epoch": 4.39,
"learning_rate": 1.4999999999999999e-05,
"loss": 3.7795,
"step": 400
},
{
"epoch": 5.49,
"learning_rate": 1.875e-05,
"loss": 3.4129,
"step": 500
},
{
"epoch": 5.49,
"eval_loss": 3.3224499225616455,
"eval_runtime": 168.3653,
"eval_samples_per_second": 28.765,
"eval_steps_per_second": 0.903,
"eval_wer": 1.0,
"step": 500
},
{
"epoch": 6.59,
"learning_rate": 2.2499999999999998e-05,
"loss": 3.2045,
"step": 600
},
{
"epoch": 7.69,
"learning_rate": 2.6249999999999998e-05,
"loss": 3.142,
"step": 700
},
{
"epoch": 8.79,
"learning_rate": 2.9999999999999997e-05,
"loss": 3.0872,
"step": 800
},
{
"epoch": 9.89,
"learning_rate": 3.375e-05,
"loss": 2.9848,
"step": 900
},
{
"epoch": 10.98,
"learning_rate": 3.75e-05,
"loss": 2.9323,
"step": 1000
},
{
"epoch": 10.98,
"eval_loss": 2.9127891063690186,
"eval_runtime": 166.7051,
"eval_samples_per_second": 29.051,
"eval_steps_per_second": 0.912,
"eval_wer": 1.0000283848992335,
"step": 1000
},
{
"epoch": 12.09,
"learning_rate": 4.125e-05,
"loss": 2.9013,
"step": 1100
},
{
"epoch": 13.19,
"learning_rate": 4.4999999999999996e-05,
"loss": 2.729,
"step": 1200
},
{
"epoch": 14.28,
"learning_rate": 4.875e-05,
"loss": 2.1939,
"step": 1300
},
{
"epoch": 15.38,
"learning_rate": 5.2499999999999995e-05,
"loss": 1.8125,
"step": 1400
},
{
"epoch": 16.48,
"learning_rate": 5.625e-05,
"loss": 1.6839,
"step": 1500
},
{
"epoch": 16.48,
"eval_loss": 0.7740097641944885,
"eval_runtime": 167.3266,
"eval_samples_per_second": 28.943,
"eval_steps_per_second": 0.908,
"eval_wer": 0.685381776894692,
"step": 1500
},
{
"epoch": 17.58,
"learning_rate": 5.9999999999999995e-05,
"loss": 1.6194,
"step": 1600
},
{
"epoch": 18.68,
"learning_rate": 6.374999999999999e-05,
"loss": 1.5613,
"step": 1700
},
{
"epoch": 19.78,
"learning_rate": 6.75e-05,
"loss": 1.5197,
"step": 1800
},
{
"epoch": 20.87,
"learning_rate": 7.1175e-05,
"loss": 1.5009,
"step": 1900
},
{
"epoch": 21.97,
"learning_rate": 7.492499999999999e-05,
"loss": 1.485,
"step": 2000
},
{
"epoch": 21.97,
"eval_loss": 0.5829736590385437,
"eval_runtime": 166.3956,
"eval_samples_per_second": 29.105,
"eval_steps_per_second": 0.913,
"eval_wer": 0.597615668464377,
"step": 2000
},
{
"epoch": 23.08,
"learning_rate": 7.211764705882351e-05,
"loss": 1.4606,
"step": 2100
},
{
"epoch": 24.17,
"learning_rate": 6.920588235294117e-05,
"loss": 1.4257,
"step": 2200
},
{
"epoch": 25.27,
"learning_rate": 6.626470588235294e-05,
"loss": 1.4002,
"step": 2300
},
{
"epoch": 26.37,
"learning_rate": 6.33235294117647e-05,
"loss": 1.3856,
"step": 2400
},
{
"epoch": 27.47,
"learning_rate": 6.038235294117646e-05,
"loss": 1.362,
"step": 2500
},
{
"epoch": 27.47,
"eval_loss": 0.48657190799713135,
"eval_runtime": 166.2387,
"eval_samples_per_second": 29.133,
"eval_steps_per_second": 0.914,
"eval_wer": 0.490519443655975,
"step": 2500
},
{
"epoch": 28.57,
"learning_rate": 5.744117647058823e-05,
"loss": 1.3342,
"step": 2600
},
{
"epoch": 29.67,
"learning_rate": 5.4499999999999997e-05,
"loss": 1.318,
"step": 2700
},
{
"epoch": 30.77,
"learning_rate": 5.155882352941176e-05,
"loss": 1.3013,
"step": 2800
},
{
"epoch": 31.86,
"learning_rate": 4.861764705882352e-05,
"loss": 1.2849,
"step": 2900
},
{
"epoch": 32.96,
"learning_rate": 4.567647058823529e-05,
"loss": 1.2752,
"step": 3000
},
{
"epoch": 32.96,
"eval_loss": 0.42398178577423096,
"eval_runtime": 167.0219,
"eval_samples_per_second": 28.996,
"eval_steps_per_second": 0.91,
"eval_wer": 0.4966789667896679,
"step": 3000
},
{
"epoch": 34.07,
"learning_rate": 4.2735294117647056e-05,
"loss": 1.2682,
"step": 3100
},
{
"epoch": 35.16,
"learning_rate": 3.979411764705882e-05,
"loss": 1.2428,
"step": 3200
},
{
"epoch": 36.26,
"learning_rate": 3.6852941176470586e-05,
"loss": 1.2282,
"step": 3300
},
{
"epoch": 37.36,
"learning_rate": 3.391176470588235e-05,
"loss": 1.2198,
"step": 3400
},
{
"epoch": 38.46,
"learning_rate": 3.0970588235294116e-05,
"loss": 1.1957,
"step": 3500
},
{
"epoch": 38.46,
"eval_loss": 0.38985687494277954,
"eval_runtime": 166.2154,
"eval_samples_per_second": 29.137,
"eval_steps_per_second": 0.914,
"eval_wer": 0.42577348850411584,
"step": 3500
},
{
"epoch": 39.56,
"learning_rate": 2.8029411764705878e-05,
"loss": 1.1891,
"step": 3600
},
{
"epoch": 40.66,
"learning_rate": 2.5088235294117646e-05,
"loss": 1.1847,
"step": 3700
},
{
"epoch": 41.75,
"learning_rate": 2.2147058823529408e-05,
"loss": 1.1779,
"step": 3800
},
{
"epoch": 42.85,
"learning_rate": 1.9205882352941176e-05,
"loss": 1.1654,
"step": 3900
},
{
"epoch": 43.95,
"learning_rate": 1.626470588235294e-05,
"loss": 1.1646,
"step": 4000
},
{
"epoch": 43.95,
"eval_loss": 0.3597247898578644,
"eval_runtime": 165.7161,
"eval_samples_per_second": 29.225,
"eval_steps_per_second": 0.917,
"eval_wer": 0.401447629860914,
"step": 4000
},
{
"epoch": 45.05,
"learning_rate": 1.3323529411764704e-05,
"loss": 1.1574,
"step": 4100
},
{
"epoch": 46.15,
"learning_rate": 1.0411764705882353e-05,
"loss": 1.1513,
"step": 4200
},
{
"epoch": 47.25,
"learning_rate": 7.470588235294117e-06,
"loss": 1.1412,
"step": 4300
},
{
"epoch": 48.35,
"learning_rate": 4.529411764705882e-06,
"loss": 1.1383,
"step": 4400
},
{
"epoch": 49.45,
"learning_rate": 1.5882352941176468e-06,
"loss": 1.1265,
"step": 4500
},
{
"epoch": 49.45,
"eval_loss": 0.35590532422065735,
"eval_runtime": 165.6811,
"eval_samples_per_second": 29.231,
"eval_steps_per_second": 0.917,
"eval_wer": 0.38288390576213455,
"step": 4500
},
{
"epoch": 49.99,
"step": 4550,
"total_flos": 7.032770514539837e+19,
"train_loss": 2.1873197584885817,
"train_runtime": 245048.5131,
"train_samples_per_second": 2.385,
"train_steps_per_second": 0.019
}
],
"max_steps": 4550,
"num_train_epochs": 50,
"total_flos": 7.032770514539837e+19,
"trial_name": null,
"trial_params": null
}