|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 5.719392314566577, |
|
"eval_steps": 200, |
|
"global_step": 3200, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 2e-05, |
|
"loss": 0.5179, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 4e-05, |
|
"loss": 0.4435, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"eval_f1_score": 0.4513274336283186, |
|
"eval_label_f1": 0.7168141592920354, |
|
"eval_loss": 0.4356614947319031, |
|
"eval_runtime": 45.8378, |
|
"eval_samples_per_second": 2.182, |
|
"eval_steps_per_second": 0.087, |
|
"eval_wer": 0.059877955758962625, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 6e-05, |
|
"loss": 0.4296, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 8e-05, |
|
"loss": 0.4309, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"eval_f1_score": 0.6751054852320675, |
|
"eval_label_f1": 0.8354430379746836, |
|
"eval_loss": 0.43058258295059204, |
|
"eval_runtime": 45.2842, |
|
"eval_samples_per_second": 2.208, |
|
"eval_steps_per_second": 0.088, |
|
"eval_wer": 0.059877955758962625, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 0.0001, |
|
"loss": 0.4262, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"learning_rate": 9.987820251299122e-05, |
|
"loss": 0.4235, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"eval_f1_score": 0.6721991701244813, |
|
"eval_label_f1": 0.8547717842323652, |
|
"eval_loss": 0.42819398641586304, |
|
"eval_runtime": 45.2307, |
|
"eval_samples_per_second": 2.211, |
|
"eval_steps_per_second": 0.088, |
|
"eval_wer": 0.059877955758962625, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"learning_rate": 9.951340343707852e-05, |
|
"loss": 0.423, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"learning_rate": 9.890738003669029e-05, |
|
"loss": 0.4267, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"eval_f1_score": 0.7073170731707317, |
|
"eval_label_f1": 0.845528455284553, |
|
"eval_loss": 0.4269372224807739, |
|
"eval_runtime": 45.2446, |
|
"eval_samples_per_second": 2.21, |
|
"eval_steps_per_second": 0.088, |
|
"eval_wer": 0.059877955758962625, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 1.61, |
|
"learning_rate": 9.806308479691595e-05, |
|
"loss": 0.4283, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 1.79, |
|
"learning_rate": 9.698463103929542e-05, |
|
"loss": 0.4254, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 1.79, |
|
"eval_f1_score": 0.7272727272727273, |
|
"eval_label_f1": 0.8677685950413223, |
|
"eval_loss": 0.42638763785362244, |
|
"eval_runtime": 45.2086, |
|
"eval_samples_per_second": 2.212, |
|
"eval_steps_per_second": 0.088, |
|
"eval_wer": 0.059877955758962625, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"learning_rate": 9.567727288213005e-05, |
|
"loss": 0.4225, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 2.14, |
|
"learning_rate": 9.414737964294636e-05, |
|
"loss": 0.4264, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 2.14, |
|
"eval_f1_score": 0.7398373983739838, |
|
"eval_label_f1": 0.8780487804878049, |
|
"eval_loss": 0.42636168003082275, |
|
"eval_runtime": 45.2052, |
|
"eval_samples_per_second": 2.212, |
|
"eval_steps_per_second": 0.088, |
|
"eval_wer": 0.059877955758962625, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 2.32, |
|
"learning_rate": 9.24024048078213e-05, |
|
"loss": 0.4263, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 2.5, |
|
"learning_rate": 9.045084971874738e-05, |
|
"loss": 0.4206, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 2.5, |
|
"eval_f1_score": 0.7206477732793523, |
|
"eval_label_f1": 0.8582995951417003, |
|
"eval_loss": 0.4262264370918274, |
|
"eval_runtime": 45.2829, |
|
"eval_samples_per_second": 2.208, |
|
"eval_steps_per_second": 0.088, |
|
"eval_wer": 0.059877955758962625, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 2.68, |
|
"learning_rate": 8.83022221559489e-05, |
|
"loss": 0.4244, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 2.86, |
|
"learning_rate": 8.596699001693255e-05, |
|
"loss": 0.4232, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 2.86, |
|
"eval_f1_score": 0.7410358565737052, |
|
"eval_label_f1": 0.8685258964143426, |
|
"eval_loss": 0.42596718668937683, |
|
"eval_runtime": 45.2943, |
|
"eval_samples_per_second": 2.208, |
|
"eval_steps_per_second": 0.088, |
|
"eval_wer": 0.059877955758962625, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 3.04, |
|
"learning_rate": 8.345653031794292e-05, |
|
"loss": 0.4224, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 3.22, |
|
"learning_rate": 8.07830737662829e-05, |
|
"loss": 0.4249, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 3.22, |
|
"eval_f1_score": 0.7603305785123967, |
|
"eval_label_f1": 0.8925619834710744, |
|
"eval_loss": 0.4255012273788452, |
|
"eval_runtime": 45.1763, |
|
"eval_samples_per_second": 2.214, |
|
"eval_steps_per_second": 0.089, |
|
"eval_wer": 0.059877955758962625, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 3.4, |
|
"learning_rate": 7.795964517353735e-05, |
|
"loss": 0.4231, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 3.57, |
|
"learning_rate": 7.500000000000001e-05, |
|
"loss": 0.4239, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 3.57, |
|
"eval_f1_score": 0.7630522088353413, |
|
"eval_label_f1": 0.8835341365461847, |
|
"eval_loss": 0.42558813095092773, |
|
"eval_runtime": 45.2083, |
|
"eval_samples_per_second": 2.212, |
|
"eval_steps_per_second": 0.088, |
|
"eval_wer": 0.059877955758962625, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 3.75, |
|
"learning_rate": 7.191855733945387e-05, |
|
"loss": 0.4222, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 3.93, |
|
"learning_rate": 6.873032967079561e-05, |
|
"loss": 0.4213, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 3.93, |
|
"eval_f1_score": 0.7692307692307692, |
|
"eval_label_f1": 0.8987854251012146, |
|
"eval_loss": 0.42547106742858887, |
|
"eval_runtime": 45.1934, |
|
"eval_samples_per_second": 2.213, |
|
"eval_steps_per_second": 0.089, |
|
"eval_wer": 0.059877955758962625, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 4.11, |
|
"learning_rate": 6.545084971874738e-05, |
|
"loss": 0.4219, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 4.29, |
|
"learning_rate": 6.209609477998338e-05, |
|
"loss": 0.4213, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 4.29, |
|
"eval_f1_score": 0.7768595041322314, |
|
"eval_label_f1": 0.8925619834710744, |
|
"eval_loss": 0.4256010055541992, |
|
"eval_runtime": 45.2023, |
|
"eval_samples_per_second": 2.212, |
|
"eval_steps_per_second": 0.088, |
|
"eval_wer": 0.059877955758962625, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 4.47, |
|
"learning_rate": 5.868240888334653e-05, |
|
"loss": 0.4206, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 4.65, |
|
"learning_rate": 5.522642316338268e-05, |
|
"loss": 0.4244, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 4.65, |
|
"eval_f1_score": 0.7710843373493976, |
|
"eval_label_f1": 0.899598393574297, |
|
"eval_loss": 0.42528876662254333, |
|
"eval_runtime": 45.2, |
|
"eval_samples_per_second": 2.212, |
|
"eval_steps_per_second": 0.088, |
|
"eval_wer": 0.059877955758962625, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 4.83, |
|
"learning_rate": 5.174497483512506e-05, |
|
"loss": 0.4234, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"learning_rate": 4.825502516487497e-05, |
|
"loss": 0.4234, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_f1_score": 0.7385892116182572, |
|
"eval_label_f1": 0.8796680497925312, |
|
"eval_loss": 0.42544686794281006, |
|
"eval_runtime": 45.4613, |
|
"eval_samples_per_second": 2.2, |
|
"eval_steps_per_second": 0.088, |
|
"eval_wer": 0.059877955758962625, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 5.18, |
|
"learning_rate": 4.477357683661734e-05, |
|
"loss": 0.4227, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 5.36, |
|
"learning_rate": 4.131759111665349e-05, |
|
"loss": 0.4222, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 5.36, |
|
"eval_f1_score": 0.7916666666666667, |
|
"eval_label_f1": 0.9, |
|
"eval_loss": 0.4252234399318695, |
|
"eval_runtime": 45.4771, |
|
"eval_samples_per_second": 2.199, |
|
"eval_steps_per_second": 0.088, |
|
"eval_wer": 0.059877955758962625, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 5.54, |
|
"learning_rate": 3.790390522001662e-05, |
|
"loss": 0.4233, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 5.72, |
|
"learning_rate": 3.4549150281252636e-05, |
|
"loss": 0.4239, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 5.72, |
|
"eval_f1_score": 0.7800829875518673, |
|
"eval_label_f1": 0.896265560165975, |
|
"eval_loss": 0.42535117268562317, |
|
"eval_runtime": 45.3979, |
|
"eval_samples_per_second": 2.203, |
|
"eval_steps_per_second": 0.088, |
|
"eval_wer": 0.059877955758962625, |
|
"step": 3200 |
|
} |
|
], |
|
"logging_steps": 100, |
|
"max_steps": 5000, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 9, |
|
"save_steps": 200, |
|
"total_flos": 8.76745040626764e+20, |
|
"train_batch_size": 64, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|