|
{ |
|
"best_metric": 0.11493609100580215, |
|
"best_model_checkpoint": "/kaggle/working/checkpoint-11812", |
|
"epoch": 8.999619047619047, |
|
"eval_steps": 500, |
|
"global_step": 11812, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.7619047619047619, |
|
"grad_norm": 1.9151228666305542, |
|
"learning_rate": 9.240091463414634e-06, |
|
"loss": 0.5245, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.9996190476190476, |
|
"eval_avg_time_per_word": 0.2856894375340317, |
|
"eval_bleu": 0.3241854207405991, |
|
"eval_cer": 0.6666666865348816, |
|
"eval_loss": 0.20001943409442902, |
|
"eval_runtime": 241.8615, |
|
"eval_sacrebleu": 32.41854207405989, |
|
"eval_samples_per_second": 28.942, |
|
"eval_steps_per_second": 0.604, |
|
"eval_wer": 0.6888888888888889, |
|
"step": 1312 |
|
}, |
|
{ |
|
"epoch": 1.5238095238095237, |
|
"grad_norm": 2.0819993019104004, |
|
"learning_rate": 8.478658536585368e-06, |
|
"loss": 0.2796, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_avg_time_per_word": 0.2867684533090989, |
|
"eval_bleu": 0.33444843598328117, |
|
"eval_cer": 0.6666666865348816, |
|
"eval_loss": 0.15544836223125458, |
|
"eval_runtime": 242.1808, |
|
"eval_sacrebleu": 33.44484359832811, |
|
"eval_samples_per_second": 28.904, |
|
"eval_steps_per_second": 0.603, |
|
"eval_wer": 0.6888888888888889, |
|
"step": 2625 |
|
}, |
|
{ |
|
"epoch": 2.2857142857142856, |
|
"grad_norm": 1.885176181793213, |
|
"learning_rate": 7.716463414634146e-06, |
|
"loss": 0.2149, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 2.9996190476190474, |
|
"eval_avg_time_per_word": 0.2856631360126506, |
|
"eval_bleu": 0.3407651434588302, |
|
"eval_cer": 0.6666666865348816, |
|
"eval_loss": 0.13238175213336945, |
|
"eval_runtime": 243.4299, |
|
"eval_sacrebleu": 34.076514345883034, |
|
"eval_samples_per_second": 28.756, |
|
"eval_steps_per_second": 0.6, |
|
"eval_wer": 0.6888888888888889, |
|
"step": 3937 |
|
}, |
|
{ |
|
"epoch": 3.0476190476190474, |
|
"grad_norm": 1.629309892654419, |
|
"learning_rate": 6.954268292682927e-06, |
|
"loss": 0.1782, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 3.8095238095238093, |
|
"grad_norm": 1.4296305179595947, |
|
"learning_rate": 6.192073170731708e-06, |
|
"loss": 0.1498, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_avg_time_per_word": 0.28190079163009985, |
|
"eval_bleu": 0.342484737426721, |
|
"eval_cer": 0.6666666865348816, |
|
"eval_loss": 0.12768897414207458, |
|
"eval_runtime": 240.7041, |
|
"eval_sacrebleu": 34.248473742672104, |
|
"eval_samples_per_second": 29.081, |
|
"eval_steps_per_second": 0.607, |
|
"eval_wer": 0.6888888888888889, |
|
"step": 5250 |
|
}, |
|
{ |
|
"epoch": 4.571428571428571, |
|
"grad_norm": 1.3963040113449097, |
|
"learning_rate": 5.429878048780488e-06, |
|
"loss": 0.1318, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 4.999619047619047, |
|
"eval_avg_time_per_word": 0.2839514807375623, |
|
"eval_bleu": 0.3428596484328475, |
|
"eval_cer": 0.6666666865348816, |
|
"eval_loss": 0.12459039688110352, |
|
"eval_runtime": 243.3533, |
|
"eval_sacrebleu": 34.28596484328476, |
|
"eval_samples_per_second": 28.765, |
|
"eval_steps_per_second": 0.6, |
|
"eval_wer": 0.6888888888888889, |
|
"step": 6562 |
|
}, |
|
{ |
|
"epoch": 5.333333333333333, |
|
"grad_norm": 1.245492935180664, |
|
"learning_rate": 4.667682926829269e-06, |
|
"loss": 0.1188, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_avg_time_per_word": 0.2730801561768131, |
|
"eval_bleu": 0.3446570827661078, |
|
"eval_cer": 0.6666666865348816, |
|
"eval_loss": 0.11544822156429291, |
|
"eval_runtime": 241.1081, |
|
"eval_sacrebleu": 34.46570827661079, |
|
"eval_samples_per_second": 29.033, |
|
"eval_steps_per_second": 0.606, |
|
"eval_wer": 0.6888888888888889, |
|
"step": 7875 |
|
}, |
|
{ |
|
"epoch": 6.095238095238095, |
|
"grad_norm": 1.6760395765304565, |
|
"learning_rate": 3.90625e-06, |
|
"loss": 0.109, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 6.857142857142857, |
|
"grad_norm": 1.5629112720489502, |
|
"learning_rate": 3.1440548780487807e-06, |
|
"loss": 0.0996, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 6.999619047619047, |
|
"eval_avg_time_per_word": 0.2809329230335656, |
|
"eval_bleu": 0.34531231334616685, |
|
"eval_cer": 0.6666666865348816, |
|
"eval_loss": 0.11558092385530472, |
|
"eval_runtime": 240.4449, |
|
"eval_sacrebleu": 34.5312313346167, |
|
"eval_samples_per_second": 29.113, |
|
"eval_steps_per_second": 0.607, |
|
"eval_wer": 0.6888888888888889, |
|
"step": 9187 |
|
}, |
|
{ |
|
"epoch": 7.619047619047619, |
|
"grad_norm": 1.3507906198501587, |
|
"learning_rate": 2.3818597560975614e-06, |
|
"loss": 0.092, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_avg_time_per_word": 0.27973620380425623, |
|
"eval_bleu": 0.34567075563975486, |
|
"eval_cer": 0.6666666865348816, |
|
"eval_loss": 0.11671777069568634, |
|
"eval_runtime": 240.0499, |
|
"eval_sacrebleu": 34.56707556397547, |
|
"eval_samples_per_second": 29.161, |
|
"eval_steps_per_second": 0.608, |
|
"eval_wer": 0.6888888888888889, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 8.380952380952381, |
|
"grad_norm": 1.3803397417068481, |
|
"learning_rate": 1.6196646341463414e-06, |
|
"loss": 0.0873, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 8.999619047619047, |
|
"eval_avg_time_per_word": 0.27570831504890414, |
|
"eval_bleu": 0.3461272171150006, |
|
"eval_cer": 0.6666666865348816, |
|
"eval_loss": 0.11493609100580215, |
|
"eval_runtime": 241.1666, |
|
"eval_sacrebleu": 34.61272171150005, |
|
"eval_samples_per_second": 29.026, |
|
"eval_steps_per_second": 0.605, |
|
"eval_wer": 0.6888888888888889, |
|
"step": 11812 |
|
} |
|
], |
|
"logging_steps": 1000, |
|
"max_steps": 13120, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 10, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 1.0292481466328678e+17, |
|
"train_batch_size": 12, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|