|
{ |
|
"best_metric": 0.860730593607306, |
|
"best_model_checkpoint": "tmp/tst-translation354/checkpoint-840", |
|
"epoch": 10.0, |
|
"global_step": 1050, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.3698630136986301, |
|
"eval_loss": 0.3618854582309723, |
|
"eval_runtime": 11.9681, |
|
"eval_samples_per_second": 36.597, |
|
"eval_steps_per_second": 3.092, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.7351598173515982, |
|
"eval_loss": 0.1750085949897766, |
|
"eval_runtime": 13.2291, |
|
"eval_samples_per_second": 33.109, |
|
"eval_steps_per_second": 2.797, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.7922374429223744, |
|
"eval_loss": 0.14155679941177368, |
|
"eval_runtime": 12.7875, |
|
"eval_samples_per_second": 34.252, |
|
"eval_steps_per_second": 2.893, |
|
"step": 315 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.8424657534246576, |
|
"eval_loss": 0.12254055589437485, |
|
"eval_runtime": 14.2052, |
|
"eval_samples_per_second": 30.834, |
|
"eval_steps_per_second": 2.605, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 4.76, |
|
"learning_rate": 2.6190476190476192e-05, |
|
"loss": 0.3579, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.8515981735159818, |
|
"eval_loss": 0.13104116916656494, |
|
"eval_runtime": 12.6738, |
|
"eval_samples_per_second": 34.559, |
|
"eval_steps_per_second": 2.919, |
|
"step": 525 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.8424657534246576, |
|
"eval_loss": 0.1357753723859787, |
|
"eval_runtime": 12.4892, |
|
"eval_samples_per_second": 35.07, |
|
"eval_steps_per_second": 2.963, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_accuracy": 0.8493150684931506, |
|
"eval_loss": 0.16049088537693024, |
|
"eval_runtime": 12.626, |
|
"eval_samples_per_second": 34.69, |
|
"eval_steps_per_second": 2.93, |
|
"step": 735 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.860730593607306, |
|
"eval_loss": 0.1871233731508255, |
|
"eval_runtime": 12.5302, |
|
"eval_samples_per_second": 34.956, |
|
"eval_steps_per_second": 2.953, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_accuracy": 0.8515981735159818, |
|
"eval_loss": 0.19895039498806, |
|
"eval_runtime": 12.66, |
|
"eval_samples_per_second": 34.597, |
|
"eval_steps_per_second": 2.923, |
|
"step": 945 |
|
}, |
|
{ |
|
"epoch": 9.52, |
|
"learning_rate": 2.3809523809523808e-06, |
|
"loss": 0.054, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_accuracy": 0.860730593607306, |
|
"eval_loss": 0.2134334295988083, |
|
"eval_runtime": 12.5085, |
|
"eval_samples_per_second": 35.016, |
|
"eval_steps_per_second": 2.958, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"step": 1050, |
|
"total_flos": 8482350845952000.0, |
|
"train_loss": 0.1974729861531939, |
|
"train_runtime": 785.0626, |
|
"train_samples_per_second": 15.973, |
|
"train_steps_per_second": 1.337 |
|
} |
|
], |
|
"max_steps": 1050, |
|
"num_train_epochs": 10, |
|
"total_flos": 8482350845952000.0, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|