|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 15.0, |
|
"global_step": 1410, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 6.666666666666667e-05, |
|
"loss": 3.4338, |
|
"step": 94 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_loss": 2.4137182235717773, |
|
"eval_runtime": 36.2713, |
|
"eval_samples_per_second": 5.514, |
|
"eval_steps_per_second": 0.689, |
|
"step": 94 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 3.3333333333333335e-05, |
|
"loss": 2.9565, |
|
"step": 188 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_loss": 2.173758029937744, |
|
"eval_runtime": 35.902, |
|
"eval_samples_per_second": 5.571, |
|
"eval_steps_per_second": 0.696, |
|
"step": 188 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"learning_rate": 0.0, |
|
"loss": 2.7101, |
|
"step": 282 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_loss": 2.012174606323242, |
|
"eval_runtime": 35.6264, |
|
"eval_samples_per_second": 5.614, |
|
"eval_steps_per_second": 0.702, |
|
"step": 282 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"learning_rate": 4.2857142857142856e-05, |
|
"loss": 2.7515, |
|
"step": 376 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_loss": 1.964645504951477, |
|
"eval_runtime": 35.4193, |
|
"eval_samples_per_second": 5.647, |
|
"eval_steps_per_second": 0.706, |
|
"step": 376 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"learning_rate": 2.857142857142857e-05, |
|
"loss": 2.724, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_loss": 2.1284220218658447, |
|
"eval_runtime": 34.8555, |
|
"eval_samples_per_second": 5.738, |
|
"eval_steps_per_second": 0.717, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"learning_rate": 1.4285714285714285e-05, |
|
"loss": 2.6193, |
|
"step": 564 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_loss": 1.9379758834838867, |
|
"eval_runtime": 36.3088, |
|
"eval_samples_per_second": 5.508, |
|
"eval_steps_per_second": 0.689, |
|
"step": 564 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"learning_rate": 0.0, |
|
"loss": 2.5032, |
|
"step": 658 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_loss": 1.9285995960235596, |
|
"eval_runtime": 35.9858, |
|
"eval_samples_per_second": 5.558, |
|
"eval_steps_per_second": 0.695, |
|
"step": 658 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"learning_rate": 4.666666666666667e-05, |
|
"loss": 2.5342, |
|
"step": 752 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_loss": 1.9365949630737305, |
|
"eval_runtime": 35.1567, |
|
"eval_samples_per_second": 5.689, |
|
"eval_steps_per_second": 0.711, |
|
"step": 752 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"learning_rate": 4e-05, |
|
"loss": 2.5519, |
|
"step": 846 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_loss": 1.9736474752426147, |
|
"eval_runtime": 35.9727, |
|
"eval_samples_per_second": 5.56, |
|
"eval_steps_per_second": 0.695, |
|
"step": 846 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"learning_rate": 3.3333333333333335e-05, |
|
"loss": 2.4988, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_loss": 1.881581425666809, |
|
"eval_runtime": 35.6769, |
|
"eval_samples_per_second": 5.606, |
|
"eval_steps_per_second": 0.701, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"learning_rate": 2.6666666666666667e-05, |
|
"loss": 2.5101, |
|
"step": 1034 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_loss": 1.8453679084777832, |
|
"eval_runtime": 33.4013, |
|
"eval_samples_per_second": 5.988, |
|
"eval_steps_per_second": 0.748, |
|
"step": 1034 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"learning_rate": 2e-05, |
|
"loss": 2.4441, |
|
"step": 1128 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_loss": 1.8143038749694824, |
|
"eval_runtime": 34.7625, |
|
"eval_samples_per_second": 5.753, |
|
"eval_steps_per_second": 0.719, |
|
"step": 1128 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"learning_rate": 1.3333333333333333e-05, |
|
"loss": 2.3857, |
|
"step": 1222 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_loss": 1.7919152975082397, |
|
"eval_runtime": 33.975, |
|
"eval_samples_per_second": 5.887, |
|
"eval_steps_per_second": 0.736, |
|
"step": 1222 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"learning_rate": 6.666666666666667e-06, |
|
"loss": 2.2877, |
|
"step": 1316 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_loss": 1.7400457859039307, |
|
"eval_runtime": 34.7007, |
|
"eval_samples_per_second": 5.764, |
|
"eval_steps_per_second": 0.72, |
|
"step": 1316 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"learning_rate": 0.0, |
|
"loss": 2.3013, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_loss": 1.7408628463745117, |
|
"eval_runtime": 34.5398, |
|
"eval_samples_per_second": 5.79, |
|
"eval_steps_per_second": 0.724, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"step": 1410, |
|
"total_flos": 5.3762898528e+18, |
|
"train_loss": 1.3009196721070202, |
|
"train_runtime": 1911.8077, |
|
"train_samples_per_second": 5.884, |
|
"train_steps_per_second": 0.738 |
|
} |
|
], |
|
"max_steps": 1410, |
|
"num_train_epochs": 15, |
|
"total_flos": 5.3762898528e+18, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|