|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 19.997442455242968, |
|
"global_step": 3900, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.9110807180404663, |
|
"eval_loss": 0.2603251338005066, |
|
"eval_runtime": 3.7587, |
|
"eval_samples_per_second": 194.484, |
|
"eval_steps_per_second": 24.477, |
|
"step": 195 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.9179206490516663, |
|
"eval_loss": 0.2944748103618622, |
|
"eval_runtime": 3.8108, |
|
"eval_samples_per_second": 191.821, |
|
"eval_steps_per_second": 24.142, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 2.56, |
|
"learning_rate": 3.282996735800617e-05, |
|
"loss": 0.1807, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.9083447456359863, |
|
"eval_loss": 0.43401873111724854, |
|
"eval_runtime": 3.7691, |
|
"eval_samples_per_second": 193.947, |
|
"eval_steps_per_second": 24.409, |
|
"step": 585 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.9124487042427063, |
|
"eval_loss": 0.39779114723205566, |
|
"eval_runtime": 3.7531, |
|
"eval_samples_per_second": 194.771, |
|
"eval_steps_per_second": 24.513, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.9042407870292664, |
|
"eval_loss": 0.6419674754142761, |
|
"eval_runtime": 3.6686, |
|
"eval_samples_per_second": 199.26, |
|
"eval_steps_per_second": 25.078, |
|
"step": 975 |
|
}, |
|
{ |
|
"epoch": 5.13, |
|
"learning_rate": 2.800203098182879e-05, |
|
"loss": 0.0523, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.9069767594337463, |
|
"eval_loss": 0.6490611433982849, |
|
"eval_runtime": 3.596, |
|
"eval_samples_per_second": 203.28, |
|
"eval_steps_per_second": 25.584, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_accuracy": 0.9124487042427063, |
|
"eval_loss": 0.6302383542060852, |
|
"eval_runtime": 3.7728, |
|
"eval_samples_per_second": 193.754, |
|
"eval_steps_per_second": 24.385, |
|
"step": 1365 |
|
}, |
|
{ |
|
"epoch": 7.69, |
|
"learning_rate": 2.3174094605651413e-05, |
|
"loss": 0.0146, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.9097127318382263, |
|
"eval_loss": 0.728517472743988, |
|
"eval_runtime": 3.574, |
|
"eval_samples_per_second": 204.533, |
|
"eval_steps_per_second": 25.742, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_accuracy": 0.9206566214561462, |
|
"eval_loss": 0.6614168882369995, |
|
"eval_runtime": 3.5326, |
|
"eval_samples_per_second": 206.93, |
|
"eval_steps_per_second": 26.043, |
|
"step": 1755 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_accuracy": 0.9151846766471863, |
|
"eval_loss": 0.7393134236335754, |
|
"eval_runtime": 3.6341, |
|
"eval_samples_per_second": 201.15, |
|
"eval_steps_per_second": 25.316, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 10.26, |
|
"learning_rate": 1.8346158229474035e-05, |
|
"loss": 0.0063, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_accuracy": 0.9165526628494263, |
|
"eval_loss": 0.7556606531143188, |
|
"eval_runtime": 3.6289, |
|
"eval_samples_per_second": 201.437, |
|
"eval_steps_per_second": 25.352, |
|
"step": 2145 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_accuracy": 0.9151846766471863, |
|
"eval_loss": 0.7709615230560303, |
|
"eval_runtime": 3.5866, |
|
"eval_samples_per_second": 203.815, |
|
"eval_steps_per_second": 25.651, |
|
"step": 2340 |
|
}, |
|
{ |
|
"epoch": 12.82, |
|
"learning_rate": 1.3518221853296657e-05, |
|
"loss": 0.0037, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_accuracy": 0.9192886352539062, |
|
"eval_loss": 0.7879533171653748, |
|
"eval_runtime": 3.5996, |
|
"eval_samples_per_second": 203.079, |
|
"eval_steps_per_second": 25.558, |
|
"step": 2535 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_accuracy": 0.9124487042427063, |
|
"eval_loss": 0.8315412402153015, |
|
"eval_runtime": 3.763, |
|
"eval_samples_per_second": 194.257, |
|
"eval_steps_per_second": 24.448, |
|
"step": 2730 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_accuracy": 0.9179206490516663, |
|
"eval_loss": 0.8191958069801331, |
|
"eval_runtime": 3.6714, |
|
"eval_samples_per_second": 199.108, |
|
"eval_steps_per_second": 25.059, |
|
"step": 2925 |
|
}, |
|
{ |
|
"epoch": 15.38, |
|
"learning_rate": 8.690285477119281e-06, |
|
"loss": 0.0001, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_accuracy": 0.9192886352539062, |
|
"eval_loss": 0.8122023940086365, |
|
"eval_runtime": 3.7565, |
|
"eval_samples_per_second": 194.598, |
|
"eval_steps_per_second": 24.491, |
|
"step": 3120 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"eval_accuracy": 0.9179206490516663, |
|
"eval_loss": 0.8226170539855957, |
|
"eval_runtime": 3.6589, |
|
"eval_samples_per_second": 199.788, |
|
"eval_steps_per_second": 25.144, |
|
"step": 3315 |
|
}, |
|
{ |
|
"epoch": 17.95, |
|
"learning_rate": 3.862349100941902e-06, |
|
"loss": 0.0001, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"eval_accuracy": 0.9179206490516663, |
|
"eval_loss": 0.8273718953132629, |
|
"eval_runtime": 3.7025, |
|
"eval_samples_per_second": 197.433, |
|
"eval_steps_per_second": 24.848, |
|
"step": 3510 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"eval_accuracy": 0.9179206490516663, |
|
"eval_loss": 0.8303136825561523, |
|
"eval_runtime": 3.6587, |
|
"eval_samples_per_second": 199.797, |
|
"eval_steps_per_second": 25.145, |
|
"step": 3705 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_accuracy": 0.9179206490516663, |
|
"eval_loss": 0.8313552737236023, |
|
"eval_runtime": 3.6572, |
|
"eval_samples_per_second": 199.879, |
|
"eval_steps_per_second": 25.156, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"step": 3900, |
|
"total_flos": 4114530683719680.0, |
|
"train_loss": 0.033037299305582656, |
|
"train_runtime": 1549.9583, |
|
"train_samples_per_second": 40.362, |
|
"train_steps_per_second": 2.516 |
|
} |
|
], |
|
"max_steps": 3900, |
|
"num_train_epochs": 20, |
|
"total_flos": 4114530683719680.0, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|