ner-bert-ingredients / trainer_state.json
Shresthadev403's picture
End of training
809a49e
raw
history blame
6.04 kB
{
"best_metric": 1.4048632383346558,
"best_model_checkpoint": "ner-bert-ingredients/checkpoint-8250",
"epoch": 17.181542606938365,
"eval_steps": 750,
"global_step": 12750,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 1.01,
"learning_rate": 4.949460916442048e-05,
"loss": 5.4,
"step": 750
},
{
"epoch": 1.01,
"eval_loss": 3.985283613204956,
"eval_runtime": 84.6074,
"eval_samples_per_second": 59.096,
"eval_steps_per_second": 0.934,
"step": 750
},
{
"epoch": 2.02,
"learning_rate": 4.8989218328840976e-05,
"loss": 3.3737,
"step": 1500
},
{
"epoch": 2.02,
"eval_loss": 2.947718858718872,
"eval_runtime": 84.1622,
"eval_samples_per_second": 59.409,
"eval_steps_per_second": 0.939,
"step": 1500
},
{
"epoch": 3.03,
"learning_rate": 4.84845013477089e-05,
"loss": 2.5551,
"step": 2250
},
{
"epoch": 3.03,
"eval_loss": 2.259490728378296,
"eval_runtime": 84.2333,
"eval_samples_per_second": 59.359,
"eval_steps_per_second": 0.938,
"step": 2250
},
{
"epoch": 4.04,
"learning_rate": 4.7979110512129385e-05,
"loss": 2.0321,
"step": 3000
},
{
"epoch": 4.04,
"eval_loss": 1.9211012125015259,
"eval_runtime": 84.2271,
"eval_samples_per_second": 59.363,
"eval_steps_per_second": 0.938,
"step": 3000
},
{
"epoch": 5.05,
"learning_rate": 4.7473719676549865e-05,
"loss": 1.6766,
"step": 3750
},
{
"epoch": 5.05,
"eval_loss": 1.6346299648284912,
"eval_runtime": 84.1885,
"eval_samples_per_second": 59.391,
"eval_steps_per_second": 0.938,
"step": 3750
},
{
"epoch": 6.06,
"learning_rate": 4.696900269541779e-05,
"loss": 1.437,
"step": 4500
},
{
"epoch": 6.06,
"eval_loss": 1.5281634330749512,
"eval_runtime": 84.2371,
"eval_samples_per_second": 59.356,
"eval_steps_per_second": 0.938,
"step": 4500
},
{
"epoch": 7.08,
"learning_rate": 4.949460916442048e-05,
"loss": 3.7548,
"step": 5250
},
{
"epoch": 7.08,
"eval_loss": 2.378469228744507,
"eval_runtime": 84.0031,
"eval_samples_per_second": 59.522,
"eval_steps_per_second": 0.94,
"step": 5250
},
{
"epoch": 8.09,
"learning_rate": 4.8989218328840976e-05,
"loss": 1.9326,
"step": 6000
},
{
"epoch": 8.09,
"eval_loss": 1.8503246307373047,
"eval_runtime": 83.659,
"eval_samples_per_second": 59.766,
"eval_steps_per_second": 0.944,
"step": 6000
},
{
"epoch": 9.1,
"learning_rate": 4.84845013477089e-05,
"loss": 1.489,
"step": 6750
},
{
"epoch": 9.1,
"eval_loss": 1.630631685256958,
"eval_runtime": 83.6797,
"eval_samples_per_second": 59.752,
"eval_steps_per_second": 0.944,
"step": 6750
},
{
"epoch": 10.11,
"learning_rate": 4.7979784366576826e-05,
"loss": 1.2457,
"step": 7500
},
{
"epoch": 10.11,
"eval_loss": 1.4776005744934082,
"eval_runtime": 83.6823,
"eval_samples_per_second": 59.75,
"eval_steps_per_second": 0.944,
"step": 7500
},
{
"epoch": 11.12,
"learning_rate": 4.7474393530997306e-05,
"loss": 1.0713,
"step": 8250
},
{
"epoch": 11.12,
"eval_loss": 1.4048632383346558,
"eval_runtime": 83.6632,
"eval_samples_per_second": 59.763,
"eval_steps_per_second": 0.944,
"step": 8250
},
{
"epoch": 12.13,
"learning_rate": 4.696900269541779e-05,
"loss": 0.9469,
"step": 9000
},
{
"epoch": 12.13,
"eval_loss": 1.349704384803772,
"eval_runtime": 83.6712,
"eval_samples_per_second": 59.758,
"eval_steps_per_second": 0.944,
"step": 9000
},
{
"epoch": 13.14,
"learning_rate": 4.949460916442048e-05,
"loss": 3.1119,
"step": 9750
},
{
"epoch": 13.14,
"eval_loss": 2.09057354927063,
"eval_runtime": 84.2266,
"eval_samples_per_second": 59.364,
"eval_steps_per_second": 0.938,
"step": 9750
},
{
"epoch": 14.15,
"learning_rate": 4.8989218328840976e-05,
"loss": 1.5021,
"step": 10500
},
{
"epoch": 14.15,
"eval_loss": 1.6629363298416138,
"eval_runtime": 83.9283,
"eval_samples_per_second": 59.575,
"eval_steps_per_second": 0.941,
"step": 10500
},
{
"epoch": 15.16,
"learning_rate": 4.84845013477089e-05,
"loss": 1.1682,
"step": 11250
},
{
"epoch": 15.16,
"eval_loss": 1.5476787090301514,
"eval_runtime": 83.9134,
"eval_samples_per_second": 59.585,
"eval_steps_per_second": 0.941,
"step": 11250
},
{
"epoch": 16.17,
"learning_rate": 4.7979110512129385e-05,
"loss": 0.9836,
"step": 12000
},
{
"epoch": 16.17,
"eval_loss": 1.5123697519302368,
"eval_runtime": 83.9374,
"eval_samples_per_second": 59.568,
"eval_steps_per_second": 0.941,
"step": 12000
},
{
"epoch": 17.18,
"learning_rate": 4.7474393530997306e-05,
"loss": 0.8542,
"step": 12750
},
{
"epoch": 17.18,
"eval_loss": 1.3803095817565918,
"eval_runtime": 83.8966,
"eval_samples_per_second": 59.597,
"eval_steps_per_second": 0.942,
"step": 12750
}
],
"logging_steps": 750,
"max_steps": 74200,
"num_input_tokens_seen": 0,
"num_train_epochs": 100,
"save_steps": 750,
"total_flos": 5.518175447763763e+17,
"train_batch_size": 16,
"trial_name": null,
"trial_params": null
}