|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 6.0, |
|
"global_step": 732, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 3e-05, |
|
"loss": 1.0071, |
|
"step": 122 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_exact_match": 69.49572032587398, |
|
"eval_f1": 72.3955989739612, |
|
"eval_runtime": 9.3521, |
|
"eval_samples_per_second": 1036.877, |
|
"eval_steps_per_second": 3.315, |
|
"step": 122 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 3e-05, |
|
"loss": 0.5106, |
|
"step": 244 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_exact_match": 69.23790863153553, |
|
"eval_f1": 73.11703884791969, |
|
"eval_runtime": 8.7135, |
|
"eval_samples_per_second": 1112.875, |
|
"eval_steps_per_second": 3.558, |
|
"step": 244 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"learning_rate": 3e-05, |
|
"loss": 0.4499, |
|
"step": 366 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_exact_match": 70.26915540888935, |
|
"eval_f1": 73.2316411397418, |
|
"eval_runtime": 8.6973, |
|
"eval_samples_per_second": 1114.94, |
|
"eval_steps_per_second": 3.564, |
|
"step": 366 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"learning_rate": 3e-05, |
|
"loss": 0.4097, |
|
"step": 488 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_exact_match": 70.05259358564504, |
|
"eval_f1": 72.9686899402206, |
|
"eval_runtime": 8.6799, |
|
"eval_samples_per_second": 1117.174, |
|
"eval_steps_per_second": 3.571, |
|
"step": 488 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"learning_rate": 3e-05, |
|
"loss": 0.3784, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_exact_match": 70.3619676188512, |
|
"eval_f1": 73.05879115940134, |
|
"eval_runtime": 8.692, |
|
"eval_samples_per_second": 1115.618, |
|
"eval_steps_per_second": 3.566, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"learning_rate": 3e-05, |
|
"loss": 0.3567, |
|
"step": 732 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_exact_match": 70.85696607198102, |
|
"eval_f1": 73.53518484191017, |
|
"eval_runtime": 8.6772, |
|
"eval_samples_per_second": 1117.529, |
|
"eval_steps_per_second": 3.573, |
|
"step": 732 |
|
} |
|
], |
|
"max_steps": 1220, |
|
"num_train_epochs": 10, |
|
"total_flos": 231098185416704.0, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|