|
{ |
|
"best_metric": 0.980825229383001, |
|
"best_model_checkpoint": "omarmomen/sf_babylm_1/finetune/relative_position_control/checkpoint-400", |
|
"epoch": 10.0, |
|
"global_step": 1420, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.41, |
|
"eval_accuracy": 0.9216228127479553, |
|
"eval_f1": 0.9270501080358061, |
|
"eval_loss": 0.4737815260887146, |
|
"eval_mcc": 0.8527382673557574, |
|
"eval_runtime": 30.4369, |
|
"eval_samples_per_second": 594.409, |
|
"eval_steps_per_second": 74.318, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 2.82, |
|
"eval_accuracy": 0.9807097315788269, |
|
"eval_f1": 0.980825229383001, |
|
"eval_loss": 0.08114158362150192, |
|
"eval_mcc": 0.9614892142400866, |
|
"eval_runtime": 30.4073, |
|
"eval_samples_per_second": 594.988, |
|
"eval_steps_per_second": 74.39, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 3.52, |
|
"learning_rate": 3.23943661971831e-05, |
|
"loss": 0.0425, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 4.23, |
|
"eval_accuracy": 0.9766194820404053, |
|
"eval_f1": 0.9769545083083628, |
|
"eval_loss": 0.11881698668003082, |
|
"eval_mcc": 0.9536421310802985, |
|
"eval_runtime": 30.3659, |
|
"eval_samples_per_second": 595.799, |
|
"eval_steps_per_second": 74.491, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 5.63, |
|
"eval_accuracy": 0.9430134892463684, |
|
"eval_f1": 0.9460689438719464, |
|
"eval_loss": 0.407075434923172, |
|
"eval_mcc": 0.8917702307190046, |
|
"eval_runtime": 30.3806, |
|
"eval_samples_per_second": 595.512, |
|
"eval_steps_per_second": 74.455, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 7.04, |
|
"learning_rate": 1.4788732394366198e-05, |
|
"loss": 0.0001, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 7.04, |
|
"eval_accuracy": 0.9644041657447815, |
|
"eval_f1": 0.9655172413793104, |
|
"eval_loss": 0.2293713390827179, |
|
"eval_mcc": 0.930749953390411, |
|
"eval_runtime": 30.5501, |
|
"eval_samples_per_second": 592.207, |
|
"eval_steps_per_second": 74.042, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 8.45, |
|
"eval_accuracy": 0.9323458075523376, |
|
"eval_f1": 0.9365540120257101, |
|
"eval_loss": 0.5338804721832275, |
|
"eval_mcc": 0.8724016613441335, |
|
"eval_runtime": 30.3498, |
|
"eval_samples_per_second": 596.116, |
|
"eval_steps_per_second": 74.531, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 9.86, |
|
"eval_accuracy": 0.951028048992157, |
|
"eval_f1": 0.9530521407376007, |
|
"eval_loss": 0.35129231214523315, |
|
"eval_mcc": 0.9054283286330445, |
|
"eval_runtime": 30.3142, |
|
"eval_samples_per_second": 596.816, |
|
"eval_steps_per_second": 74.618, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"step": 1420, |
|
"total_flos": 2323809310863360.0, |
|
"train_loss": 0.015042712267907992, |
|
"train_runtime": 512.0019, |
|
"train_samples_per_second": 177.109, |
|
"train_steps_per_second": 2.773 |
|
} |
|
], |
|
"max_steps": 1420, |
|
"num_train_epochs": 10, |
|
"total_flos": 2323809310863360.0, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|