|
{ |
|
"best_metric": 0.9050816297531128, |
|
"best_model_checkpoint": "./mbert_ar_ur/checkpoint-5000", |
|
"epoch": 4.0, |
|
"global_step": 5000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 1.885714285714286e-05, |
|
"loss": 0.4454, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 1.7714285714285717e-05, |
|
"loss": 0.2955, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_LOC_f1": 0.09510682288077188, |
|
"eval_ORG_f1": 0.24583663758921492, |
|
"eval_PER_f1": 0.3119353304802663, |
|
"eval_loss": 1.4924763441085815, |
|
"eval_overall_accuracy": 0.4886179434773416, |
|
"eval_overall_f1": 0.1982125758059368, |
|
"eval_overall_precision": 0.16568836712913554, |
|
"eval_overall_recall": 0.2466243050039714, |
|
"eval_runtime": 2.7088, |
|
"eval_samples_per_second": 369.163, |
|
"eval_steps_per_second": 23.257, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 1.6571428571428574e-05, |
|
"loss": 0.2182, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"learning_rate": 1.542857142857143e-05, |
|
"loss": 0.1877, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 1.4285714285714287e-05, |
|
"loss": 0.181, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_LOC_f1": 0.16472203157172272, |
|
"eval_ORG_f1": 0.26800929512006194, |
|
"eval_PER_f1": 0.6124661246612466, |
|
"eval_loss": 1.1303032636642456, |
|
"eval_overall_accuracy": 0.6376770737895553, |
|
"eval_overall_f1": 0.3045238514346066, |
|
"eval_overall_precision": 0.2734745494783433, |
|
"eval_overall_recall": 0.3435266084193805, |
|
"eval_runtime": 2.7853, |
|
"eval_samples_per_second": 359.026, |
|
"eval_steps_per_second": 22.619, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 2.4, |
|
"learning_rate": 1.3142857142857145e-05, |
|
"loss": 0.1295, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 2.8, |
|
"learning_rate": 1.2e-05, |
|
"loss": 0.1253, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_LOC_f1": 0.15284677111196027, |
|
"eval_ORG_f1": 0.3176020408163266, |
|
"eval_PER_f1": 0.6216577540106952, |
|
"eval_loss": 1.348933458328247, |
|
"eval_overall_accuracy": 0.6324617661568821, |
|
"eval_overall_f1": 0.3217743355043126, |
|
"eval_overall_precision": 0.2889661713563073, |
|
"eval_overall_recall": 0.36298649722001586, |
|
"eval_runtime": 2.5633, |
|
"eval_samples_per_second": 390.124, |
|
"eval_steps_per_second": 24.578, |
|
"step": 3750 |
|
}, |
|
{ |
|
"epoch": 3.2, |
|
"learning_rate": 1.0857142857142858e-05, |
|
"loss": 0.098, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 3.6, |
|
"learning_rate": 9.714285714285715e-06, |
|
"loss": 0.0916, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"learning_rate": 8.571428571428571e-06, |
|
"loss": 0.0866, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_LOC_f1": 0.42877697841726614, |
|
"eval_ORG_f1": 0.4028497409326424, |
|
"eval_PER_f1": 0.6109282422646478, |
|
"eval_loss": 0.9050816297531128, |
|
"eval_overall_accuracy": 0.7715836211149482, |
|
"eval_overall_f1": 0.47474747474747475, |
|
"eval_overall_precision": 0.464638783269962, |
|
"eval_overall_recall": 0.4853057982525814, |
|
"eval_runtime": 2.9178, |
|
"eval_samples_per_second": 342.725, |
|
"eval_steps_per_second": 21.592, |
|
"step": 5000 |
|
} |
|
], |
|
"max_steps": 8750, |
|
"num_train_epochs": 7, |
|
"total_flos": 1363663262792160.0, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|