|
{ |
|
"best_metric": 0.7862407862407862, |
|
"best_model_checkpoint": "finetune_results/omarmomen/structformer_s1_final_with_pos/qnli/checkpoint-2000", |
|
"epoch": 10.0, |
|
"global_step": 3660, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.09, |
|
"eval_accuracy": 0.6736658215522766, |
|
"eval_f1": 0.7146136189747514, |
|
"eval_loss": 0.5989475250244141, |
|
"eval_mcc": 0.3425894886466018, |
|
"eval_runtime": 4.4241, |
|
"eval_samples_per_second": 516.72, |
|
"eval_steps_per_second": 64.646, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"learning_rate": 4.316939890710383e-05, |
|
"loss": 0.6481, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 2.19, |
|
"eval_accuracy": 0.7349081635475159, |
|
"eval_f1": 0.72579185520362, |
|
"eval_loss": 0.5559495091438293, |
|
"eval_mcc": 0.4829898982899456, |
|
"eval_runtime": 4.4215, |
|
"eval_samples_per_second": 517.013, |
|
"eval_steps_per_second": 64.683, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 2.73, |
|
"learning_rate": 3.633879781420765e-05, |
|
"loss": 0.5099, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 3.28, |
|
"eval_accuracy": 0.751968502998352, |
|
"eval_f1": 0.7618647627047459, |
|
"eval_loss": 0.555103063583374, |
|
"eval_mcc": 0.5037717283176012, |
|
"eval_runtime": 4.4181, |
|
"eval_samples_per_second": 517.421, |
|
"eval_steps_per_second": 64.734, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 4.1, |
|
"learning_rate": 2.9508196721311478e-05, |
|
"loss": 0.3245, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 4.37, |
|
"eval_accuracy": 0.7755905389785767, |
|
"eval_f1": 0.7782101167315174, |
|
"eval_loss": 0.6722451448440552, |
|
"eval_mcc": 0.5551932955703639, |
|
"eval_runtime": 4.4196, |
|
"eval_samples_per_second": 517.245, |
|
"eval_steps_per_second": 64.712, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 5.46, |
|
"learning_rate": 2.2677595628415303e-05, |
|
"loss": 0.1577, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 5.46, |
|
"eval_accuracy": 0.7716535329818726, |
|
"eval_f1": 0.7862407862407862, |
|
"eval_loss": 0.8745389580726624, |
|
"eval_mcc": 0.5412131716827608, |
|
"eval_runtime": 4.4197, |
|
"eval_samples_per_second": 517.228, |
|
"eval_steps_per_second": 64.71, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 6.56, |
|
"eval_accuracy": 0.7677165269851685, |
|
"eval_f1": 0.7777312683131016, |
|
"eval_loss": 0.9956287741661072, |
|
"eval_mcc": 0.5349834947951929, |
|
"eval_runtime": 4.4246, |
|
"eval_samples_per_second": 516.662, |
|
"eval_steps_per_second": 64.639, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 6.83, |
|
"learning_rate": 1.5846994535519128e-05, |
|
"loss": 0.0887, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 7.65, |
|
"eval_accuracy": 0.7672790884971619, |
|
"eval_f1": 0.7745762711864408, |
|
"eval_loss": 1.1708937883377075, |
|
"eval_mcc": 0.5355122905220886, |
|
"eval_runtime": 4.4191, |
|
"eval_samples_per_second": 517.303, |
|
"eval_steps_per_second": 64.719, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 8.2, |
|
"learning_rate": 9.016393442622952e-06, |
|
"loss": 0.045, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 8.74, |
|
"eval_accuracy": 0.778215229511261, |
|
"eval_f1": 0.7823100042936882, |
|
"eval_loss": 1.3273141384124756, |
|
"eval_mcc": 0.5593191626279744, |
|
"eval_runtime": 4.4227, |
|
"eval_samples_per_second": 516.884, |
|
"eval_steps_per_second": 64.667, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 9.56, |
|
"learning_rate": 2.185792349726776e-06, |
|
"loss": 0.0253, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 9.84, |
|
"eval_accuracy": 0.7808399200439453, |
|
"eval_f1": 0.7839586028460542, |
|
"eval_loss": 1.4356170892715454, |
|
"eval_mcc": 0.565280259416071, |
|
"eval_runtime": 4.4184, |
|
"eval_samples_per_second": 517.385, |
|
"eval_steps_per_second": 64.73, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"step": 3660, |
|
"total_flos": 3.665914093619712e+16, |
|
"train_loss": 0.24654610430608032, |
|
"train_runtime": 2153.3841, |
|
"train_samples_per_second": 203.944, |
|
"train_steps_per_second": 1.7 |
|
} |
|
], |
|
"max_steps": 3660, |
|
"num_train_epochs": 10, |
|
"total_flos": 3.665914093619712e+16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|