{
  "best_metric": 0.7080158591270447,
  "best_model_checkpoint": "final_models/transformer_base_final_2/finetune/mnli/checkpoint-6000",
  "epoch": 3.140877598152425,
  "global_step": 6800,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.18,
      "eval_accuracy": 0.5156964063644409,
      "eval_loss": 0.9809399247169495,
      "eval_runtime": 9.0348,
      "eval_samples_per_second": 726.302,
      "eval_steps_per_second": 90.871,
      "step": 400
    },
    {
      "epoch": 0.23,
      "learning_rate": 4.884526558891455e-05,
      "loss": 1.0588,
      "step": 500
    },
    {
      "epoch": 0.37,
      "eval_accuracy": 0.571014940738678,
      "eval_loss": 0.9007604718208313,
      "eval_runtime": 8.9421,
      "eval_samples_per_second": 733.831,
      "eval_steps_per_second": 91.813,
      "step": 800
    },
    {
      "epoch": 0.46,
      "learning_rate": 4.7690531177829104e-05,
      "loss": 0.9283,
      "step": 1000
    },
    {
      "epoch": 0.55,
      "eval_accuracy": 0.6338006854057312,
      "eval_loss": 0.8218390941619873,
      "eval_runtime": 8.965,
      "eval_samples_per_second": 731.959,
      "eval_steps_per_second": 91.579,
      "step": 1200
    },
    {
      "epoch": 0.69,
      "learning_rate": 4.653579676674365e-05,
      "loss": 0.836,
      "step": 1500
    },
    {
      "epoch": 0.74,
      "eval_accuracy": 0.6562023758888245,
      "eval_loss": 0.7788540124893188,
      "eval_runtime": 8.9831,
      "eval_samples_per_second": 730.487,
      "eval_steps_per_second": 91.394,
      "step": 1600
    },
    {
      "epoch": 0.92,
      "learning_rate": 4.53810623556582e-05,
      "loss": 0.7876,
      "step": 2000
    },
    {
      "epoch": 0.92,
      "eval_accuracy": 0.6472111940383911,
      "eval_loss": 0.7989409565925598,
      "eval_runtime": 9.0636,
      "eval_samples_per_second": 723.995,
      "eval_steps_per_second": 90.582,
      "step": 2000
    },
    {
      "epoch": 1.11,
      "eval_accuracy": 0.6717464327812195,
      "eval_loss": 0.7579967379570007,
      "eval_runtime": 8.9628,
      "eval_samples_per_second": 732.136,
      "eval_steps_per_second": 91.601,
      "step": 2400
    },
    {
      "epoch": 1.15,
      "learning_rate": 4.422632794457275e-05,
      "loss": 0.7301,
      "step": 2500
    },
    {
      "epoch": 1.29,
      "eval_accuracy": 0.6728131771087646,
      "eval_loss": 0.7662198543548584,
      "eval_runtime": 8.9967,
      "eval_samples_per_second": 729.376,
      "eval_steps_per_second": 91.255,
      "step": 2800
    },
    {
      "epoch": 1.39,
      "learning_rate": 4.30715935334873e-05,
      "loss": 0.6933,
      "step": 3000
    },
    {
      "epoch": 1.48,
      "eval_accuracy": 0.6782993078231812,
      "eval_loss": 0.7824717164039612,
      "eval_runtime": 8.9469,
      "eval_samples_per_second": 733.442,
      "eval_steps_per_second": 91.764,
      "step": 3200
    },
    {
      "epoch": 1.62,
      "learning_rate": 4.1916859122401844e-05,
      "loss": 0.6844,
      "step": 3500
    },
    {
      "epoch": 1.66,
      "eval_accuracy": 0.6924718022346497,
      "eval_loss": 0.727609395980835,
      "eval_runtime": 9.0458,
      "eval_samples_per_second": 725.417,
      "eval_steps_per_second": 90.76,
      "step": 3600
    },
    {
      "epoch": 1.85,
      "learning_rate": 4.07621247113164e-05,
      "loss": 0.6825,
      "step": 4000
    },
    {
      "epoch": 1.85,
      "eval_accuracy": 0.6773849725723267,
      "eval_loss": 0.7659159302711487,
      "eval_runtime": 9.0209,
      "eval_samples_per_second": 727.419,
      "eval_steps_per_second": 91.011,
      "step": 4000
    },
    {
      "epoch": 2.03,
      "eval_accuracy": 0.6930813789367676,
      "eval_loss": 0.7922490239143372,
      "eval_runtime": 8.9692,
      "eval_samples_per_second": 731.612,
      "eval_steps_per_second": 91.535,
      "step": 4400
    },
    {
      "epoch": 2.08,
      "learning_rate": 3.960739030023095e-05,
      "loss": 0.639,
      "step": 4500
    },
    {
      "epoch": 2.22,
      "eval_accuracy": 0.7077110409736633,
      "eval_loss": 0.7321968674659729,
      "eval_runtime": 8.9855,
      "eval_samples_per_second": 730.29,
      "eval_steps_per_second": 91.37,
      "step": 4800
    },
    {
      "epoch": 2.31,
      "learning_rate": 3.84526558891455e-05,
      "loss": 0.5802,
      "step": 5000
    },
    {
      "epoch": 2.4,
      "eval_accuracy": 0.6982627511024475,
      "eval_loss": 0.7771458029747009,
      "eval_runtime": 8.9398,
      "eval_samples_per_second": 734.024,
      "eval_steps_per_second": 91.837,
      "step": 5200
    },
    {
      "epoch": 2.54,
      "learning_rate": 3.729792147806005e-05,
      "loss": 0.5786,
      "step": 5500
    },
    {
      "epoch": 2.59,
      "eval_accuracy": 0.7005485892295837,
      "eval_loss": 0.7566787004470825,
      "eval_runtime": 8.9729,
      "eval_samples_per_second": 731.313,
      "eval_steps_per_second": 91.498,
      "step": 5600
    },
    {
      "epoch": 2.77,
      "learning_rate": 3.61431870669746e-05,
      "loss": 0.5796,
      "step": 6000
    },
    {
      "epoch": 2.77,
      "eval_accuracy": 0.7080158591270447,
      "eval_loss": 0.7298412919044495,
      "eval_runtime": 8.9509,
      "eval_samples_per_second": 733.11,
      "eval_steps_per_second": 91.723,
      "step": 6000
    },
    {
      "epoch": 2.96,
      "eval_accuracy": 0.7008534073829651,
      "eval_loss": 0.7494065165519714,
      "eval_runtime": 9.0706,
      "eval_samples_per_second": 723.435,
      "eval_steps_per_second": 90.512,
      "step": 6400
    },
    {
      "epoch": 3.0,
      "learning_rate": 3.498845265588915e-05,
      "loss": 0.5797,
      "step": 6500
    },
    {
      "epoch": 3.14,
      "eval_accuracy": 0.7040536403656006,
      "eval_loss": 0.8426190614700317,
      "eval_runtime": 8.9718,
      "eval_samples_per_second": 731.403,
      "eval_steps_per_second": 91.509,
      "step": 6800
    },
    {
      "epoch": 3.14,
      "step": 6800,
      "total_flos": 5.369124305668608e+16,
      "train_loss": 0.7091084985172047,
      "train_runtime": 2839.0059,
      "train_samples_per_second": 915.039,
      "train_steps_per_second": 7.626
    }
  ],
  "max_steps": 21650,
  "num_train_epochs": 10,
  "total_flos": 5.369124305668608e+16,
  "trial_name": null,
  "trial_params": null
}