|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 5.0, |
|
"global_step": 3510, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.14, |
|
"eval_f1": 0.37622621795062866, |
|
"eval_loss": 1.36316978931427, |
|
"eval_runtime": 3.2856, |
|
"eval_samples_per_second": 29.827, |
|
"eval_steps_per_second": 29.827, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"eval_f1": 0.41617398891170265, |
|
"eval_loss": 1.227824330329895, |
|
"eval_runtime": 3.2714, |
|
"eval_samples_per_second": 29.957, |
|
"eval_steps_per_second": 29.957, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"eval_f1": 0.41594055599199414, |
|
"eval_loss": 1.1802130937576294, |
|
"eval_runtime": 3.2851, |
|
"eval_samples_per_second": 29.832, |
|
"eval_steps_per_second": 29.832, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"eval_f1": 0.4879226887989845, |
|
"eval_loss": 1.3237018585205078, |
|
"eval_runtime": 3.2714, |
|
"eval_samples_per_second": 29.957, |
|
"eval_steps_per_second": 29.957, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 2.572649572649573e-05, |
|
"loss": 1.2, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"eval_f1": 0.46450380175056494, |
|
"eval_loss": 1.2971174716949463, |
|
"eval_runtime": 3.2843, |
|
"eval_samples_per_second": 29.839, |
|
"eval_steps_per_second": 29.839, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"eval_f1": 0.5019868520647613, |
|
"eval_loss": 1.2549620866775513, |
|
"eval_runtime": 3.267, |
|
"eval_samples_per_second": 29.997, |
|
"eval_steps_per_second": 29.997, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_f1": 0.48057967334012397, |
|
"eval_loss": 1.1853649616241455, |
|
"eval_runtime": 3.277, |
|
"eval_samples_per_second": 29.905, |
|
"eval_steps_per_second": 29.905, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"eval_f1": 0.5011814210846155, |
|
"eval_loss": 1.1788480281829834, |
|
"eval_runtime": 3.2639, |
|
"eval_samples_per_second": 30.025, |
|
"eval_steps_per_second": 30.025, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"eval_f1": 0.4964300899620197, |
|
"eval_loss": 1.093542218208313, |
|
"eval_runtime": 3.2724, |
|
"eval_samples_per_second": 29.947, |
|
"eval_steps_per_second": 29.947, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"learning_rate": 2.1452991452991456e-05, |
|
"loss": 0.9189, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"eval_f1": 0.4986272191320895, |
|
"eval_loss": 1.2862237691879272, |
|
"eval_runtime": 3.302, |
|
"eval_samples_per_second": 29.679, |
|
"eval_steps_per_second": 29.679, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"eval_f1": 0.49297809308258944, |
|
"eval_loss": 1.2222929000854492, |
|
"eval_runtime": 3.3171, |
|
"eval_samples_per_second": 29.544, |
|
"eval_steps_per_second": 29.544, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 1.71, |
|
"eval_f1": 0.4953797333525823, |
|
"eval_loss": 1.1196690797805786, |
|
"eval_runtime": 3.2943, |
|
"eval_samples_per_second": 29.749, |
|
"eval_steps_per_second": 29.749, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 1.85, |
|
"eval_f1": 0.5153008157478032, |
|
"eval_loss": 1.1256704330444336, |
|
"eval_runtime": 3.2631, |
|
"eval_samples_per_second": 30.033, |
|
"eval_steps_per_second": 30.033, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"eval_f1": 0.5263780363862973, |
|
"eval_loss": 1.1729286909103394, |
|
"eval_runtime": 3.2904, |
|
"eval_samples_per_second": 29.783, |
|
"eval_steps_per_second": 29.783, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 2.14, |
|
"learning_rate": 1.7179487179487178e-05, |
|
"loss": 0.8143, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 2.14, |
|
"eval_f1": 0.5165321012151871, |
|
"eval_loss": 1.272233486175537, |
|
"eval_runtime": 3.3087, |
|
"eval_samples_per_second": 29.619, |
|
"eval_steps_per_second": 29.619, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 2.28, |
|
"eval_f1": 0.539472065505205, |
|
"eval_loss": 1.3217926025390625, |
|
"eval_runtime": 3.2634, |
|
"eval_samples_per_second": 30.03, |
|
"eval_steps_per_second": 30.03, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 2.42, |
|
"eval_f1": 0.5170136038987323, |
|
"eval_loss": 1.338261604309082, |
|
"eval_runtime": 3.2635, |
|
"eval_samples_per_second": 30.029, |
|
"eval_steps_per_second": 30.029, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 2.56, |
|
"eval_f1": 0.5138801729725696, |
|
"eval_loss": 1.250339388847351, |
|
"eval_runtime": 3.2656, |
|
"eval_samples_per_second": 30.009, |
|
"eval_steps_per_second": 30.009, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 2.71, |
|
"eval_f1": 0.523963853035474, |
|
"eval_loss": 1.362999439239502, |
|
"eval_runtime": 3.3211, |
|
"eval_samples_per_second": 29.508, |
|
"eval_steps_per_second": 29.508, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 2.85, |
|
"learning_rate": 1.2905982905982905e-05, |
|
"loss": 0.6175, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 2.85, |
|
"eval_f1": 0.5305458058252502, |
|
"eval_loss": 1.402750015258789, |
|
"eval_runtime": 3.2768, |
|
"eval_samples_per_second": 29.907, |
|
"eval_steps_per_second": 29.907, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 2.99, |
|
"eval_f1": 0.5408209021870833, |
|
"eval_loss": 1.4016790390014648, |
|
"eval_runtime": 3.3122, |
|
"eval_samples_per_second": 29.588, |
|
"eval_steps_per_second": 29.588, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 3.13, |
|
"eval_f1": 0.541281162975512, |
|
"eval_loss": 1.5929616689682007, |
|
"eval_runtime": 3.294, |
|
"eval_samples_per_second": 29.751, |
|
"eval_steps_per_second": 29.751, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 3.28, |
|
"eval_f1": 0.5564758214624422, |
|
"eval_loss": 1.5372625589370728, |
|
"eval_runtime": 3.2882, |
|
"eval_samples_per_second": 29.803, |
|
"eval_steps_per_second": 29.803, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 3.42, |
|
"eval_f1": 0.5722151004353093, |
|
"eval_loss": 1.5012538433074951, |
|
"eval_runtime": 3.3067, |
|
"eval_samples_per_second": 29.637, |
|
"eval_steps_per_second": 29.637, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 3.56, |
|
"learning_rate": 8.632478632478633e-06, |
|
"loss": 0.4726, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 3.56, |
|
"eval_f1": 0.5226487560978434, |
|
"eval_loss": 1.570418119430542, |
|
"eval_runtime": 3.3114, |
|
"eval_samples_per_second": 29.595, |
|
"eval_steps_per_second": 29.595, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 3.7, |
|
"eval_f1": 0.5483719296880323, |
|
"eval_loss": 1.5890936851501465, |
|
"eval_runtime": 3.2745, |
|
"eval_samples_per_second": 29.928, |
|
"eval_steps_per_second": 29.928, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 3.85, |
|
"eval_f1": 0.5630120856995185, |
|
"eval_loss": 1.5236029624938965, |
|
"eval_runtime": 3.2951, |
|
"eval_samples_per_second": 29.741, |
|
"eval_steps_per_second": 29.741, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 3.99, |
|
"eval_f1": 0.5422100713682105, |
|
"eval_loss": 1.52333664894104, |
|
"eval_runtime": 3.3261, |
|
"eval_samples_per_second": 29.464, |
|
"eval_steps_per_second": 29.464, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 4.13, |
|
"eval_f1": 0.5469719933620487, |
|
"eval_loss": 1.6104604005813599, |
|
"eval_runtime": 3.2888, |
|
"eval_samples_per_second": 29.798, |
|
"eval_steps_per_second": 29.798, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 4.27, |
|
"learning_rate": 4.358974358974359e-06, |
|
"loss": 0.3745, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 4.27, |
|
"eval_f1": 0.5525357490677262, |
|
"eval_loss": 1.7136110067367554, |
|
"eval_runtime": 3.3248, |
|
"eval_samples_per_second": 29.476, |
|
"eval_steps_per_second": 29.476, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 4.42, |
|
"eval_f1": 0.5539436259955471, |
|
"eval_loss": 1.6561492681503296, |
|
"eval_runtime": 3.2857, |
|
"eval_samples_per_second": 29.826, |
|
"eval_steps_per_second": 29.826, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 4.56, |
|
"eval_f1": 0.5504413375623162, |
|
"eval_loss": 1.7664132118225098, |
|
"eval_runtime": 3.2517, |
|
"eval_samples_per_second": 30.138, |
|
"eval_steps_per_second": 30.138, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 4.7, |
|
"eval_f1": 0.5494419672200014, |
|
"eval_loss": 1.750455379486084, |
|
"eval_runtime": 3.27, |
|
"eval_samples_per_second": 29.969, |
|
"eval_steps_per_second": 29.969, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 4.84, |
|
"eval_f1": 0.5516497223039627, |
|
"eval_loss": 1.7312653064727783, |
|
"eval_runtime": 3.3127, |
|
"eval_samples_per_second": 29.583, |
|
"eval_steps_per_second": 29.583, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 4.99, |
|
"learning_rate": 8.547008547008547e-08, |
|
"loss": 0.307, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 4.99, |
|
"eval_f1": 0.5515045914952008, |
|
"eval_loss": 1.7193822860717773, |
|
"eval_runtime": 3.2769, |
|
"eval_samples_per_second": 29.907, |
|
"eval_steps_per_second": 29.907, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"step": 3510, |
|
"total_flos": 2890172619430200.0, |
|
"train_loss": 0.6706694952103487, |
|
"train_runtime": 824.1732, |
|
"train_samples_per_second": 4.259, |
|
"train_steps_per_second": 4.259 |
|
} |
|
], |
|
"max_steps": 3510, |
|
"num_train_epochs": 5, |
|
"total_flos": 2890172619430200.0, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|