{ "best_metric": null, "best_model_checkpoint": null, "epoch": 5.0, "global_step": 3510, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.14, "eval_f1": 0.37622621795062866, "eval_loss": 1.36316978931427, "eval_runtime": 3.2856, "eval_samples_per_second": 29.827, "eval_steps_per_second": 29.827, "step": 100 }, { "epoch": 0.28, "eval_f1": 0.41617398891170265, "eval_loss": 1.227824330329895, "eval_runtime": 3.2714, "eval_samples_per_second": 29.957, "eval_steps_per_second": 29.957, "step": 200 }, { "epoch": 0.43, "eval_f1": 0.41594055599199414, "eval_loss": 1.1802130937576294, "eval_runtime": 3.2851, "eval_samples_per_second": 29.832, "eval_steps_per_second": 29.832, "step": 300 }, { "epoch": 0.57, "eval_f1": 0.4879226887989845, "eval_loss": 1.3237018585205078, "eval_runtime": 3.2714, "eval_samples_per_second": 29.957, "eval_steps_per_second": 29.957, "step": 400 }, { "epoch": 0.71, "learning_rate": 2.572649572649573e-05, "loss": 1.2, "step": 500 }, { "epoch": 0.71, "eval_f1": 0.46450380175056494, "eval_loss": 1.2971174716949463, "eval_runtime": 3.2843, "eval_samples_per_second": 29.839, "eval_steps_per_second": 29.839, "step": 500 }, { "epoch": 0.85, "eval_f1": 0.5019868520647613, "eval_loss": 1.2549620866775513, "eval_runtime": 3.267, "eval_samples_per_second": 29.997, "eval_steps_per_second": 29.997, "step": 600 }, { "epoch": 1.0, "eval_f1": 0.48057967334012397, "eval_loss": 1.1853649616241455, "eval_runtime": 3.277, "eval_samples_per_second": 29.905, "eval_steps_per_second": 29.905, "step": 700 }, { "epoch": 1.14, "eval_f1": 0.5011814210846155, "eval_loss": 1.1788480281829834, "eval_runtime": 3.2639, "eval_samples_per_second": 30.025, "eval_steps_per_second": 30.025, "step": 800 }, { "epoch": 1.28, "eval_f1": 0.4964300899620197, "eval_loss": 1.093542218208313, "eval_runtime": 3.2724, "eval_samples_per_second": 29.947, "eval_steps_per_second": 29.947, "step": 900 }, { "epoch": 1.42, "learning_rate": 2.1452991452991456e-05, "loss": 0.9189, "step": 1000 }, { "epoch": 1.42, "eval_f1": 0.4986272191320895, "eval_loss": 1.2862237691879272, "eval_runtime": 3.302, "eval_samples_per_second": 29.679, "eval_steps_per_second": 29.679, "step": 1000 }, { "epoch": 1.57, "eval_f1": 0.49297809308258944, "eval_loss": 1.2222929000854492, "eval_runtime": 3.3171, "eval_samples_per_second": 29.544, "eval_steps_per_second": 29.544, "step": 1100 }, { "epoch": 1.71, "eval_f1": 0.4953797333525823, "eval_loss": 1.1196690797805786, "eval_runtime": 3.2943, "eval_samples_per_second": 29.749, "eval_steps_per_second": 29.749, "step": 1200 }, { "epoch": 1.85, "eval_f1": 0.5153008157478032, "eval_loss": 1.1256704330444336, "eval_runtime": 3.2631, "eval_samples_per_second": 30.033, "eval_steps_per_second": 30.033, "step": 1300 }, { "epoch": 1.99, "eval_f1": 0.5263780363862973, "eval_loss": 1.1729286909103394, "eval_runtime": 3.2904, "eval_samples_per_second": 29.783, "eval_steps_per_second": 29.783, "step": 1400 }, { "epoch": 2.14, "learning_rate": 1.7179487179487178e-05, "loss": 0.8143, "step": 1500 }, { "epoch": 2.14, "eval_f1": 0.5165321012151871, "eval_loss": 1.272233486175537, "eval_runtime": 3.3087, "eval_samples_per_second": 29.619, "eval_steps_per_second": 29.619, "step": 1500 }, { "epoch": 2.28, "eval_f1": 0.539472065505205, "eval_loss": 1.3217926025390625, "eval_runtime": 3.2634, "eval_samples_per_second": 30.03, "eval_steps_per_second": 30.03, "step": 1600 }, { "epoch": 2.42, "eval_f1": 0.5170136038987323, "eval_loss": 1.338261604309082, "eval_runtime": 3.2635, "eval_samples_per_second": 30.029, "eval_steps_per_second": 30.029, "step": 1700 }, { "epoch": 2.56, "eval_f1": 0.5138801729725696, "eval_loss": 1.250339388847351, "eval_runtime": 3.2656, "eval_samples_per_second": 30.009, "eval_steps_per_second": 30.009, "step": 1800 }, { "epoch": 2.71, "eval_f1": 0.523963853035474, "eval_loss": 1.362999439239502, "eval_runtime": 3.3211, "eval_samples_per_second": 29.508, "eval_steps_per_second": 29.508, "step": 1900 }, { "epoch": 2.85, "learning_rate": 1.2905982905982905e-05, "loss": 0.6175, "step": 2000 }, { "epoch": 2.85, "eval_f1": 0.5305458058252502, "eval_loss": 1.402750015258789, "eval_runtime": 3.2768, "eval_samples_per_second": 29.907, "eval_steps_per_second": 29.907, "step": 2000 }, { "epoch": 2.99, "eval_f1": 0.5408209021870833, "eval_loss": 1.4016790390014648, "eval_runtime": 3.3122, "eval_samples_per_second": 29.588, "eval_steps_per_second": 29.588, "step": 2100 }, { "epoch": 3.13, "eval_f1": 0.541281162975512, "eval_loss": 1.5929616689682007, "eval_runtime": 3.294, "eval_samples_per_second": 29.751, "eval_steps_per_second": 29.751, "step": 2200 }, { "epoch": 3.28, "eval_f1": 0.5564758214624422, "eval_loss": 1.5372625589370728, "eval_runtime": 3.2882, "eval_samples_per_second": 29.803, "eval_steps_per_second": 29.803, "step": 2300 }, { "epoch": 3.42, "eval_f1": 0.5722151004353093, "eval_loss": 1.5012538433074951, "eval_runtime": 3.3067, "eval_samples_per_second": 29.637, "eval_steps_per_second": 29.637, "step": 2400 }, { "epoch": 3.56, "learning_rate": 8.632478632478633e-06, "loss": 0.4726, "step": 2500 }, { "epoch": 3.56, "eval_f1": 0.5226487560978434, "eval_loss": 1.570418119430542, "eval_runtime": 3.3114, "eval_samples_per_second": 29.595, "eval_steps_per_second": 29.595, "step": 2500 }, { "epoch": 3.7, "eval_f1": 0.5483719296880323, "eval_loss": 1.5890936851501465, "eval_runtime": 3.2745, "eval_samples_per_second": 29.928, "eval_steps_per_second": 29.928, "step": 2600 }, { "epoch": 3.85, "eval_f1": 0.5630120856995185, "eval_loss": 1.5236029624938965, "eval_runtime": 3.2951, "eval_samples_per_second": 29.741, "eval_steps_per_second": 29.741, "step": 2700 }, { "epoch": 3.99, "eval_f1": 0.5422100713682105, "eval_loss": 1.52333664894104, "eval_runtime": 3.3261, "eval_samples_per_second": 29.464, "eval_steps_per_second": 29.464, "step": 2800 }, { "epoch": 4.13, "eval_f1": 0.5469719933620487, "eval_loss": 1.6104604005813599, "eval_runtime": 3.2888, "eval_samples_per_second": 29.798, "eval_steps_per_second": 29.798, "step": 2900 }, { "epoch": 4.27, "learning_rate": 4.358974358974359e-06, "loss": 0.3745, "step": 3000 }, { "epoch": 4.27, "eval_f1": 0.5525357490677262, "eval_loss": 1.7136110067367554, "eval_runtime": 3.3248, "eval_samples_per_second": 29.476, "eval_steps_per_second": 29.476, "step": 3000 }, { "epoch": 4.42, "eval_f1": 0.5539436259955471, "eval_loss": 1.6561492681503296, "eval_runtime": 3.2857, "eval_samples_per_second": 29.826, "eval_steps_per_second": 29.826, "step": 3100 }, { "epoch": 4.56, "eval_f1": 0.5504413375623162, "eval_loss": 1.7664132118225098, "eval_runtime": 3.2517, "eval_samples_per_second": 30.138, "eval_steps_per_second": 30.138, "step": 3200 }, { "epoch": 4.7, "eval_f1": 0.5494419672200014, "eval_loss": 1.750455379486084, "eval_runtime": 3.27, "eval_samples_per_second": 29.969, "eval_steps_per_second": 29.969, "step": 3300 }, { "epoch": 4.84, "eval_f1": 0.5516497223039627, "eval_loss": 1.7312653064727783, "eval_runtime": 3.3127, "eval_samples_per_second": 29.583, "eval_steps_per_second": 29.583, "step": 3400 }, { "epoch": 4.99, "learning_rate": 8.547008547008547e-08, "loss": 0.307, "step": 3500 }, { "epoch": 4.99, "eval_f1": 0.5515045914952008, "eval_loss": 1.7193822860717773, "eval_runtime": 3.2769, "eval_samples_per_second": 29.907, "eval_steps_per_second": 29.907, "step": 3500 }, { "epoch": 5.0, "step": 3510, "total_flos": 2890172619430200.0, "train_loss": 0.6706694952103487, "train_runtime": 824.1732, "train_samples_per_second": 4.259, "train_steps_per_second": 4.259 } ], "max_steps": 3510, "num_train_epochs": 5, "total_flos": 2890172619430200.0, "trial_name": null, "trial_params": null }