{
  "best_metric": 0.09502185136079788,
  "best_model_checkpoint": "fine-tuning-Phi2-with-webglm-qa-with-lora_7/checkpoint-1000",
  "epoch": 15.723270440251572,
  "eval_steps": 20,
  "global_step": 1000,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.31,
      "learning_rate": 1.6666666666666667e-05,
      "loss": 7.3505,
      "step": 20
    },
    {
      "epoch": 0.31,
      "eval_loss": 6.286308765411377,
      "eval_runtime": 25.4764,
      "eval_samples_per_second": 0.864,
      "eval_steps_per_second": 0.432,
      "step": 20
    },
    {
      "epoch": 0.63,
      "learning_rate": 3.3333333333333335e-05,
      "loss": 4.0914,
      "step": 40
    },
    {
      "epoch": 0.63,
      "eval_loss": 0.9255164861679077,
      "eval_runtime": 25.461,
      "eval_samples_per_second": 0.864,
      "eval_steps_per_second": 0.432,
      "step": 40
    },
    {
      "epoch": 0.94,
      "learning_rate": 5e-05,
      "loss": 0.6517,
      "step": 60
    },
    {
      "epoch": 0.94,
      "eval_loss": 0.576193630695343,
      "eval_runtime": 25.4995,
      "eval_samples_per_second": 0.863,
      "eval_steps_per_second": 0.431,
      "step": 60
    },
    {
      "epoch": 1.26,
      "learning_rate": 4.893617021276596e-05,
      "loss": 0.4621,
      "step": 80
    },
    {
      "epoch": 1.26,
      "eval_loss": 0.40622055530548096,
      "eval_runtime": 25.4724,
      "eval_samples_per_second": 0.864,
      "eval_steps_per_second": 0.432,
      "step": 80
    },
    {
      "epoch": 1.57,
      "learning_rate": 4.787234042553192e-05,
      "loss": 0.3128,
      "step": 100
    },
    {
      "epoch": 1.57,
      "eval_loss": 0.30563685297966003,
      "eval_runtime": 25.4374,
      "eval_samples_per_second": 0.865,
      "eval_steps_per_second": 0.432,
      "step": 100
    },
    {
      "epoch": 1.89,
      "learning_rate": 4.680851063829788e-05,
      "loss": 0.2536,
      "step": 120
    },
    {
      "epoch": 1.89,
      "eval_loss": 0.2603812515735626,
      "eval_runtime": 25.4763,
      "eval_samples_per_second": 0.864,
      "eval_steps_per_second": 0.432,
      "step": 120
    },
    {
      "epoch": 2.2,
      "learning_rate": 4.574468085106383e-05,
      "loss": 0.2227,
      "step": 140
    },
    {
      "epoch": 2.2,
      "eval_loss": 0.22473430633544922,
      "eval_runtime": 25.4963,
      "eval_samples_per_second": 0.863,
      "eval_steps_per_second": 0.431,
      "step": 140
    },
    {
      "epoch": 2.52,
      "learning_rate": 4.468085106382979e-05,
      "loss": 0.1901,
      "step": 160
    },
    {
      "epoch": 2.52,
      "eval_loss": 0.2041008621454239,
      "eval_runtime": 25.459,
      "eval_samples_per_second": 0.864,
      "eval_steps_per_second": 0.432,
      "step": 160
    },
    {
      "epoch": 2.83,
      "learning_rate": 4.3617021276595746e-05,
      "loss": 0.176,
      "step": 180
    },
    {
      "epoch": 2.83,
      "eval_loss": 0.1812312752008438,
      "eval_runtime": 25.4868,
      "eval_samples_per_second": 0.863,
      "eval_steps_per_second": 0.432,
      "step": 180
    },
    {
      "epoch": 3.14,
      "learning_rate": 4.2553191489361704e-05,
      "loss": 0.1453,
      "step": 200
    },
    {
      "epoch": 3.14,
      "eval_loss": 0.16830970346927643,
      "eval_runtime": 25.481,
      "eval_samples_per_second": 0.863,
      "eval_steps_per_second": 0.432,
      "step": 200
    },
    {
      "epoch": 3.46,
      "learning_rate": 4.148936170212766e-05,
      "loss": 0.1557,
      "step": 220
    },
    {
      "epoch": 3.46,
      "eval_loss": 0.15921901166439056,
      "eval_runtime": 25.5028,
      "eval_samples_per_second": 0.863,
      "eval_steps_per_second": 0.431,
      "step": 220
    },
    {
      "epoch": 3.77,
      "learning_rate": 4.0425531914893614e-05,
      "loss": 0.1441,
      "step": 240
    },
    {
      "epoch": 3.77,
      "eval_loss": 0.14882861077785492,
      "eval_runtime": 25.4427,
      "eval_samples_per_second": 0.865,
      "eval_steps_per_second": 0.432,
      "step": 240
    },
    {
      "epoch": 4.09,
      "learning_rate": 3.936170212765958e-05,
      "loss": 0.1282,
      "step": 260
    },
    {
      "epoch": 4.09,
      "eval_loss": 0.1429978758096695,
      "eval_runtime": 25.4905,
      "eval_samples_per_second": 0.863,
      "eval_steps_per_second": 0.432,
      "step": 260
    },
    {
      "epoch": 4.4,
      "learning_rate": 3.829787234042553e-05,
      "loss": 0.1215,
      "step": 280
    },
    {
      "epoch": 4.4,
      "eval_loss": 0.13476386666297913,
      "eval_runtime": 25.4804,
      "eval_samples_per_second": 0.863,
      "eval_steps_per_second": 0.432,
      "step": 280
    },
    {
      "epoch": 4.72,
      "learning_rate": 3.723404255319149e-05,
      "loss": 0.1217,
      "step": 300
    },
    {
      "epoch": 4.72,
      "eval_loss": 0.13230670988559723,
      "eval_runtime": 25.4706,
      "eval_samples_per_second": 0.864,
      "eval_steps_per_second": 0.432,
      "step": 300
    },
    {
      "epoch": 5.03,
      "learning_rate": 3.617021276595745e-05,
      "loss": 0.117,
      "step": 320
    },
    {
      "epoch": 5.03,
      "eval_loss": 0.12707628309726715,
      "eval_runtime": 25.4283,
      "eval_samples_per_second": 0.865,
      "eval_steps_per_second": 0.433,
      "step": 320
    },
    {
      "epoch": 5.35,
      "learning_rate": 3.5106382978723407e-05,
      "loss": 0.109,
      "step": 340
    },
    {
      "epoch": 5.35,
      "eval_loss": 0.12554581463336945,
      "eval_runtime": 25.4583,
      "eval_samples_per_second": 0.864,
      "eval_steps_per_second": 0.432,
      "step": 340
    },
    {
      "epoch": 5.66,
      "learning_rate": 3.4042553191489365e-05,
      "loss": 0.1094,
      "step": 360
    },
    {
      "epoch": 5.66,
      "eval_loss": 0.12099047005176544,
      "eval_runtime": 25.4606,
      "eval_samples_per_second": 0.864,
      "eval_steps_per_second": 0.432,
      "step": 360
    },
    {
      "epoch": 5.97,
      "learning_rate": 3.2978723404255317e-05,
      "loss": 0.1057,
      "step": 380
    },
    {
      "epoch": 5.97,
      "eval_loss": 0.11747618019580841,
      "eval_runtime": 25.4382,
      "eval_samples_per_second": 0.865,
      "eval_steps_per_second": 0.432,
      "step": 380
    },
    {
      "epoch": 6.29,
      "learning_rate": 3.191489361702128e-05,
      "loss": 0.0937,
      "step": 400
    },
    {
      "epoch": 6.29,
      "eval_loss": 0.11580007523298264,
      "eval_runtime": 25.4149,
      "eval_samples_per_second": 0.866,
      "eval_steps_per_second": 0.433,
      "step": 400
    },
    {
      "epoch": 6.6,
      "learning_rate": 3.085106382978723e-05,
      "loss": 0.0942,
      "step": 420
    },
    {
      "epoch": 6.6,
      "eval_loss": 0.11587951332330704,
      "eval_runtime": 25.4544,
      "eval_samples_per_second": 0.864,
      "eval_steps_per_second": 0.432,
      "step": 420
    },
    {
      "epoch": 6.92,
      "learning_rate": 2.9787234042553192e-05,
      "loss": 0.1007,
      "step": 440
    },
    {
      "epoch": 6.92,
      "eval_loss": 0.11250220984220505,
      "eval_runtime": 25.4316,
      "eval_samples_per_second": 0.865,
      "eval_steps_per_second": 0.433,
      "step": 440
    },
    {
      "epoch": 7.23,
      "learning_rate": 2.8723404255319154e-05,
      "loss": 0.0876,
      "step": 460
    },
    {
      "epoch": 7.23,
      "eval_loss": 0.11185076832771301,
      "eval_runtime": 25.4765,
      "eval_samples_per_second": 0.864,
      "eval_steps_per_second": 0.432,
      "step": 460
    },
    {
      "epoch": 7.55,
      "learning_rate": 2.765957446808511e-05,
      "loss": 0.0894,
      "step": 480
    },
    {
      "epoch": 7.55,
      "eval_loss": 0.1098945215344429,
      "eval_runtime": 25.4185,
      "eval_samples_per_second": 0.866,
      "eval_steps_per_second": 0.433,
      "step": 480
    },
    {
      "epoch": 7.86,
      "learning_rate": 2.6595744680851064e-05,
      "loss": 0.0827,
      "step": 500
    },
    {
      "epoch": 7.86,
      "eval_loss": 0.10724210739135742,
      "eval_runtime": 25.4969,
      "eval_samples_per_second": 0.863,
      "eval_steps_per_second": 0.431,
      "step": 500
    },
    {
      "epoch": 8.18,
      "learning_rate": 2.5531914893617022e-05,
      "loss": 0.0894,
      "step": 520
    },
    {
      "epoch": 8.18,
      "eval_loss": 0.10687847435474396,
      "eval_runtime": 25.4493,
      "eval_samples_per_second": 0.864,
      "eval_steps_per_second": 0.432,
      "step": 520
    },
    {
      "epoch": 8.49,
      "learning_rate": 2.446808510638298e-05,
      "loss": 0.0805,
      "step": 540
    },
    {
      "epoch": 8.49,
      "eval_loss": 0.10752053558826447,
      "eval_runtime": 25.4419,
      "eval_samples_per_second": 0.865,
      "eval_steps_per_second": 0.432,
      "step": 540
    },
    {
      "epoch": 8.81,
      "learning_rate": 2.340425531914894e-05,
      "loss": 0.0782,
      "step": 560
    },
    {
      "epoch": 8.81,
      "eval_loss": 0.10425343364477158,
      "eval_runtime": 25.4865,
      "eval_samples_per_second": 0.863,
      "eval_steps_per_second": 0.432,
      "step": 560
    },
    {
      "epoch": 9.12,
      "learning_rate": 2.2340425531914894e-05,
      "loss": 0.0881,
      "step": 580
    },
    {
      "epoch": 9.12,
      "eval_loss": 0.1033911183476448,
      "eval_runtime": 25.4549,
      "eval_samples_per_second": 0.864,
      "eval_steps_per_second": 0.432,
      "step": 580
    },
    {
      "epoch": 9.43,
      "learning_rate": 2.1276595744680852e-05,
      "loss": 0.0839,
      "step": 600
    },
    {
      "epoch": 9.43,
      "eval_loss": 0.10145573318004608,
      "eval_runtime": 25.4481,
      "eval_samples_per_second": 0.865,
      "eval_steps_per_second": 0.432,
      "step": 600
    },
    {
      "epoch": 9.75,
      "learning_rate": 2.0212765957446807e-05,
      "loss": 0.0694,
      "step": 620
    },
    {
      "epoch": 9.75,
      "eval_loss": 0.10002648830413818,
      "eval_runtime": 25.4089,
      "eval_samples_per_second": 0.866,
      "eval_steps_per_second": 0.433,
      "step": 620
    },
    {
      "epoch": 10.06,
      "learning_rate": 1.9148936170212766e-05,
      "loss": 0.068,
      "step": 640
    },
    {
      "epoch": 10.06,
      "eval_loss": 0.10073428601026535,
      "eval_runtime": 25.4963,
      "eval_samples_per_second": 0.863,
      "eval_steps_per_second": 0.431,
      "step": 640
    },
    {
      "epoch": 10.38,
      "learning_rate": 1.8085106382978724e-05,
      "loss": 0.072,
      "step": 660
    },
    {
      "epoch": 10.38,
      "eval_loss": 0.09940142929553986,
      "eval_runtime": 25.4629,
      "eval_samples_per_second": 0.864,
      "eval_steps_per_second": 0.432,
      "step": 660
    },
    {
      "epoch": 10.69,
      "learning_rate": 1.7021276595744682e-05,
      "loss": 0.0709,
      "step": 680
    },
    {
      "epoch": 10.69,
      "eval_loss": 0.09847569465637207,
      "eval_runtime": 25.5056,
      "eval_samples_per_second": 0.863,
      "eval_steps_per_second": 0.431,
      "step": 680
    },
    {
      "epoch": 11.01,
      "learning_rate": 1.595744680851064e-05,
      "loss": 0.0712,
      "step": 700
    },
    {
      "epoch": 11.01,
      "eval_loss": 0.09858354926109314,
      "eval_runtime": 25.4627,
      "eval_samples_per_second": 0.864,
      "eval_steps_per_second": 0.432,
      "step": 700
    },
    {
      "epoch": 11.32,
      "learning_rate": 1.4893617021276596e-05,
      "loss": 0.0673,
      "step": 720
    },
    {
      "epoch": 11.32,
      "eval_loss": 0.09991483390331268,
      "eval_runtime": 25.4891,
      "eval_samples_per_second": 0.863,
      "eval_steps_per_second": 0.432,
      "step": 720
    },
    {
      "epoch": 11.64,
      "learning_rate": 1.3829787234042554e-05,
      "loss": 0.0669,
      "step": 740
    },
    {
      "epoch": 11.64,
      "eval_loss": 0.09741941094398499,
      "eval_runtime": 25.4606,
      "eval_samples_per_second": 0.864,
      "eval_steps_per_second": 0.432,
      "step": 740
    },
    {
      "epoch": 11.95,
      "learning_rate": 1.2765957446808511e-05,
      "loss": 0.0706,
      "step": 760
    },
    {
      "epoch": 11.95,
      "eval_loss": 0.0980500727891922,
      "eval_runtime": 25.4509,
      "eval_samples_per_second": 0.864,
      "eval_steps_per_second": 0.432,
      "step": 760
    },
    {
      "epoch": 12.26,
      "learning_rate": 1.170212765957447e-05,
      "loss": 0.0641,
      "step": 780
    },
    {
      "epoch": 12.26,
      "eval_loss": 0.09693228453397751,
      "eval_runtime": 25.436,
      "eval_samples_per_second": 0.865,
      "eval_steps_per_second": 0.432,
      "step": 780
    },
    {
      "epoch": 12.58,
      "learning_rate": 1.0638297872340426e-05,
      "loss": 0.0652,
      "step": 800
    },
    {
      "epoch": 12.58,
      "eval_loss": 0.0963829830288887,
      "eval_runtime": 25.4744,
      "eval_samples_per_second": 0.864,
      "eval_steps_per_second": 0.432,
      "step": 800
    },
    {
      "epoch": 12.89,
      "learning_rate": 9.574468085106383e-06,
      "loss": 0.0668,
      "step": 820
    },
    {
      "epoch": 12.89,
      "eval_loss": 0.09619712829589844,
      "eval_runtime": 25.446,
      "eval_samples_per_second": 0.865,
      "eval_steps_per_second": 0.432,
      "step": 820
    },
    {
      "epoch": 13.21,
      "learning_rate": 8.510638297872341e-06,
      "loss": 0.0617,
      "step": 840
    },
    {
      "epoch": 13.21,
      "eval_loss": 0.09718295931816101,
      "eval_runtime": 25.4457,
      "eval_samples_per_second": 0.865,
      "eval_steps_per_second": 0.432,
      "step": 840
    },
    {
      "epoch": 13.52,
      "learning_rate": 7.446808510638298e-06,
      "loss": 0.0628,
      "step": 860
    },
    {
      "epoch": 13.52,
      "eval_loss": 0.09600641578435898,
      "eval_runtime": 25.4516,
      "eval_samples_per_second": 0.864,
      "eval_steps_per_second": 0.432,
      "step": 860
    },
    {
      "epoch": 13.84,
      "learning_rate": 6.3829787234042555e-06,
      "loss": 0.0637,
      "step": 880
    },
    {
      "epoch": 13.84,
      "eval_loss": 0.09490146487951279,
      "eval_runtime": 25.4889,
      "eval_samples_per_second": 0.863,
      "eval_steps_per_second": 0.432,
      "step": 880
    },
    {
      "epoch": 14.15,
      "learning_rate": 5.319148936170213e-06,
      "loss": 0.0633,
      "step": 900
    },
    {
      "epoch": 14.15,
      "eval_loss": 0.0950675681233406,
      "eval_runtime": 25.4561,
      "eval_samples_per_second": 0.864,
      "eval_steps_per_second": 0.432,
      "step": 900
    },
    {
      "epoch": 14.47,
      "learning_rate": 4.255319148936171e-06,
      "loss": 0.0577,
      "step": 920
    },
    {
      "epoch": 14.47,
      "eval_loss": 0.09526454657316208,
      "eval_runtime": 25.5288,
      "eval_samples_per_second": 0.862,
      "eval_steps_per_second": 0.431,
      "step": 920
    },
    {
      "epoch": 14.78,
      "learning_rate": 3.1914893617021277e-06,
      "loss": 0.0646,
      "step": 940
    },
    {
      "epoch": 14.78,
      "eval_loss": 0.09467268735170364,
      "eval_runtime": 25.4341,
      "eval_samples_per_second": 0.865,
      "eval_steps_per_second": 0.432,
      "step": 940
    },
    {
      "epoch": 15.09,
      "learning_rate": 2.1276595744680853e-06,
      "loss": 0.06,
      "step": 960
    },
    {
      "epoch": 15.09,
      "eval_loss": 0.09462392330169678,
      "eval_runtime": 25.446,
      "eval_samples_per_second": 0.865,
      "eval_steps_per_second": 0.432,
      "step": 960
    },
    {
      "epoch": 15.41,
      "learning_rate": 1.0638297872340427e-06,
      "loss": 0.0584,
      "step": 980
    },
    {
      "epoch": 15.41,
      "eval_loss": 0.09491446614265442,
      "eval_runtime": 25.4603,
      "eval_samples_per_second": 0.864,
      "eval_steps_per_second": 0.432,
      "step": 980
    },
    {
      "epoch": 15.72,
      "learning_rate": 0.0,
      "loss": 0.0638,
      "step": 1000
    },
    {
      "epoch": 15.72,
      "eval_loss": 0.09502185136079788,
      "eval_runtime": 25.4343,
      "eval_samples_per_second": 0.865,
      "eval_steps_per_second": 0.432,
      "step": 1000
    }
  ],
  "logging_steps": 20,
  "max_steps": 1000,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 16,
  "save_steps": 200,
  "total_flos": 1.63933992517632e+17,
  "train_batch_size": 2,
  "trial_name": null,
  "trial_params": null
}