|
{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 2.994667614646285,
  "eval_steps": 1000,
  "global_step": 1053,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.028439388553146108,
      "grad_norm": 1.0473670959472656,
      "learning_rate": 9.433962264150944e-06,
      "loss": 2.6607,
      "step": 10
    },
    {
      "epoch": 0.056878777106292217,
      "grad_norm": 0.4230079650878906,
      "learning_rate": 1.8867924528301888e-05,
      "loss": 2.5121,
      "step": 20
    },
    {
      "epoch": 0.08531816565943832,
      "grad_norm": 0.35547205805778503,
      "learning_rate": 2.830188679245283e-05,
      "loss": 2.3998,
      "step": 30
    },
    {
      "epoch": 0.11375755421258443,
      "grad_norm": 0.3876227140426636,
      "learning_rate": 3.7735849056603776e-05,
      "loss": 2.3461,
      "step": 40
    },
    {
      "epoch": 0.14219694276573053,
      "grad_norm": 0.38349416851997375,
      "learning_rate": 4.716981132075472e-05,
      "loss": 2.2716,
      "step": 50
    },
    {
      "epoch": 0.17063633131887665,
      "grad_norm": 0.4068184792995453,
      "learning_rate": 4.999395511092461e-05,
      "loss": 2.2223,
      "step": 60
    },
    {
      "epoch": 0.19907571987202274,
      "grad_norm": 0.4073835015296936,
      "learning_rate": 4.996435452798774e-05,
      "loss": 2.1657,
      "step": 70
    },
    {
      "epoch": 0.22751510842516887,
      "grad_norm": 0.5059598684310913,
      "learning_rate": 4.991011714111481e-05,
      "loss": 2.1248,
      "step": 80
    },
    {
      "epoch": 0.25595449697831496,
      "grad_norm": 0.47395384311676025,
      "learning_rate": 4.9831296476058484e-05,
      "loss": 2.1223,
      "step": 90
    },
    {
      "epoch": 0.28439388553146105,
      "grad_norm": 0.43479716777801514,
      "learning_rate": 4.9727970319299044e-05,
      "loss": 2.0617,
      "step": 100
    },
    {
      "epoch": 0.3128332740846072,
      "grad_norm": 0.48797106742858887,
      "learning_rate": 4.9600240641278496e-05,
      "loss": 2.0526,
      "step": 110
    },
    {
      "epoch": 0.3412726626377533,
      "grad_norm": 0.5094576478004456,
      "learning_rate": 4.944823349576805e-05,
      "loss": 2.0356,
      "step": 120
    },
    {
      "epoch": 0.3697120511908994,
      "grad_norm": 0.5222152471542358,
      "learning_rate": 4.9272098895468277e-05,
      "loss": 2.0315,
      "step": 130
    },
    {
      "epoch": 0.3981514397440455,
      "grad_norm": 0.48217442631721497,
      "learning_rate": 4.907201066396469e-05,
      "loss": 2.013,
      "step": 140
    },
    {
      "epoch": 0.42659082829719164,
      "grad_norm": 0.46154898405075073,
      "learning_rate": 4.8848166264184844e-05,
      "loss": 1.9751,
      "step": 150
    },
    {
      "epoch": 0.45503021685033773,
      "grad_norm": 0.4828225374221802,
      "learning_rate": 4.860078660352625e-05,
      "loss": 1.9866,
      "step": 160
    },
    {
      "epoch": 0.4834696054034838,
      "grad_norm": 0.4831569194793701,
      "learning_rate": 4.8330115815847465e-05,
      "loss": 1.9529,
      "step": 170
    },
    {
      "epoch": 0.5119089939566299,
      "grad_norm": 0.4747070074081421,
      "learning_rate": 4.803642102053746e-05,
      "loss": 1.9575,
      "step": 180
    },
    {
      "epoch": 0.540348382509776,
      "grad_norm": 0.4549647271633148,
      "learning_rate": 4.7719992058901006e-05,
      "loss": 1.9488,
      "step": 190
    },
    {
      "epoch": 0.5687877710629221,
      "grad_norm": 0.4767439067363739,
      "learning_rate": 4.7381141208120296e-05,
      "loss": 1.9074,
      "step": 200
    },
    {
      "epoch": 0.5972271596160682,
      "grad_norm": 0.4805672764778137,
      "learning_rate": 4.702020287307509e-05,
      "loss": 1.9286,
      "step": 210
    },
    {
      "epoch": 0.6256665481692144,
      "grad_norm": 0.47634372115135193,
      "learning_rate": 4.663753325632548e-05,
      "loss": 1.924,
      "step": 220
    },
    {
      "epoch": 0.6541059367223605,
      "grad_norm": 0.4570080637931824,
      "learning_rate": 4.6233510006582914e-05,
      "loss": 1.8953,
      "step": 230
    },
    {
      "epoch": 0.6825453252755066,
      "grad_norm": 0.47734373807907104,
      "learning_rate": 4.580853184601659e-05,
      "loss": 1.9023,
      "step": 240
    },
    {
      "epoch": 0.7109847138286527,
      "grad_norm": 0.48877596855163574,
      "learning_rate": 4.536301817676274e-05,
      "loss": 1.8832,
      "step": 250
    },
    {
      "epoch": 0.7394241023817988,
      "grad_norm": 0.46437644958496094,
      "learning_rate": 4.48974086670254e-05,
      "loss": 1.9052,
      "step": 260
    },
    {
      "epoch": 0.7678634909349449,
      "grad_norm": 0.4709625840187073,
      "learning_rate": 4.4412162817176965e-05,
      "loss": 1.867,
      "step": 270
    },
    {
      "epoch": 0.796302879488091,
      "grad_norm": 0.5039490461349487,
      "learning_rate": 4.39077595062868e-05,
      "loss": 1.8549,
      "step": 280
    },
    {
      "epoch": 0.8247422680412371,
      "grad_norm": 0.4863899350166321,
      "learning_rate": 4.33846965195254e-05,
      "loss": 1.8589,
      "step": 290
    },
    {
      "epoch": 0.8531816565943833,
      "grad_norm": 0.4797697961330414,
      "learning_rate": 4.2843490056910534e-05,
      "loss": 1.8624,
      "step": 300
    },
    {
      "epoch": 0.8816210451475294,
      "grad_norm": 0.5284737348556519,
      "learning_rate": 4.228467422388016e-05,
      "loss": 1.878,
      "step": 310
    },
    {
      "epoch": 0.9100604337006755,
      "grad_norm": 0.45375633239746094,
      "learning_rate": 4.1708800504194827e-05,
      "loss": 1.8837,
      "step": 320
    },
    {
      "epoch": 0.9384998222538216,
      "grad_norm": 0.4459105432033539,
      "learning_rate": 4.1116437215689784e-05,
      "loss": 1.8472,
      "step": 330
    },
    {
      "epoch": 0.9669392108069677,
      "grad_norm": 0.48929303884506226,
      "learning_rate": 4.0508168949413906e-05,
      "loss": 1.8261,
      "step": 340
    },
    {
      "epoch": 0.9953785993601137,
      "grad_norm": 0.4859321415424347,
      "learning_rate": 3.988459599270888e-05,
      "loss": 1.8697,
      "step": 350
    },
    {
      "epoch": 1.0238179879132598,
      "grad_norm": 0.481235533952713,
      "learning_rate": 3.9246333736798095e-05,
      "loss": 1.8783,
      "step": 360
    },
    {
      "epoch": 1.052257376466406,
      "grad_norm": 0.5238078832626343,
      "learning_rate": 3.859401206946982e-05,
      "loss": 1.7799,
      "step": 370
    },
    {
      "epoch": 1.080696765019552,
      "grad_norm": 0.5101935863494873,
      "learning_rate": 3.792827475345393e-05,
      "loss": 1.8068,
      "step": 380
    },
    {
      "epoch": 1.1091361535726982,
      "grad_norm": 0.4812924265861511,
      "learning_rate": 3.724977879110591e-05,
      "loss": 1.7974,
      "step": 390
    },
    {
      "epoch": 1.1375755421258442,
      "grad_norm": 0.5056812763214111,
      "learning_rate": 3.6559193776024794e-05,
      "loss": 1.7918,
      "step": 400
    },
    {
      "epoch": 1.1660149306789904,
      "grad_norm": 0.5049041509628296,
      "learning_rate": 3.585720123224512e-05,
      "loss": 1.7856,
      "step": 410
    },
    {
      "epoch": 1.1944543192321366,
      "grad_norm": 0.5172712802886963,
      "learning_rate": 3.5144493941655e-05,
      "loss": 1.786,
      "step": 420
    },
    {
      "epoch": 1.2228937077852826,
      "grad_norm": 0.48730871081352234,
      "learning_rate": 3.442177526030407e-05,
      "loss": 1.7553,
      "step": 430
    },
    {
      "epoch": 1.2513330963384286,
      "grad_norm": 0.5034505724906921,
      "learning_rate": 3.3689758424275926e-05,
      "loss": 1.7567,
      "step": 440
    },
    {
      "epoch": 1.2797724848915748,
      "grad_norm": 0.5002079606056213,
      "learning_rate": 3.294916584581027e-05,
      "loss": 1.7851,
      "step": 450
    },
    {
      "epoch": 1.308211873444721,
      "grad_norm": 0.5162603855133057,
      "learning_rate": 3.220072840036923e-05,
      "loss": 1.7631,
      "step": 460
    },
    {
      "epoch": 1.336651261997867,
      "grad_norm": 0.5188742876052856,
      "learning_rate": 3.14451847053515e-05,
      "loss": 1.7645,
      "step": 470
    },
    {
      "epoch": 1.3650906505510132,
      "grad_norm": 0.5118332505226135,
      "learning_rate": 3.068328039116616e-05,
      "loss": 1.7693,
      "step": 480
    },
    {
      "epoch": 1.3935300391041592,
      "grad_norm": 0.5362988710403442,
      "learning_rate": 2.99157673653855e-05,
      "loss": 1.7712,
      "step": 490
    },
    {
      "epoch": 1.4219694276573054,
      "grad_norm": 0.5271508097648621,
      "learning_rate": 2.9143403070702997e-05,
      "loss": 1.7613,
      "step": 500
    },
    {
      "epoch": 1.4504088162104516,
      "grad_norm": 0.5073094964027405,
      "learning_rate": 2.8366949737428817e-05,
      "loss": 1.7729,
      "step": 510
    },
    {
      "epoch": 1.4788482047635976,
      "grad_norm": 0.5312384366989136,
      "learning_rate": 2.7587173631260566e-05,
      "loss": 1.7649,
      "step": 520
    },
    {
      "epoch": 1.5072875933167436,
      "grad_norm": 0.5345740914344788,
      "learning_rate": 2.6804844297071526e-05,
      "loss": 1.747,
      "step": 530
    },
    {
      "epoch": 1.5357269818698898,
      "grad_norm": 0.5181931257247925,
      "learning_rate": 2.6020733799462754e-05,
      "loss": 1.7496,
      "step": 540
    },
    {
      "epoch": 1.564166370423036,
      "grad_norm": 0.5075932741165161,
      "learning_rate": 2.5235615960828605e-05,
      "loss": 1.7517,
      "step": 550
    },
    {
      "epoch": 1.5926057589761822,
      "grad_norm": 0.5280550122261047,
      "learning_rate": 2.4450265597687376e-05,
      "loss": 1.7768,
      "step": 560
    },
    {
      "epoch": 1.6210451475293282,
      "grad_norm": 0.526050865650177,
      "learning_rate": 2.3665457756030988e-05,
      "loss": 1.7369,
      "step": 570
    },
    {
      "epoch": 1.6494845360824741,
      "grad_norm": 0.5144099593162537,
      "learning_rate": 2.2881966946448167e-05,
      "loss": 1.7389,
      "step": 580
    },
    {
      "epoch": 1.6779239246356203,
      "grad_norm": 0.532425045967102,
      "learning_rate": 2.2100566379775967e-05,
      "loss": 1.7364,
      "step": 590
    },
    {
      "epoch": 1.7063633131887666,
      "grad_norm": 0.5095171332359314,
      "learning_rate": 2.1322027204034066e-05,
      "loss": 1.7447,
      "step": 600
    },
    {
      "epoch": 1.7348027017419125,
      "grad_norm": 0.5283172726631165,
      "learning_rate": 2.0547117743394744e-05,
      "loss": 1.7647,
      "step": 610
    },
    {
      "epoch": 1.7632420902950585,
      "grad_norm": 0.5820568203926086,
      "learning_rate": 1.9776602739939714e-05,
      "loss": 1.7343,
      "step": 620
    },
    {
      "epoch": 1.7916814788482047,
      "grad_norm": 0.5260512232780457,
      "learning_rate": 1.9011242598951962e-05,
      "loss": 1.7402,
      "step": 630
    },
    {
      "epoch": 1.820120867401351,
      "grad_norm": 0.5743537545204163,
      "learning_rate": 1.8251792638487596e-05,
      "loss": 1.7421,
      "step": 640
    },
    {
      "epoch": 1.8485602559544971,
      "grad_norm": 0.508335530757904,
      "learning_rate": 1.7499002343968098e-05,
      "loss": 1.7303,
      "step": 650
    },
    {
      "epoch": 1.8769996445076431,
      "grad_norm": 0.5431790947914124,
      "learning_rate": 1.675361462852868e-05,
      "loss": 1.7236,
      "step": 660
    },
    {
      "epoch": 1.905439033060789,
      "grad_norm": 0.5047234892845154,
      "learning_rate": 1.6016365099852735e-05,
      "loss": 1.7163,
      "step": 670
    },
    {
      "epoch": 1.9338784216139353,
      "grad_norm": 0.5409323573112488,
      "learning_rate": 1.528798133421585e-05,
      "loss": 1.7253,
      "step": 680
    },
    {
      "epoch": 1.9623178101670815,
      "grad_norm": 0.5161160826683044,
      "learning_rate": 1.4569182158455875e-05,
      "loss": 1.7054,
      "step": 690
    },
    {
      "epoch": 1.9907571987202275,
      "grad_norm": 0.5363497734069824,
      "learning_rate": 1.3860676940577594e-05,
      "loss": 1.7469,
      "step": 700
    },
    {
      "epoch": 2.0191965872733735,
      "grad_norm": 0.5758066177368164,
      "learning_rate": 1.3163164889692197e-05,
      "loss": 1.7598,
      "step": 710
    },
    {
      "epoch": 2.0476359758265197,
      "grad_norm": 0.5709594488143921,
      "learning_rate": 1.2477334365982248e-05,
      "loss": 1.6736,
      "step": 720
    },
    {
      "epoch": 2.076075364379666,
      "grad_norm": 0.5616790056228638,
      "learning_rate": 1.1803862201373342e-05,
      "loss": 1.6529,
      "step": 730
    },
    {
      "epoch": 2.104514752932812,
      "grad_norm": 0.5473997592926025,
      "learning_rate": 1.1143413031582645e-05,
      "loss": 1.6734,
      "step": 740
    },
    {
      "epoch": 2.132954141485958,
      "grad_norm": 0.559503436088562,
      "learning_rate": 1.0496638640203774e-05,
      "loss": 1.6702,
      "step": 750
    },
    {
      "epoch": 2.161393530039104,
      "grad_norm": 0.5698120594024658,
      "learning_rate": 9.864177315474968e-06,
      "loss": 1.6613,
      "step": 760
    },
    {
      "epoch": 2.1898329185922503,
      "grad_norm": 0.5590789318084717,
      "learning_rate": 9.246653220365778e-06,
      "loss": 1.6898,
      "step": 770
    },
    {
      "epoch": 2.2182723071453965,
      "grad_norm": 0.5687025785446167,
      "learning_rate": 8.644675776603476e-06,
      "loss": 1.6688,
      "step": 780
    },
    {
      "epoch": 2.2467116956985427,
      "grad_norm": 0.5703499913215637,
      "learning_rate": 8.058839063247447e-06,
      "loss": 1.6761,
      "step": 790
    },
    {
      "epoch": 2.2751510842516884,
      "grad_norm": 0.586949348449707,
      "learning_rate": 7.489721230404842e-06,
      "loss": 1.7022,
      "step": 800
    },
    {
      "epoch": 2.3035904728048346,
      "grad_norm": 0.5651352405548096,
      "learning_rate": 6.937883928666255e-06,
      "loss": 1.6778,
      "step": 810
    },
    {
      "epoch": 2.332029861357981,
      "grad_norm": 0.5906292200088501,
      "learning_rate": 6.403871754824373e-06,
      "loss": 1.6517,
      "step": 820
    },
    {
      "epoch": 2.360469249911127,
      "grad_norm": 0.5772544145584106,
      "learning_rate": 5.8882117144227115e-06,
      "loss": 1.6696,
      "step": 830
    },
    {
      "epoch": 2.3889086384642733,
      "grad_norm": 0.5833613872528076,
      "learning_rate": 5.391412701664744e-06,
      "loss": 1.6452,
      "step": 840
    },
    {
      "epoch": 2.417348027017419,
      "grad_norm": 0.5983288884162903,
      "learning_rate": 4.91396499719681e-06,
      "loss": 1.6739,
      "step": 850
    },
    {
      "epoch": 2.4457874155705652,
      "grad_norm": 0.5902206897735596,
      "learning_rate": 4.456339784260247e-06,
      "loss": 1.6582,
      "step": 860
    },
    {
      "epoch": 2.4742268041237114,
      "grad_norm": 0.5683711767196655,
      "learning_rate": 4.018988683690461e-06,
      "loss": 1.6483,
      "step": 870
    },
    {
      "epoch": 2.502666192676857,
      "grad_norm": 0.5805079936981201,
      "learning_rate": 3.6023433082216755e-06,
      "loss": 1.6731,
      "step": 880
    },
    {
      "epoch": 2.5311055812300034,
      "grad_norm": 0.5799385905265808,
      "learning_rate": 3.2068148365372806e-06,
      "loss": 1.6705,
      "step": 890
    },
    {
      "epoch": 2.5595449697831496,
      "grad_norm": 0.6097893118858337,
      "learning_rate": 2.832793607486087e-06,
      "loss": 1.6851,
      "step": 900
    },
    {
      "epoch": 2.587984358336296,
      "grad_norm": 0.5920665860176086,
      "learning_rate": 2.4806487348650485e-06,
      "loss": 1.6464,
      "step": 910
    },
    {
      "epoch": 2.616423746889442,
      "grad_norm": 0.5650290846824646,
      "learning_rate": 2.150727743148473e-06,
      "loss": 1.6576,
      "step": 920
    },
    {
      "epoch": 2.6448631354425878,
      "grad_norm": 0.5883274674415588,
      "learning_rate": 1.8433562245233349e-06,
      "loss": 1.6304,
      "step": 930
    },
    {
      "epoch": 2.673302523995734,
      "grad_norm": 0.5887704491615295,
      "learning_rate": 1.5588375175691117e-06,
      "loss": 1.6674,
      "step": 940
    },
    {
      "epoch": 2.70174191254888,
      "grad_norm": 0.576253354549408,
      "learning_rate": 1.2974524078991995e-06,
      "loss": 1.6493,
      "step": 950
    },
    {
      "epoch": 2.7301813011020264,
      "grad_norm": 0.5727583169937134,
      "learning_rate": 1.0594588510594445e-06,
      "loss": 1.6529,
      "step": 960
    },
    {
      "epoch": 2.7586206896551726,
      "grad_norm": 0.5819412469863892,
      "learning_rate": 8.450917179571305e-07,
      "loss": 1.6612,
      "step": 970
    },
    {
      "epoch": 2.7870600782083184,
      "grad_norm": 0.570379376411438,
      "learning_rate": 6.545625630717783e-07,
      "loss": 1.6637,
      "step": 980
    },
    {
      "epoch": 2.8154994667614646,
      "grad_norm": 0.5768330693244934,
      "learning_rate": 4.880594156763896e-07,
      "loss": 1.6784,
      "step": 990
    },
    {
      "epoch": 2.8439388553146108,
      "grad_norm": 0.5636200904846191,
      "learning_rate": 3.4574659427528133e-07,
      "loss": 1.6679,
      "step": 1000
    },
    {
      "epoch": 2.8439388553146108,
      "eval_loss": 1.758003830909729,
      "eval_runtime": 181.1788,
      "eval_samples_per_second": 55.194,
      "eval_steps_per_second": 1.728,
      "step": 1000
    },
    {
      "epoch": 2.872378243867757,
      "grad_norm": 0.5933385491371155,
      "learning_rate": 2.2776454444153328e-07,
      "loss": 1.6689,
      "step": 1010
    },
    {
      "epoch": 2.900817632420903,
      "grad_norm": 0.5807960033416748,
      "learning_rate": 1.342297002141918e-07,
      "loss": 1.6333,
      "step": 1020
    },
    {
      "epoch": 2.929257020974049,
      "grad_norm": 0.5805134773254395,
      "learning_rate": 6.523436919190773e-08,
      "loss": 1.6701,
      "step": 1030
    },
    {
      "epoch": 2.957696409527195,
      "grad_norm": 0.5947986841201782,
      "learning_rate": 2.0846641436497726e-08,
      "loss": 1.652,
      "step": 1040
    },
    {
      "epoch": 2.9861357980803414,
      "grad_norm": 0.5900915861129761,
      "learning_rate": 1.1103222762542941e-09,
      "loss": 1.6704,
      "step": 1050
    },
    {
      "epoch": 2.994667614646285,
      "step": 1053,
      "total_flos": 4.229776342129836e+18,
      "train_loss": 1.8177586702200084,
      "train_runtime": 12857.7751,
      "train_samples_per_second": 20.999,
      "train_steps_per_second": 0.082
    }
  ],
  "logging_steps": 10,
  "max_steps": 1053,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 3,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 4.229776342129836e+18,
  "train_batch_size": 4,
  "trial_name": null,
  "trial_params": null
}