{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 1.0,
  "eval_steps": 500,
  "global_step": 2230,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.0004484304932735426,
      "grad_norm": 4.697020980518476,
      "learning_rate": 4.4843049327354265e-08,
      "loss": 0.9912,
      "step": 1
    },
    {
      "epoch": 0.004484304932735426,
      "grad_norm": 5.023039914107906,
      "learning_rate": 4.484304932735426e-07,
      "loss": 1.0335,
      "step": 10
    },
    {
      "epoch": 0.008968609865470852,
      "grad_norm": 4.618075407201601,
      "learning_rate": 8.968609865470852e-07,
      "loss": 1.0355,
      "step": 20
    },
    {
      "epoch": 0.013452914798206279,
      "grad_norm": 2.181160774619855,
      "learning_rate": 1.345291479820628e-06,
      "loss": 0.9824,
      "step": 30
    },
    {
      "epoch": 0.017937219730941704,
      "grad_norm": 2.008569343268214,
      "learning_rate": 1.7937219730941704e-06,
      "loss": 0.9383,
      "step": 40
    },
    {
      "epoch": 0.02242152466367713,
      "grad_norm": 1.7133805307500916,
      "learning_rate": 2.242152466367713e-06,
      "loss": 0.87,
      "step": 50
    },
    {
      "epoch": 0.026905829596412557,
      "grad_norm": 1.6402285214131669,
      "learning_rate": 2.690582959641256e-06,
      "loss": 0.8608,
      "step": 60
    },
    {
      "epoch": 0.03139013452914798,
      "grad_norm": 1.548076900788033,
      "learning_rate": 3.1390134529147986e-06,
      "loss": 0.8675,
      "step": 70
    },
    {
      "epoch": 0.03587443946188341,
      "grad_norm": 1.5686573872779637,
      "learning_rate": 3.587443946188341e-06,
      "loss": 0.8581,
      "step": 80
    },
    {
      "epoch": 0.04035874439461883,
      "grad_norm": 1.8398749741247653,
      "learning_rate": 4.0358744394618836e-06,
      "loss": 0.8272,
      "step": 90
    },
    {
      "epoch": 0.04484304932735426,
      "grad_norm": 1.431522929426351,
      "learning_rate": 4.484304932735426e-06,
      "loss": 0.837,
      "step": 100
    },
    {
      "epoch": 0.04932735426008968,
      "grad_norm": 1.5183209831163855,
      "learning_rate": 4.932735426008969e-06,
      "loss": 0.7993,
      "step": 110
    },
    {
      "epoch": 0.053811659192825115,
      "grad_norm": 1.440741479973038,
      "learning_rate": 5.381165919282512e-06,
      "loss": 0.7768,
      "step": 120
    },
    {
      "epoch": 0.05829596412556054,
      "grad_norm": 1.6957957755628803,
      "learning_rate": 5.8295964125560544e-06,
      "loss": 0.793,
      "step": 130
    },
    {
      "epoch": 0.06278026905829596,
      "grad_norm": 1.5847929433417824,
      "learning_rate": 6.278026905829597e-06,
      "loss": 0.8145,
      "step": 140
    },
    {
      "epoch": 0.06726457399103139,
      "grad_norm": 1.721638040499256,
      "learning_rate": 6.72645739910314e-06,
      "loss": 0.7677,
      "step": 150
    },
    {
      "epoch": 0.07174887892376682,
      "grad_norm": 1.8900581111883776,
      "learning_rate": 7.174887892376682e-06,
      "loss": 0.784,
      "step": 160
    },
    {
      "epoch": 0.07623318385650224,
      "grad_norm": 1.554784427401762,
      "learning_rate": 7.6233183856502244e-06,
      "loss": 0.7595,
      "step": 170
    },
    {
      "epoch": 0.08071748878923767,
      "grad_norm": 1.8326262271816938,
      "learning_rate": 8.071748878923767e-06,
      "loss": 0.7722,
      "step": 180
    },
    {
      "epoch": 0.08520179372197309,
      "grad_norm": 1.4707798772479566,
      "learning_rate": 8.52017937219731e-06,
      "loss": 0.764,
      "step": 190
    },
    {
      "epoch": 0.08968609865470852,
      "grad_norm": 1.449476026227771,
      "learning_rate": 8.968609865470853e-06,
      "loss": 0.773,
      "step": 200
    },
    {
      "epoch": 0.09417040358744394,
      "grad_norm": 1.5945882538155545,
      "learning_rate": 9.417040358744395e-06,
      "loss": 0.7549,
      "step": 210
    },
    {
      "epoch": 0.09865470852017937,
      "grad_norm": 1.4765341188830319,
      "learning_rate": 9.865470852017938e-06,
      "loss": 0.784,
      "step": 220
    },
    {
      "epoch": 0.1031390134529148,
      "grad_norm": 1.452955640004767,
      "learning_rate": 9.999699851108367e-06,
      "loss": 0.7909,
      "step": 230
    },
    {
      "epoch": 0.10762331838565023,
      "grad_norm": 1.5997501860541272,
      "learning_rate": 9.9982298208374e-06,
      "loss": 0.7651,
      "step": 240
    },
    {
      "epoch": 0.11210762331838565,
      "grad_norm": 1.554511883278329,
      "learning_rate": 9.995535139530904e-06,
      "loss": 0.7621,
      "step": 250
    },
    {
      "epoch": 0.11659192825112108,
      "grad_norm": 1.6319742757477633,
      "learning_rate": 9.991616467431486e-06,
      "loss": 0.7906,
      "step": 260
    },
    {
      "epoch": 0.1210762331838565,
      "grad_norm": 1.6863475759473823,
      "learning_rate": 9.986474764680236e-06,
      "loss": 0.7684,
      "step": 270
    },
    {
      "epoch": 0.12556053811659193,
      "grad_norm": 1.5563949560805244,
      "learning_rate": 9.98011129108149e-06,
      "loss": 0.793,
      "step": 280
    },
    {
      "epoch": 0.13004484304932734,
      "grad_norm": 1.6203440473254576,
      "learning_rate": 9.972527605794151e-06,
      "loss": 0.771,
      "step": 290
    },
    {
      "epoch": 0.13452914798206278,
      "grad_norm": 1.5211967329248808,
      "learning_rate": 9.963725566949674e-06,
      "loss": 0.7865,
      "step": 300
    },
    {
      "epoch": 0.13901345291479822,
      "grad_norm": 1.3996609263865165,
      "learning_rate": 9.953707331196787e-06,
      "loss": 0.7397,
      "step": 310
    },
    {
      "epoch": 0.14349775784753363,
      "grad_norm": 1.7467903395183983,
      "learning_rate": 9.94247535317308e-06,
      "loss": 0.802,
      "step": 320
    },
    {
      "epoch": 0.14798206278026907,
      "grad_norm": 1.4561862746225176,
      "learning_rate": 9.930032384903566e-06,
      "loss": 0.78,
      "step": 330
    },
    {
      "epoch": 0.15246636771300448,
      "grad_norm": 1.261592243705757,
      "learning_rate": 9.916381475126406e-06,
      "loss": 0.7678,
      "step": 340
    },
    {
      "epoch": 0.15695067264573992,
      "grad_norm": 1.3845441861346746,
      "learning_rate": 9.901525968545907e-06,
      "loss": 0.7462,
      "step": 350
    },
    {
      "epoch": 0.16143497757847533,
      "grad_norm": 1.330783975393604,
      "learning_rate": 9.885469505013006e-06,
      "loss": 0.7516,
      "step": 360
    },
    {
      "epoch": 0.16591928251121077,
      "grad_norm": 1.5411411223309597,
      "learning_rate": 9.868216018633456e-06,
      "loss": 0.7617,
      "step": 370
    },
    {
      "epoch": 0.17040358744394618,
      "grad_norm": 1.4997661320107978,
      "learning_rate": 9.8497697368039e-06,
      "loss": 0.7433,
      "step": 380
    },
    {
      "epoch": 0.17488789237668162,
      "grad_norm": 1.4613487182945122,
      "learning_rate": 9.830135179176086e-06,
      "loss": 0.7977,
      "step": 390
    },
    {
      "epoch": 0.17937219730941703,
      "grad_norm": 1.3823002584421413,
      "learning_rate": 9.809317156549476e-06,
      "loss": 0.7668,
      "step": 400
    },
    {
      "epoch": 0.18385650224215247,
      "grad_norm": 1.3405610913825137,
      "learning_rate": 9.787320769692517e-06,
      "loss": 0.755,
      "step": 410
    },
    {
      "epoch": 0.18834080717488788,
      "grad_norm": 1.321174566440371,
      "learning_rate": 9.76415140809287e-06,
      "loss": 0.7712,
      "step": 420
    },
    {
      "epoch": 0.19282511210762332,
      "grad_norm": 1.5855307049280556,
      "learning_rate": 9.739814748636892e-06,
      "loss": 0.7876,
      "step": 430
    },
    {
      "epoch": 0.19730941704035873,
      "grad_norm": 1.3277684374580685,
      "learning_rate": 9.7143167542187e-06,
      "loss": 0.7497,
      "step": 440
    },
    {
      "epoch": 0.20179372197309417,
      "grad_norm": 1.3316052843966186,
      "learning_rate": 9.687663672279167e-06,
      "loss": 0.7742,
      "step": 450
    },
    {
      "epoch": 0.2062780269058296,
      "grad_norm": 1.3422686862026139,
      "learning_rate": 9.659862033275188e-06,
      "loss": 0.7443,
      "step": 460
    },
    {
      "epoch": 0.21076233183856502,
      "grad_norm": 1.3794382112115433,
      "learning_rate": 9.630918649079606e-06,
      "loss": 0.7423,
      "step": 470
    },
    {
      "epoch": 0.21524663677130046,
      "grad_norm": 1.3473781761704757,
      "learning_rate": 9.600840611312198e-06,
      "loss": 0.756,
      "step": 480
    },
    {
      "epoch": 0.21973094170403587,
      "grad_norm": 1.477159074283593,
      "learning_rate": 9.569635289602098e-06,
      "loss": 0.758,
      "step": 490
    },
    {
      "epoch": 0.2242152466367713,
      "grad_norm": 1.4406496372056654,
      "learning_rate": 9.537310329782109e-06,
      "loss": 0.7373,
      "step": 500
    },
    {
      "epoch": 0.22869955156950672,
      "grad_norm": 1.3236307975388621,
      "learning_rate": 9.503873652015358e-06,
      "loss": 0.7485,
      "step": 510
    },
    {
      "epoch": 0.23318385650224216,
      "grad_norm": 1.2168213830447414,
      "learning_rate": 9.469333448854713e-06,
      "loss": 0.7518,
      "step": 520
    },
    {
      "epoch": 0.23766816143497757,
      "grad_norm": 1.4695219464522695,
      "learning_rate": 9.433698183235468e-06,
      "loss": 0.7389,
      "step": 530
    },
    {
      "epoch": 0.242152466367713,
      "grad_norm": 1.5460371795366352,
      "learning_rate": 9.39697658640179e-06,
      "loss": 0.7606,
      "step": 540
    },
    {
      "epoch": 0.24663677130044842,
      "grad_norm": 1.36384817307445,
      "learning_rate": 9.359177655767398e-06,
      "loss": 0.7573,
      "step": 550
    },
    {
      "epoch": 0.25112107623318386,
      "grad_norm": 1.2974873879986306,
      "learning_rate": 9.320310652711062e-06,
      "loss": 0.7447,
      "step": 560
    },
    {
      "epoch": 0.2556053811659193,
      "grad_norm": 1.3924395179060305,
      "learning_rate": 9.2803851003074e-06,
      "loss": 0.7346,
      "step": 570
    },
    {
      "epoch": 0.2600896860986547,
      "grad_norm": 1.3646463661968233,
      "learning_rate": 9.239410780993565e-06,
      "loss": 0.7637,
      "step": 580
    },
    {
      "epoch": 0.2645739910313901,
      "grad_norm": 1.5500883315093192,
      "learning_rate": 9.197397734172381e-06,
      "loss": 0.7352,
      "step": 590
    },
    {
      "epoch": 0.26905829596412556,
      "grad_norm": 1.2384578247121611,
      "learning_rate": 9.154356253752519e-06,
      "loss": 0.7467,
      "step": 600
    },
    {
      "epoch": 0.273542600896861,
      "grad_norm": 1.4632407956897133,
      "learning_rate": 9.110296885626315e-06,
      "loss": 0.7592,
      "step": 610
    },
    {
      "epoch": 0.27802690582959644,
      "grad_norm": 1.4743516068749583,
      "learning_rate": 9.065230425085849e-06,
      "loss": 0.7471,
      "step": 620
    },
    {
      "epoch": 0.2825112107623318,
      "grad_norm": 1.609459180353317,
      "learning_rate": 9.01916791417792e-06,
      "loss": 0.7411,
      "step": 630
    },
    {
      "epoch": 0.28699551569506726,
      "grad_norm": 1.4163468213726333,
      "learning_rate": 8.97212063899854e-06,
      "loss": 0.7583,
      "step": 640
    },
    {
      "epoch": 0.2914798206278027,
      "grad_norm": 1.2789206960042645,
      "learning_rate": 8.924100126927672e-06,
      "loss": 0.7637,
      "step": 650
    },
    {
      "epoch": 0.29596412556053814,
      "grad_norm": 1.4348891847742615,
      "learning_rate": 8.87511814380481e-06,
      "loss": 0.7376,
      "step": 660
    },
    {
      "epoch": 0.3004484304932735,
      "grad_norm": 1.4055551714674843,
      "learning_rate": 8.825186691046156e-06,
      "loss": 0.7544,
      "step": 670
    },
    {
      "epoch": 0.30493273542600896,
      "grad_norm": 1.3076750983715024,
      "learning_rate": 8.774318002704072e-06,
      "loss": 0.7388,
      "step": 680
    },
    {
      "epoch": 0.3094170403587444,
      "grad_norm": 1.2832634236951583,
      "learning_rate": 8.722524542469517e-06,
      "loss": 0.7386,
      "step": 690
    },
    {
      "epoch": 0.31390134529147984,
      "grad_norm": 1.3612825976793976,
      "learning_rate": 8.669819000618248e-06,
      "loss": 0.768,
      "step": 700
    },
    {
      "epoch": 0.3183856502242152,
      "grad_norm": 1.3343477240590562,
      "learning_rate": 8.616214290901474e-06,
      "loss": 0.7244,
      "step": 710
    },
    {
      "epoch": 0.32286995515695066,
      "grad_norm": 1.3061898085537815,
      "learning_rate": 8.56172354738178e-06,
      "loss": 0.7368,
      "step": 720
    },
    {
      "epoch": 0.3273542600896861,
      "grad_norm": 1.423008056010291,
      "learning_rate": 8.506360121215046e-06,
      "loss": 0.7297,
      "step": 730
    },
    {
      "epoch": 0.33183856502242154,
      "grad_norm": 1.258418518673196,
      "learning_rate": 8.4501375773792e-06,
      "loss": 0.7322,
      "step": 740
    },
    {
      "epoch": 0.336322869955157,
      "grad_norm": 1.4805131801943718,
      "learning_rate": 8.39306969135056e-06,
      "loss": 0.7284,
      "step": 750
    },
    {
      "epoch": 0.34080717488789236,
      "grad_norm": 1.3260411374117855,
      "learning_rate": 8.335170445728609e-06,
      "loss": 0.7618,
      "step": 760
    },
    {
      "epoch": 0.3452914798206278,
      "grad_norm": 1.3351013471897553,
      "learning_rate": 8.276454026810026e-06,
      "loss": 0.7454,
      "step": 770
    },
    {
      "epoch": 0.34977578475336324,
      "grad_norm": 1.267536863507402,
      "learning_rate": 8.216934821112803e-06,
      "loss": 0.742,
      "step": 780
    },
    {
      "epoch": 0.3542600896860987,
      "grad_norm": 1.4791659570865663,
      "learning_rate": 8.156627411851295e-06,
      "loss": 0.7483,
      "step": 790
    },
    {
      "epoch": 0.35874439461883406,
      "grad_norm": 1.4042375363677306,
      "learning_rate": 8.095546575363098e-06,
      "loss": 0.7134,
      "step": 800
    },
    {
      "epoch": 0.3632286995515695,
      "grad_norm": 1.2552675050466975,
      "learning_rate": 8.033707277488585e-06,
      "loss": 0.7186,
      "step": 810
    },
    {
      "epoch": 0.36771300448430494,
      "grad_norm": 1.281776208560821,
      "learning_rate": 7.97112466990403e-06,
      "loss": 0.7367,
      "step": 820
    },
    {
      "epoch": 0.3721973094170404,
      "grad_norm": 1.417675486477273,
      "learning_rate": 7.907814086409183e-06,
      "loss": 0.7399,
      "step": 830
    },
    {
      "epoch": 0.37668161434977576,
      "grad_norm": 1.4911782746859528,
      "learning_rate": 7.843791039170232e-06,
      "loss": 0.738,
      "step": 840
    },
    {
      "epoch": 0.3811659192825112,
      "grad_norm": 1.3628167592658191,
      "learning_rate": 7.779071214919068e-06,
      "loss": 0.7404,
      "step": 850
    },
    {
      "epoch": 0.38565022421524664,
      "grad_norm": 1.256534318600846,
      "learning_rate": 7.713670471109749e-06,
      "loss": 0.7364,
      "step": 860
    },
    {
      "epoch": 0.3901345291479821,
      "grad_norm": 1.3061985247991272,
      "learning_rate": 7.647604832033178e-06,
      "loss": 0.7535,
      "step": 870
    },
    {
      "epoch": 0.39461883408071746,
      "grad_norm": 1.3626743194809932,
      "learning_rate": 7.580890484890864e-06,
      "loss": 0.7212,
      "step": 880
    },
    {
      "epoch": 0.3991031390134529,
      "grad_norm": 1.283405821881289,
      "learning_rate": 7.513543775828791e-06,
      "loss": 0.7336,
      "step": 890
    },
    {
      "epoch": 0.40358744394618834,
      "grad_norm": 1.4217858183255085,
      "learning_rate": 7.445581205932335e-06,
      "loss": 0.7349,
      "step": 900
    },
    {
      "epoch": 0.4080717488789238,
      "grad_norm": 1.3304359323377464,
      "learning_rate": 7.377019427183213e-06,
      "loss": 0.7265,
      "step": 910
    },
    {
      "epoch": 0.4125560538116592,
      "grad_norm": 1.3324230635898773,
      "learning_rate": 7.30787523837947e-06,
      "loss": 0.7451,
      "step": 920
    },
    {
      "epoch": 0.4170403587443946,
      "grad_norm": 1.2976336030674929,
      "learning_rate": 7.238165581019488e-06,
      "loss": 0.7415,
      "step": 930
    },
    {
      "epoch": 0.42152466367713004,
      "grad_norm": 1.3082251443805497,
      "learning_rate": 7.167907535151027e-06,
      "loss": 0.7405,
      "step": 940
    },
    {
      "epoch": 0.4260089686098655,
      "grad_norm": 1.368310990039969,
      "learning_rate": 7.097118315186335e-06,
      "loss": 0.7141,
      "step": 950
    },
    {
      "epoch": 0.4304932735426009,
      "grad_norm": 1.3507885090012453,
      "learning_rate": 7.025815265684315e-06,
      "loss": 0.744,
      "step": 960
    },
    {
      "epoch": 0.4349775784753363,
      "grad_norm": 1.3870418574466943,
      "learning_rate": 6.9540158571008105e-06,
      "loss": 0.7344,
      "step": 970
    },
    {
      "epoch": 0.43946188340807174,
      "grad_norm": 1.2702369270052376,
      "learning_rate": 6.881737681508065e-06,
      "loss": 0.7131,
      "step": 980
    },
    {
      "epoch": 0.4439461883408072,
      "grad_norm": 1.3694518474478212,
      "learning_rate": 6.808998448284347e-06,
      "loss": 0.7516,
      "step": 990
    },
    {
      "epoch": 0.4484304932735426,
      "grad_norm": 1.3967080566658139,
      "learning_rate": 6.735815979774865e-06,
      "loss": 0.7208,
      "step": 1000
    },
    {
      "epoch": 0.452914798206278,
      "grad_norm": 1.4517646369323314,
      "learning_rate": 6.662208206924986e-06,
      "loss": 0.7455,
      "step": 1010
    },
    {
      "epoch": 0.45739910313901344,
      "grad_norm": 1.2752229454885209,
      "learning_rate": 6.588193164886847e-06,
      "loss": 0.7555,
      "step": 1020
    },
    {
      "epoch": 0.4618834080717489,
      "grad_norm": 1.3528261238688069,
      "learning_rate": 6.513788988600441e-06,
      "loss": 0.7428,
      "step": 1030
    },
    {
      "epoch": 0.4663677130044843,
      "grad_norm": 1.340831615562883,
      "learning_rate": 6.439013908350249e-06,
      "loss": 0.7446,
      "step": 1040
    },
    {
      "epoch": 0.47085201793721976,
      "grad_norm": 1.3233981064583105,
      "learning_rate": 6.363886245298514e-06,
      "loss": 0.6945,
      "step": 1050
    },
    {
      "epoch": 0.47533632286995514,
      "grad_norm": 1.29683857481815,
      "learning_rate": 6.288424406996237e-06,
      "loss": 0.7085,
      "step": 1060
    },
    {
      "epoch": 0.4798206278026906,
      "grad_norm": 1.3009424922765027,
      "learning_rate": 6.2126468828730225e-06,
      "loss": 0.7294,
      "step": 1070
    },
    {
      "epoch": 0.484304932735426,
      "grad_norm": 1.244204104767217,
      "learning_rate": 6.136572239706854e-06,
      "loss": 0.7091,
      "step": 1080
    },
    {
      "epoch": 0.48878923766816146,
      "grad_norm": 1.4080424781447183,
      "learning_rate": 6.060219117074914e-06,
      "loss": 0.724,
      "step": 1090
    },
    {
      "epoch": 0.49327354260089684,
      "grad_norm": 1.4328593518662633,
      "learning_rate": 5.983606222786577e-06,
      "loss": 0.7106,
      "step": 1100
    },
    {
      "epoch": 0.4977578475336323,
      "grad_norm": 1.3515007322722326,
      "learning_rate": 5.9067523282996775e-06,
      "loss": 0.7111,
      "step": 1110
    },
    {
      "epoch": 0.5022421524663677,
      "grad_norm": 1.3343622572750706,
      "learning_rate": 5.829676264121184e-06,
      "loss": 0.7323,
      "step": 1120
    },
    {
      "epoch": 0.5067264573991032,
      "grad_norm": 1.3346769678858248,
      "learning_rate": 5.752396915193403e-06,
      "loss": 0.744,
      "step": 1130
    },
    {
      "epoch": 0.5112107623318386,
      "grad_norm": 1.4071259742434898,
      "learning_rate": 5.6749332162668525e-06,
      "loss": 0.7181,
      "step": 1140
    },
    {
      "epoch": 0.515695067264574,
      "grad_norm": 1.314030857289351,
      "learning_rate": 5.5973041472609265e-06,
      "loss": 0.7278,
      "step": 1150
    },
    {
      "epoch": 0.5201793721973094,
      "grad_norm": 1.2893634754111014,
      "learning_rate": 5.519528728613491e-06,
      "loss": 0.722,
      "step": 1160
    },
    {
      "epoch": 0.5246636771300448,
      "grad_norm": 1.2894378069172643,
      "learning_rate": 5.4416260166205525e-06,
      "loss": 0.7282,
      "step": 1170
    },
    {
      "epoch": 0.5291479820627802,
      "grad_norm": 1.2691657277439665,
      "learning_rate": 5.363615098767149e-06,
      "loss": 0.7439,
      "step": 1180
    },
    {
      "epoch": 0.5336322869955157,
      "grad_norm": 1.3478124675299437,
      "learning_rate": 5.285515089050587e-06,
      "loss": 0.7164,
      "step": 1190
    },
    {
      "epoch": 0.5381165919282511,
      "grad_norm": 1.3052899854504807,
      "learning_rate": 5.207345123297187e-06,
      "loss": 0.7171,
      "step": 1200
    },
    {
      "epoch": 0.5426008968609866,
      "grad_norm": 1.27131464716386,
      "learning_rate": 5.129124354473688e-06,
      "loss": 0.7235,
      "step": 1210
    },
    {
      "epoch": 0.547085201793722,
      "grad_norm": 1.257329353514232,
      "learning_rate": 5.050871947994443e-06,
      "loss": 0.6999,
      "step": 1220
    },
    {
      "epoch": 0.5515695067264574,
      "grad_norm": 1.3863466652299603,
      "learning_rate": 4.972607077025563e-06,
      "loss": 0.7251,
      "step": 1230
    },
    {
      "epoch": 0.5560538116591929,
      "grad_norm": 1.394706696869187,
      "learning_rate": 4.894348917787174e-06,
      "loss": 0.6963,
      "step": 1240
    },
    {
      "epoch": 0.5605381165919282,
      "grad_norm": 1.288516832053435,
      "learning_rate": 4.816116644854912e-06,
      "loss": 0.7207,
      "step": 1250
    },
    {
      "epoch": 0.5650224215246636,
      "grad_norm": 1.291568839795731,
      "learning_rate": 4.73792942646183e-06,
      "loss": 0.7168,
      "step": 1260
    },
    {
      "epoch": 0.5695067264573991,
      "grad_norm": 1.3692501176044114,
      "learning_rate": 4.659806419801855e-06,
      "loss": 0.7311,
      "step": 1270
    },
    {
      "epoch": 0.5739910313901345,
      "grad_norm": 1.2214111714932894,
      "learning_rate": 4.581766766335953e-06,
      "loss": 0.7175,
      "step": 1280
    },
    {
      "epoch": 0.57847533632287,
      "grad_norm": 1.2261021201524966,
      "learning_rate": 4.503829587102138e-06,
      "loss": 0.722,
      "step": 1290
    },
    {
      "epoch": 0.5829596412556054,
      "grad_norm": 1.279552435182188,
      "learning_rate": 4.426013978030508e-06,
      "loss": 0.7407,
      "step": 1300
    },
    {
      "epoch": 0.5874439461883408,
      "grad_norm": 1.2639914975822624,
      "learning_rate": 4.348339005264406e-06,
      "loss": 0.7174,
      "step": 1310
    },
    {
      "epoch": 0.5919282511210763,
      "grad_norm": 1.2931987303236723,
      "learning_rate": 4.270823700488896e-06,
      "loss": 0.7236,
      "step": 1320
    },
    {
      "epoch": 0.5964125560538116,
      "grad_norm": 1.3083460389811294,
      "learning_rate": 4.19348705626768e-06,
      "loss": 0.7247,
      "step": 1330
    },
    {
      "epoch": 0.600896860986547,
      "grad_norm": 1.359946232054244,
      "learning_rate": 4.116348021389595e-06,
      "loss": 0.7289,
      "step": 1340
    },
    {
      "epoch": 0.6053811659192825,
      "grad_norm": 1.3038409489179155,
      "learning_rate": 4.039425496225834e-06,
      "loss": 0.723,
      "step": 1350
    },
    {
      "epoch": 0.6098654708520179,
      "grad_norm": 1.337031671252276,
      "learning_rate": 3.962738328099047e-06,
      "loss": 0.718,
      "step": 1360
    },
    {
      "epoch": 0.6143497757847534,
      "grad_norm": 1.3006506595251124,
      "learning_rate": 3.88630530666542e-06,
      "loss": 0.7372,
      "step": 1370
    },
    {
      "epoch": 0.6188340807174888,
      "grad_norm": 1.3038120667886732,
      "learning_rate": 3.8101451593108816e-06,
      "loss": 0.732,
      "step": 1380
    },
    {
      "epoch": 0.6233183856502242,
      "grad_norm": 1.2544712968929104,
      "learning_rate": 3.7342765465625953e-06,
      "loss": 0.7347,
      "step": 1390
    },
    {
      "epoch": 0.6278026905829597,
      "grad_norm": 1.3352755014667614,
      "learning_rate": 3.658718057516803e-06,
      "loss": 0.7332,
      "step": 1400
    },
    {
      "epoch": 0.6322869955156951,
      "grad_norm": 1.3389617347187606,
      "learning_rate": 3.5834882052841744e-06,
      "loss": 0.7154,
      "step": 1410
    },
    {
      "epoch": 0.6367713004484304,
      "grad_norm": 1.2654799213890686,
      "learning_rate": 3.508605422453799e-06,
      "loss": 0.7002,
      "step": 1420
    },
    {
      "epoch": 0.6412556053811659,
      "grad_norm": 1.311398422880929,
      "learning_rate": 3.4340880565768707e-06,
      "loss": 0.7098,
      "step": 1430
    },
    {
      "epoch": 0.6457399103139013,
      "grad_norm": 1.3577211660369808,
      "learning_rate": 3.359954365671241e-06,
      "loss": 0.7024,
      "step": 1440
    },
    {
      "epoch": 0.6502242152466368,
      "grad_norm": 1.145375472595952,
      "learning_rate": 3.2862225137478897e-06,
      "loss": 0.7097,
      "step": 1450
    },
    {
      "epoch": 0.6547085201793722,
      "grad_norm": 1.3358950336855993,
      "learning_rate": 3.2129105663604275e-06,
      "loss": 0.7148,
      "step": 1460
    },
    {
      "epoch": 0.6591928251121076,
      "grad_norm": 1.2495395838860759,
      "learning_rate": 3.1400364861787434e-06,
      "loss": 0.7483,
      "step": 1470
    },
    {
      "epoch": 0.6636771300448431,
      "grad_norm": 1.2742823576583961,
      "learning_rate": 3.0676181285878343e-06,
      "loss": 0.7063,
      "step": 1480
    },
    {
      "epoch": 0.6681614349775785,
      "grad_norm": 1.2596513250823083,
      "learning_rate": 2.9956732373129378e-06,
      "loss": 0.7201,
      "step": 1490
    },
    {
      "epoch": 0.672645739910314,
      "grad_norm": 1.4183600229677984,
      "learning_rate": 2.9242194400720157e-06,
      "loss": 0.7202,
      "step": 1500
    },
    {
      "epoch": 0.6771300448430493,
      "grad_norm": 1.224500332009707,
      "learning_rate": 2.8532742442566735e-06,
      "loss": 0.7228,
      "step": 1510
    },
    {
      "epoch": 0.6816143497757847,
      "grad_norm": 1.20210393613667,
      "learning_rate": 2.782855032642535e-06,
      "loss": 0.7386,
      "step": 1520
    },
    {
      "epoch": 0.6860986547085202,
      "grad_norm": 1.2835056973370584,
      "learning_rate": 2.712979059130187e-06,
      "loss": 0.7207,
      "step": 1530
    },
    {
      "epoch": 0.6905829596412556,
      "grad_norm": 1.180714987729606,
      "learning_rate": 2.643663444517671e-06,
      "loss": 0.6981,
      "step": 1540
    },
    {
      "epoch": 0.695067264573991,
      "grad_norm": 1.2871858226590431,
      "learning_rate": 2.5749251723055933e-06,
      "loss": 0.6853,
      "step": 1550
    },
    {
      "epoch": 0.6995515695067265,
      "grad_norm": 1.3219720717807693,
      "learning_rate": 2.5067810845358926e-06,
      "loss": 0.7192,
      "step": 1560
    },
    {
      "epoch": 0.7040358744394619,
      "grad_norm": 1.391893981214182,
      "learning_rate": 2.439247877665244e-06,
      "loss": 0.7103,
      "step": 1570
    },
    {
      "epoch": 0.7085201793721974,
      "grad_norm": 1.2636799158865641,
      "learning_rate": 2.3723420984741417e-06,
      "loss": 0.684,
      "step": 1580
    },
    {
      "epoch": 0.7130044843049327,
      "grad_norm": 1.3557464635552046,
      "learning_rate": 2.3060801400126693e-06,
      "loss": 0.7207,
      "step": 1590
    },
    {
      "epoch": 0.7174887892376681,
      "grad_norm": 1.3569256088684083,
      "learning_rate": 2.240478237583915e-06,
      "loss": 0.7077,
      "step": 1600
    },
    {
      "epoch": 0.7219730941704036,
      "grad_norm": 1.37192661939199,
      "learning_rate": 2.1755524647660514e-06,
      "loss": 0.693,
      "step": 1610
    },
    {
      "epoch": 0.726457399103139,
      "grad_norm": 1.248391921620642,
      "learning_rate": 2.1113187294740294e-06,
      "loss": 0.6911,
      "step": 1620
    },
    {
      "epoch": 0.7309417040358744,
      "grad_norm": 1.357856212054366,
      "learning_rate": 2.047792770061881e-06,
      "loss": 0.6838,
      "step": 1630
    },
    {
      "epoch": 0.7354260089686099,
      "grad_norm": 1.4147463764446673,
      "learning_rate": 1.9849901514665458e-06,
      "loss": 0.7122,
      "step": 1640
    },
    {
      "epoch": 0.7399103139013453,
      "grad_norm": 1.3587967144630926,
      "learning_rate": 1.922926261394206e-06,
      "loss": 0.6927,
      "step": 1650
    },
    {
      "epoch": 0.7443946188340808,
      "grad_norm": 1.326112035320906,
      "learning_rate": 1.8616163065500231e-06,
      "loss": 0.6931,
      "step": 1660
    },
    {
      "epoch": 0.7488789237668162,
      "grad_norm": 1.2337199420172695,
      "learning_rate": 1.8010753089122572e-06,
      "loss": 0.6934,
      "step": 1670
    },
    {
      "epoch": 0.7533632286995515,
      "grad_norm": 1.2176765529306792,
      "learning_rate": 1.7413181020516146e-06,
      "loss": 0.7164,
      "step": 1680
    },
    {
      "epoch": 0.757847533632287,
      "grad_norm": 1.2724919849788396,
      "learning_rate": 1.6823593274967703e-06,
      "loss": 0.7267,
      "step": 1690
    },
    {
      "epoch": 0.7623318385650224,
      "grad_norm": 1.3274428154634692,
      "learning_rate": 1.6242134311469538e-06,
      "loss": 0.6824,
      "step": 1700
    },
    {
      "epoch": 0.7668161434977578,
      "grad_norm": 1.3661211722387332,
      "learning_rate": 1.5668946597324558e-06,
      "loss": 0.7182,
      "step": 1710
    },
    {
      "epoch": 0.7713004484304933,
      "grad_norm": 1.2959174525555186,
      "learning_rate": 1.51041705732393e-06,
      "loss": 0.7118,
      "step": 1720
    },
    {
      "epoch": 0.7757847533632287,
      "grad_norm": 1.2773849053623296,
      "learning_rate": 1.4547944618913706e-06,
      "loss": 0.6929,
      "step": 1730
    },
    {
      "epoch": 0.7802690582959642,
      "grad_norm": 1.297150269984566,
      "learning_rate": 1.4000405019135676e-06,
      "loss": 0.6883,
      "step": 1740
    },
    {
      "epoch": 0.7847533632286996,
      "grad_norm": 1.400539444832659,
      "learning_rate": 1.3461685930388958e-06,
      "loss": 0.6911,
      "step": 1750
    },
    {
      "epoch": 0.7892376681614349,
      "grad_norm": 1.2337657683341194,
      "learning_rate": 1.2931919347982607e-06,
      "loss": 0.6921,
      "step": 1760
    },
    {
      "epoch": 0.7937219730941704,
      "grad_norm": 1.2072415924732212,
      "learning_rate": 1.2411235073709883e-06,
      "loss": 0.7102,
      "step": 1770
    },
    {
      "epoch": 0.7982062780269058,
      "grad_norm": 1.381367292756694,
      "learning_rate": 1.1899760684044515e-06,
      "loss": 0.6838,
      "step": 1780
    },
    {
      "epoch": 0.8026905829596412,
      "grad_norm": 1.3102643665842388,
      "learning_rate": 1.1397621498882471e-06,
      "loss": 0.6945,
      "step": 1790
    },
    {
      "epoch": 0.8071748878923767,
      "grad_norm": 1.3214717866253802,
      "learning_rate": 1.0904940550836285e-06,
      "loss": 0.7016,
      "step": 1800
    },
    {
      "epoch": 0.8116591928251121,
      "grad_norm": 1.1240219719921938,
      "learning_rate": 1.0421838555090119e-06,
      "loss": 0.7018,
      "step": 1810
    },
    {
      "epoch": 0.8161434977578476,
      "grad_norm": 1.2844428659053804,
      "learning_rate": 9.948433879822428e-07,
      "loss": 0.7361,
      "step": 1820
    },
    {
      "epoch": 0.820627802690583,
      "grad_norm": 1.249492840397432,
      "learning_rate": 9.484842517203735e-07,
      "loss": 0.707,
      "step": 1830
    },
    {
      "epoch": 0.8251121076233184,
      "grad_norm": 1.2793612843154911,
      "learning_rate": 9.031178054976636e-07,
      "loss": 0.7226,
      "step": 1840
    },
    {
      "epoch": 0.8295964125560538,
      "grad_norm": 1.3447642426455195,
      "learning_rate": 8.587551648624859e-07,
      "loss": 0.6906,
      "step": 1850
    },
    {
      "epoch": 0.8340807174887892,
      "grad_norm": 1.282168137173779,
      "learning_rate": 8.154071994138241e-07,
      "loss": 0.698,
      "step": 1860
    },
    {
      "epoch": 0.8385650224215246,
      "grad_norm": 1.4629920435128856,
      "learning_rate": 7.730845301380441e-07,
      "loss": 0.7212,
      "step": 1870
    },
    {
      "epoch": 0.8430493273542601,
      "grad_norm": 1.3143459341111923,
      "learning_rate": 7.317975268065685e-07,
      "loss": 0.6942,
      "step": 1880
    },
    {
      "epoch": 0.8475336322869955,
      "grad_norm": 1.3274198546425116,
      "learning_rate": 6.915563054351037e-07,
      "loss": 0.6944,
      "step": 1890
    },
    {
      "epoch": 0.852017937219731,
      "grad_norm": 1.2958298618538142,
      "learning_rate": 6.523707258050516e-07,
      "loss": 0.6692,
      "step": 1900
    },
    {
      "epoch": 0.8565022421524664,
      "grad_norm": 1.3403229426415875,
      "learning_rate": 6.14250389047692e-07,
      "loss": 0.7034,
      "step": 1910
    },
    {
      "epoch": 0.8609865470852018,
      "grad_norm": 1.258601487476543,
      "learning_rate": 5.772046352917399e-07,
      "loss": 0.7144,
      "step": 1920
    },
    {
      "epoch": 0.8654708520179372,
      "grad_norm": 1.1222145256140716,
      "learning_rate": 5.412425413748623e-07,
      "loss": 0.6988,
      "step": 1930
    },
    {
      "epoch": 0.8699551569506726,
      "grad_norm": 1.2674996138499859,
      "learning_rate": 5.063729186196948e-07,
      "loss": 0.7089,
      "step": 1940
    },
    {
      "epoch": 0.874439461883408,
      "grad_norm": 1.3633828734617575,
      "learning_rate": 4.7260431067491617e-07,
      "loss": 0.733,
      "step": 1950
    },
    {
      "epoch": 0.8789237668161435,
      "grad_norm": 1.2652212276863493,
      "learning_rate": 4.399449914219167e-07,
      "loss": 0.7209,
      "step": 1960
    },
    {
      "epoch": 0.8834080717488789,
      "grad_norm": 1.2975811004623845,
      "learning_rate": 4.084029629475478e-07,
      "loss": 0.7252,
      "step": 1970
    },
    {
      "epoch": 0.8878923766816144,
      "grad_norm": 1.2744740809644324,
      "learning_rate": 3.7798595358348457e-07,
      "loss": 0.7083,
      "step": 1980
    },
    {
      "epoch": 0.8923766816143498,
      "grad_norm": 1.2924446335095698,
      "learning_rate": 3.487014160126467e-07,
      "loss": 0.7077,
      "step": 1990
    },
    {
      "epoch": 0.8968609865470852,
      "grad_norm": 1.283938673421291,
      "learning_rate": 3.2055652544316695e-07,
      "loss": 0.7038,
      "step": 2000
    },
    {
      "epoch": 0.9013452914798207,
      "grad_norm": 1.3428158271865258,
      "learning_rate": 2.9355817785034325e-07,
      "loss": 0.7177,
      "step": 2010
    },
    {
      "epoch": 0.905829596412556,
      "grad_norm": 1.3237588390932393,
      "learning_rate": 2.6771298828700885e-07,
      "loss": 0.7079,
      "step": 2020
    },
    {
      "epoch": 0.9103139013452914,
      "grad_norm": 1.379188659318735,
      "learning_rate": 2.4302728926273224e-07,
      "loss": 0.7159,
      "step": 2030
    },
    {
      "epoch": 0.9147982062780269,
      "grad_norm": 1.2853687562313414,
      "learning_rate": 2.195071291922435e-07,
      "loss": 0.6842,
      "step": 2040
    },
    {
      "epoch": 0.9192825112107623,
      "grad_norm": 1.3212853096546313,
      "learning_rate": 1.9715827091347005e-07,
      "loss": 0.6994,
      "step": 2050
    },
    {
      "epoch": 0.9237668161434978,
      "grad_norm": 1.4231610496002076,
      "learning_rate": 1.7598619027554553e-07,
      "loss": 0.7032,
      "step": 2060
    },
    {
      "epoch": 0.9282511210762332,
      "grad_norm": 1.288660209861224,
      "learning_rate": 1.5599607479713396e-07,
      "loss": 0.6856,
      "step": 2070
    },
    {
      "epoch": 0.9327354260089686,
      "grad_norm": 1.291080758227027,
      "learning_rate": 1.3719282239539722e-07,
      "loss": 0.7183,
      "step": 2080
    },
    {
      "epoch": 0.9372197309417041,
      "grad_norm": 1.3527938026636528,
      "learning_rate": 1.1958104018592376e-07,
      "loss": 0.7022,
      "step": 2090
    },
    {
      "epoch": 0.9417040358744395,
      "grad_norm": 1.306930563286188,
      "learning_rate": 1.0316504335390775e-07,
      "loss": 0.7202,
      "step": 2100
    },
    {
      "epoch": 0.9461883408071748,
      "grad_norm": 1.34728936572921,
      "learning_rate": 8.79488540968565e-08,
      "loss": 0.7128,
      "step": 2110
    },
    {
      "epoch": 0.9506726457399103,
      "grad_norm": 1.5138997567977701,
      "learning_rate": 7.39362006390798e-08,
      "loss": 0.6841,
      "step": 2120
    },
    {
      "epoch": 0.9551569506726457,
      "grad_norm": 1.2518587224976194,
      "learning_rate": 6.113051631821631e-08,
      "loss": 0.71,
      "step": 2130
    },
    {
      "epoch": 0.9596412556053812,
      "grad_norm": 1.4419766059722212,
      "learning_rate": 4.9534938744004723e-08,
      "loss": 0.6944,
      "step": 2140
    },
    {
      "epoch": 0.9641255605381166,
      "grad_norm": 1.3591089992663685,
      "learning_rate": 3.915230902951761e-08,
      "loss": 0.717,
      "step": 2150
    },
    {
      "epoch": 0.968609865470852,
      "grad_norm": 1.281084457332566,
      "learning_rate": 2.9985171095041066e-08,
      "loss": 0.703,
      "step": 2160
    },
    {
      "epoch": 0.9730941704035875,
      "grad_norm": 1.1922556783094496,
      "learning_rate": 2.203577104476773e-08,
      "loss": 0.7085,
      "step": 2170
    },
    {
      "epoch": 0.9775784753363229,
      "grad_norm": 1.3917918614257676,
      "learning_rate": 1.5306056616468666e-08,
      "loss": 0.709,
      "step": 2180
    },
    {
      "epoch": 0.9820627802690582,
      "grad_norm": 1.178733370314234,
      "learning_rate": 9.797676704259574e-09,
      "loss": 0.7009,
      "step": 2190
    },
    {
      "epoch": 0.9865470852017937,
      "grad_norm": 1.2352452900540438,
      "learning_rate": 5.511980954596152e-09,
      "loss": 0.7085,
      "step": 2200
    },
    {
      "epoch": 0.9910313901345291,
      "grad_norm": 1.519249312059187,
      "learning_rate": 2.4500194355880913e-09,
      "loss": 0.6865,
      "step": 2210
    },
    {
      "epoch": 0.9955156950672646,
      "grad_norm": 1.2975150315542752,
      "learning_rate": 6.125423797137541e-10,
      "loss": 0.6935,
      "step": 2220
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.2570272992562246,
      "learning_rate": 0.0,
      "loss": 0.6946,
      "step": 2230
    },
    {
      "epoch": 1.0,
      "eval_loss": 0.7042345404624939,
      "eval_runtime": 381.2635,
      "eval_samples_per_second": 131.345,
      "eval_steps_per_second": 2.054,
      "step": 2230
    },
    {
      "epoch": 1.0,
      "step": 2230,
      "total_flos": 244955314192384.0,
      "train_loss": 0.7376568270371099,
      "train_runtime": 8720.6202,
      "train_samples_per_second": 32.731,
      "train_steps_per_second": 0.256
    }
  ],
  "logging_steps": 10,
  "max_steps": 2230,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": false,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 244955314192384.0,
  "train_batch_size": 8,
  "trial_name": null,
  "trial_params": null
}