{ "best_metric": null, "best_model_checkpoint": null, "epoch": 2.0, "eval_steps": 500, "global_step": 722, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.002770083102493075, "grad_norm": 4.970390796661377, "learning_rate": 1.36986301369863e-07, "loss": 0.7038, "step": 1 }, { "epoch": 0.013850415512465374, "grad_norm": 5.532266139984131, "learning_rate": 6.849315068493151e-07, "loss": 0.7051, "step": 5 }, { "epoch": 0.027700831024930747, "grad_norm": 3.3267316818237305, "learning_rate": 1.3698630136986302e-06, "loss": 0.6794, "step": 10 }, { "epoch": 0.04155124653739612, "grad_norm": 2.5890796184539795, "learning_rate": 2.0547945205479454e-06, "loss": 0.632, "step": 15 }, { "epoch": 0.055401662049861494, "grad_norm": 2.3750393390655518, "learning_rate": 2.7397260273972604e-06, "loss": 0.5698, "step": 20 }, { "epoch": 0.06925207756232687, "grad_norm": 2.339930295944214, "learning_rate": 3.4246575342465754e-06, "loss": 0.5216, "step": 25 }, { "epoch": 0.08310249307479224, "grad_norm": 2.103806734085083, "learning_rate": 4.109589041095891e-06, "loss": 0.4612, "step": 30 }, { "epoch": 0.09695290858725762, "grad_norm": 2.015624761581421, "learning_rate": 4.7945205479452054e-06, "loss": 0.3838, "step": 35 }, { "epoch": 0.11080332409972299, "grad_norm": 2.146289587020874, "learning_rate": 5.479452054794521e-06, "loss": 0.2954, "step": 40 }, { "epoch": 0.12465373961218837, "grad_norm": 2.125903844833374, "learning_rate": 6.164383561643836e-06, "loss": 0.1999, "step": 45 }, { "epoch": 0.13850415512465375, "grad_norm": 2.214215040206909, "learning_rate": 6.849315068493151e-06, "loss": 0.1135, "step": 50 }, { "epoch": 0.1523545706371191, "grad_norm": 3.1861069202423096, "learning_rate": 7.534246575342466e-06, "loss": 0.0612, "step": 55 }, { "epoch": 0.16620498614958448, "grad_norm": 2.2513909339904785, "learning_rate": 8.219178082191782e-06, "loss": 0.0513, "step": 60 }, { "epoch": 0.18005540166204986, "grad_norm": 1.5095714330673218, "learning_rate": 8.904109589041097e-06, "loss": 0.033, "step": 65 }, { "epoch": 0.19390581717451524, "grad_norm": 1.3538941144943237, "learning_rate": 9.589041095890411e-06, "loss": 0.0338, "step": 70 }, { "epoch": 0.2077562326869806, "grad_norm": 1.7012300491333008, "learning_rate": 9.999765681294371e-06, "loss": 0.0231, "step": 75 }, { "epoch": 0.22160664819944598, "grad_norm": 0.762015700340271, "learning_rate": 9.997129848070563e-06, "loss": 0.0182, "step": 80 }, { "epoch": 0.23545706371191136, "grad_norm": 1.277585744857788, "learning_rate": 9.991566832387564e-06, "loss": 0.0154, "step": 85 }, { "epoch": 0.24930747922437674, "grad_norm": 0.8343765735626221, "learning_rate": 9.983079892908332e-06, "loss": 0.0185, "step": 90 }, { "epoch": 0.2631578947368421, "grad_norm": 1.3323298692703247, "learning_rate": 9.971674001050687e-06, "loss": 0.0158, "step": 95 }, { "epoch": 0.2770083102493075, "grad_norm": 0.4257211983203888, "learning_rate": 9.957355838075188e-06, "loss": 0.0154, "step": 100 }, { "epoch": 0.29085872576177285, "grad_norm": 0.6481120586395264, "learning_rate": 9.940133791171445e-06, "loss": 0.0105, "step": 105 }, { "epoch": 0.3047091412742382, "grad_norm": 0.8009067177772522, "learning_rate": 9.920017948545109e-06, "loss": 0.0111, "step": 110 }, { "epoch": 0.3185595567867036, "grad_norm": 0.4706541895866394, "learning_rate": 9.897020093508502e-06, "loss": 0.0123, "step": 115 }, { "epoch": 0.33240997229916897, "grad_norm": 0.7167349457740784, "learning_rate": 9.871153697578254e-06, "loss": 0.0129, "step": 120 }, { "epoch": 0.3462603878116344, "grad_norm": 0.9122310876846313, "learning_rate": 9.842433912584066e-06, "loss": 0.0118, "step": 125 }, { "epoch": 0.3601108033240997, "grad_norm": 0.3195926249027252, "learning_rate": 9.810877561793178e-06, "loss": 0.0091, "step": 130 }, { "epoch": 0.3739612188365651, "grad_norm": 0.5710881352424622, "learning_rate": 9.776503130055758e-06, "loss": 0.0116, "step": 135 }, { "epoch": 0.3878116343490305, "grad_norm": 0.6485520005226135, "learning_rate": 9.739330752976981e-06, "loss": 0.0129, "step": 140 }, { "epoch": 0.40166204986149584, "grad_norm": 0.41829606890678406, "learning_rate": 9.699382205122138e-06, "loss": 0.0105, "step": 145 }, { "epoch": 0.4155124653739612, "grad_norm": 0.4999130070209503, "learning_rate": 9.656680887261693e-06, "loss": 0.0096, "step": 150 }, { "epoch": 0.4293628808864266, "grad_norm": 0.4883331060409546, "learning_rate": 9.611251812663748e-06, "loss": 0.0085, "step": 155 }, { "epoch": 0.44321329639889195, "grad_norm": 0.33876070380210876, "learning_rate": 9.563121592441949e-06, "loss": 0.006, "step": 160 }, { "epoch": 0.45706371191135736, "grad_norm": 0.9331468939781189, "learning_rate": 9.512318419967427e-06, "loss": 0.0062, "step": 165 }, { "epoch": 0.4709141274238227, "grad_norm": 0.5563158988952637, "learning_rate": 9.458872054353888e-06, "loss": 0.0072, "step": 170 }, { "epoch": 0.48476454293628807, "grad_norm": 0.7999743223190308, "learning_rate": 9.402813803025526e-06, "loss": 0.0063, "step": 175 }, { "epoch": 0.4986149584487535, "grad_norm": 0.35912105441093445, "learning_rate": 9.344176503378003e-06, "loss": 0.0069, "step": 180 }, { "epoch": 0.5124653739612188, "grad_norm": 0.3588772416114807, "learning_rate": 9.282994503543185e-06, "loss": 0.0068, "step": 185 }, { "epoch": 0.5263157894736842, "grad_norm": 0.5413822531700134, "learning_rate": 9.219303642268953e-06, "loss": 0.0082, "step": 190 }, { "epoch": 0.5401662049861495, "grad_norm": 0.32814571261405945, "learning_rate": 9.153141227925828e-06, "loss": 0.0057, "step": 195 }, { "epoch": 0.554016620498615, "grad_norm": 0.48693302273750305, "learning_rate": 9.084546016652758e-06, "loss": 0.0078, "step": 200 }, { "epoch": 0.5678670360110804, "grad_norm": 0.20453135669231415, "learning_rate": 9.013558189654819e-06, "loss": 0.0081, "step": 205 }, { "epoch": 0.5817174515235457, "grad_norm": 0.32578709721565247, "learning_rate": 8.940219329666167e-06, "loss": 0.0048, "step": 210 }, { "epoch": 0.5955678670360111, "grad_norm": 0.32217937707901, "learning_rate": 8.864572396591996e-06, "loss": 0.0053, "step": 215 }, { "epoch": 0.6094182825484764, "grad_norm": 0.6116990447044373, "learning_rate": 8.786661702343811e-06, "loss": 0.0067, "step": 220 }, { "epoch": 0.6232686980609419, "grad_norm": 0.6656084060668945, "learning_rate": 8.706532884882704e-06, "loss": 0.0067, "step": 225 }, { "epoch": 0.6371191135734072, "grad_norm": 0.4221888482570648, "learning_rate": 8.624232881485887e-06, "loss": 0.0069, "step": 230 }, { "epoch": 0.6509695290858726, "grad_norm": 0.35918885469436646, "learning_rate": 8.539809901252118e-06, "loss": 0.006, "step": 235 }, { "epoch": 0.6648199445983379, "grad_norm": 0.3014688193798065, "learning_rate": 8.453313396862113e-06, "loss": 0.0052, "step": 240 }, { "epoch": 0.6786703601108033, "grad_norm": 0.32176536321640015, "learning_rate": 8.364794035610527e-06, "loss": 0.0061, "step": 245 }, { "epoch": 0.6925207756232687, "grad_norm": 0.2977988123893738, "learning_rate": 8.274303669726427e-06, "loss": 0.0051, "step": 250 }, { "epoch": 0.7063711911357341, "grad_norm": 0.50001460313797, "learning_rate": 8.181895305999665e-06, "loss": 0.0072, "step": 255 }, { "epoch": 0.7202216066481995, "grad_norm": 0.7377979159355164, "learning_rate": 8.08762307473096e-06, "loss": 0.0064, "step": 260 }, { "epoch": 0.7340720221606648, "grad_norm": 0.3045377731323242, "learning_rate": 7.991542198023827e-06, "loss": 0.0051, "step": 265 }, { "epoch": 0.7479224376731302, "grad_norm": 0.22468125820159912, "learning_rate": 7.893708957436982e-06, "loss": 0.0056, "step": 270 }, { "epoch": 0.7617728531855956, "grad_norm": 0.4352708160877228, "learning_rate": 7.794180661016143e-06, "loss": 0.0042, "step": 275 }, { "epoch": 0.775623268698061, "grad_norm": 0.29045531153678894, "learning_rate": 7.693015609724524e-06, "loss": 0.0065, "step": 280 }, { "epoch": 0.7894736842105263, "grad_norm": 0.35637104511260986, "learning_rate": 7.5902730632917395e-06, "loss": 0.0047, "step": 285 }, { "epoch": 0.8033240997229917, "grad_norm": 0.5587311387062073, "learning_rate": 7.486013205501053e-06, "loss": 0.0063, "step": 290 }, { "epoch": 0.817174515235457, "grad_norm": 0.19924254715442657, "learning_rate": 7.3802971089353696e-06, "loss": 0.0055, "step": 295 }, { "epoch": 0.8310249307479224, "grad_norm": 0.39876681566238403, "learning_rate": 7.273186699202572e-06, "loss": 0.0067, "step": 300 }, { "epoch": 0.8448753462603878, "grad_norm": 0.6315239667892456, "learning_rate": 7.164744718661198e-06, "loss": 0.0045, "step": 305 }, { "epoch": 0.8587257617728532, "grad_norm": 0.31891411542892456, "learning_rate": 7.055034689667661e-06, "loss": 0.0042, "step": 310 }, { "epoch": 0.8725761772853186, "grad_norm": 0.34718984365463257, "learning_rate": 6.944120877366605e-06, "loss": 0.0045, "step": 315 }, { "epoch": 0.8864265927977839, "grad_norm": 0.3197481632232666, "learning_rate": 6.832068252046116e-06, "loss": 0.0051, "step": 320 }, { "epoch": 0.9002770083102493, "grad_norm": 0.298637717962265, "learning_rate": 6.718942451079911e-06, "loss": 0.0055, "step": 325 }, { "epoch": 0.9141274238227147, "grad_norm": 0.3373955190181732, "learning_rate": 6.604809740478748e-06, "loss": 0.0045, "step": 330 }, { "epoch": 0.9279778393351801, "grad_norm": 0.5179949402809143, "learning_rate": 6.489736976073603e-06, "loss": 0.0054, "step": 335 }, { "epoch": 0.9418282548476454, "grad_norm": 0.31394144892692566, "learning_rate": 6.3737915643533484e-06, "loss": 0.0049, "step": 340 }, { "epoch": 0.9556786703601108, "grad_norm": 0.25130563974380493, "learning_rate": 6.257041422979871e-06, "loss": 0.0039, "step": 345 }, { "epoch": 0.9695290858725761, "grad_norm": 0.42873042821884155, "learning_rate": 6.139554941003747e-06, "loss": 0.0055, "step": 350 }, { "epoch": 0.9833795013850416, "grad_norm": 0.2586114704608917, "learning_rate": 6.021400938803813e-06, "loss": 0.0042, "step": 355 }, { "epoch": 0.997229916897507, "grad_norm": 0.3645874559879303, "learning_rate": 5.902648627774059e-06, "loss": 0.0058, "step": 360 }, { "epoch": 1.0, "eval_loss": 0.0030423467978835106, "eval_runtime": 3.8207, "eval_samples_per_second": 0.785, "eval_steps_per_second": 0.785, "step": 361 }, { "epoch": 1.0110803324099722, "grad_norm": 0.524590790271759, "learning_rate": 5.783367569781474e-06, "loss": 0.0053, "step": 365 }, { "epoch": 1.0249307479224377, "grad_norm": 0.2261294275522232, "learning_rate": 5.663627636418611e-06, "loss": 0.0036, "step": 370 }, { "epoch": 1.0387811634349031, "grad_norm": 0.16315680742263794, "learning_rate": 5.543498968074704e-06, "loss": 0.003, "step": 375 }, { "epoch": 1.0526315789473684, "grad_norm": 0.23269863426685333, "learning_rate": 5.423051932849348e-06, "loss": 0.0027, "step": 380 }, { "epoch": 1.0664819944598338, "grad_norm": 0.29580602049827576, "learning_rate": 5.3023570853327725e-06, "loss": 0.0037, "step": 385 }, { "epoch": 1.080332409972299, "grad_norm": 0.2091536670923233, "learning_rate": 5.181485125276898e-06, "loss": 0.0023, "step": 390 }, { "epoch": 1.0941828254847645, "grad_norm": 0.2704831063747406, "learning_rate": 5.060506856181342e-06, "loss": 0.0035, "step": 395 }, { "epoch": 1.10803324099723, "grad_norm": 0.5069320201873779, "learning_rate": 4.939493143818659e-06, "loss": 0.0028, "step": 400 }, { "epoch": 1.1218836565096952, "grad_norm": 0.7631614804267883, "learning_rate": 4.818514874723103e-06, "loss": 0.0042, "step": 405 }, { "epoch": 1.1357340720221607, "grad_norm": 0.26314568519592285, "learning_rate": 4.697642914667229e-06, "loss": 0.0041, "step": 410 }, { "epoch": 1.149584487534626, "grad_norm": 0.20495057106018066, "learning_rate": 4.576948067150655e-06, "loss": 0.003, "step": 415 }, { "epoch": 1.1634349030470914, "grad_norm": 0.2521879971027374, "learning_rate": 4.456501031925297e-06, "loss": 0.0037, "step": 420 }, { "epoch": 1.1772853185595569, "grad_norm": 0.23261719942092896, "learning_rate": 4.336372363581391e-06, "loss": 0.0027, "step": 425 }, { "epoch": 1.1911357340720221, "grad_norm": 0.19368702173233032, "learning_rate": 4.216632430218528e-06, "loss": 0.0033, "step": 430 }, { "epoch": 1.2049861495844876, "grad_norm": 0.5213963389396667, "learning_rate": 4.097351372225943e-06, "loss": 0.0028, "step": 435 }, { "epoch": 1.2188365650969528, "grad_norm": 0.16888286173343658, "learning_rate": 3.978599061196188e-06, "loss": 0.003, "step": 440 }, { "epoch": 1.2326869806094183, "grad_norm": 0.3124845027923584, "learning_rate": 3.860445058996255e-06, "loss": 0.0034, "step": 445 }, { "epoch": 1.2465373961218837, "grad_norm": 0.25205326080322266, "learning_rate": 3.7429585770201314e-06, "loss": 0.0028, "step": 450 }, { "epoch": 1.260387811634349, "grad_norm": 0.27026185393333435, "learning_rate": 3.626208435646652e-06, "loss": 0.0023, "step": 455 }, { "epoch": 1.2742382271468145, "grad_norm": 0.19147123396396637, "learning_rate": 3.5102630239263986e-06, "loss": 0.0021, "step": 460 }, { "epoch": 1.2880886426592797, "grad_norm": 0.21509826183319092, "learning_rate": 3.395190259521254e-06, "loss": 0.0039, "step": 465 }, { "epoch": 1.3019390581717452, "grad_norm": 0.3184572160243988, "learning_rate": 3.281057548920091e-06, "loss": 0.0034, "step": 470 }, { "epoch": 1.3157894736842106, "grad_norm": 0.3757064640522003, "learning_rate": 3.1679317479538864e-06, "loss": 0.0036, "step": 475 }, { "epoch": 1.3296398891966759, "grad_norm": 0.23070411384105682, "learning_rate": 3.0558791226333974e-06, "loss": 0.002, "step": 480 }, { "epoch": 1.3434903047091413, "grad_norm": 0.16266323626041412, "learning_rate": 2.9449653103323405e-06, "loss": 0.003, "step": 485 }, { "epoch": 1.3573407202216066, "grad_norm": 0.235463485121727, "learning_rate": 2.8352552813388035e-06, "loss": 0.0022, "step": 490 }, { "epoch": 1.371191135734072, "grad_norm": 0.21921995282173157, "learning_rate": 2.7268133007974284e-06, "loss": 0.0025, "step": 495 }, { "epoch": 1.3850415512465375, "grad_norm": 0.17426297068595886, "learning_rate": 2.6197028910646304e-06, "loss": 0.0032, "step": 500 }, { "epoch": 1.3988919667590027, "grad_norm": 0.14913178980350494, "learning_rate": 2.5139867944989483e-06, "loss": 0.0022, "step": 505 }, { "epoch": 1.4127423822714682, "grad_norm": 0.37892332673072815, "learning_rate": 2.409726936708263e-06, "loss": 0.0024, "step": 510 }, { "epoch": 1.4265927977839334, "grad_norm": 0.20658189058303833, "learning_rate": 2.3069843902754767e-06, "loss": 0.0034, "step": 515 }, { "epoch": 1.440443213296399, "grad_norm": 0.29341772198677063, "learning_rate": 2.205819338983859e-06, "loss": 0.0031, "step": 520 }, { "epoch": 1.4542936288088644, "grad_norm": 0.228977769613266, "learning_rate": 2.106291042563019e-06, "loss": 0.002, "step": 525 }, { "epoch": 1.4681440443213296, "grad_norm": 0.2889719009399414, "learning_rate": 2.0084578019761738e-06, "loss": 0.0031, "step": 530 }, { "epoch": 1.481994459833795, "grad_norm": 0.16132836043834686, "learning_rate": 1.912376925269041e-06, "loss": 0.0026, "step": 535 }, { "epoch": 1.4958448753462603, "grad_norm": 0.3356161117553711, "learning_rate": 1.8181046940003366e-06, "loss": 0.0028, "step": 540 }, { "epoch": 1.5096952908587258, "grad_norm": 0.4451793432235718, "learning_rate": 1.7256963302735752e-06, "loss": 0.0026, "step": 545 }, { "epoch": 1.5235457063711912, "grad_norm": 0.30361247062683105, "learning_rate": 1.635205964389474e-06, "loss": 0.0016, "step": 550 }, { "epoch": 1.5373961218836565, "grad_norm": 0.20500566065311432, "learning_rate": 1.5466866031378874e-06, "loss": 0.0029, "step": 555 }, { "epoch": 1.5512465373961217, "grad_norm": 0.3303813636302948, "learning_rate": 1.4601900987478834e-06, "loss": 0.003, "step": 560 }, { "epoch": 1.5650969529085872, "grad_norm": 0.20436729490756989, "learning_rate": 1.3757671185141136e-06, "loss": 0.0017, "step": 565 }, { "epoch": 1.5789473684210527, "grad_norm": 0.33445170521736145, "learning_rate": 1.2934671151172974e-06, "loss": 0.0031, "step": 570 }, { "epoch": 1.5927977839335181, "grad_norm": 0.2386503368616104, "learning_rate": 1.213338297656191e-06, "loss": 0.0026, "step": 575 }, { "epoch": 1.6066481994459834, "grad_norm": 0.45957309007644653, "learning_rate": 1.1354276034080059e-06, "loss": 0.003, "step": 580 }, { "epoch": 1.6204986149584486, "grad_norm": 0.2124543935060501, "learning_rate": 1.0597806703338354e-06, "loss": 0.0023, "step": 585 }, { "epoch": 1.634349030470914, "grad_norm": 0.1889660805463791, "learning_rate": 9.86441810345183e-07, "loss": 0.0026, "step": 590 }, { "epoch": 1.6481994459833795, "grad_norm": 0.3355896472930908, "learning_rate": 9.154539833472442e-07, "loss": 0.0023, "step": 595 }, { "epoch": 1.662049861495845, "grad_norm": 0.20331183075904846, "learning_rate": 8.468587720741728e-07, "loss": 0.002, "step": 600 }, { "epoch": 1.6759002770083102, "grad_norm": 0.5584515929222107, "learning_rate": 7.80696357731049e-07, "loss": 0.0027, "step": 605 }, { "epoch": 1.6897506925207755, "grad_norm": 0.21782828867435455, "learning_rate": 7.170054964568146e-07, "loss": 0.0027, "step": 610 }, { "epoch": 1.703601108033241, "grad_norm": 0.24219338595867157, "learning_rate": 6.558234966219984e-07, "loss": 0.0022, "step": 615 }, { "epoch": 1.7174515235457064, "grad_norm": 0.32900330424308777, "learning_rate": 5.971861969744758e-07, "loss": 0.0031, "step": 620 }, { "epoch": 1.7313019390581719, "grad_norm": 0.20752942562103271, "learning_rate": 5.411279456461133e-07, "loss": 0.0019, "step": 625 }, { "epoch": 1.745152354570637, "grad_norm": 0.15013189613819122, "learning_rate": 4.87681580032573e-07, "loss": 0.0018, "step": 630 }, { "epoch": 1.7590027700831024, "grad_norm": 0.2259117215871811, "learning_rate": 4.368784075580512e-07, "loss": 0.0028, "step": 635 }, { "epoch": 1.7728531855955678, "grad_norm": 0.16545476019382477, "learning_rate": 3.8874818733625363e-07, "loss": 0.0024, "step": 640 }, { "epoch": 1.7867036011080333, "grad_norm": 0.2805814743041992, "learning_rate": 3.433191127383079e-07, "loss": 0.0025, "step": 645 }, { "epoch": 1.8005540166204987, "grad_norm": 0.24149565398693085, "learning_rate": 3.0061779487786325e-07, "loss": 0.0026, "step": 650 }, { "epoch": 1.814404432132964, "grad_norm": 0.37586623430252075, "learning_rate": 2.6066924702302044e-07, "loss": 0.0027, "step": 655 }, { "epoch": 1.8282548476454292, "grad_norm": 0.28574487566947937, "learning_rate": 2.2349686994424303e-07, "loss": 0.0022, "step": 660 }, { "epoch": 1.8421052631578947, "grad_norm": 0.26507818698883057, "learning_rate": 1.8912243820682296e-07, "loss": 0.0025, "step": 665 }, { "epoch": 1.8559556786703602, "grad_norm": 0.29346975684165955, "learning_rate": 1.575660874159346e-07, "loss": 0.0024, "step": 670 }, { "epoch": 1.8698060941828256, "grad_norm": 0.37801891565322876, "learning_rate": 1.2884630242174734e-07, "loss": 0.003, "step": 675 }, { "epoch": 1.8836565096952909, "grad_norm": 0.11035842448472977, "learning_rate": 1.029799064914988e-07, "loss": 0.0019, "step": 680 }, { "epoch": 1.897506925207756, "grad_norm": 0.2686106860637665, "learning_rate": 7.998205145489157e-08, "loss": 0.0026, "step": 685 }, { "epoch": 1.9113573407202216, "grad_norm": 0.3440103232860565, "learning_rate": 5.986620882855676e-08, "loss": 0.0031, "step": 690 }, { "epoch": 1.925207756232687, "grad_norm": 0.2664523124694824, "learning_rate": 4.2644161924811353e-08, "loss": 0.0035, "step": 695 }, { "epoch": 1.9390581717451525, "grad_norm": 0.22529973089694977, "learning_rate": 2.8325998949314536e-08, "loss": 0.0022, "step": 700 }, { "epoch": 1.9529085872576177, "grad_norm": 0.167397141456604, "learning_rate": 1.6920107091668582e-08, "loss": 0.0021, "step": 705 }, { "epoch": 1.966759002770083, "grad_norm": 0.3647945523262024, "learning_rate": 8.433167612436066e-09, "loss": 0.0021, "step": 710 }, { "epoch": 1.9806094182825484, "grad_norm": 0.3356544077396393, "learning_rate": 2.8701519294371815e-09, "loss": 0.0034, "step": 715 }, { "epoch": 1.994459833795014, "grad_norm": 0.1703202724456787, "learning_rate": 2.3431870562917735e-10, "loss": 0.0019, "step": 720 }, { "epoch": 2.0, "eval_loss": 0.0028210037853568792, "eval_runtime": 3.8079, "eval_samples_per_second": 0.788, "eval_steps_per_second": 0.788, "step": 722 }, { "epoch": 2.0, "step": 722, "total_flos": 1.3020461702683034e+17, "train_loss": 0.037445212856265314, "train_runtime": 3051.2562, "train_samples_per_second": 0.237, "train_steps_per_second": 0.237 } ], "logging_steps": 5, "max_steps": 722, "num_input_tokens_seen": 0, "num_train_epochs": 2, "save_steps": 100, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1.3020461702683034e+17, "train_batch_size": 1, "trial_name": null, "trial_params": null }