{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.6666370185893444, "eval_steps": 500, "global_step": 14990, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 9.986880725784932e-07, "loss": 5.3038, "step": 64 }, { "epoch": 0.01, "learning_rate": 9.972649648670284e-07, "loss": 2.7293, "step": 128 }, { "epoch": 0.01, "learning_rate": 9.959085653295383e-07, "loss": 1.6865, "step": 192 }, { "epoch": 0.01, "learning_rate": 9.944854576180734e-07, "loss": 0.3066, "step": 256 }, { "epoch": 0.01, "learning_rate": 9.930623499066086e-07, "loss": 0.2533, "step": 320 }, { "epoch": 0.02, "learning_rate": 9.916392421951437e-07, "loss": 0.3489, "step": 384 }, { "epoch": 0.02, "learning_rate": 9.902161344836786e-07, "loss": 0.2908, "step": 448 }, { "epoch": 0.02, "learning_rate": 9.887930267722138e-07, "loss": 0.3666, "step": 512 }, { "epoch": 0.03, "learning_rate": 9.873699190607489e-07, "loss": 0.3543, "step": 576 }, { "epoch": 0.03, "learning_rate": 9.85946811349284e-07, "loss": 0.2938, "step": 640 }, { "epoch": 0.03, "learning_rate": 9.84523703637819e-07, "loss": 0.2674, "step": 704 }, { "epoch": 0.03, "learning_rate": 9.83100595926354e-07, "loss": 0.2186, "step": 768 }, { "epoch": 0.04, "learning_rate": 9.816774882148892e-07, "loss": 0.3472, "step": 832 }, { "epoch": 0.04, "learning_rate": 9.802543805034244e-07, "loss": 0.3157, "step": 896 }, { "epoch": 0.04, "learning_rate": 9.788312727919595e-07, "loss": 0.432, "step": 960 }, { "epoch": 0.05, "learning_rate": 9.774081650804944e-07, "loss": 0.2131, "step": 1024 }, { "epoch": 0.05, "learning_rate": 9.759850573690296e-07, "loss": 0.249, "step": 1088 }, { "epoch": 0.05, "learning_rate": 9.745619496575647e-07, "loss": 0.239, "step": 1152 }, { "epoch": 0.05, "learning_rate": 9.731388419460998e-07, "loss": 0.3577, "step": 1216 }, { "epoch": 0.06, "learning_rate": 9.717157342346347e-07, "loss": 0.1945, "step": 1280 }, { "epoch": 0.06, "learning_rate": 9.702926265231699e-07, "loss": 0.3107, "step": 1344 }, { "epoch": 0.06, "learning_rate": 9.68869518811705e-07, "loss": 0.1983, "step": 1408 }, { "epoch": 0.07, "learning_rate": 9.674464111002402e-07, "loss": 0.188, "step": 1472 }, { "epoch": 0.07, "learning_rate": 9.660233033887753e-07, "loss": 0.2659, "step": 1536 }, { "epoch": 0.07, "learning_rate": 9.646001956773104e-07, "loss": 0.442, "step": 1600 }, { "epoch": 0.07, "learning_rate": 9.631770879658454e-07, "loss": 0.1774, "step": 1664 }, { "epoch": 0.08, "learning_rate": 9.617539802543805e-07, "loss": 0.2944, "step": 1728 }, { "epoch": 0.08, "learning_rate": 9.603308725429156e-07, "loss": 0.3228, "step": 1792 }, { "epoch": 0.08, "learning_rate": 9.589077648314505e-07, "loss": 0.2818, "step": 1856 }, { "epoch": 0.09, "learning_rate": 9.574846571199857e-07, "loss": 0.2816, "step": 1920 }, { "epoch": 0.09, "learning_rate": 9.560615494085208e-07, "loss": 0.3663, "step": 1984 }, { "epoch": 0.09, "learning_rate": 9.54638441697056e-07, "loss": 0.2614, "step": 2048 }, { "epoch": 0.09, "learning_rate": 9.53215333985591e-07, "loss": 0.2934, "step": 2112 }, { "epoch": 0.1, "learning_rate": 9.51792226274126e-07, "loss": 0.4422, "step": 2176 }, { "epoch": 0.1, "learning_rate": 9.503691185626613e-07, "loss": 0.2562, "step": 2240 }, { "epoch": 0.1, "learning_rate": 9.489460108511963e-07, "loss": 0.3432, "step": 2304 }, { "epoch": 0.11, "learning_rate": 9.475229031397314e-07, "loss": 0.3384, "step": 2368 }, { "epoch": 0.11, "learning_rate": 9.460997954282665e-07, "loss": 0.1679, "step": 2432 }, { "epoch": 0.11, "learning_rate": 9.446766877168015e-07, "loss": 0.1767, "step": 2496 }, { "epoch": 0.11, "learning_rate": 9.432535800053366e-07, "loss": 0.2161, "step": 2560 }, { "epoch": 0.12, "learning_rate": 9.418304722938717e-07, "loss": 0.2729, "step": 2624 }, { "epoch": 0.12, "learning_rate": 9.404073645824068e-07, "loss": 0.3314, "step": 2688 }, { "epoch": 0.12, "learning_rate": 9.389842568709418e-07, "loss": 0.242, "step": 2752 }, { "epoch": 0.13, "learning_rate": 9.37561149159477e-07, "loss": 0.3446, "step": 2816 }, { "epoch": 0.13, "learning_rate": 9.36138041448012e-07, "loss": 0.1292, "step": 2880 }, { "epoch": 0.13, "learning_rate": 9.347149337365472e-07, "loss": 0.1854, "step": 2944 }, { "epoch": 0.13, "learning_rate": 9.332918260250823e-07, "loss": 0.2536, "step": 3008 }, { "epoch": 0.14, "learning_rate": 9.318687183136174e-07, "loss": 0.2224, "step": 3072 }, { "epoch": 0.14, "learning_rate": 9.304456106021524e-07, "loss": 0.237, "step": 3136 }, { "epoch": 0.14, "learning_rate": 9.290225028906876e-07, "loss": 0.3345, "step": 3200 }, { "epoch": 0.15, "learning_rate": 9.275993951792226e-07, "loss": 0.175, "step": 3264 }, { "epoch": 0.15, "learning_rate": 9.261762874677576e-07, "loss": 0.2693, "step": 3328 }, { "epoch": 0.15, "learning_rate": 9.247531797562928e-07, "loss": 0.254, "step": 3392 }, { "epoch": 0.15, "learning_rate": 9.233300720448278e-07, "loss": 0.2855, "step": 3456 }, { "epoch": 0.16, "learning_rate": 9.219069643333629e-07, "loss": 0.1507, "step": 3520 }, { "epoch": 0.16, "learning_rate": 9.20483856621898e-07, "loss": 0.2464, "step": 3584 }, { "epoch": 0.16, "learning_rate": 9.190607489104332e-07, "loss": 0.2522, "step": 3648 }, { "epoch": 0.17, "learning_rate": 9.176376411989682e-07, "loss": 0.2378, "step": 3712 }, { "epoch": 0.17, "learning_rate": 9.162145334875034e-07, "loss": 0.2433, "step": 3776 }, { "epoch": 0.17, "learning_rate": 9.147914257760384e-07, "loss": 0.2932, "step": 3840 }, { "epoch": 0.17, "learning_rate": 9.133683180645735e-07, "loss": 0.2602, "step": 3904 }, { "epoch": 0.18, "learning_rate": 9.119452103531086e-07, "loss": 0.3365, "step": 3968 }, { "epoch": 0.18, "learning_rate": 9.105221026416437e-07, "loss": 0.1738, "step": 4032 }, { "epoch": 0.18, "learning_rate": 9.090989949301787e-07, "loss": 0.3132, "step": 4096 }, { "epoch": 0.19, "learning_rate": 9.076758872187137e-07, "loss": 0.2353, "step": 4160 }, { "epoch": 0.19, "learning_rate": 9.062527795072489e-07, "loss": 0.2868, "step": 4224 }, { "epoch": 0.19, "learning_rate": 9.048296717957839e-07, "loss": 0.1807, "step": 4288 }, { "epoch": 0.19, "learning_rate": 9.034065640843192e-07, "loss": 0.3052, "step": 4352 }, { "epoch": 0.2, "learning_rate": 9.019834563728542e-07, "loss": 0.2848, "step": 4416 }, { "epoch": 0.2, "learning_rate": 9.005603486613893e-07, "loss": 0.2031, "step": 4480 }, { "epoch": 0.2, "learning_rate": 8.991372409499244e-07, "loss": 0.2588, "step": 4544 }, { "epoch": 0.2, "learning_rate": 8.977141332384595e-07, "loss": 0.2356, "step": 4608 }, { "epoch": 0.21, "learning_rate": 8.962910255269945e-07, "loss": 0.2445, "step": 4672 }, { "epoch": 0.21, "learning_rate": 8.948679178155297e-07, "loss": 0.2537, "step": 4736 }, { "epoch": 0.21, "learning_rate": 8.934448101040647e-07, "loss": 0.2345, "step": 4800 }, { "epoch": 0.22, "learning_rate": 8.920217023925998e-07, "loss": 0.2098, "step": 4864 }, { "epoch": 0.22, "learning_rate": 8.905985946811349e-07, "loss": 0.3487, "step": 4928 }, { "epoch": 0.22, "learning_rate": 8.891754869696699e-07, "loss": 0.1884, "step": 4992 }, { "epoch": 0.22, "learning_rate": 8.877523792582051e-07, "loss": 0.273, "step": 5056 }, { "epoch": 0.23, "learning_rate": 8.863292715467402e-07, "loss": 0.27, "step": 5120 }, { "epoch": 0.23, "learning_rate": 8.849061638352753e-07, "loss": 0.1494, "step": 5184 }, { "epoch": 0.23, "learning_rate": 8.834830561238103e-07, "loss": 0.1452, "step": 5248 }, { "epoch": 0.24, "learning_rate": 8.820599484123455e-07, "loss": 0.177, "step": 5312 }, { "epoch": 0.24, "learning_rate": 8.806368407008805e-07, "loss": 0.3317, "step": 5376 }, { "epoch": 0.24, "learning_rate": 8.792137329894156e-07, "loss": 0.1638, "step": 5440 }, { "epoch": 0.24, "learning_rate": 8.777906252779507e-07, "loss": 0.2928, "step": 5504 }, { "epoch": 0.25, "learning_rate": 8.763675175664858e-07, "loss": 0.1851, "step": 5568 }, { "epoch": 0.25, "learning_rate": 8.749444098550208e-07, "loss": 0.3108, "step": 5632 }, { "epoch": 0.25, "learning_rate": 8.73521302143556e-07, "loss": 0.3002, "step": 5696 }, { "epoch": 0.26, "learning_rate": 8.720981944320911e-07, "loss": 0.2726, "step": 5760 }, { "epoch": 0.26, "learning_rate": 8.706750867206262e-07, "loss": 0.1775, "step": 5824 }, { "epoch": 0.26, "learning_rate": 8.692519790091613e-07, "loss": 0.2632, "step": 5888 }, { "epoch": 0.26, "learning_rate": 8.678288712976963e-07, "loss": 0.1625, "step": 5952 }, { "epoch": 0.27, "learning_rate": 8.664057635862314e-07, "loss": 0.235, "step": 6016 }, { "epoch": 0.27, "learning_rate": 8.649826558747665e-07, "loss": 0.2707, "step": 6080 }, { "epoch": 0.27, "learning_rate": 8.635595481633016e-07, "loss": 0.1704, "step": 6144 }, { "epoch": 0.28, "learning_rate": 8.621364404518366e-07, "loss": 0.1925, "step": 6208 }, { "epoch": 0.28, "learning_rate": 8.607133327403718e-07, "loss": 0.2159, "step": 6272 }, { "epoch": 0.28, "learning_rate": 8.592902250289068e-07, "loss": 0.2228, "step": 6336 }, { "epoch": 0.28, "learning_rate": 8.578671173174419e-07, "loss": 0.2184, "step": 6400 }, { "epoch": 0.29, "learning_rate": 8.56444009605977e-07, "loss": 0.2803, "step": 6464 }, { "epoch": 0.29, "learning_rate": 8.550209018945122e-07, "loss": 0.1729, "step": 6528 }, { "epoch": 0.29, "learning_rate": 8.535977941830472e-07, "loss": 0.2702, "step": 6592 }, { "epoch": 0.3, "learning_rate": 8.521746864715824e-07, "loss": 0.2938, "step": 6656 }, { "epoch": 0.3, "learning_rate": 8.507515787601174e-07, "loss": 0.2692, "step": 6720 }, { "epoch": 0.3, "learning_rate": 8.493284710486524e-07, "loss": 0.3317, "step": 6784 }, { "epoch": 0.3, "learning_rate": 8.479053633371876e-07, "loss": 0.2815, "step": 6848 }, { "epoch": 0.31, "learning_rate": 8.464822556257226e-07, "loss": 0.2568, "step": 6912 }, { "epoch": 0.31, "learning_rate": 8.450591479142577e-07, "loss": 0.2247, "step": 6976 }, { "epoch": 0.31, "learning_rate": 8.436360402027927e-07, "loss": 0.2673, "step": 7040 }, { "epoch": 0.32, "learning_rate": 8.422129324913279e-07, "loss": 0.1592, "step": 7104 }, { "epoch": 0.32, "learning_rate": 8.407898247798629e-07, "loss": 0.2298, "step": 7168 }, { "epoch": 0.32, "learning_rate": 8.393667170683982e-07, "loss": 0.2149, "step": 7232 }, { "epoch": 0.32, "learning_rate": 8.379436093569332e-07, "loss": 0.3177, "step": 7296 }, { "epoch": 0.33, "learning_rate": 8.365205016454683e-07, "loss": 0.2083, "step": 7360 }, { "epoch": 0.33, "learning_rate": 8.350973939340034e-07, "loss": 0.2936, "step": 7424 }, { "epoch": 0.33, "learning_rate": 8.336742862225385e-07, "loss": 0.2263, "step": 7488 }, { "epoch": 0.34, "learning_rate": 8.322511785110735e-07, "loss": 0.2245, "step": 7552 }, { "epoch": 0.34, "learning_rate": 8.308280707996085e-07, "loss": 0.1807, "step": 7616 }, { "epoch": 0.34, "learning_rate": 8.294049630881437e-07, "loss": 0.2844, "step": 7680 }, { "epoch": 0.34, "learning_rate": 8.279818553766787e-07, "loss": 0.1844, "step": 7744 }, { "epoch": 0.35, "learning_rate": 8.265587476652138e-07, "loss": 0.1911, "step": 7808 }, { "epoch": 0.35, "learning_rate": 8.251578760117405e-07, "loss": 0.2085, "step": 7872 }, { "epoch": 0.35, "learning_rate": 8.237347683002757e-07, "loss": 0.1718, "step": 7936 }, { "epoch": 0.36, "learning_rate": 8.223116605888107e-07, "loss": 0.2387, "step": 8000 }, { "epoch": 0.36, "learning_rate": 8.208885528773458e-07, "loss": 0.1725, "step": 8064 }, { "epoch": 0.36, "learning_rate": 8.19465445165881e-07, "loss": 0.1448, "step": 8128 }, { "epoch": 0.36, "learning_rate": 8.180423374544161e-07, "loss": 0.154, "step": 8192 }, { "epoch": 0.37, "learning_rate": 8.166192297429512e-07, "loss": 0.3209, "step": 8256 }, { "epoch": 0.37, "learning_rate": 8.151961220314863e-07, "loss": 0.2067, "step": 8320 }, { "epoch": 0.37, "learning_rate": 8.137730143200213e-07, "loss": 0.2147, "step": 8384 }, { "epoch": 0.38, "learning_rate": 8.123499066085565e-07, "loss": 0.1771, "step": 8448 }, { "epoch": 0.38, "learning_rate": 8.109267988970915e-07, "loss": 0.1212, "step": 8512 }, { "epoch": 0.38, "learning_rate": 8.095036911856265e-07, "loss": 0.1944, "step": 8576 }, { "epoch": 0.38, "learning_rate": 8.080805834741616e-07, "loss": 0.2416, "step": 8640 }, { "epoch": 0.39, "learning_rate": 8.066574757626967e-07, "loss": 0.2004, "step": 8704 }, { "epoch": 0.39, "learning_rate": 8.052343680512318e-07, "loss": 0.1667, "step": 8768 }, { "epoch": 0.39, "learning_rate": 8.03811260339767e-07, "loss": 0.203, "step": 8832 }, { "epoch": 0.4, "learning_rate": 8.023881526283021e-07, "loss": 0.2355, "step": 8896 }, { "epoch": 0.4, "learning_rate": 8.009650449168371e-07, "loss": 0.2247, "step": 8960 }, { "epoch": 0.4, "learning_rate": 7.995419372053723e-07, "loss": 0.1768, "step": 9024 }, { "epoch": 0.4, "learning_rate": 7.981188294939073e-07, "loss": 0.2669, "step": 9088 }, { "epoch": 0.41, "learning_rate": 7.966957217824424e-07, "loss": 0.2288, "step": 9152 }, { "epoch": 0.41, "learning_rate": 7.952726140709774e-07, "loss": 0.2397, "step": 9216 }, { "epoch": 0.41, "learning_rate": 7.938495063595126e-07, "loss": 0.2276, "step": 9280 }, { "epoch": 0.42, "learning_rate": 7.924263986480476e-07, "loss": 0.1367, "step": 9344 }, { "epoch": 0.42, "learning_rate": 7.910032909365826e-07, "loss": 0.222, "step": 9408 }, { "epoch": 0.42, "learning_rate": 7.895801832251178e-07, "loss": 0.2611, "step": 9472 }, { "epoch": 0.42, "learning_rate": 7.881570755136529e-07, "loss": 0.2373, "step": 9536 }, { "epoch": 0.43, "learning_rate": 7.867339678021881e-07, "loss": 0.2052, "step": 9600 }, { "epoch": 0.43, "learning_rate": 7.853108600907231e-07, "loss": 0.1998, "step": 9664 }, { "epoch": 0.43, "learning_rate": 7.838877523792582e-07, "loss": 0.2254, "step": 9728 }, { "epoch": 0.44, "learning_rate": 7.824646446677932e-07, "loss": 0.1955, "step": 9792 }, { "epoch": 0.44, "learning_rate": 7.810415369563284e-07, "loss": 0.1238, "step": 9856 }, { "epoch": 0.44, "learning_rate": 7.796184292448634e-07, "loss": 0.1806, "step": 9920 }, { "epoch": 0.44, "learning_rate": 7.781953215333986e-07, "loss": 0.31, "step": 9984 }, { "epoch": 0.45, "learning_rate": 7.767722138219336e-07, "loss": 0.2349, "step": 10048 }, { "epoch": 0.45, "learning_rate": 7.753491061104687e-07, "loss": 0.2277, "step": 10112 }, { "epoch": 0.45, "learning_rate": 7.739259983990037e-07, "loss": 0.2239, "step": 10176 }, { "epoch": 0.46, "learning_rate": 7.725028906875389e-07, "loss": 0.2707, "step": 10240 }, { "epoch": 0.46, "learning_rate": 7.71079782976074e-07, "loss": 0.1957, "step": 10304 }, { "epoch": 0.46, "learning_rate": 7.69656675264609e-07, "loss": 0.1649, "step": 10368 }, { "epoch": 0.46, "learning_rate": 7.682335675531442e-07, "loss": 0.2773, "step": 10432 }, { "epoch": 0.47, "learning_rate": 7.668104598416792e-07, "loss": 0.2146, "step": 10496 }, { "epoch": 0.47, "learning_rate": 7.653873521302144e-07, "loss": 0.2134, "step": 10560 }, { "epoch": 0.47, "learning_rate": 7.639642444187494e-07, "loss": 0.2168, "step": 10624 }, { "epoch": 0.48, "learning_rate": 7.625411367072845e-07, "loss": 0.1122, "step": 10688 }, { "epoch": 0.48, "learning_rate": 7.611180289958195e-07, "loss": 0.1628, "step": 10752 }, { "epoch": 0.48, "learning_rate": 7.596949212843547e-07, "loss": 0.1566, "step": 10816 }, { "epoch": 0.48, "learning_rate": 7.582718135728897e-07, "loss": 0.1824, "step": 10880 }, { "epoch": 0.49, "learning_rate": 7.56848705861425e-07, "loss": 0.292, "step": 10944 }, { "epoch": 0.49, "learning_rate": 7.5542559814996e-07, "loss": 0.1534, "step": 11008 }, { "epoch": 0.49, "learning_rate": 7.54002490438495e-07, "loss": 0.2107, "step": 11072 }, { "epoch": 0.5, "learning_rate": 7.525793827270302e-07, "loss": 0.1689, "step": 11136 }, { "epoch": 0.5, "learning_rate": 7.511562750155652e-07, "loss": 0.3489, "step": 11200 }, { "epoch": 0.5, "learning_rate": 7.497331673041003e-07, "loss": 0.1535, "step": 11264 }, { "epoch": 0.5, "learning_rate": 7.483100595926353e-07, "loss": 0.1439, "step": 11328 }, { "epoch": 0.51, "learning_rate": 7.468869518811705e-07, "loss": 0.2032, "step": 11392 }, { "epoch": 0.51, "learning_rate": 7.454638441697055e-07, "loss": 0.4177, "step": 11456 }, { "epoch": 0.51, "learning_rate": 7.440407364582406e-07, "loss": 0.2552, "step": 11520 }, { "epoch": 0.52, "learning_rate": 7.426176287467757e-07, "loss": 0.181, "step": 11584 }, { "epoch": 0.52, "learning_rate": 7.411945210353108e-07, "loss": 0.1128, "step": 11648 }, { "epoch": 0.52, "learning_rate": 7.39771413323846e-07, "loss": 0.2184, "step": 11712 }, { "epoch": 0.52, "learning_rate": 7.383483056123811e-07, "loss": 0.3284, "step": 11776 }, { "epoch": 0.53, "learning_rate": 7.369251979009161e-07, "loss": 0.1976, "step": 11840 }, { "epoch": 0.53, "learning_rate": 7.355020901894511e-07, "loss": 0.1274, "step": 11904 }, { "epoch": 0.53, "learning_rate": 7.340789824779863e-07, "loss": 0.1578, "step": 11968 }, { "epoch": 0.54, "learning_rate": 7.326558747665213e-07, "loss": 0.2599, "step": 12032 }, { "epoch": 0.54, "learning_rate": 7.312327670550564e-07, "loss": 0.1812, "step": 12096 }, { "epoch": 0.54, "learning_rate": 7.298096593435915e-07, "loss": 0.2296, "step": 12160 }, { "epoch": 0.54, "learning_rate": 7.283865516321266e-07, "loss": 0.2716, "step": 12224 }, { "epoch": 0.55, "learning_rate": 7.269634439206616e-07, "loss": 0.3587, "step": 12288 }, { "epoch": 0.55, "learning_rate": 7.255403362091968e-07, "loss": 0.1644, "step": 12352 }, { "epoch": 0.55, "learning_rate": 7.241394645557235e-07, "loss": 0.2707, "step": 12416 }, { "epoch": 0.56, "learning_rate": 7.227163568442586e-07, "loss": 0.1747, "step": 12480 }, { "epoch": 0.56, "learning_rate": 7.212932491327936e-07, "loss": 0.2109, "step": 12544 }, { "epoch": 0.56, "learning_rate": 7.198701414213289e-07, "loss": 0.2764, "step": 12608 }, { "epoch": 0.56, "learning_rate": 7.184470337098639e-07, "loss": 0.1857, "step": 12672 }, { "epoch": 0.57, "learning_rate": 7.170239259983991e-07, "loss": 0.2993, "step": 12736 }, { "epoch": 0.57, "learning_rate": 7.156008182869341e-07, "loss": 0.3019, "step": 12800 }, { "epoch": 0.57, "learning_rate": 7.141777105754691e-07, "loss": 0.2171, "step": 12864 }, { "epoch": 0.57, "learning_rate": 7.127546028640042e-07, "loss": 0.1616, "step": 12928 }, { "epoch": 0.58, "learning_rate": 7.113314951525393e-07, "loss": 0.2056, "step": 12992 }, { "epoch": 0.58, "learning_rate": 7.099083874410744e-07, "loss": 0.1741, "step": 13056 }, { "epoch": 0.58, "learning_rate": 7.084852797296094e-07, "loss": 0.1881, "step": 13120 }, { "epoch": 0.59, "learning_rate": 7.070621720181446e-07, "loss": 0.3325, "step": 13184 }, { "epoch": 0.59, "learning_rate": 7.056390643066796e-07, "loss": 0.1797, "step": 13248 }, { "epoch": 0.59, "learning_rate": 7.042159565952149e-07, "loss": 0.1904, "step": 13312 }, { "epoch": 0.59, "learning_rate": 7.027928488837499e-07, "loss": 0.182, "step": 13376 }, { "epoch": 0.6, "learning_rate": 7.01369741172285e-07, "loss": 0.1893, "step": 13440 }, { "epoch": 0.6, "learning_rate": 6.9994663346082e-07, "loss": 0.2757, "step": 13504 }, { "epoch": 0.6, "learning_rate": 6.985235257493552e-07, "loss": 0.1684, "step": 13568 }, { "epoch": 0.61, "learning_rate": 6.971004180378902e-07, "loss": 0.1678, "step": 13632 }, { "epoch": 0.61, "learning_rate": 6.956773103264252e-07, "loss": 0.2164, "step": 13696 }, { "epoch": 0.61, "learning_rate": 6.942542026149604e-07, "loss": 0.3885, "step": 13760 }, { "epoch": 0.61, "learning_rate": 6.928310949034954e-07, "loss": 0.153, "step": 13824 }, { "epoch": 0.62, "learning_rate": 6.914079871920305e-07, "loss": 0.211, "step": 13888 }, { "epoch": 0.62, "learning_rate": 6.899848794805656e-07, "loss": 0.2952, "step": 13952 }, { "epoch": 0.62, "learning_rate": 6.885617717691008e-07, "loss": 0.227, "step": 14016 }, { "epoch": 0.63, "learning_rate": 6.871386640576358e-07, "loss": 0.1933, "step": 14080 }, { "epoch": 0.63, "learning_rate": 6.85715556346171e-07, "loss": 0.2457, "step": 14144 }, { "epoch": 0.63, "learning_rate": 6.84292448634706e-07, "loss": 0.1388, "step": 14208 }, { "epoch": 0.63, "learning_rate": 6.828693409232412e-07, "loss": 0.175, "step": 14272 }, { "epoch": 0.64, "learning_rate": 6.814462332117762e-07, "loss": 0.1735, "step": 14336 }, { "epoch": 0.64, "learning_rate": 6.800231255003113e-07, "loss": 0.2027, "step": 14400 }, { "epoch": 0.64, "learning_rate": 6.786000177888463e-07, "loss": 0.3326, "step": 14464 }, { "epoch": 0.65, "learning_rate": 6.771769100773814e-07, "loss": 0.2391, "step": 14528 }, { "epoch": 0.65, "learning_rate": 6.757538023659165e-07, "loss": 0.1766, "step": 14592 }, { "epoch": 0.65, "learning_rate": 6.743306946544515e-07, "loss": 0.195, "step": 14656 }, { "epoch": 0.65, "learning_rate": 6.729075869429868e-07, "loss": 0.2176, "step": 14720 }, { "epoch": 0.66, "learning_rate": 6.714844792315218e-07, "loss": 0.2863, "step": 14784 }, { "epoch": 0.66, "learning_rate": 6.700836075780485e-07, "loss": 0.2664, "step": 14848 }, { "epoch": 0.66, "learning_rate": 6.686604998665835e-07, "loss": 0.1482, "step": 14912 }, { "epoch": 0.67, "learning_rate": 6.672373921551188e-07, "loss": 0.3435, "step": 14976 } ], "logging_steps": 64, "max_steps": 44972, "num_input_tokens_seen": 0, "num_train_epochs": 2, "save_steps": 14990, "total_flos": 3.1826333601792e+19, "train_batch_size": 1, "trial_name": null, "trial_params": null }