{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 500, "global_step": 6717, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 6.94789081885856e-08, "loss": 4.9627, "step": 32 }, { "epoch": 0.01, "learning_rate": 1.4888337468982628e-07, "loss": 4.7244, "step": 64 }, { "epoch": 0.01, "learning_rate": 2.28287841191067e-07, "loss": 4.2955, "step": 96 }, { "epoch": 0.02, "learning_rate": 3.076923076923077e-07, "loss": 3.4769, "step": 128 }, { "epoch": 0.02, "learning_rate": 3.8709677419354837e-07, "loss": 2.602, "step": 160 }, { "epoch": 0.03, "learning_rate": 4.665012406947891e-07, "loss": 1.7352, "step": 192 }, { "epoch": 0.03, "learning_rate": 5.459057071960298e-07, "loss": 1.5972, "step": 224 }, { "epoch": 0.04, "learning_rate": 6.253101736972705e-07, "loss": 1.4621, "step": 256 }, { "epoch": 0.04, "learning_rate": 7.047146401985111e-07, "loss": 1.2769, "step": 288 }, { "epoch": 0.05, "learning_rate": 7.841191066997518e-07, "loss": 0.9435, "step": 320 }, { "epoch": 0.05, "learning_rate": 8.635235732009926e-07, "loss": 0.4583, "step": 352 }, { "epoch": 0.06, "learning_rate": 9.429280397022333e-07, "loss": 0.4589, "step": 384 }, { "epoch": 0.06, "learning_rate": 9.985745961355717e-07, "loss": 0.456, "step": 416 }, { "epoch": 0.07, "learning_rate": 9.935064935064936e-07, "loss": 0.3103, "step": 448 }, { "epoch": 0.07, "learning_rate": 9.884383908774152e-07, "loss": 0.4253, "step": 480 }, { "epoch": 0.08, "learning_rate": 9.83370288248337e-07, "loss": 0.363, "step": 512 }, { "epoch": 0.08, "learning_rate": 9.783021856192588e-07, "loss": 0.2691, "step": 544 }, { "epoch": 0.09, "learning_rate": 9.732340829901805e-07, "loss": 0.2752, "step": 576 }, { "epoch": 0.09, "learning_rate": 9.681659803611024e-07, "loss": 0.292, "step": 608 }, { "epoch": 0.1, "learning_rate": 9.63097877732024e-07, "loss": 0.3344, "step": 640 }, { "epoch": 0.1, "learning_rate": 9.580297751029457e-07, "loss": 0.3355, "step": 672 }, { "epoch": 0.1, "learning_rate": 9.529616724738676e-07, "loss": 0.3746, "step": 704 }, { "epoch": 0.11, "learning_rate": 9.478935698447893e-07, "loss": 0.3536, "step": 736 }, { "epoch": 0.11, "learning_rate": 9.428254672157111e-07, "loss": 0.2955, "step": 768 }, { "epoch": 0.12, "learning_rate": 9.377573645866328e-07, "loss": 0.3463, "step": 800 }, { "epoch": 0.12, "learning_rate": 9.326892619575546e-07, "loss": 0.4049, "step": 832 }, { "epoch": 0.13, "learning_rate": 9.276211593284764e-07, "loss": 0.3387, "step": 864 }, { "epoch": 0.13, "learning_rate": 9.225530566993981e-07, "loss": 0.3181, "step": 896 }, { "epoch": 0.14, "learning_rate": 9.174849540703199e-07, "loss": 0.2915, "step": 928 }, { "epoch": 0.14, "learning_rate": 9.125752296484004e-07, "loss": 0.298, "step": 960 }, { "epoch": 0.15, "learning_rate": 9.075071270193221e-07, "loss": 0.3243, "step": 992 }, { "epoch": 0.15, "learning_rate": 9.024390243902439e-07, "loss": 0.2464, "step": 1024 }, { "epoch": 0.16, "learning_rate": 8.973709217611656e-07, "loss": 0.3151, "step": 1056 }, { "epoch": 0.16, "learning_rate": 8.923028191320873e-07, "loss": 0.3568, "step": 1088 }, { "epoch": 0.17, "learning_rate": 8.872347165030091e-07, "loss": 0.2174, "step": 1120 }, { "epoch": 0.17, "learning_rate": 8.821666138739309e-07, "loss": 0.3157, "step": 1152 }, { "epoch": 0.18, "learning_rate": 8.770985112448527e-07, "loss": 0.3387, "step": 1184 }, { "epoch": 0.18, "learning_rate": 8.720304086157745e-07, "loss": 0.3334, "step": 1216 }, { "epoch": 0.19, "learning_rate": 8.669623059866962e-07, "loss": 0.2684, "step": 1248 }, { "epoch": 0.19, "learning_rate": 8.618942033576179e-07, "loss": 0.2936, "step": 1280 }, { "epoch": 0.2, "learning_rate": 8.568261007285396e-07, "loss": 0.3284, "step": 1312 }, { "epoch": 0.2, "learning_rate": 8.517579980994615e-07, "loss": 0.1762, "step": 1344 }, { "epoch": 0.2, "learning_rate": 8.466898954703833e-07, "loss": 0.3034, "step": 1376 }, { "epoch": 0.21, "learning_rate": 8.41621792841305e-07, "loss": 0.3374, "step": 1408 }, { "epoch": 0.21, "learning_rate": 8.365536902122268e-07, "loss": 0.2804, "step": 1440 }, { "epoch": 0.22, "learning_rate": 8.314855875831485e-07, "loss": 0.2602, "step": 1472 }, { "epoch": 0.22, "learning_rate": 8.264174849540702e-07, "loss": 0.3025, "step": 1504 }, { "epoch": 0.23, "learning_rate": 8.213493823249921e-07, "loss": 0.3233, "step": 1536 }, { "epoch": 0.23, "learning_rate": 8.162812796959138e-07, "loss": 0.3104, "step": 1568 }, { "epoch": 0.24, "learning_rate": 8.112131770668356e-07, "loss": 0.3905, "step": 1600 }, { "epoch": 0.24, "learning_rate": 8.061450744377573e-07, "loss": 0.2828, "step": 1632 }, { "epoch": 0.25, "learning_rate": 8.01076971808679e-07, "loss": 0.2781, "step": 1664 }, { "epoch": 0.25, "learning_rate": 7.960088691796008e-07, "loss": 0.2269, "step": 1696 }, { "epoch": 0.26, "learning_rate": 7.909407665505227e-07, "loss": 0.2633, "step": 1728 }, { "epoch": 0.26, "learning_rate": 7.858726639214444e-07, "loss": 0.3526, "step": 1760 }, { "epoch": 0.27, "learning_rate": 7.808045612923662e-07, "loss": 0.2754, "step": 1792 }, { "epoch": 0.27, "learning_rate": 7.757364586632878e-07, "loss": 0.268, "step": 1824 }, { "epoch": 0.28, "learning_rate": 7.706683560342096e-07, "loss": 0.3112, "step": 1856 }, { "epoch": 0.28, "learning_rate": 7.656002534051315e-07, "loss": 0.2976, "step": 1888 }, { "epoch": 0.29, "learning_rate": 7.605321507760532e-07, "loss": 0.2428, "step": 1920 }, { "epoch": 0.29, "learning_rate": 7.55464048146975e-07, "loss": 0.2446, "step": 1952 }, { "epoch": 0.3, "learning_rate": 7.503959455178968e-07, "loss": 0.2443, "step": 1984 }, { "epoch": 0.3, "learning_rate": 7.453278428888184e-07, "loss": 0.2407, "step": 2016 }, { "epoch": 0.3, "learning_rate": 7.402597402597402e-07, "loss": 0.2782, "step": 2048 }, { "epoch": 0.31, "learning_rate": 7.351916376306619e-07, "loss": 0.5238, "step": 2080 }, { "epoch": 0.31, "learning_rate": 7.301235350015838e-07, "loss": 0.2876, "step": 2112 }, { "epoch": 0.32, "learning_rate": 7.250554323725056e-07, "loss": 0.3338, "step": 2144 }, { "epoch": 0.32, "learning_rate": 7.199873297434272e-07, "loss": 0.2832, "step": 2176 }, { "epoch": 0.33, "learning_rate": 7.14919227114349e-07, "loss": 0.3335, "step": 2208 }, { "epoch": 0.33, "learning_rate": 7.098511244852708e-07, "loss": 0.2526, "step": 2240 }, { "epoch": 0.34, "learning_rate": 7.047830218561926e-07, "loss": 0.2588, "step": 2272 }, { "epoch": 0.34, "learning_rate": 6.99873297434273e-07, "loss": 0.3761, "step": 2304 }, { "epoch": 0.35, "learning_rate": 6.948051948051947e-07, "loss": 0.2686, "step": 2336 }, { "epoch": 0.35, "learning_rate": 6.897370921761165e-07, "loss": 0.2223, "step": 2368 }, { "epoch": 0.36, "learning_rate": 6.846689895470384e-07, "loss": 0.3035, "step": 2400 }, { "epoch": 0.36, "learning_rate": 6.796008869179601e-07, "loss": 0.3097, "step": 2432 }, { "epoch": 0.37, "learning_rate": 6.745327842888818e-07, "loss": 0.2795, "step": 2464 }, { "epoch": 0.37, "learning_rate": 6.694646816598036e-07, "loss": 0.2274, "step": 2496 }, { "epoch": 0.38, "learning_rate": 6.643965790307253e-07, "loss": 0.2918, "step": 2528 }, { "epoch": 0.38, "learning_rate": 6.593284764016471e-07, "loss": 0.2982, "step": 2560 }, { "epoch": 0.39, "learning_rate": 6.542603737725689e-07, "loss": 0.2517, "step": 2592 }, { "epoch": 0.39, "learning_rate": 6.491922711434906e-07, "loss": 0.2184, "step": 2624 }, { "epoch": 0.4, "learning_rate": 6.441241685144124e-07, "loss": 0.2715, "step": 2656 }, { "epoch": 0.4, "learning_rate": 6.390560658853341e-07, "loss": 0.2155, "step": 2688 }, { "epoch": 0.4, "learning_rate": 6.339879632562559e-07, "loss": 0.2686, "step": 2720 }, { "epoch": 0.41, "learning_rate": 6.289198606271777e-07, "loss": 0.2818, "step": 2752 }, { "epoch": 0.41, "learning_rate": 6.238517579980995e-07, "loss": 0.2865, "step": 2784 }, { "epoch": 0.42, "learning_rate": 6.187836553690212e-07, "loss": 0.2123, "step": 2816 }, { "epoch": 0.42, "learning_rate": 6.137155527399429e-07, "loss": 0.2663, "step": 2848 }, { "epoch": 0.43, "learning_rate": 6.086474501108647e-07, "loss": 0.2821, "step": 2880 }, { "epoch": 0.43, "learning_rate": 6.035793474817865e-07, "loss": 0.3285, "step": 2912 }, { "epoch": 0.44, "learning_rate": 5.985112448527082e-07, "loss": 0.2123, "step": 2944 }, { "epoch": 0.44, "learning_rate": 5.934431422236301e-07, "loss": 0.2753, "step": 2976 }, { "epoch": 0.45, "learning_rate": 5.883750395945518e-07, "loss": 0.2082, "step": 3008 }, { "epoch": 0.45, "learning_rate": 5.833069369654735e-07, "loss": 0.2989, "step": 3040 }, { "epoch": 0.46, "learning_rate": 5.782388343363953e-07, "loss": 0.2268, "step": 3072 }, { "epoch": 0.46, "learning_rate": 5.73170731707317e-07, "loss": 0.2775, "step": 3104 }, { "epoch": 0.47, "learning_rate": 5.681026290782388e-07, "loss": 0.2643, "step": 3136 }, { "epoch": 0.47, "learning_rate": 5.630345264491606e-07, "loss": 0.1845, "step": 3168 }, { "epoch": 0.48, "learning_rate": 5.579664238200823e-07, "loss": 0.282, "step": 3200 }, { "epoch": 0.48, "learning_rate": 5.528983211910041e-07, "loss": 0.3109, "step": 3232 }, { "epoch": 0.49, "learning_rate": 5.478302185619259e-07, "loss": 0.2203, "step": 3264 }, { "epoch": 0.49, "learning_rate": 5.427621159328476e-07, "loss": 0.2803, "step": 3296 }, { "epoch": 0.5, "learning_rate": 5.376940133037694e-07, "loss": 0.298, "step": 3328 }, { "epoch": 0.5, "learning_rate": 5.326259106746911e-07, "loss": 0.2953, "step": 3360 }, { "epoch": 0.5, "learning_rate": 5.275578080456129e-07, "loss": 0.2844, "step": 3392 }, { "epoch": 0.51, "learning_rate": 5.224897054165347e-07, "loss": 0.2202, "step": 3424 }, { "epoch": 0.51, "learning_rate": 5.174216027874564e-07, "loss": 0.2211, "step": 3456 }, { "epoch": 0.52, "learning_rate": 5.123535001583782e-07, "loss": 0.3406, "step": 3488 }, { "epoch": 0.52, "learning_rate": 5.072853975292999e-07, "loss": 0.2894, "step": 3520 }, { "epoch": 0.53, "learning_rate": 5.022172949002217e-07, "loss": 0.2595, "step": 3552 }, { "epoch": 0.53, "learning_rate": 4.971491922711435e-07, "loss": 0.2943, "step": 3584 }, { "epoch": 0.54, "learning_rate": 4.920810896420652e-07, "loss": 0.2118, "step": 3616 }, { "epoch": 0.54, "learning_rate": 4.87012987012987e-07, "loss": 0.2267, "step": 3648 }, { "epoch": 0.55, "learning_rate": 4.819448843839088e-07, "loss": 0.2574, "step": 3680 }, { "epoch": 0.55, "learning_rate": 4.768767817548305e-07, "loss": 0.2322, "step": 3712 }, { "epoch": 0.56, "learning_rate": 4.7180867912575227e-07, "loss": 0.2427, "step": 3744 }, { "epoch": 0.56, "learning_rate": 4.6674057649667405e-07, "loss": 0.252, "step": 3776 }, { "epoch": 0.57, "learning_rate": 4.616724738675958e-07, "loss": 0.2369, "step": 3808 }, { "epoch": 0.57, "learning_rate": 4.5660437123851757e-07, "loss": 0.2179, "step": 3840 }, { "epoch": 0.58, "learning_rate": 4.5153626860943935e-07, "loss": 0.2108, "step": 3872 }, { "epoch": 0.58, "learning_rate": 4.464681659803611e-07, "loss": 0.1787, "step": 3904 }, { "epoch": 0.59, "learning_rate": 4.414000633512828e-07, "loss": 0.281, "step": 3936 }, { "epoch": 0.59, "learning_rate": 4.3633196072220465e-07, "loss": 0.3145, "step": 3968 }, { "epoch": 0.6, "learning_rate": 4.312638580931264e-07, "loss": 0.2586, "step": 4000 }, { "epoch": 0.6, "learning_rate": 4.261957554640481e-07, "loss": 0.2548, "step": 4032 }, { "epoch": 0.61, "learning_rate": 4.211276528349699e-07, "loss": 0.3165, "step": 4064 }, { "epoch": 0.61, "learning_rate": 4.160595502058917e-07, "loss": 0.1822, "step": 4096 }, { "epoch": 0.61, "learning_rate": 4.109914475768134e-07, "loss": 0.1949, "step": 4128 }, { "epoch": 0.62, "learning_rate": 4.059233449477352e-07, "loss": 0.3197, "step": 4160 }, { "epoch": 0.62, "learning_rate": 4.008552423186569e-07, "loss": 0.2209, "step": 4192 }, { "epoch": 0.63, "learning_rate": 3.957871396895787e-07, "loss": 0.1941, "step": 4224 }, { "epoch": 0.63, "learning_rate": 3.907190370605005e-07, "loss": 0.2412, "step": 4256 }, { "epoch": 0.64, "learning_rate": 3.856509344314222e-07, "loss": 0.3301, "step": 4288 }, { "epoch": 0.64, "learning_rate": 3.8058283180234395e-07, "loss": 0.3845, "step": 4320 }, { "epoch": 0.65, "learning_rate": 3.755147291732658e-07, "loss": 0.2557, "step": 4352 }, { "epoch": 0.65, "learning_rate": 3.704466265441875e-07, "loss": 0.2228, "step": 4384 }, { "epoch": 0.66, "learning_rate": 3.6537852391510925e-07, "loss": 0.2608, "step": 4416 }, { "epoch": 0.66, "learning_rate": 3.6031042128603103e-07, "loss": 0.1698, "step": 4448 }, { "epoch": 0.67, "learning_rate": 3.552423186569528e-07, "loss": 0.2359, "step": 4480 }, { "epoch": 0.67, "learning_rate": 3.5017421602787454e-07, "loss": 0.2657, "step": 4512 }, { "epoch": 0.68, "learning_rate": 3.451061133987963e-07, "loss": 0.2521, "step": 4544 }, { "epoch": 0.68, "learning_rate": 3.4003801076971806e-07, "loss": 0.216, "step": 4576 }, { "epoch": 0.69, "learning_rate": 3.3496990814063984e-07, "loss": 0.3104, "step": 4608 }, { "epoch": 0.69, "learning_rate": 3.299018055115616e-07, "loss": 0.2136, "step": 4640 }, { "epoch": 0.7, "learning_rate": 3.2483370288248335e-07, "loss": 0.2443, "step": 4672 }, { "epoch": 0.7, "learning_rate": 3.197656002534051e-07, "loss": 0.1771, "step": 4704 }, { "epoch": 0.71, "learning_rate": 3.146974976243269e-07, "loss": 0.187, "step": 4736 }, { "epoch": 0.71, "learning_rate": 3.0962939499524865e-07, "loss": 0.2901, "step": 4768 }, { "epoch": 0.71, "learning_rate": 3.045612923661704e-07, "loss": 0.4018, "step": 4800 }, { "epoch": 0.72, "learning_rate": 2.9949318973709216e-07, "loss": 0.2795, "step": 4832 }, { "epoch": 0.72, "learning_rate": 2.9442508710801395e-07, "loss": 0.2578, "step": 4864 }, { "epoch": 0.73, "learning_rate": 2.893569844789357e-07, "loss": 0.1746, "step": 4896 }, { "epoch": 0.73, "learning_rate": 2.8428888184985746e-07, "loss": 0.2534, "step": 4928 }, { "epoch": 0.74, "learning_rate": 2.792207792207792e-07, "loss": 0.3677, "step": 4960 }, { "epoch": 0.74, "learning_rate": 2.74152676591701e-07, "loss": 0.2953, "step": 4992 }, { "epoch": 0.75, "learning_rate": 2.6908457396262276e-07, "loss": 0.2722, "step": 5024 }, { "epoch": 0.75, "learning_rate": 2.640164713335445e-07, "loss": 0.2797, "step": 5056 }, { "epoch": 0.76, "learning_rate": 2.589483687044662e-07, "loss": 0.2707, "step": 5088 }, { "epoch": 0.76, "learning_rate": 2.5388026607538806e-07, "loss": 0.3243, "step": 5120 }, { "epoch": 0.77, "learning_rate": 2.488121634463098e-07, "loss": 0.271, "step": 5152 }, { "epoch": 0.77, "learning_rate": 2.437440608172315e-07, "loss": 0.2899, "step": 5184 }, { "epoch": 0.78, "learning_rate": 2.386759581881533e-07, "loss": 0.3117, "step": 5216 }, { "epoch": 0.78, "learning_rate": 2.3360785555907506e-07, "loss": 0.2323, "step": 5248 }, { "epoch": 0.79, "learning_rate": 2.2853975292999684e-07, "loss": 0.2318, "step": 5280 }, { "epoch": 0.79, "learning_rate": 2.2347165030091857e-07, "loss": 0.29, "step": 5312 }, { "epoch": 0.8, "learning_rate": 2.1840354767184035e-07, "loss": 0.2406, "step": 5344 }, { "epoch": 0.8, "learning_rate": 2.133354450427621e-07, "loss": 0.2292, "step": 5376 }, { "epoch": 0.81, "learning_rate": 2.0826734241368387e-07, "loss": 0.3598, "step": 5408 }, { "epoch": 0.81, "learning_rate": 2.0319923978460563e-07, "loss": 0.258, "step": 5440 }, { "epoch": 0.81, "learning_rate": 1.981311371555274e-07, "loss": 0.2904, "step": 5472 }, { "epoch": 0.82, "learning_rate": 1.9306303452644914e-07, "loss": 0.3177, "step": 5504 }, { "epoch": 0.82, "learning_rate": 1.8799493189737092e-07, "loss": 0.3294, "step": 5536 }, { "epoch": 0.83, "learning_rate": 1.8292682926829268e-07, "loss": 0.2415, "step": 5568 }, { "epoch": 0.83, "learning_rate": 1.7785872663921444e-07, "loss": 0.2808, "step": 5600 }, { "epoch": 0.84, "learning_rate": 1.727906240101362e-07, "loss": 0.2563, "step": 5632 }, { "epoch": 0.84, "learning_rate": 1.6772252138105798e-07, "loss": 0.2069, "step": 5664 }, { "epoch": 0.85, "learning_rate": 1.626544187519797e-07, "loss": 0.2112, "step": 5696 }, { "epoch": 0.85, "learning_rate": 1.575863161229015e-07, "loss": 0.1768, "step": 5728 }, { "epoch": 0.86, "learning_rate": 1.5251821349382325e-07, "loss": 0.2977, "step": 5760 }, { "epoch": 0.86, "learning_rate": 1.47450110864745e-07, "loss": 0.2511, "step": 5792 }, { "epoch": 0.87, "learning_rate": 1.4238200823566676e-07, "loss": 0.2203, "step": 5824 }, { "epoch": 0.87, "learning_rate": 1.3731390560658854e-07, "loss": 0.1882, "step": 5856 }, { "epoch": 0.88, "learning_rate": 1.3224580297751027e-07, "loss": 0.3103, "step": 5888 }, { "epoch": 0.88, "learning_rate": 1.2717770034843206e-07, "loss": 0.2475, "step": 5920 }, { "epoch": 0.89, "learning_rate": 1.2210959771935381e-07, "loss": 0.2512, "step": 5952 }, { "epoch": 0.89, "learning_rate": 1.1704149509027557e-07, "loss": 0.2936, "step": 5984 }, { "epoch": 0.9, "learning_rate": 1.1197339246119733e-07, "loss": 0.2215, "step": 6016 }, { "epoch": 0.9, "learning_rate": 1.0690528983211909e-07, "loss": 0.2263, "step": 6048 }, { "epoch": 0.91, "learning_rate": 1.0183718720304086e-07, "loss": 0.2768, "step": 6080 }, { "epoch": 0.91, "learning_rate": 9.676908457396261e-08, "loss": 0.2535, "step": 6112 }, { "epoch": 0.91, "learning_rate": 9.170098194488437e-08, "loss": 0.195, "step": 6144 }, { "epoch": 0.92, "learning_rate": 8.663287931580614e-08, "loss": 0.1955, "step": 6176 }, { "epoch": 0.92, "learning_rate": 8.15647766867279e-08, "loss": 0.355, "step": 6208 }, { "epoch": 0.93, "learning_rate": 7.649667405764967e-08, "loss": 0.2593, "step": 6240 }, { "epoch": 0.93, "learning_rate": 7.142857142857142e-08, "loss": 0.3068, "step": 6272 }, { "epoch": 0.94, "learning_rate": 6.636046879949318e-08, "loss": 0.2366, "step": 6304 }, { "epoch": 0.94, "learning_rate": 6.129236617041495e-08, "loss": 0.2361, "step": 6336 }, { "epoch": 0.95, "learning_rate": 5.6224263541336714e-08, "loss": 0.2616, "step": 6368 }, { "epoch": 0.95, "learning_rate": 5.115616091225847e-08, "loss": 0.2597, "step": 6400 }, { "epoch": 0.96, "learning_rate": 4.6088058283180234e-08, "loss": 0.3039, "step": 6432 }, { "epoch": 0.96, "learning_rate": 4.1019955654102e-08, "loss": 0.2453, "step": 6464 }, { "epoch": 0.97, "learning_rate": 3.595185302502376e-08, "loss": 0.3413, "step": 6496 }, { "epoch": 0.97, "learning_rate": 3.088375039594552e-08, "loss": 0.2654, "step": 6528 }, { "epoch": 0.98, "learning_rate": 2.581564776686728e-08, "loss": 0.2189, "step": 6560 }, { "epoch": 0.98, "learning_rate": 2.074754513778904e-08, "loss": 0.257, "step": 6592 }, { "epoch": 0.99, "learning_rate": 1.56794425087108e-08, "loss": 0.25, "step": 6624 }, { "epoch": 0.99, "learning_rate": 1.0611339879632562e-08, "loss": 0.3328, "step": 6656 }, { "epoch": 1.0, "learning_rate": 5.543237250554324e-09, "loss": 0.2794, "step": 6688 } ], "logging_steps": 32, "max_steps": 6717, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 6717, "total_flos": 2.85226794934272e+19, "train_batch_size": 2, "trial_name": null, "trial_params": null }