{ "best_metric": null, "best_model_checkpoint": null, "epoch": 10.0, "global_step": 7190, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.03, "learning_rate": 9.972183588317107e-05, "loss": 1.5922, "step": 20 }, { "epoch": 0.06, "learning_rate": 9.944367176634215e-05, "loss": 1.0766, "step": 40 }, { "epoch": 0.08, "learning_rate": 9.916550764951322e-05, "loss": 0.7023, "step": 60 }, { "epoch": 0.11, "learning_rate": 9.888734353268429e-05, "loss": 0.6458, "step": 80 }, { "epoch": 0.14, "learning_rate": 9.860917941585536e-05, "loss": 0.6244, "step": 100 }, { "epoch": 0.17, "learning_rate": 9.833101529902643e-05, "loss": 0.5692, "step": 120 }, { "epoch": 0.19, "learning_rate": 9.805285118219751e-05, "loss": 0.5318, "step": 140 }, { "epoch": 0.22, "learning_rate": 9.777468706536858e-05, "loss": 0.6218, "step": 160 }, { "epoch": 0.25, "learning_rate": 9.749652294853965e-05, "loss": 0.5794, "step": 180 }, { "epoch": 0.28, "learning_rate": 9.721835883171071e-05, "loss": 0.591, "step": 200 }, { "epoch": 0.31, "learning_rate": 9.694019471488178e-05, "loss": 0.582, "step": 220 }, { "epoch": 0.33, "learning_rate": 9.666203059805285e-05, "loss": 0.6104, "step": 240 }, { "epoch": 0.36, "learning_rate": 9.638386648122393e-05, "loss": 0.5706, "step": 260 }, { "epoch": 0.39, "learning_rate": 9.6105702364395e-05, "loss": 0.6002, "step": 280 }, { "epoch": 0.42, "learning_rate": 9.582753824756607e-05, "loss": 0.5189, "step": 300 }, { "epoch": 0.45, "learning_rate": 9.554937413073714e-05, "loss": 0.5508, "step": 320 }, { "epoch": 0.47, "learning_rate": 9.52712100139082e-05, "loss": 0.5644, "step": 340 }, { "epoch": 0.5, "learning_rate": 9.499304589707929e-05, "loss": 0.5367, "step": 360 }, { "epoch": 0.53, "learning_rate": 9.471488178025035e-05, "loss": 0.5947, "step": 380 }, { "epoch": 0.56, "learning_rate": 9.443671766342142e-05, "loss": 0.5636, "step": 400 }, { "epoch": 0.58, "learning_rate": 9.41585535465925e-05, "loss": 0.5824, "step": 420 }, { "epoch": 0.61, "learning_rate": 9.388038942976356e-05, "loss": 0.6374, "step": 440 }, { "epoch": 0.64, "learning_rate": 9.360222531293463e-05, "loss": 0.5929, "step": 460 }, { "epoch": 0.67, "learning_rate": 9.332406119610571e-05, "loss": 0.5689, "step": 480 }, { "epoch": 0.7, "learning_rate": 9.304589707927678e-05, "loss": 0.5719, "step": 500 }, { "epoch": 0.72, "learning_rate": 9.276773296244784e-05, "loss": 0.5469, "step": 520 }, { "epoch": 0.75, "learning_rate": 9.248956884561893e-05, "loss": 0.5986, "step": 540 }, { "epoch": 0.78, "learning_rate": 9.221140472878998e-05, "loss": 0.5868, "step": 560 }, { "epoch": 0.81, "learning_rate": 9.193324061196106e-05, "loss": 0.596, "step": 580 }, { "epoch": 0.83, "learning_rate": 9.165507649513213e-05, "loss": 0.5653, "step": 600 }, { "epoch": 0.86, "learning_rate": 9.13769123783032e-05, "loss": 0.5527, "step": 620 }, { "epoch": 0.89, "learning_rate": 9.109874826147428e-05, "loss": 0.5296, "step": 640 }, { "epoch": 0.92, "learning_rate": 9.082058414464535e-05, "loss": 0.5399, "step": 660 }, { "epoch": 0.95, "learning_rate": 9.054242002781642e-05, "loss": 0.6171, "step": 680 }, { "epoch": 0.97, "learning_rate": 9.026425591098748e-05, "loss": 0.5256, "step": 700 }, { "epoch": 1.0, "learning_rate": 8.998609179415855e-05, "loss": 0.5679, "step": 720 }, { "epoch": 1.03, "learning_rate": 8.970792767732963e-05, "loss": 0.5813, "step": 740 }, { "epoch": 1.06, "learning_rate": 8.94297635605007e-05, "loss": 0.5831, "step": 760 }, { "epoch": 1.08, "learning_rate": 8.915159944367177e-05, "loss": 0.5042, "step": 780 }, { "epoch": 1.11, "learning_rate": 8.887343532684285e-05, "loss": 0.4984, "step": 800 }, { "epoch": 1.14, "learning_rate": 8.859527121001391e-05, "loss": 0.5397, "step": 820 }, { "epoch": 1.17, "learning_rate": 8.831710709318498e-05, "loss": 0.4707, "step": 840 }, { "epoch": 1.2, "learning_rate": 8.803894297635606e-05, "loss": 0.5099, "step": 860 }, { "epoch": 1.22, "learning_rate": 8.776077885952713e-05, "loss": 0.508, "step": 880 }, { "epoch": 1.25, "learning_rate": 8.74826147426982e-05, "loss": 0.5305, "step": 900 }, { "epoch": 1.28, "learning_rate": 8.720445062586928e-05, "loss": 0.5146, "step": 920 }, { "epoch": 1.31, "learning_rate": 8.692628650904033e-05, "loss": 0.5567, "step": 940 }, { "epoch": 1.34, "learning_rate": 8.664812239221141e-05, "loss": 0.4569, "step": 960 }, { "epoch": 1.36, "learning_rate": 8.636995827538248e-05, "loss": 0.5321, "step": 980 }, { "epoch": 1.39, "learning_rate": 8.609179415855355e-05, "loss": 0.5039, "step": 1000 }, { "epoch": 1.42, "learning_rate": 8.581363004172463e-05, "loss": 0.5198, "step": 1020 }, { "epoch": 1.45, "learning_rate": 8.55354659248957e-05, "loss": 0.5124, "step": 1040 }, { "epoch": 1.47, "learning_rate": 8.525730180806675e-05, "loss": 0.4706, "step": 1060 }, { "epoch": 1.5, "learning_rate": 8.497913769123783e-05, "loss": 0.5389, "step": 1080 }, { "epoch": 1.53, "learning_rate": 8.47009735744089e-05, "loss": 0.5279, "step": 1100 }, { "epoch": 1.56, "learning_rate": 8.442280945757997e-05, "loss": 0.5317, "step": 1120 }, { "epoch": 1.59, "learning_rate": 8.414464534075105e-05, "loss": 0.549, "step": 1140 }, { "epoch": 1.61, "learning_rate": 8.386648122392212e-05, "loss": 0.5743, "step": 1160 }, { "epoch": 1.64, "learning_rate": 8.358831710709319e-05, "loss": 0.5269, "step": 1180 }, { "epoch": 1.67, "learning_rate": 8.331015299026426e-05, "loss": 0.4979, "step": 1200 }, { "epoch": 1.7, "learning_rate": 8.303198887343532e-05, "loss": 0.525, "step": 1220 }, { "epoch": 1.72, "learning_rate": 8.27538247566064e-05, "loss": 0.5036, "step": 1240 }, { "epoch": 1.75, "learning_rate": 8.247566063977747e-05, "loss": 0.5357, "step": 1260 }, { "epoch": 1.78, "learning_rate": 8.219749652294854e-05, "loss": 0.5345, "step": 1280 }, { "epoch": 1.81, "learning_rate": 8.191933240611962e-05, "loss": 0.499, "step": 1300 }, { "epoch": 1.84, "learning_rate": 8.164116828929068e-05, "loss": 0.4976, "step": 1320 }, { "epoch": 1.86, "learning_rate": 8.136300417246176e-05, "loss": 0.504, "step": 1340 }, { "epoch": 1.89, "learning_rate": 8.108484005563283e-05, "loss": 0.4888, "step": 1360 }, { "epoch": 1.92, "learning_rate": 8.08066759388039e-05, "loss": 0.5083, "step": 1380 }, { "epoch": 1.95, "learning_rate": 8.052851182197498e-05, "loss": 0.5361, "step": 1400 }, { "epoch": 1.97, "learning_rate": 8.025034770514605e-05, "loss": 0.5412, "step": 1420 }, { "epoch": 2.0, "learning_rate": 7.99721835883171e-05, "loss": 0.471, "step": 1440 }, { "epoch": 2.03, "learning_rate": 7.969401947148818e-05, "loss": 0.4617, "step": 1460 }, { "epoch": 2.06, "learning_rate": 7.941585535465925e-05, "loss": 0.468, "step": 1480 }, { "epoch": 2.09, "learning_rate": 7.913769123783032e-05, "loss": 0.4843, "step": 1500 }, { "epoch": 2.11, "learning_rate": 7.88595271210014e-05, "loss": 0.4567, "step": 1520 }, { "epoch": 2.14, "learning_rate": 7.858136300417247e-05, "loss": 0.515, "step": 1540 }, { "epoch": 2.17, "learning_rate": 7.830319888734354e-05, "loss": 0.4594, "step": 1560 }, { "epoch": 2.2, "learning_rate": 7.80250347705146e-05, "loss": 0.4375, "step": 1580 }, { "epoch": 2.23, "learning_rate": 7.774687065368567e-05, "loss": 0.4916, "step": 1600 }, { "epoch": 2.25, "learning_rate": 7.746870653685676e-05, "loss": 0.5439, "step": 1620 }, { "epoch": 2.28, "learning_rate": 7.719054242002782e-05, "loss": 0.5093, "step": 1640 }, { "epoch": 2.31, "learning_rate": 7.691237830319889e-05, "loss": 0.5126, "step": 1660 }, { "epoch": 2.34, "learning_rate": 7.663421418636996e-05, "loss": 0.4866, "step": 1680 }, { "epoch": 2.36, "learning_rate": 7.635605006954103e-05, "loss": 0.4732, "step": 1700 }, { "epoch": 2.39, "learning_rate": 7.60778859527121e-05, "loss": 0.4682, "step": 1720 }, { "epoch": 2.42, "learning_rate": 7.579972183588318e-05, "loss": 0.5212, "step": 1740 }, { "epoch": 2.45, "learning_rate": 7.552155771905425e-05, "loss": 0.4307, "step": 1760 }, { "epoch": 2.48, "learning_rate": 7.524339360222531e-05, "loss": 0.4613, "step": 1780 }, { "epoch": 2.5, "learning_rate": 7.496522948539638e-05, "loss": 0.4379, "step": 1800 }, { "epoch": 2.53, "learning_rate": 7.468706536856745e-05, "loss": 0.4572, "step": 1820 }, { "epoch": 2.56, "learning_rate": 7.440890125173853e-05, "loss": 0.4637, "step": 1840 }, { "epoch": 2.59, "learning_rate": 7.41307371349096e-05, "loss": 0.4752, "step": 1860 }, { "epoch": 2.61, "learning_rate": 7.385257301808067e-05, "loss": 0.4982, "step": 1880 }, { "epoch": 2.64, "learning_rate": 7.357440890125175e-05, "loss": 0.4163, "step": 1900 }, { "epoch": 2.67, "learning_rate": 7.329624478442282e-05, "loss": 0.4906, "step": 1920 }, { "epoch": 2.7, "learning_rate": 7.301808066759389e-05, "loss": 0.4375, "step": 1940 }, { "epoch": 2.73, "learning_rate": 7.273991655076495e-05, "loss": 0.4402, "step": 1960 }, { "epoch": 2.75, "learning_rate": 7.246175243393602e-05, "loss": 0.52, "step": 1980 }, { "epoch": 2.78, "learning_rate": 7.21835883171071e-05, "loss": 0.4718, "step": 2000 }, { "epoch": 2.81, "learning_rate": 7.190542420027817e-05, "loss": 0.5179, "step": 2020 }, { "epoch": 2.84, "learning_rate": 7.162726008344924e-05, "loss": 0.4774, "step": 2040 }, { "epoch": 2.87, "learning_rate": 7.134909596662031e-05, "loss": 0.477, "step": 2060 }, { "epoch": 2.89, "learning_rate": 7.107093184979138e-05, "loss": 0.4805, "step": 2080 }, { "epoch": 2.92, "learning_rate": 7.079276773296244e-05, "loss": 0.4507, "step": 2100 }, { "epoch": 2.95, "learning_rate": 7.051460361613353e-05, "loss": 0.451, "step": 2120 }, { "epoch": 2.98, "learning_rate": 7.02364394993046e-05, "loss": 0.4698, "step": 2140 }, { "epoch": 3.0, "learning_rate": 6.995827538247566e-05, "loss": 0.4611, "step": 2160 }, { "epoch": 3.03, "learning_rate": 6.968011126564673e-05, "loss": 0.4324, "step": 2180 }, { "epoch": 3.06, "learning_rate": 6.94019471488178e-05, "loss": 0.4085, "step": 2200 }, { "epoch": 3.09, "learning_rate": 6.912378303198888e-05, "loss": 0.406, "step": 2220 }, { "epoch": 3.12, "learning_rate": 6.884561891515995e-05, "loss": 0.4395, "step": 2240 }, { "epoch": 3.14, "learning_rate": 6.856745479833102e-05, "loss": 0.4557, "step": 2260 }, { "epoch": 3.17, "learning_rate": 6.82892906815021e-05, "loss": 0.4331, "step": 2280 }, { "epoch": 3.2, "learning_rate": 6.801112656467315e-05, "loss": 0.4544, "step": 2300 }, { "epoch": 3.23, "learning_rate": 6.773296244784422e-05, "loss": 0.4419, "step": 2320 }, { "epoch": 3.25, "learning_rate": 6.74547983310153e-05, "loss": 0.4362, "step": 2340 }, { "epoch": 3.28, "learning_rate": 6.717663421418637e-05, "loss": 0.4392, "step": 2360 }, { "epoch": 3.31, "learning_rate": 6.689847009735744e-05, "loss": 0.4633, "step": 2380 }, { "epoch": 3.34, "learning_rate": 6.662030598052852e-05, "loss": 0.3991, "step": 2400 }, { "epoch": 3.37, "learning_rate": 6.634214186369958e-05, "loss": 0.451, "step": 2420 }, { "epoch": 3.39, "learning_rate": 6.606397774687066e-05, "loss": 0.3981, "step": 2440 }, { "epoch": 3.42, "learning_rate": 6.578581363004173e-05, "loss": 0.4091, "step": 2460 }, { "epoch": 3.45, "learning_rate": 6.55076495132128e-05, "loss": 0.4431, "step": 2480 }, { "epoch": 3.48, "learning_rate": 6.522948539638388e-05, "loss": 0.4432, "step": 2500 }, { "epoch": 3.5, "learning_rate": 6.495132127955494e-05, "loss": 0.4572, "step": 2520 }, { "epoch": 3.53, "learning_rate": 6.467315716272601e-05, "loss": 0.4288, "step": 2540 }, { "epoch": 3.56, "learning_rate": 6.439499304589708e-05, "loss": 0.4195, "step": 2560 }, { "epoch": 3.59, "learning_rate": 6.411682892906815e-05, "loss": 0.4456, "step": 2580 }, { "epoch": 3.62, "learning_rate": 6.383866481223923e-05, "loss": 0.4164, "step": 2600 }, { "epoch": 3.64, "learning_rate": 6.35605006954103e-05, "loss": 0.4597, "step": 2620 }, { "epoch": 3.67, "learning_rate": 6.328233657858137e-05, "loss": 0.4074, "step": 2640 }, { "epoch": 3.7, "learning_rate": 6.300417246175245e-05, "loss": 0.449, "step": 2660 }, { "epoch": 3.73, "learning_rate": 6.27260083449235e-05, "loss": 0.4322, "step": 2680 }, { "epoch": 3.76, "learning_rate": 6.244784422809457e-05, "loss": 0.411, "step": 2700 }, { "epoch": 3.78, "learning_rate": 6.216968011126565e-05, "loss": 0.4289, "step": 2720 }, { "epoch": 3.81, "learning_rate": 6.189151599443672e-05, "loss": 0.4298, "step": 2740 }, { "epoch": 3.84, "learning_rate": 6.161335187760779e-05, "loss": 0.4303, "step": 2760 }, { "epoch": 3.87, "learning_rate": 6.133518776077887e-05, "loss": 0.4422, "step": 2780 }, { "epoch": 3.89, "learning_rate": 6.105702364394992e-05, "loss": 0.4473, "step": 2800 }, { "epoch": 3.92, "learning_rate": 6.0778859527121e-05, "loss": 0.4164, "step": 2820 }, { "epoch": 3.95, "learning_rate": 6.0500695410292075e-05, "loss": 0.4325, "step": 2840 }, { "epoch": 3.98, "learning_rate": 6.022253129346315e-05, "loss": 0.4368, "step": 2860 }, { "epoch": 4.01, "learning_rate": 5.994436717663422e-05, "loss": 0.4469, "step": 2880 }, { "epoch": 4.03, "learning_rate": 5.966620305980529e-05, "loss": 0.4146, "step": 2900 }, { "epoch": 4.06, "learning_rate": 5.9388038942976354e-05, "loss": 0.4111, "step": 2920 }, { "epoch": 4.09, "learning_rate": 5.910987482614743e-05, "loss": 0.444, "step": 2940 }, { "epoch": 4.12, "learning_rate": 5.88317107093185e-05, "loss": 0.4195, "step": 2960 }, { "epoch": 4.14, "learning_rate": 5.855354659248957e-05, "loss": 0.3457, "step": 2980 }, { "epoch": 4.17, "learning_rate": 5.827538247566065e-05, "loss": 0.3783, "step": 3000 }, { "epoch": 4.2, "learning_rate": 5.7997218358831715e-05, "loss": 0.407, "step": 3020 }, { "epoch": 4.23, "learning_rate": 5.7719054242002777e-05, "loss": 0.4187, "step": 3040 }, { "epoch": 4.26, "learning_rate": 5.744089012517385e-05, "loss": 0.3938, "step": 3060 }, { "epoch": 4.28, "learning_rate": 5.7162726008344926e-05, "loss": 0.3947, "step": 3080 }, { "epoch": 4.31, "learning_rate": 5.6884561891515995e-05, "loss": 0.4367, "step": 3100 }, { "epoch": 4.34, "learning_rate": 5.660639777468707e-05, "loss": 0.3908, "step": 3120 }, { "epoch": 4.37, "learning_rate": 5.6328233657858144e-05, "loss": 0.4016, "step": 3140 }, { "epoch": 4.39, "learning_rate": 5.605006954102921e-05, "loss": 0.3862, "step": 3160 }, { "epoch": 4.42, "learning_rate": 5.5771905424200274e-05, "loss": 0.3971, "step": 3180 }, { "epoch": 4.45, "learning_rate": 5.549374130737135e-05, "loss": 0.3688, "step": 3200 }, { "epoch": 4.48, "learning_rate": 5.5215577190542424e-05, "loss": 0.3961, "step": 3220 }, { "epoch": 4.51, "learning_rate": 5.493741307371349e-05, "loss": 0.4247, "step": 3240 }, { "epoch": 4.53, "learning_rate": 5.465924895688457e-05, "loss": 0.4154, "step": 3260 }, { "epoch": 4.56, "learning_rate": 5.438108484005564e-05, "loss": 0.3502, "step": 3280 }, { "epoch": 4.59, "learning_rate": 5.41029207232267e-05, "loss": 0.3485, "step": 3300 }, { "epoch": 4.62, "learning_rate": 5.382475660639778e-05, "loss": 0.4089, "step": 3320 }, { "epoch": 4.65, "learning_rate": 5.3546592489568846e-05, "loss": 0.3659, "step": 3340 }, { "epoch": 4.67, "learning_rate": 5.326842837273992e-05, "loss": 0.3838, "step": 3360 }, { "epoch": 4.7, "learning_rate": 5.2990264255910996e-05, "loss": 0.3694, "step": 3380 }, { "epoch": 4.73, "learning_rate": 5.2712100139082064e-05, "loss": 0.4135, "step": 3400 }, { "epoch": 4.76, "learning_rate": 5.2433936022253126e-05, "loss": 0.3949, "step": 3420 }, { "epoch": 4.78, "learning_rate": 5.21557719054242e-05, "loss": 0.4358, "step": 3440 }, { "epoch": 4.81, "learning_rate": 5.1877607788595275e-05, "loss": 0.4285, "step": 3460 }, { "epoch": 4.84, "learning_rate": 5.1599443671766344e-05, "loss": 0.374, "step": 3480 }, { "epoch": 4.87, "learning_rate": 5.132127955493742e-05, "loss": 0.4047, "step": 3500 }, { "epoch": 4.9, "learning_rate": 5.1043115438108493e-05, "loss": 0.3886, "step": 3520 }, { "epoch": 4.92, "learning_rate": 5.0764951321279555e-05, "loss": 0.3783, "step": 3540 }, { "epoch": 4.95, "learning_rate": 5.048678720445062e-05, "loss": 0.4301, "step": 3560 }, { "epoch": 4.98, "learning_rate": 5.02086230876217e-05, "loss": 0.371, "step": 3580 }, { "epoch": 5.01, "learning_rate": 4.993045897079277e-05, "loss": 0.3834, "step": 3600 }, { "epoch": 5.03, "learning_rate": 4.965229485396384e-05, "loss": 0.382, "step": 3620 }, { "epoch": 5.06, "learning_rate": 4.937413073713491e-05, "loss": 0.3744, "step": 3640 }, { "epoch": 5.09, "learning_rate": 4.9095966620305984e-05, "loss": 0.3819, "step": 3660 }, { "epoch": 5.12, "learning_rate": 4.881780250347706e-05, "loss": 0.3857, "step": 3680 }, { "epoch": 5.15, "learning_rate": 4.853963838664812e-05, "loss": 0.4152, "step": 3700 }, { "epoch": 5.17, "learning_rate": 4.8261474269819195e-05, "loss": 0.4125, "step": 3720 }, { "epoch": 5.2, "learning_rate": 4.798331015299027e-05, "loss": 0.3337, "step": 3740 }, { "epoch": 5.23, "learning_rate": 4.770514603616134e-05, "loss": 0.3294, "step": 3760 }, { "epoch": 5.26, "learning_rate": 4.7426981919332406e-05, "loss": 0.3155, "step": 3780 }, { "epoch": 5.29, "learning_rate": 4.714881780250348e-05, "loss": 0.3058, "step": 3800 }, { "epoch": 5.31, "learning_rate": 4.687065368567455e-05, "loss": 0.3816, "step": 3820 }, { "epoch": 5.34, "learning_rate": 4.659248956884562e-05, "loss": 0.3308, "step": 3840 }, { "epoch": 5.37, "learning_rate": 4.631432545201669e-05, "loss": 0.3203, "step": 3860 }, { "epoch": 5.4, "learning_rate": 4.603616133518776e-05, "loss": 0.3219, "step": 3880 }, { "epoch": 5.42, "learning_rate": 4.5757997218358836e-05, "loss": 0.4005, "step": 3900 }, { "epoch": 5.45, "learning_rate": 4.5479833101529904e-05, "loss": 0.3508, "step": 3920 }, { "epoch": 5.48, "learning_rate": 4.520166898470097e-05, "loss": 0.3329, "step": 3940 }, { "epoch": 5.51, "learning_rate": 4.492350486787205e-05, "loss": 0.3915, "step": 3960 }, { "epoch": 5.54, "learning_rate": 4.464534075104312e-05, "loss": 0.3336, "step": 3980 }, { "epoch": 5.56, "learning_rate": 4.436717663421418e-05, "loss": 0.3497, "step": 4000 }, { "epoch": 5.59, "learning_rate": 4.408901251738526e-05, "loss": 0.4179, "step": 4020 }, { "epoch": 5.62, "learning_rate": 4.381084840055633e-05, "loss": 0.3408, "step": 4040 }, { "epoch": 5.65, "learning_rate": 4.35326842837274e-05, "loss": 0.349, "step": 4060 }, { "epoch": 5.67, "learning_rate": 4.325452016689847e-05, "loss": 0.395, "step": 4080 }, { "epoch": 5.7, "learning_rate": 4.2976356050069544e-05, "loss": 0.3605, "step": 4100 }, { "epoch": 5.73, "learning_rate": 4.269819193324062e-05, "loss": 0.4124, "step": 4120 }, { "epoch": 5.76, "learning_rate": 4.242002781641168e-05, "loss": 0.3448, "step": 4140 }, { "epoch": 5.79, "learning_rate": 4.2141863699582755e-05, "loss": 0.3499, "step": 4160 }, { "epoch": 5.81, "learning_rate": 4.186369958275383e-05, "loss": 0.4002, "step": 4180 }, { "epoch": 5.84, "learning_rate": 4.15855354659249e-05, "loss": 0.368, "step": 4200 }, { "epoch": 5.87, "learning_rate": 4.130737134909597e-05, "loss": 0.3582, "step": 4220 }, { "epoch": 5.9, "learning_rate": 4.102920723226704e-05, "loss": 0.3684, "step": 4240 }, { "epoch": 5.92, "learning_rate": 4.075104311543811e-05, "loss": 0.3802, "step": 4260 }, { "epoch": 5.95, "learning_rate": 4.0472878998609185e-05, "loss": 0.3775, "step": 4280 }, { "epoch": 5.98, "learning_rate": 4.019471488178025e-05, "loss": 0.3537, "step": 4300 }, { "epoch": 6.01, "learning_rate": 3.991655076495132e-05, "loss": 0.3692, "step": 4320 }, { "epoch": 6.04, "learning_rate": 3.9638386648122396e-05, "loss": 0.3038, "step": 4340 }, { "epoch": 6.06, "learning_rate": 3.9360222531293464e-05, "loss": 0.2944, "step": 4360 }, { "epoch": 6.09, "learning_rate": 3.908205841446453e-05, "loss": 0.3075, "step": 4380 }, { "epoch": 6.12, "learning_rate": 3.880389429763561e-05, "loss": 0.2942, "step": 4400 }, { "epoch": 6.15, "learning_rate": 3.852573018080668e-05, "loss": 0.3304, "step": 4420 }, { "epoch": 6.18, "learning_rate": 3.8247566063977743e-05, "loss": 0.3522, "step": 4440 }, { "epoch": 6.2, "learning_rate": 3.796940194714882e-05, "loss": 0.3178, "step": 4460 }, { "epoch": 6.23, "learning_rate": 3.769123783031989e-05, "loss": 0.3312, "step": 4480 }, { "epoch": 6.26, "learning_rate": 3.741307371349096e-05, "loss": 0.3769, "step": 4500 }, { "epoch": 6.29, "learning_rate": 3.713490959666203e-05, "loss": 0.3356, "step": 4520 }, { "epoch": 6.31, "learning_rate": 3.6856745479833105e-05, "loss": 0.3059, "step": 4540 }, { "epoch": 6.34, "learning_rate": 3.657858136300417e-05, "loss": 0.3415, "step": 4560 }, { "epoch": 6.37, "learning_rate": 3.630041724617525e-05, "loss": 0.351, "step": 4580 }, { "epoch": 6.4, "learning_rate": 3.6022253129346316e-05, "loss": 0.326, "step": 4600 }, { "epoch": 6.43, "learning_rate": 3.5744089012517384e-05, "loss": 0.3225, "step": 4620 }, { "epoch": 6.45, "learning_rate": 3.546592489568846e-05, "loss": 0.3527, "step": 4640 }, { "epoch": 6.48, "learning_rate": 3.518776077885953e-05, "loss": 0.3962, "step": 4660 }, { "epoch": 6.51, "learning_rate": 3.4909596662030595e-05, "loss": 0.3648, "step": 4680 }, { "epoch": 6.54, "learning_rate": 3.463143254520167e-05, "loss": 0.3316, "step": 4700 }, { "epoch": 6.56, "learning_rate": 3.4353268428372745e-05, "loss": 0.3111, "step": 4720 }, { "epoch": 6.59, "learning_rate": 3.407510431154381e-05, "loss": 0.2905, "step": 4740 }, { "epoch": 6.62, "learning_rate": 3.379694019471488e-05, "loss": 0.3172, "step": 4760 }, { "epoch": 6.65, "learning_rate": 3.3518776077885956e-05, "loss": 0.3378, "step": 4780 }, { "epoch": 6.68, "learning_rate": 3.3240611961057024e-05, "loss": 0.3621, "step": 4800 }, { "epoch": 6.7, "learning_rate": 3.296244784422809e-05, "loss": 0.3366, "step": 4820 }, { "epoch": 6.73, "learning_rate": 3.268428372739917e-05, "loss": 0.3122, "step": 4840 }, { "epoch": 6.76, "learning_rate": 3.240611961057024e-05, "loss": 0.3339, "step": 4860 }, { "epoch": 6.79, "learning_rate": 3.212795549374131e-05, "loss": 0.3554, "step": 4880 }, { "epoch": 6.82, "learning_rate": 3.184979137691238e-05, "loss": 0.3583, "step": 4900 }, { "epoch": 6.84, "learning_rate": 3.1571627260083454e-05, "loss": 0.3682, "step": 4920 }, { "epoch": 6.87, "learning_rate": 3.129346314325452e-05, "loss": 0.3189, "step": 4940 }, { "epoch": 6.9, "learning_rate": 3.101529902642559e-05, "loss": 0.3654, "step": 4960 }, { "epoch": 6.93, "learning_rate": 3.0737134909596665e-05, "loss": 0.3703, "step": 4980 }, { "epoch": 6.95, "learning_rate": 3.0458970792767733e-05, "loss": 0.3582, "step": 5000 }, { "epoch": 6.98, "learning_rate": 3.0180806675938804e-05, "loss": 0.3698, "step": 5020 }, { "epoch": 7.01, "learning_rate": 2.990264255910988e-05, "loss": 0.3144, "step": 5040 }, { "epoch": 7.04, "learning_rate": 2.9624478442280944e-05, "loss": 0.291, "step": 5060 }, { "epoch": 7.07, "learning_rate": 2.934631432545202e-05, "loss": 0.3118, "step": 5080 }, { "epoch": 7.09, "learning_rate": 2.906815020862309e-05, "loss": 0.3471, "step": 5100 }, { "epoch": 7.12, "learning_rate": 2.878998609179416e-05, "loss": 0.3224, "step": 5120 }, { "epoch": 7.15, "learning_rate": 2.851182197496523e-05, "loss": 0.3167, "step": 5140 }, { "epoch": 7.18, "learning_rate": 2.8233657858136302e-05, "loss": 0.3203, "step": 5160 }, { "epoch": 7.2, "learning_rate": 2.795549374130737e-05, "loss": 0.2913, "step": 5180 }, { "epoch": 7.23, "learning_rate": 2.767732962447844e-05, "loss": 0.2995, "step": 5200 }, { "epoch": 7.26, "learning_rate": 2.7399165507649516e-05, "loss": 0.3112, "step": 5220 }, { "epoch": 7.29, "learning_rate": 2.7121001390820585e-05, "loss": 0.3293, "step": 5240 }, { "epoch": 7.32, "learning_rate": 2.6842837273991656e-05, "loss": 0.2808, "step": 5260 }, { "epoch": 7.34, "learning_rate": 2.6564673157162728e-05, "loss": 0.3126, "step": 5280 }, { "epoch": 7.37, "learning_rate": 2.6286509040333796e-05, "loss": 0.2832, "step": 5300 }, { "epoch": 7.4, "learning_rate": 2.6008344923504867e-05, "loss": 0.3096, "step": 5320 }, { "epoch": 7.43, "learning_rate": 2.5730180806675942e-05, "loss": 0.3065, "step": 5340 }, { "epoch": 7.45, "learning_rate": 2.5452016689847014e-05, "loss": 0.3326, "step": 5360 }, { "epoch": 7.48, "learning_rate": 2.5173852573018082e-05, "loss": 0.326, "step": 5380 }, { "epoch": 7.51, "learning_rate": 2.4895688456189153e-05, "loss": 0.3477, "step": 5400 }, { "epoch": 7.54, "learning_rate": 2.4617524339360225e-05, "loss": 0.2976, "step": 5420 }, { "epoch": 7.57, "learning_rate": 2.4339360222531293e-05, "loss": 0.3201, "step": 5440 }, { "epoch": 7.59, "learning_rate": 2.4061196105702365e-05, "loss": 0.3338, "step": 5460 }, { "epoch": 7.62, "learning_rate": 2.3783031988873436e-05, "loss": 0.324, "step": 5480 }, { "epoch": 7.65, "learning_rate": 2.3504867872044508e-05, "loss": 0.3004, "step": 5500 }, { "epoch": 7.68, "learning_rate": 2.322670375521558e-05, "loss": 0.3271, "step": 5520 }, { "epoch": 7.71, "learning_rate": 2.2948539638386647e-05, "loss": 0.3379, "step": 5540 }, { "epoch": 7.73, "learning_rate": 2.2670375521557722e-05, "loss": 0.284, "step": 5560 }, { "epoch": 7.76, "learning_rate": 2.239221140472879e-05, "loss": 0.3185, "step": 5580 }, { "epoch": 7.79, "learning_rate": 2.2114047287899862e-05, "loss": 0.3303, "step": 5600 }, { "epoch": 7.82, "learning_rate": 2.1835883171070934e-05, "loss": 0.3107, "step": 5620 }, { "epoch": 7.84, "learning_rate": 2.1557719054242005e-05, "loss": 0.3747, "step": 5640 }, { "epoch": 7.87, "learning_rate": 2.1279554937413073e-05, "loss": 0.3003, "step": 5660 }, { "epoch": 7.9, "learning_rate": 2.1001390820584145e-05, "loss": 0.3176, "step": 5680 }, { "epoch": 7.93, "learning_rate": 2.0723226703755216e-05, "loss": 0.2942, "step": 5700 }, { "epoch": 7.96, "learning_rate": 2.0445062586926288e-05, "loss": 0.2932, "step": 5720 }, { "epoch": 7.98, "learning_rate": 2.016689847009736e-05, "loss": 0.2919, "step": 5740 }, { "epoch": 8.01, "learning_rate": 1.9888734353268428e-05, "loss": 0.2664, "step": 5760 }, { "epoch": 8.04, "learning_rate": 1.96105702364395e-05, "loss": 0.2788, "step": 5780 }, { "epoch": 8.07, "learning_rate": 1.933240611961057e-05, "loss": 0.2737, "step": 5800 }, { "epoch": 8.09, "learning_rate": 1.9054242002781642e-05, "loss": 0.287, "step": 5820 }, { "epoch": 8.12, "learning_rate": 1.8776077885952714e-05, "loss": 0.2771, "step": 5840 }, { "epoch": 8.15, "learning_rate": 1.8497913769123785e-05, "loss": 0.2956, "step": 5860 }, { "epoch": 8.18, "learning_rate": 1.8219749652294853e-05, "loss": 0.2837, "step": 5880 }, { "epoch": 8.21, "learning_rate": 1.7941585535465928e-05, "loss": 0.3363, "step": 5900 }, { "epoch": 8.23, "learning_rate": 1.7663421418636996e-05, "loss": 0.3007, "step": 5920 }, { "epoch": 8.26, "learning_rate": 1.7385257301808068e-05, "loss": 0.3177, "step": 5940 }, { "epoch": 8.29, "learning_rate": 1.710709318497914e-05, "loss": 0.3089, "step": 5960 }, { "epoch": 8.32, "learning_rate": 1.6828929068150208e-05, "loss": 0.2654, "step": 5980 }, { "epoch": 8.34, "learning_rate": 1.655076495132128e-05, "loss": 0.2763, "step": 6000 }, { "epoch": 8.37, "learning_rate": 1.627260083449235e-05, "loss": 0.3078, "step": 6020 }, { "epoch": 8.4, "learning_rate": 1.5994436717663422e-05, "loss": 0.2756, "step": 6040 }, { "epoch": 8.43, "learning_rate": 1.571627260083449e-05, "loss": 0.2902, "step": 6060 }, { "epoch": 8.46, "learning_rate": 1.5438108484005565e-05, "loss": 0.2787, "step": 6080 }, { "epoch": 8.48, "learning_rate": 1.5159944367176635e-05, "loss": 0.2727, "step": 6100 }, { "epoch": 8.51, "learning_rate": 1.4881780250347707e-05, "loss": 0.2758, "step": 6120 }, { "epoch": 8.54, "learning_rate": 1.4603616133518777e-05, "loss": 0.3012, "step": 6140 }, { "epoch": 8.57, "learning_rate": 1.4325452016689846e-05, "loss": 0.3084, "step": 6160 }, { "epoch": 8.6, "learning_rate": 1.404728789986092e-05, "loss": 0.2817, "step": 6180 }, { "epoch": 8.62, "learning_rate": 1.376912378303199e-05, "loss": 0.3026, "step": 6200 }, { "epoch": 8.65, "learning_rate": 1.349095966620306e-05, "loss": 0.3288, "step": 6220 }, { "epoch": 8.68, "learning_rate": 1.3212795549374133e-05, "loss": 0.297, "step": 6240 }, { "epoch": 8.71, "learning_rate": 1.2934631432545202e-05, "loss": 0.3321, "step": 6260 }, { "epoch": 8.73, "learning_rate": 1.2656467315716272e-05, "loss": 0.2587, "step": 6280 }, { "epoch": 8.76, "learning_rate": 1.2378303198887344e-05, "loss": 0.3322, "step": 6300 }, { "epoch": 8.79, "learning_rate": 1.2100139082058415e-05, "loss": 0.2704, "step": 6320 }, { "epoch": 8.82, "learning_rate": 1.1821974965229487e-05, "loss": 0.304, "step": 6340 }, { "epoch": 8.85, "learning_rate": 1.1543810848400557e-05, "loss": 0.2947, "step": 6360 }, { "epoch": 8.87, "learning_rate": 1.1265646731571628e-05, "loss": 0.2906, "step": 6380 }, { "epoch": 8.9, "learning_rate": 1.0987482614742698e-05, "loss": 0.2907, "step": 6400 }, { "epoch": 8.93, "learning_rate": 1.070931849791377e-05, "loss": 0.3304, "step": 6420 }, { "epoch": 8.96, "learning_rate": 1.043115438108484e-05, "loss": 0.3089, "step": 6440 }, { "epoch": 8.98, "learning_rate": 1.0152990264255911e-05, "loss": 0.2724, "step": 6460 }, { "epoch": 9.01, "learning_rate": 9.874826147426983e-06, "loss": 0.286, "step": 6480 }, { "epoch": 9.04, "learning_rate": 9.596662030598054e-06, "loss": 0.2835, "step": 6500 }, { "epoch": 9.07, "learning_rate": 9.318497913769124e-06, "loss": 0.262, "step": 6520 }, { "epoch": 9.1, "learning_rate": 9.040333796940195e-06, "loss": 0.2772, "step": 6540 }, { "epoch": 9.12, "learning_rate": 8.762169680111267e-06, "loss": 0.2801, "step": 6560 }, { "epoch": 9.15, "learning_rate": 8.484005563282337e-06, "loss": 0.2681, "step": 6580 }, { "epoch": 9.18, "learning_rate": 8.205841446453408e-06, "loss": 0.2686, "step": 6600 }, { "epoch": 9.21, "learning_rate": 7.92767732962448e-06, "loss": 0.2838, "step": 6620 }, { "epoch": 9.24, "learning_rate": 7.64951321279555e-06, "loss": 0.2886, "step": 6640 }, { "epoch": 9.26, "learning_rate": 7.37134909596662e-06, "loss": 0.2428, "step": 6660 }, { "epoch": 9.29, "learning_rate": 7.093184979137692e-06, "loss": 0.2765, "step": 6680 }, { "epoch": 9.32, "learning_rate": 6.815020862308763e-06, "loss": 0.2936, "step": 6700 }, { "epoch": 9.35, "learning_rate": 6.536856745479833e-06, "loss": 0.3004, "step": 6720 }, { "epoch": 9.37, "learning_rate": 6.258692628650904e-06, "loss": 0.26, "step": 6740 }, { "epoch": 9.4, "learning_rate": 5.980528511821975e-06, "loss": 0.2806, "step": 6760 }, { "epoch": 9.43, "learning_rate": 5.702364394993046e-06, "loss": 0.2628, "step": 6780 }, { "epoch": 9.46, "learning_rate": 5.424200278164117e-06, "loss": 0.2636, "step": 6800 }, { "epoch": 9.49, "learning_rate": 5.1460361613351884e-06, "loss": 0.2652, "step": 6820 }, { "epoch": 9.51, "learning_rate": 4.867872044506259e-06, "loss": 0.2831, "step": 6840 }, { "epoch": 9.54, "learning_rate": 4.58970792767733e-06, "loss": 0.2661, "step": 6860 }, { "epoch": 9.57, "learning_rate": 4.3115438108484005e-06, "loss": 0.2653, "step": 6880 }, { "epoch": 9.6, "learning_rate": 4.033379694019471e-06, "loss": 0.2693, "step": 6900 }, { "epoch": 9.62, "learning_rate": 3.7552155771905427e-06, "loss": 0.2632, "step": 6920 }, { "epoch": 9.65, "learning_rate": 3.4770514603616134e-06, "loss": 0.2788, "step": 6940 }, { "epoch": 9.68, "learning_rate": 3.1988873435326845e-06, "loss": 0.3019, "step": 6960 }, { "epoch": 9.71, "learning_rate": 2.9207232267037552e-06, "loss": 0.2778, "step": 6980 }, { "epoch": 9.74, "learning_rate": 2.6425591098748263e-06, "loss": 0.3145, "step": 7000 }, { "epoch": 9.76, "learning_rate": 2.364394993045897e-06, "loss": 0.2991, "step": 7020 }, { "epoch": 9.79, "learning_rate": 2.086230876216968e-06, "loss": 0.2819, "step": 7040 }, { "epoch": 9.82, "learning_rate": 1.8080667593880393e-06, "loss": 0.2817, "step": 7060 }, { "epoch": 9.85, "learning_rate": 1.52990264255911e-06, "loss": 0.2689, "step": 7080 }, { "epoch": 9.87, "learning_rate": 1.2517385257301808e-06, "loss": 0.2703, "step": 7100 }, { "epoch": 9.9, "learning_rate": 9.735744089012517e-07, "loss": 0.2763, "step": 7120 }, { "epoch": 9.93, "learning_rate": 6.954102920723227e-07, "loss": 0.2825, "step": 7140 }, { "epoch": 9.96, "learning_rate": 4.1724617524339365e-07, "loss": 0.2849, "step": 7160 }, { "epoch": 9.99, "learning_rate": 1.3908205841446452e-07, "loss": 0.3184, "step": 7180 }, { "epoch": 10.0, "step": 7190, "total_flos": 2.920670798324695e+17, "train_loss": 0.4033946073105007, "train_runtime": 2282.7571, "train_samples_per_second": 6.295, "train_steps_per_second": 3.15 } ], "max_steps": 7190, "num_train_epochs": 10, "total_flos": 2.920670798324695e+17, "trial_name": null, "trial_params": null }