{ "best_metric": null, "best_model_checkpoint": null, "epoch": 9.515237104206927, "global_step": 32000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.03, "learning_rate": 5e-05, "loss": 3.5407, "step": 100 }, { "epoch": 0.06, "learning_rate": 0.0001, "loss": 3.2075, "step": 200 }, { "epoch": 0.09, "learning_rate": 0.00015, "loss": 3.0286, "step": 300 }, { "epoch": 0.12, "learning_rate": 0.0002, "loss": 2.8212, "step": 400 }, { "epoch": 0.15, "learning_rate": 0.00025, "loss": 2.3586, "step": 500 }, { "epoch": 0.18, "learning_rate": 0.0003, "loss": 1.676, "step": 600 }, { "epoch": 0.21, "learning_rate": 0.00035, "loss": 1.3696, "step": 700 }, { "epoch": 0.24, "learning_rate": 0.0004, "loss": 1.2677, "step": 800 }, { "epoch": 0.27, "learning_rate": 0.00045000000000000004, "loss": 1.2271, "step": 900 }, { "epoch": 0.3, "learning_rate": 0.0005, "loss": 1.2006, "step": 1000 }, { "epoch": 0.33, "learning_rate": 0.000498467667790377, "loss": 1.1846, "step": 1100 }, { "epoch": 0.36, "learning_rate": 0.0004969353355807539, "loss": 1.1663, "step": 1200 }, { "epoch": 0.39, "learning_rate": 0.0004954030033711309, "loss": 1.1429, "step": 1300 }, { "epoch": 0.42, "learning_rate": 0.0004938706711615078, "loss": 1.1384, "step": 1400 }, { "epoch": 0.45, "learning_rate": 0.0004923383389518848, "loss": 1.1353, "step": 1500 }, { "epoch": 0.48, "learning_rate": 0.0004908060067422617, "loss": 1.1384, "step": 1600 }, { "epoch": 0.51, "learning_rate": 0.0004892736745326388, "loss": 1.1461, "step": 1700 }, { "epoch": 0.54, "learning_rate": 0.00048774134232301567, "loss": 1.1333, "step": 1800 }, { "epoch": 0.56, "learning_rate": 0.0004862090101133926, "loss": 1.1205, "step": 1900 }, { "epoch": 0.59, "learning_rate": 0.00048467667790376954, "loss": 1.1141, "step": 2000 }, { "epoch": 0.62, "learning_rate": 0.0004831443456941465, "loss": 1.1078, "step": 2100 }, { "epoch": 0.65, "learning_rate": 0.0004816120134845234, "loss": 1.1006, "step": 2200 }, { "epoch": 0.68, "learning_rate": 0.00048007968127490044, "loss": 1.0978, "step": 2300 }, { "epoch": 0.71, "learning_rate": 0.00047854734906527735, "loss": 1.0894, "step": 2400 }, { "epoch": 0.74, "learning_rate": 0.0004770150168556543, "loss": 1.0861, "step": 2500 }, { "epoch": 0.77, "learning_rate": 0.0004754826846460313, "loss": 1.083, "step": 2600 }, { "epoch": 0.8, "learning_rate": 0.00047395035243640824, "loss": 1.0758, "step": 2700 }, { "epoch": 0.83, "learning_rate": 0.00047241802022678515, "loss": 1.076, "step": 2800 }, { "epoch": 0.86, "learning_rate": 0.00047088568801716217, "loss": 1.0794, "step": 2900 }, { "epoch": 0.89, "learning_rate": 0.0004693533558075391, "loss": 1.0706, "step": 3000 }, { "epoch": 0.92, "learning_rate": 0.00046782102359791604, "loss": 1.0725, "step": 3100 }, { "epoch": 0.95, "learning_rate": 0.000466288691388293, "loss": 1.069, "step": 3200 }, { "epoch": 0.98, "learning_rate": 0.0004647563591786699, "loss": 1.0674, "step": 3300 }, { "epoch": 1.01, "learning_rate": 0.00046322402696904693, "loss": 1.066, "step": 3400 }, { "epoch": 1.04, "learning_rate": 0.00046169169475942384, "loss": 1.0569, "step": 3500 }, { "epoch": 1.07, "learning_rate": 0.0004601593625498008, "loss": 1.0579, "step": 3600 }, { "epoch": 1.1, "learning_rate": 0.00045862703034017777, "loss": 1.0615, "step": 3700 }, { "epoch": 1.13, "learning_rate": 0.00045709469813055473, "loss": 1.055, "step": 3800 }, { "epoch": 1.16, "learning_rate": 0.00045556236592093164, "loss": 1.0583, "step": 3900 }, { "epoch": 1.19, "learning_rate": 0.0004540300337113086, "loss": 1.0537, "step": 4000 }, { "epoch": 1.22, "learning_rate": 0.00045249770150168557, "loss": 1.0531, "step": 4100 }, { "epoch": 1.25, "learning_rate": 0.00045096536929206254, "loss": 1.0507, "step": 4200 }, { "epoch": 1.28, "learning_rate": 0.0004494330370824395, "loss": 1.0449, "step": 4300 }, { "epoch": 1.31, "learning_rate": 0.0004479007048728164, "loss": 1.0463, "step": 4400 }, { "epoch": 1.34, "learning_rate": 0.00044636837266319343, "loss": 1.0495, "step": 4500 }, { "epoch": 1.37, "learning_rate": 0.00044483604045357034, "loss": 1.0489, "step": 4600 }, { "epoch": 1.4, "learning_rate": 0.0004433037082439473, "loss": 1.043, "step": 4700 }, { "epoch": 1.43, "learning_rate": 0.00044177137603432427, "loss": 1.0404, "step": 4800 }, { "epoch": 1.46, "learning_rate": 0.0004402390438247012, "loss": 1.0448, "step": 4900 }, { "epoch": 1.49, "learning_rate": 0.00043870671161507814, "loss": 1.0378, "step": 5000 }, { "epoch": 1.52, "learning_rate": 0.0004371743794054551, "loss": 1.0359, "step": 5100 }, { "epoch": 1.55, "learning_rate": 0.00043564204719583207, "loss": 1.0419, "step": 5200 }, { "epoch": 1.58, "learning_rate": 0.000434109714986209, "loss": 1.0332, "step": 5300 }, { "epoch": 1.61, "learning_rate": 0.000432577382776586, "loss": 1.0382, "step": 5400 }, { "epoch": 1.64, "learning_rate": 0.0004310450505669629, "loss": 1.0312, "step": 5500 }, { "epoch": 1.67, "learning_rate": 0.0004295127183573399, "loss": 1.0377, "step": 5600 }, { "epoch": 1.69, "learning_rate": 0.00042798038614771683, "loss": 1.0296, "step": 5700 }, { "epoch": 1.72, "learning_rate": 0.00042644805393809374, "loss": 1.0316, "step": 5800 }, { "epoch": 1.75, "learning_rate": 0.00042491572172847076, "loss": 1.0322, "step": 5900 }, { "epoch": 1.78, "learning_rate": 0.0004233833895188477, "loss": 1.0325, "step": 6000 }, { "epoch": 1.81, "learning_rate": 0.00042185105730922464, "loss": 1.0307, "step": 6100 }, { "epoch": 1.84, "learning_rate": 0.0004203187250996016, "loss": 1.0297, "step": 6200 }, { "epoch": 1.87, "learning_rate": 0.00041878639288997857, "loss": 1.031, "step": 6300 }, { "epoch": 1.9, "learning_rate": 0.0004172540606803555, "loss": 1.0304, "step": 6400 }, { "epoch": 1.93, "learning_rate": 0.0004157217284707325, "loss": 1.0278, "step": 6500 }, { "epoch": 1.96, "learning_rate": 0.0004141893962611094, "loss": 1.0211, "step": 6600 }, { "epoch": 1.99, "learning_rate": 0.0004126570640514864, "loss": 1.0248, "step": 6700 }, { "epoch": 2.02, "learning_rate": 0.00041112473184186333, "loss": 1.0319, "step": 6800 }, { "epoch": 2.05, "learning_rate": 0.00040959239963224024, "loss": 1.0301, "step": 6900 }, { "epoch": 2.08, "learning_rate": 0.00040806006742261726, "loss": 1.0295, "step": 7000 }, { "epoch": 2.11, "learning_rate": 0.00040652773521299417, "loss": 1.0247, "step": 7100 }, { "epoch": 2.14, "learning_rate": 0.00040499540300337113, "loss": 1.0205, "step": 7200 }, { "epoch": 2.17, "learning_rate": 0.0004034630707937481, "loss": 1.0221, "step": 7300 }, { "epoch": 2.2, "learning_rate": 0.00040193073858412506, "loss": 1.0251, "step": 7400 }, { "epoch": 2.23, "learning_rate": 0.00040039840637450197, "loss": 1.0164, "step": 7500 }, { "epoch": 2.26, "learning_rate": 0.000398866074164879, "loss": 1.019, "step": 7600 }, { "epoch": 2.29, "learning_rate": 0.0003973337419552559, "loss": 1.0167, "step": 7700 }, { "epoch": 2.32, "learning_rate": 0.0003958014097456328, "loss": 1.0202, "step": 7800 }, { "epoch": 2.35, "learning_rate": 0.00039426907753600983, "loss": 1.0183, "step": 7900 }, { "epoch": 2.38, "learning_rate": 0.00039273674532638674, "loss": 1.0234, "step": 8000 }, { "epoch": 2.41, "learning_rate": 0.00039120441311676376, "loss": 1.0103, "step": 8100 }, { "epoch": 2.44, "learning_rate": 0.00038967208090714067, "loss": 1.0196, "step": 8200 }, { "epoch": 2.47, "learning_rate": 0.00038813974869751763, "loss": 1.0147, "step": 8300 }, { "epoch": 2.5, "learning_rate": 0.0003866074164878946, "loss": 1.0138, "step": 8400 }, { "epoch": 2.53, "learning_rate": 0.00038507508427827156, "loss": 1.0151, "step": 8500 }, { "epoch": 2.56, "learning_rate": 0.00038354275206864847, "loss": 1.0118, "step": 8600 }, { "epoch": 2.59, "learning_rate": 0.0003820104198590255, "loss": 1.014, "step": 8700 }, { "epoch": 2.62, "learning_rate": 0.0003804780876494024, "loss": 1.0096, "step": 8800 }, { "epoch": 2.65, "learning_rate": 0.0003789457554397793, "loss": 1.0092, "step": 8900 }, { "epoch": 2.68, "learning_rate": 0.0003774134232301563, "loss": 1.0096, "step": 9000 }, { "epoch": 2.71, "learning_rate": 0.00037588109102053323, "loss": 1.0148, "step": 9100 }, { "epoch": 2.74, "learning_rate": 0.00037434875881091025, "loss": 1.0102, "step": 9200 }, { "epoch": 2.77, "learning_rate": 0.00037281642660128716, "loss": 1.0095, "step": 9300 }, { "epoch": 2.8, "learning_rate": 0.0003712840943916641, "loss": 1.0099, "step": 9400 }, { "epoch": 2.82, "learning_rate": 0.0003697517621820411, "loss": 1.0083, "step": 9500 }, { "epoch": 2.85, "learning_rate": 0.00036821942997241805, "loss": 1.0093, "step": 9600 }, { "epoch": 2.88, "learning_rate": 0.00036668709776279496, "loss": 1.0023, "step": 9700 }, { "epoch": 2.91, "learning_rate": 0.00036515476555317193, "loss": 1.0058, "step": 9800 }, { "epoch": 2.94, "learning_rate": 0.0003636224333435489, "loss": 1.0088, "step": 9900 }, { "epoch": 2.97, "learning_rate": 0.0003620901011339258, "loss": 1.0046, "step": 10000 }, { "epoch": 3.0, "learning_rate": 0.0003605577689243028, "loss": 1.0142, "step": 10100 }, { "epoch": 3.03, "learning_rate": 0.00035902543671467973, "loss": 1.0031, "step": 10200 }, { "epoch": 3.06, "learning_rate": 0.0003574931045050567, "loss": 1.006, "step": 10300 }, { "epoch": 3.09, "learning_rate": 0.00035596077229543366, "loss": 1.0019, "step": 10400 }, { "epoch": 3.12, "learning_rate": 0.0003544284400858106, "loss": 1.0023, "step": 10500 }, { "epoch": 3.15, "learning_rate": 0.0003528961078761876, "loss": 0.9993, "step": 10600 }, { "epoch": 3.18, "learning_rate": 0.0003513637756665645, "loss": 0.9987, "step": 10700 }, { "epoch": 3.21, "learning_rate": 0.00034983144345694146, "loss": 0.9987, "step": 10800 }, { "epoch": 3.24, "learning_rate": 0.0003482991112473184, "loss": 1.005, "step": 10900 }, { "epoch": 3.27, "learning_rate": 0.0003467667790376954, "loss": 0.9966, "step": 11000 }, { "epoch": 3.3, "learning_rate": 0.0003452344468280723, "loss": 0.9986, "step": 11100 }, { "epoch": 3.33, "learning_rate": 0.0003437021146184493, "loss": 0.9973, "step": 11200 }, { "epoch": 3.36, "learning_rate": 0.00034216978240882623, "loss": 1.0011, "step": 11300 }, { "epoch": 3.39, "learning_rate": 0.0003406374501992032, "loss": 0.9944, "step": 11400 }, { "epoch": 3.42, "learning_rate": 0.00033910511798958016, "loss": 0.996, "step": 11500 }, { "epoch": 3.45, "learning_rate": 0.00033757278577995707, "loss": 0.9976, "step": 11600 }, { "epoch": 3.48, "learning_rate": 0.0003360404535703341, "loss": 0.9931, "step": 11700 }, { "epoch": 3.51, "learning_rate": 0.000334508121360711, "loss": 0.9921, "step": 11800 }, { "epoch": 3.54, "learning_rate": 0.00033297578915108796, "loss": 0.9911, "step": 11900 }, { "epoch": 3.57, "learning_rate": 0.0003314434569414649, "loss": 0.9916, "step": 12000 }, { "epoch": 3.6, "learning_rate": 0.0003299111247318419, "loss": 0.9921, "step": 12100 }, { "epoch": 3.63, "learning_rate": 0.0003283787925222188, "loss": 0.991, "step": 12200 }, { "epoch": 3.66, "learning_rate": 0.0003268464603125958, "loss": 0.9971, "step": 12300 }, { "epoch": 3.69, "learning_rate": 0.0003253141281029727, "loss": 0.995, "step": 12400 }, { "epoch": 3.72, "learning_rate": 0.0003237817958933497, "loss": 0.9891, "step": 12500 }, { "epoch": 3.75, "learning_rate": 0.00032224946368372665, "loss": 0.9907, "step": 12600 }, { "epoch": 3.78, "learning_rate": 0.00032071713147410356, "loss": 0.9912, "step": 12700 }, { "epoch": 3.81, "learning_rate": 0.0003191847992644805, "loss": 0.9873, "step": 12800 }, { "epoch": 3.84, "learning_rate": 0.0003176524670548575, "loss": 0.9868, "step": 12900 }, { "epoch": 3.87, "learning_rate": 0.00031612013484523445, "loss": 0.9845, "step": 13000 }, { "epoch": 3.9, "learning_rate": 0.0003145878026356114, "loss": 0.9836, "step": 13100 }, { "epoch": 3.92, "learning_rate": 0.0003130554704259884, "loss": 0.986, "step": 13200 }, { "epoch": 3.95, "learning_rate": 0.0003115231382163653, "loss": 0.9902, "step": 13300 }, { "epoch": 3.98, "learning_rate": 0.0003099908060067423, "loss": 0.983, "step": 13400 }, { "epoch": 4.01, "learning_rate": 0.0003084584737971192, "loss": 0.9872, "step": 13500 }, { "epoch": 4.04, "learning_rate": 0.00030692614158749613, "loss": 0.9844, "step": 13600 }, { "epoch": 4.07, "learning_rate": 0.00030539380937787315, "loss": 0.9867, "step": 13700 }, { "epoch": 4.1, "learning_rate": 0.00030386147716825006, "loss": 0.9821, "step": 13800 }, { "epoch": 4.13, "learning_rate": 0.000302329144958627, "loss": 0.9809, "step": 13900 }, { "epoch": 4.16, "learning_rate": 0.000300796812749004, "loss": 0.984, "step": 14000 }, { "epoch": 4.19, "learning_rate": 0.00029926448053938095, "loss": 0.9767, "step": 14100 }, { "epoch": 4.22, "learning_rate": 0.0002977321483297579, "loss": 0.9819, "step": 14200 }, { "epoch": 4.25, "learning_rate": 0.0002961998161201349, "loss": 0.9811, "step": 14300 }, { "epoch": 4.28, "learning_rate": 0.0002946674839105118, "loss": 0.9791, "step": 14400 }, { "epoch": 4.31, "learning_rate": 0.00029313515170088875, "loss": 0.9783, "step": 14500 }, { "epoch": 4.34, "learning_rate": 0.0002916028194912657, "loss": 0.9878, "step": 14600 }, { "epoch": 4.37, "learning_rate": 0.00029007048728164263, "loss": 0.975, "step": 14700 }, { "epoch": 4.4, "learning_rate": 0.00028853815507201965, "loss": 0.9775, "step": 14800 }, { "epoch": 4.43, "learning_rate": 0.00028700582286239656, "loss": 0.9775, "step": 14900 }, { "epoch": 4.46, "learning_rate": 0.0002854734906527735, "loss": 0.9786, "step": 15000 }, { "epoch": 4.49, "learning_rate": 0.0002839411584431505, "loss": 0.9753, "step": 15100 }, { "epoch": 4.52, "learning_rate": 0.00028240882623352745, "loss": 0.9841, "step": 15200 }, { "epoch": 4.55, "learning_rate": 0.00028087649402390436, "loss": 0.9716, "step": 15300 }, { "epoch": 4.58, "learning_rate": 0.0002793441618142814, "loss": 0.9774, "step": 15400 }, { "epoch": 4.61, "learning_rate": 0.0002778118296046583, "loss": 0.9723, "step": 15500 }, { "epoch": 4.64, "learning_rate": 0.00027627949739503525, "loss": 0.9702, "step": 15600 }, { "epoch": 4.67, "learning_rate": 0.0002747471651854122, "loss": 0.9766, "step": 15700 }, { "epoch": 4.7, "learning_rate": 0.0002732148329757891, "loss": 0.9843, "step": 15800 }, { "epoch": 4.73, "learning_rate": 0.00027168250076616614, "loss": 0.9701, "step": 15900 }, { "epoch": 4.76, "learning_rate": 0.00027015016855654305, "loss": 0.9715, "step": 16000 }, { "epoch": 4.79, "learning_rate": 0.00026861783634692, "loss": 0.9695, "step": 16100 }, { "epoch": 4.82, "learning_rate": 0.000267085504137297, "loss": 0.9699, "step": 16200 }, { "epoch": 4.85, "learning_rate": 0.00026555317192767394, "loss": 0.9665, "step": 16300 }, { "epoch": 4.88, "learning_rate": 0.00026402083971805085, "loss": 0.9681, "step": 16400 }, { "epoch": 4.91, "learning_rate": 0.0002624885075084278, "loss": 0.9697, "step": 16500 }, { "epoch": 4.94, "learning_rate": 0.0002609561752988048, "loss": 0.9662, "step": 16600 }, { "epoch": 4.97, "learning_rate": 0.00025942384308918175, "loss": 0.965, "step": 16700 }, { "epoch": 5.0, "learning_rate": 0.0002578915108795587, "loss": 0.9655, "step": 16800 }, { "epoch": 5.03, "learning_rate": 0.0002563591786699356, "loss": 0.9689, "step": 16900 }, { "epoch": 5.06, "learning_rate": 0.00025482684646031264, "loss": 0.9641, "step": 17000 }, { "epoch": 5.08, "learning_rate": 0.00025329451425068955, "loss": 0.9612, "step": 17100 }, { "epoch": 5.11, "learning_rate": 0.0002517621820410665, "loss": 0.9667, "step": 17200 }, { "epoch": 5.14, "learning_rate": 0.0002502298498314435, "loss": 0.9623, "step": 17300 }, { "epoch": 5.17, "learning_rate": 0.0002486975176218204, "loss": 0.9611, "step": 17400 }, { "epoch": 5.2, "learning_rate": 0.00024716518541219735, "loss": 0.956, "step": 17500 }, { "epoch": 5.23, "learning_rate": 0.0002456328532025743, "loss": 0.9623, "step": 17600 }, { "epoch": 5.26, "learning_rate": 0.00024410052099295128, "loss": 0.9577, "step": 17700 }, { "epoch": 5.29, "learning_rate": 0.00024256818878332824, "loss": 0.9584, "step": 17800 }, { "epoch": 5.32, "learning_rate": 0.00024103585657370518, "loss": 0.9595, "step": 17900 }, { "epoch": 5.35, "learning_rate": 0.00023950352436408212, "loss": 0.954, "step": 18000 }, { "epoch": 5.38, "learning_rate": 0.00023797119215445908, "loss": 0.958, "step": 18100 }, { "epoch": 5.41, "learning_rate": 0.00023643885994483605, "loss": 0.9575, "step": 18200 }, { "epoch": 5.44, "learning_rate": 0.000234906527735213, "loss": 0.9499, "step": 18300 }, { "epoch": 5.47, "learning_rate": 0.00023337419552558995, "loss": 0.9583, "step": 18400 }, { "epoch": 5.5, "learning_rate": 0.0002318418633159669, "loss": 0.9547, "step": 18500 }, { "epoch": 5.53, "learning_rate": 0.00023030953110634387, "loss": 0.9531, "step": 18600 }, { "epoch": 5.56, "learning_rate": 0.0002287771988967208, "loss": 0.9566, "step": 18700 }, { "epoch": 5.59, "learning_rate": 0.00022724486668709778, "loss": 0.9519, "step": 18800 }, { "epoch": 5.62, "learning_rate": 0.0002257125344774747, "loss": 0.9473, "step": 18900 }, { "epoch": 5.65, "learning_rate": 0.00022418020226785168, "loss": 0.9496, "step": 19000 }, { "epoch": 5.68, "learning_rate": 0.00022264787005822861, "loss": 0.9469, "step": 19100 }, { "epoch": 5.71, "learning_rate": 0.00022111553784860558, "loss": 0.9509, "step": 19200 }, { "epoch": 5.74, "learning_rate": 0.00021958320563898254, "loss": 0.9466, "step": 19300 }, { "epoch": 5.77, "learning_rate": 0.0002180508734293595, "loss": 0.9499, "step": 19400 }, { "epoch": 5.8, "learning_rate": 0.00021651854121973644, "loss": 0.9498, "step": 19500 }, { "epoch": 5.83, "learning_rate": 0.0002149862090101134, "loss": 0.9483, "step": 19600 }, { "epoch": 5.86, "learning_rate": 0.00021345387680049037, "loss": 0.9522, "step": 19700 }, { "epoch": 5.89, "learning_rate": 0.00021192154459086728, "loss": 0.9441, "step": 19800 }, { "epoch": 5.92, "learning_rate": 0.00021038921238124425, "loss": 0.9492, "step": 19900 }, { "epoch": 5.95, "learning_rate": 0.0002088568801716212, "loss": 0.9421, "step": 20000 }, { "epoch": 5.98, "learning_rate": 0.00020732454796199817, "loss": 0.9432, "step": 20100 }, { "epoch": 6.01, "learning_rate": 0.0002057922157523751, "loss": 0.9542, "step": 20200 }, { "epoch": 6.04, "learning_rate": 0.00020425988354275207, "loss": 0.9426, "step": 20300 }, { "epoch": 6.07, "learning_rate": 0.00020272755133312904, "loss": 0.9484, "step": 20400 }, { "epoch": 6.1, "learning_rate": 0.00020119521912350598, "loss": 0.9473, "step": 20500 }, { "epoch": 6.13, "learning_rate": 0.00019966288691388294, "loss": 0.9413, "step": 20600 }, { "epoch": 6.16, "learning_rate": 0.0001981305547042599, "loss": 0.9438, "step": 20700 }, { "epoch": 6.18, "learning_rate": 0.00019659822249463684, "loss": 0.9421, "step": 20800 }, { "epoch": 6.21, "learning_rate": 0.00019506589028501378, "loss": 0.9406, "step": 20900 }, { "epoch": 6.24, "learning_rate": 0.00019353355807539074, "loss": 0.9384, "step": 21000 }, { "epoch": 6.27, "learning_rate": 0.0001920012258657677, "loss": 0.9397, "step": 21100 }, { "epoch": 6.3, "learning_rate": 0.00019046889365614464, "loss": 0.9367, "step": 21200 }, { "epoch": 6.33, "learning_rate": 0.0001889365614465216, "loss": 0.9402, "step": 21300 }, { "epoch": 6.36, "learning_rate": 0.00018740422923689857, "loss": 0.9319, "step": 21400 }, { "epoch": 6.39, "learning_rate": 0.00018587189702727554, "loss": 0.9385, "step": 21500 }, { "epoch": 6.42, "learning_rate": 0.00018433956481765247, "loss": 0.939, "step": 21600 }, { "epoch": 6.45, "learning_rate": 0.0001828072326080294, "loss": 0.9399, "step": 21700 }, { "epoch": 6.48, "learning_rate": 0.00018127490039840637, "loss": 0.9407, "step": 21800 }, { "epoch": 6.51, "learning_rate": 0.00017974256818878334, "loss": 0.94, "step": 21900 }, { "epoch": 6.54, "learning_rate": 0.00017821023597916027, "loss": 0.9407, "step": 22000 }, { "epoch": 6.57, "learning_rate": 0.00017667790376953724, "loss": 0.9353, "step": 22100 }, { "epoch": 6.6, "learning_rate": 0.0001751455715599142, "loss": 0.9405, "step": 22200 }, { "epoch": 6.63, "learning_rate": 0.00017361323935029114, "loss": 0.9305, "step": 22300 }, { "epoch": 6.66, "learning_rate": 0.0001720809071406681, "loss": 0.938, "step": 22400 }, { "epoch": 6.69, "learning_rate": 0.00017054857493104507, "loss": 0.9311, "step": 22500 }, { "epoch": 6.72, "learning_rate": 0.00016901624272142203, "loss": 0.9343, "step": 22600 }, { "epoch": 6.75, "learning_rate": 0.00016748391051179894, "loss": 0.9312, "step": 22700 }, { "epoch": 6.78, "learning_rate": 0.0001659515783021759, "loss": 0.9353, "step": 22800 }, { "epoch": 6.81, "learning_rate": 0.00016441924609255287, "loss": 0.9341, "step": 22900 }, { "epoch": 6.84, "learning_rate": 0.0001628869138829298, "loss": 0.9338, "step": 23000 }, { "epoch": 6.87, "learning_rate": 0.00016135458167330677, "loss": 0.9318, "step": 23100 }, { "epoch": 6.9, "learning_rate": 0.00015982224946368373, "loss": 0.9309, "step": 23200 }, { "epoch": 6.93, "learning_rate": 0.0001582899172540607, "loss": 0.9291, "step": 23300 }, { "epoch": 6.96, "learning_rate": 0.00015675758504443764, "loss": 0.9307, "step": 23400 }, { "epoch": 6.99, "learning_rate": 0.0001552252528348146, "loss": 0.9325, "step": 23500 }, { "epoch": 7.02, "learning_rate": 0.00015369292062519156, "loss": 0.9363, "step": 23600 }, { "epoch": 7.05, "learning_rate": 0.00015216058841556847, "loss": 0.9325, "step": 23700 }, { "epoch": 7.08, "learning_rate": 0.00015062825620594544, "loss": 0.9276, "step": 23800 }, { "epoch": 7.11, "learning_rate": 0.0001490959239963224, "loss": 0.9328, "step": 23900 }, { "epoch": 7.14, "learning_rate": 0.00014756359178669937, "loss": 0.9304, "step": 24000 }, { "epoch": 7.17, "learning_rate": 0.0001460312595770763, "loss": 0.9274, "step": 24100 }, { "epoch": 7.2, "learning_rate": 0.00014449892736745327, "loss": 0.9261, "step": 24200 }, { "epoch": 7.23, "learning_rate": 0.00014296659515783023, "loss": 0.9245, "step": 24300 }, { "epoch": 7.26, "learning_rate": 0.0001414342629482072, "loss": 0.9233, "step": 24400 }, { "epoch": 7.29, "learning_rate": 0.00013990193073858413, "loss": 0.9275, "step": 24500 }, { "epoch": 7.31, "learning_rate": 0.00013836959852896107, "loss": 0.9265, "step": 24600 }, { "epoch": 7.34, "learning_rate": 0.00013683726631933803, "loss": 0.9276, "step": 24700 }, { "epoch": 7.37, "learning_rate": 0.00013530493410971497, "loss": 0.9252, "step": 24800 }, { "epoch": 7.4, "learning_rate": 0.00013377260190009193, "loss": 0.9224, "step": 24900 }, { "epoch": 7.43, "learning_rate": 0.0001322402696904689, "loss": 0.9216, "step": 25000 }, { "epoch": 7.46, "learning_rate": 0.00013070793748084586, "loss": 0.9233, "step": 25100 }, { "epoch": 7.49, "learning_rate": 0.0001291756052712228, "loss": 0.9275, "step": 25200 }, { "epoch": 7.52, "learning_rate": 0.00012764327306159976, "loss": 0.9229, "step": 25300 }, { "epoch": 7.55, "learning_rate": 0.00012611094085197673, "loss": 0.922, "step": 25400 }, { "epoch": 7.58, "learning_rate": 0.00012457860864235367, "loss": 0.9255, "step": 25500 }, { "epoch": 7.61, "learning_rate": 0.0001230462764327306, "loss": 0.9196, "step": 25600 }, { "epoch": 7.64, "learning_rate": 0.00012151394422310758, "loss": 0.9198, "step": 25700 }, { "epoch": 7.67, "learning_rate": 0.00011998161201348452, "loss": 0.9226, "step": 25800 }, { "epoch": 7.7, "learning_rate": 0.00011844927980386148, "loss": 0.9174, "step": 25900 }, { "epoch": 7.73, "learning_rate": 0.00011691694759423843, "loss": 0.9191, "step": 26000 }, { "epoch": 7.76, "learning_rate": 0.0001153846153846154, "loss": 0.9207, "step": 26100 }, { "epoch": 7.79, "learning_rate": 0.00011385228317499235, "loss": 0.9225, "step": 26200 }, { "epoch": 7.82, "learning_rate": 0.00011231995096536928, "loss": 0.9198, "step": 26300 }, { "epoch": 7.85, "learning_rate": 0.00011078761875574625, "loss": 0.9183, "step": 26400 }, { "epoch": 7.88, "learning_rate": 0.0001092552865461232, "loss": 0.919, "step": 26500 }, { "epoch": 7.91, "learning_rate": 0.00010772295433650016, "loss": 0.9193, "step": 26600 }, { "epoch": 7.94, "learning_rate": 0.00010619062212687711, "loss": 0.9205, "step": 26700 }, { "epoch": 7.97, "learning_rate": 0.00010465828991725406, "loss": 0.92, "step": 26800 }, { "epoch": 8.0, "learning_rate": 0.00010312595770763101, "loss": 0.9192, "step": 26900 }, { "epoch": 8.03, "learning_rate": 0.00010159362549800798, "loss": 0.9186, "step": 27000 }, { "epoch": 8.06, "learning_rate": 0.00010006129328838493, "loss": 0.9176, "step": 27100 }, { "epoch": 8.09, "learning_rate": 9.852896107876188e-05, "loss": 0.9136, "step": 27200 }, { "epoch": 8.12, "learning_rate": 9.699662886913883e-05, "loss": 0.9116, "step": 27300 }, { "epoch": 8.15, "learning_rate": 9.546429665951578e-05, "loss": 0.9174, "step": 27400 }, { "epoch": 8.18, "learning_rate": 9.393196444989274e-05, "loss": 0.9156, "step": 27500 }, { "epoch": 8.21, "learning_rate": 9.23996322402697e-05, "loss": 0.912, "step": 27600 }, { "epoch": 8.24, "learning_rate": 9.086730003064666e-05, "loss": 0.9142, "step": 27700 }, { "epoch": 8.27, "learning_rate": 8.93349678210236e-05, "loss": 0.9099, "step": 27800 }, { "epoch": 8.3, "learning_rate": 8.780263561140055e-05, "loss": 0.9129, "step": 27900 }, { "epoch": 8.33, "learning_rate": 8.627030340177751e-05, "loss": 0.9145, "step": 28000 }, { "epoch": 8.36, "learning_rate": 8.473797119215446e-05, "loss": 0.9117, "step": 28100 }, { "epoch": 8.39, "learning_rate": 8.320563898253141e-05, "loss": 0.9112, "step": 28200 }, { "epoch": 8.42, "learning_rate": 8.167330677290836e-05, "loss": 0.9128, "step": 28300 }, { "epoch": 8.44, "learning_rate": 8.014097456328533e-05, "loss": 0.9122, "step": 28400 }, { "epoch": 8.47, "learning_rate": 7.860864235366228e-05, "loss": 0.9113, "step": 28500 }, { "epoch": 8.5, "learning_rate": 7.707631014403924e-05, "loss": 0.9115, "step": 28600 }, { "epoch": 8.53, "learning_rate": 7.554397793441618e-05, "loss": 0.9098, "step": 28700 }, { "epoch": 8.56, "learning_rate": 7.401164572479313e-05, "loss": 0.9101, "step": 28800 }, { "epoch": 8.59, "learning_rate": 7.247931351517009e-05, "loss": 0.9063, "step": 28900 }, { "epoch": 8.62, "learning_rate": 7.094698130554704e-05, "loss": 0.913, "step": 29000 }, { "epoch": 8.65, "learning_rate": 6.9414649095924e-05, "loss": 0.9092, "step": 29100 }, { "epoch": 8.68, "learning_rate": 6.788231688630094e-05, "loss": 0.9101, "step": 29200 }, { "epoch": 8.71, "learning_rate": 6.634998467667791e-05, "loss": 0.9089, "step": 29300 }, { "epoch": 8.74, "learning_rate": 6.481765246705486e-05, "loss": 0.9108, "step": 29400 }, { "epoch": 8.77, "learning_rate": 6.328532025743182e-05, "loss": 0.9065, "step": 29500 }, { "epoch": 8.8, "learning_rate": 6.175298804780877e-05, "loss": 0.9129, "step": 29600 }, { "epoch": 8.83, "learning_rate": 6.022065583818572e-05, "loss": 0.9097, "step": 29700 }, { "epoch": 8.86, "learning_rate": 5.8688323628562674e-05, "loss": 0.9115, "step": 29800 }, { "epoch": 8.89, "learning_rate": 5.715599141893963e-05, "loss": 0.9088, "step": 29900 }, { "epoch": 8.92, "learning_rate": 5.5623659209316575e-05, "loss": 0.9112, "step": 30000 }, { "epoch": 8.95, "learning_rate": 5.409132699969353e-05, "loss": 0.9086, "step": 30100 }, { "epoch": 8.98, "learning_rate": 5.255899479007049e-05, "loss": 0.9106, "step": 30200 }, { "epoch": 9.01, "learning_rate": 5.102666258044744e-05, "loss": 0.9104, "step": 30300 }, { "epoch": 9.04, "learning_rate": 4.94943303708244e-05, "loss": 0.9037, "step": 30400 }, { "epoch": 9.07, "learning_rate": 4.796199816120135e-05, "loss": 0.9082, "step": 30500 }, { "epoch": 9.1, "learning_rate": 4.6429665951578305e-05, "loss": 0.9041, "step": 30600 }, { "epoch": 9.13, "learning_rate": 4.489733374195526e-05, "loss": 0.9025, "step": 30700 }, { "epoch": 9.16, "learning_rate": 4.3365001532332206e-05, "loss": 0.9006, "step": 30800 }, { "epoch": 9.19, "learning_rate": 4.1832669322709164e-05, "loss": 0.9072, "step": 30900 }, { "epoch": 9.22, "learning_rate": 4.0300337113086114e-05, "loss": 0.9038, "step": 31000 }, { "epoch": 9.25, "learning_rate": 3.876800490346307e-05, "loss": 0.9072, "step": 31100 }, { "epoch": 9.28, "learning_rate": 3.723567269384002e-05, "loss": 0.9017, "step": 31200 }, { "epoch": 9.31, "learning_rate": 3.570334048421698e-05, "loss": 0.9032, "step": 31300 }, { "epoch": 9.34, "learning_rate": 3.4171008274593937e-05, "loss": 0.9026, "step": 31400 }, { "epoch": 9.37, "learning_rate": 3.263867606497089e-05, "loss": 0.9008, "step": 31500 }, { "epoch": 9.4, "learning_rate": 3.110634385534784e-05, "loss": 0.9065, "step": 31600 }, { "epoch": 9.43, "learning_rate": 2.9574011645724795e-05, "loss": 0.9026, "step": 31700 }, { "epoch": 9.46, "learning_rate": 2.804167943610175e-05, "loss": 0.9011, "step": 31800 }, { "epoch": 9.49, "learning_rate": 2.65093472264787e-05, "loss": 0.9023, "step": 31900 }, { "epoch": 9.52, "learning_rate": 2.4977015016855653e-05, "loss": 0.9007, "step": 32000 } ], "max_steps": 33630, "num_train_epochs": 10, "total_flos": 8.418135066885916e+17, "trial_name": null, "trial_params": null }