{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.38264777048047, "eval_steps": 7000.0, "global_step": 7000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 1.975503753457132e-07, "loss": 7.9807, "step": 10 }, { "epoch": 0.0, "learning_rate": 3.951007506914264e-07, "loss": 7.3025, "step": 20 }, { "epoch": 0.01, "learning_rate": 5.926511260371394e-07, "loss": 6.442, "step": 30 }, { "epoch": 0.01, "learning_rate": 7.902015013828528e-07, "loss": 4.9928, "step": 40 }, { "epoch": 0.01, "learning_rate": 9.877518767285659e-07, "loss": 3.0368, "step": 50 }, { "epoch": 0.01, "learning_rate": 1.1853022520742789e-06, "loss": 1.5885, "step": 60 }, { "epoch": 0.01, "learning_rate": 1.382852627419992e-06, "loss": 1.118, "step": 70 }, { "epoch": 0.02, "learning_rate": 1.5804030027657055e-06, "loss": 1.0564, "step": 80 }, { "epoch": 0.02, "learning_rate": 1.7779533781114185e-06, "loss": 1.1117, "step": 90 }, { "epoch": 0.02, "learning_rate": 1.9755037534571318e-06, "loss": 1.1398, "step": 100 }, { "epoch": 0.02, "learning_rate": 2.173054128802845e-06, "loss": 0.9834, "step": 110 }, { "epoch": 0.02, "learning_rate": 2.3706045041485578e-06, "loss": 1.0677, "step": 120 }, { "epoch": 0.03, "learning_rate": 2.5681548794942714e-06, "loss": 0.9412, "step": 130 }, { "epoch": 0.03, "learning_rate": 2.765705254839984e-06, "loss": 0.9347, "step": 140 }, { "epoch": 0.03, "learning_rate": 2.9632556301856974e-06, "loss": 0.9574, "step": 150 }, { "epoch": 0.03, "learning_rate": 3.160806005531411e-06, "loss": 0.9889, "step": 160 }, { "epoch": 0.03, "learning_rate": 3.3583563808771234e-06, "loss": 0.9102, "step": 170 }, { "epoch": 0.04, "learning_rate": 3.555906756222837e-06, "loss": 0.9191, "step": 180 }, { "epoch": 0.04, "learning_rate": 3.7534571315685503e-06, "loss": 0.9766, "step": 190 }, { "epoch": 0.04, "learning_rate": 3.9510075069142635e-06, "loss": 0.9135, "step": 200 }, { "epoch": 0.04, "learning_rate": 4.148557882259976e-06, "loss": 0.8757, "step": 210 }, { "epoch": 0.04, "learning_rate": 4.34610825760569e-06, "loss": 0.9067, "step": 220 }, { "epoch": 0.05, "learning_rate": 4.543658632951403e-06, "loss": 0.9219, "step": 230 }, { "epoch": 0.05, "learning_rate": 4.7412090082971155e-06, "loss": 0.8996, "step": 240 }, { "epoch": 0.05, "learning_rate": 4.938759383642829e-06, "loss": 0.9467, "step": 250 }, { "epoch": 0.05, "learning_rate": 5.136309758988543e-06, "loss": 0.8877, "step": 260 }, { "epoch": 0.05, "learning_rate": 5.333860134334256e-06, "loss": 0.8398, "step": 270 }, { "epoch": 0.06, "learning_rate": 5.531410509679968e-06, "loss": 0.8764, "step": 280 }, { "epoch": 0.06, "learning_rate": 5.728960885025682e-06, "loss": 0.8809, "step": 290 }, { "epoch": 0.06, "learning_rate": 5.926511260371395e-06, "loss": 0.8699, "step": 300 }, { "epoch": 0.06, "learning_rate": 6.124061635717108e-06, "loss": 0.8377, "step": 310 }, { "epoch": 0.06, "learning_rate": 6.321612011062822e-06, "loss": 0.8943, "step": 320 }, { "epoch": 0.07, "learning_rate": 6.519162386408534e-06, "loss": 0.8393, "step": 330 }, { "epoch": 0.07, "learning_rate": 6.716712761754247e-06, "loss": 0.8192, "step": 340 }, { "epoch": 0.07, "learning_rate": 6.9142631370999605e-06, "loss": 0.903, "step": 350 }, { "epoch": 0.07, "learning_rate": 7.111813512445674e-06, "loss": 0.8917, "step": 360 }, { "epoch": 0.07, "learning_rate": 7.309363887791387e-06, "loss": 0.916, "step": 370 }, { "epoch": 0.08, "learning_rate": 7.506914263137101e-06, "loss": 0.864, "step": 380 }, { "epoch": 0.08, "learning_rate": 7.704464638482814e-06, "loss": 0.7809, "step": 390 }, { "epoch": 0.08, "learning_rate": 7.902015013828527e-06, "loss": 0.885, "step": 400 }, { "epoch": 0.08, "learning_rate": 8.09956538917424e-06, "loss": 0.8183, "step": 410 }, { "epoch": 0.08, "learning_rate": 8.297115764519953e-06, "loss": 0.7897, "step": 420 }, { "epoch": 0.08, "learning_rate": 8.494666139865665e-06, "loss": 0.8315, "step": 430 }, { "epoch": 0.09, "learning_rate": 8.69221651521138e-06, "loss": 0.8266, "step": 440 }, { "epoch": 0.09, "learning_rate": 8.889766890557093e-06, "loss": 0.8755, "step": 450 }, { "epoch": 0.09, "learning_rate": 9.087317265902805e-06, "loss": 0.8103, "step": 460 }, { "epoch": 0.09, "learning_rate": 9.28486764124852e-06, "loss": 0.8226, "step": 470 }, { "epoch": 0.09, "learning_rate": 9.482418016594231e-06, "loss": 0.8174, "step": 480 }, { "epoch": 0.1, "learning_rate": 9.679968391939946e-06, "loss": 0.8336, "step": 490 }, { "epoch": 0.1, "learning_rate": 9.877518767285658e-06, "loss": 0.8214, "step": 500 }, { "epoch": 0.1, "learning_rate": 1.0075069142631371e-05, "loss": 0.8534, "step": 510 }, { "epoch": 0.1, "learning_rate": 1.0272619517977086e-05, "loss": 0.8148, "step": 520 }, { "epoch": 0.1, "learning_rate": 1.0470169893322798e-05, "loss": 0.8032, "step": 530 }, { "epoch": 0.11, "learning_rate": 1.0667720268668511e-05, "loss": 0.7561, "step": 540 }, { "epoch": 0.11, "learning_rate": 1.0865270644014224e-05, "loss": 0.8219, "step": 550 }, { "epoch": 0.11, "learning_rate": 1.1062821019359937e-05, "loss": 0.8108, "step": 560 }, { "epoch": 0.11, "learning_rate": 1.126037139470565e-05, "loss": 0.7673, "step": 570 }, { "epoch": 0.11, "learning_rate": 1.1457921770051364e-05, "loss": 0.7731, "step": 580 }, { "epoch": 0.12, "learning_rate": 1.1655472145397077e-05, "loss": 0.8135, "step": 590 }, { "epoch": 0.12, "learning_rate": 1.185302252074279e-05, "loss": 0.8525, "step": 600 }, { "epoch": 0.12, "learning_rate": 1.2050572896088504e-05, "loss": 0.8473, "step": 610 }, { "epoch": 0.12, "learning_rate": 1.2248123271434215e-05, "loss": 0.7937, "step": 620 }, { "epoch": 0.12, "learning_rate": 1.244567364677993e-05, "loss": 0.7924, "step": 630 }, { "epoch": 0.13, "learning_rate": 1.2643224022125644e-05, "loss": 0.7778, "step": 640 }, { "epoch": 0.13, "learning_rate": 1.2840774397471355e-05, "loss": 0.8282, "step": 650 }, { "epoch": 0.13, "learning_rate": 1.3038324772817068e-05, "loss": 0.811, "step": 660 }, { "epoch": 0.13, "learning_rate": 1.3235875148162783e-05, "loss": 0.7167, "step": 670 }, { "epoch": 0.13, "learning_rate": 1.3433425523508494e-05, "loss": 0.8321, "step": 680 }, { "epoch": 0.14, "learning_rate": 1.363097589885421e-05, "loss": 0.7477, "step": 690 }, { "epoch": 0.14, "learning_rate": 1.3828526274199921e-05, "loss": 0.8232, "step": 700 }, { "epoch": 0.14, "learning_rate": 1.4026076649545635e-05, "loss": 0.799, "step": 710 }, { "epoch": 0.14, "learning_rate": 1.4223627024891348e-05, "loss": 0.8554, "step": 720 }, { "epoch": 0.14, "learning_rate": 1.442117740023706e-05, "loss": 0.778, "step": 730 }, { "epoch": 0.15, "learning_rate": 1.4618727775582774e-05, "loss": 0.8337, "step": 740 }, { "epoch": 0.15, "learning_rate": 1.4816278150928487e-05, "loss": 0.7793, "step": 750 }, { "epoch": 0.15, "learning_rate": 1.5013828526274201e-05, "loss": 0.7812, "step": 760 }, { "epoch": 0.15, "learning_rate": 1.5211378901619914e-05, "loss": 0.7837, "step": 770 }, { "epoch": 0.15, "learning_rate": 1.540892927696563e-05, "loss": 0.7714, "step": 780 }, { "epoch": 0.16, "learning_rate": 1.560647965231134e-05, "loss": 0.74, "step": 790 }, { "epoch": 0.16, "learning_rate": 1.5804030027657054e-05, "loss": 0.7725, "step": 800 }, { "epoch": 0.16, "learning_rate": 1.6001580403002767e-05, "loss": 0.7618, "step": 810 }, { "epoch": 0.16, "learning_rate": 1.619913077834848e-05, "loss": 0.767, "step": 820 }, { "epoch": 0.16, "learning_rate": 1.6396681153694192e-05, "loss": 0.8257, "step": 830 }, { "epoch": 0.17, "learning_rate": 1.6594231529039905e-05, "loss": 0.8116, "step": 840 }, { "epoch": 0.17, "learning_rate": 1.679178190438562e-05, "loss": 0.7649, "step": 850 }, { "epoch": 0.17, "learning_rate": 1.698933227973133e-05, "loss": 0.7616, "step": 860 }, { "epoch": 0.17, "learning_rate": 1.7186882655077047e-05, "loss": 0.7219, "step": 870 }, { "epoch": 0.17, "learning_rate": 1.738443303042276e-05, "loss": 0.7532, "step": 880 }, { "epoch": 0.18, "learning_rate": 1.758198340576847e-05, "loss": 0.7122, "step": 890 }, { "epoch": 0.18, "learning_rate": 1.7779533781114185e-05, "loss": 0.8242, "step": 900 }, { "epoch": 0.18, "learning_rate": 1.7977084156459898e-05, "loss": 0.7554, "step": 910 }, { "epoch": 0.18, "learning_rate": 1.817463453180561e-05, "loss": 0.7588, "step": 920 }, { "epoch": 0.18, "learning_rate": 1.8372184907151324e-05, "loss": 0.7404, "step": 930 }, { "epoch": 0.19, "learning_rate": 1.856973528249704e-05, "loss": 0.7907, "step": 940 }, { "epoch": 0.19, "learning_rate": 1.876728565784275e-05, "loss": 0.7775, "step": 950 }, { "epoch": 0.19, "learning_rate": 1.8964836033188462e-05, "loss": 0.7544, "step": 960 }, { "epoch": 0.19, "learning_rate": 1.9162386408534178e-05, "loss": 0.8053, "step": 970 }, { "epoch": 0.19, "learning_rate": 1.935993678387989e-05, "loss": 0.7226, "step": 980 }, { "epoch": 0.2, "learning_rate": 1.9557487159225604e-05, "loss": 0.7482, "step": 990 }, { "epoch": 0.2, "learning_rate": 1.9755037534571317e-05, "loss": 0.7171, "step": 1000 }, { "epoch": 0.2, "learning_rate": 1.995258790991703e-05, "loss": 0.7421, "step": 1010 }, { "epoch": 0.2, "learning_rate": 2.0150138285262742e-05, "loss": 0.7356, "step": 1020 }, { "epoch": 0.2, "learning_rate": 2.0347688660608455e-05, "loss": 0.713, "step": 1030 }, { "epoch": 0.21, "learning_rate": 2.054523903595417e-05, "loss": 0.7705, "step": 1040 }, { "epoch": 0.21, "learning_rate": 2.074278941129988e-05, "loss": 0.7501, "step": 1050 }, { "epoch": 0.21, "learning_rate": 2.0940339786645597e-05, "loss": 0.7572, "step": 1060 }, { "epoch": 0.21, "learning_rate": 2.113789016199131e-05, "loss": 0.7382, "step": 1070 }, { "epoch": 0.21, "learning_rate": 2.1335440537337022e-05, "loss": 0.7596, "step": 1080 }, { "epoch": 0.22, "learning_rate": 2.1532990912682735e-05, "loss": 0.7063, "step": 1090 }, { "epoch": 0.22, "learning_rate": 2.1730541288028448e-05, "loss": 0.7239, "step": 1100 }, { "epoch": 0.22, "learning_rate": 2.192809166337416e-05, "loss": 0.7648, "step": 1110 }, { "epoch": 0.22, "learning_rate": 2.2125642038719874e-05, "loss": 0.7539, "step": 1120 }, { "epoch": 0.22, "learning_rate": 2.232319241406559e-05, "loss": 0.7498, "step": 1130 }, { "epoch": 0.23, "learning_rate": 2.25207427894113e-05, "loss": 0.7647, "step": 1140 }, { "epoch": 0.23, "learning_rate": 2.2718293164757015e-05, "loss": 0.7657, "step": 1150 }, { "epoch": 0.23, "learning_rate": 2.2915843540102728e-05, "loss": 0.7451, "step": 1160 }, { "epoch": 0.23, "learning_rate": 2.3113393915448438e-05, "loss": 0.7528, "step": 1170 }, { "epoch": 0.23, "learning_rate": 2.3310944290794154e-05, "loss": 0.7548, "step": 1180 }, { "epoch": 0.24, "learning_rate": 2.3508494666139867e-05, "loss": 0.7616, "step": 1190 }, { "epoch": 0.24, "learning_rate": 2.370604504148558e-05, "loss": 0.7654, "step": 1200 }, { "epoch": 0.24, "learning_rate": 2.3903595416831292e-05, "loss": 0.7681, "step": 1210 }, { "epoch": 0.24, "learning_rate": 2.410114579217701e-05, "loss": 0.7302, "step": 1220 }, { "epoch": 0.24, "learning_rate": 2.4298696167522718e-05, "loss": 0.7516, "step": 1230 }, { "epoch": 0.24, "learning_rate": 2.449624654286843e-05, "loss": 0.7331, "step": 1240 }, { "epoch": 0.25, "learning_rate": 2.4693796918214147e-05, "loss": 0.7709, "step": 1250 }, { "epoch": 0.25, "learning_rate": 2.489134729355986e-05, "loss": 0.696, "step": 1260 }, { "epoch": 0.25, "learning_rate": 2.508889766890557e-05, "loss": 0.7537, "step": 1270 }, { "epoch": 0.25, "learning_rate": 2.528644804425129e-05, "loss": 0.7789, "step": 1280 }, { "epoch": 0.25, "learning_rate": 2.5483998419596998e-05, "loss": 0.7277, "step": 1290 }, { "epoch": 0.26, "learning_rate": 2.568154879494271e-05, "loss": 0.6968, "step": 1300 }, { "epoch": 0.26, "learning_rate": 2.5879099170288423e-05, "loss": 0.7329, "step": 1310 }, { "epoch": 0.26, "learning_rate": 2.6076649545634136e-05, "loss": 0.7501, "step": 1320 }, { "epoch": 0.26, "learning_rate": 2.6274199920979852e-05, "loss": 0.7224, "step": 1330 }, { "epoch": 0.26, "learning_rate": 2.6471750296325565e-05, "loss": 0.7276, "step": 1340 }, { "epoch": 0.27, "learning_rate": 2.6669300671671278e-05, "loss": 0.6827, "step": 1350 }, { "epoch": 0.27, "learning_rate": 2.6866851047016987e-05, "loss": 0.7762, "step": 1360 }, { "epoch": 0.27, "learning_rate": 2.7064401422362707e-05, "loss": 0.7124, "step": 1370 }, { "epoch": 0.27, "learning_rate": 2.726195179770842e-05, "loss": 0.6812, "step": 1380 }, { "epoch": 0.27, "learning_rate": 2.745950217305413e-05, "loss": 0.6922, "step": 1390 }, { "epoch": 0.28, "learning_rate": 2.7657052548399842e-05, "loss": 0.7216, "step": 1400 }, { "epoch": 0.28, "learning_rate": 2.7854602923745555e-05, "loss": 0.6899, "step": 1410 }, { "epoch": 0.28, "learning_rate": 2.805215329909127e-05, "loss": 0.7433, "step": 1420 }, { "epoch": 0.28, "learning_rate": 2.8249703674436984e-05, "loss": 0.7103, "step": 1430 }, { "epoch": 0.28, "learning_rate": 2.8447254049782697e-05, "loss": 0.7348, "step": 1440 }, { "epoch": 0.29, "learning_rate": 2.8644804425128406e-05, "loss": 0.8088, "step": 1450 }, { "epoch": 0.29, "learning_rate": 2.884235480047412e-05, "loss": 0.7432, "step": 1460 }, { "epoch": 0.29, "learning_rate": 2.903990517581984e-05, "loss": 0.7381, "step": 1470 }, { "epoch": 0.29, "learning_rate": 2.9237455551165548e-05, "loss": 0.7195, "step": 1480 }, { "epoch": 0.29, "learning_rate": 2.943500592651126e-05, "loss": 0.7038, "step": 1490 }, { "epoch": 0.3, "learning_rate": 2.9632556301856973e-05, "loss": 0.6643, "step": 1500 }, { "epoch": 0.3, "learning_rate": 2.983010667720269e-05, "loss": 0.7159, "step": 1510 }, { "epoch": 0.3, "learning_rate": 3.0027657052548402e-05, "loss": 0.6977, "step": 1520 }, { "epoch": 0.3, "learning_rate": 3.0225207427894115e-05, "loss": 0.713, "step": 1530 }, { "epoch": 0.3, "learning_rate": 3.0422757803239828e-05, "loss": 0.6713, "step": 1540 }, { "epoch": 0.31, "learning_rate": 3.062030817858554e-05, "loss": 0.6948, "step": 1550 }, { "epoch": 0.31, "learning_rate": 3.081785855393126e-05, "loss": 0.6964, "step": 1560 }, { "epoch": 0.31, "learning_rate": 3.101540892927697e-05, "loss": 0.7344, "step": 1570 }, { "epoch": 0.31, "learning_rate": 3.121295930462268e-05, "loss": 0.6921, "step": 1580 }, { "epoch": 0.31, "learning_rate": 3.141050967996839e-05, "loss": 0.7013, "step": 1590 }, { "epoch": 0.32, "learning_rate": 3.160806005531411e-05, "loss": 0.6525, "step": 1600 }, { "epoch": 0.32, "learning_rate": 3.180561043065982e-05, "loss": 0.6412, "step": 1610 }, { "epoch": 0.32, "learning_rate": 3.2003160806005534e-05, "loss": 0.6697, "step": 1620 }, { "epoch": 0.32, "learning_rate": 3.2200711181351246e-05, "loss": 0.7215, "step": 1630 }, { "epoch": 0.32, "learning_rate": 3.239826155669696e-05, "loss": 0.6918, "step": 1640 }, { "epoch": 0.33, "learning_rate": 3.259581193204267e-05, "loss": 0.7667, "step": 1650 }, { "epoch": 0.33, "learning_rate": 3.2793362307388385e-05, "loss": 0.69, "step": 1660 }, { "epoch": 0.33, "learning_rate": 3.29909126827341e-05, "loss": 0.6899, "step": 1670 }, { "epoch": 0.33, "learning_rate": 3.318846305807981e-05, "loss": 0.663, "step": 1680 }, { "epoch": 0.33, "learning_rate": 3.338601343342552e-05, "loss": 0.7416, "step": 1690 }, { "epoch": 0.34, "learning_rate": 3.358356380877124e-05, "loss": 0.689, "step": 1700 }, { "epoch": 0.34, "learning_rate": 3.378111418411695e-05, "loss": 0.6992, "step": 1710 }, { "epoch": 0.34, "learning_rate": 3.397866455946266e-05, "loss": 0.6602, "step": 1720 }, { "epoch": 0.34, "learning_rate": 3.4176214934808374e-05, "loss": 0.6961, "step": 1730 }, { "epoch": 0.34, "learning_rate": 3.4373765310154094e-05, "loss": 0.7106, "step": 1740 }, { "epoch": 0.35, "learning_rate": 3.457131568549981e-05, "loss": 0.6957, "step": 1750 }, { "epoch": 0.35, "learning_rate": 3.476886606084552e-05, "loss": 0.6981, "step": 1760 }, { "epoch": 0.35, "learning_rate": 3.496641643619123e-05, "loss": 0.7585, "step": 1770 }, { "epoch": 0.35, "learning_rate": 3.516396681153694e-05, "loss": 0.6903, "step": 1780 }, { "epoch": 0.35, "learning_rate": 3.536151718688266e-05, "loss": 0.6592, "step": 1790 }, { "epoch": 0.36, "learning_rate": 3.555906756222837e-05, "loss": 0.732, "step": 1800 }, { "epoch": 0.36, "learning_rate": 3.5756617937574084e-05, "loss": 0.703, "step": 1810 }, { "epoch": 0.36, "learning_rate": 3.5954168312919796e-05, "loss": 0.7397, "step": 1820 }, { "epoch": 0.36, "learning_rate": 3.615171868826551e-05, "loss": 0.6665, "step": 1830 }, { "epoch": 0.36, "learning_rate": 3.634926906361122e-05, "loss": 0.7053, "step": 1840 }, { "epoch": 0.37, "learning_rate": 3.6546819438956935e-05, "loss": 0.7008, "step": 1850 }, { "epoch": 0.37, "learning_rate": 3.674436981430265e-05, "loss": 0.7181, "step": 1860 }, { "epoch": 0.37, "learning_rate": 3.694192018964836e-05, "loss": 0.7223, "step": 1870 }, { "epoch": 0.37, "learning_rate": 3.713947056499408e-05, "loss": 0.7027, "step": 1880 }, { "epoch": 0.37, "learning_rate": 3.733702094033979e-05, "loss": 0.6727, "step": 1890 }, { "epoch": 0.38, "learning_rate": 3.75345713156855e-05, "loss": 0.6756, "step": 1900 }, { "epoch": 0.38, "learning_rate": 3.773212169103121e-05, "loss": 0.7386, "step": 1910 }, { "epoch": 0.38, "learning_rate": 3.7929672066376924e-05, "loss": 0.6824, "step": 1920 }, { "epoch": 0.38, "learning_rate": 3.8127222441722644e-05, "loss": 0.7292, "step": 1930 }, { "epoch": 0.38, "learning_rate": 3.8324772817068357e-05, "loss": 0.716, "step": 1940 }, { "epoch": 0.39, "learning_rate": 3.852232319241407e-05, "loss": 0.657, "step": 1950 }, { "epoch": 0.39, "learning_rate": 3.871987356775978e-05, "loss": 0.6604, "step": 1960 }, { "epoch": 0.39, "learning_rate": 3.891742394310549e-05, "loss": 0.679, "step": 1970 }, { "epoch": 0.39, "learning_rate": 3.911497431845121e-05, "loss": 0.6932, "step": 1980 }, { "epoch": 0.39, "learning_rate": 3.931252469379692e-05, "loss": 0.7374, "step": 1990 }, { "epoch": 0.4, "learning_rate": 3.951007506914263e-05, "loss": 0.7186, "step": 2000 }, { "epoch": 0.4, "learning_rate": 3.9707625444488346e-05, "loss": 0.7086, "step": 2010 }, { "epoch": 0.4, "learning_rate": 3.990517581983406e-05, "loss": 0.6731, "step": 2020 }, { "epoch": 0.4, "learning_rate": 4.010272619517977e-05, "loss": 0.69, "step": 2030 }, { "epoch": 0.4, "learning_rate": 4.0300276570525485e-05, "loss": 0.6567, "step": 2040 }, { "epoch": 0.4, "learning_rate": 4.04978269458712e-05, "loss": 0.6974, "step": 2050 }, { "epoch": 0.41, "learning_rate": 4.069537732121691e-05, "loss": 0.7603, "step": 2060 }, { "epoch": 0.41, "learning_rate": 4.089292769656263e-05, "loss": 0.7012, "step": 2070 }, { "epoch": 0.41, "learning_rate": 4.109047807190834e-05, "loss": 0.7585, "step": 2080 }, { "epoch": 0.41, "learning_rate": 4.128802844725405e-05, "loss": 0.7086, "step": 2090 }, { "epoch": 0.41, "learning_rate": 4.148557882259976e-05, "loss": 0.7345, "step": 2100 }, { "epoch": 0.42, "learning_rate": 4.168312919794548e-05, "loss": 0.6888, "step": 2110 }, { "epoch": 0.42, "learning_rate": 4.1880679573291194e-05, "loss": 0.6637, "step": 2120 }, { "epoch": 0.42, "learning_rate": 4.2078229948636906e-05, "loss": 0.6936, "step": 2130 }, { "epoch": 0.42, "learning_rate": 4.227578032398262e-05, "loss": 0.6844, "step": 2140 }, { "epoch": 0.42, "learning_rate": 4.2473330699328325e-05, "loss": 0.709, "step": 2150 }, { "epoch": 0.43, "learning_rate": 4.2670881074674045e-05, "loss": 0.6244, "step": 2160 }, { "epoch": 0.43, "learning_rate": 4.286843145001976e-05, "loss": 0.6936, "step": 2170 }, { "epoch": 0.43, "learning_rate": 4.306598182536547e-05, "loss": 0.7073, "step": 2180 }, { "epoch": 0.43, "learning_rate": 4.326353220071118e-05, "loss": 0.7202, "step": 2190 }, { "epoch": 0.43, "learning_rate": 4.3461082576056896e-05, "loss": 0.6506, "step": 2200 }, { "epoch": 0.44, "learning_rate": 4.365863295140261e-05, "loss": 0.6339, "step": 2210 }, { "epoch": 0.44, "learning_rate": 4.385618332674832e-05, "loss": 0.6635, "step": 2220 }, { "epoch": 0.44, "learning_rate": 4.4053733702094034e-05, "loss": 0.6674, "step": 2230 }, { "epoch": 0.44, "learning_rate": 4.425128407743975e-05, "loss": 0.6857, "step": 2240 }, { "epoch": 0.44, "learning_rate": 4.444883445278547e-05, "loss": 0.7223, "step": 2250 }, { "epoch": 0.45, "learning_rate": 4.464638482813118e-05, "loss": 0.6716, "step": 2260 }, { "epoch": 0.45, "learning_rate": 4.4843935203476886e-05, "loss": 0.7094, "step": 2270 }, { "epoch": 0.45, "learning_rate": 4.50414855788226e-05, "loss": 0.6477, "step": 2280 }, { "epoch": 0.45, "learning_rate": 4.523903595416831e-05, "loss": 0.6409, "step": 2290 }, { "epoch": 0.45, "learning_rate": 4.543658632951403e-05, "loss": 0.6682, "step": 2300 }, { "epoch": 0.46, "learning_rate": 4.5634136704859744e-05, "loss": 0.6727, "step": 2310 }, { "epoch": 0.46, "learning_rate": 4.5831687080205456e-05, "loss": 0.6348, "step": 2320 }, { "epoch": 0.46, "learning_rate": 4.602923745555117e-05, "loss": 0.683, "step": 2330 }, { "epoch": 0.46, "learning_rate": 4.6226787830896875e-05, "loss": 0.5872, "step": 2340 }, { "epoch": 0.46, "learning_rate": 4.6424338206242595e-05, "loss": 0.7156, "step": 2350 }, { "epoch": 0.47, "learning_rate": 4.662188858158831e-05, "loss": 0.6441, "step": 2360 }, { "epoch": 0.47, "learning_rate": 4.681943895693402e-05, "loss": 0.647, "step": 2370 }, { "epoch": 0.47, "learning_rate": 4.701698933227973e-05, "loss": 0.7303, "step": 2380 }, { "epoch": 0.47, "learning_rate": 4.7214539707625446e-05, "loss": 0.6304, "step": 2390 }, { "epoch": 0.47, "learning_rate": 4.741209008297116e-05, "loss": 0.6577, "step": 2400 }, { "epoch": 0.48, "learning_rate": 4.760964045831687e-05, "loss": 0.6941, "step": 2410 }, { "epoch": 0.48, "learning_rate": 4.7807190833662584e-05, "loss": 0.6309, "step": 2420 }, { "epoch": 0.48, "learning_rate": 4.80047412090083e-05, "loss": 0.6584, "step": 2430 }, { "epoch": 0.48, "learning_rate": 4.820229158435402e-05, "loss": 0.6387, "step": 2440 }, { "epoch": 0.48, "learning_rate": 4.839984195969973e-05, "loss": 0.6602, "step": 2450 }, { "epoch": 0.49, "learning_rate": 4.8597392335045435e-05, "loss": 0.7043, "step": 2460 }, { "epoch": 0.49, "learning_rate": 4.879494271039115e-05, "loss": 0.711, "step": 2470 }, { "epoch": 0.49, "learning_rate": 4.899249308573686e-05, "loss": 0.6393, "step": 2480 }, { "epoch": 0.49, "learning_rate": 4.919004346108258e-05, "loss": 0.7015, "step": 2490 }, { "epoch": 0.49, "learning_rate": 4.9387593836428293e-05, "loss": 0.6153, "step": 2500 }, { "epoch": 0.5, "learning_rate": 4.9585144211774006e-05, "loss": 0.6787, "step": 2510 }, { "epoch": 0.5, "learning_rate": 4.978269458711972e-05, "loss": 0.6316, "step": 2520 }, { "epoch": 0.5, "learning_rate": 4.998024496246543e-05, "loss": 0.6494, "step": 2530 }, { "epoch": 0.5, "learning_rate": 5.017779533781114e-05, "loss": 0.6925, "step": 2540 }, { "epoch": 0.5, "learning_rate": 5.0375345713156864e-05, "loss": 0.7249, "step": 2550 }, { "epoch": 0.51, "learning_rate": 5.057289608850258e-05, "loss": 0.6799, "step": 2560 }, { "epoch": 0.51, "learning_rate": 5.077044646384829e-05, "loss": 0.7094, "step": 2570 }, { "epoch": 0.51, "learning_rate": 5.0967996839193996e-05, "loss": 0.7007, "step": 2580 }, { "epoch": 0.51, "learning_rate": 5.116554721453971e-05, "loss": 0.7456, "step": 2590 }, { "epoch": 0.51, "learning_rate": 5.136309758988542e-05, "loss": 0.6767, "step": 2600 }, { "epoch": 0.52, "learning_rate": 5.1560647965231134e-05, "loss": 0.7366, "step": 2610 }, { "epoch": 0.52, "learning_rate": 5.175819834057685e-05, "loss": 0.6579, "step": 2620 }, { "epoch": 0.52, "learning_rate": 5.195574871592256e-05, "loss": 0.6985, "step": 2630 }, { "epoch": 0.52, "learning_rate": 5.215329909126827e-05, "loss": 0.7202, "step": 2640 }, { "epoch": 0.52, "learning_rate": 5.235084946661399e-05, "loss": 0.6233, "step": 2650 }, { "epoch": 0.53, "learning_rate": 5.2548399841959705e-05, "loss": 0.6271, "step": 2660 }, { "epoch": 0.53, "learning_rate": 5.274595021730542e-05, "loss": 0.697, "step": 2670 }, { "epoch": 0.53, "learning_rate": 5.294350059265113e-05, "loss": 0.6596, "step": 2680 }, { "epoch": 0.53, "learning_rate": 5.314105096799684e-05, "loss": 0.6474, "step": 2690 }, { "epoch": 0.53, "learning_rate": 5.3338601343342556e-05, "loss": 0.6291, "step": 2700 }, { "epoch": 0.54, "learning_rate": 5.353615171868827e-05, "loss": 0.634, "step": 2710 }, { "epoch": 0.54, "learning_rate": 5.3733702094033975e-05, "loss": 0.67, "step": 2720 }, { "epoch": 0.54, "learning_rate": 5.393125246937969e-05, "loss": 0.6784, "step": 2730 }, { "epoch": 0.54, "learning_rate": 5.4128802844725414e-05, "loss": 0.6826, "step": 2740 }, { "epoch": 0.54, "learning_rate": 5.432635322007113e-05, "loss": 0.6485, "step": 2750 }, { "epoch": 0.55, "learning_rate": 5.452390359541684e-05, "loss": 0.6364, "step": 2760 }, { "epoch": 0.55, "learning_rate": 5.4721453970762546e-05, "loss": 0.6196, "step": 2770 }, { "epoch": 0.55, "learning_rate": 5.491900434610826e-05, "loss": 0.6812, "step": 2780 }, { "epoch": 0.55, "learning_rate": 5.511655472145397e-05, "loss": 0.6404, "step": 2790 }, { "epoch": 0.55, "learning_rate": 5.5314105096799684e-05, "loss": 0.6725, "step": 2800 }, { "epoch": 0.56, "learning_rate": 5.55116554721454e-05, "loss": 0.6811, "step": 2810 }, { "epoch": 0.56, "learning_rate": 5.570920584749111e-05, "loss": 0.6282, "step": 2820 }, { "epoch": 0.56, "learning_rate": 5.590675622283683e-05, "loss": 0.6505, "step": 2830 }, { "epoch": 0.56, "learning_rate": 5.610430659818254e-05, "loss": 0.6352, "step": 2840 }, { "epoch": 0.56, "learning_rate": 5.6301856973528255e-05, "loss": 0.6836, "step": 2850 }, { "epoch": 0.56, "learning_rate": 5.649940734887397e-05, "loss": 0.5995, "step": 2860 }, { "epoch": 0.57, "learning_rate": 5.669695772421968e-05, "loss": 0.6773, "step": 2870 }, { "epoch": 0.57, "learning_rate": 5.689450809956539e-05, "loss": 0.6272, "step": 2880 }, { "epoch": 0.57, "learning_rate": 5.7092058474911106e-05, "loss": 0.681, "step": 2890 }, { "epoch": 0.57, "learning_rate": 5.728960885025681e-05, "loss": 0.6307, "step": 2900 }, { "epoch": 0.57, "learning_rate": 5.7487159225602525e-05, "loss": 0.6643, "step": 2910 }, { "epoch": 0.58, "learning_rate": 5.768470960094824e-05, "loss": 0.6184, "step": 2920 }, { "epoch": 0.58, "learning_rate": 5.7882259976293964e-05, "loss": 0.6572, "step": 2930 }, { "epoch": 0.58, "learning_rate": 5.807981035163968e-05, "loss": 0.6898, "step": 2940 }, { "epoch": 0.58, "learning_rate": 5.827736072698538e-05, "loss": 0.6442, "step": 2950 }, { "epoch": 0.58, "learning_rate": 5.8474911102331095e-05, "loss": 0.6782, "step": 2960 }, { "epoch": 0.59, "learning_rate": 5.867246147767681e-05, "loss": 0.6749, "step": 2970 }, { "epoch": 0.59, "learning_rate": 5.887001185302252e-05, "loss": 0.6217, "step": 2980 }, { "epoch": 0.59, "learning_rate": 5.9067562228368234e-05, "loss": 0.6644, "step": 2990 }, { "epoch": 0.59, "learning_rate": 5.926511260371395e-05, "loss": 0.6527, "step": 3000 }, { "epoch": 0.59, "learning_rate": 5.946266297905966e-05, "loss": 0.6961, "step": 3010 }, { "epoch": 0.6, "learning_rate": 5.966021335440538e-05, "loss": 0.6514, "step": 3020 }, { "epoch": 0.6, "learning_rate": 5.985776372975109e-05, "loss": 0.6066, "step": 3030 }, { "epoch": 0.6, "learning_rate": 6.0055314105096805e-05, "loss": 0.673, "step": 3040 }, { "epoch": 0.6, "learning_rate": 6.025286448044252e-05, "loss": 0.6685, "step": 3050 }, { "epoch": 0.6, "learning_rate": 6.045041485578823e-05, "loss": 0.6274, "step": 3060 }, { "epoch": 0.61, "learning_rate": 6.064796523113394e-05, "loss": 0.7091, "step": 3070 }, { "epoch": 0.61, "learning_rate": 6.0845515606479656e-05, "loss": 0.6791, "step": 3080 }, { "epoch": 0.61, "learning_rate": 6.104306598182537e-05, "loss": 0.6348, "step": 3090 }, { "epoch": 0.61, "learning_rate": 6.124061635717107e-05, "loss": 0.6043, "step": 3100 }, { "epoch": 0.61, "learning_rate": 6.14381667325168e-05, "loss": 0.6991, "step": 3110 }, { "epoch": 0.62, "learning_rate": 6.163571710786251e-05, "loss": 0.6613, "step": 3120 }, { "epoch": 0.62, "learning_rate": 6.183326748320822e-05, "loss": 0.6498, "step": 3130 }, { "epoch": 0.62, "learning_rate": 6.203081785855394e-05, "loss": 0.6819, "step": 3140 }, { "epoch": 0.62, "learning_rate": 6.222836823389965e-05, "loss": 0.6316, "step": 3150 }, { "epoch": 0.62, "learning_rate": 6.242591860924536e-05, "loss": 0.6672, "step": 3160 }, { "epoch": 0.63, "learning_rate": 6.262346898459107e-05, "loss": 0.6602, "step": 3170 }, { "epoch": 0.63, "learning_rate": 6.282101935993678e-05, "loss": 0.6463, "step": 3180 }, { "epoch": 0.63, "learning_rate": 6.30185697352825e-05, "loss": 0.614, "step": 3190 }, { "epoch": 0.63, "learning_rate": 6.321612011062822e-05, "loss": 0.6689, "step": 3200 }, { "epoch": 0.63, "learning_rate": 6.341367048597394e-05, "loss": 0.6224, "step": 3210 }, { "epoch": 0.64, "learning_rate": 6.361122086131964e-05, "loss": 0.5965, "step": 3220 }, { "epoch": 0.64, "learning_rate": 6.380877123666536e-05, "loss": 0.6219, "step": 3230 }, { "epoch": 0.64, "learning_rate": 6.400632161201107e-05, "loss": 0.6675, "step": 3240 }, { "epoch": 0.64, "learning_rate": 6.420387198735677e-05, "loss": 0.7074, "step": 3250 }, { "epoch": 0.64, "learning_rate": 6.440142236270249e-05, "loss": 0.6304, "step": 3260 }, { "epoch": 0.65, "learning_rate": 6.45989727380482e-05, "loss": 0.6729, "step": 3270 }, { "epoch": 0.65, "learning_rate": 6.479652311339392e-05, "loss": 0.6177, "step": 3280 }, { "epoch": 0.65, "learning_rate": 6.499407348873962e-05, "loss": 0.6709, "step": 3290 }, { "epoch": 0.65, "learning_rate": 6.519162386408534e-05, "loss": 0.7026, "step": 3300 }, { "epoch": 0.65, "learning_rate": 6.538917423943106e-05, "loss": 0.6838, "step": 3310 }, { "epoch": 0.66, "learning_rate": 6.558672461477677e-05, "loss": 0.6294, "step": 3320 }, { "epoch": 0.66, "learning_rate": 6.578427499012249e-05, "loss": 0.6447, "step": 3330 }, { "epoch": 0.66, "learning_rate": 6.59818253654682e-05, "loss": 0.592, "step": 3340 }, { "epoch": 0.66, "learning_rate": 6.617937574081391e-05, "loss": 0.6698, "step": 3350 }, { "epoch": 0.66, "learning_rate": 6.637692611615962e-05, "loss": 0.6528, "step": 3360 }, { "epoch": 0.67, "learning_rate": 6.657447649150533e-05, "loss": 0.6679, "step": 3370 }, { "epoch": 0.67, "learning_rate": 6.677202686685105e-05, "loss": 0.6379, "step": 3380 }, { "epoch": 0.67, "learning_rate": 6.696957724219677e-05, "loss": 0.6213, "step": 3390 }, { "epoch": 0.67, "learning_rate": 6.716712761754249e-05, "loss": 0.6696, "step": 3400 }, { "epoch": 0.67, "learning_rate": 6.736467799288819e-05, "loss": 0.6174, "step": 3410 }, { "epoch": 0.68, "learning_rate": 6.75622283682339e-05, "loss": 0.6211, "step": 3420 }, { "epoch": 0.68, "learning_rate": 6.775977874357962e-05, "loss": 0.6016, "step": 3430 }, { "epoch": 0.68, "learning_rate": 6.795732911892532e-05, "loss": 0.6407, "step": 3440 }, { "epoch": 0.68, "learning_rate": 6.815487949427104e-05, "loss": 0.6349, "step": 3450 }, { "epoch": 0.68, "learning_rate": 6.835242986961675e-05, "loss": 0.6478, "step": 3460 }, { "epoch": 0.69, "learning_rate": 6.854998024496247e-05, "loss": 0.6733, "step": 3470 }, { "epoch": 0.69, "learning_rate": 6.874753062030819e-05, "loss": 0.6037, "step": 3480 }, { "epoch": 0.69, "learning_rate": 6.89450809956539e-05, "loss": 0.6621, "step": 3490 }, { "epoch": 0.69, "learning_rate": 6.914263137099961e-05, "loss": 0.6122, "step": 3500 }, { "epoch": 0.69, "learning_rate": 6.934018174634532e-05, "loss": 0.594, "step": 3510 }, { "epoch": 0.7, "learning_rate": 6.953773212169104e-05, "loss": 0.6445, "step": 3520 }, { "epoch": 0.7, "learning_rate": 6.973528249703675e-05, "loss": 0.6363, "step": 3530 }, { "epoch": 0.7, "learning_rate": 6.993283287238246e-05, "loss": 0.635, "step": 3540 }, { "epoch": 0.7, "learning_rate": 7.013038324772817e-05, "loss": 0.6506, "step": 3550 }, { "epoch": 0.7, "learning_rate": 7.032793362307388e-05, "loss": 0.6546, "step": 3560 }, { "epoch": 0.71, "learning_rate": 7.052548399841961e-05, "loss": 0.6486, "step": 3570 }, { "epoch": 0.71, "learning_rate": 7.072303437376532e-05, "loss": 0.6543, "step": 3580 }, { "epoch": 0.71, "learning_rate": 7.092058474911104e-05, "loss": 0.6799, "step": 3590 }, { "epoch": 0.71, "learning_rate": 7.111813512445674e-05, "loss": 0.665, "step": 3600 }, { "epoch": 0.71, "learning_rate": 7.131568549980245e-05, "loss": 0.6728, "step": 3610 }, { "epoch": 0.72, "learning_rate": 7.151323587514817e-05, "loss": 0.6574, "step": 3620 }, { "epoch": 0.72, "learning_rate": 7.171078625049387e-05, "loss": 0.6618, "step": 3630 }, { "epoch": 0.72, "learning_rate": 7.190833662583959e-05, "loss": 0.5915, "step": 3640 }, { "epoch": 0.72, "learning_rate": 7.21058870011853e-05, "loss": 0.5632, "step": 3650 }, { "epoch": 0.72, "learning_rate": 7.230343737653102e-05, "loss": 0.6802, "step": 3660 }, { "epoch": 0.72, "learning_rate": 7.250098775187674e-05, "loss": 0.6749, "step": 3670 }, { "epoch": 0.73, "learning_rate": 7.269853812722244e-05, "loss": 0.665, "step": 3680 }, { "epoch": 0.73, "learning_rate": 7.289608850256816e-05, "loss": 0.7057, "step": 3690 }, { "epoch": 0.73, "learning_rate": 7.309363887791387e-05, "loss": 0.6284, "step": 3700 }, { "epoch": 0.73, "learning_rate": 7.329118925325959e-05, "loss": 0.6445, "step": 3710 }, { "epoch": 0.73, "learning_rate": 7.34887396286053e-05, "loss": 0.6352, "step": 3720 }, { "epoch": 0.74, "learning_rate": 7.368629000395101e-05, "loss": 0.6989, "step": 3730 }, { "epoch": 0.74, "learning_rate": 7.388384037929672e-05, "loss": 0.6579, "step": 3740 }, { "epoch": 0.74, "learning_rate": 7.408139075464243e-05, "loss": 0.6215, "step": 3750 }, { "epoch": 0.74, "learning_rate": 7.427894112998816e-05, "loss": 0.5917, "step": 3760 }, { "epoch": 0.74, "learning_rate": 7.447649150533387e-05, "loss": 0.6685, "step": 3770 }, { "epoch": 0.75, "learning_rate": 7.467404188067959e-05, "loss": 0.6314, "step": 3780 }, { "epoch": 0.75, "learning_rate": 7.487159225602529e-05, "loss": 0.6184, "step": 3790 }, { "epoch": 0.75, "learning_rate": 7.5069142631371e-05, "loss": 0.6256, "step": 3800 }, { "epoch": 0.75, "learning_rate": 7.526669300671672e-05, "loss": 0.6055, "step": 3810 }, { "epoch": 0.75, "learning_rate": 7.546424338206242e-05, "loss": 0.6527, "step": 3820 }, { "epoch": 0.76, "learning_rate": 7.566179375740814e-05, "loss": 0.6632, "step": 3830 }, { "epoch": 0.76, "learning_rate": 7.585934413275385e-05, "loss": 0.5831, "step": 3840 }, { "epoch": 0.76, "learning_rate": 7.605689450809957e-05, "loss": 0.6703, "step": 3850 }, { "epoch": 0.76, "learning_rate": 7.625444488344529e-05, "loss": 0.6218, "step": 3860 }, { "epoch": 0.76, "learning_rate": 7.6451995258791e-05, "loss": 0.6795, "step": 3870 }, { "epoch": 0.77, "learning_rate": 7.664954563413671e-05, "loss": 0.6779, "step": 3880 }, { "epoch": 0.77, "learning_rate": 7.684709600948242e-05, "loss": 0.6174, "step": 3890 }, { "epoch": 0.77, "learning_rate": 7.704464638482814e-05, "loss": 0.6547, "step": 3900 }, { "epoch": 0.77, "learning_rate": 7.724219676017384e-05, "loss": 0.6003, "step": 3910 }, { "epoch": 0.77, "learning_rate": 7.743974713551956e-05, "loss": 0.6768, "step": 3920 }, { "epoch": 0.78, "learning_rate": 7.763729751086527e-05, "loss": 0.6117, "step": 3930 }, { "epoch": 0.78, "learning_rate": 7.783484788621098e-05, "loss": 0.618, "step": 3940 }, { "epoch": 0.78, "learning_rate": 7.803239826155671e-05, "loss": 0.6749, "step": 3950 }, { "epoch": 0.78, "learning_rate": 7.822994863690242e-05, "loss": 0.6358, "step": 3960 }, { "epoch": 0.78, "learning_rate": 7.842749901224814e-05, "loss": 0.6954, "step": 3970 }, { "epoch": 0.79, "learning_rate": 7.862504938759384e-05, "loss": 0.5934, "step": 3980 }, { "epoch": 0.79, "learning_rate": 7.882259976293955e-05, "loss": 0.6526, "step": 3990 }, { "epoch": 0.79, "learning_rate": 7.902015013828527e-05, "loss": 0.6171, "step": 4000 }, { "epoch": 0.79, "learning_rate": 7.921770051363097e-05, "loss": 0.6219, "step": 4010 }, { "epoch": 0.79, "learning_rate": 7.941525088897669e-05, "loss": 0.6421, "step": 4020 }, { "epoch": 0.8, "learning_rate": 7.96128012643224e-05, "loss": 0.6531, "step": 4030 }, { "epoch": 0.8, "learning_rate": 7.981035163966812e-05, "loss": 0.5801, "step": 4040 }, { "epoch": 0.8, "learning_rate": 8.000790201501384e-05, "loss": 0.63, "step": 4050 }, { "epoch": 0.8, "learning_rate": 8.020545239035954e-05, "loss": 0.6303, "step": 4060 }, { "epoch": 0.8, "learning_rate": 8.040300276570526e-05, "loss": 0.612, "step": 4070 }, { "epoch": 0.81, "learning_rate": 8.060055314105097e-05, "loss": 0.6031, "step": 4080 }, { "epoch": 0.81, "learning_rate": 8.079810351639669e-05, "loss": 0.6569, "step": 4090 }, { "epoch": 0.81, "learning_rate": 8.09956538917424e-05, "loss": 0.7017, "step": 4100 }, { "epoch": 0.81, "learning_rate": 8.119320426708811e-05, "loss": 0.6456, "step": 4110 }, { "epoch": 0.81, "learning_rate": 8.139075464243382e-05, "loss": 0.6752, "step": 4120 }, { "epoch": 0.82, "learning_rate": 8.158830501777954e-05, "loss": 0.5988, "step": 4130 }, { "epoch": 0.82, "learning_rate": 8.178585539312526e-05, "loss": 0.6224, "step": 4140 }, { "epoch": 0.82, "learning_rate": 8.198340576847097e-05, "loss": 0.6238, "step": 4150 }, { "epoch": 0.82, "learning_rate": 8.218095614381669e-05, "loss": 0.6953, "step": 4160 }, { "epoch": 0.82, "learning_rate": 8.237850651916239e-05, "loss": 0.675, "step": 4170 }, { "epoch": 0.83, "learning_rate": 8.25760568945081e-05, "loss": 0.6091, "step": 4180 }, { "epoch": 0.83, "learning_rate": 8.277360726985382e-05, "loss": 0.6527, "step": 4190 }, { "epoch": 0.83, "learning_rate": 8.297115764519952e-05, "loss": 0.6183, "step": 4200 }, { "epoch": 0.83, "learning_rate": 8.316870802054524e-05, "loss": 0.6101, "step": 4210 }, { "epoch": 0.83, "learning_rate": 8.336625839589096e-05, "loss": 0.6639, "step": 4220 }, { "epoch": 0.84, "learning_rate": 8.356380877123667e-05, "loss": 0.6226, "step": 4230 }, { "epoch": 0.84, "learning_rate": 8.376135914658239e-05, "loss": 0.6441, "step": 4240 }, { "epoch": 0.84, "learning_rate": 8.39589095219281e-05, "loss": 0.5824, "step": 4250 }, { "epoch": 0.84, "learning_rate": 8.415645989727381e-05, "loss": 0.6669, "step": 4260 }, { "epoch": 0.84, "learning_rate": 8.435401027261952e-05, "loss": 0.6821, "step": 4270 }, { "epoch": 0.85, "learning_rate": 8.455156064796524e-05, "loss": 0.6243, "step": 4280 }, { "epoch": 0.85, "learning_rate": 8.474911102331094e-05, "loss": 0.5964, "step": 4290 }, { "epoch": 0.85, "learning_rate": 8.494666139865665e-05, "loss": 0.5887, "step": 4300 }, { "epoch": 0.85, "learning_rate": 8.514421177400237e-05, "loss": 0.6496, "step": 4310 }, { "epoch": 0.85, "learning_rate": 8.534176214934809e-05, "loss": 0.6563, "step": 4320 }, { "epoch": 0.86, "learning_rate": 8.553931252469381e-05, "loss": 0.6282, "step": 4330 }, { "epoch": 0.86, "learning_rate": 8.573686290003952e-05, "loss": 0.6189, "step": 4340 }, { "epoch": 0.86, "learning_rate": 8.593441327538523e-05, "loss": 0.6376, "step": 4350 }, { "epoch": 0.86, "learning_rate": 8.613196365073094e-05, "loss": 0.5814, "step": 4360 }, { "epoch": 0.86, "learning_rate": 8.632951402607665e-05, "loss": 0.5461, "step": 4370 }, { "epoch": 0.87, "learning_rate": 8.652706440142237e-05, "loss": 0.6651, "step": 4380 }, { "epoch": 0.87, "learning_rate": 8.672461477676807e-05, "loss": 0.6614, "step": 4390 }, { "epoch": 0.87, "learning_rate": 8.692216515211379e-05, "loss": 0.604, "step": 4400 }, { "epoch": 0.87, "learning_rate": 8.711971552745951e-05, "loss": 0.6378, "step": 4410 }, { "epoch": 0.87, "learning_rate": 8.731726590280522e-05, "loss": 0.6211, "step": 4420 }, { "epoch": 0.88, "learning_rate": 8.751481627815094e-05, "loss": 0.5943, "step": 4430 }, { "epoch": 0.88, "learning_rate": 8.771236665349664e-05, "loss": 0.6493, "step": 4440 }, { "epoch": 0.88, "learning_rate": 8.790991702884236e-05, "loss": 0.643, "step": 4450 }, { "epoch": 0.88, "learning_rate": 8.810746740418807e-05, "loss": 0.6311, "step": 4460 }, { "epoch": 0.88, "learning_rate": 8.830501777953379e-05, "loss": 0.6606, "step": 4470 }, { "epoch": 0.88, "learning_rate": 8.85025681548795e-05, "loss": 0.615, "step": 4480 }, { "epoch": 0.89, "learning_rate": 8.87001185302252e-05, "loss": 0.6847, "step": 4490 }, { "epoch": 0.89, "learning_rate": 8.889766890557093e-05, "loss": 0.6207, "step": 4500 }, { "epoch": 0.89, "learning_rate": 8.909521928091664e-05, "loss": 0.6094, "step": 4510 }, { "epoch": 0.89, "learning_rate": 8.929276965626236e-05, "loss": 0.6294, "step": 4520 }, { "epoch": 0.89, "learning_rate": 8.949032003160807e-05, "loss": 0.644, "step": 4530 }, { "epoch": 0.9, "learning_rate": 8.968787040695377e-05, "loss": 0.6265, "step": 4540 }, { "epoch": 0.9, "learning_rate": 8.988542078229949e-05, "loss": 0.6348, "step": 4550 }, { "epoch": 0.9, "learning_rate": 9.00829711576452e-05, "loss": 0.5913, "step": 4560 }, { "epoch": 0.9, "learning_rate": 9.028052153299092e-05, "loss": 0.6377, "step": 4570 }, { "epoch": 0.9, "learning_rate": 9.047807190833662e-05, "loss": 0.5929, "step": 4580 }, { "epoch": 0.91, "learning_rate": 9.067562228368236e-05, "loss": 0.6708, "step": 4590 }, { "epoch": 0.91, "learning_rate": 9.087317265902806e-05, "loss": 0.6314, "step": 4600 }, { "epoch": 0.91, "learning_rate": 9.107072303437377e-05, "loss": 0.6853, "step": 4610 }, { "epoch": 0.91, "learning_rate": 9.126827340971949e-05, "loss": 0.6196, "step": 4620 }, { "epoch": 0.91, "learning_rate": 9.146582378506519e-05, "loss": 0.5957, "step": 4630 }, { "epoch": 0.92, "learning_rate": 9.166337416041091e-05, "loss": 0.5991, "step": 4640 }, { "epoch": 0.92, "learning_rate": 9.186092453575662e-05, "loss": 0.6602, "step": 4650 }, { "epoch": 0.92, "learning_rate": 9.205847491110234e-05, "loss": 0.6852, "step": 4660 }, { "epoch": 0.92, "learning_rate": 9.225602528644804e-05, "loss": 0.5723, "step": 4670 }, { "epoch": 0.92, "learning_rate": 9.245357566179375e-05, "loss": 0.6218, "step": 4680 }, { "epoch": 0.93, "learning_rate": 9.265112603713948e-05, "loss": 0.625, "step": 4690 }, { "epoch": 0.93, "learning_rate": 9.284867641248519e-05, "loss": 0.6035, "step": 4700 }, { "epoch": 0.93, "learning_rate": 9.304622678783091e-05, "loss": 0.6119, "step": 4710 }, { "epoch": 0.93, "learning_rate": 9.324377716317662e-05, "loss": 0.6284, "step": 4720 }, { "epoch": 0.93, "learning_rate": 9.344132753852232e-05, "loss": 0.5956, "step": 4730 }, { "epoch": 0.94, "learning_rate": 9.363887791386804e-05, "loss": 0.5773, "step": 4740 }, { "epoch": 0.94, "learning_rate": 9.383642828921375e-05, "loss": 0.6735, "step": 4750 }, { "epoch": 0.94, "learning_rate": 9.403397866455947e-05, "loss": 0.6019, "step": 4760 }, { "epoch": 0.94, "learning_rate": 9.423152903990517e-05, "loss": 0.6271, "step": 4770 }, { "epoch": 0.94, "learning_rate": 9.442907941525089e-05, "loss": 0.6201, "step": 4780 }, { "epoch": 0.95, "learning_rate": 9.462662979059661e-05, "loss": 0.6904, "step": 4790 }, { "epoch": 0.95, "learning_rate": 9.482418016594232e-05, "loss": 0.6405, "step": 4800 }, { "epoch": 0.95, "learning_rate": 9.502173054128804e-05, "loss": 0.5189, "step": 4810 }, { "epoch": 0.95, "learning_rate": 9.521928091663374e-05, "loss": 0.623, "step": 4820 }, { "epoch": 0.95, "learning_rate": 9.541683129197946e-05, "loss": 0.6278, "step": 4830 }, { "epoch": 0.96, "learning_rate": 9.561438166732517e-05, "loss": 0.6264, "step": 4840 }, { "epoch": 0.96, "learning_rate": 9.581193204267089e-05, "loss": 0.6407, "step": 4850 }, { "epoch": 0.96, "learning_rate": 9.60094824180166e-05, "loss": 0.6766, "step": 4860 }, { "epoch": 0.96, "learning_rate": 9.620703279336231e-05, "loss": 0.6492, "step": 4870 }, { "epoch": 0.96, "learning_rate": 9.640458316870803e-05, "loss": 0.6087, "step": 4880 }, { "epoch": 0.97, "learning_rate": 9.660213354405374e-05, "loss": 0.5838, "step": 4890 }, { "epoch": 0.97, "learning_rate": 9.679968391939946e-05, "loss": 0.6389, "step": 4900 }, { "epoch": 0.97, "learning_rate": 9.699723429474516e-05, "loss": 0.664, "step": 4910 }, { "epoch": 0.97, "learning_rate": 9.719478467009087e-05, "loss": 0.6164, "step": 4920 }, { "epoch": 0.97, "learning_rate": 9.739233504543659e-05, "loss": 0.6112, "step": 4930 }, { "epoch": 0.98, "learning_rate": 9.75898854207823e-05, "loss": 0.5928, "step": 4940 }, { "epoch": 0.98, "learning_rate": 9.778743579612802e-05, "loss": 0.6131, "step": 4950 }, { "epoch": 0.98, "learning_rate": 9.798498617147372e-05, "loss": 0.6221, "step": 4960 }, { "epoch": 0.98, "learning_rate": 9.818253654681944e-05, "loss": 0.6302, "step": 4970 }, { "epoch": 0.98, "learning_rate": 9.838008692216516e-05, "loss": 0.6185, "step": 4980 }, { "epoch": 0.99, "learning_rate": 9.857763729751087e-05, "loss": 0.6397, "step": 4990 }, { "epoch": 0.99, "learning_rate": 9.877518767285659e-05, "loss": 0.6381, "step": 5000 }, { "epoch": 0.99, "learning_rate": 9.897273804820229e-05, "loss": 0.6274, "step": 5010 }, { "epoch": 0.99, "learning_rate": 9.917028842354801e-05, "loss": 0.5895, "step": 5020 }, { "epoch": 0.99, "learning_rate": 9.936783879889372e-05, "loss": 0.6603, "step": 5030 }, { "epoch": 1.0, "learning_rate": 9.956538917423944e-05, "loss": 0.5812, "step": 5040 }, { "epoch": 1.0, "learning_rate": 9.976293954958514e-05, "loss": 0.6215, "step": 5050 }, { "epoch": 1.0, "learning_rate": 9.996048992493086e-05, "loss": 0.5478, "step": 5060 }, { "epoch": 1.0, "learning_rate": 9.996048992493086e-05, "loss": 0.5893, "step": 5070 }, { "epoch": 1.0, "learning_rate": 9.991110233109443e-05, "loss": 0.6172, "step": 5080 }, { "epoch": 1.01, "learning_rate": 9.986171473725801e-05, "loss": 0.609, "step": 5090 }, { "epoch": 1.01, "learning_rate": 9.981232714342158e-05, "loss": 0.5751, "step": 5100 }, { "epoch": 1.01, "learning_rate": 9.976293954958514e-05, "loss": 0.6062, "step": 5110 }, { "epoch": 1.01, "learning_rate": 9.971355195574872e-05, "loss": 0.5886, "step": 5120 }, { "epoch": 1.01, "learning_rate": 9.966416436191229e-05, "loss": 0.5286, "step": 5130 }, { "epoch": 1.02, "learning_rate": 9.961477676807587e-05, "loss": 0.611, "step": 5140 }, { "epoch": 1.02, "learning_rate": 9.956538917423944e-05, "loss": 0.5796, "step": 5150 }, { "epoch": 1.02, "learning_rate": 9.9516001580403e-05, "loss": 0.5771, "step": 5160 }, { "epoch": 1.02, "learning_rate": 9.946661398656658e-05, "loss": 0.5545, "step": 5170 }, { "epoch": 1.02, "learning_rate": 9.941722639273015e-05, "loss": 0.5793, "step": 5180 }, { "epoch": 1.03, "learning_rate": 9.936783879889372e-05, "loss": 0.6067, "step": 5190 }, { "epoch": 1.03, "learning_rate": 9.93184512050573e-05, "loss": 0.5685, "step": 5200 }, { "epoch": 1.03, "learning_rate": 9.926906361122087e-05, "loss": 0.5841, "step": 5210 }, { "epoch": 1.03, "learning_rate": 9.921967601738443e-05, "loss": 0.5445, "step": 5220 }, { "epoch": 1.03, "learning_rate": 9.917028842354801e-05, "loss": 0.5709, "step": 5230 }, { "epoch": 1.04, "learning_rate": 9.912090082971158e-05, "loss": 0.607, "step": 5240 }, { "epoch": 1.04, "learning_rate": 9.907151323587516e-05, "loss": 0.5702, "step": 5250 }, { "epoch": 1.04, "learning_rate": 9.902212564203873e-05, "loss": 0.5739, "step": 5260 }, { "epoch": 1.04, "learning_rate": 9.897273804820229e-05, "loss": 0.6375, "step": 5270 }, { "epoch": 1.04, "learning_rate": 9.892335045436587e-05, "loss": 0.6164, "step": 5280 }, { "epoch": 1.04, "learning_rate": 9.887396286052944e-05, "loss": 0.6189, "step": 5290 }, { "epoch": 1.05, "learning_rate": 9.8824575266693e-05, "loss": 0.5755, "step": 5300 }, { "epoch": 1.05, "learning_rate": 9.877518767285659e-05, "loss": 0.5973, "step": 5310 }, { "epoch": 1.05, "learning_rate": 9.872580007902015e-05, "loss": 0.6061, "step": 5320 }, { "epoch": 1.05, "learning_rate": 9.867641248518372e-05, "loss": 0.5847, "step": 5330 }, { "epoch": 1.05, "learning_rate": 9.86270248913473e-05, "loss": 0.5368, "step": 5340 }, { "epoch": 1.06, "learning_rate": 9.857763729751087e-05, "loss": 0.5204, "step": 5350 }, { "epoch": 1.06, "learning_rate": 9.852824970367445e-05, "loss": 0.5737, "step": 5360 }, { "epoch": 1.06, "learning_rate": 9.847886210983801e-05, "loss": 0.5824, "step": 5370 }, { "epoch": 1.06, "learning_rate": 9.842947451600158e-05, "loss": 0.5648, "step": 5380 }, { "epoch": 1.06, "learning_rate": 9.838008692216516e-05, "loss": 0.5426, "step": 5390 }, { "epoch": 1.07, "learning_rate": 9.833069932832873e-05, "loss": 0.6269, "step": 5400 }, { "epoch": 1.07, "learning_rate": 9.82813117344923e-05, "loss": 0.5373, "step": 5410 }, { "epoch": 1.07, "learning_rate": 9.823192414065587e-05, "loss": 0.5895, "step": 5420 }, { "epoch": 1.07, "learning_rate": 9.818253654681944e-05, "loss": 0.5642, "step": 5430 }, { "epoch": 1.07, "learning_rate": 9.813314895298301e-05, "loss": 0.5318, "step": 5440 }, { "epoch": 1.08, "learning_rate": 9.808376135914659e-05, "loss": 0.6078, "step": 5450 }, { "epoch": 1.08, "learning_rate": 9.803437376531016e-05, "loss": 0.5631, "step": 5460 }, { "epoch": 1.08, "learning_rate": 9.798498617147372e-05, "loss": 0.6024, "step": 5470 }, { "epoch": 1.08, "learning_rate": 9.79355985776373e-05, "loss": 0.6337, "step": 5480 }, { "epoch": 1.08, "learning_rate": 9.788621098380087e-05, "loss": 0.5616, "step": 5490 }, { "epoch": 1.09, "learning_rate": 9.783682338996445e-05, "loss": 0.5906, "step": 5500 }, { "epoch": 1.09, "learning_rate": 9.778743579612802e-05, "loss": 0.6046, "step": 5510 }, { "epoch": 1.09, "learning_rate": 9.773804820229158e-05, "loss": 0.5795, "step": 5520 }, { "epoch": 1.09, "learning_rate": 9.768866060845516e-05, "loss": 0.6262, "step": 5530 }, { "epoch": 1.09, "learning_rate": 9.763927301461874e-05, "loss": 0.6531, "step": 5540 }, { "epoch": 1.1, "learning_rate": 9.75898854207823e-05, "loss": 0.6423, "step": 5550 }, { "epoch": 1.1, "learning_rate": 9.754049782694588e-05, "loss": 0.6074, "step": 5560 }, { "epoch": 1.1, "learning_rate": 9.749111023310946e-05, "loss": 0.5798, "step": 5570 }, { "epoch": 1.1, "learning_rate": 9.744172263927301e-05, "loss": 0.5806, "step": 5580 }, { "epoch": 1.1, "learning_rate": 9.739233504543659e-05, "loss": 0.5655, "step": 5590 }, { "epoch": 1.11, "learning_rate": 9.734294745160016e-05, "loss": 0.523, "step": 5600 }, { "epoch": 1.11, "learning_rate": 9.729355985776374e-05, "loss": 0.566, "step": 5610 }, { "epoch": 1.11, "learning_rate": 9.72441722639273e-05, "loss": 0.5482, "step": 5620 }, { "epoch": 1.11, "learning_rate": 9.719478467009087e-05, "loss": 0.5899, "step": 5630 }, { "epoch": 1.11, "learning_rate": 9.714539707625445e-05, "loss": 0.5508, "step": 5640 }, { "epoch": 1.12, "learning_rate": 9.709600948241803e-05, "loss": 0.5866, "step": 5650 }, { "epoch": 1.12, "learning_rate": 9.704662188858158e-05, "loss": 0.5878, "step": 5660 }, { "epoch": 1.12, "learning_rate": 9.699723429474516e-05, "loss": 0.5563, "step": 5670 }, { "epoch": 1.12, "learning_rate": 9.694784670090875e-05, "loss": 0.6075, "step": 5680 }, { "epoch": 1.12, "learning_rate": 9.68984591070723e-05, "loss": 0.6032, "step": 5690 }, { "epoch": 1.13, "learning_rate": 9.684907151323588e-05, "loss": 0.5908, "step": 5700 }, { "epoch": 1.13, "learning_rate": 9.679968391939946e-05, "loss": 0.5863, "step": 5710 }, { "epoch": 1.13, "learning_rate": 9.675029632556303e-05, "loss": 0.5858, "step": 5720 }, { "epoch": 1.13, "learning_rate": 9.670090873172659e-05, "loss": 0.5929, "step": 5730 }, { "epoch": 1.13, "learning_rate": 9.665152113789017e-05, "loss": 0.5891, "step": 5740 }, { "epoch": 1.14, "learning_rate": 9.660213354405374e-05, "loss": 0.618, "step": 5750 }, { "epoch": 1.14, "learning_rate": 9.65527459502173e-05, "loss": 0.6104, "step": 5760 }, { "epoch": 1.14, "learning_rate": 9.650335835638089e-05, "loss": 0.588, "step": 5770 }, { "epoch": 1.14, "learning_rate": 9.645397076254445e-05, "loss": 0.6054, "step": 5780 }, { "epoch": 1.14, "learning_rate": 9.640458316870803e-05, "loss": 0.5846, "step": 5790 }, { "epoch": 1.15, "learning_rate": 9.635519557487159e-05, "loss": 0.6161, "step": 5800 }, { "epoch": 1.15, "learning_rate": 9.630580798103517e-05, "loss": 0.5995, "step": 5810 }, { "epoch": 1.15, "learning_rate": 9.625642038719875e-05, "loss": 0.5678, "step": 5820 }, { "epoch": 1.15, "learning_rate": 9.620703279336231e-05, "loss": 0.6255, "step": 5830 }, { "epoch": 1.15, "learning_rate": 9.615764519952588e-05, "loss": 0.5903, "step": 5840 }, { "epoch": 1.16, "learning_rate": 9.610825760568946e-05, "loss": 0.5584, "step": 5850 }, { "epoch": 1.16, "learning_rate": 9.605887001185303e-05, "loss": 0.5297, "step": 5860 }, { "epoch": 1.16, "learning_rate": 9.60094824180166e-05, "loss": 0.5777, "step": 5870 }, { "epoch": 1.16, "learning_rate": 9.596009482418017e-05, "loss": 0.6187, "step": 5880 }, { "epoch": 1.16, "learning_rate": 9.591070723034374e-05, "loss": 0.5935, "step": 5890 }, { "epoch": 1.17, "learning_rate": 9.586131963650732e-05, "loss": 0.5897, "step": 5900 }, { "epoch": 1.17, "learning_rate": 9.581193204267089e-05, "loss": 0.5581, "step": 5910 }, { "epoch": 1.17, "learning_rate": 9.576254444883445e-05, "loss": 0.6178, "step": 5920 }, { "epoch": 1.17, "learning_rate": 9.571315685499804e-05, "loss": 0.5879, "step": 5930 }, { "epoch": 1.17, "learning_rate": 9.56637692611616e-05, "loss": 0.6004, "step": 5940 }, { "epoch": 1.18, "learning_rate": 9.561438166732517e-05, "loss": 0.5726, "step": 5950 }, { "epoch": 1.18, "learning_rate": 9.556499407348875e-05, "loss": 0.5794, "step": 5960 }, { "epoch": 1.18, "learning_rate": 9.551560647965232e-05, "loss": 0.586, "step": 5970 }, { "epoch": 1.18, "learning_rate": 9.546621888581588e-05, "loss": 0.5863, "step": 5980 }, { "epoch": 1.18, "learning_rate": 9.541683129197946e-05, "loss": 0.6062, "step": 5990 }, { "epoch": 1.19, "learning_rate": 9.536744369814303e-05, "loss": 0.6103, "step": 6000 }, { "epoch": 1.19, "learning_rate": 9.531805610430661e-05, "loss": 0.6179, "step": 6010 }, { "epoch": 1.19, "learning_rate": 9.526866851047018e-05, "loss": 0.5564, "step": 6020 }, { "epoch": 1.19, "learning_rate": 9.521928091663374e-05, "loss": 0.5664, "step": 6030 }, { "epoch": 1.19, "learning_rate": 9.516989332279732e-05, "loss": 0.5681, "step": 6040 }, { "epoch": 1.2, "learning_rate": 9.512050572896089e-05, "loss": 0.5789, "step": 6050 }, { "epoch": 1.2, "learning_rate": 9.507111813512446e-05, "loss": 0.59, "step": 6060 }, { "epoch": 1.2, "learning_rate": 9.502173054128804e-05, "loss": 0.5384, "step": 6070 }, { "epoch": 1.2, "learning_rate": 9.49723429474516e-05, "loss": 0.5677, "step": 6080 }, { "epoch": 1.2, "learning_rate": 9.492295535361517e-05, "loss": 0.6119, "step": 6090 }, { "epoch": 1.2, "learning_rate": 9.487356775977875e-05, "loss": 0.5626, "step": 6100 }, { "epoch": 1.21, "learning_rate": 9.482418016594232e-05, "loss": 0.5925, "step": 6110 }, { "epoch": 1.21, "learning_rate": 9.477479257210588e-05, "loss": 0.5993, "step": 6120 }, { "epoch": 1.21, "learning_rate": 9.472540497826946e-05, "loss": 0.5264, "step": 6130 }, { "epoch": 1.21, "learning_rate": 9.467601738443303e-05, "loss": 0.5545, "step": 6140 }, { "epoch": 1.21, "learning_rate": 9.462662979059661e-05, "loss": 0.5822, "step": 6150 }, { "epoch": 1.22, "learning_rate": 9.457724219676018e-05, "loss": 0.5915, "step": 6160 }, { "epoch": 1.22, "learning_rate": 9.452785460292374e-05, "loss": 0.5689, "step": 6170 }, { "epoch": 1.22, "learning_rate": 9.447846700908733e-05, "loss": 0.5721, "step": 6180 }, { "epoch": 1.22, "learning_rate": 9.442907941525089e-05, "loss": 0.5692, "step": 6190 }, { "epoch": 1.22, "learning_rate": 9.437969182141446e-05, "loss": 0.5578, "step": 6200 }, { "epoch": 1.23, "learning_rate": 9.433030422757804e-05, "loss": 0.5678, "step": 6210 }, { "epoch": 1.23, "learning_rate": 9.42809166337416e-05, "loss": 0.5738, "step": 6220 }, { "epoch": 1.23, "learning_rate": 9.423152903990517e-05, "loss": 0.5563, "step": 6230 }, { "epoch": 1.23, "learning_rate": 9.418214144606875e-05, "loss": 0.5703, "step": 6240 }, { "epoch": 1.23, "learning_rate": 9.413275385223232e-05, "loss": 0.5612, "step": 6250 }, { "epoch": 1.24, "learning_rate": 9.40833662583959e-05, "loss": 0.5578, "step": 6260 }, { "epoch": 1.24, "learning_rate": 9.403397866455947e-05, "loss": 0.609, "step": 6270 }, { "epoch": 1.24, "learning_rate": 9.398459107072303e-05, "loss": 0.5784, "step": 6280 }, { "epoch": 1.24, "learning_rate": 9.393520347688661e-05, "loss": 0.5322, "step": 6290 }, { "epoch": 1.24, "learning_rate": 9.38858158830502e-05, "loss": 0.5788, "step": 6300 }, { "epoch": 1.25, "learning_rate": 9.383642828921375e-05, "loss": 0.5519, "step": 6310 }, { "epoch": 1.25, "learning_rate": 9.378704069537733e-05, "loss": 0.5735, "step": 6320 }, { "epoch": 1.25, "learning_rate": 9.373765310154091e-05, "loss": 0.5542, "step": 6330 }, { "epoch": 1.25, "learning_rate": 9.368826550770446e-05, "loss": 0.5588, "step": 6340 }, { "epoch": 1.25, "learning_rate": 9.363887791386804e-05, "loss": 0.6296, "step": 6350 }, { "epoch": 1.26, "learning_rate": 9.358949032003162e-05, "loss": 0.592, "step": 6360 }, { "epoch": 1.26, "learning_rate": 9.354010272619519e-05, "loss": 0.54, "step": 6370 }, { "epoch": 1.26, "learning_rate": 9.349071513235875e-05, "loss": 0.5527, "step": 6380 }, { "epoch": 1.26, "learning_rate": 9.344132753852232e-05, "loss": 0.5999, "step": 6390 }, { "epoch": 1.26, "learning_rate": 9.33919399446859e-05, "loss": 0.5323, "step": 6400 }, { "epoch": 1.27, "learning_rate": 9.334255235084947e-05, "loss": 0.5933, "step": 6410 }, { "epoch": 1.27, "learning_rate": 9.329316475701303e-05, "loss": 0.5564, "step": 6420 }, { "epoch": 1.27, "learning_rate": 9.324377716317662e-05, "loss": 0.6013, "step": 6430 }, { "epoch": 1.27, "learning_rate": 9.31943895693402e-05, "loss": 0.5646, "step": 6440 }, { "epoch": 1.27, "learning_rate": 9.314500197550375e-05, "loss": 0.5512, "step": 6450 }, { "epoch": 1.28, "learning_rate": 9.309561438166733e-05, "loss": 0.5529, "step": 6460 }, { "epoch": 1.28, "learning_rate": 9.304622678783091e-05, "loss": 0.6227, "step": 6470 }, { "epoch": 1.28, "learning_rate": 9.299683919399448e-05, "loss": 0.5761, "step": 6480 }, { "epoch": 1.28, "learning_rate": 9.294745160015804e-05, "loss": 0.5036, "step": 6490 }, { "epoch": 1.28, "learning_rate": 9.289806400632162e-05, "loss": 0.5148, "step": 6500 }, { "epoch": 1.29, "learning_rate": 9.284867641248519e-05, "loss": 0.5503, "step": 6510 }, { "epoch": 1.29, "learning_rate": 9.279928881864876e-05, "loss": 0.5882, "step": 6520 }, { "epoch": 1.29, "learning_rate": 9.274990122481234e-05, "loss": 0.5803, "step": 6530 }, { "epoch": 1.29, "learning_rate": 9.27005136309759e-05, "loss": 0.5741, "step": 6540 }, { "epoch": 1.29, "learning_rate": 9.265112603713948e-05, "loss": 0.6181, "step": 6550 }, { "epoch": 1.3, "learning_rate": 9.260173844330304e-05, "loss": 0.594, "step": 6560 }, { "epoch": 1.3, "learning_rate": 9.255235084946662e-05, "loss": 0.5033, "step": 6570 }, { "epoch": 1.3, "learning_rate": 9.25029632556302e-05, "loss": 0.6282, "step": 6580 }, { "epoch": 1.3, "learning_rate": 9.245357566179375e-05, "loss": 0.5376, "step": 6590 }, { "epoch": 1.3, "learning_rate": 9.240418806795733e-05, "loss": 0.5705, "step": 6600 }, { "epoch": 1.31, "learning_rate": 9.235480047412091e-05, "loss": 0.5539, "step": 6610 }, { "epoch": 1.31, "learning_rate": 9.230541288028448e-05, "loss": 0.5966, "step": 6620 }, { "epoch": 1.31, "learning_rate": 9.225602528644804e-05, "loss": 0.5605, "step": 6630 }, { "epoch": 1.31, "learning_rate": 9.220663769261162e-05, "loss": 0.6484, "step": 6640 }, { "epoch": 1.31, "learning_rate": 9.215725009877519e-05, "loss": 0.5461, "step": 6650 }, { "epoch": 1.32, "learning_rate": 9.210786250493877e-05, "loss": 0.6082, "step": 6660 }, { "epoch": 1.32, "learning_rate": 9.205847491110234e-05, "loss": 0.5517, "step": 6670 }, { "epoch": 1.32, "learning_rate": 9.20090873172659e-05, "loss": 0.5512, "step": 6680 }, { "epoch": 1.32, "learning_rate": 9.195969972342949e-05, "loss": 0.4835, "step": 6690 }, { "epoch": 1.32, "learning_rate": 9.191031212959305e-05, "loss": 0.5864, "step": 6700 }, { "epoch": 1.33, "learning_rate": 9.186092453575662e-05, "loss": 0.5513, "step": 6710 }, { "epoch": 1.33, "learning_rate": 9.18115369419202e-05, "loss": 0.5527, "step": 6720 }, { "epoch": 1.33, "learning_rate": 9.176214934808377e-05, "loss": 0.5251, "step": 6730 }, { "epoch": 1.33, "learning_rate": 9.171276175424733e-05, "loss": 0.5467, "step": 6740 }, { "epoch": 1.33, "learning_rate": 9.166337416041091e-05, "loss": 0.5539, "step": 6750 }, { "epoch": 1.34, "learning_rate": 9.161398656657448e-05, "loss": 0.5269, "step": 6760 }, { "epoch": 1.34, "learning_rate": 9.156459897273805e-05, "loss": 0.5198, "step": 6770 }, { "epoch": 1.34, "learning_rate": 9.151521137890163e-05, "loss": 0.5678, "step": 6780 }, { "epoch": 1.34, "learning_rate": 9.146582378506519e-05, "loss": 0.6333, "step": 6790 }, { "epoch": 1.34, "learning_rate": 9.141643619122877e-05, "loss": 0.5445, "step": 6800 }, { "epoch": 1.35, "learning_rate": 9.136704859739234e-05, "loss": 0.5594, "step": 6810 }, { "epoch": 1.35, "learning_rate": 9.131766100355591e-05, "loss": 0.5557, "step": 6820 }, { "epoch": 1.35, "learning_rate": 9.126827340971949e-05, "loss": 0.5797, "step": 6830 }, { "epoch": 1.35, "learning_rate": 9.121888581588305e-05, "loss": 0.5981, "step": 6840 }, { "epoch": 1.35, "learning_rate": 9.116949822204662e-05, "loss": 0.535, "step": 6850 }, { "epoch": 1.35, "learning_rate": 9.11201106282102e-05, "loss": 0.5275, "step": 6860 }, { "epoch": 1.36, "learning_rate": 9.107072303437377e-05, "loss": 0.5531, "step": 6870 }, { "epoch": 1.36, "learning_rate": 9.102133544053733e-05, "loss": 0.5669, "step": 6880 }, { "epoch": 1.36, "learning_rate": 9.097194784670091e-05, "loss": 0.5635, "step": 6890 }, { "epoch": 1.36, "learning_rate": 9.092256025286448e-05, "loss": 0.5911, "step": 6900 }, { "epoch": 1.36, "learning_rate": 9.087317265902806e-05, "loss": 0.5518, "step": 6910 }, { "epoch": 1.37, "learning_rate": 9.082378506519163e-05, "loss": 0.6044, "step": 6920 }, { "epoch": 1.37, "learning_rate": 9.07743974713552e-05, "loss": 0.5872, "step": 6930 }, { "epoch": 1.37, "learning_rate": 9.072500987751878e-05, "loss": 0.5214, "step": 6940 }, { "epoch": 1.37, "learning_rate": 9.067562228368236e-05, "loss": 0.5623, "step": 6950 }, { "epoch": 1.37, "learning_rate": 9.062623468984591e-05, "loss": 0.5604, "step": 6960 }, { "epoch": 1.38, "learning_rate": 9.057684709600949e-05, "loss": 0.5896, "step": 6970 }, { "epoch": 1.38, "learning_rate": 9.052745950217306e-05, "loss": 0.5493, "step": 6980 }, { "epoch": 1.38, "learning_rate": 9.047807190833662e-05, "loss": 0.4864, "step": 6990 }, { "epoch": 1.38, "learning_rate": 9.04286843145002e-05, "loss": 0.5621, "step": 7000 } ], "logging_steps": 10, "max_steps": 25310, "num_train_epochs": 5, "save_steps": 7000, "total_flos": 1.8202371090127258e+17, "trial_name": null, "trial_params": null }