hsethu's picture
Upload folder using huggingface_hub
2abeb7e verified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 3.0,
"eval_steps": 500,
"global_step": 67458,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0,
"learning_rate": 9.991105576803343e-07,
"loss": 3.5693,
"step": 64
},
{
"epoch": 0.01,
"learning_rate": 9.981618192060246e-07,
"loss": 1.3039,
"step": 128
},
{
"epoch": 0.01,
"learning_rate": 9.972130807317145e-07,
"loss": 0.6178,
"step": 192
},
{
"epoch": 0.01,
"learning_rate": 9.962643422574046e-07,
"loss": 0.2217,
"step": 256
},
{
"epoch": 0.01,
"learning_rate": 9.953156037830946e-07,
"loss": 0.135,
"step": 320
},
{
"epoch": 0.02,
"learning_rate": 9.943668653087847e-07,
"loss": 0.2547,
"step": 384
},
{
"epoch": 0.02,
"learning_rate": 9.934181268344748e-07,
"loss": 0.2357,
"step": 448
},
{
"epoch": 0.02,
"learning_rate": 9.92513860476148e-07,
"loss": 0.4032,
"step": 512
},
{
"epoch": 0.03,
"learning_rate": 9.915651220018381e-07,
"loss": 0.2513,
"step": 576
},
{
"epoch": 0.03,
"learning_rate": 9.906163835275282e-07,
"loss": 0.2058,
"step": 640
},
{
"epoch": 0.03,
"learning_rate": 9.89667645053218e-07,
"loss": 0.2276,
"step": 704
},
{
"epoch": 0.03,
"learning_rate": 9.887189065789084e-07,
"loss": 0.1798,
"step": 768
},
{
"epoch": 0.04,
"learning_rate": 9.877701681045983e-07,
"loss": 0.2646,
"step": 832
},
{
"epoch": 0.04,
"learning_rate": 9.868214296302884e-07,
"loss": 0.2434,
"step": 896
},
{
"epoch": 0.04,
"learning_rate": 9.858726911559785e-07,
"loss": 0.3211,
"step": 960
},
{
"epoch": 0.05,
"learning_rate": 9.849239526816686e-07,
"loss": 0.1601,
"step": 1024
},
{
"epoch": 0.05,
"learning_rate": 9.839752142073586e-07,
"loss": 0.2012,
"step": 1088
},
{
"epoch": 0.05,
"learning_rate": 9.830264757330487e-07,
"loss": 0.1635,
"step": 1152
},
{
"epoch": 0.05,
"learning_rate": 9.820777372587388e-07,
"loss": 0.2808,
"step": 1216
},
{
"epoch": 0.06,
"learning_rate": 9.811289987844287e-07,
"loss": 0.1622,
"step": 1280
},
{
"epoch": 0.06,
"learning_rate": 9.801802603101188e-07,
"loss": 0.23,
"step": 1344
},
{
"epoch": 0.06,
"learning_rate": 9.792315218358089e-07,
"loss": 0.1615,
"step": 1408
},
{
"epoch": 0.07,
"learning_rate": 9.78282783361499e-07,
"loss": 0.1424,
"step": 1472
},
{
"epoch": 0.07,
"learning_rate": 9.77334044887189e-07,
"loss": 0.2007,
"step": 1536
},
{
"epoch": 0.07,
"learning_rate": 9.763853064128792e-07,
"loss": 0.3649,
"step": 1600
},
{
"epoch": 0.07,
"learning_rate": 9.75436567938569e-07,
"loss": 0.1149,
"step": 1664
},
{
"epoch": 0.08,
"learning_rate": 9.744878294642591e-07,
"loss": 0.1882,
"step": 1728
},
{
"epoch": 0.08,
"learning_rate": 9.735390909899492e-07,
"loss": 0.2598,
"step": 1792
},
{
"epoch": 0.08,
"learning_rate": 9.725903525156393e-07,
"loss": 0.2198,
"step": 1856
},
{
"epoch": 0.09,
"learning_rate": 9.716416140413294e-07,
"loss": 0.1786,
"step": 1920
},
{
"epoch": 0.09,
"learning_rate": 9.706928755670195e-07,
"loss": 0.2713,
"step": 1984
},
{
"epoch": 0.09,
"learning_rate": 9.697441370927096e-07,
"loss": 0.2076,
"step": 2048
},
{
"epoch": 0.09,
"learning_rate": 9.687953986183995e-07,
"loss": 0.2343,
"step": 2112
},
{
"epoch": 0.1,
"learning_rate": 9.678466601440898e-07,
"loss": 0.3182,
"step": 2176
},
{
"epoch": 0.1,
"learning_rate": 9.668979216697796e-07,
"loss": 0.2102,
"step": 2240
},
{
"epoch": 0.1,
"learning_rate": 9.659491831954697e-07,
"loss": 0.2687,
"step": 2304
},
{
"epoch": 0.11,
"learning_rate": 9.650004447211598e-07,
"loss": 0.2186,
"step": 2368
},
{
"epoch": 0.11,
"learning_rate": 9.6405170624685e-07,
"loss": 0.1295,
"step": 2432
},
{
"epoch": 0.11,
"learning_rate": 9.631029677725398e-07,
"loss": 0.1542,
"step": 2496
},
{
"epoch": 0.11,
"learning_rate": 9.621542292982299e-07,
"loss": 0.1365,
"step": 2560
},
{
"epoch": 0.12,
"learning_rate": 9.6120549082392e-07,
"loss": 0.2106,
"step": 2624
},
{
"epoch": 0.12,
"learning_rate": 9.6025675234961e-07,
"loss": 0.2753,
"step": 2688
},
{
"epoch": 0.12,
"learning_rate": 9.593080138753002e-07,
"loss": 0.2162,
"step": 2752
},
{
"epoch": 0.13,
"learning_rate": 9.583592754009902e-07,
"loss": 0.2186,
"step": 2816
},
{
"epoch": 0.13,
"learning_rate": 9.574105369266803e-07,
"loss": 0.0991,
"step": 2880
},
{
"epoch": 0.13,
"learning_rate": 9.564617984523702e-07,
"loss": 0.1616,
"step": 2944
},
{
"epoch": 0.13,
"learning_rate": 9.555130599780605e-07,
"loss": 0.2029,
"step": 3008
},
{
"epoch": 0.14,
"learning_rate": 9.545643215037504e-07,
"loss": 0.1659,
"step": 3072
},
{
"epoch": 0.14,
"learning_rate": 9.536155830294405e-07,
"loss": 0.1887,
"step": 3136
},
{
"epoch": 0.14,
"learning_rate": 9.526668445551306e-07,
"loss": 0.2626,
"step": 3200
},
{
"epoch": 0.15,
"learning_rate": 9.517181060808206e-07,
"loss": 0.1325,
"step": 3264
},
{
"epoch": 0.15,
"learning_rate": 9.507693676065108e-07,
"loss": 0.2125,
"step": 3328
},
{
"epoch": 0.15,
"learning_rate": 9.498206291322007e-07,
"loss": 0.1926,
"step": 3392
},
{
"epoch": 0.15,
"learning_rate": 9.488718906578907e-07,
"loss": 0.2237,
"step": 3456
},
{
"epoch": 0.16,
"learning_rate": 9.479231521835809e-07,
"loss": 0.1283,
"step": 3520
},
{
"epoch": 0.16,
"learning_rate": 9.469744137092709e-07,
"loss": 0.2052,
"step": 3584
},
{
"epoch": 0.16,
"learning_rate": 9.46025675234961e-07,
"loss": 0.1983,
"step": 3648
},
{
"epoch": 0.17,
"learning_rate": 9.45076936760651e-07,
"loss": 0.1976,
"step": 3712
},
{
"epoch": 0.17,
"learning_rate": 9.441281982863411e-07,
"loss": 0.1999,
"step": 3776
},
{
"epoch": 0.17,
"learning_rate": 9.431794598120312e-07,
"loss": 0.2231,
"step": 3840
},
{
"epoch": 0.17,
"learning_rate": 9.422307213377212e-07,
"loss": 0.2057,
"step": 3904
},
{
"epoch": 0.18,
"learning_rate": 9.412819828634113e-07,
"loss": 0.2376,
"step": 3968
},
{
"epoch": 0.18,
"learning_rate": 9.403332443891013e-07,
"loss": 0.1325,
"step": 4032
},
{
"epoch": 0.18,
"learning_rate": 9.393845059147913e-07,
"loss": 0.2595,
"step": 4096
},
{
"epoch": 0.19,
"learning_rate": 9.384357674404815e-07,
"loss": 0.1817,
"step": 4160
},
{
"epoch": 0.19,
"learning_rate": 9.374870289661715e-07,
"loss": 0.2224,
"step": 4224
},
{
"epoch": 0.19,
"learning_rate": 9.365382904918616e-07,
"loss": 0.1444,
"step": 4288
},
{
"epoch": 0.19,
"learning_rate": 9.355895520175517e-07,
"loss": 0.2695,
"step": 4352
},
{
"epoch": 0.2,
"learning_rate": 9.346408135432417e-07,
"loss": 0.1859,
"step": 4416
},
{
"epoch": 0.2,
"learning_rate": 9.336920750689318e-07,
"loss": 0.1551,
"step": 4480
},
{
"epoch": 0.2,
"learning_rate": 9.327433365946217e-07,
"loss": 0.2093,
"step": 4544
},
{
"epoch": 0.2,
"learning_rate": 9.317945981203118e-07,
"loss": 0.1922,
"step": 4608
},
{
"epoch": 0.21,
"learning_rate": 9.308458596460019e-07,
"loss": 0.2056,
"step": 4672
},
{
"epoch": 0.21,
"learning_rate": 9.298971211716919e-07,
"loss": 0.2087,
"step": 4736
},
{
"epoch": 0.21,
"learning_rate": 9.289483826973821e-07,
"loss": 0.1552,
"step": 4800
},
{
"epoch": 0.22,
"learning_rate": 9.279996442230721e-07,
"loss": 0.1619,
"step": 4864
},
{
"epoch": 0.22,
"learning_rate": 9.270509057487621e-07,
"loss": 0.2634,
"step": 4928
},
{
"epoch": 0.22,
"learning_rate": 9.261021672744523e-07,
"loss": 0.1459,
"step": 4992
},
{
"epoch": 0.22,
"learning_rate": 9.251534288001423e-07,
"loss": 0.225,
"step": 5056
},
{
"epoch": 0.23,
"learning_rate": 9.242046903258323e-07,
"loss": 0.1878,
"step": 5120
},
{
"epoch": 0.23,
"learning_rate": 9.232559518515224e-07,
"loss": 0.1286,
"step": 5184
},
{
"epoch": 0.23,
"learning_rate": 9.223072133772124e-07,
"loss": 0.1059,
"step": 5248
},
{
"epoch": 0.24,
"learning_rate": 9.213584749029025e-07,
"loss": 0.1541,
"step": 5312
},
{
"epoch": 0.24,
"learning_rate": 9.204097364285925e-07,
"loss": 0.2427,
"step": 5376
},
{
"epoch": 0.24,
"learning_rate": 9.194609979542827e-07,
"loss": 0.1368,
"step": 5440
},
{
"epoch": 0.24,
"learning_rate": 9.185122594799727e-07,
"loss": 0.2477,
"step": 5504
},
{
"epoch": 0.25,
"learning_rate": 9.175635210056627e-07,
"loss": 0.1385,
"step": 5568
},
{
"epoch": 0.25,
"learning_rate": 9.166147825313529e-07,
"loss": 0.2537,
"step": 5632
},
{
"epoch": 0.25,
"learning_rate": 9.156660440570428e-07,
"loss": 0.2374,
"step": 5696
},
{
"epoch": 0.26,
"learning_rate": 9.147173055827329e-07,
"loss": 0.1942,
"step": 5760
},
{
"epoch": 0.26,
"learning_rate": 9.13768567108423e-07,
"loss": 0.1416,
"step": 5824
},
{
"epoch": 0.26,
"learning_rate": 9.12819828634113e-07,
"loss": 0.2205,
"step": 5888
},
{
"epoch": 0.26,
"learning_rate": 9.118710901598031e-07,
"loss": 0.1265,
"step": 5952
},
{
"epoch": 0.27,
"learning_rate": 9.109223516854932e-07,
"loss": 0.1972,
"step": 6016
},
{
"epoch": 0.27,
"learning_rate": 9.099736132111833e-07,
"loss": 0.2119,
"step": 6080
},
{
"epoch": 0.27,
"learning_rate": 9.090248747368733e-07,
"loss": 0.1284,
"step": 6144
},
{
"epoch": 0.28,
"learning_rate": 9.080761362625632e-07,
"loss": 0.1665,
"step": 6208
},
{
"epoch": 0.28,
"learning_rate": 9.071273977882534e-07,
"loss": 0.1545,
"step": 6272
},
{
"epoch": 0.28,
"learning_rate": 9.061786593139434e-07,
"loss": 0.1505,
"step": 6336
},
{
"epoch": 0.28,
"learning_rate": 9.052299208396334e-07,
"loss": 0.1871,
"step": 6400
},
{
"epoch": 0.29,
"learning_rate": 9.042811823653236e-07,
"loss": 0.2302,
"step": 6464
},
{
"epoch": 0.29,
"learning_rate": 9.033324438910136e-07,
"loss": 0.1395,
"step": 6528
},
{
"epoch": 0.29,
"learning_rate": 9.023837054167037e-07,
"loss": 0.2195,
"step": 6592
},
{
"epoch": 0.3,
"learning_rate": 9.014349669423938e-07,
"loss": 0.2419,
"step": 6656
},
{
"epoch": 0.3,
"learning_rate": 9.004862284680838e-07,
"loss": 0.2173,
"step": 6720
},
{
"epoch": 0.3,
"learning_rate": 8.995374899937739e-07,
"loss": 0.2522,
"step": 6784
},
{
"epoch": 0.3,
"learning_rate": 8.985887515194639e-07,
"loss": 0.2317,
"step": 6848
},
{
"epoch": 0.31,
"learning_rate": 8.97640013045154e-07,
"loss": 0.1959,
"step": 6912
},
{
"epoch": 0.31,
"learning_rate": 8.96691274570844e-07,
"loss": 0.1889,
"step": 6976
},
{
"epoch": 0.31,
"learning_rate": 8.95742536096534e-07,
"loss": 0.2131,
"step": 7040
},
{
"epoch": 0.32,
"learning_rate": 8.947937976222242e-07,
"loss": 0.1321,
"step": 7104
},
{
"epoch": 0.32,
"learning_rate": 8.938450591479142e-07,
"loss": 0.1679,
"step": 7168
},
{
"epoch": 0.32,
"learning_rate": 8.928963206736043e-07,
"loss": 0.1826,
"step": 7232
},
{
"epoch": 0.32,
"learning_rate": 8.919475821992944e-07,
"loss": 0.2144,
"step": 7296
},
{
"epoch": 0.33,
"learning_rate": 8.909988437249843e-07,
"loss": 0.1614,
"step": 7360
},
{
"epoch": 0.33,
"learning_rate": 8.900501052506744e-07,
"loss": 0.241,
"step": 7424
},
{
"epoch": 0.33,
"learning_rate": 8.891013667763645e-07,
"loss": 0.1825,
"step": 7488
},
{
"epoch": 0.34,
"learning_rate": 8.881526283020546e-07,
"loss": 0.17,
"step": 7552
},
{
"epoch": 0.34,
"learning_rate": 8.872038898277446e-07,
"loss": 0.1562,
"step": 7616
},
{
"epoch": 0.34,
"learning_rate": 8.862551513534347e-07,
"loss": 0.2264,
"step": 7680
},
{
"epoch": 0.34,
"learning_rate": 8.853064128791248e-07,
"loss": 0.1325,
"step": 7744
},
{
"epoch": 0.35,
"learning_rate": 8.843576744048148e-07,
"loss": 0.1601,
"step": 7808
},
{
"epoch": 0.35,
"learning_rate": 8.83408935930505e-07,
"loss": 0.1784,
"step": 7872
},
{
"epoch": 0.35,
"learning_rate": 8.82460197456195e-07,
"loss": 0.1447,
"step": 7936
},
{
"epoch": 0.36,
"learning_rate": 8.815114589818849e-07,
"loss": 0.166,
"step": 8000
},
{
"epoch": 0.36,
"learning_rate": 8.80562720507575e-07,
"loss": 0.1554,
"step": 8064
},
{
"epoch": 0.36,
"learning_rate": 8.796139820332651e-07,
"loss": 0.1097,
"step": 8128
},
{
"epoch": 0.36,
"learning_rate": 8.786652435589552e-07,
"loss": 0.1322,
"step": 8192
},
{
"epoch": 0.37,
"learning_rate": 8.777165050846452e-07,
"loss": 0.262,
"step": 8256
},
{
"epoch": 0.37,
"learning_rate": 8.767677666103353e-07,
"loss": 0.1755,
"step": 8320
},
{
"epoch": 0.37,
"learning_rate": 8.758190281360254e-07,
"loss": 0.1646,
"step": 8384
},
{
"epoch": 0.38,
"learning_rate": 8.748702896617154e-07,
"loss": 0.1481,
"step": 8448
},
{
"epoch": 0.38,
"learning_rate": 8.739215511874055e-07,
"loss": 0.0985,
"step": 8512
},
{
"epoch": 0.38,
"learning_rate": 8.729728127130955e-07,
"loss": 0.1401,
"step": 8576
},
{
"epoch": 0.38,
"learning_rate": 8.720240742387855e-07,
"loss": 0.2057,
"step": 8640
},
{
"epoch": 0.39,
"learning_rate": 8.710753357644757e-07,
"loss": 0.1677,
"step": 8704
},
{
"epoch": 0.39,
"learning_rate": 8.701265972901657e-07,
"loss": 0.1398,
"step": 8768
},
{
"epoch": 0.39,
"learning_rate": 8.691778588158557e-07,
"loss": 0.1665,
"step": 8832
},
{
"epoch": 0.4,
"learning_rate": 8.682439443802069e-07,
"loss": 0.1737,
"step": 8896
},
{
"epoch": 0.4,
"learning_rate": 8.67295205905897e-07,
"loss": 0.1642,
"step": 8960
},
{
"epoch": 0.4,
"learning_rate": 8.66346467431587e-07,
"loss": 0.1487,
"step": 9024
},
{
"epoch": 0.4,
"learning_rate": 8.653977289572771e-07,
"loss": 0.186,
"step": 9088
},
{
"epoch": 0.41,
"learning_rate": 8.644489904829672e-07,
"loss": 0.1902,
"step": 9152
},
{
"epoch": 0.41,
"learning_rate": 8.635002520086571e-07,
"loss": 0.1977,
"step": 9216
},
{
"epoch": 0.41,
"learning_rate": 8.625515135343473e-07,
"loss": 0.1853,
"step": 9280
},
{
"epoch": 0.42,
"learning_rate": 8.616027750600373e-07,
"loss": 0.1156,
"step": 9344
},
{
"epoch": 0.42,
"learning_rate": 8.606540365857273e-07,
"loss": 0.179,
"step": 9408
},
{
"epoch": 0.42,
"learning_rate": 8.597052981114175e-07,
"loss": 0.1978,
"step": 9472
},
{
"epoch": 0.42,
"learning_rate": 8.587565596371075e-07,
"loss": 0.1735,
"step": 9536
},
{
"epoch": 0.43,
"learning_rate": 8.578078211627976e-07,
"loss": 0.1579,
"step": 9600
},
{
"epoch": 0.43,
"learning_rate": 8.568590826884876e-07,
"loss": 0.1444,
"step": 9664
},
{
"epoch": 0.43,
"learning_rate": 8.559103442141777e-07,
"loss": 0.1664,
"step": 9728
},
{
"epoch": 0.44,
"learning_rate": 8.549616057398677e-07,
"loss": 0.1715,
"step": 9792
},
{
"epoch": 0.44,
"learning_rate": 8.540128672655577e-07,
"loss": 0.1189,
"step": 9856
},
{
"epoch": 0.44,
"learning_rate": 8.530641287912479e-07,
"loss": 0.1255,
"step": 9920
},
{
"epoch": 0.44,
"learning_rate": 8.521153903169379e-07,
"loss": 0.2314,
"step": 9984
},
{
"epoch": 0.45,
"learning_rate": 8.511814758812891e-07,
"loss": 0.18,
"step": 10048
},
{
"epoch": 0.45,
"learning_rate": 8.502327374069791e-07,
"loss": 0.1948,
"step": 10112
},
{
"epoch": 0.45,
"learning_rate": 8.492839989326692e-07,
"loss": 0.1763,
"step": 10176
},
{
"epoch": 0.46,
"learning_rate": 8.483352604583593e-07,
"loss": 0.1984,
"step": 10240
},
{
"epoch": 0.46,
"learning_rate": 8.473865219840493e-07,
"loss": 0.169,
"step": 10304
},
{
"epoch": 0.46,
"learning_rate": 8.464377835097394e-07,
"loss": 0.1268,
"step": 10368
},
{
"epoch": 0.46,
"learning_rate": 8.454890450354294e-07,
"loss": 0.2261,
"step": 10432
},
{
"epoch": 0.47,
"learning_rate": 8.445403065611195e-07,
"loss": 0.1751,
"step": 10496
},
{
"epoch": 0.47,
"learning_rate": 8.435915680868095e-07,
"loss": 0.1758,
"step": 10560
},
{
"epoch": 0.47,
"learning_rate": 8.426428296124995e-07,
"loss": 0.1565,
"step": 10624
},
{
"epoch": 0.48,
"learning_rate": 8.416940911381897e-07,
"loss": 0.0861,
"step": 10688
},
{
"epoch": 0.48,
"learning_rate": 8.407453526638797e-07,
"loss": 0.1382,
"step": 10752
},
{
"epoch": 0.48,
"learning_rate": 8.397966141895698e-07,
"loss": 0.1268,
"step": 10816
},
{
"epoch": 0.48,
"learning_rate": 8.388478757152599e-07,
"loss": 0.1418,
"step": 10880
},
{
"epoch": 0.49,
"learning_rate": 8.378991372409498e-07,
"loss": 0.2417,
"step": 10944
},
{
"epoch": 0.49,
"learning_rate": 8.369503987666399e-07,
"loss": 0.1285,
"step": 11008
},
{
"epoch": 0.49,
"learning_rate": 8.3600166029233e-07,
"loss": 0.1719,
"step": 11072
},
{
"epoch": 0.5,
"learning_rate": 8.350529218180201e-07,
"loss": 0.1432,
"step": 11136
},
{
"epoch": 0.5,
"learning_rate": 8.341041833437101e-07,
"loss": 0.2443,
"step": 11200
},
{
"epoch": 0.5,
"learning_rate": 8.331554448694002e-07,
"loss": 0.1348,
"step": 11264
},
{
"epoch": 0.5,
"learning_rate": 8.322067063950903e-07,
"loss": 0.1251,
"step": 11328
},
{
"epoch": 0.51,
"learning_rate": 8.312579679207803e-07,
"loss": 0.156,
"step": 11392
},
{
"epoch": 0.51,
"learning_rate": 8.303092294464703e-07,
"loss": 0.3104,
"step": 11456
},
{
"epoch": 0.51,
"learning_rate": 8.293604909721605e-07,
"loss": 0.1834,
"step": 11520
},
{
"epoch": 0.52,
"learning_rate": 8.284117524978504e-07,
"loss": 0.1312,
"step": 11584
},
{
"epoch": 0.52,
"learning_rate": 8.274630140235405e-07,
"loss": 0.1026,
"step": 11648
},
{
"epoch": 0.52,
"learning_rate": 8.265142755492306e-07,
"loss": 0.1805,
"step": 11712
},
{
"epoch": 0.52,
"learning_rate": 8.255655370749206e-07,
"loss": 0.2334,
"step": 11776
},
{
"epoch": 0.53,
"learning_rate": 8.246167986006107e-07,
"loss": 0.1606,
"step": 11840
},
{
"epoch": 0.53,
"learning_rate": 8.236680601263008e-07,
"loss": 0.1009,
"step": 11904
},
{
"epoch": 0.53,
"learning_rate": 8.227193216519909e-07,
"loss": 0.1337,
"step": 11968
},
{
"epoch": 0.54,
"learning_rate": 8.217705831776809e-07,
"loss": 0.2247,
"step": 12032
},
{
"epoch": 0.54,
"learning_rate": 8.20821844703371e-07,
"loss": 0.163,
"step": 12096
},
{
"epoch": 0.54,
"learning_rate": 8.19873106229061e-07,
"loss": 0.1729,
"step": 12160
},
{
"epoch": 0.54,
"learning_rate": 8.18924367754751e-07,
"loss": 0.2133,
"step": 12224
},
{
"epoch": 0.55,
"learning_rate": 8.179756292804412e-07,
"loss": 0.2887,
"step": 12288
},
{
"epoch": 0.55,
"learning_rate": 8.170268908061312e-07,
"loss": 0.128,
"step": 12352
},
{
"epoch": 0.55,
"learning_rate": 8.160781523318212e-07,
"loss": 0.2019,
"step": 12416
},
{
"epoch": 0.56,
"learning_rate": 8.151294138575113e-07,
"loss": 0.128,
"step": 12480
},
{
"epoch": 0.56,
"learning_rate": 8.141806753832014e-07,
"loss": 0.1888,
"step": 12544
},
{
"epoch": 0.56,
"learning_rate": 8.132319369088915e-07,
"loss": 0.2176,
"step": 12608
},
{
"epoch": 0.56,
"learning_rate": 8.122831984345814e-07,
"loss": 0.1555,
"step": 12672
},
{
"epoch": 0.57,
"learning_rate": 8.113344599602715e-07,
"loss": 0.1948,
"step": 12736
},
{
"epoch": 0.57,
"learning_rate": 8.103857214859616e-07,
"loss": 0.233,
"step": 12800
},
{
"epoch": 0.57,
"learning_rate": 8.094369830116516e-07,
"loss": 0.1574,
"step": 12864
},
{
"epoch": 0.57,
"learning_rate": 8.084882445373418e-07,
"loss": 0.1377,
"step": 12928
},
{
"epoch": 0.58,
"learning_rate": 8.075395060630318e-07,
"loss": 0.1563,
"step": 12992
},
{
"epoch": 0.58,
"learning_rate": 8.065907675887218e-07,
"loss": 0.1345,
"step": 13056
},
{
"epoch": 0.58,
"learning_rate": 8.05642029114412e-07,
"loss": 0.1489,
"step": 13120
},
{
"epoch": 0.59,
"learning_rate": 8.047081146787631e-07,
"loss": 0.2524,
"step": 13184
},
{
"epoch": 0.59,
"learning_rate": 8.037742002431142e-07,
"loss": 0.1534,
"step": 13248
},
{
"epoch": 0.59,
"learning_rate": 8.028254617688043e-07,
"loss": 0.136,
"step": 13312
},
{
"epoch": 0.59,
"learning_rate": 8.018767232944942e-07,
"loss": 0.151,
"step": 13376
},
{
"epoch": 0.6,
"learning_rate": 8.009279848201844e-07,
"loss": 0.1423,
"step": 13440
},
{
"epoch": 0.6,
"learning_rate": 7.999792463458744e-07,
"loss": 0.2284,
"step": 13504
},
{
"epoch": 0.6,
"learning_rate": 7.990305078715644e-07,
"loss": 0.1515,
"step": 13568
},
{
"epoch": 0.61,
"learning_rate": 7.980817693972546e-07,
"loss": 0.1431,
"step": 13632
},
{
"epoch": 0.61,
"learning_rate": 7.971330309229446e-07,
"loss": 0.1759,
"step": 13696
},
{
"epoch": 0.61,
"learning_rate": 7.961842924486347e-07,
"loss": 0.2942,
"step": 13760
},
{
"epoch": 0.61,
"learning_rate": 7.952355539743248e-07,
"loss": 0.1382,
"step": 13824
},
{
"epoch": 0.62,
"learning_rate": 7.942868155000148e-07,
"loss": 0.181,
"step": 13888
},
{
"epoch": 0.62,
"learning_rate": 7.933380770257049e-07,
"loss": 0.2471,
"step": 13952
},
{
"epoch": 0.62,
"learning_rate": 7.923893385513949e-07,
"loss": 0.1487,
"step": 14016
},
{
"epoch": 0.63,
"learning_rate": 7.91440600077085e-07,
"loss": 0.1653,
"step": 14080
},
{
"epoch": 0.63,
"learning_rate": 7.90491861602775e-07,
"loss": 0.193,
"step": 14144
},
{
"epoch": 0.63,
"learning_rate": 7.89543123128465e-07,
"loss": 0.115,
"step": 14208
},
{
"epoch": 0.63,
"learning_rate": 7.885943846541552e-07,
"loss": 0.1413,
"step": 14272
},
{
"epoch": 0.64,
"learning_rate": 7.876456461798452e-07,
"loss": 0.1508,
"step": 14336
},
{
"epoch": 0.64,
"learning_rate": 7.866969077055352e-07,
"loss": 0.1752,
"step": 14400
},
{
"epoch": 0.64,
"learning_rate": 7.857481692312254e-07,
"loss": 0.2432,
"step": 14464
},
{
"epoch": 0.65,
"learning_rate": 7.847994307569153e-07,
"loss": 0.1978,
"step": 14528
},
{
"epoch": 0.65,
"learning_rate": 7.838506922826054e-07,
"loss": 0.1445,
"step": 14592
},
{
"epoch": 0.65,
"learning_rate": 7.829019538082955e-07,
"loss": 0.1484,
"step": 14656
},
{
"epoch": 0.65,
"learning_rate": 7.819532153339855e-07,
"loss": 0.1887,
"step": 14720
},
{
"epoch": 0.66,
"learning_rate": 7.810044768596756e-07,
"loss": 0.216,
"step": 14784
},
{
"epoch": 0.66,
"learning_rate": 7.800557383853657e-07,
"loss": 0.1803,
"step": 14848
},
{
"epoch": 0.66,
"learning_rate": 7.791069999110558e-07,
"loss": 0.1332,
"step": 14912
},
{
"epoch": 0.67,
"learning_rate": 7.781582614367458e-07,
"loss": 0.2439,
"step": 14976
},
{
"epoch": 0.67,
"learning_rate": 7.772095229624358e-07,
"loss": 0.1689,
"step": 15040
},
{
"epoch": 0.67,
"learning_rate": 7.76260784488126e-07,
"loss": 0.1823,
"step": 15104
},
{
"epoch": 0.67,
"learning_rate": 7.753120460138159e-07,
"loss": 0.1905,
"step": 15168
},
{
"epoch": 0.68,
"learning_rate": 7.74363307539506e-07,
"loss": 0.2558,
"step": 15232
},
{
"epoch": 0.68,
"learning_rate": 7.734145690651961e-07,
"loss": 0.1531,
"step": 15296
},
{
"epoch": 0.68,
"learning_rate": 7.724658305908861e-07,
"loss": 0.1849,
"step": 15360
},
{
"epoch": 0.69,
"learning_rate": 7.715170921165762e-07,
"loss": 0.1317,
"step": 15424
},
{
"epoch": 0.69,
"learning_rate": 7.705683536422663e-07,
"loss": 0.1096,
"step": 15488
},
{
"epoch": 0.69,
"learning_rate": 7.696196151679564e-07,
"loss": 0.2193,
"step": 15552
},
{
"epoch": 0.69,
"learning_rate": 7.686708766936464e-07,
"loss": 0.1658,
"step": 15616
},
{
"epoch": 0.7,
"learning_rate": 7.677221382193365e-07,
"loss": 0.1553,
"step": 15680
},
{
"epoch": 0.7,
"learning_rate": 7.667733997450265e-07,
"loss": 0.1772,
"step": 15744
},
{
"epoch": 0.7,
"learning_rate": 7.658246612707165e-07,
"loss": 0.2147,
"step": 15808
},
{
"epoch": 0.71,
"learning_rate": 7.648759227964067e-07,
"loss": 0.1096,
"step": 15872
},
{
"epoch": 0.71,
"learning_rate": 7.639271843220967e-07,
"loss": 0.1613,
"step": 15936
},
{
"epoch": 0.71,
"learning_rate": 7.629784458477867e-07,
"loss": 0.1488,
"step": 16000
},
{
"epoch": 0.71,
"learning_rate": 7.620297073734768e-07,
"loss": 0.2256,
"step": 16064
},
{
"epoch": 0.72,
"learning_rate": 7.610809688991669e-07,
"loss": 0.2512,
"step": 16128
},
{
"epoch": 0.72,
"learning_rate": 7.60132230424857e-07,
"loss": 0.1264,
"step": 16192
},
{
"epoch": 0.72,
"learning_rate": 7.59183491950547e-07,
"loss": 0.1162,
"step": 16256
},
{
"epoch": 0.73,
"learning_rate": 7.58234753476237e-07,
"loss": 0.1401,
"step": 16320
},
{
"epoch": 0.73,
"learning_rate": 7.572860150019271e-07,
"loss": 0.1336,
"step": 16384
},
{
"epoch": 0.73,
"learning_rate": 7.563372765276171e-07,
"loss": 0.1234,
"step": 16448
},
{
"epoch": 0.73,
"learning_rate": 7.553885380533072e-07,
"loss": 0.1195,
"step": 16512
},
{
"epoch": 0.74,
"learning_rate": 7.544397995789973e-07,
"loss": 0.2435,
"step": 16576
},
{
"epoch": 0.74,
"learning_rate": 7.534910611046873e-07,
"loss": 0.1109,
"step": 16640
},
{
"epoch": 0.74,
"learning_rate": 7.525423226303775e-07,
"loss": 0.2088,
"step": 16704
},
{
"epoch": 0.75,
"learning_rate": 7.515935841560675e-07,
"loss": 0.141,
"step": 16768
},
{
"epoch": 0.75,
"learning_rate": 7.506448456817574e-07,
"loss": 0.1428,
"step": 16832
},
{
"epoch": 0.75,
"learning_rate": 7.496961072074475e-07,
"loss": 0.1505,
"step": 16896
},
{
"epoch": 0.75,
"learning_rate": 7.487473687331376e-07,
"loss": 0.2152,
"step": 16960
},
{
"epoch": 0.76,
"learning_rate": 7.477986302588277e-07,
"loss": 0.2008,
"step": 17024
},
{
"epoch": 0.76,
"learning_rate": 7.468498917845177e-07,
"loss": 0.1872,
"step": 17088
},
{
"epoch": 0.76,
"learning_rate": 7.459011533102078e-07,
"loss": 0.1313,
"step": 17152
},
{
"epoch": 0.77,
"learning_rate": 7.449524148358979e-07,
"loss": 0.1099,
"step": 17216
},
{
"epoch": 0.77,
"learning_rate": 7.440036763615879e-07,
"loss": 0.138,
"step": 17280
},
{
"epoch": 0.77,
"learning_rate": 7.430549378872781e-07,
"loss": 0.1871,
"step": 17344
},
{
"epoch": 0.77,
"learning_rate": 7.42106199412968e-07,
"loss": 0.18,
"step": 17408
},
{
"epoch": 0.78,
"learning_rate": 7.41157460938658e-07,
"loss": 0.1337,
"step": 17472
},
{
"epoch": 0.78,
"learning_rate": 7.402087224643482e-07,
"loss": 0.1222,
"step": 17536
},
{
"epoch": 0.78,
"learning_rate": 7.392599839900382e-07,
"loss": 0.1434,
"step": 17600
},
{
"epoch": 0.79,
"learning_rate": 7.383112455157283e-07,
"loss": 0.1538,
"step": 17664
},
{
"epoch": 0.79,
"learning_rate": 7.373625070414183e-07,
"loss": 0.1908,
"step": 17728
},
{
"epoch": 0.79,
"learning_rate": 7.364137685671084e-07,
"loss": 0.1244,
"step": 17792
},
{
"epoch": 0.79,
"learning_rate": 7.354650300927985e-07,
"loss": 0.1593,
"step": 17856
},
{
"epoch": 0.8,
"learning_rate": 7.345162916184885e-07,
"loss": 0.1588,
"step": 17920
},
{
"epoch": 0.8,
"learning_rate": 7.335675531441787e-07,
"loss": 0.1639,
"step": 17984
},
{
"epoch": 0.8,
"learning_rate": 7.326188146698686e-07,
"loss": 0.1431,
"step": 18048
},
{
"epoch": 0.81,
"learning_rate": 7.316700761955586e-07,
"loss": 0.2002,
"step": 18112
},
{
"epoch": 0.81,
"learning_rate": 7.307213377212488e-07,
"loss": 0.1761,
"step": 18176
},
{
"epoch": 0.81,
"learning_rate": 7.297725992469388e-07,
"loss": 0.1597,
"step": 18240
},
{
"epoch": 0.81,
"learning_rate": 7.288238607726288e-07,
"loss": 0.1952,
"step": 18304
},
{
"epoch": 0.82,
"learning_rate": 7.27875122298319e-07,
"loss": 0.1843,
"step": 18368
},
{
"epoch": 0.82,
"learning_rate": 7.26926383824009e-07,
"loss": 0.1032,
"step": 18432
},
{
"epoch": 0.82,
"learning_rate": 7.259776453496991e-07,
"loss": 0.1952,
"step": 18496
},
{
"epoch": 0.83,
"learning_rate": 7.25028906875389e-07,
"loss": 0.193,
"step": 18560
},
{
"epoch": 0.83,
"learning_rate": 7.240801684010791e-07,
"loss": 0.137,
"step": 18624
},
{
"epoch": 0.83,
"learning_rate": 7.231314299267692e-07,
"loss": 0.1992,
"step": 18688
},
{
"epoch": 0.83,
"learning_rate": 7.221826914524592e-07,
"loss": 0.138,
"step": 18752
},
{
"epoch": 0.84,
"learning_rate": 7.212339529781494e-07,
"loss": 0.2263,
"step": 18816
},
{
"epoch": 0.84,
"learning_rate": 7.202852145038394e-07,
"loss": 0.2101,
"step": 18880
},
{
"epoch": 0.84,
"learning_rate": 7.193364760295294e-07,
"loss": 0.1731,
"step": 18944
},
{
"epoch": 0.85,
"learning_rate": 7.183877375552196e-07,
"loss": 0.1523,
"step": 19008
},
{
"epoch": 0.85,
"learning_rate": 7.174389990809096e-07,
"loss": 0.1671,
"step": 19072
},
{
"epoch": 0.85,
"learning_rate": 7.164902606065996e-07,
"loss": 0.1549,
"step": 19136
},
{
"epoch": 0.85,
"learning_rate": 7.155415221322897e-07,
"loss": 0.1346,
"step": 19200
},
{
"epoch": 0.86,
"learning_rate": 7.145927836579797e-07,
"loss": 0.2403,
"step": 19264
},
{
"epoch": 0.86,
"learning_rate": 7.136588692223308e-07,
"loss": 0.1909,
"step": 19328
},
{
"epoch": 0.86,
"learning_rate": 7.12710130748021e-07,
"loss": 0.1801,
"step": 19392
},
{
"epoch": 0.87,
"learning_rate": 7.11761392273711e-07,
"loss": 0.1196,
"step": 19456
},
{
"epoch": 0.87,
"learning_rate": 7.10812653799401e-07,
"loss": 0.0749,
"step": 19520
},
{
"epoch": 0.87,
"learning_rate": 7.098639153250912e-07,
"loss": 0.1386,
"step": 19584
},
{
"epoch": 0.87,
"learning_rate": 7.089151768507812e-07,
"loss": 0.219,
"step": 19648
},
{
"epoch": 0.88,
"learning_rate": 7.079664383764713e-07,
"loss": 0.1572,
"step": 19712
},
{
"epoch": 0.88,
"learning_rate": 7.070176999021614e-07,
"loss": 0.191,
"step": 19776
},
{
"epoch": 0.88,
"learning_rate": 7.060689614278513e-07,
"loss": 0.1882,
"step": 19840
},
{
"epoch": 0.89,
"learning_rate": 7.051202229535414e-07,
"loss": 0.1654,
"step": 19904
},
{
"epoch": 0.89,
"learning_rate": 7.041714844792315e-07,
"loss": 0.1397,
"step": 19968
},
{
"epoch": 0.89,
"learning_rate": 7.032227460049216e-07,
"loss": 0.1948,
"step": 20032
},
{
"epoch": 0.89,
"learning_rate": 7.022740075306116e-07,
"loss": 0.2171,
"step": 20096
},
{
"epoch": 0.9,
"learning_rate": 7.013252690563016e-07,
"loss": 0.2474,
"step": 20160
},
{
"epoch": 0.9,
"learning_rate": 7.003765305819918e-07,
"loss": 0.2014,
"step": 20224
},
{
"epoch": 0.9,
"learning_rate": 6.994277921076818e-07,
"loss": 0.1256,
"step": 20288
},
{
"epoch": 0.91,
"learning_rate": 6.984790536333718e-07,
"loss": 0.1634,
"step": 20352
},
{
"epoch": 0.91,
"learning_rate": 6.975303151590619e-07,
"loss": 0.1672,
"step": 20416
},
{
"epoch": 0.91,
"learning_rate": 6.965815766847519e-07,
"loss": 0.1773,
"step": 20480
},
{
"epoch": 0.91,
"learning_rate": 6.95632838210442e-07,
"loss": 0.1157,
"step": 20544
},
{
"epoch": 0.92,
"learning_rate": 6.946840997361321e-07,
"loss": 0.2241,
"step": 20608
},
{
"epoch": 0.92,
"learning_rate": 6.937353612618222e-07,
"loss": 0.1108,
"step": 20672
},
{
"epoch": 0.92,
"learning_rate": 6.927866227875122e-07,
"loss": 0.1821,
"step": 20736
},
{
"epoch": 0.93,
"learning_rate": 6.918378843132023e-07,
"loss": 0.1459,
"step": 20800
},
{
"epoch": 0.93,
"learning_rate": 6.908891458388924e-07,
"loss": 0.2022,
"step": 20864
},
{
"epoch": 0.93,
"learning_rate": 6.899404073645823e-07,
"loss": 0.1864,
"step": 20928
},
{
"epoch": 0.93,
"learning_rate": 6.889916688902723e-07,
"loss": 0.1436,
"step": 20992
},
{
"epoch": 0.94,
"learning_rate": 6.880429304159625e-07,
"loss": 0.1771,
"step": 21056
},
{
"epoch": 0.94,
"learning_rate": 6.870941919416525e-07,
"loss": 0.1782,
"step": 21120
},
{
"epoch": 0.94,
"learning_rate": 6.861454534673426e-07,
"loss": 0.1754,
"step": 21184
},
{
"epoch": 0.94,
"learning_rate": 6.851967149930327e-07,
"loss": 0.1483,
"step": 21248
},
{
"epoch": 0.95,
"learning_rate": 6.842479765187227e-07,
"loss": 0.1373,
"step": 21312
},
{
"epoch": 0.95,
"learning_rate": 6.833140620830739e-07,
"loss": 0.219,
"step": 21376
},
{
"epoch": 0.95,
"learning_rate": 6.82365323608764e-07,
"loss": 0.1474,
"step": 21440
},
{
"epoch": 0.96,
"learning_rate": 6.81416585134454e-07,
"loss": 0.1713,
"step": 21504
},
{
"epoch": 0.96,
"learning_rate": 6.80467846660144e-07,
"loss": 0.1034,
"step": 21568
},
{
"epoch": 0.96,
"learning_rate": 6.795191081858341e-07,
"loss": 0.185,
"step": 21632
},
{
"epoch": 0.96,
"learning_rate": 6.785703697115241e-07,
"loss": 0.284,
"step": 21696
},
{
"epoch": 0.97,
"learning_rate": 6.776216312372143e-07,
"loss": 0.1953,
"step": 21760
},
{
"epoch": 0.97,
"learning_rate": 6.766728927629043e-07,
"loss": 0.168,
"step": 21824
},
{
"epoch": 0.97,
"learning_rate": 6.757241542885943e-07,
"loss": 0.1852,
"step": 21888
},
{
"epoch": 0.98,
"learning_rate": 6.747754158142845e-07,
"loss": 0.1358,
"step": 21952
},
{
"epoch": 0.98,
"learning_rate": 6.738266773399745e-07,
"loss": 0.1885,
"step": 22016
},
{
"epoch": 0.98,
"learning_rate": 6.728779388656646e-07,
"loss": 0.22,
"step": 22080
},
{
"epoch": 0.98,
"learning_rate": 6.719292003913545e-07,
"loss": 0.214,
"step": 22144
},
{
"epoch": 0.99,
"learning_rate": 6.709804619170446e-07,
"loss": 0.1198,
"step": 22208
},
{
"epoch": 0.99,
"learning_rate": 6.700317234427347e-07,
"loss": 0.1458,
"step": 22272
},
{
"epoch": 0.99,
"learning_rate": 6.690829849684247e-07,
"loss": 0.2405,
"step": 22336
},
{
"epoch": 1.0,
"learning_rate": 6.681342464941149e-07,
"loss": 0.1162,
"step": 22400
},
{
"epoch": 1.0,
"learning_rate": 6.671855080198049e-07,
"loss": 0.1508,
"step": 22464
},
{
"epoch": 1.0,
"learning_rate": 6.662367695454949e-07,
"loss": 0.1805,
"step": 22528
},
{
"epoch": 1.0,
"learning_rate": 6.652880310711851e-07,
"loss": 0.0954,
"step": 22592
},
{
"epoch": 1.01,
"learning_rate": 6.643392925968751e-07,
"loss": 0.0756,
"step": 22656
},
{
"epoch": 1.01,
"learning_rate": 6.633905541225651e-07,
"loss": 0.1514,
"step": 22720
},
{
"epoch": 1.01,
"learning_rate": 6.624418156482551e-07,
"loss": 0.1408,
"step": 22784
},
{
"epoch": 1.02,
"learning_rate": 6.614930771739452e-07,
"loss": 0.1979,
"step": 22848
},
{
"epoch": 1.02,
"learning_rate": 6.605443386996353e-07,
"loss": 0.1277,
"step": 22912
},
{
"epoch": 1.02,
"learning_rate": 6.595956002253253e-07,
"loss": 0.0607,
"step": 22976
},
{
"epoch": 1.02,
"learning_rate": 6.586468617510155e-07,
"loss": 0.126,
"step": 23040
},
{
"epoch": 1.03,
"learning_rate": 6.576981232767055e-07,
"loss": 0.116,
"step": 23104
},
{
"epoch": 1.03,
"learning_rate": 6.567493848023955e-07,
"loss": 0.1233,
"step": 23168
},
{
"epoch": 1.03,
"learning_rate": 6.558006463280857e-07,
"loss": 0.0871,
"step": 23232
},
{
"epoch": 1.04,
"learning_rate": 6.548519078537756e-07,
"loss": 0.0974,
"step": 23296
},
{
"epoch": 1.04,
"learning_rate": 6.539031693794656e-07,
"loss": 0.1102,
"step": 23360
},
{
"epoch": 1.04,
"learning_rate": 6.529544309051558e-07,
"loss": 0.0905,
"step": 23424
},
{
"epoch": 1.04,
"learning_rate": 6.520056924308458e-07,
"loss": 0.0783,
"step": 23488
},
{
"epoch": 1.05,
"learning_rate": 6.510569539565359e-07,
"loss": 0.0835,
"step": 23552
},
{
"epoch": 1.05,
"learning_rate": 6.501082154822259e-07,
"loss": 0.1459,
"step": 23616
},
{
"epoch": 1.05,
"learning_rate": 6.49159477007916e-07,
"loss": 0.1042,
"step": 23680
},
{
"epoch": 1.06,
"learning_rate": 6.482107385336061e-07,
"loss": 0.1063,
"step": 23744
},
{
"epoch": 1.06,
"learning_rate": 6.472620000592961e-07,
"loss": 0.1107,
"step": 23808
},
{
"epoch": 1.06,
"learning_rate": 6.463132615849863e-07,
"loss": 0.1557,
"step": 23872
},
{
"epoch": 1.06,
"learning_rate": 6.453645231106762e-07,
"loss": 0.1015,
"step": 23936
},
{
"epoch": 1.07,
"learning_rate": 6.444157846363662e-07,
"loss": 0.1518,
"step": 24000
},
{
"epoch": 1.07,
"learning_rate": 6.434670461620564e-07,
"loss": 0.1283,
"step": 24064
},
{
"epoch": 1.07,
"learning_rate": 6.425183076877464e-07,
"loss": 0.1181,
"step": 24128
},
{
"epoch": 1.08,
"learning_rate": 6.415695692134365e-07,
"loss": 0.1414,
"step": 24192
},
{
"epoch": 1.08,
"learning_rate": 6.406208307391266e-07,
"loss": 0.0739,
"step": 24256
},
{
"epoch": 1.08,
"learning_rate": 6.396869163034777e-07,
"loss": 0.1461,
"step": 24320
},
{
"epoch": 1.08,
"learning_rate": 6.387381778291678e-07,
"loss": 0.1104,
"step": 24384
},
{
"epoch": 1.09,
"learning_rate": 6.377894393548579e-07,
"loss": 0.0982,
"step": 24448
},
{
"epoch": 1.09,
"learning_rate": 6.368407008805478e-07,
"loss": 0.1016,
"step": 24512
},
{
"epoch": 1.09,
"learning_rate": 6.358919624062378e-07,
"loss": 0.1343,
"step": 24576
},
{
"epoch": 1.1,
"learning_rate": 6.34943223931928e-07,
"loss": 0.0936,
"step": 24640
},
{
"epoch": 1.1,
"learning_rate": 6.33994485457618e-07,
"loss": 0.1245,
"step": 24704
},
{
"epoch": 1.1,
"learning_rate": 6.330457469833081e-07,
"loss": 0.1656,
"step": 24768
},
{
"epoch": 1.1,
"learning_rate": 6.320970085089982e-07,
"loss": 0.1573,
"step": 24832
},
{
"epoch": 1.11,
"learning_rate": 6.311482700346882e-07,
"loss": 0.1143,
"step": 24896
},
{
"epoch": 1.11,
"learning_rate": 6.301995315603783e-07,
"loss": 0.1048,
"step": 24960
},
{
"epoch": 1.11,
"learning_rate": 6.292507930860684e-07,
"loss": 0.0797,
"step": 25024
},
{
"epoch": 1.12,
"learning_rate": 6.283020546117584e-07,
"loss": 0.0676,
"step": 25088
},
{
"epoch": 1.12,
"learning_rate": 6.273533161374484e-07,
"loss": 0.1028,
"step": 25152
},
{
"epoch": 1.12,
"learning_rate": 6.264045776631385e-07,
"loss": 0.1148,
"step": 25216
},
{
"epoch": 1.12,
"learning_rate": 6.254558391888286e-07,
"loss": 0.1185,
"step": 25280
},
{
"epoch": 1.13,
"learning_rate": 6.245071007145186e-07,
"loss": 0.1288,
"step": 25344
},
{
"epoch": 1.13,
"learning_rate": 6.235583622402088e-07,
"loss": 0.1474,
"step": 25408
},
{
"epoch": 1.13,
"learning_rate": 6.226096237658988e-07,
"loss": 0.1075,
"step": 25472
},
{
"epoch": 1.14,
"learning_rate": 6.216608852915888e-07,
"loss": 0.1531,
"step": 25536
},
{
"epoch": 1.14,
"learning_rate": 6.207121468172789e-07,
"loss": 0.1072,
"step": 25600
},
{
"epoch": 1.14,
"learning_rate": 6.19763408342969e-07,
"loss": 0.0739,
"step": 25664
},
{
"epoch": 1.14,
"learning_rate": 6.188146698686589e-07,
"loss": 0.0674,
"step": 25728
},
{
"epoch": 1.15,
"learning_rate": 6.17865931394349e-07,
"loss": 0.0712,
"step": 25792
},
{
"epoch": 1.15,
"learning_rate": 6.169171929200391e-07,
"loss": 0.116,
"step": 25856
},
{
"epoch": 1.15,
"learning_rate": 6.159684544457292e-07,
"loss": 0.1242,
"step": 25920
},
{
"epoch": 1.16,
"learning_rate": 6.150197159714192e-07,
"loss": 0.0647,
"step": 25984
},
{
"epoch": 1.16,
"learning_rate": 6.140709774971093e-07,
"loss": 0.1281,
"step": 26048
},
{
"epoch": 1.16,
"learning_rate": 6.131222390227994e-07,
"loss": 0.1245,
"step": 26112
},
{
"epoch": 1.16,
"learning_rate": 6.121735005484894e-07,
"loss": 0.1712,
"step": 26176
},
{
"epoch": 1.17,
"learning_rate": 6.112247620741796e-07,
"loss": 0.1322,
"step": 26240
},
{
"epoch": 1.17,
"learning_rate": 6.102760235998695e-07,
"loss": 0.1391,
"step": 26304
},
{
"epoch": 1.17,
"learning_rate": 6.093272851255595e-07,
"loss": 0.0661,
"step": 26368
},
{
"epoch": 1.18,
"learning_rate": 6.083785466512496e-07,
"loss": 0.153,
"step": 26432
},
{
"epoch": 1.18,
"learning_rate": 6.074298081769397e-07,
"loss": 0.0904,
"step": 26496
},
{
"epoch": 1.18,
"learning_rate": 6.064810697026298e-07,
"loss": 0.1427,
"step": 26560
},
{
"epoch": 1.18,
"learning_rate": 6.055323312283198e-07,
"loss": 0.0734,
"step": 26624
},
{
"epoch": 1.19,
"learning_rate": 6.045835927540099e-07,
"loss": 0.0615,
"step": 26688
},
{
"epoch": 1.19,
"learning_rate": 6.036348542797e-07,
"loss": 0.1454,
"step": 26752
},
{
"epoch": 1.19,
"learning_rate": 6.027009398440512e-07,
"loss": 0.1196,
"step": 26816
},
{
"epoch": 1.2,
"learning_rate": 6.017522013697411e-07,
"loss": 0.1487,
"step": 26880
},
{
"epoch": 1.2,
"learning_rate": 6.008182869340923e-07,
"loss": 0.1299,
"step": 26944
},
{
"epoch": 1.2,
"learning_rate": 5.998695484597823e-07,
"loss": 0.111,
"step": 27008
},
{
"epoch": 1.2,
"learning_rate": 5.989208099854724e-07,
"loss": 0.0529,
"step": 27072
},
{
"epoch": 1.21,
"learning_rate": 5.979720715111625e-07,
"loss": 0.1411,
"step": 27136
},
{
"epoch": 1.21,
"learning_rate": 5.970233330368525e-07,
"loss": 0.1335,
"step": 27200
},
{
"epoch": 1.21,
"learning_rate": 5.960745945625426e-07,
"loss": 0.1229,
"step": 27264
},
{
"epoch": 1.22,
"learning_rate": 5.951258560882326e-07,
"loss": 0.0501,
"step": 27328
},
{
"epoch": 1.22,
"learning_rate": 5.941771176139228e-07,
"loss": 0.0866,
"step": 27392
},
{
"epoch": 1.22,
"learning_rate": 5.932283791396128e-07,
"loss": 0.0643,
"step": 27456
},
{
"epoch": 1.22,
"learning_rate": 5.922796406653027e-07,
"loss": 0.0793,
"step": 27520
},
{
"epoch": 1.23,
"learning_rate": 5.913309021909929e-07,
"loss": 0.0627,
"step": 27584
},
{
"epoch": 1.23,
"learning_rate": 5.903821637166829e-07,
"loss": 0.1354,
"step": 27648
},
{
"epoch": 1.23,
"learning_rate": 5.89433425242373e-07,
"loss": 0.1341,
"step": 27712
},
{
"epoch": 1.24,
"learning_rate": 5.884846867680631e-07,
"loss": 0.1205,
"step": 27776
},
{
"epoch": 1.24,
"learning_rate": 5.875359482937531e-07,
"loss": 0.1247,
"step": 27840
},
{
"epoch": 1.24,
"learning_rate": 5.865872098194432e-07,
"loss": 0.1071,
"step": 27904
},
{
"epoch": 1.24,
"learning_rate": 5.856384713451333e-07,
"loss": 0.125,
"step": 27968
},
{
"epoch": 1.25,
"learning_rate": 5.846897328708234e-07,
"loss": 0.0815,
"step": 28032
},
{
"epoch": 1.25,
"learning_rate": 5.837409943965133e-07,
"loss": 0.0916,
"step": 28096
},
{
"epoch": 1.25,
"learning_rate": 5.827922559222033e-07,
"loss": 0.1076,
"step": 28160
},
{
"epoch": 1.26,
"learning_rate": 5.818435174478935e-07,
"loss": 0.1217,
"step": 28224
},
{
"epoch": 1.26,
"learning_rate": 5.808947789735835e-07,
"loss": 0.0918,
"step": 28288
},
{
"epoch": 1.26,
"learning_rate": 5.799460404992736e-07,
"loss": 0.1532,
"step": 28352
},
{
"epoch": 1.26,
"learning_rate": 5.789973020249637e-07,
"loss": 0.0839,
"step": 28416
},
{
"epoch": 1.27,
"learning_rate": 5.780485635506537e-07,
"loss": 0.1425,
"step": 28480
},
{
"epoch": 1.27,
"learning_rate": 5.770998250763438e-07,
"loss": 0.0679,
"step": 28544
},
{
"epoch": 1.27,
"learning_rate": 5.761510866020339e-07,
"loss": 0.0667,
"step": 28608
},
{
"epoch": 1.28,
"learning_rate": 5.75202348127724e-07,
"loss": 0.1245,
"step": 28672
},
{
"epoch": 1.28,
"learning_rate": 5.742536096534139e-07,
"loss": 0.1255,
"step": 28736
},
{
"epoch": 1.28,
"learning_rate": 5.73304871179104e-07,
"loss": 0.0819,
"step": 28800
},
{
"epoch": 1.28,
"learning_rate": 5.723561327047941e-07,
"loss": 0.1035,
"step": 28864
},
{
"epoch": 1.29,
"learning_rate": 5.714073942304841e-07,
"loss": 0.1238,
"step": 28928
},
{
"epoch": 1.29,
"learning_rate": 5.704586557561741e-07,
"loss": 0.1312,
"step": 28992
},
{
"epoch": 1.29,
"learning_rate": 5.695099172818643e-07,
"loss": 0.0645,
"step": 29056
},
{
"epoch": 1.3,
"learning_rate": 5.685611788075543e-07,
"loss": 0.1143,
"step": 29120
},
{
"epoch": 1.3,
"learning_rate": 5.676124403332444e-07,
"loss": 0.0848,
"step": 29184
},
{
"epoch": 1.3,
"learning_rate": 5.666637018589344e-07,
"loss": 0.1051,
"step": 29248
},
{
"epoch": 1.3,
"learning_rate": 5.657149633846244e-07,
"loss": 0.0981,
"step": 29312
},
{
"epoch": 1.31,
"learning_rate": 5.647662249103145e-07,
"loss": 0.0894,
"step": 29376
},
{
"epoch": 1.31,
"learning_rate": 5.638174864360046e-07,
"loss": 0.1164,
"step": 29440
},
{
"epoch": 1.31,
"learning_rate": 5.628687479616947e-07,
"loss": 0.0816,
"step": 29504
},
{
"epoch": 1.31,
"learning_rate": 5.619200094873847e-07,
"loss": 0.1426,
"step": 29568
},
{
"epoch": 1.32,
"learning_rate": 5.609712710130748e-07,
"loss": 0.1748,
"step": 29632
},
{
"epoch": 1.32,
"learning_rate": 5.600225325387649e-07,
"loss": 0.0979,
"step": 29696
},
{
"epoch": 1.32,
"learning_rate": 5.590737940644549e-07,
"loss": 0.1499,
"step": 29760
},
{
"epoch": 1.33,
"learning_rate": 5.58125055590145e-07,
"loss": 0.0845,
"step": 29824
},
{
"epoch": 1.33,
"learning_rate": 5.57176317115835e-07,
"loss": 0.1298,
"step": 29888
},
{
"epoch": 1.33,
"learning_rate": 5.56227578641525e-07,
"loss": 0.1241,
"step": 29952
},
{
"epoch": 1.33,
"learning_rate": 5.552788401672151e-07,
"loss": 0.1174,
"step": 30016
},
{
"epoch": 1.34,
"learning_rate": 5.543301016929052e-07,
"loss": 0.1209,
"step": 30080
},
{
"epoch": 1.34,
"learning_rate": 5.533813632185953e-07,
"loss": 0.1071,
"step": 30144
},
{
"epoch": 1.34,
"learning_rate": 5.524326247442853e-07,
"loss": 0.0615,
"step": 30208
},
{
"epoch": 1.35,
"learning_rate": 5.514838862699754e-07,
"loss": 0.0797,
"step": 30272
},
{
"epoch": 1.35,
"learning_rate": 5.505351477956655e-07,
"loss": 0.1182,
"step": 30336
},
{
"epoch": 1.35,
"learning_rate": 5.495864093213554e-07,
"loss": 0.1027,
"step": 30400
},
{
"epoch": 1.35,
"learning_rate": 5.486376708470456e-07,
"loss": 0.0997,
"step": 30464
},
{
"epoch": 1.36,
"learning_rate": 5.476889323727356e-07,
"loss": 0.1624,
"step": 30528
},
{
"epoch": 1.36,
"learning_rate": 5.467401938984256e-07,
"loss": 0.0811,
"step": 30592
},
{
"epoch": 1.36,
"learning_rate": 5.457914554241158e-07,
"loss": 0.0667,
"step": 30656
},
{
"epoch": 1.37,
"learning_rate": 5.448427169498058e-07,
"loss": 0.1017,
"step": 30720
},
{
"epoch": 1.37,
"learning_rate": 5.438939784754958e-07,
"loss": 0.0942,
"step": 30784
},
{
"epoch": 1.37,
"learning_rate": 5.429452400011859e-07,
"loss": 0.1741,
"step": 30848
},
{
"epoch": 1.37,
"learning_rate": 5.41996501526876e-07,
"loss": 0.1527,
"step": 30912
},
{
"epoch": 1.38,
"learning_rate": 5.41047763052566e-07,
"loss": 0.0979,
"step": 30976
},
{
"epoch": 1.38,
"learning_rate": 5.40099024578256e-07,
"loss": 0.0569,
"step": 31040
},
{
"epoch": 1.38,
"learning_rate": 5.391502861039461e-07,
"loss": 0.1476,
"step": 31104
},
{
"epoch": 1.39,
"learning_rate": 5.382015476296362e-07,
"loss": 0.097,
"step": 31168
},
{
"epoch": 1.39,
"learning_rate": 5.372676331939874e-07,
"loss": 0.0647,
"step": 31232
},
{
"epoch": 1.39,
"learning_rate": 5.363188947196774e-07,
"loss": 0.103,
"step": 31296
},
{
"epoch": 1.39,
"learning_rate": 5.353701562453674e-07,
"loss": 0.0903,
"step": 31360
},
{
"epoch": 1.4,
"learning_rate": 5.344214177710576e-07,
"loss": 0.1414,
"step": 31424
},
{
"epoch": 1.4,
"learning_rate": 5.334726792967476e-07,
"loss": 0.0945,
"step": 31488
},
{
"epoch": 1.4,
"learning_rate": 5.325239408224377e-07,
"loss": 0.1426,
"step": 31552
},
{
"epoch": 1.41,
"learning_rate": 5.315752023481276e-07,
"loss": 0.1063,
"step": 31616
},
{
"epoch": 1.41,
"learning_rate": 5.306264638738177e-07,
"loss": 0.0921,
"step": 31680
},
{
"epoch": 1.41,
"learning_rate": 5.296777253995078e-07,
"loss": 0.0904,
"step": 31744
},
{
"epoch": 1.41,
"learning_rate": 5.287289869251978e-07,
"loss": 0.1278,
"step": 31808
},
{
"epoch": 1.42,
"learning_rate": 5.27780248450888e-07,
"loss": 0.1034,
"step": 31872
},
{
"epoch": 1.42,
"learning_rate": 5.26831509976578e-07,
"loss": 0.0908,
"step": 31936
},
{
"epoch": 1.42,
"learning_rate": 5.25882771502268e-07,
"loss": 0.1339,
"step": 32000
},
{
"epoch": 1.43,
"learning_rate": 5.249340330279582e-07,
"loss": 0.0777,
"step": 32064
},
{
"epoch": 1.43,
"learning_rate": 5.239852945536482e-07,
"loss": 0.1353,
"step": 32128
},
{
"epoch": 1.43,
"learning_rate": 5.230365560793382e-07,
"loss": 0.1369,
"step": 32192
},
{
"epoch": 1.43,
"learning_rate": 5.220878176050283e-07,
"loss": 0.087,
"step": 32256
},
{
"epoch": 1.44,
"learning_rate": 5.211390791307183e-07,
"loss": 0.129,
"step": 32320
},
{
"epoch": 1.44,
"learning_rate": 5.201903406564084e-07,
"loss": 0.1012,
"step": 32384
},
{
"epoch": 1.44,
"learning_rate": 5.192416021820984e-07,
"loss": 0.1114,
"step": 32448
},
{
"epoch": 1.45,
"learning_rate": 5.182928637077886e-07,
"loss": 0.0917,
"step": 32512
},
{
"epoch": 1.45,
"learning_rate": 5.173441252334786e-07,
"loss": 0.0852,
"step": 32576
},
{
"epoch": 1.45,
"learning_rate": 5.163953867591686e-07,
"loss": 0.0937,
"step": 32640
},
{
"epoch": 1.45,
"learning_rate": 5.154466482848588e-07,
"loss": 0.1014,
"step": 32704
},
{
"epoch": 1.46,
"learning_rate": 5.144979098105487e-07,
"loss": 0.0621,
"step": 32768
},
{
"epoch": 1.46,
"learning_rate": 5.135491713362388e-07,
"loss": 0.0927,
"step": 32832
},
{
"epoch": 1.46,
"learning_rate": 5.126004328619289e-07,
"loss": 0.1182,
"step": 32896
},
{
"epoch": 1.47,
"learning_rate": 5.116516943876189e-07,
"loss": 0.1206,
"step": 32960
},
{
"epoch": 1.47,
"learning_rate": 5.10702955913309e-07,
"loss": 0.1605,
"step": 33024
},
{
"epoch": 1.47,
"learning_rate": 5.097542174389991e-07,
"loss": 0.1525,
"step": 33088
},
{
"epoch": 1.47,
"learning_rate": 5.088054789646892e-07,
"loss": 0.0679,
"step": 33152
},
{
"epoch": 1.48,
"learning_rate": 5.078567404903792e-07,
"loss": 0.1432,
"step": 33216
},
{
"epoch": 1.48,
"learning_rate": 5.069080020160692e-07,
"loss": 0.1117,
"step": 33280
},
{
"epoch": 1.48,
"learning_rate": 5.059592635417593e-07,
"loss": 0.0866,
"step": 33344
},
{
"epoch": 1.49,
"learning_rate": 5.050105250674493e-07,
"loss": 0.0727,
"step": 33408
},
{
"epoch": 1.49,
"learning_rate": 5.040617865931393e-07,
"loss": 0.0671,
"step": 33472
},
{
"epoch": 1.49,
"learning_rate": 5.031130481188295e-07,
"loss": 0.1083,
"step": 33536
},
{
"epoch": 1.49,
"learning_rate": 5.021791336831806e-07,
"loss": 0.1442,
"step": 33600
},
{
"epoch": 1.5,
"learning_rate": 5.012303952088707e-07,
"loss": 0.1382,
"step": 33664
},
{
"epoch": 1.5,
"learning_rate": 5.002816567345607e-07,
"loss": 0.1495,
"step": 33728
},
{
"epoch": 1.5,
"learning_rate": 4.993329182602508e-07,
"loss": 0.1246,
"step": 33792
},
{
"epoch": 1.51,
"learning_rate": 4.983841797859409e-07,
"loss": 0.0701,
"step": 33856
},
{
"epoch": 1.51,
"learning_rate": 4.974354413116309e-07,
"loss": 0.1176,
"step": 33920
},
{
"epoch": 1.51,
"learning_rate": 4.964867028373209e-07,
"loss": 0.1294,
"step": 33984
},
{
"epoch": 1.51,
"learning_rate": 4.95537964363011e-07,
"loss": 0.1144,
"step": 34048
},
{
"epoch": 1.52,
"learning_rate": 4.945892258887011e-07,
"loss": 0.1356,
"step": 34112
},
{
"epoch": 1.52,
"learning_rate": 4.936404874143911e-07,
"loss": 0.0847,
"step": 34176
},
{
"epoch": 1.52,
"learning_rate": 4.926917489400812e-07,
"loss": 0.1499,
"step": 34240
},
{
"epoch": 1.53,
"learning_rate": 4.917430104657713e-07,
"loss": 0.1345,
"step": 34304
},
{
"epoch": 1.53,
"learning_rate": 4.907942719914614e-07,
"loss": 0.0594,
"step": 34368
},
{
"epoch": 1.53,
"learning_rate": 4.898455335171514e-07,
"loss": 0.1239,
"step": 34432
},
{
"epoch": 1.53,
"learning_rate": 4.888967950428415e-07,
"loss": 0.0936,
"step": 34496
},
{
"epoch": 1.54,
"learning_rate": 4.879480565685315e-07,
"loss": 0.0963,
"step": 34560
},
{
"epoch": 1.54,
"learning_rate": 4.869993180942215e-07,
"loss": 0.1015,
"step": 34624
},
{
"epoch": 1.54,
"learning_rate": 4.860505796199116e-07,
"loss": 0.0758,
"step": 34688
},
{
"epoch": 1.55,
"learning_rate": 4.851018411456017e-07,
"loss": 0.0954,
"step": 34752
},
{
"epoch": 1.55,
"learning_rate": 4.841531026712917e-07,
"loss": 0.1693,
"step": 34816
},
{
"epoch": 1.55,
"learning_rate": 4.832043641969818e-07,
"loss": 0.1428,
"step": 34880
},
{
"epoch": 1.55,
"learning_rate": 4.822556257226719e-07,
"loss": 0.1161,
"step": 34944
},
{
"epoch": 1.56,
"learning_rate": 4.81306887248362e-07,
"loss": 0.0599,
"step": 35008
},
{
"epoch": 1.56,
"learning_rate": 4.803581487740521e-07,
"loss": 0.1093,
"step": 35072
},
{
"epoch": 1.56,
"learning_rate": 4.79409410299742e-07,
"loss": 0.0763,
"step": 35136
},
{
"epoch": 1.57,
"learning_rate": 4.784606718254321e-07,
"loss": 0.119,
"step": 35200
},
{
"epoch": 1.57,
"learning_rate": 4.775119333511221e-07,
"loss": 0.1467,
"step": 35264
},
{
"epoch": 1.57,
"learning_rate": 4.765631948768122e-07,
"loss": 0.1311,
"step": 35328
},
{
"epoch": 1.57,
"learning_rate": 4.7561445640250225e-07,
"loss": 0.1628,
"step": 35392
},
{
"epoch": 1.58,
"learning_rate": 4.7466571792819234e-07,
"loss": 0.0847,
"step": 35456
},
{
"epoch": 1.58,
"learning_rate": 4.737169794538824e-07,
"loss": 0.0888,
"step": 35520
},
{
"epoch": 1.58,
"learning_rate": 4.7276824097957247e-07,
"loss": 0.1145,
"step": 35584
},
{
"epoch": 1.59,
"learning_rate": 4.7181950250526256e-07,
"loss": 0.1296,
"step": 35648
},
{
"epoch": 1.59,
"learning_rate": 4.7087076403095254e-07,
"loss": 0.0984,
"step": 35712
},
{
"epoch": 1.59,
"learning_rate": 4.6992202555664263e-07,
"loss": 0.1509,
"step": 35776
},
{
"epoch": 1.59,
"learning_rate": 4.6897328708233267e-07,
"loss": 0.1288,
"step": 35840
},
{
"epoch": 1.6,
"learning_rate": 4.6802454860802276e-07,
"loss": 0.1383,
"step": 35904
},
{
"epoch": 1.6,
"learning_rate": 4.6707581013371285e-07,
"loss": 0.1004,
"step": 35968
},
{
"epoch": 1.6,
"learning_rate": 4.6614189569806395e-07,
"loss": 0.1098,
"step": 36032
},
{
"epoch": 1.61,
"learning_rate": 4.6519315722375404e-07,
"loss": 0.1092,
"step": 36096
},
{
"epoch": 1.61,
"learning_rate": 4.642444187494441e-07,
"loss": 0.1028,
"step": 36160
},
{
"epoch": 1.61,
"learning_rate": 4.6329568027513417e-07,
"loss": 0.1196,
"step": 36224
},
{
"epoch": 1.61,
"learning_rate": 4.6234694180082415e-07,
"loss": 0.1209,
"step": 36288
},
{
"epoch": 1.62,
"learning_rate": 4.6139820332651424e-07,
"loss": 0.0756,
"step": 36352
},
{
"epoch": 1.62,
"learning_rate": 4.6044946485220433e-07,
"loss": 0.0949,
"step": 36416
},
{
"epoch": 1.62,
"learning_rate": 4.5950072637789437e-07,
"loss": 0.0852,
"step": 36480
},
{
"epoch": 1.63,
"learning_rate": 4.585668119422455e-07,
"loss": 0.1649,
"step": 36544
},
{
"epoch": 1.63,
"learning_rate": 4.5761807346793556e-07,
"loss": 0.1239,
"step": 36608
},
{
"epoch": 1.63,
"learning_rate": 4.5666933499362565e-07,
"loss": 0.0986,
"step": 36672
},
{
"epoch": 1.63,
"learning_rate": 4.5572059651931574e-07,
"loss": 0.1478,
"step": 36736
},
{
"epoch": 1.64,
"learning_rate": 4.547718580450058e-07,
"loss": 0.1032,
"step": 36800
},
{
"epoch": 1.64,
"learning_rate": 4.538231195706958e-07,
"loss": 0.1364,
"step": 36864
},
{
"epoch": 1.64,
"learning_rate": 4.5287438109638585e-07,
"loss": 0.0667,
"step": 36928
},
{
"epoch": 1.65,
"learning_rate": 4.5192564262207594e-07,
"loss": 0.0836,
"step": 36992
},
{
"epoch": 1.65,
"learning_rate": 4.5097690414776603e-07,
"loss": 0.1446,
"step": 37056
},
{
"epoch": 1.65,
"learning_rate": 4.50028165673456e-07,
"loss": 0.0684,
"step": 37120
},
{
"epoch": 1.65,
"learning_rate": 4.490794271991461e-07,
"loss": 0.1048,
"step": 37184
},
{
"epoch": 1.66,
"learning_rate": 4.4813068872483615e-07,
"loss": 0.0935,
"step": 37248
},
{
"epoch": 1.66,
"learning_rate": 4.4718195025052624e-07,
"loss": 0.1176,
"step": 37312
},
{
"epoch": 1.66,
"learning_rate": 4.4623321177621633e-07,
"loss": 0.095,
"step": 37376
},
{
"epoch": 1.67,
"learning_rate": 4.452844733019063e-07,
"loss": 0.139,
"step": 37440
},
{
"epoch": 1.67,
"learning_rate": 4.443357348275964e-07,
"loss": 0.0939,
"step": 37504
},
{
"epoch": 1.67,
"learning_rate": 4.433869963532865e-07,
"loss": 0.0793,
"step": 37568
},
{
"epoch": 1.67,
"learning_rate": 4.4243825787897653e-07,
"loss": 0.1213,
"step": 37632
},
{
"epoch": 1.68,
"learning_rate": 4.414895194046666e-07,
"loss": 0.1258,
"step": 37696
},
{
"epoch": 1.68,
"learning_rate": 4.405407809303566e-07,
"loss": 0.0902,
"step": 37760
},
{
"epoch": 1.68,
"learning_rate": 4.395920424560467e-07,
"loss": 0.1153,
"step": 37824
},
{
"epoch": 1.68,
"learning_rate": 4.386433039817368e-07,
"loss": 0.1569,
"step": 37888
},
{
"epoch": 1.69,
"learning_rate": 4.376945655074268e-07,
"loss": 0.0657,
"step": 37952
},
{
"epoch": 1.69,
"learning_rate": 4.367458270331169e-07,
"loss": 0.1206,
"step": 38016
},
{
"epoch": 1.69,
"learning_rate": 4.357970885588069e-07,
"loss": 0.1313,
"step": 38080
},
{
"epoch": 1.7,
"learning_rate": 4.34848350084497e-07,
"loss": 0.1358,
"step": 38144
},
{
"epoch": 1.7,
"learning_rate": 4.338996116101871e-07,
"loss": 0.0711,
"step": 38208
},
{
"epoch": 1.7,
"learning_rate": 4.329508731358771e-07,
"loss": 0.1359,
"step": 38272
},
{
"epoch": 1.7,
"learning_rate": 4.3200213466156716e-07,
"loss": 0.087,
"step": 38336
},
{
"epoch": 1.71,
"learning_rate": 4.3105339618725725e-07,
"loss": 0.0831,
"step": 38400
},
{
"epoch": 1.71,
"learning_rate": 4.301046577129473e-07,
"loss": 0.065,
"step": 38464
},
{
"epoch": 1.71,
"learning_rate": 4.291559192386374e-07,
"loss": 0.1132,
"step": 38528
},
{
"epoch": 1.72,
"learning_rate": 4.282071807643274e-07,
"loss": 0.1098,
"step": 38592
},
{
"epoch": 1.72,
"learning_rate": 4.2725844229001745e-07,
"loss": 0.0918,
"step": 38656
},
{
"epoch": 1.72,
"learning_rate": 4.2630970381570754e-07,
"loss": 0.1517,
"step": 38720
},
{
"epoch": 1.72,
"learning_rate": 4.253609653413976e-07,
"loss": 0.0869,
"step": 38784
},
{
"epoch": 1.73,
"learning_rate": 4.2441222686708767e-07,
"loss": 0.1281,
"step": 38848
},
{
"epoch": 1.73,
"learning_rate": 4.234634883927777e-07,
"loss": 0.0536,
"step": 38912
},
{
"epoch": 1.73,
"learning_rate": 4.2251474991846775e-07,
"loss": 0.1776,
"step": 38976
},
{
"epoch": 1.74,
"learning_rate": 4.2156601144415784e-07,
"loss": 0.15,
"step": 39040
},
{
"epoch": 1.74,
"learning_rate": 4.206172729698479e-07,
"loss": 0.0522,
"step": 39104
},
{
"epoch": 1.74,
"learning_rate": 4.1966853449553797e-07,
"loss": 0.0863,
"step": 39168
},
{
"epoch": 1.74,
"learning_rate": 4.18719796021228e-07,
"loss": 0.0848,
"step": 39232
},
{
"epoch": 1.75,
"learning_rate": 4.1777105754691804e-07,
"loss": 0.1043,
"step": 39296
},
{
"epoch": 1.75,
"learning_rate": 4.1682231907260813e-07,
"loss": 0.0975,
"step": 39360
},
{
"epoch": 1.75,
"learning_rate": 4.1587358059829817e-07,
"loss": 0.155,
"step": 39424
},
{
"epoch": 1.76,
"learning_rate": 4.1492484212398826e-07,
"loss": 0.0518,
"step": 39488
},
{
"epoch": 1.76,
"learning_rate": 4.139761036496783e-07,
"loss": 0.1139,
"step": 39552
},
{
"epoch": 1.76,
"learning_rate": 4.1304218921402945e-07,
"loss": 0.1094,
"step": 39616
},
{
"epoch": 1.76,
"learning_rate": 4.1209345073971954e-07,
"loss": 0.0979,
"step": 39680
},
{
"epoch": 1.77,
"learning_rate": 4.111447122654096e-07,
"loss": 0.0968,
"step": 39744
},
{
"epoch": 1.77,
"learning_rate": 4.101959737910996e-07,
"loss": 0.0828,
"step": 39808
},
{
"epoch": 1.77,
"learning_rate": 4.0924723531678965e-07,
"loss": 0.1267,
"step": 39872
},
{
"epoch": 1.78,
"learning_rate": 4.0829849684247974e-07,
"loss": 0.1223,
"step": 39936
},
{
"epoch": 1.78,
"learning_rate": 4.0734975836816983e-07,
"loss": 0.0536,
"step": 40000
},
{
"epoch": 1.78,
"learning_rate": 4.0640101989385987e-07,
"loss": 0.0832,
"step": 40064
},
{
"epoch": 1.78,
"learning_rate": 4.054522814195499e-07,
"loss": 0.1293,
"step": 40128
},
{
"epoch": 1.79,
"learning_rate": 4.0450354294524e-07,
"loss": 0.0894,
"step": 40192
},
{
"epoch": 1.79,
"learning_rate": 4.0355480447093004e-07,
"loss": 0.0937,
"step": 40256
},
{
"epoch": 1.79,
"learning_rate": 4.0260606599662013e-07,
"loss": 0.0785,
"step": 40320
},
{
"epoch": 1.8,
"learning_rate": 4.0165732752231016e-07,
"loss": 0.1403,
"step": 40384
},
{
"epoch": 1.8,
"learning_rate": 4.007085890480002e-07,
"loss": 0.0647,
"step": 40448
},
{
"epoch": 1.8,
"learning_rate": 3.997598505736903e-07,
"loss": 0.108,
"step": 40512
},
{
"epoch": 1.8,
"learning_rate": 3.9881111209938033e-07,
"loss": 0.1352,
"step": 40576
},
{
"epoch": 1.81,
"learning_rate": 3.978623736250704e-07,
"loss": 0.1345,
"step": 40640
},
{
"epoch": 1.81,
"learning_rate": 3.969136351507604e-07,
"loss": 0.0638,
"step": 40704
},
{
"epoch": 1.81,
"learning_rate": 3.959648966764505e-07,
"loss": 0.1575,
"step": 40768
},
{
"epoch": 1.82,
"learning_rate": 3.950161582021406e-07,
"loss": 0.0485,
"step": 40832
},
{
"epoch": 1.82,
"learning_rate": 3.940674197278306e-07,
"loss": 0.1627,
"step": 40896
},
{
"epoch": 1.82,
"learning_rate": 3.931186812535207e-07,
"loss": 0.096,
"step": 40960
},
{
"epoch": 1.82,
"learning_rate": 3.9216994277921075e-07,
"loss": 0.1305,
"step": 41024
},
{
"epoch": 1.83,
"learning_rate": 3.912212043049008e-07,
"loss": 0.0839,
"step": 41088
},
{
"epoch": 1.83,
"learning_rate": 3.902724658305909e-07,
"loss": 0.1202,
"step": 41152
},
{
"epoch": 1.83,
"learning_rate": 3.893237273562809e-07,
"loss": 0.1068,
"step": 41216
},
{
"epoch": 1.84,
"learning_rate": 3.88374988881971e-07,
"loss": 0.1165,
"step": 41280
},
{
"epoch": 1.84,
"learning_rate": 3.8742625040766105e-07,
"loss": 0.0656,
"step": 41344
},
{
"epoch": 1.84,
"learning_rate": 3.864775119333511e-07,
"loss": 0.0999,
"step": 41408
},
{
"epoch": 1.84,
"learning_rate": 3.855287734590412e-07,
"loss": 0.1246,
"step": 41472
},
{
"epoch": 1.85,
"learning_rate": 3.845800349847312e-07,
"loss": 0.1357,
"step": 41536
},
{
"epoch": 1.85,
"learning_rate": 3.836312965104213e-07,
"loss": 0.1208,
"step": 41600
},
{
"epoch": 1.85,
"learning_rate": 3.8268255803611134e-07,
"loss": 0.1149,
"step": 41664
},
{
"epoch": 1.86,
"learning_rate": 3.817338195618014e-07,
"loss": 0.0814,
"step": 41728
},
{
"epoch": 1.86,
"learning_rate": 3.8078508108749147e-07,
"loss": 0.1499,
"step": 41792
},
{
"epoch": 1.86,
"learning_rate": 3.7983634261318156e-07,
"loss": 0.088,
"step": 41856
},
{
"epoch": 1.86,
"learning_rate": 3.7888760413887155e-07,
"loss": 0.0644,
"step": 41920
},
{
"epoch": 1.87,
"learning_rate": 3.7793886566456164e-07,
"loss": 0.0924,
"step": 41984
},
{
"epoch": 1.87,
"learning_rate": 3.769901271902517e-07,
"loss": 0.1307,
"step": 42048
},
{
"epoch": 1.87,
"learning_rate": 3.7604138871594176e-07,
"loss": 0.0862,
"step": 42112
},
{
"epoch": 1.88,
"learning_rate": 3.7509265024163185e-07,
"loss": 0.1854,
"step": 42176
},
{
"epoch": 1.88,
"learning_rate": 3.7414391176732184e-07,
"loss": 0.0671,
"step": 42240
},
{
"epoch": 1.88,
"learning_rate": 3.7319517329301193e-07,
"loss": 0.0915,
"step": 42304
},
{
"epoch": 1.88,
"learning_rate": 3.7224643481870197e-07,
"loss": 0.0638,
"step": 42368
},
{
"epoch": 1.89,
"learning_rate": 3.7129769634439206e-07,
"loss": 0.0935,
"step": 42432
},
{
"epoch": 1.89,
"learning_rate": 3.7034895787008215e-07,
"loss": 0.1093,
"step": 42496
},
{
"epoch": 1.89,
"learning_rate": 3.6940021939577213e-07,
"loss": 0.0964,
"step": 42560
},
{
"epoch": 1.9,
"learning_rate": 3.684514809214622e-07,
"loss": 0.1378,
"step": 42624
},
{
"epoch": 1.9,
"learning_rate": 3.675027424471523e-07,
"loss": 0.157,
"step": 42688
},
{
"epoch": 1.9,
"learning_rate": 3.6655400397284235e-07,
"loss": 0.1068,
"step": 42752
},
{
"epoch": 1.9,
"learning_rate": 3.656052654985324e-07,
"loss": 0.1646,
"step": 42816
},
{
"epoch": 1.91,
"learning_rate": 3.6465652702422243e-07,
"loss": 0.0418,
"step": 42880
},
{
"epoch": 1.91,
"learning_rate": 3.637077885499125e-07,
"loss": 0.1197,
"step": 42944
},
{
"epoch": 1.91,
"learning_rate": 3.627590500756026e-07,
"loss": 0.1105,
"step": 43008
},
{
"epoch": 1.92,
"learning_rate": 3.6181031160129265e-07,
"loss": 0.0999,
"step": 43072
},
{
"epoch": 1.92,
"learning_rate": 3.608615731269827e-07,
"loss": 0.1232,
"step": 43136
},
{
"epoch": 1.92,
"learning_rate": 3.599128346526727e-07,
"loss": 0.0982,
"step": 43200
},
{
"epoch": 1.92,
"learning_rate": 3.589640961783628e-07,
"loss": 0.0958,
"step": 43264
},
{
"epoch": 1.93,
"learning_rate": 3.580153577040529e-07,
"loss": 0.1199,
"step": 43328
},
{
"epoch": 1.93,
"learning_rate": 3.5706661922974294e-07,
"loss": 0.1282,
"step": 43392
},
{
"epoch": 1.93,
"learning_rate": 3.56117880755433e-07,
"loss": 0.1198,
"step": 43456
},
{
"epoch": 1.94,
"learning_rate": 3.5516914228112307e-07,
"loss": 0.078,
"step": 43520
},
{
"epoch": 1.94,
"learning_rate": 3.542204038068131e-07,
"loss": 0.1085,
"step": 43584
},
{
"epoch": 1.94,
"learning_rate": 3.532716653325032e-07,
"loss": 0.0971,
"step": 43648
},
{
"epoch": 1.94,
"learning_rate": 3.523229268581932e-07,
"loss": 0.1054,
"step": 43712
},
{
"epoch": 1.95,
"learning_rate": 3.5137418838388327e-07,
"loss": 0.0871,
"step": 43776
},
{
"epoch": 1.95,
"learning_rate": 3.5042544990957336e-07,
"loss": 0.0859,
"step": 43840
},
{
"epoch": 1.95,
"learning_rate": 3.494767114352634e-07,
"loss": 0.1574,
"step": 43904
},
{
"epoch": 1.96,
"learning_rate": 3.485279729609535e-07,
"loss": 0.151,
"step": 43968
},
{
"epoch": 1.96,
"learning_rate": 3.475792344866435e-07,
"loss": 0.0996,
"step": 44032
},
{
"epoch": 1.96,
"learning_rate": 3.4663049601233357e-07,
"loss": 0.1001,
"step": 44096
},
{
"epoch": 1.96,
"learning_rate": 3.4568175753802366e-07,
"loss": 0.0738,
"step": 44160
},
{
"epoch": 1.97,
"learning_rate": 3.447330190637137e-07,
"loss": 0.0682,
"step": 44224
},
{
"epoch": 1.97,
"learning_rate": 3.437842805894038e-07,
"loss": 0.0821,
"step": 44288
},
{
"epoch": 1.97,
"learning_rate": 3.428355421150938e-07,
"loss": 0.0613,
"step": 44352
},
{
"epoch": 1.98,
"learning_rate": 3.4188680364078386e-07,
"loss": 0.137,
"step": 44416
},
{
"epoch": 1.98,
"learning_rate": 3.4093806516647395e-07,
"loss": 0.0937,
"step": 44480
},
{
"epoch": 1.98,
"learning_rate": 3.39989326692164e-07,
"loss": 0.1133,
"step": 44544
},
{
"epoch": 1.98,
"learning_rate": 3.390405882178541e-07,
"loss": 0.1079,
"step": 44608
},
{
"epoch": 1.99,
"learning_rate": 3.380918497435441e-07,
"loss": 0.089,
"step": 44672
},
{
"epoch": 1.99,
"learning_rate": 3.3714311126923416e-07,
"loss": 0.1321,
"step": 44736
},
{
"epoch": 1.99,
"learning_rate": 3.3619437279492425e-07,
"loss": 0.1646,
"step": 44800
},
{
"epoch": 2.0,
"learning_rate": 3.3524563432061434e-07,
"loss": 0.1659,
"step": 44864
},
{
"epoch": 2.0,
"learning_rate": 3.342968958463043e-07,
"loss": 0.084,
"step": 44928
},
{
"epoch": 2.0,
"learning_rate": 3.333481573719944e-07,
"loss": 0.1143,
"step": 44992
},
{
"epoch": 2.0,
"learning_rate": 3.3239941889768445e-07,
"loss": 0.0553,
"step": 45056
},
{
"epoch": 2.01,
"learning_rate": 3.3145068042337454e-07,
"loss": 0.073,
"step": 45120
},
{
"epoch": 2.01,
"learning_rate": 3.3050194194906463e-07,
"loss": 0.0648,
"step": 45184
},
{
"epoch": 2.01,
"learning_rate": 3.295532034747546e-07,
"loss": 0.0901,
"step": 45248
},
{
"epoch": 2.02,
"learning_rate": 3.286044650004447e-07,
"loss": 0.0771,
"step": 45312
},
{
"epoch": 2.02,
"learning_rate": 3.2765572652613474e-07,
"loss": 0.072,
"step": 45376
},
{
"epoch": 2.02,
"learning_rate": 3.2670698805182484e-07,
"loss": 0.0564,
"step": 45440
},
{
"epoch": 2.02,
"learning_rate": 3.2577307361617593e-07,
"loss": 0.0466,
"step": 45504
},
{
"epoch": 2.03,
"learning_rate": 3.24824335141866e-07,
"loss": 0.06,
"step": 45568
},
{
"epoch": 2.03,
"learning_rate": 3.238755966675561e-07,
"loss": 0.0333,
"step": 45632
},
{
"epoch": 2.03,
"learning_rate": 3.2292685819324615e-07,
"loss": 0.053,
"step": 45696
},
{
"epoch": 2.04,
"learning_rate": 3.2197811971893624e-07,
"loss": 0.0864,
"step": 45760
},
{
"epoch": 2.04,
"learning_rate": 3.2102938124462623e-07,
"loss": 0.0339,
"step": 45824
},
{
"epoch": 2.04,
"learning_rate": 3.200806427703163e-07,
"loss": 0.0486,
"step": 45888
},
{
"epoch": 2.04,
"learning_rate": 3.1914672833466747e-07,
"loss": 0.1192,
"step": 45952
},
{
"epoch": 2.05,
"learning_rate": 3.1819798986035756e-07,
"loss": 0.0666,
"step": 46016
},
{
"epoch": 2.05,
"learning_rate": 3.172492513860476e-07,
"loss": 0.0451,
"step": 46080
},
{
"epoch": 2.05,
"learning_rate": 3.1630051291173763e-07,
"loss": 0.0871,
"step": 46144
},
{
"epoch": 2.05,
"learning_rate": 3.153517744374277e-07,
"loss": 0.065,
"step": 46208
},
{
"epoch": 2.06,
"learning_rate": 3.144030359631178e-07,
"loss": 0.0784,
"step": 46272
},
{
"epoch": 2.06,
"learning_rate": 3.1345429748880785e-07,
"loss": 0.1044,
"step": 46336
},
{
"epoch": 2.06,
"learning_rate": 3.125055590144979e-07,
"loss": 0.0529,
"step": 46400
},
{
"epoch": 2.07,
"learning_rate": 3.1155682054018793e-07,
"loss": 0.073,
"step": 46464
},
{
"epoch": 2.07,
"learning_rate": 3.10608082065878e-07,
"loss": 0.0843,
"step": 46528
},
{
"epoch": 2.07,
"learning_rate": 3.096593435915681e-07,
"loss": 0.0552,
"step": 46592
},
{
"epoch": 2.07,
"learning_rate": 3.087106051172581e-07,
"loss": 0.0743,
"step": 46656
},
{
"epoch": 2.08,
"learning_rate": 3.077618666429482e-07,
"loss": 0.0612,
"step": 46720
},
{
"epoch": 2.08,
"learning_rate": 3.068131281686382e-07,
"loss": 0.0693,
"step": 46784
},
{
"epoch": 2.08,
"learning_rate": 3.058643896943283e-07,
"loss": 0.0755,
"step": 46848
},
{
"epoch": 2.09,
"learning_rate": 3.049156512200184e-07,
"loss": 0.085,
"step": 46912
},
{
"epoch": 2.09,
"learning_rate": 3.039669127457084e-07,
"loss": 0.0316,
"step": 46976
},
{
"epoch": 2.09,
"learning_rate": 3.030181742713985e-07,
"loss": 0.0542,
"step": 47040
},
{
"epoch": 2.09,
"learning_rate": 3.0206943579708857e-07,
"loss": 0.0582,
"step": 47104
},
{
"epoch": 2.1,
"learning_rate": 3.011206973227786e-07,
"loss": 0.0471,
"step": 47168
},
{
"epoch": 2.1,
"learning_rate": 3.001719588484687e-07,
"loss": 0.0288,
"step": 47232
},
{
"epoch": 2.1,
"learning_rate": 2.992232203741587e-07,
"loss": 0.0996,
"step": 47296
},
{
"epoch": 2.11,
"learning_rate": 2.9827448189984877e-07,
"loss": 0.0591,
"step": 47360
},
{
"epoch": 2.11,
"learning_rate": 2.9732574342553886e-07,
"loss": 0.0438,
"step": 47424
},
{
"epoch": 2.11,
"learning_rate": 2.963770049512289e-07,
"loss": 0.0632,
"step": 47488
},
{
"epoch": 2.11,
"learning_rate": 2.95428266476919e-07,
"loss": 0.0576,
"step": 47552
},
{
"epoch": 2.12,
"learning_rate": 2.94479528002609e-07,
"loss": 0.0892,
"step": 47616
},
{
"epoch": 2.12,
"learning_rate": 2.9353078952829907e-07,
"loss": 0.1369,
"step": 47680
},
{
"epoch": 2.12,
"learning_rate": 2.9258205105398916e-07,
"loss": 0.0736,
"step": 47744
},
{
"epoch": 2.13,
"learning_rate": 2.916333125796792e-07,
"loss": 0.1227,
"step": 47808
},
{
"epoch": 2.13,
"learning_rate": 2.9068457410536923e-07,
"loss": 0.0895,
"step": 47872
},
{
"epoch": 2.13,
"learning_rate": 2.897358356310593e-07,
"loss": 0.0623,
"step": 47936
},
{
"epoch": 2.13,
"learning_rate": 2.8878709715674936e-07,
"loss": 0.0817,
"step": 48000
},
{
"epoch": 2.14,
"learning_rate": 2.8783835868243945e-07,
"loss": 0.0819,
"step": 48064
},
{
"epoch": 2.14,
"learning_rate": 2.868896202081295e-07,
"loss": 0.069,
"step": 48128
},
{
"epoch": 2.14,
"learning_rate": 2.8594088173381953e-07,
"loss": 0.0737,
"step": 48192
},
{
"epoch": 2.15,
"learning_rate": 2.849921432595096e-07,
"loss": 0.0437,
"step": 48256
},
{
"epoch": 2.15,
"learning_rate": 2.8404340478519966e-07,
"loss": 0.0608,
"step": 48320
},
{
"epoch": 2.15,
"learning_rate": 2.8309466631088975e-07,
"loss": 0.0831,
"step": 48384
},
{
"epoch": 2.15,
"learning_rate": 2.8214592783657984e-07,
"loss": 0.0239,
"step": 48448
},
{
"epoch": 2.16,
"learning_rate": 2.811971893622698e-07,
"loss": 0.0509,
"step": 48512
},
{
"epoch": 2.16,
"learning_rate": 2.802484508879599e-07,
"loss": 0.0671,
"step": 48576
},
{
"epoch": 2.16,
"learning_rate": 2.7929971241364995e-07,
"loss": 0.0449,
"step": 48640
},
{
"epoch": 2.17,
"learning_rate": 2.7835097393934004e-07,
"loss": 0.0436,
"step": 48704
},
{
"epoch": 2.17,
"learning_rate": 2.774022354650301e-07,
"loss": 0.0752,
"step": 48768
},
{
"epoch": 2.17,
"learning_rate": 2.764534969907201e-07,
"loss": 0.0975,
"step": 48832
},
{
"epoch": 2.17,
"learning_rate": 2.755047585164102e-07,
"loss": 0.0787,
"step": 48896
},
{
"epoch": 2.18,
"learning_rate": 2.7457084408076136e-07,
"loss": 0.0786,
"step": 48960
},
{
"epoch": 2.18,
"learning_rate": 2.7362210560645145e-07,
"loss": 0.0799,
"step": 49024
},
{
"epoch": 2.18,
"learning_rate": 2.7267336713214143e-07,
"loss": 0.0533,
"step": 49088
},
{
"epoch": 2.19,
"learning_rate": 2.717246286578315e-07,
"loss": 0.0628,
"step": 49152
},
{
"epoch": 2.19,
"learning_rate": 2.707758901835216e-07,
"loss": 0.0589,
"step": 49216
},
{
"epoch": 2.19,
"learning_rate": 2.6982715170921165e-07,
"loss": 0.0374,
"step": 49280
},
{
"epoch": 2.19,
"learning_rate": 2.688784132349017e-07,
"loss": 0.0635,
"step": 49344
},
{
"epoch": 2.2,
"learning_rate": 2.679296747605917e-07,
"loss": 0.0509,
"step": 49408
},
{
"epoch": 2.2,
"learning_rate": 2.669809362862818e-07,
"loss": 0.0751,
"step": 49472
},
{
"epoch": 2.2,
"learning_rate": 2.660321978119719e-07,
"loss": 0.0769,
"step": 49536
},
{
"epoch": 2.21,
"learning_rate": 2.6508345933766194e-07,
"loss": 0.1124,
"step": 49600
},
{
"epoch": 2.21,
"learning_rate": 2.64134720863352e-07,
"loss": 0.0478,
"step": 49664
},
{
"epoch": 2.21,
"learning_rate": 2.6318598238904207e-07,
"loss": 0.0753,
"step": 49728
},
{
"epoch": 2.21,
"learning_rate": 2.622372439147321e-07,
"loss": 0.0611,
"step": 49792
},
{
"epoch": 2.22,
"learning_rate": 2.612885054404222e-07,
"loss": 0.0475,
"step": 49856
},
{
"epoch": 2.22,
"learning_rate": 2.603545910047733e-07,
"loss": 0.0461,
"step": 49920
},
{
"epoch": 2.22,
"learning_rate": 2.594058525304634e-07,
"loss": 0.044,
"step": 49984
},
{
"epoch": 2.23,
"learning_rate": 2.5845711405615343e-07,
"loss": 0.085,
"step": 50048
},
{
"epoch": 2.23,
"learning_rate": 2.575083755818435e-07,
"loss": 0.1055,
"step": 50112
},
{
"epoch": 2.23,
"learning_rate": 2.565596371075336e-07,
"loss": 0.0932,
"step": 50176
},
{
"epoch": 2.23,
"learning_rate": 2.556108986332236e-07,
"loss": 0.0656,
"step": 50240
},
{
"epoch": 2.24,
"learning_rate": 2.546621601589137e-07,
"loss": 0.0679,
"step": 50304
},
{
"epoch": 2.24,
"learning_rate": 2.537134216846037e-07,
"loss": 0.0673,
"step": 50368
},
{
"epoch": 2.24,
"learning_rate": 2.527646832102938e-07,
"loss": 0.0718,
"step": 50432
},
{
"epoch": 2.25,
"learning_rate": 2.518159447359839e-07,
"loss": 0.0329,
"step": 50496
},
{
"epoch": 2.25,
"learning_rate": 2.508672062616739e-07,
"loss": 0.0793,
"step": 50560
},
{
"epoch": 2.25,
"learning_rate": 2.49918467787364e-07,
"loss": 0.0633,
"step": 50624
},
{
"epoch": 2.25,
"learning_rate": 2.4896972931305407e-07,
"loss": 0.0257,
"step": 50688
},
{
"epoch": 2.26,
"learning_rate": 2.480209908387441e-07,
"loss": 0.0608,
"step": 50752
},
{
"epoch": 2.26,
"learning_rate": 2.4707225236443414e-07,
"loss": 0.0701,
"step": 50816
},
{
"epoch": 2.26,
"learning_rate": 2.461235138901242e-07,
"loss": 0.0573,
"step": 50880
},
{
"epoch": 2.27,
"learning_rate": 2.4517477541581427e-07,
"loss": 0.136,
"step": 50944
},
{
"epoch": 2.27,
"learning_rate": 2.4422603694150436e-07,
"loss": 0.0862,
"step": 51008
},
{
"epoch": 2.27,
"learning_rate": 2.432772984671944e-07,
"loss": 0.0793,
"step": 51072
},
{
"epoch": 2.27,
"learning_rate": 2.4232855999288444e-07,
"loss": 0.0501,
"step": 51136
},
{
"epoch": 2.28,
"learning_rate": 2.413798215185745e-07,
"loss": 0.0761,
"step": 51200
},
{
"epoch": 2.28,
"learning_rate": 2.4043108304426457e-07,
"loss": 0.0484,
"step": 51264
},
{
"epoch": 2.28,
"learning_rate": 2.394823445699546e-07,
"loss": 0.0963,
"step": 51328
},
{
"epoch": 2.29,
"learning_rate": 2.385336060956447e-07,
"loss": 0.0816,
"step": 51392
},
{
"epoch": 2.29,
"learning_rate": 2.3758486762133476e-07,
"loss": 0.0204,
"step": 51456
},
{
"epoch": 2.29,
"learning_rate": 2.366361291470248e-07,
"loss": 0.0517,
"step": 51520
},
{
"epoch": 2.29,
"learning_rate": 2.3568739067271486e-07,
"loss": 0.0587,
"step": 51584
},
{
"epoch": 2.3,
"learning_rate": 2.3473865219840492e-07,
"loss": 0.0545,
"step": 51648
},
{
"epoch": 2.3,
"learning_rate": 2.33789913724095e-07,
"loss": 0.0462,
"step": 51712
},
{
"epoch": 2.3,
"learning_rate": 2.3284117524978505e-07,
"loss": 0.0475,
"step": 51776
},
{
"epoch": 2.31,
"learning_rate": 2.318924367754751e-07,
"loss": 0.1472,
"step": 51840
},
{
"epoch": 2.31,
"learning_rate": 2.3094369830116518e-07,
"loss": 0.0886,
"step": 51904
},
{
"epoch": 2.31,
"learning_rate": 2.2999495982685522e-07,
"loss": 0.0724,
"step": 51968
},
{
"epoch": 2.31,
"learning_rate": 2.2904622135254528e-07,
"loss": 0.0841,
"step": 52032
},
{
"epoch": 2.32,
"learning_rate": 2.2809748287823532e-07,
"loss": 0.0481,
"step": 52096
},
{
"epoch": 2.32,
"learning_rate": 2.2714874440392538e-07,
"loss": 0.0837,
"step": 52160
},
{
"epoch": 2.32,
"learning_rate": 2.2620000592961548e-07,
"loss": 0.0733,
"step": 52224
},
{
"epoch": 2.33,
"learning_rate": 2.252512674553055e-07,
"loss": 0.058,
"step": 52288
},
{
"epoch": 2.33,
"learning_rate": 2.2430252898099558e-07,
"loss": 0.0247,
"step": 52352
},
{
"epoch": 2.33,
"learning_rate": 2.2335379050668561e-07,
"loss": 0.056,
"step": 52416
},
{
"epoch": 2.33,
"learning_rate": 2.224050520323757e-07,
"loss": 0.0634,
"step": 52480
},
{
"epoch": 2.34,
"learning_rate": 2.2145631355806574e-07,
"loss": 0.0858,
"step": 52544
},
{
"epoch": 2.34,
"learning_rate": 2.205075750837558e-07,
"loss": 0.0639,
"step": 52608
},
{
"epoch": 2.34,
"learning_rate": 2.1955883660944587e-07,
"loss": 0.0742,
"step": 52672
},
{
"epoch": 2.35,
"learning_rate": 2.1861009813513594e-07,
"loss": 0.0472,
"step": 52736
},
{
"epoch": 2.35,
"learning_rate": 2.17661359660826e-07,
"loss": 0.0809,
"step": 52800
},
{
"epoch": 2.35,
"learning_rate": 2.1672744522517712e-07,
"loss": 0.035,
"step": 52864
},
{
"epoch": 2.35,
"learning_rate": 2.1577870675086721e-07,
"loss": 0.1202,
"step": 52928
},
{
"epoch": 2.36,
"learning_rate": 2.1482996827655725e-07,
"loss": 0.0451,
"step": 52992
},
{
"epoch": 2.36,
"learning_rate": 2.1388122980224732e-07,
"loss": 0.0622,
"step": 53056
},
{
"epoch": 2.36,
"learning_rate": 2.1293249132793735e-07,
"loss": 0.0428,
"step": 53120
},
{
"epoch": 2.37,
"learning_rate": 2.1198375285362744e-07,
"loss": 0.0734,
"step": 53184
},
{
"epoch": 2.37,
"learning_rate": 2.110350143793175e-07,
"loss": 0.0441,
"step": 53248
},
{
"epoch": 2.37,
"learning_rate": 2.1008627590500755e-07,
"loss": 0.0537,
"step": 53312
},
{
"epoch": 2.37,
"learning_rate": 2.091375374306976e-07,
"loss": 0.0983,
"step": 53376
},
{
"epoch": 2.38,
"learning_rate": 2.0818879895638767e-07,
"loss": 0.0633,
"step": 53440
},
{
"epoch": 2.38,
"learning_rate": 2.0724006048207774e-07,
"loss": 0.0639,
"step": 53504
},
{
"epoch": 2.38,
"learning_rate": 2.0629132200776778e-07,
"loss": 0.0446,
"step": 53568
},
{
"epoch": 2.39,
"learning_rate": 2.0534258353345784e-07,
"loss": 0.0512,
"step": 53632
},
{
"epoch": 2.39,
"learning_rate": 2.0439384505914793e-07,
"loss": 0.0764,
"step": 53696
},
{
"epoch": 2.39,
"learning_rate": 2.0344510658483797e-07,
"loss": 0.0737,
"step": 53760
},
{
"epoch": 2.39,
"learning_rate": 2.0249636811052803e-07,
"loss": 0.0627,
"step": 53824
},
{
"epoch": 2.4,
"learning_rate": 2.0154762963621807e-07,
"loss": 0.1065,
"step": 53888
},
{
"epoch": 2.4,
"learning_rate": 2.0059889116190813e-07,
"loss": 0.1112,
"step": 53952
},
{
"epoch": 2.4,
"learning_rate": 1.996501526875982e-07,
"loss": 0.0649,
"step": 54016
},
{
"epoch": 2.41,
"learning_rate": 1.9870141421328826e-07,
"loss": 0.0729,
"step": 54080
},
{
"epoch": 2.41,
"learning_rate": 1.9775267573897833e-07,
"loss": 0.0621,
"step": 54144
},
{
"epoch": 2.41,
"learning_rate": 1.9680393726466836e-07,
"loss": 0.0563,
"step": 54208
},
{
"epoch": 2.41,
"learning_rate": 1.9585519879035846e-07,
"loss": 0.0783,
"step": 54272
},
{
"epoch": 2.42,
"learning_rate": 1.949064603160485e-07,
"loss": 0.0339,
"step": 54336
},
{
"epoch": 2.42,
"learning_rate": 1.9395772184173856e-07,
"loss": 0.1098,
"step": 54400
},
{
"epoch": 2.42,
"learning_rate": 1.930089833674286e-07,
"loss": 0.0531,
"step": 54464
},
{
"epoch": 2.42,
"learning_rate": 1.9206024489311869e-07,
"loss": 0.062,
"step": 54528
},
{
"epoch": 2.43,
"learning_rate": 1.9111150641880875e-07,
"loss": 0.0335,
"step": 54592
},
{
"epoch": 2.43,
"learning_rate": 1.901627679444988e-07,
"loss": 0.097,
"step": 54656
},
{
"epoch": 2.43,
"learning_rate": 1.8921402947018885e-07,
"loss": 0.1206,
"step": 54720
},
{
"epoch": 2.44,
"learning_rate": 1.882652909958789e-07,
"loss": 0.0818,
"step": 54784
},
{
"epoch": 2.44,
"learning_rate": 1.8731655252156898e-07,
"loss": 0.047,
"step": 54848
},
{
"epoch": 2.44,
"learning_rate": 1.8636781404725902e-07,
"loss": 0.0854,
"step": 54912
},
{
"epoch": 2.44,
"learning_rate": 1.8541907557294908e-07,
"loss": 0.0492,
"step": 54976
},
{
"epoch": 2.45,
"learning_rate": 1.8447033709863915e-07,
"loss": 0.1021,
"step": 55040
},
{
"epoch": 2.45,
"learning_rate": 1.835215986243292e-07,
"loss": 0.0635,
"step": 55104
},
{
"epoch": 2.45,
"learning_rate": 1.8257286015001927e-07,
"loss": 0.0809,
"step": 55168
},
{
"epoch": 2.46,
"learning_rate": 1.816241216757093e-07,
"loss": 0.0943,
"step": 55232
},
{
"epoch": 2.46,
"learning_rate": 1.8067538320139938e-07,
"loss": 0.0428,
"step": 55296
},
{
"epoch": 2.46,
"learning_rate": 1.7972664472708944e-07,
"loss": 0.1056,
"step": 55360
},
{
"epoch": 2.46,
"learning_rate": 1.787779062527795e-07,
"loss": 0.0723,
"step": 55424
},
{
"epoch": 2.47,
"learning_rate": 1.7782916777846957e-07,
"loss": 0.075,
"step": 55488
},
{
"epoch": 2.47,
"learning_rate": 1.768804293041596e-07,
"loss": 0.0867,
"step": 55552
},
{
"epoch": 2.47,
"learning_rate": 1.7593169082984967e-07,
"loss": 0.0473,
"step": 55616
},
{
"epoch": 2.48,
"learning_rate": 1.7498295235553973e-07,
"loss": 0.1154,
"step": 55680
},
{
"epoch": 2.48,
"learning_rate": 1.740342138812298e-07,
"loss": 0.118,
"step": 55744
},
{
"epoch": 2.48,
"learning_rate": 1.7308547540691984e-07,
"loss": 0.0474,
"step": 55808
},
{
"epoch": 2.48,
"learning_rate": 1.721367369326099e-07,
"loss": 0.0949,
"step": 55872
},
{
"epoch": 2.49,
"learning_rate": 1.711879984583e-07,
"loss": 0.0887,
"step": 55936
},
{
"epoch": 2.49,
"learning_rate": 1.7023925998399003e-07,
"loss": 0.0711,
"step": 56000
},
{
"epoch": 2.49,
"learning_rate": 1.692905215096801e-07,
"loss": 0.0556,
"step": 56064
},
{
"epoch": 2.5,
"learning_rate": 1.6834178303537013e-07,
"loss": 0.1028,
"step": 56128
},
{
"epoch": 2.5,
"learning_rate": 1.6739304456106022e-07,
"loss": 0.0673,
"step": 56192
},
{
"epoch": 2.5,
"learning_rate": 1.6644430608675029e-07,
"loss": 0.0826,
"step": 56256
},
{
"epoch": 2.5,
"learning_rate": 1.6549556761244032e-07,
"loss": 0.0579,
"step": 56320
},
{
"epoch": 2.51,
"learning_rate": 1.645468291381304e-07,
"loss": 0.0895,
"step": 56384
},
{
"epoch": 2.51,
"learning_rate": 1.6359809066382042e-07,
"loss": 0.1035,
"step": 56448
},
{
"epoch": 2.51,
"learning_rate": 1.6264935218951052e-07,
"loss": 0.158,
"step": 56512
},
{
"epoch": 2.52,
"learning_rate": 1.6170061371520055e-07,
"loss": 0.0721,
"step": 56576
},
{
"epoch": 2.52,
"learning_rate": 1.6075187524089062e-07,
"loss": 0.0803,
"step": 56640
},
{
"epoch": 2.52,
"learning_rate": 1.5980313676658068e-07,
"loss": 0.1033,
"step": 56704
},
{
"epoch": 2.52,
"learning_rate": 1.5885439829227075e-07,
"loss": 0.1123,
"step": 56768
},
{
"epoch": 2.53,
"learning_rate": 1.579056598179608e-07,
"loss": 0.0409,
"step": 56832
},
{
"epoch": 2.53,
"learning_rate": 1.5695692134365085e-07,
"loss": 0.0909,
"step": 56896
},
{
"epoch": 2.53,
"learning_rate": 1.560081828693409e-07,
"loss": 0.0896,
"step": 56960
},
{
"epoch": 2.54,
"learning_rate": 1.5505944439503098e-07,
"loss": 0.0479,
"step": 57024
},
{
"epoch": 2.54,
"learning_rate": 1.5411070592072104e-07,
"loss": 0.0769,
"step": 57088
},
{
"epoch": 2.54,
"learning_rate": 1.531619674464111e-07,
"loss": 0.0643,
"step": 57152
},
{
"epoch": 2.54,
"learning_rate": 1.5221322897210114e-07,
"loss": 0.0449,
"step": 57216
},
{
"epoch": 2.55,
"learning_rate": 1.5126449049779123e-07,
"loss": 0.0761,
"step": 57280
},
{
"epoch": 2.55,
"learning_rate": 1.5031575202348127e-07,
"loss": 0.0619,
"step": 57344
},
{
"epoch": 2.55,
"learning_rate": 1.4936701354917133e-07,
"loss": 0.0834,
"step": 57408
},
{
"epoch": 2.56,
"learning_rate": 1.4841827507486137e-07,
"loss": 0.0556,
"step": 57472
},
{
"epoch": 2.56,
"learning_rate": 1.4746953660055144e-07,
"loss": 0.0629,
"step": 57536
},
{
"epoch": 2.56,
"learning_rate": 1.4652079812624153e-07,
"loss": 0.0941,
"step": 57600
},
{
"epoch": 2.56,
"learning_rate": 1.4557205965193156e-07,
"loss": 0.0543,
"step": 57664
},
{
"epoch": 2.57,
"learning_rate": 1.4462332117762163e-07,
"loss": 0.0463,
"step": 57728
},
{
"epoch": 2.57,
"learning_rate": 1.4367458270331167e-07,
"loss": 0.0364,
"step": 57792
},
{
"epoch": 2.57,
"learning_rate": 1.4272584422900176e-07,
"loss": 0.0395,
"step": 57856
},
{
"epoch": 2.58,
"learning_rate": 1.417771057546918e-07,
"loss": 0.0566,
"step": 57920
},
{
"epoch": 2.58,
"learning_rate": 1.4082836728038186e-07,
"loss": 0.0857,
"step": 57984
},
{
"epoch": 2.58,
"learning_rate": 1.3987962880607192e-07,
"loss": 0.1735,
"step": 58048
},
{
"epoch": 2.58,
"learning_rate": 1.3893089033176199e-07,
"loss": 0.0754,
"step": 58112
},
{
"epoch": 2.59,
"learning_rate": 1.3798215185745205e-07,
"loss": 0.0505,
"step": 58176
},
{
"epoch": 2.59,
"learning_rate": 1.370334133831421e-07,
"loss": 0.0766,
"step": 58240
},
{
"epoch": 2.59,
"learning_rate": 1.3608467490883215e-07,
"loss": 0.0526,
"step": 58304
},
{
"epoch": 2.6,
"learning_rate": 1.351359364345222e-07,
"loss": 0.0535,
"step": 58368
},
{
"epoch": 2.6,
"learning_rate": 1.3418719796021228e-07,
"loss": 0.0465,
"step": 58432
},
{
"epoch": 2.6,
"learning_rate": 1.3323845948590235e-07,
"loss": 0.0652,
"step": 58496
},
{
"epoch": 2.6,
"learning_rate": 1.3228972101159238e-07,
"loss": 0.0588,
"step": 58560
},
{
"epoch": 2.61,
"learning_rate": 1.3134098253728245e-07,
"loss": 0.06,
"step": 58624
},
{
"epoch": 2.61,
"learning_rate": 1.303922440629725e-07,
"loss": 0.0837,
"step": 58688
},
{
"epoch": 2.61,
"learning_rate": 1.2944350558866258e-07,
"loss": 0.0505,
"step": 58752
},
{
"epoch": 2.62,
"learning_rate": 1.284947671143526e-07,
"loss": 0.0536,
"step": 58816
},
{
"epoch": 2.62,
"learning_rate": 1.2754602864004268e-07,
"loss": 0.0765,
"step": 58880
},
{
"epoch": 2.62,
"learning_rate": 1.2659729016573277e-07,
"loss": 0.0612,
"step": 58944
},
{
"epoch": 2.62,
"learning_rate": 1.256485516914228e-07,
"loss": 0.0564,
"step": 59008
},
{
"epoch": 2.63,
"learning_rate": 1.2469981321711287e-07,
"loss": 0.0484,
"step": 59072
},
{
"epoch": 2.63,
"learning_rate": 1.2375107474280293e-07,
"loss": 0.1047,
"step": 59136
},
{
"epoch": 2.63,
"learning_rate": 1.2280233626849297e-07,
"loss": 0.0873,
"step": 59200
},
{
"epoch": 2.64,
"learning_rate": 1.2185359779418304e-07,
"loss": 0.0641,
"step": 59264
},
{
"epoch": 2.64,
"learning_rate": 1.209048593198731e-07,
"loss": 0.0654,
"step": 59328
},
{
"epoch": 2.64,
"learning_rate": 1.1995612084556316e-07,
"loss": 0.0936,
"step": 59392
},
{
"epoch": 2.64,
"learning_rate": 1.1900738237125321e-07,
"loss": 0.0602,
"step": 59456
},
{
"epoch": 2.65,
"learning_rate": 1.1805864389694328e-07,
"loss": 0.0533,
"step": 59520
},
{
"epoch": 2.65,
"learning_rate": 1.1710990542263334e-07,
"loss": 0.1065,
"step": 59584
},
{
"epoch": 2.65,
"learning_rate": 1.161611669483234e-07,
"loss": 0.1187,
"step": 59648
},
{
"epoch": 2.66,
"learning_rate": 1.1521242847401346e-07,
"loss": 0.0678,
"step": 59712
},
{
"epoch": 2.66,
"learning_rate": 1.1426368999970351e-07,
"loss": 0.0702,
"step": 59776
},
{
"epoch": 2.66,
"learning_rate": 1.1331495152539357e-07,
"loss": 0.0872,
"step": 59840
},
{
"epoch": 2.66,
"learning_rate": 1.1236621305108364e-07,
"loss": 0.0574,
"step": 59904
},
{
"epoch": 2.67,
"learning_rate": 1.114174745767737e-07,
"loss": 0.0689,
"step": 59968
},
{
"epoch": 2.67,
"learning_rate": 1.1048356014112484e-07,
"loss": 0.0703,
"step": 60032
},
{
"epoch": 2.67,
"learning_rate": 1.095348216668149e-07,
"loss": 0.0578,
"step": 60096
},
{
"epoch": 2.68,
"learning_rate": 1.0858608319250497e-07,
"loss": 0.0828,
"step": 60160
},
{
"epoch": 2.68,
"learning_rate": 1.0763734471819502e-07,
"loss": 0.0917,
"step": 60224
},
{
"epoch": 2.68,
"learning_rate": 1.0668860624388508e-07,
"loss": 0.073,
"step": 60288
},
{
"epoch": 2.68,
"learning_rate": 1.0573986776957513e-07,
"loss": 0.0453,
"step": 60352
},
{
"epoch": 2.69,
"learning_rate": 1.047911292952652e-07,
"loss": 0.0881,
"step": 60416
},
{
"epoch": 2.69,
"learning_rate": 1.0384239082095525e-07,
"loss": 0.0675,
"step": 60480
},
{
"epoch": 2.69,
"learning_rate": 1.0289365234664532e-07,
"loss": 0.0965,
"step": 60544
},
{
"epoch": 2.7,
"learning_rate": 1.0194491387233538e-07,
"loss": 0.0637,
"step": 60608
},
{
"epoch": 2.7,
"learning_rate": 1.0099617539802544e-07,
"loss": 0.033,
"step": 60672
},
{
"epoch": 2.7,
"learning_rate": 1.0004743692371549e-07,
"loss": 0.0532,
"step": 60736
},
{
"epoch": 2.7,
"learning_rate": 9.909869844940556e-08,
"loss": 0.053,
"step": 60800
},
{
"epoch": 2.71,
"learning_rate": 9.81499599750956e-08,
"loss": 0.0748,
"step": 60864
},
{
"epoch": 2.71,
"learning_rate": 9.720122150078566e-08,
"loss": 0.1016,
"step": 60928
},
{
"epoch": 2.71,
"learning_rate": 9.625248302647573e-08,
"loss": 0.0864,
"step": 60992
},
{
"epoch": 2.72,
"learning_rate": 9.530374455216579e-08,
"loss": 0.0621,
"step": 61056
},
{
"epoch": 2.72,
"learning_rate": 9.435500607785585e-08,
"loss": 0.053,
"step": 61120
},
{
"epoch": 2.72,
"learning_rate": 9.34062676035459e-08,
"loss": 0.0541,
"step": 61184
},
{
"epoch": 2.72,
"learning_rate": 9.245752912923596e-08,
"loss": 0.0849,
"step": 61248
},
{
"epoch": 2.73,
"learning_rate": 9.150879065492602e-08,
"loss": 0.0488,
"step": 61312
},
{
"epoch": 2.73,
"learning_rate": 9.056005218061609e-08,
"loss": 0.0471,
"step": 61376
},
{
"epoch": 2.73,
"learning_rate": 8.961131370630614e-08,
"loss": 0.0974,
"step": 61440
},
{
"epoch": 2.74,
"learning_rate": 8.866257523199621e-08,
"loss": 0.0871,
"step": 61504
},
{
"epoch": 2.74,
"learning_rate": 8.771383675768626e-08,
"loss": 0.0958,
"step": 61568
},
{
"epoch": 2.74,
"learning_rate": 8.676509828337632e-08,
"loss": 0.0631,
"step": 61632
},
{
"epoch": 2.74,
"learning_rate": 8.581635980906637e-08,
"loss": 0.1262,
"step": 61696
},
{
"epoch": 2.75,
"learning_rate": 8.486762133475642e-08,
"loss": 0.0563,
"step": 61760
},
{
"epoch": 2.75,
"learning_rate": 8.39188828604465e-08,
"loss": 0.0844,
"step": 61824
},
{
"epoch": 2.75,
"learning_rate": 8.297014438613655e-08,
"loss": 0.0916,
"step": 61888
},
{
"epoch": 2.76,
"learning_rate": 8.202140591182662e-08,
"loss": 0.0569,
"step": 61952
},
{
"epoch": 2.76,
"learning_rate": 8.107266743751667e-08,
"loss": 0.1387,
"step": 62016
},
{
"epoch": 2.76,
"learning_rate": 8.012392896320673e-08,
"loss": 0.0627,
"step": 62080
},
{
"epoch": 2.76,
"learning_rate": 7.917519048889678e-08,
"loss": 0.0815,
"step": 62144
},
{
"epoch": 2.77,
"learning_rate": 7.822645201458685e-08,
"loss": 0.1097,
"step": 62208
},
{
"epoch": 2.77,
"learning_rate": 7.727771354027691e-08,
"loss": 0.094,
"step": 62272
},
{
"epoch": 2.77,
"learning_rate": 7.632897506596698e-08,
"loss": 0.0843,
"step": 62336
},
{
"epoch": 2.78,
"learning_rate": 7.538023659165703e-08,
"loss": 0.0687,
"step": 62400
},
{
"epoch": 2.78,
"learning_rate": 7.443149811734709e-08,
"loss": 0.0932,
"step": 62464
},
{
"epoch": 2.78,
"learning_rate": 7.348275964303714e-08,
"loss": 0.0515,
"step": 62528
},
{
"epoch": 2.78,
"learning_rate": 7.25340211687272e-08,
"loss": 0.0626,
"step": 62592
},
{
"epoch": 2.79,
"learning_rate": 7.158528269441727e-08,
"loss": 0.1467,
"step": 62656
},
{
"epoch": 2.79,
"learning_rate": 7.063654422010733e-08,
"loss": 0.0827,
"step": 62720
},
{
"epoch": 2.79,
"learning_rate": 6.968780574579738e-08,
"loss": 0.0736,
"step": 62784
},
{
"epoch": 2.79,
"learning_rate": 6.873906727148744e-08,
"loss": 0.0955,
"step": 62848
},
{
"epoch": 2.8,
"learning_rate": 6.77903287971775e-08,
"loss": 0.0599,
"step": 62912
},
{
"epoch": 2.8,
"learning_rate": 6.684159032286755e-08,
"loss": 0.0315,
"step": 62976
},
{
"epoch": 2.8,
"learning_rate": 6.589285184855762e-08,
"loss": 0.0659,
"step": 63040
},
{
"epoch": 2.81,
"learning_rate": 6.494411337424768e-08,
"loss": 0.0614,
"step": 63104
},
{
"epoch": 2.81,
"learning_rate": 6.399537489993774e-08,
"loss": 0.0426,
"step": 63168
},
{
"epoch": 2.81,
"learning_rate": 6.306146046428888e-08,
"loss": 0.116,
"step": 63232
},
{
"epoch": 2.81,
"learning_rate": 6.211272198997894e-08,
"loss": 0.1004,
"step": 63296
},
{
"epoch": 2.82,
"learning_rate": 6.116398351566901e-08,
"loss": 0.0469,
"step": 63360
},
{
"epoch": 2.82,
"learning_rate": 6.021524504135907e-08,
"loss": 0.0403,
"step": 63424
},
{
"epoch": 2.82,
"learning_rate": 5.9266506567049123e-08,
"loss": 0.0604,
"step": 63488
},
{
"epoch": 2.83,
"learning_rate": 5.831776809273918e-08,
"loss": 0.0871,
"step": 63552
},
{
"epoch": 2.83,
"learning_rate": 5.7369029618429245e-08,
"loss": 0.0703,
"step": 63616
},
{
"epoch": 2.83,
"learning_rate": 5.64202911441193e-08,
"loss": 0.0564,
"step": 63680
},
{
"epoch": 2.83,
"learning_rate": 5.547155266980936e-08,
"loss": 0.0806,
"step": 63744
},
{
"epoch": 2.84,
"learning_rate": 5.4522814195499424e-08,
"loss": 0.031,
"step": 63808
},
{
"epoch": 2.84,
"learning_rate": 5.357407572118948e-08,
"loss": 0.071,
"step": 63872
},
{
"epoch": 2.84,
"learning_rate": 5.262533724687954e-08,
"loss": 0.0471,
"step": 63936
},
{
"epoch": 2.85,
"learning_rate": 5.167659877256959e-08,
"loss": 0.0629,
"step": 64000
},
{
"epoch": 2.85,
"learning_rate": 5.0727860298259654e-08,
"loss": 0.0759,
"step": 64064
},
{
"epoch": 2.85,
"learning_rate": 4.977912182394971e-08,
"loss": 0.0494,
"step": 64128
},
{
"epoch": 2.85,
"learning_rate": 4.883038334963977e-08,
"loss": 0.0805,
"step": 64192
},
{
"epoch": 2.86,
"learning_rate": 4.7881644875329834e-08,
"loss": 0.0371,
"step": 64256
},
{
"epoch": 2.86,
"learning_rate": 4.693290640101989e-08,
"loss": 0.0471,
"step": 64320
},
{
"epoch": 2.86,
"learning_rate": 4.598416792670995e-08,
"loss": 0.0223,
"step": 64384
},
{
"epoch": 2.87,
"learning_rate": 4.503542945240001e-08,
"loss": 0.1177,
"step": 64448
},
{
"epoch": 2.87,
"learning_rate": 4.408669097809007e-08,
"loss": 0.0777,
"step": 64512
},
{
"epoch": 2.87,
"learning_rate": 4.313795250378013e-08,
"loss": 0.0444,
"step": 64576
},
{
"epoch": 2.87,
"learning_rate": 4.2189214029470186e-08,
"loss": 0.1089,
"step": 64640
},
{
"epoch": 2.88,
"learning_rate": 4.124047555516025e-08,
"loss": 0.0793,
"step": 64704
},
{
"epoch": 2.88,
"learning_rate": 4.029173708085031e-08,
"loss": 0.0732,
"step": 64768
},
{
"epoch": 2.88,
"learning_rate": 3.9342998606540365e-08,
"loss": 0.0384,
"step": 64832
},
{
"epoch": 2.89,
"learning_rate": 3.839426013223043e-08,
"loss": 0.0462,
"step": 64896
},
{
"epoch": 2.89,
"learning_rate": 3.744552165792048e-08,
"loss": 0.0671,
"step": 64960
},
{
"epoch": 2.89,
"learning_rate": 3.649678318361054e-08,
"loss": 0.0454,
"step": 65024
},
{
"epoch": 2.89,
"learning_rate": 3.55480447093006e-08,
"loss": 0.0829,
"step": 65088
},
{
"epoch": 2.9,
"learning_rate": 3.459930623499066e-08,
"loss": 0.0956,
"step": 65152
},
{
"epoch": 2.9,
"learning_rate": 3.3650567760680717e-08,
"loss": 0.0478,
"step": 65216
},
{
"epoch": 2.9,
"learning_rate": 3.2701829286370774e-08,
"loss": 0.0558,
"step": 65280
},
{
"epoch": 2.91,
"learning_rate": 3.175309081206084e-08,
"loss": 0.0913,
"step": 65344
},
{
"epoch": 2.91,
"learning_rate": 3.0804352337750896e-08,
"loss": 0.0834,
"step": 65408
},
{
"epoch": 2.91,
"learning_rate": 2.9855613863440953e-08,
"loss": 0.0448,
"step": 65472
},
{
"epoch": 2.91,
"learning_rate": 2.8906875389131014e-08,
"loss": 0.0597,
"step": 65536
},
{
"epoch": 2.92,
"learning_rate": 2.7958136914821072e-08,
"loss": 0.1018,
"step": 65600
},
{
"epoch": 2.92,
"learning_rate": 2.7024222479172225e-08,
"loss": 0.0871,
"step": 65664
},
{
"epoch": 2.92,
"learning_rate": 2.6075484004862282e-08,
"loss": 0.0386,
"step": 65728
},
{
"epoch": 2.93,
"learning_rate": 2.5126745530552343e-08,
"loss": 0.1053,
"step": 65792
},
{
"epoch": 2.93,
"learning_rate": 2.41780070562424e-08,
"loss": 0.0801,
"step": 65856
},
{
"epoch": 2.93,
"learning_rate": 2.3229268581932462e-08,
"loss": 0.0605,
"step": 65920
},
{
"epoch": 2.93,
"learning_rate": 2.228053010762252e-08,
"loss": 0.0583,
"step": 65984
},
{
"epoch": 2.94,
"learning_rate": 2.1331791633312577e-08,
"loss": 0.0541,
"step": 66048
},
{
"epoch": 2.94,
"learning_rate": 2.0383053159002638e-08,
"loss": 0.0358,
"step": 66112
},
{
"epoch": 2.94,
"learning_rate": 1.9434314684692695e-08,
"loss": 0.0385,
"step": 66176
},
{
"epoch": 2.95,
"learning_rate": 1.8485576210382756e-08,
"loss": 0.0723,
"step": 66240
},
{
"epoch": 2.95,
"learning_rate": 1.7536837736072817e-08,
"loss": 0.059,
"step": 66304
},
{
"epoch": 2.95,
"learning_rate": 1.6588099261762874e-08,
"loss": 0.0381,
"step": 66368
},
{
"epoch": 2.95,
"learning_rate": 1.5639360787452932e-08,
"loss": 0.0644,
"step": 66432
},
{
"epoch": 2.96,
"learning_rate": 1.4690622313142993e-08,
"loss": 0.0506,
"step": 66496
},
{
"epoch": 2.96,
"learning_rate": 1.374188383883305e-08,
"loss": 0.0423,
"step": 66560
},
{
"epoch": 2.96,
"learning_rate": 1.279314536452311e-08,
"loss": 0.0629,
"step": 66624
},
{
"epoch": 2.97,
"learning_rate": 1.1844406890213169e-08,
"loss": 0.0264,
"step": 66688
},
{
"epoch": 2.97,
"learning_rate": 1.089566841590323e-08,
"loss": 0.0475,
"step": 66752
},
{
"epoch": 2.97,
"learning_rate": 9.946929941593287e-09,
"loss": 0.0624,
"step": 66816
},
{
"epoch": 2.97,
"learning_rate": 8.998191467283346e-09,
"loss": 0.0693,
"step": 66880
},
{
"epoch": 2.98,
"learning_rate": 8.049452992973405e-09,
"loss": 0.1005,
"step": 66944
},
{
"epoch": 2.98,
"learning_rate": 7.1007145186634646e-09,
"loss": 0.0561,
"step": 67008
},
{
"epoch": 2.98,
"learning_rate": 6.151976044353523e-09,
"loss": 0.1169,
"step": 67072
},
{
"epoch": 2.99,
"learning_rate": 5.203237570043583e-09,
"loss": 0.059,
"step": 67136
},
{
"epoch": 2.99,
"learning_rate": 4.254499095733641e-09,
"loss": 0.0378,
"step": 67200
},
{
"epoch": 2.99,
"learning_rate": 3.3057606214237005e-09,
"loss": 0.0409,
"step": 67264
},
{
"epoch": 2.99,
"learning_rate": 2.3570221471137597e-09,
"loss": 0.1203,
"step": 67328
},
{
"epoch": 3.0,
"learning_rate": 1.4082836728038187e-09,
"loss": 0.097,
"step": 67392
},
{
"epoch": 3.0,
"learning_rate": 4.5954519849387766e-10,
"loss": 0.0873,
"step": 67456
}
],
"logging_steps": 64,
"max_steps": 67458,
"num_input_tokens_seen": 0,
"num_train_epochs": 3,
"save_steps": 22486,
"total_flos": 1.432248707211264e+20,
"train_batch_size": 1,
"trial_name": null,
"trial_params": null
}