hsethu's picture
Upload folder using huggingface_hub
24cf390 verified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 2.0,
"eval_steps": 500,
"global_step": 44972,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0,
"learning_rate": 9.990957336416734e-07,
"loss": 0.1541,
"step": 64
},
{
"epoch": 0.01,
"learning_rate": 9.981469951673633e-07,
"loss": 0.0788,
"step": 128
},
{
"epoch": 0.01,
"learning_rate": 9.971982566930536e-07,
"loss": 0.2255,
"step": 192
},
{
"epoch": 0.01,
"learning_rate": 9.962495182187434e-07,
"loss": 0.1454,
"step": 256
},
{
"epoch": 0.01,
"learning_rate": 9.953007797444335e-07,
"loss": 0.081,
"step": 320
},
{
"epoch": 0.02,
"learning_rate": 9.943520412701236e-07,
"loss": 0.1782,
"step": 384
},
{
"epoch": 0.02,
"learning_rate": 9.934033027958137e-07,
"loss": 0.1119,
"step": 448
},
{
"epoch": 0.02,
"learning_rate": 9.924545643215036e-07,
"loss": 0.1648,
"step": 512
},
{
"epoch": 0.03,
"learning_rate": 9.915058258471937e-07,
"loss": 0.1536,
"step": 576
},
{
"epoch": 0.03,
"learning_rate": 9.905570873728838e-07,
"loss": 0.1527,
"step": 640
},
{
"epoch": 0.03,
"learning_rate": 9.896083488985739e-07,
"loss": 0.1153,
"step": 704
},
{
"epoch": 0.03,
"learning_rate": 9.88659610424264e-07,
"loss": 0.0826,
"step": 768
},
{
"epoch": 0.04,
"learning_rate": 9.87710871949954e-07,
"loss": 0.135,
"step": 832
},
{
"epoch": 0.04,
"learning_rate": 9.867621334756441e-07,
"loss": 0.1657,
"step": 896
},
{
"epoch": 0.04,
"learning_rate": 9.85813395001334e-07,
"loss": 0.1777,
"step": 960
},
{
"epoch": 0.05,
"learning_rate": 9.848646565270243e-07,
"loss": 0.0975,
"step": 1024
},
{
"epoch": 0.05,
"learning_rate": 9.839159180527142e-07,
"loss": 0.1336,
"step": 1088
},
{
"epoch": 0.05,
"learning_rate": 9.829671795784043e-07,
"loss": 0.0785,
"step": 1152
},
{
"epoch": 0.05,
"learning_rate": 9.820184411040944e-07,
"loss": 0.164,
"step": 1216
},
{
"epoch": 0.06,
"learning_rate": 9.810697026297845e-07,
"loss": 0.0864,
"step": 1280
},
{
"epoch": 0.06,
"learning_rate": 9.801209641554746e-07,
"loss": 0.128,
"step": 1344
},
{
"epoch": 0.06,
"learning_rate": 9.791722256811644e-07,
"loss": 0.0897,
"step": 1408
},
{
"epoch": 0.07,
"learning_rate": 9.782234872068545e-07,
"loss": 0.0989,
"step": 1472
},
{
"epoch": 0.07,
"learning_rate": 9.772747487325446e-07,
"loss": 0.1309,
"step": 1536
},
{
"epoch": 0.07,
"learning_rate": 9.763408342968958e-07,
"loss": 0.2144,
"step": 1600
},
{
"epoch": 0.07,
"learning_rate": 9.75392095822586e-07,
"loss": 0.0691,
"step": 1664
},
{
"epoch": 0.08,
"learning_rate": 9.744433573482758e-07,
"loss": 0.1476,
"step": 1728
},
{
"epoch": 0.08,
"learning_rate": 9.73494618873966e-07,
"loss": 0.1541,
"step": 1792
},
{
"epoch": 0.08,
"learning_rate": 9.72545880399656e-07,
"loss": 0.1108,
"step": 1856
},
{
"epoch": 0.09,
"learning_rate": 9.71597141925346e-07,
"loss": 0.112,
"step": 1920
},
{
"epoch": 0.09,
"learning_rate": 9.706484034510362e-07,
"loss": 0.1901,
"step": 1984
},
{
"epoch": 0.09,
"learning_rate": 9.696996649767262e-07,
"loss": 0.1207,
"step": 2048
},
{
"epoch": 0.09,
"learning_rate": 9.687509265024163e-07,
"loss": 0.128,
"step": 2112
},
{
"epoch": 0.1,
"learning_rate": 9.678021880281062e-07,
"loss": 0.2029,
"step": 2176
},
{
"epoch": 0.1,
"learning_rate": 9.668534495537965e-07,
"loss": 0.1629,
"step": 2240
},
{
"epoch": 0.1,
"learning_rate": 9.659047110794864e-07,
"loss": 0.1662,
"step": 2304
},
{
"epoch": 0.11,
"learning_rate": 9.649559726051765e-07,
"loss": 0.1392,
"step": 2368
},
{
"epoch": 0.11,
"learning_rate": 9.640072341308666e-07,
"loss": 0.081,
"step": 2432
},
{
"epoch": 0.11,
"learning_rate": 9.630584956565567e-07,
"loss": 0.0894,
"step": 2496
},
{
"epoch": 0.11,
"learning_rate": 9.621097571822468e-07,
"loss": 0.1017,
"step": 2560
},
{
"epoch": 0.12,
"learning_rate": 9.611610187079368e-07,
"loss": 0.1476,
"step": 2624
},
{
"epoch": 0.12,
"learning_rate": 9.602122802336267e-07,
"loss": 0.1592,
"step": 2688
},
{
"epoch": 0.12,
"learning_rate": 9.592635417593168e-07,
"loss": 0.146,
"step": 2752
},
{
"epoch": 0.13,
"learning_rate": 9.58314803285007e-07,
"loss": 0.1552,
"step": 2816
},
{
"epoch": 0.13,
"learning_rate": 9.57366064810697e-07,
"loss": 0.0498,
"step": 2880
},
{
"epoch": 0.13,
"learning_rate": 9.56417326336387e-07,
"loss": 0.0999,
"step": 2944
},
{
"epoch": 0.13,
"learning_rate": 9.55468587862077e-07,
"loss": 0.1196,
"step": 3008
},
{
"epoch": 0.14,
"learning_rate": 9.545198493877673e-07,
"loss": 0.1172,
"step": 3072
},
{
"epoch": 0.14,
"learning_rate": 9.535711109134573e-07,
"loss": 0.1107,
"step": 3136
},
{
"epoch": 0.14,
"learning_rate": 9.526223724391472e-07,
"loss": 0.182,
"step": 3200
},
{
"epoch": 0.15,
"learning_rate": 9.516736339648373e-07,
"loss": 0.0687,
"step": 3264
},
{
"epoch": 0.15,
"learning_rate": 9.507248954905273e-07,
"loss": 0.1274,
"step": 3328
},
{
"epoch": 0.15,
"learning_rate": 9.497761570162175e-07,
"loss": 0.1374,
"step": 3392
},
{
"epoch": 0.15,
"learning_rate": 9.488274185419075e-07,
"loss": 0.159,
"step": 3456
},
{
"epoch": 0.16,
"learning_rate": 9.478786800675975e-07,
"loss": 0.0687,
"step": 3520
},
{
"epoch": 0.16,
"learning_rate": 9.469299415932877e-07,
"loss": 0.1446,
"step": 3584
},
{
"epoch": 0.16,
"learning_rate": 9.459812031189777e-07,
"loss": 0.1047,
"step": 3648
},
{
"epoch": 0.17,
"learning_rate": 9.450324646446678e-07,
"loss": 0.1445,
"step": 3712
},
{
"epoch": 0.17,
"learning_rate": 9.440837261703578e-07,
"loss": 0.1033,
"step": 3776
},
{
"epoch": 0.17,
"learning_rate": 9.431349876960478e-07,
"loss": 0.1514,
"step": 3840
},
{
"epoch": 0.17,
"learning_rate": 9.421862492217379e-07,
"loss": 0.1245,
"step": 3904
},
{
"epoch": 0.18,
"learning_rate": 9.41237510747428e-07,
"loss": 0.2032,
"step": 3968
},
{
"epoch": 0.18,
"learning_rate": 9.402887722731181e-07,
"loss": 0.0935,
"step": 4032
},
{
"epoch": 0.18,
"learning_rate": 9.393400337988081e-07,
"loss": 0.1492,
"step": 4096
},
{
"epoch": 0.19,
"learning_rate": 9.383912953244981e-07,
"loss": 0.1011,
"step": 4160
},
{
"epoch": 0.19,
"learning_rate": 9.374425568501883e-07,
"loss": 0.1279,
"step": 4224
},
{
"epoch": 0.19,
"learning_rate": 9.364938183758783e-07,
"loss": 0.0842,
"step": 4288
},
{
"epoch": 0.19,
"learning_rate": 9.355599039402295e-07,
"loss": 0.1482,
"step": 4352
},
{
"epoch": 0.2,
"learning_rate": 9.346111654659194e-07,
"loss": 0.1444,
"step": 4416
},
{
"epoch": 0.2,
"learning_rate": 9.336624269916095e-07,
"loss": 0.1124,
"step": 4480
},
{
"epoch": 0.2,
"learning_rate": 9.327285125559607e-07,
"loss": 0.1552,
"step": 4544
},
{
"epoch": 0.2,
"learning_rate": 9.317797740816507e-07,
"loss": 0.113,
"step": 4608
},
{
"epoch": 0.21,
"learning_rate": 9.308310356073408e-07,
"loss": 0.1406,
"step": 4672
},
{
"epoch": 0.21,
"learning_rate": 9.298822971330309e-07,
"loss": 0.1464,
"step": 4736
},
{
"epoch": 0.21,
"learning_rate": 9.289335586587209e-07,
"loss": 0.0889,
"step": 4800
},
{
"epoch": 0.22,
"learning_rate": 9.279848201844111e-07,
"loss": 0.1028,
"step": 4864
},
{
"epoch": 0.22,
"learning_rate": 9.270360817101011e-07,
"loss": 0.1816,
"step": 4928
},
{
"epoch": 0.22,
"learning_rate": 9.26087343235791e-07,
"loss": 0.0867,
"step": 4992
},
{
"epoch": 0.22,
"learning_rate": 9.251386047614812e-07,
"loss": 0.1426,
"step": 5056
},
{
"epoch": 0.23,
"learning_rate": 9.241898662871712e-07,
"loss": 0.1404,
"step": 5120
},
{
"epoch": 0.23,
"learning_rate": 9.232411278128613e-07,
"loss": 0.0599,
"step": 5184
},
{
"epoch": 0.23,
"learning_rate": 9.222923893385513e-07,
"loss": 0.0753,
"step": 5248
},
{
"epoch": 0.24,
"learning_rate": 9.213436508642414e-07,
"loss": 0.1109,
"step": 5312
},
{
"epoch": 0.24,
"learning_rate": 9.203949123899315e-07,
"loss": 0.1836,
"step": 5376
},
{
"epoch": 0.24,
"learning_rate": 9.194461739156215e-07,
"loss": 0.0633,
"step": 5440
},
{
"epoch": 0.24,
"learning_rate": 9.184974354413117e-07,
"loss": 0.1392,
"step": 5504
},
{
"epoch": 0.25,
"learning_rate": 9.175486969670017e-07,
"loss": 0.11,
"step": 5568
},
{
"epoch": 0.25,
"learning_rate": 9.165999584926916e-07,
"loss": 0.1901,
"step": 5632
},
{
"epoch": 0.25,
"learning_rate": 9.156512200183818e-07,
"loss": 0.1399,
"step": 5696
},
{
"epoch": 0.26,
"learning_rate": 9.147024815440718e-07,
"loss": 0.1312,
"step": 5760
},
{
"epoch": 0.26,
"learning_rate": 9.137537430697619e-07,
"loss": 0.0884,
"step": 5824
},
{
"epoch": 0.26,
"learning_rate": 9.12805004595452e-07,
"loss": 0.1663,
"step": 5888
},
{
"epoch": 0.26,
"learning_rate": 9.11856266121142e-07,
"loss": 0.0728,
"step": 5952
},
{
"epoch": 0.27,
"learning_rate": 9.109075276468321e-07,
"loss": 0.1709,
"step": 6016
},
{
"epoch": 0.27,
"learning_rate": 9.099587891725221e-07,
"loss": 0.1476,
"step": 6080
},
{
"epoch": 0.27,
"learning_rate": 9.090100506982122e-07,
"loss": 0.0778,
"step": 6144
},
{
"epoch": 0.28,
"learning_rate": 9.080613122239022e-07,
"loss": 0.1081,
"step": 6208
},
{
"epoch": 0.28,
"learning_rate": 9.071125737495922e-07,
"loss": 0.1059,
"step": 6272
},
{
"epoch": 0.28,
"learning_rate": 9.061638352752824e-07,
"loss": 0.1143,
"step": 6336
},
{
"epoch": 0.28,
"learning_rate": 9.052150968009724e-07,
"loss": 0.1205,
"step": 6400
},
{
"epoch": 0.29,
"learning_rate": 9.042663583266624e-07,
"loss": 0.1479,
"step": 6464
},
{
"epoch": 0.29,
"learning_rate": 9.033176198523526e-07,
"loss": 0.0817,
"step": 6528
},
{
"epoch": 0.29,
"learning_rate": 9.023688813780426e-07,
"loss": 0.1398,
"step": 6592
},
{
"epoch": 0.3,
"learning_rate": 9.014201429037327e-07,
"loss": 0.1608,
"step": 6656
},
{
"epoch": 0.3,
"learning_rate": 9.004714044294228e-07,
"loss": 0.1345,
"step": 6720
},
{
"epoch": 0.3,
"learning_rate": 8.995226659551127e-07,
"loss": 0.1773,
"step": 6784
},
{
"epoch": 0.3,
"learning_rate": 8.985739274808028e-07,
"loss": 0.1769,
"step": 6848
},
{
"epoch": 0.31,
"learning_rate": 8.976251890064928e-07,
"loss": 0.1345,
"step": 6912
},
{
"epoch": 0.31,
"learning_rate": 8.96676450532183e-07,
"loss": 0.1293,
"step": 6976
},
{
"epoch": 0.31,
"learning_rate": 8.95727712057873e-07,
"loss": 0.1388,
"step": 7040
},
{
"epoch": 0.32,
"learning_rate": 8.94778973583563e-07,
"loss": 0.1004,
"step": 7104
},
{
"epoch": 0.32,
"learning_rate": 8.938302351092532e-07,
"loss": 0.1363,
"step": 7168
},
{
"epoch": 0.32,
"learning_rate": 8.928814966349432e-07,
"loss": 0.0933,
"step": 7232
},
{
"epoch": 0.32,
"learning_rate": 8.919327581606333e-07,
"loss": 0.1617,
"step": 7296
},
{
"epoch": 0.33,
"learning_rate": 8.909840196863233e-07,
"loss": 0.1085,
"step": 7360
},
{
"epoch": 0.33,
"learning_rate": 8.900352812120133e-07,
"loss": 0.1836,
"step": 7424
},
{
"epoch": 0.33,
"learning_rate": 8.890865427377034e-07,
"loss": 0.1464,
"step": 7488
},
{
"epoch": 0.34,
"learning_rate": 8.881378042633935e-07,
"loss": 0.1166,
"step": 7552
},
{
"epoch": 0.34,
"learning_rate": 8.871890657890836e-07,
"loss": 0.1071,
"step": 7616
},
{
"epoch": 0.34,
"learning_rate": 8.862403273147736e-07,
"loss": 0.1564,
"step": 7680
},
{
"epoch": 0.34,
"learning_rate": 8.852915888404636e-07,
"loss": 0.0937,
"step": 7744
},
{
"epoch": 0.35,
"learning_rate": 8.843428503661538e-07,
"loss": 0.1095,
"step": 7808
},
{
"epoch": 0.35,
"learning_rate": 8.833941118918438e-07,
"loss": 0.1313,
"step": 7872
},
{
"epoch": 0.35,
"learning_rate": 8.824453734175337e-07,
"loss": 0.0819,
"step": 7936
},
{
"epoch": 0.36,
"learning_rate": 8.814966349432239e-07,
"loss": 0.1273,
"step": 8000
},
{
"epoch": 0.36,
"learning_rate": 8.805478964689139e-07,
"loss": 0.1079,
"step": 8064
},
{
"epoch": 0.36,
"learning_rate": 8.79599157994604e-07,
"loss": 0.0566,
"step": 8128
},
{
"epoch": 0.36,
"learning_rate": 8.786504195202941e-07,
"loss": 0.0882,
"step": 8192
},
{
"epoch": 0.37,
"learning_rate": 8.777016810459841e-07,
"loss": 0.1801,
"step": 8256
},
{
"epoch": 0.37,
"learning_rate": 8.767529425716742e-07,
"loss": 0.1006,
"step": 8320
},
{
"epoch": 0.37,
"learning_rate": 8.758042040973643e-07,
"loss": 0.1033,
"step": 8384
},
{
"epoch": 0.38,
"learning_rate": 8.748554656230544e-07,
"loss": 0.0907,
"step": 8448
},
{
"epoch": 0.38,
"learning_rate": 8.739067271487443e-07,
"loss": 0.0548,
"step": 8512
},
{
"epoch": 0.38,
"learning_rate": 8.729579886744343e-07,
"loss": 0.1173,
"step": 8576
},
{
"epoch": 0.38,
"learning_rate": 8.720092502001245e-07,
"loss": 0.1415,
"step": 8640
},
{
"epoch": 0.39,
"learning_rate": 8.710605117258145e-07,
"loss": 0.1035,
"step": 8704
},
{
"epoch": 0.39,
"learning_rate": 8.701117732515046e-07,
"loss": 0.1016,
"step": 8768
},
{
"epoch": 0.39,
"learning_rate": 8.691630347771947e-07,
"loss": 0.111,
"step": 8832
},
{
"epoch": 0.4,
"learning_rate": 8.682291203415458e-07,
"loss": 0.1427,
"step": 8896
},
{
"epoch": 0.4,
"learning_rate": 8.672803818672359e-07,
"loss": 0.1181,
"step": 8960
},
{
"epoch": 0.4,
"learning_rate": 8.66331643392926e-07,
"loss": 0.097,
"step": 9024
},
{
"epoch": 0.4,
"learning_rate": 8.65382904918616e-07,
"loss": 0.1367,
"step": 9088
},
{
"epoch": 0.41,
"learning_rate": 8.64434166444306e-07,
"loss": 0.1212,
"step": 9152
},
{
"epoch": 0.41,
"learning_rate": 8.634854279699961e-07,
"loss": 0.1393,
"step": 9216
},
{
"epoch": 0.41,
"learning_rate": 8.625366894956861e-07,
"loss": 0.1322,
"step": 9280
},
{
"epoch": 0.42,
"learning_rate": 8.615879510213763e-07,
"loss": 0.0786,
"step": 9344
},
{
"epoch": 0.42,
"learning_rate": 8.606392125470663e-07,
"loss": 0.1263,
"step": 9408
},
{
"epoch": 0.42,
"learning_rate": 8.596904740727563e-07,
"loss": 0.1537,
"step": 9472
},
{
"epoch": 0.42,
"learning_rate": 8.587417355984465e-07,
"loss": 0.1267,
"step": 9536
},
{
"epoch": 0.43,
"learning_rate": 8.577929971241365e-07,
"loss": 0.1087,
"step": 9600
},
{
"epoch": 0.43,
"learning_rate": 8.568442586498266e-07,
"loss": 0.1097,
"step": 9664
},
{
"epoch": 0.43,
"learning_rate": 8.558955201755165e-07,
"loss": 0.1423,
"step": 9728
},
{
"epoch": 0.44,
"learning_rate": 8.549467817012066e-07,
"loss": 0.0911,
"step": 9792
},
{
"epoch": 0.44,
"learning_rate": 8.539980432268967e-07,
"loss": 0.0748,
"step": 9856
},
{
"epoch": 0.44,
"learning_rate": 8.530493047525867e-07,
"loss": 0.112,
"step": 9920
},
{
"epoch": 0.44,
"learning_rate": 8.521005662782769e-07,
"loss": 0.1765,
"step": 9984
},
{
"epoch": 0.45,
"learning_rate": 8.511518278039669e-07,
"loss": 0.1539,
"step": 10048
},
{
"epoch": 0.45,
"learning_rate": 8.502030893296569e-07,
"loss": 0.1382,
"step": 10112
},
{
"epoch": 0.45,
"learning_rate": 8.492543508553471e-07,
"loss": 0.1146,
"step": 10176
},
{
"epoch": 0.46,
"learning_rate": 8.48305612381037e-07,
"loss": 0.1589,
"step": 10240
},
{
"epoch": 0.46,
"learning_rate": 8.473568739067271e-07,
"loss": 0.1184,
"step": 10304
},
{
"epoch": 0.46,
"learning_rate": 8.464081354324172e-07,
"loss": 0.0892,
"step": 10368
},
{
"epoch": 0.46,
"learning_rate": 8.454593969581072e-07,
"loss": 0.1883,
"step": 10432
},
{
"epoch": 0.47,
"learning_rate": 8.445106584837973e-07,
"loss": 0.1441,
"step": 10496
},
{
"epoch": 0.47,
"learning_rate": 8.435619200094873e-07,
"loss": 0.1233,
"step": 10560
},
{
"epoch": 0.47,
"learning_rate": 8.426131815351774e-07,
"loss": 0.1376,
"step": 10624
},
{
"epoch": 0.48,
"learning_rate": 8.416644430608675e-07,
"loss": 0.057,
"step": 10688
},
{
"epoch": 0.48,
"learning_rate": 8.407157045865575e-07,
"loss": 0.0966,
"step": 10752
},
{
"epoch": 0.48,
"learning_rate": 8.397669661122477e-07,
"loss": 0.1026,
"step": 10816
},
{
"epoch": 0.48,
"learning_rate": 8.388182276379376e-07,
"loss": 0.1161,
"step": 10880
},
{
"epoch": 0.49,
"learning_rate": 8.378694891636276e-07,
"loss": 0.1889,
"step": 10944
},
{
"epoch": 0.49,
"learning_rate": 8.369207506893178e-07,
"loss": 0.0808,
"step": 11008
},
{
"epoch": 0.49,
"learning_rate": 8.359868362536689e-07,
"loss": 0.8512,
"step": 11072
},
{
"epoch": 0.5,
"learning_rate": 8.35038097779359e-07,
"loss": 0.1572,
"step": 11136
},
{
"epoch": 0.5,
"learning_rate": 8.34089359305049e-07,
"loss": 0.1883,
"step": 11200
},
{
"epoch": 0.5,
"learning_rate": 8.331406208307391e-07,
"loss": 0.0772,
"step": 11264
},
{
"epoch": 0.5,
"learning_rate": 8.321918823564291e-07,
"loss": 0.0924,
"step": 11328
},
{
"epoch": 0.51,
"learning_rate": 8.312431438821193e-07,
"loss": 0.1308,
"step": 11392
},
{
"epoch": 0.51,
"learning_rate": 8.302944054078093e-07,
"loss": 0.2317,
"step": 11456
},
{
"epoch": 0.51,
"learning_rate": 8.293456669334992e-07,
"loss": 0.1581,
"step": 11520
},
{
"epoch": 0.52,
"learning_rate": 8.283969284591894e-07,
"loss": 0.1068,
"step": 11584
},
{
"epoch": 0.52,
"learning_rate": 8.274481899848794e-07,
"loss": 0.0793,
"step": 11648
},
{
"epoch": 0.52,
"learning_rate": 8.264994515105695e-07,
"loss": 0.1407,
"step": 11712
},
{
"epoch": 0.52,
"learning_rate": 8.255507130362596e-07,
"loss": 0.2219,
"step": 11776
},
{
"epoch": 0.53,
"learning_rate": 8.246019745619496e-07,
"loss": 0.1364,
"step": 11840
},
{
"epoch": 0.53,
"learning_rate": 8.236532360876397e-07,
"loss": 0.0842,
"step": 11904
},
{
"epoch": 0.53,
"learning_rate": 8.227193216519909e-07,
"loss": 0.0893,
"step": 11968
},
{
"epoch": 0.54,
"learning_rate": 8.217705831776809e-07,
"loss": 0.1798,
"step": 12032
},
{
"epoch": 0.54,
"learning_rate": 8.20821844703371e-07,
"loss": 0.1263,
"step": 12096
},
{
"epoch": 0.54,
"learning_rate": 8.19873106229061e-07,
"loss": 0.1289,
"step": 12160
},
{
"epoch": 0.54,
"learning_rate": 8.18924367754751e-07,
"loss": 0.1712,
"step": 12224
},
{
"epoch": 0.55,
"learning_rate": 8.179756292804412e-07,
"loss": 0.2207,
"step": 12288
},
{
"epoch": 0.55,
"learning_rate": 8.170268908061312e-07,
"loss": 0.1219,
"step": 12352
},
{
"epoch": 0.55,
"learning_rate": 8.160781523318212e-07,
"loss": 0.1446,
"step": 12416
},
{
"epoch": 0.56,
"learning_rate": 8.151294138575113e-07,
"loss": 0.076,
"step": 12480
},
{
"epoch": 0.56,
"learning_rate": 8.141806753832014e-07,
"loss": 0.1448,
"step": 12544
},
{
"epoch": 0.56,
"learning_rate": 8.132319369088915e-07,
"loss": 0.1613,
"step": 12608
},
{
"epoch": 0.56,
"learning_rate": 8.122831984345814e-07,
"loss": 0.1241,
"step": 12672
},
{
"epoch": 0.57,
"learning_rate": 8.113344599602715e-07,
"loss": 0.1833,
"step": 12736
},
{
"epoch": 0.57,
"learning_rate": 8.103857214859616e-07,
"loss": 0.2071,
"step": 12800
},
{
"epoch": 0.57,
"learning_rate": 8.094369830116516e-07,
"loss": 0.1467,
"step": 12864
},
{
"epoch": 0.57,
"learning_rate": 8.084882445373418e-07,
"loss": 0.0889,
"step": 12928
},
{
"epoch": 0.58,
"learning_rate": 8.075395060630318e-07,
"loss": 0.141,
"step": 12992
},
{
"epoch": 0.58,
"learning_rate": 8.065907675887218e-07,
"loss": 0.106,
"step": 13056
},
{
"epoch": 0.58,
"learning_rate": 8.05642029114412e-07,
"loss": 0.1359,
"step": 13120
},
{
"epoch": 0.59,
"learning_rate": 8.04693290640102e-07,
"loss": 0.2273,
"step": 13184
},
{
"epoch": 0.59,
"learning_rate": 8.03744552165792e-07,
"loss": 0.1071,
"step": 13248
},
{
"epoch": 0.59,
"learning_rate": 8.02795813691482e-07,
"loss": 0.1106,
"step": 13312
},
{
"epoch": 0.59,
"learning_rate": 8.018470752171721e-07,
"loss": 0.1035,
"step": 13376
},
{
"epoch": 0.6,
"learning_rate": 8.008983367428622e-07,
"loss": 0.1459,
"step": 13440
},
{
"epoch": 0.6,
"learning_rate": 7.999495982685522e-07,
"loss": 0.1739,
"step": 13504
},
{
"epoch": 0.6,
"learning_rate": 7.990008597942423e-07,
"loss": 0.116,
"step": 13568
},
{
"epoch": 0.61,
"learning_rate": 7.980521213199324e-07,
"loss": 0.1017,
"step": 13632
},
{
"epoch": 0.61,
"learning_rate": 7.971033828456224e-07,
"loss": 0.1414,
"step": 13696
},
{
"epoch": 0.61,
"learning_rate": 7.961546443713126e-07,
"loss": 0.2619,
"step": 13760
},
{
"epoch": 0.61,
"learning_rate": 7.952059058970026e-07,
"loss": 0.1166,
"step": 13824
},
{
"epoch": 0.62,
"learning_rate": 7.942571674226925e-07,
"loss": 0.142,
"step": 13888
},
{
"epoch": 0.62,
"learning_rate": 7.933084289483827e-07,
"loss": 0.2068,
"step": 13952
},
{
"epoch": 0.62,
"learning_rate": 7.923596904740727e-07,
"loss": 0.1119,
"step": 14016
},
{
"epoch": 0.63,
"learning_rate": 7.914109519997628e-07,
"loss": 0.143,
"step": 14080
},
{
"epoch": 0.63,
"learning_rate": 7.904770375641139e-07,
"loss": 0.1718,
"step": 14144
},
{
"epoch": 0.63,
"learning_rate": 7.89528299089804e-07,
"loss": 0.0871,
"step": 14208
},
{
"epoch": 0.63,
"learning_rate": 7.88579560615494e-07,
"loss": 0.1059,
"step": 14272
},
{
"epoch": 0.64,
"learning_rate": 7.876308221411842e-07,
"loss": 0.1276,
"step": 14336
},
{
"epoch": 0.64,
"learning_rate": 7.866820836668742e-07,
"loss": 0.1503,
"step": 14400
},
{
"epoch": 0.64,
"learning_rate": 7.857333451925641e-07,
"loss": 0.2201,
"step": 14464
},
{
"epoch": 0.65,
"learning_rate": 7.847846067182543e-07,
"loss": 0.1737,
"step": 14528
},
{
"epoch": 0.65,
"learning_rate": 7.838358682439443e-07,
"loss": 0.1133,
"step": 14592
},
{
"epoch": 0.65,
"learning_rate": 7.828871297696344e-07,
"loss": 0.1378,
"step": 14656
},
{
"epoch": 0.65,
"learning_rate": 7.819383912953245e-07,
"loss": 0.166,
"step": 14720
},
{
"epoch": 0.66,
"learning_rate": 7.809896528210145e-07,
"loss": 0.2001,
"step": 14784
},
{
"epoch": 0.66,
"learning_rate": 7.800409143467046e-07,
"loss": 0.158,
"step": 14848
},
{
"epoch": 0.66,
"learning_rate": 7.790921758723946e-07,
"loss": 0.1009,
"step": 14912
},
{
"epoch": 0.67,
"learning_rate": 7.781434373980848e-07,
"loss": 0.2009,
"step": 14976
},
{
"epoch": 0.67,
"learning_rate": 7.771946989237747e-07,
"loss": 0.1437,
"step": 15040
},
{
"epoch": 0.67,
"learning_rate": 7.762459604494647e-07,
"loss": 0.1653,
"step": 15104
},
{
"epoch": 0.67,
"learning_rate": 7.752972219751549e-07,
"loss": 0.1736,
"step": 15168
},
{
"epoch": 0.68,
"learning_rate": 7.743484835008449e-07,
"loss": 0.2137,
"step": 15232
},
{
"epoch": 0.68,
"learning_rate": 7.73399745026535e-07,
"loss": 0.1282,
"step": 15296
},
{
"epoch": 0.68,
"learning_rate": 7.724510065522251e-07,
"loss": 0.1766,
"step": 15360
},
{
"epoch": 0.69,
"learning_rate": 7.715022680779151e-07,
"loss": 0.0876,
"step": 15424
},
{
"epoch": 0.69,
"learning_rate": 7.705535296036052e-07,
"loss": 0.1338,
"step": 15488
},
{
"epoch": 0.69,
"learning_rate": 7.696047911292953e-07,
"loss": 0.2006,
"step": 15552
},
{
"epoch": 0.69,
"learning_rate": 7.686560526549854e-07,
"loss": 0.1505,
"step": 15616
},
{
"epoch": 0.7,
"learning_rate": 7.677073141806753e-07,
"loss": 0.1223,
"step": 15680
},
{
"epoch": 0.7,
"learning_rate": 7.667585757063653e-07,
"loss": 0.1693,
"step": 15744
},
{
"epoch": 0.7,
"learning_rate": 7.658098372320555e-07,
"loss": 0.1872,
"step": 15808
},
{
"epoch": 0.71,
"learning_rate": 7.648610987577455e-07,
"loss": 0.1109,
"step": 15872
},
{
"epoch": 0.71,
"learning_rate": 7.639123602834355e-07,
"loss": 0.1351,
"step": 15936
},
{
"epoch": 0.71,
"learning_rate": 7.629636218091257e-07,
"loss": 0.1336,
"step": 16000
},
{
"epoch": 0.71,
"learning_rate": 7.620148833348157e-07,
"loss": 0.2153,
"step": 16064
},
{
"epoch": 0.72,
"learning_rate": 7.610661448605058e-07,
"loss": 0.2274,
"step": 16128
},
{
"epoch": 0.72,
"learning_rate": 7.601174063861959e-07,
"loss": 0.105,
"step": 16192
},
{
"epoch": 0.72,
"learning_rate": 7.591686679118858e-07,
"loss": 0.1016,
"step": 16256
},
{
"epoch": 0.73,
"learning_rate": 7.582199294375759e-07,
"loss": 0.116,
"step": 16320
},
{
"epoch": 0.73,
"learning_rate": 7.57271190963266e-07,
"loss": 0.1086,
"step": 16384
},
{
"epoch": 0.73,
"learning_rate": 7.563224524889561e-07,
"loss": 0.1153,
"step": 16448
},
{
"epoch": 0.73,
"learning_rate": 7.553737140146461e-07,
"loss": 0.1044,
"step": 16512
},
{
"epoch": 0.74,
"learning_rate": 7.544249755403361e-07,
"loss": 0.2228,
"step": 16576
},
{
"epoch": 0.74,
"learning_rate": 7.534762370660263e-07,
"loss": 0.1002,
"step": 16640
},
{
"epoch": 0.74,
"learning_rate": 7.525274985917163e-07,
"loss": 0.1919,
"step": 16704
},
{
"epoch": 0.75,
"learning_rate": 7.515787601174063e-07,
"loss": 0.13,
"step": 16768
},
{
"epoch": 0.75,
"learning_rate": 7.506300216430964e-07,
"loss": 0.1165,
"step": 16832
},
{
"epoch": 0.75,
"learning_rate": 7.496812831687864e-07,
"loss": 0.1385,
"step": 16896
},
{
"epoch": 0.75,
"learning_rate": 7.487325446944765e-07,
"loss": 0.1836,
"step": 16960
},
{
"epoch": 0.76,
"learning_rate": 7.477838062201666e-07,
"loss": 0.1852,
"step": 17024
},
{
"epoch": 0.76,
"learning_rate": 7.468498917845177e-07,
"loss": 0.1721,
"step": 17088
},
{
"epoch": 0.76,
"learning_rate": 7.459011533102078e-07,
"loss": 0.1164,
"step": 17152
},
{
"epoch": 0.77,
"learning_rate": 7.449524148358979e-07,
"loss": 0.0989,
"step": 17216
},
{
"epoch": 0.77,
"learning_rate": 7.440036763615879e-07,
"loss": 0.1524,
"step": 17280
},
{
"epoch": 0.77,
"learning_rate": 7.430549378872781e-07,
"loss": 0.174,
"step": 17344
},
{
"epoch": 0.77,
"learning_rate": 7.42106199412968e-07,
"loss": 0.1509,
"step": 17408
},
{
"epoch": 0.78,
"learning_rate": 7.41157460938658e-07,
"loss": 0.1264,
"step": 17472
},
{
"epoch": 0.78,
"learning_rate": 7.402087224643482e-07,
"loss": 0.1148,
"step": 17536
},
{
"epoch": 0.78,
"learning_rate": 7.392599839900382e-07,
"loss": 0.1227,
"step": 17600
},
{
"epoch": 0.79,
"learning_rate": 7.383112455157283e-07,
"loss": 0.1462,
"step": 17664
},
{
"epoch": 0.79,
"learning_rate": 7.373625070414183e-07,
"loss": 0.1673,
"step": 17728
},
{
"epoch": 0.79,
"learning_rate": 7.364137685671084e-07,
"loss": 0.1274,
"step": 17792
},
{
"epoch": 0.79,
"learning_rate": 7.354650300927985e-07,
"loss": 0.162,
"step": 17856
},
{
"epoch": 0.8,
"learning_rate": 7.345162916184885e-07,
"loss": 0.148,
"step": 17920
},
{
"epoch": 0.8,
"learning_rate": 7.335675531441787e-07,
"loss": 0.157,
"step": 17984
},
{
"epoch": 0.8,
"learning_rate": 7.326188146698686e-07,
"loss": 0.1419,
"step": 18048
},
{
"epoch": 0.81,
"learning_rate": 7.316700761955586e-07,
"loss": 0.1916,
"step": 18112
},
{
"epoch": 0.81,
"learning_rate": 7.307213377212488e-07,
"loss": 0.1772,
"step": 18176
},
{
"epoch": 0.81,
"learning_rate": 7.297725992469388e-07,
"loss": 0.1243,
"step": 18240
},
{
"epoch": 0.81,
"learning_rate": 7.288238607726288e-07,
"loss": 0.1738,
"step": 18304
},
{
"epoch": 0.82,
"learning_rate": 7.27875122298319e-07,
"loss": 0.165,
"step": 18368
},
{
"epoch": 0.82,
"learning_rate": 7.26926383824009e-07,
"loss": 0.0869,
"step": 18432
},
{
"epoch": 0.82,
"learning_rate": 7.259776453496991e-07,
"loss": 0.1613,
"step": 18496
},
{
"epoch": 0.83,
"learning_rate": 7.25028906875389e-07,
"loss": 0.1646,
"step": 18560
},
{
"epoch": 0.83,
"learning_rate": 7.240801684010791e-07,
"loss": 0.1232,
"step": 18624
},
{
"epoch": 0.83,
"learning_rate": 7.231314299267692e-07,
"loss": 0.1695,
"step": 18688
},
{
"epoch": 0.83,
"learning_rate": 7.221826914524592e-07,
"loss": 0.1305,
"step": 18752
},
{
"epoch": 0.84,
"learning_rate": 7.212339529781494e-07,
"loss": 0.2047,
"step": 18816
},
{
"epoch": 0.84,
"learning_rate": 7.202852145038394e-07,
"loss": 0.1881,
"step": 18880
},
{
"epoch": 0.84,
"learning_rate": 7.193364760295294e-07,
"loss": 0.1513,
"step": 18944
},
{
"epoch": 0.85,
"learning_rate": 7.183877375552196e-07,
"loss": 0.1355,
"step": 19008
},
{
"epoch": 0.85,
"learning_rate": 7.174389990809096e-07,
"loss": 0.1733,
"step": 19072
},
{
"epoch": 0.85,
"learning_rate": 7.164902606065996e-07,
"loss": 0.1647,
"step": 19136
},
{
"epoch": 0.85,
"learning_rate": 7.155415221322897e-07,
"loss": 0.132,
"step": 19200
},
{
"epoch": 0.86,
"learning_rate": 7.145927836579797e-07,
"loss": 0.2464,
"step": 19264
},
{
"epoch": 0.86,
"learning_rate": 7.136440451836698e-07,
"loss": 0.1954,
"step": 19328
},
{
"epoch": 0.86,
"learning_rate": 7.126953067093598e-07,
"loss": 0.1751,
"step": 19392
},
{
"epoch": 0.87,
"learning_rate": 7.1174656823505e-07,
"loss": 0.1182,
"step": 19456
},
{
"epoch": 0.87,
"learning_rate": 7.1079782976074e-07,
"loss": 0.0678,
"step": 19520
},
{
"epoch": 0.87,
"learning_rate": 7.0984909128643e-07,
"loss": 0.1656,
"step": 19584
},
{
"epoch": 0.87,
"learning_rate": 7.089003528121202e-07,
"loss": 0.219,
"step": 19648
},
{
"epoch": 0.88,
"learning_rate": 7.079516143378101e-07,
"loss": 0.15,
"step": 19712
},
{
"epoch": 0.88,
"learning_rate": 7.070028758635002e-07,
"loss": 0.1845,
"step": 19776
},
{
"epoch": 0.88,
"learning_rate": 7.060541373891903e-07,
"loss": 0.1766,
"step": 19840
},
{
"epoch": 0.89,
"learning_rate": 7.051053989148803e-07,
"loss": 0.1355,
"step": 19904
},
{
"epoch": 0.89,
"learning_rate": 7.041566604405704e-07,
"loss": 0.1268,
"step": 19968
},
{
"epoch": 0.89,
"learning_rate": 7.032079219662605e-07,
"loss": 0.191,
"step": 20032
},
{
"epoch": 0.89,
"learning_rate": 7.022591834919506e-07,
"loss": 0.2032,
"step": 20096
},
{
"epoch": 0.9,
"learning_rate": 7.013104450176406e-07,
"loss": 0.2632,
"step": 20160
},
{
"epoch": 0.9,
"learning_rate": 7.003617065433306e-07,
"loss": 0.2028,
"step": 20224
},
{
"epoch": 0.9,
"learning_rate": 6.994129680690208e-07,
"loss": 0.135,
"step": 20288
},
{
"epoch": 0.91,
"learning_rate": 6.984642295947107e-07,
"loss": 0.1635,
"step": 20352
},
{
"epoch": 0.91,
"learning_rate": 6.975154911204007e-07,
"loss": 0.1986,
"step": 20416
},
{
"epoch": 0.91,
"learning_rate": 6.965667526460909e-07,
"loss": 0.1786,
"step": 20480
},
{
"epoch": 0.91,
"learning_rate": 6.956180141717809e-07,
"loss": 0.116,
"step": 20544
},
{
"epoch": 0.92,
"learning_rate": 6.94669275697471e-07,
"loss": 0.2206,
"step": 20608
},
{
"epoch": 0.92,
"learning_rate": 6.937205372231611e-07,
"loss": 0.0995,
"step": 20672
},
{
"epoch": 0.92,
"learning_rate": 6.927717987488511e-07,
"loss": 0.1634,
"step": 20736
},
{
"epoch": 0.93,
"learning_rate": 6.918230602745412e-07,
"loss": 0.1429,
"step": 20800
},
{
"epoch": 0.93,
"learning_rate": 6.908743218002312e-07,
"loss": 0.2244,
"step": 20864
},
{
"epoch": 0.93,
"learning_rate": 6.899255833259213e-07,
"loss": 0.1666,
"step": 20928
},
{
"epoch": 0.93,
"learning_rate": 6.889768448516113e-07,
"loss": 0.1413,
"step": 20992
},
{
"epoch": 0.94,
"learning_rate": 6.880281063773013e-07,
"loss": 0.1824,
"step": 21056
},
{
"epoch": 0.94,
"learning_rate": 6.870941919416525e-07,
"loss": 0.1737,
"step": 21120
},
{
"epoch": 0.94,
"learning_rate": 6.861454534673426e-07,
"loss": 0.171,
"step": 21184
},
{
"epoch": 0.94,
"learning_rate": 6.851967149930327e-07,
"loss": 0.1571,
"step": 21248
},
{
"epoch": 0.95,
"learning_rate": 6.842479765187227e-07,
"loss": 0.115,
"step": 21312
},
{
"epoch": 0.95,
"learning_rate": 6.832992380444128e-07,
"loss": 0.2192,
"step": 21376
},
{
"epoch": 0.95,
"learning_rate": 6.823504995701029e-07,
"loss": 0.146,
"step": 21440
},
{
"epoch": 0.96,
"learning_rate": 6.81401761095793e-07,
"loss": 0.1586,
"step": 21504
},
{
"epoch": 0.96,
"learning_rate": 6.804530226214829e-07,
"loss": 0.1035,
"step": 21568
},
{
"epoch": 0.96,
"learning_rate": 6.795191081858341e-07,
"loss": 0.1898,
"step": 21632
},
{
"epoch": 0.96,
"learning_rate": 6.785703697115241e-07,
"loss": 0.2901,
"step": 21696
},
{
"epoch": 0.97,
"learning_rate": 6.776216312372143e-07,
"loss": 0.1986,
"step": 21760
},
{
"epoch": 0.97,
"learning_rate": 6.766728927629043e-07,
"loss": 0.1774,
"step": 21824
},
{
"epoch": 0.97,
"learning_rate": 6.757241542885943e-07,
"loss": 0.1945,
"step": 21888
},
{
"epoch": 0.98,
"learning_rate": 6.747754158142845e-07,
"loss": 0.1453,
"step": 21952
},
{
"epoch": 0.98,
"learning_rate": 6.738266773399745e-07,
"loss": 0.1882,
"step": 22016
},
{
"epoch": 0.98,
"learning_rate": 6.728779388656646e-07,
"loss": 0.2266,
"step": 22080
},
{
"epoch": 0.98,
"learning_rate": 6.719292003913545e-07,
"loss": 0.2192,
"step": 22144
},
{
"epoch": 0.99,
"learning_rate": 6.709804619170446e-07,
"loss": 0.1191,
"step": 22208
},
{
"epoch": 0.99,
"learning_rate": 6.700317234427347e-07,
"loss": 0.1323,
"step": 22272
},
{
"epoch": 0.99,
"learning_rate": 6.690829849684247e-07,
"loss": 0.2345,
"step": 22336
},
{
"epoch": 1.0,
"learning_rate": 6.681342464941149e-07,
"loss": 0.122,
"step": 22400
},
{
"epoch": 1.0,
"learning_rate": 6.671855080198049e-07,
"loss": 0.1643,
"step": 22464
},
{
"epoch": 1.0,
"learning_rate": 6.662367695454949e-07,
"loss": 0.1756,
"step": 22528
},
{
"epoch": 1.0,
"learning_rate": 6.653028551098461e-07,
"loss": 0.0794,
"step": 22592
},
{
"epoch": 1.01,
"learning_rate": 6.643541166355362e-07,
"loss": 0.0636,
"step": 22656
},
{
"epoch": 1.01,
"learning_rate": 6.634053781612263e-07,
"loss": 0.1252,
"step": 22720
},
{
"epoch": 1.01,
"learning_rate": 6.624566396869162e-07,
"loss": 0.1114,
"step": 22784
},
{
"epoch": 1.02,
"learning_rate": 6.615079012126063e-07,
"loss": 0.1799,
"step": 22848
},
{
"epoch": 1.02,
"learning_rate": 6.605591627382963e-07,
"loss": 0.1088,
"step": 22912
},
{
"epoch": 1.02,
"learning_rate": 6.596104242639865e-07,
"loss": 0.0468,
"step": 22976
},
{
"epoch": 1.02,
"learning_rate": 6.586616857896765e-07,
"loss": 0.0891,
"step": 23040
},
{
"epoch": 1.03,
"learning_rate": 6.577129473153665e-07,
"loss": 0.0967,
"step": 23104
},
{
"epoch": 1.03,
"learning_rate": 6.567642088410567e-07,
"loss": 0.102,
"step": 23168
},
{
"epoch": 1.03,
"learning_rate": 6.558154703667467e-07,
"loss": 0.0713,
"step": 23232
},
{
"epoch": 1.04,
"learning_rate": 6.548667318924368e-07,
"loss": 0.0656,
"step": 23296
},
{
"epoch": 1.04,
"learning_rate": 6.539179934181268e-07,
"loss": 0.1185,
"step": 23360
},
{
"epoch": 1.04,
"learning_rate": 6.529692549438168e-07,
"loss": 0.0787,
"step": 23424
},
{
"epoch": 1.04,
"learning_rate": 6.520205164695069e-07,
"loss": 0.0442,
"step": 23488
},
{
"epoch": 1.05,
"learning_rate": 6.51071777995197e-07,
"loss": 0.0793,
"step": 23552
},
{
"epoch": 1.05,
"learning_rate": 6.501230395208871e-07,
"loss": 0.1479,
"step": 23616
},
{
"epoch": 1.05,
"learning_rate": 6.491743010465771e-07,
"loss": 0.0891,
"step": 23680
},
{
"epoch": 1.06,
"learning_rate": 6.482255625722671e-07,
"loss": 0.0879,
"step": 23744
},
{
"epoch": 1.06,
"learning_rate": 6.472768240979573e-07,
"loss": 0.0888,
"step": 23808
},
{
"epoch": 1.06,
"learning_rate": 6.463429096623084e-07,
"loss": 0.1321,
"step": 23872
},
{
"epoch": 1.06,
"learning_rate": 6.453941711879985e-07,
"loss": 0.0842,
"step": 23936
},
{
"epoch": 1.07,
"learning_rate": 6.444454327136884e-07,
"loss": 0.1128,
"step": 24000
},
{
"epoch": 1.07,
"learning_rate": 6.434966942393785e-07,
"loss": 0.1089,
"step": 24064
},
{
"epoch": 1.07,
"learning_rate": 6.425479557650686e-07,
"loss": 0.078,
"step": 24128
},
{
"epoch": 1.08,
"learning_rate": 6.415992172907586e-07,
"loss": 0.1481,
"step": 24192
},
{
"epoch": 1.08,
"learning_rate": 6.406504788164487e-07,
"loss": 0.0639,
"step": 24256
},
{
"epoch": 1.08,
"learning_rate": 6.397017403421388e-07,
"loss": 0.1182,
"step": 24320
},
{
"epoch": 1.08,
"learning_rate": 6.387530018678289e-07,
"loss": 0.0864,
"step": 24384
},
{
"epoch": 1.09,
"learning_rate": 6.378042633935189e-07,
"loss": 0.0626,
"step": 24448
},
{
"epoch": 1.09,
"learning_rate": 6.36855524919209e-07,
"loss": 0.0763,
"step": 24512
},
{
"epoch": 1.09,
"learning_rate": 6.35906786444899e-07,
"loss": 0.1031,
"step": 24576
},
{
"epoch": 1.1,
"learning_rate": 6.34958047970589e-07,
"loss": 0.0772,
"step": 24640
},
{
"epoch": 1.1,
"learning_rate": 6.340093094962791e-07,
"loss": 0.1173,
"step": 24704
},
{
"epoch": 1.1,
"learning_rate": 6.330605710219692e-07,
"loss": 0.1254,
"step": 24768
},
{
"epoch": 1.1,
"learning_rate": 6.321118325476592e-07,
"loss": 0.1646,
"step": 24832
},
{
"epoch": 1.11,
"learning_rate": 6.311630940733493e-07,
"loss": 0.1065,
"step": 24896
},
{
"epoch": 1.11,
"learning_rate": 6.302143555990394e-07,
"loss": 0.0916,
"step": 24960
},
{
"epoch": 1.11,
"learning_rate": 6.292656171247295e-07,
"loss": 0.0769,
"step": 25024
},
{
"epoch": 1.12,
"learning_rate": 6.283168786504195e-07,
"loss": 0.05,
"step": 25088
},
{
"epoch": 1.12,
"learning_rate": 6.273681401761095e-07,
"loss": 0.0748,
"step": 25152
},
{
"epoch": 1.12,
"learning_rate": 6.264194017017996e-07,
"loss": 0.089,
"step": 25216
},
{
"epoch": 1.12,
"learning_rate": 6.254706632274896e-07,
"loss": 0.0951,
"step": 25280
},
{
"epoch": 1.13,
"learning_rate": 6.245219247531798e-07,
"loss": 0.0955,
"step": 25344
},
{
"epoch": 1.13,
"learning_rate": 6.235731862788698e-07,
"loss": 0.1024,
"step": 25408
},
{
"epoch": 1.13,
"learning_rate": 6.226244478045598e-07,
"loss": 0.0898,
"step": 25472
},
{
"epoch": 1.14,
"learning_rate": 6.216757093302499e-07,
"loss": 0.1351,
"step": 25536
},
{
"epoch": 1.14,
"learning_rate": 6.2072697085594e-07,
"loss": 0.0797,
"step": 25600
},
{
"epoch": 1.14,
"learning_rate": 6.197782323816301e-07,
"loss": 0.0459,
"step": 25664
},
{
"epoch": 1.14,
"learning_rate": 6.1882949390732e-07,
"loss": 0.0463,
"step": 25728
},
{
"epoch": 1.15,
"learning_rate": 6.178807554330101e-07,
"loss": 0.0487,
"step": 25792
},
{
"epoch": 1.15,
"learning_rate": 6.169320169587002e-07,
"loss": 0.0971,
"step": 25856
},
{
"epoch": 1.15,
"learning_rate": 6.159832784843902e-07,
"loss": 0.0793,
"step": 25920
},
{
"epoch": 1.16,
"learning_rate": 6.150345400100804e-07,
"loss": 0.0507,
"step": 25984
},
{
"epoch": 1.16,
"learning_rate": 6.140858015357704e-07,
"loss": 0.1244,
"step": 26048
},
{
"epoch": 1.16,
"learning_rate": 6.131370630614604e-07,
"loss": 0.1191,
"step": 26112
},
{
"epoch": 1.16,
"learning_rate": 6.121883245871506e-07,
"loss": 0.1716,
"step": 26176
},
{
"epoch": 1.17,
"learning_rate": 6.112395861128406e-07,
"loss": 0.1052,
"step": 26240
},
{
"epoch": 1.17,
"learning_rate": 6.102908476385305e-07,
"loss": 0.1227,
"step": 26304
},
{
"epoch": 1.17,
"learning_rate": 6.093421091642206e-07,
"loss": 0.0686,
"step": 26368
},
{
"epoch": 1.18,
"learning_rate": 6.083933706899107e-07,
"loss": 0.1174,
"step": 26432
},
{
"epoch": 1.18,
"learning_rate": 6.074446322156008e-07,
"loss": 0.0606,
"step": 26496
},
{
"epoch": 1.18,
"learning_rate": 6.064958937412908e-07,
"loss": 0.1176,
"step": 26560
},
{
"epoch": 1.18,
"learning_rate": 6.055471552669809e-07,
"loss": 0.0385,
"step": 26624
},
{
"epoch": 1.19,
"learning_rate": 6.04598416792671e-07,
"loss": 0.0468,
"step": 26688
},
{
"epoch": 1.19,
"learning_rate": 6.03649678318361e-07,
"loss": 0.1169,
"step": 26752
},
{
"epoch": 1.19,
"learning_rate": 6.027009398440512e-07,
"loss": 0.1115,
"step": 26816
},
{
"epoch": 1.2,
"learning_rate": 6.017522013697411e-07,
"loss": 0.1258,
"step": 26880
},
{
"epoch": 1.2,
"learning_rate": 6.008034628954311e-07,
"loss": 0.1025,
"step": 26944
},
{
"epoch": 1.2,
"learning_rate": 5.998547244211213e-07,
"loss": 0.0834,
"step": 27008
},
{
"epoch": 1.2,
"learning_rate": 5.989059859468113e-07,
"loss": 0.0421,
"step": 27072
},
{
"epoch": 1.21,
"learning_rate": 5.979572474725014e-07,
"loss": 0.0936,
"step": 27136
},
{
"epoch": 1.21,
"learning_rate": 5.970085089981914e-07,
"loss": 0.1253,
"step": 27200
},
{
"epoch": 1.21,
"learning_rate": 5.960597705238815e-07,
"loss": 0.1037,
"step": 27264
},
{
"epoch": 1.22,
"learning_rate": 5.951110320495716e-07,
"loss": 0.0553,
"step": 27328
},
{
"epoch": 1.22,
"learning_rate": 5.941622935752616e-07,
"loss": 0.0932,
"step": 27392
},
{
"epoch": 1.22,
"learning_rate": 5.932135551009518e-07,
"loss": 0.0359,
"step": 27456
},
{
"epoch": 1.22,
"learning_rate": 5.922648166266417e-07,
"loss": 0.0564,
"step": 27520
},
{
"epoch": 1.23,
"learning_rate": 5.913160781523317e-07,
"loss": 0.0475,
"step": 27584
},
{
"epoch": 1.23,
"learning_rate": 5.903673396780219e-07,
"loss": 0.1187,
"step": 27648
},
{
"epoch": 1.23,
"learning_rate": 5.894186012037119e-07,
"loss": 0.102,
"step": 27712
},
{
"epoch": 1.24,
"learning_rate": 5.88469862729402e-07,
"loss": 0.1048,
"step": 27776
},
{
"epoch": 1.24,
"learning_rate": 5.875211242550921e-07,
"loss": 0.1066,
"step": 27840
},
{
"epoch": 1.24,
"learning_rate": 5.865723857807821e-07,
"loss": 0.1039,
"step": 27904
},
{
"epoch": 1.24,
"learning_rate": 5.856236473064722e-07,
"loss": 0.1027,
"step": 27968
},
{
"epoch": 1.25,
"learning_rate": 5.846749088321621e-07,
"loss": 0.0535,
"step": 28032
},
{
"epoch": 1.25,
"learning_rate": 5.837261703578523e-07,
"loss": 0.0737,
"step": 28096
},
{
"epoch": 1.25,
"learning_rate": 5.827774318835423e-07,
"loss": 0.094,
"step": 28160
},
{
"epoch": 1.26,
"learning_rate": 5.818286934092323e-07,
"loss": 0.1002,
"step": 28224
},
{
"epoch": 1.26,
"learning_rate": 5.808799549349225e-07,
"loss": 0.0618,
"step": 28288
},
{
"epoch": 1.26,
"learning_rate": 5.799312164606125e-07,
"loss": 0.1356,
"step": 28352
},
{
"epoch": 1.26,
"learning_rate": 5.789824779863025e-07,
"loss": 0.0802,
"step": 28416
},
{
"epoch": 1.27,
"learning_rate": 5.780337395119927e-07,
"loss": 0.1333,
"step": 28480
},
{
"epoch": 1.27,
"learning_rate": 5.770850010376827e-07,
"loss": 0.0687,
"step": 28544
},
{
"epoch": 1.27,
"learning_rate": 5.761362625633727e-07,
"loss": 0.0562,
"step": 28608
},
{
"epoch": 1.28,
"learning_rate": 5.751875240890628e-07,
"loss": 0.0955,
"step": 28672
},
{
"epoch": 1.28,
"learning_rate": 5.742387856147528e-07,
"loss": 0.0915,
"step": 28736
},
{
"epoch": 1.28,
"learning_rate": 5.732900471404429e-07,
"loss": 0.0632,
"step": 28800
},
{
"epoch": 1.28,
"learning_rate": 5.723413086661329e-07,
"loss": 0.1047,
"step": 28864
},
{
"epoch": 1.29,
"learning_rate": 5.713925701918231e-07,
"loss": 0.0981,
"step": 28928
},
{
"epoch": 1.29,
"learning_rate": 5.704438317175131e-07,
"loss": 0.0954,
"step": 28992
},
{
"epoch": 1.29,
"learning_rate": 5.694950932432031e-07,
"loss": 0.0572,
"step": 29056
},
{
"epoch": 1.3,
"learning_rate": 5.685463547688933e-07,
"loss": 0.1177,
"step": 29120
},
{
"epoch": 1.3,
"learning_rate": 5.675976162945832e-07,
"loss": 0.0748,
"step": 29184
},
{
"epoch": 1.3,
"learning_rate": 5.666488778202733e-07,
"loss": 0.0783,
"step": 29248
},
{
"epoch": 1.3,
"learning_rate": 5.657001393459634e-07,
"loss": 0.0556,
"step": 29312
},
{
"epoch": 1.31,
"learning_rate": 5.647514008716534e-07,
"loss": 0.0592,
"step": 29376
},
{
"epoch": 1.31,
"learning_rate": 5.638026623973435e-07,
"loss": 0.0917,
"step": 29440
},
{
"epoch": 1.31,
"learning_rate": 5.628539239230336e-07,
"loss": 0.0847,
"step": 29504
},
{
"epoch": 1.31,
"learning_rate": 5.619051854487237e-07,
"loss": 0.1253,
"step": 29568
},
{
"epoch": 1.32,
"learning_rate": 5.609564469744137e-07,
"loss": 0.1362,
"step": 29632
},
{
"epoch": 1.32,
"learning_rate": 5.600077085001037e-07,
"loss": 0.0799,
"step": 29696
},
{
"epoch": 1.32,
"learning_rate": 5.590589700257938e-07,
"loss": 0.1235,
"step": 29760
},
{
"epoch": 1.33,
"learning_rate": 5.581102315514838e-07,
"loss": 0.056,
"step": 29824
},
{
"epoch": 1.33,
"learning_rate": 5.571614930771739e-07,
"loss": 0.0952,
"step": 29888
},
{
"epoch": 1.33,
"learning_rate": 5.56212754602864e-07,
"loss": 0.102,
"step": 29952
},
{
"epoch": 1.33,
"learning_rate": 5.55264016128554e-07,
"loss": 0.105,
"step": 30016
},
{
"epoch": 1.34,
"learning_rate": 5.543152776542441e-07,
"loss": 0.1049,
"step": 30080
},
{
"epoch": 1.34,
"learning_rate": 5.533665391799342e-07,
"loss": 0.0722,
"step": 30144
},
{
"epoch": 1.34,
"learning_rate": 5.524178007056243e-07,
"loss": 0.0389,
"step": 30208
},
{
"epoch": 1.35,
"learning_rate": 5.514690622313143e-07,
"loss": 0.0589,
"step": 30272
},
{
"epoch": 1.35,
"learning_rate": 5.505203237570043e-07,
"loss": 0.1049,
"step": 30336
},
{
"epoch": 1.35,
"learning_rate": 5.495715852826944e-07,
"loss": 0.1013,
"step": 30400
},
{
"epoch": 1.35,
"learning_rate": 5.486228468083844e-07,
"loss": 0.0866,
"step": 30464
},
{
"epoch": 1.36,
"learning_rate": 5.476741083340744e-07,
"loss": 0.1195,
"step": 30528
},
{
"epoch": 1.36,
"learning_rate": 5.467253698597646e-07,
"loss": 0.049,
"step": 30592
},
{
"epoch": 1.36,
"learning_rate": 5.457766313854546e-07,
"loss": 0.0521,
"step": 30656
},
{
"epoch": 1.37,
"learning_rate": 5.448278929111447e-07,
"loss": 0.0658,
"step": 30720
},
{
"epoch": 1.37,
"learning_rate": 5.438791544368348e-07,
"loss": 0.0735,
"step": 30784
},
{
"epoch": 1.37,
"learning_rate": 5.429304159625248e-07,
"loss": 0.1463,
"step": 30848
},
{
"epoch": 1.37,
"learning_rate": 5.419816774882148e-07,
"loss": 0.1333,
"step": 30912
},
{
"epoch": 1.38,
"learning_rate": 5.410329390139049e-07,
"loss": 0.0795,
"step": 30976
},
{
"epoch": 1.38,
"learning_rate": 5.40084200539595e-07,
"loss": 0.0599,
"step": 31040
},
{
"epoch": 1.38,
"learning_rate": 5.39135462065285e-07,
"loss": 0.1246,
"step": 31104
},
{
"epoch": 1.39,
"learning_rate": 5.381867235909751e-07,
"loss": 0.0802,
"step": 31168
},
{
"epoch": 1.39,
"learning_rate": 5.372379851166652e-07,
"loss": 0.0565,
"step": 31232
},
{
"epoch": 1.39,
"learning_rate": 5.362892466423552e-07,
"loss": 0.0646,
"step": 31296
},
{
"epoch": 1.39,
"learning_rate": 5.353405081680454e-07,
"loss": 0.0712,
"step": 31360
},
{
"epoch": 1.4,
"learning_rate": 5.343917696937354e-07,
"loss": 0.1214,
"step": 31424
},
{
"epoch": 1.4,
"learning_rate": 5.334430312194253e-07,
"loss": 0.0654,
"step": 31488
},
{
"epoch": 1.4,
"learning_rate": 5.324942927451154e-07,
"loss": 0.1136,
"step": 31552
},
{
"epoch": 1.41,
"learning_rate": 5.315455542708055e-07,
"loss": 0.0907,
"step": 31616
},
{
"epoch": 1.41,
"learning_rate": 5.305968157964956e-07,
"loss": 0.0797,
"step": 31680
},
{
"epoch": 1.41,
"learning_rate": 5.296480773221856e-07,
"loss": 0.0739,
"step": 31744
},
{
"epoch": 1.41,
"learning_rate": 5.286993388478757e-07,
"loss": 0.0763,
"step": 31808
},
{
"epoch": 1.42,
"learning_rate": 5.277654244122268e-07,
"loss": 0.0724,
"step": 31872
},
{
"epoch": 1.42,
"learning_rate": 5.26816685937917e-07,
"loss": 0.0855,
"step": 31936
},
{
"epoch": 1.42,
"learning_rate": 5.25867947463607e-07,
"loss": 0.0873,
"step": 32000
},
{
"epoch": 1.43,
"learning_rate": 5.24919208989297e-07,
"loss": 0.0688,
"step": 32064
},
{
"epoch": 1.43,
"learning_rate": 5.239704705149871e-07,
"loss": 0.0945,
"step": 32128
},
{
"epoch": 1.43,
"learning_rate": 5.230217320406771e-07,
"loss": 0.1052,
"step": 32192
},
{
"epoch": 1.43,
"learning_rate": 5.220729935663672e-07,
"loss": 0.057,
"step": 32256
},
{
"epoch": 1.44,
"learning_rate": 5.211242550920573e-07,
"loss": 0.1135,
"step": 32320
},
{
"epoch": 1.44,
"learning_rate": 5.201755166177473e-07,
"loss": 0.0806,
"step": 32384
},
{
"epoch": 1.44,
"learning_rate": 5.192267781434374e-07,
"loss": 0.0931,
"step": 32448
},
{
"epoch": 1.45,
"learning_rate": 5.182780396691274e-07,
"loss": 0.0863,
"step": 32512
},
{
"epoch": 1.45,
"learning_rate": 5.173293011948176e-07,
"loss": 0.0623,
"step": 32576
},
{
"epoch": 1.45,
"learning_rate": 5.163805627205076e-07,
"loss": 0.0643,
"step": 32640
},
{
"epoch": 1.45,
"learning_rate": 5.154318242461975e-07,
"loss": 0.0693,
"step": 32704
},
{
"epoch": 1.46,
"learning_rate": 5.144830857718877e-07,
"loss": 0.0433,
"step": 32768
},
{
"epoch": 1.46,
"learning_rate": 5.135343472975777e-07,
"loss": 0.0808,
"step": 32832
},
{
"epoch": 1.46,
"learning_rate": 5.125856088232677e-07,
"loss": 0.0939,
"step": 32896
},
{
"epoch": 1.47,
"learning_rate": 5.116368703489579e-07,
"loss": 0.0852,
"step": 32960
},
{
"epoch": 1.47,
"learning_rate": 5.106881318746479e-07,
"loss": 0.155,
"step": 33024
},
{
"epoch": 1.47,
"learning_rate": 5.09739393400338e-07,
"loss": 0.127,
"step": 33088
},
{
"epoch": 1.47,
"learning_rate": 5.087906549260281e-07,
"loss": 0.0432,
"step": 33152
},
{
"epoch": 1.48,
"learning_rate": 5.078419164517181e-07,
"loss": 0.1161,
"step": 33216
},
{
"epoch": 1.48,
"learning_rate": 5.068931779774081e-07,
"loss": 0.0877,
"step": 33280
},
{
"epoch": 1.48,
"learning_rate": 5.059444395030981e-07,
"loss": 0.0579,
"step": 33344
},
{
"epoch": 1.49,
"learning_rate": 5.049957010287883e-07,
"loss": 0.0408,
"step": 33408
},
{
"epoch": 1.49,
"learning_rate": 5.040469625544783e-07,
"loss": 0.062,
"step": 33472
},
{
"epoch": 1.49,
"learning_rate": 5.030982240801683e-07,
"loss": 0.1147,
"step": 33536
},
{
"epoch": 1.49,
"learning_rate": 5.021494856058585e-07,
"loss": 0.1064,
"step": 33600
},
{
"epoch": 1.5,
"learning_rate": 5.012007471315485e-07,
"loss": 0.1084,
"step": 33664
},
{
"epoch": 1.5,
"learning_rate": 5.002520086572386e-07,
"loss": 0.1422,
"step": 33728
},
{
"epoch": 1.5,
"learning_rate": 4.993032701829287e-07,
"loss": 0.1086,
"step": 33792
},
{
"epoch": 1.51,
"learning_rate": 4.983545317086186e-07,
"loss": 0.0744,
"step": 33856
},
{
"epoch": 1.51,
"learning_rate": 4.974057932343087e-07,
"loss": 0.1086,
"step": 33920
},
{
"epoch": 1.51,
"learning_rate": 4.964570547599988e-07,
"loss": 0.1106,
"step": 33984
},
{
"epoch": 1.51,
"learning_rate": 4.955083162856888e-07,
"loss": 0.0761,
"step": 34048
},
{
"epoch": 1.52,
"learning_rate": 4.945595778113789e-07,
"loss": 0.1153,
"step": 34112
},
{
"epoch": 1.52,
"learning_rate": 4.93610839337069e-07,
"loss": 0.0666,
"step": 34176
},
{
"epoch": 1.52,
"learning_rate": 4.92662100862759e-07,
"loss": 0.095,
"step": 34240
},
{
"epoch": 1.53,
"learning_rate": 4.917133623884491e-07,
"loss": 0.1187,
"step": 34304
},
{
"epoch": 1.53,
"learning_rate": 4.907646239141392e-07,
"loss": 0.0593,
"step": 34368
},
{
"epoch": 1.53,
"learning_rate": 4.898158854398292e-07,
"loss": 0.0832,
"step": 34432
},
{
"epoch": 1.53,
"learning_rate": 4.888671469655192e-07,
"loss": 0.0724,
"step": 34496
},
{
"epoch": 1.54,
"learning_rate": 4.879184084912093e-07,
"loss": 0.0552,
"step": 34560
},
{
"epoch": 1.54,
"learning_rate": 4.869696700168994e-07,
"loss": 0.0912,
"step": 34624
},
{
"epoch": 1.54,
"learning_rate": 4.860209315425894e-07,
"loss": 0.0644,
"step": 34688
},
{
"epoch": 1.55,
"learning_rate": 4.850721930682795e-07,
"loss": 0.0813,
"step": 34752
},
{
"epoch": 1.55,
"learning_rate": 4.841234545939696e-07,
"loss": 0.1482,
"step": 34816
},
{
"epoch": 1.55,
"learning_rate": 4.831747161196596e-07,
"loss": 0.1192,
"step": 34880
},
{
"epoch": 1.55,
"learning_rate": 4.822259776453497e-07,
"loss": 0.09,
"step": 34944
},
{
"epoch": 1.56,
"learning_rate": 4.812772391710397e-07,
"loss": 0.0527,
"step": 35008
},
{
"epoch": 1.56,
"learning_rate": 4.803285006967298e-07,
"loss": 0.0867,
"step": 35072
},
{
"epoch": 1.56,
"learning_rate": 4.793797622224199e-07,
"loss": 0.0483,
"step": 35136
},
{
"epoch": 1.57,
"learning_rate": 4.784310237481099e-07,
"loss": 0.1053,
"step": 35200
},
{
"epoch": 1.57,
"learning_rate": 4.774822852738e-07,
"loss": 0.1272,
"step": 35264
},
{
"epoch": 1.57,
"learning_rate": 4.7653354679949004e-07,
"loss": 0.1117,
"step": 35328
},
{
"epoch": 1.57,
"learning_rate": 4.7558480832518013e-07,
"loss": 0.1487,
"step": 35392
},
{
"epoch": 1.58,
"learning_rate": 4.746360698508701e-07,
"loss": 0.0671,
"step": 35456
},
{
"epoch": 1.58,
"learning_rate": 4.736873313765602e-07,
"loss": 0.0657,
"step": 35520
},
{
"epoch": 1.58,
"learning_rate": 4.7273859290225024e-07,
"loss": 0.1076,
"step": 35584
},
{
"epoch": 1.59,
"learning_rate": 4.7178985442794033e-07,
"loss": 0.1425,
"step": 35648
},
{
"epoch": 1.59,
"learning_rate": 4.708411159536304e-07,
"loss": 0.088,
"step": 35712
},
{
"epoch": 1.59,
"learning_rate": 4.698923774793204e-07,
"loss": 0.1325,
"step": 35776
},
{
"epoch": 1.59,
"learning_rate": 4.689436390050105e-07,
"loss": 0.1102,
"step": 35840
},
{
"epoch": 1.6,
"learning_rate": 4.679949005307006e-07,
"loss": 0.126,
"step": 35904
},
{
"epoch": 1.6,
"learning_rate": 4.6704616205639063e-07,
"loss": 0.074,
"step": 35968
},
{
"epoch": 1.6,
"learning_rate": 4.661122476207417e-07,
"loss": 0.1062,
"step": 36032
},
{
"epoch": 1.61,
"learning_rate": 4.651635091464318e-07,
"loss": 0.0812,
"step": 36096
},
{
"epoch": 1.61,
"learning_rate": 4.642147706721219e-07,
"loss": 0.1062,
"step": 36160
},
{
"epoch": 1.61,
"learning_rate": 4.6326603219781194e-07,
"loss": 0.0861,
"step": 36224
},
{
"epoch": 1.61,
"learning_rate": 4.6231729372350203e-07,
"loss": 0.0879,
"step": 36288
},
{
"epoch": 1.62,
"learning_rate": 4.6136855524919207e-07,
"loss": 0.0649,
"step": 36352
},
{
"epoch": 1.62,
"learning_rate": 4.604198167748821e-07,
"loss": 0.0695,
"step": 36416
},
{
"epoch": 1.62,
"learning_rate": 4.594710783005722e-07,
"loss": 0.0705,
"step": 36480
},
{
"epoch": 1.63,
"learning_rate": 4.5852233982626224e-07,
"loss": 0.1642,
"step": 36544
},
{
"epoch": 1.63,
"learning_rate": 4.5757360135195233e-07,
"loss": 0.0944,
"step": 36608
},
{
"epoch": 1.63,
"learning_rate": 4.5662486287764237e-07,
"loss": 0.0872,
"step": 36672
},
{
"epoch": 1.63,
"learning_rate": 4.556909484419935e-07,
"loss": 0.105,
"step": 36736
},
{
"epoch": 1.64,
"learning_rate": 4.547422099676836e-07,
"loss": 0.0825,
"step": 36800
},
{
"epoch": 1.64,
"learning_rate": 4.5379347149337364e-07,
"loss": 0.1063,
"step": 36864
},
{
"epoch": 1.64,
"learning_rate": 4.528447330190637e-07,
"loss": 0.0384,
"step": 36928
},
{
"epoch": 1.65,
"learning_rate": 4.518959945447537e-07,
"loss": 0.0765,
"step": 36992
},
{
"epoch": 1.65,
"learning_rate": 4.509472560704438e-07,
"loss": 0.1533,
"step": 37056
},
{
"epoch": 1.65,
"learning_rate": 4.499985175961339e-07,
"loss": 0.0634,
"step": 37120
},
{
"epoch": 1.65,
"learning_rate": 4.490497791218239e-07,
"loss": 0.074,
"step": 37184
},
{
"epoch": 1.66,
"learning_rate": 4.48101040647514e-07,
"loss": 0.0641,
"step": 37248
},
{
"epoch": 1.66,
"learning_rate": 4.4715230217320407e-07,
"loss": 0.0927,
"step": 37312
},
{
"epoch": 1.66,
"learning_rate": 4.462035636988941e-07,
"loss": 0.0837,
"step": 37376
},
{
"epoch": 1.67,
"learning_rate": 4.452548252245842e-07,
"loss": 0.1142,
"step": 37440
},
{
"epoch": 1.67,
"learning_rate": 4.443060867502742e-07,
"loss": 0.0672,
"step": 37504
},
{
"epoch": 1.67,
"learning_rate": 4.4335734827596427e-07,
"loss": 0.0726,
"step": 37568
},
{
"epoch": 1.67,
"learning_rate": 4.4240860980165436e-07,
"loss": 0.1104,
"step": 37632
},
{
"epoch": 1.68,
"learning_rate": 4.414598713273444e-07,
"loss": 0.1157,
"step": 37696
},
{
"epoch": 1.68,
"learning_rate": 4.405111328530345e-07,
"loss": 0.0694,
"step": 37760
},
{
"epoch": 1.68,
"learning_rate": 4.3956239437872447e-07,
"loss": 0.0905,
"step": 37824
},
{
"epoch": 1.68,
"learning_rate": 4.3861365590441456e-07,
"loss": 0.134,
"step": 37888
},
{
"epoch": 1.69,
"learning_rate": 4.3766491743010465e-07,
"loss": 0.0478,
"step": 37952
},
{
"epoch": 1.69,
"learning_rate": 4.367161789557947e-07,
"loss": 0.1028,
"step": 38016
},
{
"epoch": 1.69,
"learning_rate": 4.357674404814848e-07,
"loss": 0.1023,
"step": 38080
},
{
"epoch": 1.7,
"learning_rate": 4.348187020071748e-07,
"loss": 0.1258,
"step": 38144
},
{
"epoch": 1.7,
"learning_rate": 4.3386996353286486e-07,
"loss": 0.0619,
"step": 38208
},
{
"epoch": 1.7,
"learning_rate": 4.3292122505855495e-07,
"loss": 0.1135,
"step": 38272
},
{
"epoch": 1.7,
"learning_rate": 4.31972486584245e-07,
"loss": 0.062,
"step": 38336
},
{
"epoch": 1.71,
"learning_rate": 4.31023748109935e-07,
"loss": 0.0513,
"step": 38400
},
{
"epoch": 1.71,
"learning_rate": 4.300750096356251e-07,
"loss": 0.0451,
"step": 38464
},
{
"epoch": 1.71,
"learning_rate": 4.2912627116131515e-07,
"loss": 0.1072,
"step": 38528
},
{
"epoch": 1.72,
"learning_rate": 4.2817753268700524e-07,
"loss": 0.0962,
"step": 38592
},
{
"epoch": 1.72,
"learning_rate": 4.2722879421269533e-07,
"loss": 0.0748,
"step": 38656
},
{
"epoch": 1.72,
"learning_rate": 4.262800557383853e-07,
"loss": 0.1036,
"step": 38720
},
{
"epoch": 1.72,
"learning_rate": 4.253313172640754e-07,
"loss": 0.0929,
"step": 38784
},
{
"epoch": 1.73,
"learning_rate": 4.2438257878976545e-07,
"loss": 0.145,
"step": 38848
},
{
"epoch": 1.73,
"learning_rate": 4.2343384031545554e-07,
"loss": 0.0449,
"step": 38912
},
{
"epoch": 1.73,
"learning_rate": 4.2248510184114563e-07,
"loss": 0.1341,
"step": 38976
},
{
"epoch": 1.74,
"learning_rate": 4.215363633668356e-07,
"loss": 0.1309,
"step": 39040
},
{
"epoch": 1.74,
"learning_rate": 4.205876248925257e-07,
"loss": 0.0446,
"step": 39104
},
{
"epoch": 1.74,
"learning_rate": 4.1963888641821574e-07,
"loss": 0.0743,
"step": 39168
},
{
"epoch": 1.74,
"learning_rate": 4.1870497198256694e-07,
"loss": 0.0653,
"step": 39232
},
{
"epoch": 1.75,
"learning_rate": 4.1775623350825693e-07,
"loss": 0.0822,
"step": 39296
},
{
"epoch": 1.75,
"learning_rate": 4.16807495033947e-07,
"loss": 0.0777,
"step": 39360
},
{
"epoch": 1.75,
"learning_rate": 4.158587565596371e-07,
"loss": 0.126,
"step": 39424
},
{
"epoch": 1.76,
"learning_rate": 4.1491001808532715e-07,
"loss": 0.0348,
"step": 39488
},
{
"epoch": 1.76,
"learning_rate": 4.1396127961101724e-07,
"loss": 0.0783,
"step": 39552
},
{
"epoch": 1.76,
"learning_rate": 4.130125411367072e-07,
"loss": 0.0846,
"step": 39616
},
{
"epoch": 1.76,
"learning_rate": 4.120638026623973e-07,
"loss": 0.0792,
"step": 39680
},
{
"epoch": 1.77,
"learning_rate": 4.111150641880874e-07,
"loss": 0.0684,
"step": 39744
},
{
"epoch": 1.77,
"learning_rate": 4.1016632571377744e-07,
"loss": 0.0712,
"step": 39808
},
{
"epoch": 1.77,
"learning_rate": 4.092175872394675e-07,
"loss": 0.0962,
"step": 39872
},
{
"epoch": 1.78,
"learning_rate": 4.0826884876515757e-07,
"loss": 0.0995,
"step": 39936
},
{
"epoch": 1.78,
"learning_rate": 4.073201102908476e-07,
"loss": 0.0408,
"step": 40000
},
{
"epoch": 1.78,
"learning_rate": 4.063713718165377e-07,
"loss": 0.0742,
"step": 40064
},
{
"epoch": 1.78,
"learning_rate": 4.0542263334222774e-07,
"loss": 0.0876,
"step": 40128
},
{
"epoch": 1.79,
"learning_rate": 4.044738948679178e-07,
"loss": 0.0912,
"step": 40192
},
{
"epoch": 1.79,
"learning_rate": 4.0352515639360786e-07,
"loss": 0.0635,
"step": 40256
},
{
"epoch": 1.79,
"learning_rate": 4.025764179192979e-07,
"loss": 0.0641,
"step": 40320
},
{
"epoch": 1.8,
"learning_rate": 4.01627679444988e-07,
"loss": 0.1015,
"step": 40384
},
{
"epoch": 1.8,
"learning_rate": 4.006789409706781e-07,
"loss": 0.0551,
"step": 40448
},
{
"epoch": 1.8,
"learning_rate": 3.9973020249636807e-07,
"loss": 0.1139,
"step": 40512
},
{
"epoch": 1.8,
"learning_rate": 3.9878146402205816e-07,
"loss": 0.1007,
"step": 40576
},
{
"epoch": 1.81,
"learning_rate": 3.978475495864093e-07,
"loss": 0.1153,
"step": 40640
},
{
"epoch": 1.81,
"learning_rate": 3.969136351507604e-07,
"loss": 0.0426,
"step": 40704
},
{
"epoch": 1.81,
"learning_rate": 3.959648966764505e-07,
"loss": 0.1175,
"step": 40768
},
{
"epoch": 1.82,
"learning_rate": 3.950161582021406e-07,
"loss": 0.0451,
"step": 40832
},
{
"epoch": 1.82,
"learning_rate": 3.940674197278306e-07,
"loss": 0.1535,
"step": 40896
},
{
"epoch": 1.82,
"learning_rate": 3.931186812535207e-07,
"loss": 0.0703,
"step": 40960
},
{
"epoch": 1.82,
"learning_rate": 3.9216994277921075e-07,
"loss": 0.1161,
"step": 41024
},
{
"epoch": 1.83,
"learning_rate": 3.912212043049008e-07,
"loss": 0.0631,
"step": 41088
},
{
"epoch": 1.83,
"learning_rate": 3.902724658305909e-07,
"loss": 0.086,
"step": 41152
},
{
"epoch": 1.83,
"learning_rate": 3.893237273562809e-07,
"loss": 0.0811,
"step": 41216
},
{
"epoch": 1.84,
"learning_rate": 3.88374988881971e-07,
"loss": 0.097,
"step": 41280
},
{
"epoch": 1.84,
"learning_rate": 3.8742625040766105e-07,
"loss": 0.0747,
"step": 41344
},
{
"epoch": 1.84,
"learning_rate": 3.864775119333511e-07,
"loss": 0.0843,
"step": 41408
},
{
"epoch": 1.84,
"learning_rate": 3.855287734590412e-07,
"loss": 0.0942,
"step": 41472
},
{
"epoch": 1.85,
"learning_rate": 3.845800349847312e-07,
"loss": 0.1175,
"step": 41536
},
{
"epoch": 1.85,
"learning_rate": 3.836312965104213e-07,
"loss": 0.0899,
"step": 41600
},
{
"epoch": 1.85,
"learning_rate": 3.8268255803611134e-07,
"loss": 0.1071,
"step": 41664
},
{
"epoch": 1.86,
"learning_rate": 3.817338195618014e-07,
"loss": 0.0559,
"step": 41728
},
{
"epoch": 1.86,
"learning_rate": 3.8078508108749147e-07,
"loss": 0.1248,
"step": 41792
},
{
"epoch": 1.86,
"learning_rate": 3.7983634261318156e-07,
"loss": 0.0601,
"step": 41856
},
{
"epoch": 1.86,
"learning_rate": 3.7888760413887155e-07,
"loss": 0.049,
"step": 41920
},
{
"epoch": 1.87,
"learning_rate": 3.7793886566456164e-07,
"loss": 0.0611,
"step": 41984
},
{
"epoch": 1.87,
"learning_rate": 3.769901271902517e-07,
"loss": 0.0884,
"step": 42048
},
{
"epoch": 1.87,
"learning_rate": 3.7604138871594176e-07,
"loss": 0.0759,
"step": 42112
},
{
"epoch": 1.88,
"learning_rate": 3.7509265024163185e-07,
"loss": 0.1484,
"step": 42176
},
{
"epoch": 1.88,
"learning_rate": 3.7414391176732184e-07,
"loss": 0.0447,
"step": 42240
},
{
"epoch": 1.88,
"learning_rate": 3.7319517329301193e-07,
"loss": 0.0491,
"step": 42304
},
{
"epoch": 1.88,
"learning_rate": 3.7224643481870197e-07,
"loss": 0.053,
"step": 42368
},
{
"epoch": 1.89,
"learning_rate": 3.7129769634439206e-07,
"loss": 0.0832,
"step": 42432
},
{
"epoch": 1.89,
"learning_rate": 3.7034895787008215e-07,
"loss": 0.0677,
"step": 42496
},
{
"epoch": 1.89,
"learning_rate": 3.6940021939577213e-07,
"loss": 0.0599,
"step": 42560
},
{
"epoch": 1.9,
"learning_rate": 3.684514809214622e-07,
"loss": 0.1093,
"step": 42624
},
{
"epoch": 1.9,
"learning_rate": 3.675027424471523e-07,
"loss": 0.116,
"step": 42688
},
{
"epoch": 1.9,
"learning_rate": 3.6655400397284235e-07,
"loss": 0.1072,
"step": 42752
},
{
"epoch": 1.9,
"learning_rate": 3.656052654985324e-07,
"loss": 0.1406,
"step": 42816
},
{
"epoch": 1.91,
"learning_rate": 3.6465652702422243e-07,
"loss": 0.0339,
"step": 42880
},
{
"epoch": 1.91,
"learning_rate": 3.637077885499125e-07,
"loss": 0.1058,
"step": 42944
},
{
"epoch": 1.91,
"learning_rate": 3.627590500756026e-07,
"loss": 0.1187,
"step": 43008
},
{
"epoch": 1.92,
"learning_rate": 3.6181031160129265e-07,
"loss": 0.0695,
"step": 43072
},
{
"epoch": 1.92,
"learning_rate": 3.608615731269827e-07,
"loss": 0.0938,
"step": 43136
},
{
"epoch": 1.92,
"learning_rate": 3.599128346526727e-07,
"loss": 0.08,
"step": 43200
},
{
"epoch": 1.92,
"learning_rate": 3.589640961783628e-07,
"loss": 0.0911,
"step": 43264
},
{
"epoch": 1.93,
"learning_rate": 3.580153577040529e-07,
"loss": 0.0983,
"step": 43328
},
{
"epoch": 1.93,
"learning_rate": 3.5706661922974294e-07,
"loss": 0.115,
"step": 43392
},
{
"epoch": 1.93,
"learning_rate": 3.56117880755433e-07,
"loss": 0.0913,
"step": 43456
},
{
"epoch": 1.94,
"learning_rate": 3.5516914228112307e-07,
"loss": 0.0739,
"step": 43520
},
{
"epoch": 1.94,
"learning_rate": 3.542204038068131e-07,
"loss": 0.1011,
"step": 43584
},
{
"epoch": 1.94,
"learning_rate": 3.532716653325032e-07,
"loss": 0.0595,
"step": 43648
},
{
"epoch": 1.94,
"learning_rate": 3.523229268581932e-07,
"loss": 0.1025,
"step": 43712
},
{
"epoch": 1.95,
"learning_rate": 3.5137418838388327e-07,
"loss": 0.1022,
"step": 43776
},
{
"epoch": 1.95,
"learning_rate": 3.5042544990957336e-07,
"loss": 0.0641,
"step": 43840
},
{
"epoch": 1.95,
"learning_rate": 3.494767114352634e-07,
"loss": 0.1397,
"step": 43904
},
{
"epoch": 1.96,
"learning_rate": 3.485279729609535e-07,
"loss": 0.136,
"step": 43968
},
{
"epoch": 1.96,
"learning_rate": 3.475792344866435e-07,
"loss": 0.0946,
"step": 44032
},
{
"epoch": 1.96,
"learning_rate": 3.4663049601233357e-07,
"loss": 0.0963,
"step": 44096
},
{
"epoch": 1.96,
"learning_rate": 3.4568175753802366e-07,
"loss": 0.0511,
"step": 44160
},
{
"epoch": 1.97,
"learning_rate": 3.447330190637137e-07,
"loss": 0.0665,
"step": 44224
},
{
"epoch": 1.97,
"learning_rate": 3.437842805894038e-07,
"loss": 0.0735,
"step": 44288
},
{
"epoch": 1.97,
"learning_rate": 3.428355421150938e-07,
"loss": 0.0443,
"step": 44352
},
{
"epoch": 1.98,
"learning_rate": 3.4188680364078386e-07,
"loss": 0.1191,
"step": 44416
},
{
"epoch": 1.98,
"learning_rate": 3.4093806516647395e-07,
"loss": 0.0886,
"step": 44480
},
{
"epoch": 1.98,
"learning_rate": 3.39989326692164e-07,
"loss": 0.0917,
"step": 44544
},
{
"epoch": 1.98,
"learning_rate": 3.390405882178541e-07,
"loss": 0.0751,
"step": 44608
},
{
"epoch": 1.99,
"learning_rate": 3.380918497435441e-07,
"loss": 0.0692,
"step": 44672
},
{
"epoch": 1.99,
"learning_rate": 3.3714311126923416e-07,
"loss": 0.1101,
"step": 44736
},
{
"epoch": 1.99,
"learning_rate": 3.3619437279492425e-07,
"loss": 0.1307,
"step": 44800
},
{
"epoch": 2.0,
"learning_rate": 3.3524563432061434e-07,
"loss": 0.1512,
"step": 44864
},
{
"epoch": 2.0,
"learning_rate": 3.342968958463043e-07,
"loss": 0.0731,
"step": 44928
}
],
"logging_steps": 64,
"max_steps": 67458,
"num_input_tokens_seen": 0,
"num_train_epochs": 3,
"save_steps": 22486,
"total_flos": 9.54832471474176e+19,
"train_batch_size": 1,
"trial_name": null,
"trial_params": null
}