{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 3.0,
"eval_steps": 500,
"global_step": 1221,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.002457002457002457,
"grad_norm": 4.019534111022949,
"learning_rate": 5.405405405405406e-07,
"loss": 1.1042,
"step": 1
},
{
"epoch": 0.004914004914004914,
"grad_norm": 3.4502384662628174,
"learning_rate": 1.0810810810810812e-06,
"loss": 1.1217,
"step": 2
},
{
"epoch": 0.007371007371007371,
"grad_norm": 3.351306438446045,
"learning_rate": 1.6216216216216219e-06,
"loss": 1.0932,
"step": 3
},
{
"epoch": 0.009828009828009828,
"grad_norm": 3.862949848175049,
"learning_rate": 2.1621621621621623e-06,
"loss": 1.0964,
"step": 4
},
{
"epoch": 0.012285012285012284,
"grad_norm": 3.430417776107788,
"learning_rate": 2.702702702702703e-06,
"loss": 1.1157,
"step": 5
},
{
"epoch": 0.014742014742014743,
"grad_norm": 3.059828758239746,
"learning_rate": 3.2432432432432437e-06,
"loss": 1.1316,
"step": 6
},
{
"epoch": 0.0171990171990172,
"grad_norm": 2.8164730072021484,
"learning_rate": 3.7837837837837844e-06,
"loss": 1.1034,
"step": 7
},
{
"epoch": 0.019656019656019656,
"grad_norm": 2.527447462081909,
"learning_rate": 4.324324324324325e-06,
"loss": 1.0492,
"step": 8
},
{
"epoch": 0.022113022113022112,
"grad_norm": 2.58087158203125,
"learning_rate": 4.864864864864866e-06,
"loss": 1.0547,
"step": 9
},
{
"epoch": 0.02457002457002457,
"grad_norm": 2.0490829944610596,
"learning_rate": 5.405405405405406e-06,
"loss": 1.0195,
"step": 10
},
{
"epoch": 0.02702702702702703,
"grad_norm": 1.5082110166549683,
"learning_rate": 5.945945945945947e-06,
"loss": 0.9986,
"step": 11
},
{
"epoch": 0.029484029484029485,
"grad_norm": 1.6830556392669678,
"learning_rate": 6.486486486486487e-06,
"loss": 0.9964,
"step": 12
},
{
"epoch": 0.03194103194103194,
"grad_norm": 1.7410277128219604,
"learning_rate": 7.027027027027028e-06,
"loss": 0.9768,
"step": 13
},
{
"epoch": 0.0343980343980344,
"grad_norm": 1.6225413084030151,
"learning_rate": 7.567567567567569e-06,
"loss": 0.9261,
"step": 14
},
{
"epoch": 0.036855036855036855,
"grad_norm": 1.786592960357666,
"learning_rate": 8.108108108108109e-06,
"loss": 1.0012,
"step": 15
},
{
"epoch": 0.03931203931203931,
"grad_norm": 1.6199265718460083,
"learning_rate": 8.64864864864865e-06,
"loss": 1.0118,
"step": 16
},
{
"epoch": 0.04176904176904177,
"grad_norm": 1.5307223796844482,
"learning_rate": 9.189189189189191e-06,
"loss": 1.0147,
"step": 17
},
{
"epoch": 0.044226044226044224,
"grad_norm": 1.498974084854126,
"learning_rate": 9.729729729729732e-06,
"loss": 0.9955,
"step": 18
},
{
"epoch": 0.04668304668304668,
"grad_norm": 1.3995916843414307,
"learning_rate": 1.027027027027027e-05,
"loss": 0.9256,
"step": 19
},
{
"epoch": 0.04914004914004914,
"grad_norm": 1.4726876020431519,
"learning_rate": 1.0810810810810812e-05,
"loss": 0.974,
"step": 20
},
{
"epoch": 0.051597051597051594,
"grad_norm": 1.3012052774429321,
"learning_rate": 1.1351351351351352e-05,
"loss": 0.9271,
"step": 21
},
{
"epoch": 0.05405405405405406,
"grad_norm": 1.274909257888794,
"learning_rate": 1.1891891891891894e-05,
"loss": 0.9561,
"step": 22
},
{
"epoch": 0.056511056511056514,
"grad_norm": 1.120010495185852,
"learning_rate": 1.2432432432432433e-05,
"loss": 0.939,
"step": 23
},
{
"epoch": 0.05896805896805897,
"grad_norm": 1.1698671579360962,
"learning_rate": 1.2972972972972975e-05,
"loss": 1.001,
"step": 24
},
{
"epoch": 0.06142506142506143,
"grad_norm": 1.1809970140457153,
"learning_rate": 1.3513513513513515e-05,
"loss": 0.9752,
"step": 25
},
{
"epoch": 0.06388206388206388,
"grad_norm": 1.2462128400802612,
"learning_rate": 1.4054054054054055e-05,
"loss": 0.922,
"step": 26
},
{
"epoch": 0.06633906633906633,
"grad_norm": 1.2176518440246582,
"learning_rate": 1.4594594594594596e-05,
"loss": 1.0068,
"step": 27
},
{
"epoch": 0.0687960687960688,
"grad_norm": 1.1821041107177734,
"learning_rate": 1.5135135135135138e-05,
"loss": 0.9086,
"step": 28
},
{
"epoch": 0.07125307125307126,
"grad_norm": 1.2778708934783936,
"learning_rate": 1.5675675675675676e-05,
"loss": 0.9738,
"step": 29
},
{
"epoch": 0.07371007371007371,
"grad_norm": 1.3080499172210693,
"learning_rate": 1.6216216216216218e-05,
"loss": 0.9731,
"step": 30
},
{
"epoch": 0.07616707616707617,
"grad_norm": 1.1985275745391846,
"learning_rate": 1.6756756756756757e-05,
"loss": 0.9423,
"step": 31
},
{
"epoch": 0.07862407862407862,
"grad_norm": 1.2243951559066772,
"learning_rate": 1.72972972972973e-05,
"loss": 0.9437,
"step": 32
},
{
"epoch": 0.08108108108108109,
"grad_norm": 1.4167068004608154,
"learning_rate": 1.783783783783784e-05,
"loss": 0.9431,
"step": 33
},
{
"epoch": 0.08353808353808354,
"grad_norm": 1.3067026138305664,
"learning_rate": 1.8378378378378383e-05,
"loss": 0.9741,
"step": 34
},
{
"epoch": 0.085995085995086,
"grad_norm": 1.2219585180282593,
"learning_rate": 1.891891891891892e-05,
"loss": 0.9488,
"step": 35
},
{
"epoch": 0.08845208845208845,
"grad_norm": 1.4997390508651733,
"learning_rate": 1.9459459459459463e-05,
"loss": 0.9776,
"step": 36
},
{
"epoch": 0.09090909090909091,
"grad_norm": 1.211965560913086,
"learning_rate": 2e-05,
"loss": 0.9315,
"step": 37
},
{
"epoch": 0.09336609336609336,
"grad_norm": 1.2097049951553345,
"learning_rate": 1.9999964798101195e-05,
"loss": 1.0067,
"step": 38
},
{
"epoch": 0.09582309582309582,
"grad_norm": 1.3059693574905396,
"learning_rate": 1.9999859192652612e-05,
"loss": 0.9853,
"step": 39
},
{
"epoch": 0.09828009828009827,
"grad_norm": 1.125588297843933,
"learning_rate": 1.9999683184397752e-05,
"loss": 0.8909,
"step": 40
},
{
"epoch": 0.10073710073710074,
"grad_norm": 1.353481650352478,
"learning_rate": 1.9999436774575783e-05,
"loss": 0.9858,
"step": 41
},
{
"epoch": 0.10319410319410319,
"grad_norm": 1.2949520349502563,
"learning_rate": 1.999911996492152e-05,
"loss": 0.9285,
"step": 42
},
{
"epoch": 0.10565110565110565,
"grad_norm": 1.2486616373062134,
"learning_rate": 1.9998732757665428e-05,
"loss": 0.982,
"step": 43
},
{
"epoch": 0.10810810810810811,
"grad_norm": 1.268140435218811,
"learning_rate": 1.9998275155533585e-05,
"loss": 0.9903,
"step": 44
},
{
"epoch": 0.11056511056511056,
"grad_norm": 1.1526011228561401,
"learning_rate": 1.9997747161747696e-05,
"loss": 0.9467,
"step": 45
},
{
"epoch": 0.11302211302211303,
"grad_norm": 1.2035915851593018,
"learning_rate": 1.9997148780025028e-05,
"loss": 0.9644,
"step": 46
},
{
"epoch": 0.11547911547911548,
"grad_norm": 1.179955005645752,
"learning_rate": 1.9996480014578422e-05,
"loss": 0.9554,
"step": 47
},
{
"epoch": 0.11793611793611794,
"grad_norm": 1.2358040809631348,
"learning_rate": 1.9995740870116233e-05,
"loss": 1.0244,
"step": 48
},
{
"epoch": 0.12039312039312039,
"grad_norm": 1.2099424600601196,
"learning_rate": 1.9994931351842327e-05,
"loss": 0.9799,
"step": 49
},
{
"epoch": 0.12285012285012285,
"grad_norm": 1.2106077671051025,
"learning_rate": 1.9994051465456014e-05,
"loss": 0.9805,
"step": 50
},
{
"epoch": 0.12530712530712532,
"grad_norm": 1.1614030599594116,
"learning_rate": 1.999310121715203e-05,
"loss": 0.9502,
"step": 51
},
{
"epoch": 0.12776412776412777,
"grad_norm": 1.3291958570480347,
"learning_rate": 1.9992080613620486e-05,
"loss": 0.9936,
"step": 52
},
{
"epoch": 0.13022113022113022,
"grad_norm": 1.2553635835647583,
"learning_rate": 1.999098966204682e-05,
"loss": 0.9781,
"step": 53
},
{
"epoch": 0.13267813267813267,
"grad_norm": 1.2526620626449585,
"learning_rate": 1.9989828370111737e-05,
"loss": 0.9853,
"step": 54
},
{
"epoch": 0.13513513513513514,
"grad_norm": 1.15430748462677,
"learning_rate": 1.998859674599118e-05,
"loss": 0.9315,
"step": 55
},
{
"epoch": 0.1375921375921376,
"grad_norm": 1.278533935546875,
"learning_rate": 1.998729479835625e-05,
"loss": 0.9431,
"step": 56
},
{
"epoch": 0.14004914004914004,
"grad_norm": 1.1361323595046997,
"learning_rate": 1.998592253637315e-05,
"loss": 0.9736,
"step": 57
},
{
"epoch": 0.14250614250614252,
"grad_norm": 1.0846306085586548,
"learning_rate": 1.998447996970313e-05,
"loss": 0.9886,
"step": 58
},
{
"epoch": 0.14496314496314497,
"grad_norm": 1.1292674541473389,
"learning_rate": 1.99829671085024e-05,
"loss": 0.9776,
"step": 59
},
{
"epoch": 0.14742014742014742,
"grad_norm": 1.276936650276184,
"learning_rate": 1.9981383963422086e-05,
"loss": 1.036,
"step": 60
},
{
"epoch": 0.14987714987714987,
"grad_norm": 1.232346534729004,
"learning_rate": 1.9979730545608128e-05,
"loss": 0.9879,
"step": 61
},
{
"epoch": 0.15233415233415235,
"grad_norm": 1.2383723258972168,
"learning_rate": 1.9978006866701212e-05,
"loss": 1.0056,
"step": 62
},
{
"epoch": 0.1547911547911548,
"grad_norm": 1.160910725593567,
"learning_rate": 1.9976212938836692e-05,
"loss": 0.9159,
"step": 63
},
{
"epoch": 0.15724815724815724,
"grad_norm": 1.2346571683883667,
"learning_rate": 1.9974348774644503e-05,
"loss": 0.9623,
"step": 64
},
{
"epoch": 0.1597051597051597,
"grad_norm": 1.127328634262085,
"learning_rate": 1.9972414387249074e-05,
"loss": 0.9398,
"step": 65
},
{
"epoch": 0.16216216216216217,
"grad_norm": 1.113250732421875,
"learning_rate": 1.9970409790269216e-05,
"loss": 1.0158,
"step": 66
},
{
"epoch": 0.16461916461916462,
"grad_norm": 1.197217345237732,
"learning_rate": 1.9968334997818062e-05,
"loss": 0.9438,
"step": 67
},
{
"epoch": 0.16707616707616707,
"grad_norm": 1.1856095790863037,
"learning_rate": 1.996619002450294e-05,
"loss": 0.9531,
"step": 68
},
{
"epoch": 0.16953316953316952,
"grad_norm": 1.1724814176559448,
"learning_rate": 1.9963974885425267e-05,
"loss": 0.9764,
"step": 69
},
{
"epoch": 0.171990171990172,
"grad_norm": 1.1636273860931396,
"learning_rate": 1.996168959618047e-05,
"loss": 0.9479,
"step": 70
},
{
"epoch": 0.17444717444717445,
"grad_norm": 1.2033659219741821,
"learning_rate": 1.9959334172857852e-05,
"loss": 0.9896,
"step": 71
},
{
"epoch": 0.1769041769041769,
"grad_norm": 1.2620456218719482,
"learning_rate": 1.9956908632040492e-05,
"loss": 0.9773,
"step": 72
},
{
"epoch": 0.17936117936117937,
"grad_norm": 1.1564908027648926,
"learning_rate": 1.9954412990805107e-05,
"loss": 0.9545,
"step": 73
},
{
"epoch": 0.18181818181818182,
"grad_norm": 1.0925333499908447,
"learning_rate": 1.995184726672197e-05,
"loss": 0.9443,
"step": 74
},
{
"epoch": 0.18427518427518427,
"grad_norm": 1.0715115070343018,
"learning_rate": 1.9949211477854747e-05,
"loss": 0.9111,
"step": 75
},
{
"epoch": 0.18673218673218672,
"grad_norm": 1.1949642896652222,
"learning_rate": 1.9946505642760398e-05,
"loss": 0.988,
"step": 76
},
{
"epoch": 0.1891891891891892,
"grad_norm": 1.209015130996704,
"learning_rate": 1.994372978048903e-05,
"loss": 0.9326,
"step": 77
},
{
"epoch": 0.19164619164619165,
"grad_norm": 1.2035118341445923,
"learning_rate": 1.9940883910583757e-05,
"loss": 0.9723,
"step": 78
},
{
"epoch": 0.1941031941031941,
"grad_norm": 1.2777043581008911,
"learning_rate": 1.993796805308059e-05,
"loss": 0.9982,
"step": 79
},
{
"epoch": 0.19656019656019655,
"grad_norm": 1.1283223628997803,
"learning_rate": 1.9934982228508278e-05,
"loss": 0.9934,
"step": 80
},
{
"epoch": 0.19901719901719903,
"grad_norm": 1.2025024890899658,
"learning_rate": 1.9931926457888155e-05,
"loss": 0.9628,
"step": 81
},
{
"epoch": 0.20147420147420148,
"grad_norm": 1.0961697101593018,
"learning_rate": 1.9928800762734007e-05,
"loss": 0.9896,
"step": 82
},
{
"epoch": 0.20393120393120392,
"grad_norm": 1.0925512313842773,
"learning_rate": 1.9925605165051917e-05,
"loss": 0.9538,
"step": 83
},
{
"epoch": 0.20638820638820637,
"grad_norm": 1.1571182012557983,
"learning_rate": 1.9922339687340102e-05,
"loss": 0.9515,
"step": 84
},
{
"epoch": 0.20884520884520885,
"grad_norm": 1.1376878023147583,
"learning_rate": 1.9919004352588768e-05,
"loss": 0.9172,
"step": 85
},
{
"epoch": 0.2113022113022113,
"grad_norm": 1.1190766096115112,
"learning_rate": 1.9915599184279943e-05,
"loss": 0.9436,
"step": 86
},
{
"epoch": 0.21375921375921375,
"grad_norm": 1.1635316610336304,
"learning_rate": 1.9912124206387297e-05,
"loss": 0.9493,
"step": 87
},
{
"epoch": 0.21621621621621623,
"grad_norm": 1.1531853675842285,
"learning_rate": 1.9908579443375995e-05,
"loss": 0.972,
"step": 88
},
{
"epoch": 0.21867321867321868,
"grad_norm": 1.0905935764312744,
"learning_rate": 1.990496492020252e-05,
"loss": 0.9686,
"step": 89
},
{
"epoch": 0.22113022113022113,
"grad_norm": 1.1130902767181396,
"learning_rate": 1.9901280662314483e-05,
"loss": 0.9579,
"step": 90
},
{
"epoch": 0.22358722358722358,
"grad_norm": 1.2269314527511597,
"learning_rate": 1.9897526695650458e-05,
"loss": 1.0352,
"step": 91
},
{
"epoch": 0.22604422604422605,
"grad_norm": 1.1804312467575073,
"learning_rate": 1.9893703046639806e-05,
"loss": 0.9444,
"step": 92
},
{
"epoch": 0.2285012285012285,
"grad_norm": 1.222614049911499,
"learning_rate": 1.9889809742202454e-05,
"loss": 0.9486,
"step": 93
},
{
"epoch": 0.23095823095823095,
"grad_norm": 1.2290183305740356,
"learning_rate": 1.9885846809748754e-05,
"loss": 1.0318,
"step": 94
},
{
"epoch": 0.2334152334152334,
"grad_norm": 1.0695152282714844,
"learning_rate": 1.9881814277179248e-05,
"loss": 1.0019,
"step": 95
},
{
"epoch": 0.23587223587223588,
"grad_norm": 1.105061650276184,
"learning_rate": 1.9877712172884504e-05,
"loss": 0.983,
"step": 96
},
{
"epoch": 0.23832923832923833,
"grad_norm": 1.1585967540740967,
"learning_rate": 1.9873540525744888e-05,
"loss": 0.9417,
"step": 97
},
{
"epoch": 0.24078624078624078,
"grad_norm": 1.1335004568099976,
"learning_rate": 1.9869299365130384e-05,
"loss": 0.952,
"step": 98
},
{
"epoch": 0.24324324324324326,
"grad_norm": 1.181249737739563,
"learning_rate": 1.9864988720900367e-05,
"loss": 0.9524,
"step": 99
},
{
"epoch": 0.2457002457002457,
"grad_norm": 1.2462314367294312,
"learning_rate": 1.986060862340342e-05,
"loss": 0.9499,
"step": 100
},
{
"epoch": 0.24815724815724816,
"grad_norm": 1.15605628490448,
"learning_rate": 1.9856159103477085e-05,
"loss": 0.9569,
"step": 101
},
{
"epoch": 0.25061425061425063,
"grad_norm": 1.0611865520477295,
"learning_rate": 1.9851640192447675e-05,
"loss": 0.8781,
"step": 102
},
{
"epoch": 0.25307125307125306,
"grad_norm": 1.0584359169006348,
"learning_rate": 1.984705192213004e-05,
"loss": 0.9487,
"step": 103
},
{
"epoch": 0.25552825552825553,
"grad_norm": 1.1824793815612793,
"learning_rate": 1.9842394324827342e-05,
"loss": 0.97,
"step": 104
},
{
"epoch": 0.257985257985258,
"grad_norm": 1.1818736791610718,
"learning_rate": 1.983766743333084e-05,
"loss": 0.9366,
"step": 105
},
{
"epoch": 0.26044226044226043,
"grad_norm": 1.1190263032913208,
"learning_rate": 1.9832871280919638e-05,
"loss": 0.91,
"step": 106
},
{
"epoch": 0.2628992628992629,
"grad_norm": 1.2692769765853882,
"learning_rate": 1.9828005901360476e-05,
"loss": 1.0086,
"step": 107
},
{
"epoch": 0.26535626535626533,
"grad_norm": 1.1339046955108643,
"learning_rate": 1.982307132890747e-05,
"loss": 0.9537,
"step": 108
},
{
"epoch": 0.2678132678132678,
"grad_norm": 1.2185297012329102,
"learning_rate": 1.9818067598301894e-05,
"loss": 0.9532,
"step": 109
},
{
"epoch": 0.2702702702702703,
"grad_norm": 1.0758349895477295,
"learning_rate": 1.9812994744771898e-05,
"loss": 0.9056,
"step": 110
},
{
"epoch": 0.2727272727272727,
"grad_norm": 1.1684318780899048,
"learning_rate": 1.9807852804032306e-05,
"loss": 0.9466,
"step": 111
},
{
"epoch": 0.2751842751842752,
"grad_norm": 1.2133638858795166,
"learning_rate": 1.980264181228433e-05,
"loss": 1.0147,
"step": 112
},
{
"epoch": 0.27764127764127766,
"grad_norm": 1.2245734930038452,
"learning_rate": 1.9797361806215335e-05,
"loss": 0.9824,
"step": 113
},
{
"epoch": 0.2800982800982801,
"grad_norm": 1.23886239528656,
"learning_rate": 1.979201282299856e-05,
"loss": 0.9882,
"step": 114
},
{
"epoch": 0.28255528255528256,
"grad_norm": 1.0840333700180054,
"learning_rate": 1.978659490029289e-05,
"loss": 0.952,
"step": 115
},
{
"epoch": 0.28501228501228504,
"grad_norm": 1.2387017011642456,
"learning_rate": 1.9781108076242548e-05,
"loss": 0.9989,
"step": 116
},
{
"epoch": 0.28746928746928746,
"grad_norm": 1.1117432117462158,
"learning_rate": 1.9775552389476865e-05,
"loss": 0.9092,
"step": 117
},
{
"epoch": 0.28992628992628994,
"grad_norm": 1.1947935819625854,
"learning_rate": 1.9769927879109982e-05,
"loss": 0.9392,
"step": 118
},
{
"epoch": 0.29238329238329236,
"grad_norm": 1.2482883930206299,
"learning_rate": 1.9764234584740592e-05,
"loss": 0.9591,
"step": 119
},
{
"epoch": 0.29484029484029484,
"grad_norm": 1.0863734483718872,
"learning_rate": 1.9758472546451645e-05,
"loss": 0.9346,
"step": 120
},
{
"epoch": 0.2972972972972973,
"grad_norm": 1.2650574445724487,
"learning_rate": 1.9752641804810083e-05,
"loss": 1.009,
"step": 121
},
{
"epoch": 0.29975429975429974,
"grad_norm": 1.0649502277374268,
"learning_rate": 1.974674240086654e-05,
"loss": 0.9568,
"step": 122
},
{
"epoch": 0.3022113022113022,
"grad_norm": 1.0599509477615356,
"learning_rate": 1.974077437615506e-05,
"loss": 0.9663,
"step": 123
},
{
"epoch": 0.3046683046683047,
"grad_norm": 1.253313660621643,
"learning_rate": 1.97347377726928e-05,
"loss": 0.9606,
"step": 124
},
{
"epoch": 0.3071253071253071,
"grad_norm": 1.143733263015747,
"learning_rate": 1.9728632632979746e-05,
"loss": 0.9633,
"step": 125
},
{
"epoch": 0.3095823095823096,
"grad_norm": 1.154212236404419,
"learning_rate": 1.9722458999998398e-05,
"loss": 1.0065,
"step": 126
},
{
"epoch": 0.31203931203931207,
"grad_norm": 1.0114328861236572,
"learning_rate": 1.971621691721348e-05,
"loss": 0.9483,
"step": 127
},
{
"epoch": 0.3144963144963145,
"grad_norm": 1.1100865602493286,
"learning_rate": 1.9709906428571616e-05,
"loss": 0.9425,
"step": 128
},
{
"epoch": 0.31695331695331697,
"grad_norm": 1.141754388809204,
"learning_rate": 1.9703527578501052e-05,
"loss": 0.9952,
"step": 129
},
{
"epoch": 0.3194103194103194,
"grad_norm": 1.172715187072754,
"learning_rate": 1.9697080411911313e-05,
"loss": 1.0016,
"step": 130
},
{
"epoch": 0.32186732186732187,
"grad_norm": 1.298166036605835,
"learning_rate": 1.9690564974192893e-05,
"loss": 0.9802,
"step": 131
},
{
"epoch": 0.32432432432432434,
"grad_norm": 1.15988028049469,
"learning_rate": 1.968398131121696e-05,
"loss": 0.9493,
"step": 132
},
{
"epoch": 0.32678132678132676,
"grad_norm": 1.1662455797195435,
"learning_rate": 1.967732946933499e-05,
"loss": 0.9291,
"step": 133
},
{
"epoch": 0.32923832923832924,
"grad_norm": 1.1036912202835083,
"learning_rate": 1.9670609495378484e-05,
"loss": 1.0128,
"step": 134
},
{
"epoch": 0.3316953316953317,
"grad_norm": 1.0609047412872314,
"learning_rate": 1.9663821436658607e-05,
"loss": 0.9378,
"step": 135
},
{
"epoch": 0.33415233415233414,
"grad_norm": 1.1160823106765747,
"learning_rate": 1.9656965340965872e-05,
"loss": 0.9166,
"step": 136
},
{
"epoch": 0.3366093366093366,
"grad_norm": 1.2418513298034668,
"learning_rate": 1.9650041256569792e-05,
"loss": 0.9673,
"step": 137
},
{
"epoch": 0.33906633906633904,
"grad_norm": 1.1111809015274048,
"learning_rate": 1.9643049232218554e-05,
"loss": 0.959,
"step": 138
},
{
"epoch": 0.3415233415233415,
"grad_norm": 1.1744064092636108,
"learning_rate": 1.9635989317138666e-05,
"loss": 0.957,
"step": 139
},
{
"epoch": 0.343980343980344,
"grad_norm": 1.1628965139389038,
"learning_rate": 1.962886156103461e-05,
"loss": 0.9292,
"step": 140
},
{
"epoch": 0.3464373464373464,
"grad_norm": 1.0930556058883667,
"learning_rate": 1.9621666014088495e-05,
"loss": 0.9468,
"step": 141
},
{
"epoch": 0.3488943488943489,
"grad_norm": 1.2241873741149902,
"learning_rate": 1.9614402726959703e-05,
"loss": 0.9632,
"step": 142
},
{
"epoch": 0.35135135135135137,
"grad_norm": 1.1155918836593628,
"learning_rate": 1.960707175078454e-05,
"loss": 0.9729,
"step": 143
},
{
"epoch": 0.3538083538083538,
"grad_norm": 1.1242003440856934,
"learning_rate": 1.9599673137175855e-05,
"loss": 0.9955,
"step": 144
},
{
"epoch": 0.35626535626535627,
"grad_norm": 1.0511232614517212,
"learning_rate": 1.9592206938222703e-05,
"loss": 0.9269,
"step": 145
},
{
"epoch": 0.35872235872235875,
"grad_norm": 1.162567138671875,
"learning_rate": 1.9584673206489955e-05,
"loss": 0.9695,
"step": 146
},
{
"epoch": 0.36117936117936117,
"grad_norm": 1.0482780933380127,
"learning_rate": 1.9577071995017945e-05,
"loss": 0.9019,
"step": 147
},
{
"epoch": 0.36363636363636365,
"grad_norm": 1.086195468902588,
"learning_rate": 1.956940335732209e-05,
"loss": 0.9473,
"step": 148
},
{
"epoch": 0.36609336609336607,
"grad_norm": 1.1613413095474243,
"learning_rate": 1.956166734739251e-05,
"loss": 0.9044,
"step": 149
},
{
"epoch": 0.36855036855036855,
"grad_norm": 1.0689467191696167,
"learning_rate": 1.9553864019693652e-05,
"loss": 0.9168,
"step": 150
},
{
"epoch": 0.371007371007371,
"grad_norm": 1.0362517833709717,
"learning_rate": 1.9545993429163913e-05,
"loss": 0.9087,
"step": 151
},
{
"epoch": 0.37346437346437344,
"grad_norm": 1.3585790395736694,
"learning_rate": 1.9538055631215233e-05,
"loss": 0.9635,
"step": 152
},
{
"epoch": 0.3759213759213759,
"grad_norm": 1.1443356275558472,
"learning_rate": 1.953005068173272e-05,
"loss": 0.9576,
"step": 153
},
{
"epoch": 0.3783783783783784,
"grad_norm": 1.1501580476760864,
"learning_rate": 1.952197863707427e-05,
"loss": 0.9189,
"step": 154
},
{
"epoch": 0.3808353808353808,
"grad_norm": 1.143819808959961,
"learning_rate": 1.9513839554070132e-05,
"loss": 0.9878,
"step": 155
},
{
"epoch": 0.3832923832923833,
"grad_norm": 1.1135509014129639,
"learning_rate": 1.9505633490022545e-05,
"loss": 0.9122,
"step": 156
},
{
"epoch": 0.3857493857493858,
"grad_norm": 1.051954746246338,
"learning_rate": 1.949736050270532e-05,
"loss": 0.9742,
"step": 157
},
{
"epoch": 0.3882063882063882,
"grad_norm": 1.259486198425293,
"learning_rate": 1.9489020650363427e-05,
"loss": 0.9365,
"step": 158
},
{
"epoch": 0.3906633906633907,
"grad_norm": 1.1075799465179443,
"learning_rate": 1.948061399171259e-05,
"loss": 0.971,
"step": 159
},
{
"epoch": 0.3931203931203931,
"grad_norm": 1.145005226135254,
"learning_rate": 1.9472140585938882e-05,
"loss": 0.9839,
"step": 160
},
{
"epoch": 0.3955773955773956,
"grad_norm": 1.0776715278625488,
"learning_rate": 1.9463600492698297e-05,
"loss": 0.9238,
"step": 161
},
{
"epoch": 0.39803439803439805,
"grad_norm": 1.10464346408844,
"learning_rate": 1.9454993772116336e-05,
"loss": 0.9642,
"step": 162
},
{
"epoch": 0.4004914004914005,
"grad_norm": 1.1394087076187134,
"learning_rate": 1.9446320484787576e-05,
"loss": 0.9942,
"step": 163
},
{
"epoch": 0.40294840294840295,
"grad_norm": 1.2384252548217773,
"learning_rate": 1.943758069177526e-05,
"loss": 0.9503,
"step": 164
},
{
"epoch": 0.40540540540540543,
"grad_norm": 1.1317917108535767,
"learning_rate": 1.9428774454610845e-05,
"loss": 0.9185,
"step": 165
},
{
"epoch": 0.40786240786240785,
"grad_norm": 1.1217604875564575,
"learning_rate": 1.9419901835293585e-05,
"loss": 1.0281,
"step": 166
},
{
"epoch": 0.4103194103194103,
"grad_norm": 1.1591936349868774,
"learning_rate": 1.9410962896290092e-05,
"loss": 0.989,
"step": 167
},
{
"epoch": 0.41277641277641275,
"grad_norm": 1.1651326417922974,
"learning_rate": 1.940195770053389e-05,
"loss": 0.9724,
"step": 168
},
{
"epoch": 0.4152334152334152,
"grad_norm": 1.0306227207183838,
"learning_rate": 1.9392886311424975e-05,
"loss": 0.8945,
"step": 169
},
{
"epoch": 0.4176904176904177,
"grad_norm": 1.0212548971176147,
"learning_rate": 1.9383748792829374e-05,
"loss": 0.9647,
"step": 170
},
{
"epoch": 0.4201474201474201,
"grad_norm": 1.0641008615493774,
"learning_rate": 1.9374545209078687e-05,
"loss": 0.9535,
"step": 171
},
{
"epoch": 0.4226044226044226,
"grad_norm": 1.0543537139892578,
"learning_rate": 1.936527562496964e-05,
"loss": 0.9291,
"step": 172
},
{
"epoch": 0.4250614250614251,
"grad_norm": 1.0414016246795654,
"learning_rate": 1.9355940105763622e-05,
"loss": 0.9358,
"step": 173
},
{
"epoch": 0.4275184275184275,
"grad_norm": 1.0907888412475586,
"learning_rate": 1.934653871718624e-05,
"loss": 0.8968,
"step": 174
},
{
"epoch": 0.42997542997543,
"grad_norm": 1.0105867385864258,
"learning_rate": 1.933707152542683e-05,
"loss": 0.915,
"step": 175
},
{
"epoch": 0.43243243243243246,
"grad_norm": 1.1251968145370483,
"learning_rate": 1.932753859713803e-05,
"loss": 0.9587,
"step": 176
},
{
"epoch": 0.4348894348894349,
"grad_norm": 1.0517654418945312,
"learning_rate": 1.9317939999435262e-05,
"loss": 0.963,
"step": 177
},
{
"epoch": 0.43734643734643736,
"grad_norm": 1.1260952949523926,
"learning_rate": 1.930827579989631e-05,
"loss": 0.9967,
"step": 178
},
{
"epoch": 0.4398034398034398,
"grad_norm": 1.174589991569519,
"learning_rate": 1.9298546066560802e-05,
"loss": 0.9883,
"step": 179
},
{
"epoch": 0.44226044226044225,
"grad_norm": 1.0986108779907227,
"learning_rate": 1.928875086792976e-05,
"loss": 0.9477,
"step": 180
},
{
"epoch": 0.44471744471744473,
"grad_norm": 1.0878371000289917,
"learning_rate": 1.9278890272965097e-05,
"loss": 0.9822,
"step": 181
},
{
"epoch": 0.44717444717444715,
"grad_norm": 1.1421008110046387,
"learning_rate": 1.926896435108915e-05,
"loss": 0.9644,
"step": 182
},
{
"epoch": 0.44963144963144963,
"grad_norm": 1.1047364473342896,
"learning_rate": 1.9258973172184176e-05,
"loss": 0.9562,
"step": 183
},
{
"epoch": 0.4520884520884521,
"grad_norm": 1.0693336725234985,
"learning_rate": 1.924891680659187e-05,
"loss": 0.9329,
"step": 184
},
{
"epoch": 0.45454545454545453,
"grad_norm": 1.0154356956481934,
"learning_rate": 1.9238795325112867e-05,
"loss": 0.9447,
"step": 185
},
{
"epoch": 0.457002457002457,
"grad_norm": 1.0603218078613281,
"learning_rate": 1.922860879900624e-05,
"loss": 0.9828,
"step": 186
},
{
"epoch": 0.4594594594594595,
"grad_norm": 1.0560318231582642,
"learning_rate": 1.9218357299988998e-05,
"loss": 0.9847,
"step": 187
},
{
"epoch": 0.4619164619164619,
"grad_norm": 1.0979032516479492,
"learning_rate": 1.920804090023559e-05,
"loss": 0.9308,
"step": 188
},
{
"epoch": 0.4643734643734644,
"grad_norm": 1.1306419372558594,
"learning_rate": 1.9197659672377388e-05,
"loss": 0.9425,
"step": 189
},
{
"epoch": 0.4668304668304668,
"grad_norm": 1.0981731414794922,
"learning_rate": 1.9187213689502177e-05,
"loss": 0.9414,
"step": 190
},
{
"epoch": 0.4692874692874693,
"grad_norm": 1.0769071578979492,
"learning_rate": 1.9176703025153643e-05,
"loss": 0.9608,
"step": 191
},
{
"epoch": 0.47174447174447176,
"grad_norm": 1.1851149797439575,
"learning_rate": 1.9166127753330856e-05,
"loss": 0.9365,
"step": 192
},
{
"epoch": 0.4742014742014742,
"grad_norm": 1.1022464036941528,
"learning_rate": 1.915548794848775e-05,
"loss": 0.9721,
"step": 193
},
{
"epoch": 0.47665847665847666,
"grad_norm": 1.0086687803268433,
"learning_rate": 1.914478368553258e-05,
"loss": 0.9209,
"step": 194
},
{
"epoch": 0.47911547911547914,
"grad_norm": 1.169425368309021,
"learning_rate": 1.9134015039827433e-05,
"loss": 0.9337,
"step": 195
},
{
"epoch": 0.48157248157248156,
"grad_norm": 1.0467708110809326,
"learning_rate": 1.9123182087187657e-05,
"loss": 0.9864,
"step": 196
},
{
"epoch": 0.48402948402948404,
"grad_norm": 1.0687167644500732,
"learning_rate": 1.911228490388136e-05,
"loss": 0.9307,
"step": 197
},
{
"epoch": 0.4864864864864865,
"grad_norm": 1.1125750541687012,
"learning_rate": 1.9101323566628842e-05,
"loss": 0.9279,
"step": 198
},
{
"epoch": 0.48894348894348894,
"grad_norm": 1.10071861743927,
"learning_rate": 1.909029815260209e-05,
"loss": 0.9786,
"step": 199
},
{
"epoch": 0.4914004914004914,
"grad_norm": 1.141037940979004,
"learning_rate": 1.9079208739424198e-05,
"loss": 1.0065,
"step": 200
},
{
"epoch": 0.49385749385749383,
"grad_norm": 1.0701899528503418,
"learning_rate": 1.906805540516885e-05,
"loss": 0.9552,
"step": 201
},
{
"epoch": 0.4963144963144963,
"grad_norm": 1.155552864074707,
"learning_rate": 1.905683822835975e-05,
"loss": 0.9931,
"step": 202
},
{
"epoch": 0.4987714987714988,
"grad_norm": 1.1882957220077515,
"learning_rate": 1.904555728797009e-05,
"loss": 0.9462,
"step": 203
},
{
"epoch": 0.5012285012285013,
"grad_norm": 1.0740363597869873,
"learning_rate": 1.903421266342197e-05,
"loss": 0.9247,
"step": 204
},
{
"epoch": 0.5036855036855037,
"grad_norm": 1.1630939245224,
"learning_rate": 1.9022804434585854e-05,
"loss": 0.9948,
"step": 205
},
{
"epoch": 0.5061425061425061,
"grad_norm": 1.099727988243103,
"learning_rate": 1.9011332681780007e-05,
"loss": 0.9637,
"step": 206
},
{
"epoch": 0.5085995085995086,
"grad_norm": 1.0455065965652466,
"learning_rate": 1.8999797485769925e-05,
"loss": 0.977,
"step": 207
},
{
"epoch": 0.5110565110565111,
"grad_norm": 1.1362916231155396,
"learning_rate": 1.898819892776777e-05,
"loss": 0.9289,
"step": 208
},
{
"epoch": 0.5135135135135135,
"grad_norm": 1.0821365118026733,
"learning_rate": 1.8976537089431793e-05,
"loss": 0.9782,
"step": 209
},
{
"epoch": 0.515970515970516,
"grad_norm": 1.1401381492614746,
"learning_rate": 1.8964812052865764e-05,
"loss": 0.9612,
"step": 210
},
{
"epoch": 0.5184275184275184,
"grad_norm": 1.192937970161438,
"learning_rate": 1.8953023900618395e-05,
"loss": 0.9762,
"step": 211
},
{
"epoch": 0.5208845208845209,
"grad_norm": 1.0559264421463013,
"learning_rate": 1.8941172715682756e-05,
"loss": 0.8589,
"step": 212
},
{
"epoch": 0.5233415233415234,
"grad_norm": 1.0663461685180664,
"learning_rate": 1.8929258581495688e-05,
"loss": 0.9301,
"step": 213
},
{
"epoch": 0.5257985257985258,
"grad_norm": 1.107938289642334,
"learning_rate": 1.8917281581937216e-05,
"loss": 0.9668,
"step": 214
},
{
"epoch": 0.5282555282555282,
"grad_norm": 1.1464968919754028,
"learning_rate": 1.8905241801329972e-05,
"loss": 0.9129,
"step": 215
},
{
"epoch": 0.5307125307125307,
"grad_norm": 1.0721112489700317,
"learning_rate": 1.889313932443858e-05,
"loss": 0.951,
"step": 216
},
{
"epoch": 0.5331695331695332,
"grad_norm": 0.9699763655662537,
"learning_rate": 1.888097423646907e-05,
"loss": 0.8962,
"step": 217
},
{
"epoch": 0.5356265356265356,
"grad_norm": 1.1581181287765503,
"learning_rate": 1.8868746623068292e-05,
"loss": 0.9586,
"step": 218
},
{
"epoch": 0.538083538083538,
"grad_norm": 1.0095258951187134,
"learning_rate": 1.885645657032328e-05,
"loss": 0.9373,
"step": 219
},
{
"epoch": 0.5405405405405406,
"grad_norm": 1.034725546836853,
"learning_rate": 1.884410416476067e-05,
"loss": 0.9411,
"step": 220
},
{
"epoch": 0.542997542997543,
"grad_norm": 1.0320111513137817,
"learning_rate": 1.8831689493346095e-05,
"loss": 0.9121,
"step": 221
},
{
"epoch": 0.5454545454545454,
"grad_norm": 1.1234315633773804,
"learning_rate": 1.881921264348355e-05,
"loss": 1.0093,
"step": 222
},
{
"epoch": 0.547911547911548,
"grad_norm": 1.0910882949829102,
"learning_rate": 1.8806673703014805e-05,
"loss": 0.9478,
"step": 223
},
{
"epoch": 0.5503685503685504,
"grad_norm": 1.1484427452087402,
"learning_rate": 1.8794072760218752e-05,
"loss": 1.0299,
"step": 224
},
{
"epoch": 0.5528255528255528,
"grad_norm": 1.1294251680374146,
"learning_rate": 1.8781409903810823e-05,
"loss": 0.9493,
"step": 225
},
{
"epoch": 0.5552825552825553,
"grad_norm": 1.1251084804534912,
"learning_rate": 1.876868522294233e-05,
"loss": 0.9393,
"step": 226
},
{
"epoch": 0.5577395577395577,
"grad_norm": 1.0826867818832397,
"learning_rate": 1.8755898807199856e-05,
"loss": 0.9591,
"step": 227
},
{
"epoch": 0.5601965601965602,
"grad_norm": 1.1034644842147827,
"learning_rate": 1.8743050746604635e-05,
"loss": 0.9302,
"step": 228
},
{
"epoch": 0.5626535626535627,
"grad_norm": 1.0749222040176392,
"learning_rate": 1.8730141131611882e-05,
"loss": 0.9186,
"step": 229
},
{
"epoch": 0.5651105651105651,
"grad_norm": 1.0956974029541016,
"learning_rate": 1.8717170053110198e-05,
"loss": 0.9394,
"step": 230
},
{
"epoch": 0.5675675675675675,
"grad_norm": 1.0449669361114502,
"learning_rate": 1.870413760242089e-05,
"loss": 0.9042,
"step": 231
},
{
"epoch": 0.5700245700245701,
"grad_norm": 1.0706597566604614,
"learning_rate": 1.869104387129737e-05,
"loss": 0.9079,
"step": 232
},
{
"epoch": 0.5724815724815725,
"grad_norm": 1.070791244506836,
"learning_rate": 1.8677888951924473e-05,
"loss": 0.9668,
"step": 233
},
{
"epoch": 0.5749385749385749,
"grad_norm": 1.1020586490631104,
"learning_rate": 1.8664672936917828e-05,
"loss": 0.9391,
"step": 234
},
{
"epoch": 0.5773955773955773,
"grad_norm": 1.073449969291687,
"learning_rate": 1.8651395919323203e-05,
"loss": 0.9479,
"step": 235
},
{
"epoch": 0.5798525798525799,
"grad_norm": 1.1879843473434448,
"learning_rate": 1.863805799261584e-05,
"loss": 0.9958,
"step": 236
},
{
"epoch": 0.5823095823095823,
"grad_norm": 1.049581527709961,
"learning_rate": 1.8624659250699807e-05,
"loss": 0.9722,
"step": 237
},
{
"epoch": 0.5847665847665847,
"grad_norm": 1.0783240795135498,
"learning_rate": 1.861119978790734e-05,
"loss": 0.901,
"step": 238
},
{
"epoch": 0.5872235872235873,
"grad_norm": 1.0940930843353271,
"learning_rate": 1.8597679698998164e-05,
"loss": 0.9154,
"step": 239
},
{
"epoch": 0.5896805896805897,
"grad_norm": 1.0629860162734985,
"learning_rate": 1.8584099079158842e-05,
"loss": 0.9529,
"step": 240
},
{
"epoch": 0.5921375921375921,
"grad_norm": 1.016574740409851,
"learning_rate": 1.8570458024002094e-05,
"loss": 0.9281,
"step": 241
},
{
"epoch": 0.5945945945945946,
"grad_norm": 1.0727007389068604,
"learning_rate": 1.855675662956613e-05,
"loss": 0.9797,
"step": 242
},
{
"epoch": 0.597051597051597,
"grad_norm": 1.02950119972229,
"learning_rate": 1.854299499231397e-05,
"loss": 0.9029,
"step": 243
},
{
"epoch": 0.5995085995085995,
"grad_norm": 1.0800044536590576,
"learning_rate": 1.852917320913276e-05,
"loss": 0.911,
"step": 244
},
{
"epoch": 0.601965601965602,
"grad_norm": 1.1379327774047852,
"learning_rate": 1.8515291377333114e-05,
"loss": 0.9316,
"step": 245
},
{
"epoch": 0.6044226044226044,
"grad_norm": 1.001541256904602,
"learning_rate": 1.8501349594648394e-05,
"loss": 0.9213,
"step": 246
},
{
"epoch": 0.6068796068796068,
"grad_norm": 1.0450564622879028,
"learning_rate": 1.8487347959234042e-05,
"loss": 0.9709,
"step": 247
},
{
"epoch": 0.6093366093366094,
"grad_norm": 1.1812976598739624,
"learning_rate": 1.847328656966689e-05,
"loss": 1.0262,
"step": 248
},
{
"epoch": 0.6117936117936118,
"grad_norm": 1.1157077550888062,
"learning_rate": 1.8459165524944463e-05,
"loss": 0.9947,
"step": 249
},
{
"epoch": 0.6142506142506142,
"grad_norm": 1.0730394124984741,
"learning_rate": 1.8444984924484278e-05,
"loss": 0.9009,
"step": 250
},
{
"epoch": 0.6167076167076168,
"grad_norm": 1.0620498657226562,
"learning_rate": 1.8430744868123146e-05,
"loss": 0.9692,
"step": 251
},
{
"epoch": 0.6191646191646192,
"grad_norm": 1.0852974653244019,
"learning_rate": 1.8416445456116473e-05,
"loss": 0.9467,
"step": 252
},
{
"epoch": 0.6216216216216216,
"grad_norm": 1.0586998462677002,
"learning_rate": 1.8402086789137547e-05,
"loss": 0.9297,
"step": 253
},
{
"epoch": 0.6240786240786241,
"grad_norm": 1.0674982070922852,
"learning_rate": 1.8387668968276836e-05,
"loss": 0.8964,
"step": 254
},
{
"epoch": 0.6265356265356266,
"grad_norm": 1.1577105522155762,
"learning_rate": 1.8373192095041278e-05,
"loss": 1.0358,
"step": 255
},
{
"epoch": 0.628992628992629,
"grad_norm": 1.0243662595748901,
"learning_rate": 1.8358656271353558e-05,
"loss": 0.9246,
"step": 256
},
{
"epoch": 0.6314496314496314,
"grad_norm": 1.0408570766448975,
"learning_rate": 1.8344061599551397e-05,
"loss": 0.9392,
"step": 257
},
{
"epoch": 0.6339066339066339,
"grad_norm": 1.0425841808319092,
"learning_rate": 1.832940818238682e-05,
"loss": 0.9868,
"step": 258
},
{
"epoch": 0.6363636363636364,
"grad_norm": 1.0140665769577026,
"learning_rate": 1.8314696123025456e-05,
"loss": 0.9199,
"step": 259
},
{
"epoch": 0.6388206388206388,
"grad_norm": 1.1359258890151978,
"learning_rate": 1.8299925525045782e-05,
"loss": 0.9352,
"step": 260
},
{
"epoch": 0.6412776412776413,
"grad_norm": 1.052809238433838,
"learning_rate": 1.8285096492438424e-05,
"loss": 0.9269,
"step": 261
},
{
"epoch": 0.6437346437346437,
"grad_norm": 1.1689685583114624,
"learning_rate": 1.8270209129605397e-05,
"loss": 0.9481,
"step": 262
},
{
"epoch": 0.6461916461916462,
"grad_norm": 1.1114914417266846,
"learning_rate": 1.8255263541359397e-05,
"loss": 0.9334,
"step": 263
},
{
"epoch": 0.6486486486486487,
"grad_norm": 1.059890627861023,
"learning_rate": 1.8240259832923035e-05,
"loss": 0.9283,
"step": 264
},
{
"epoch": 0.6511056511056511,
"grad_norm": 1.071092128753662,
"learning_rate": 1.8225198109928116e-05,
"loss": 0.9556,
"step": 265
},
{
"epoch": 0.6535626535626535,
"grad_norm": 1.1584666967391968,
"learning_rate": 1.8210078478414895e-05,
"loss": 0.9638,
"step": 266
},
{
"epoch": 0.6560196560196561,
"grad_norm": 1.0983332395553589,
"learning_rate": 1.8194901044831313e-05,
"loss": 1.0076,
"step": 267
},
{
"epoch": 0.6584766584766585,
"grad_norm": 1.0883970260620117,
"learning_rate": 1.817966591603227e-05,
"loss": 0.9824,
"step": 268
},
{
"epoch": 0.6609336609336609,
"grad_norm": 1.1348230838775635,
"learning_rate": 1.8164373199278858e-05,
"loss": 0.9559,
"step": 269
},
{
"epoch": 0.6633906633906634,
"grad_norm": 1.0387697219848633,
"learning_rate": 1.8149023002237612e-05,
"loss": 0.9434,
"step": 270
},
{
"epoch": 0.6658476658476659,
"grad_norm": 1.044997215270996,
"learning_rate": 1.8133615432979742e-05,
"loss": 0.9624,
"step": 271
},
{
"epoch": 0.6683046683046683,
"grad_norm": 1.0559712648391724,
"learning_rate": 1.8118150599980398e-05,
"loss": 0.9228,
"step": 272
},
{
"epoch": 0.6707616707616708,
"grad_norm": 1.036546230316162,
"learning_rate": 1.8102628612117868e-05,
"loss": 0.9468,
"step": 273
},
{
"epoch": 0.6732186732186732,
"grad_norm": 1.0674127340316772,
"learning_rate": 1.8087049578672847e-05,
"loss": 0.9646,
"step": 274
},
{
"epoch": 0.6756756756756757,
"grad_norm": 1.0553275346755981,
"learning_rate": 1.8071413609327638e-05,
"loss": 0.9321,
"step": 275
},
{
"epoch": 0.6781326781326781,
"grad_norm": 0.9926275610923767,
"learning_rate": 1.8055720814165415e-05,
"loss": 0.9003,
"step": 276
},
{
"epoch": 0.6805896805896806,
"grad_norm": 1.0728232860565186,
"learning_rate": 1.8039971303669407e-05,
"loss": 0.9242,
"step": 277
},
{
"epoch": 0.683046683046683,
"grad_norm": 1.0888348817825317,
"learning_rate": 1.8024165188722153e-05,
"loss": 0.9561,
"step": 278
},
{
"epoch": 0.6855036855036855,
"grad_norm": 1.090308427810669,
"learning_rate": 1.80083025806047e-05,
"loss": 0.9635,
"step": 279
},
{
"epoch": 0.687960687960688,
"grad_norm": 1.0397887229919434,
"learning_rate": 1.799238359099584e-05,
"loss": 0.917,
"step": 280
},
{
"epoch": 0.6904176904176904,
"grad_norm": 1.0251840353012085,
"learning_rate": 1.79764083319713e-05,
"loss": 0.9511,
"step": 281
},
{
"epoch": 0.6928746928746928,
"grad_norm": 1.192077398300171,
"learning_rate": 1.7960376916002974e-05,
"loss": 0.9834,
"step": 282
},
{
"epoch": 0.6953316953316954,
"grad_norm": 1.1752359867095947,
"learning_rate": 1.7944289455958114e-05,
"loss": 0.9448,
"step": 283
},
{
"epoch": 0.6977886977886978,
"grad_norm": 1.0350326299667358,
"learning_rate": 1.792814606509855e-05,
"loss": 0.9827,
"step": 284
},
{
"epoch": 0.7002457002457002,
"grad_norm": 1.0373021364212036,
"learning_rate": 1.7911946857079886e-05,
"loss": 0.979,
"step": 285
},
{
"epoch": 0.7027027027027027,
"grad_norm": 1.0575249195098877,
"learning_rate": 1.7895691945950696e-05,
"loss": 0.9272,
"step": 286
},
{
"epoch": 0.7051597051597052,
"grad_norm": 0.96884685754776,
"learning_rate": 1.787938144615173e-05,
"loss": 0.8975,
"step": 287
},
{
"epoch": 0.7076167076167076,
"grad_norm": 0.9968435168266296,
"learning_rate": 1.78630154725151e-05,
"loss": 0.9359,
"step": 288
},
{
"epoch": 0.7100737100737101,
"grad_norm": 1.1364377737045288,
"learning_rate": 1.7846594140263475e-05,
"loss": 0.9421,
"step": 289
},
{
"epoch": 0.7125307125307125,
"grad_norm": 1.0569933652877808,
"learning_rate": 1.783011756500927e-05,
"loss": 0.9258,
"step": 290
},
{
"epoch": 0.714987714987715,
"grad_norm": 1.023688793182373,
"learning_rate": 1.7813585862753832e-05,
"loss": 0.9086,
"step": 291
},
{
"epoch": 0.7174447174447175,
"grad_norm": 1.0676270723342896,
"learning_rate": 1.779699914988662e-05,
"loss": 0.9039,
"step": 292
},
{
"epoch": 0.7199017199017199,
"grad_norm": 1.1354315280914307,
"learning_rate": 1.7780357543184396e-05,
"loss": 0.9492,
"step": 293
},
{
"epoch": 0.7223587223587223,
"grad_norm": 1.0893232822418213,
"learning_rate": 1.776366115981039e-05,
"loss": 0.9203,
"step": 294
},
{
"epoch": 0.7248157248157249,
"grad_norm": 1.1178086996078491,
"learning_rate": 1.7746910117313482e-05,
"loss": 0.9977,
"step": 295
},
{
"epoch": 0.7272727272727273,
"grad_norm": 1.0091534852981567,
"learning_rate": 1.773010453362737e-05,
"loss": 0.9312,
"step": 296
},
{
"epoch": 0.7297297297297297,
"grad_norm": 1.0890346765518188,
"learning_rate": 1.771324452706975e-05,
"loss": 1.0266,
"step": 297
},
{
"epoch": 0.7321867321867321,
"grad_norm": 0.989084005355835,
"learning_rate": 1.7696330216341465e-05,
"loss": 0.88,
"step": 298
},
{
"epoch": 0.7346437346437347,
"grad_norm": 1.1306614875793457,
"learning_rate": 1.767936172052569e-05,
"loss": 0.9458,
"step": 299
},
{
"epoch": 0.7371007371007371,
"grad_norm": 1.0816539525985718,
"learning_rate": 1.7662339159087077e-05,
"loss": 0.9342,
"step": 300
},
{
"epoch": 0.7395577395577395,
"grad_norm": 1.1475470066070557,
"learning_rate": 1.7645262651870926e-05,
"loss": 0.9888,
"step": 301
},
{
"epoch": 0.742014742014742,
"grad_norm": 1.0547964572906494,
"learning_rate": 1.762813231910233e-05,
"loss": 0.873,
"step": 302
},
{
"epoch": 0.7444717444717445,
"grad_norm": 1.0881352424621582,
"learning_rate": 1.761094828138534e-05,
"loss": 0.9252,
"step": 303
},
{
"epoch": 0.7469287469287469,
"grad_norm": 1.0736604928970337,
"learning_rate": 1.7593710659702105e-05,
"loss": 0.9348,
"step": 304
},
{
"epoch": 0.7493857493857494,
"grad_norm": 1.092803716659546,
"learning_rate": 1.7576419575412028e-05,
"loss": 0.94,
"step": 305
},
{
"epoch": 0.7518427518427518,
"grad_norm": 1.0088696479797363,
"learning_rate": 1.7559075150250913e-05,
"loss": 0.9635,
"step": 306
},
{
"epoch": 0.7542997542997543,
"grad_norm": 1.0789767503738403,
"learning_rate": 1.754167750633009e-05,
"loss": 0.9647,
"step": 307
},
{
"epoch": 0.7567567567567568,
"grad_norm": 0.9914807081222534,
"learning_rate": 1.7524226766135587e-05,
"loss": 0.9332,
"step": 308
},
{
"epoch": 0.7592137592137592,
"grad_norm": 1.0111716985702515,
"learning_rate": 1.7506723052527243e-05,
"loss": 0.9126,
"step": 309
},
{
"epoch": 0.7616707616707616,
"grad_norm": 0.9699746370315552,
"learning_rate": 1.7489166488737847e-05,
"loss": 0.9494,
"step": 310
},
{
"epoch": 0.7641277641277642,
"grad_norm": 1.048949956893921,
"learning_rate": 1.7471557198372277e-05,
"loss": 0.9702,
"step": 311
},
{
"epoch": 0.7665847665847666,
"grad_norm": 1.0581941604614258,
"learning_rate": 1.7453895305406615e-05,
"loss": 0.9911,
"step": 312
},
{
"epoch": 0.769041769041769,
"grad_norm": 1.0768938064575195,
"learning_rate": 1.7436180934187307e-05,
"loss": 0.9492,
"step": 313
},
{
"epoch": 0.7714987714987716,
"grad_norm": 1.0591368675231934,
"learning_rate": 1.741841420943025e-05,
"loss": 0.9585,
"step": 314
},
{
"epoch": 0.773955773955774,
"grad_norm": 1.0444432497024536,
"learning_rate": 1.740059525621993e-05,
"loss": 0.9736,
"step": 315
},
{
"epoch": 0.7764127764127764,
"grad_norm": 1.032731056213379,
"learning_rate": 1.7382724200008546e-05,
"loss": 0.9236,
"step": 316
},
{
"epoch": 0.7788697788697788,
"grad_norm": 1.0368685722351074,
"learning_rate": 1.7364801166615124e-05,
"loss": 0.9678,
"step": 317
},
{
"epoch": 0.7813267813267813,
"grad_norm": 0.9975135922431946,
"learning_rate": 1.734682628222462e-05,
"loss": 0.9506,
"step": 318
},
{
"epoch": 0.7837837837837838,
"grad_norm": 0.969009280204773,
"learning_rate": 1.7328799673387053e-05,
"loss": 0.9284,
"step": 319
},
{
"epoch": 0.7862407862407862,
"grad_norm": 1.0193045139312744,
"learning_rate": 1.7310721467016587e-05,
"loss": 0.9434,
"step": 320
},
{
"epoch": 0.7886977886977887,
"grad_norm": 1.0702950954437256,
"learning_rate": 1.7292591790390668e-05,
"loss": 0.9494,
"step": 321
},
{
"epoch": 0.7911547911547911,
"grad_norm": 1.0544315576553345,
"learning_rate": 1.7274410771149094e-05,
"loss": 0.905,
"step": 322
},
{
"epoch": 0.7936117936117936,
"grad_norm": 1.1505653858184814,
"learning_rate": 1.725617853729316e-05,
"loss": 0.9587,
"step": 323
},
{
"epoch": 0.7960687960687961,
"grad_norm": 0.9675195813179016,
"learning_rate": 1.7237895217184702e-05,
"loss": 0.9715,
"step": 324
},
{
"epoch": 0.7985257985257985,
"grad_norm": 1.0955339670181274,
"learning_rate": 1.7219560939545246e-05,
"loss": 0.9342,
"step": 325
},
{
"epoch": 0.800982800982801,
"grad_norm": 1.127308964729309,
"learning_rate": 1.7201175833455066e-05,
"loss": 0.945,
"step": 326
},
{
"epoch": 0.8034398034398035,
"grad_norm": 1.0063378810882568,
"learning_rate": 1.718274002835229e-05,
"loss": 0.9328,
"step": 327
},
{
"epoch": 0.8058968058968059,
"grad_norm": 1.0212886333465576,
"learning_rate": 1.7164253654031986e-05,
"loss": 0.926,
"step": 328
},
{
"epoch": 0.8083538083538083,
"grad_norm": 0.9954712986946106,
"learning_rate": 1.7145716840645253e-05,
"loss": 0.9266,
"step": 329
},
{
"epoch": 0.8108108108108109,
"grad_norm": 0.9671973586082458,
"learning_rate": 1.7127129718698298e-05,
"loss": 0.975,
"step": 330
},
{
"epoch": 0.8132678132678133,
"grad_norm": 1.116471767425537,
"learning_rate": 1.710849241905151e-05,
"loss": 0.9474,
"step": 331
},
{
"epoch": 0.8157248157248157,
"grad_norm": 1.031991720199585,
"learning_rate": 1.7089805072918567e-05,
"loss": 0.9674,
"step": 332
},
{
"epoch": 0.8181818181818182,
"grad_norm": 1.0887236595153809,
"learning_rate": 1.7071067811865477e-05,
"loss": 0.9402,
"step": 333
},
{
"epoch": 0.8206388206388207,
"grad_norm": 1.0192559957504272,
"learning_rate": 1.7052280767809672e-05,
"loss": 0.9201,
"step": 334
},
{
"epoch": 0.8230958230958231,
"grad_norm": 1.0986623764038086,
"learning_rate": 1.7033444073019077e-05,
"loss": 0.9507,
"step": 335
},
{
"epoch": 0.8255528255528255,
"grad_norm": 1.0037617683410645,
"learning_rate": 1.7014557860111184e-05,
"loss": 0.9653,
"step": 336
},
{
"epoch": 0.828009828009828,
"grad_norm": 1.0202786922454834,
"learning_rate": 1.6995622262052093e-05,
"loss": 0.9412,
"step": 337
},
{
"epoch": 0.8304668304668305,
"grad_norm": 0.9347285628318787,
"learning_rate": 1.697663741215561e-05,
"loss": 0.9528,
"step": 338
},
{
"epoch": 0.8329238329238329,
"grad_norm": 1.0408426523208618,
"learning_rate": 1.6957603444082295e-05,
"loss": 0.9172,
"step": 339
},
{
"epoch": 0.8353808353808354,
"grad_norm": 1.1628843545913696,
"learning_rate": 1.6938520491838502e-05,
"loss": 1.0025,
"step": 340
},
{
"epoch": 0.8378378378378378,
"grad_norm": 0.9961422681808472,
"learning_rate": 1.6919388689775463e-05,
"loss": 0.9578,
"step": 341
},
{
"epoch": 0.8402948402948403,
"grad_norm": 1.0189507007598877,
"learning_rate": 1.6900208172588333e-05,
"loss": 0.9201,
"step": 342
},
{
"epoch": 0.8427518427518428,
"grad_norm": 1.0783741474151611,
"learning_rate": 1.6880979075315238e-05,
"loss": 0.8905,
"step": 343
},
{
"epoch": 0.8452088452088452,
"grad_norm": 1.1078404188156128,
"learning_rate": 1.6861701533336322e-05,
"loss": 0.9699,
"step": 344
},
{
"epoch": 0.8476658476658476,
"grad_norm": 1.016022801399231,
"learning_rate": 1.6842375682372803e-05,
"loss": 0.9316,
"step": 345
},
{
"epoch": 0.8501228501228502,
"grad_norm": 1.083304762840271,
"learning_rate": 1.6823001658486013e-05,
"loss": 0.9267,
"step": 346
},
{
"epoch": 0.8525798525798526,
"grad_norm": 1.6159943342208862,
"learning_rate": 1.6803579598076434e-05,
"loss": 1.0386,
"step": 347
},
{
"epoch": 0.855036855036855,
"grad_norm": 1.1118335723876953,
"learning_rate": 1.678410963788275e-05,
"loss": 0.9803,
"step": 348
},
{
"epoch": 0.8574938574938575,
"grad_norm": 0.9951636791229248,
"learning_rate": 1.676459191498087e-05,
"loss": 0.9072,
"step": 349
},
{
"epoch": 0.85995085995086,
"grad_norm": 1.0147863626480103,
"learning_rate": 1.674502656678298e-05,
"loss": 0.9751,
"step": 350
},
{
"epoch": 0.8624078624078624,
"grad_norm": 1.0647886991500854,
"learning_rate": 1.6725413731036562e-05,
"loss": 0.8859,
"step": 351
},
{
"epoch": 0.8648648648648649,
"grad_norm": 1.0200670957565308,
"learning_rate": 1.6705753545823423e-05,
"loss": 0.9253,
"step": 352
},
{
"epoch": 0.8673218673218673,
"grad_norm": 1.0413808822631836,
"learning_rate": 1.6686046149558736e-05,
"loss": 0.9261,
"step": 353
},
{
"epoch": 0.8697788697788698,
"grad_norm": 1.060482382774353,
"learning_rate": 1.6666291680990056e-05,
"loss": 0.9291,
"step": 354
},
{
"epoch": 0.8722358722358723,
"grad_norm": 1.052126169204712,
"learning_rate": 1.6646490279196344e-05,
"loss": 0.9597,
"step": 355
},
{
"epoch": 0.8746928746928747,
"grad_norm": 1.0208313465118408,
"learning_rate": 1.6626642083586986e-05,
"loss": 0.9335,
"step": 356
},
{
"epoch": 0.8771498771498771,
"grad_norm": 1.148834466934204,
"learning_rate": 1.6606747233900816e-05,
"loss": 0.9782,
"step": 357
},
{
"epoch": 0.8796068796068796,
"grad_norm": 1.0331612825393677,
"learning_rate": 1.6586805870205135e-05,
"loss": 0.8924,
"step": 358
},
{
"epoch": 0.8820638820638821,
"grad_norm": 1.1008647680282593,
"learning_rate": 1.656681813289471e-05,
"loss": 0.9563,
"step": 359
},
{
"epoch": 0.8845208845208845,
"grad_norm": 1.1791423559188843,
"learning_rate": 1.654678416269081e-05,
"loss": 1.0417,
"step": 360
},
{
"epoch": 0.8869778869778869,
"grad_norm": 1.0597944259643555,
"learning_rate": 1.652670410064019e-05,
"loss": 0.9014,
"step": 361
},
{
"epoch": 0.8894348894348895,
"grad_norm": 1.1398617029190063,
"learning_rate": 1.6506578088114105e-05,
"loss": 0.9467,
"step": 362
},
{
"epoch": 0.8918918918918919,
"grad_norm": 1.1174441576004028,
"learning_rate": 1.6486406266807343e-05,
"loss": 0.9263,
"step": 363
},
{
"epoch": 0.8943488943488943,
"grad_norm": 1.161808729171753,
"learning_rate": 1.646618877873717e-05,
"loss": 0.9582,
"step": 364
},
{
"epoch": 0.8968058968058968,
"grad_norm": 1.0449280738830566,
"learning_rate": 1.6445925766242392e-05,
"loss": 0.9732,
"step": 365
},
{
"epoch": 0.8992628992628993,
"grad_norm": 1.1118457317352295,
"learning_rate": 1.6425617371982302e-05,
"loss": 0.9491,
"step": 366
},
{
"epoch": 0.9017199017199017,
"grad_norm": 1.1369138956069946,
"learning_rate": 1.6405263738935716e-05,
"loss": 0.9477,
"step": 367
},
{
"epoch": 0.9041769041769042,
"grad_norm": 1.1870582103729248,
"learning_rate": 1.6384865010399935e-05,
"loss": 0.9596,
"step": 368
},
{
"epoch": 0.9066339066339066,
"grad_norm": 1.2551947832107544,
"learning_rate": 1.6364421329989758e-05,
"loss": 0.9229,
"step": 369
},
{
"epoch": 0.9090909090909091,
"grad_norm": 1.0891375541687012,
"learning_rate": 1.6343932841636455e-05,
"loss": 0.9326,
"step": 370
},
{
"epoch": 0.9115479115479116,
"grad_norm": 1.0899828672409058,
"learning_rate": 1.632339968958677e-05,
"loss": 0.9336,
"step": 371
},
{
"epoch": 0.914004914004914,
"grad_norm": 1.0995900630950928,
"learning_rate": 1.6302822018401885e-05,
"loss": 0.8952,
"step": 372
},
{
"epoch": 0.9164619164619164,
"grad_norm": 1.0277985334396362,
"learning_rate": 1.6282199972956425e-05,
"loss": 0.9295,
"step": 373
},
{
"epoch": 0.918918918918919,
"grad_norm": 1.0418602228164673,
"learning_rate": 1.6261533698437416e-05,
"loss": 0.9309,
"step": 374
},
{
"epoch": 0.9213759213759214,
"grad_norm": 1.0787702798843384,
"learning_rate": 1.6240823340343285e-05,
"loss": 0.9481,
"step": 375
},
{
"epoch": 0.9238329238329238,
"grad_norm": 1.0829393863677979,
"learning_rate": 1.6220069044482815e-05,
"loss": 0.8948,
"step": 376
},
{
"epoch": 0.9262899262899262,
"grad_norm": 1.0835132598876953,
"learning_rate": 1.6199270956974128e-05,
"loss": 0.9834,
"step": 377
},
{
"epoch": 0.9287469287469288,
"grad_norm": 1.098840594291687,
"learning_rate": 1.6178429224243665e-05,
"loss": 0.917,
"step": 378
},
{
"epoch": 0.9312039312039312,
"grad_norm": 1.0230814218521118,
"learning_rate": 1.6157543993025134e-05,
"loss": 0.9491,
"step": 379
},
{
"epoch": 0.9336609336609336,
"grad_norm": 0.9591358304023743,
"learning_rate": 1.6136615410358493e-05,
"loss": 0.9544,
"step": 380
},
{
"epoch": 0.9361179361179361,
"grad_norm": 1.0422619581222534,
"learning_rate": 1.6115643623588915e-05,
"loss": 0.9421,
"step": 381
},
{
"epoch": 0.9385749385749386,
"grad_norm": 1.0649887323379517,
"learning_rate": 1.6094628780365745e-05,
"loss": 0.9139,
"step": 382
},
{
"epoch": 0.941031941031941,
"grad_norm": 1.1111162900924683,
"learning_rate": 1.6073571028641452e-05,
"loss": 0.9219,
"step": 383
},
{
"epoch": 0.9434889434889435,
"grad_norm": 1.0625733137130737,
"learning_rate": 1.6052470516670613e-05,
"loss": 0.9556,
"step": 384
},
{
"epoch": 0.9459459459459459,
"grad_norm": 1.0070087909698486,
"learning_rate": 1.6031327393008848e-05,
"loss": 0.9596,
"step": 385
},
{
"epoch": 0.9484029484029484,
"grad_norm": 1.0320253372192383,
"learning_rate": 1.6010141806511765e-05,
"loss": 0.9596,
"step": 386
},
{
"epoch": 0.9508599508599509,
"grad_norm": 1.0545932054519653,
"learning_rate": 1.598891390633395e-05,
"loss": 0.9776,
"step": 387
},
{
"epoch": 0.9533169533169533,
"grad_norm": 1.021756649017334,
"learning_rate": 1.596764384192787e-05,
"loss": 0.9311,
"step": 388
},
{
"epoch": 0.9557739557739557,
"grad_norm": 1.0189892053604126,
"learning_rate": 1.594633176304287e-05,
"loss": 0.923,
"step": 389
},
{
"epoch": 0.9582309582309583,
"grad_norm": 1.037270426750183,
"learning_rate": 1.5924977819724068e-05,
"loss": 0.9173,
"step": 390
},
{
"epoch": 0.9606879606879607,
"grad_norm": 1.061562180519104,
"learning_rate": 1.590358216231134e-05,
"loss": 0.9129,
"step": 391
},
{
"epoch": 0.9631449631449631,
"grad_norm": 1.053601622581482,
"learning_rate": 1.5882144941438234e-05,
"loss": 0.9286,
"step": 392
},
{
"epoch": 0.9656019656019657,
"grad_norm": 1.1030821800231934,
"learning_rate": 1.5860666308030933e-05,
"loss": 1.0121,
"step": 393
},
{
"epoch": 0.9680589680589681,
"grad_norm": 0.9831104278564453,
"learning_rate": 1.5839146413307167e-05,
"loss": 0.9245,
"step": 394
},
{
"epoch": 0.9705159705159705,
"grad_norm": 1.0132577419281006,
"learning_rate": 1.5817585408775168e-05,
"loss": 0.8906,
"step": 395
},
{
"epoch": 0.972972972972973,
"grad_norm": 1.1054580211639404,
"learning_rate": 1.5795983446232602e-05,
"loss": 0.9785,
"step": 396
},
{
"epoch": 0.9754299754299754,
"grad_norm": 0.9700505137443542,
"learning_rate": 1.5774340677765483e-05,
"loss": 0.9462,
"step": 397
},
{
"epoch": 0.9778869778869779,
"grad_norm": 1.126731276512146,
"learning_rate": 1.5752657255747122e-05,
"loss": 0.9751,
"step": 398
},
{
"epoch": 0.9803439803439803,
"grad_norm": 1.0231095552444458,
"learning_rate": 1.5730933332837045e-05,
"loss": 0.958,
"step": 399
},
{
"epoch": 0.9828009828009828,
"grad_norm": 1.0033038854599,
"learning_rate": 1.5709169061979915e-05,
"loss": 0.892,
"step": 400
},
{
"epoch": 0.9852579852579852,
"grad_norm": 1.080012559890747,
"learning_rate": 1.568736459640447e-05,
"loss": 0.9401,
"step": 401
},
{
"epoch": 0.9877149877149877,
"grad_norm": 1.0674031972885132,
"learning_rate": 1.5665520089622424e-05,
"loss": 0.9373,
"step": 402
},
{
"epoch": 0.9901719901719902,
"grad_norm": 0.9849039316177368,
"learning_rate": 1.5643635695427405e-05,
"loss": 0.9225,
"step": 403
},
{
"epoch": 0.9926289926289926,
"grad_norm": 0.9782394170761108,
"learning_rate": 1.5621711567893853e-05,
"loss": 0.9431,
"step": 404
},
{
"epoch": 0.995085995085995,
"grad_norm": 1.012881875038147,
"learning_rate": 1.5599747861375957e-05,
"loss": 0.9146,
"step": 405
},
{
"epoch": 0.9975429975429976,
"grad_norm": 0.9708315134048462,
"learning_rate": 1.5577744730506545e-05,
"loss": 0.9521,
"step": 406
},
{
"epoch": 1.0,
"grad_norm": 1.0670959949493408,
"learning_rate": 1.5555702330196024e-05,
"loss": 0.8446,
"step": 407
},
{
"epoch": 1.0024570024570025,
"grad_norm": 1.511311411857605,
"learning_rate": 1.5533620815631255e-05,
"loss": 0.6437,
"step": 408
},
{
"epoch": 1.0049140049140048,
"grad_norm": 1.3223845958709717,
"learning_rate": 1.551150034227449e-05,
"loss": 0.6258,
"step": 409
},
{
"epoch": 1.0073710073710074,
"grad_norm": 1.1301041841506958,
"learning_rate": 1.5489341065862263e-05,
"loss": 0.6252,
"step": 410
},
{
"epoch": 1.00982800982801,
"grad_norm": 1.0698575973510742,
"learning_rate": 1.546714314240429e-05,
"loss": 0.5928,
"step": 411
},
{
"epoch": 1.0122850122850122,
"grad_norm": 1.2460176944732666,
"learning_rate": 1.5444906728182388e-05,
"loss": 0.5851,
"step": 412
},
{
"epoch": 1.0147420147420148,
"grad_norm": 1.3937596082687378,
"learning_rate": 1.5422631979749354e-05,
"loss": 0.5751,
"step": 413
},
{
"epoch": 1.0171990171990173,
"grad_norm": 1.3066784143447876,
"learning_rate": 1.5400319053927875e-05,
"loss": 0.5688,
"step": 414
},
{
"epoch": 1.0196560196560196,
"grad_norm": 1.098724126815796,
"learning_rate": 1.5377968107809425e-05,
"loss": 0.5269,
"step": 415
},
{
"epoch": 1.0221130221130221,
"grad_norm": 1.1766797304153442,
"learning_rate": 1.5355579298753154e-05,
"loss": 0.5651,
"step": 416
},
{
"epoch": 1.0245700245700247,
"grad_norm": 1.2139365673065186,
"learning_rate": 1.5333152784384777e-05,
"loss": 0.6453,
"step": 417
},
{
"epoch": 1.027027027027027,
"grad_norm": 1.347971796989441,
"learning_rate": 1.5310688722595472e-05,
"loss": 0.5967,
"step": 418
},
{
"epoch": 1.0294840294840295,
"grad_norm": 1.4031742811203003,
"learning_rate": 1.528818727154077e-05,
"loss": 0.59,
"step": 419
},
{
"epoch": 1.031941031941032,
"grad_norm": 1.1434392929077148,
"learning_rate": 1.5265648589639424e-05,
"loss": 0.5591,
"step": 420
},
{
"epoch": 1.0343980343980343,
"grad_norm": 1.1155598163604736,
"learning_rate": 1.5243072835572319e-05,
"loss": 0.5689,
"step": 421
},
{
"epoch": 1.0368550368550369,
"grad_norm": 1.2288144826889038,
"learning_rate": 1.5220460168281335e-05,
"loss": 0.6077,
"step": 422
},
{
"epoch": 1.0393120393120394,
"grad_norm": 1.1721631288528442,
"learning_rate": 1.519781074696824e-05,
"loss": 0.5737,
"step": 423
},
{
"epoch": 1.0417690417690417,
"grad_norm": 1.1209845542907715,
"learning_rate": 1.5175124731093553e-05,
"loss": 0.5862,
"step": 424
},
{
"epoch": 1.0442260442260443,
"grad_norm": 1.1233024597167969,
"learning_rate": 1.5152402280375454e-05,
"loss": 0.565,
"step": 425
},
{
"epoch": 1.0466830466830466,
"grad_norm": 1.1558212041854858,
"learning_rate": 1.5129643554788614e-05,
"loss": 0.5595,
"step": 426
},
{
"epoch": 1.049140049140049,
"grad_norm": 1.147844910621643,
"learning_rate": 1.5106848714563112e-05,
"loss": 0.5793,
"step": 427
},
{
"epoch": 1.0515970515970516,
"grad_norm": 1.128221869468689,
"learning_rate": 1.5084017920183271e-05,
"loss": 0.5357,
"step": 428
},
{
"epoch": 1.054054054054054,
"grad_norm": 1.3069839477539062,
"learning_rate": 1.5061151332386565e-05,
"loss": 0.5914,
"step": 429
},
{
"epoch": 1.0565110565110565,
"grad_norm": 1.1022357940673828,
"learning_rate": 1.5038249112162446e-05,
"loss": 0.6201,
"step": 430
},
{
"epoch": 1.058968058968059,
"grad_norm": 1.3305854797363281,
"learning_rate": 1.5015311420751243e-05,
"loss": 0.6081,
"step": 431
},
{
"epoch": 1.0614250614250613,
"grad_norm": 1.1212079524993896,
"learning_rate": 1.4992338419643022e-05,
"loss": 0.5857,
"step": 432
},
{
"epoch": 1.0638820638820639,
"grad_norm": 1.1525722742080688,
"learning_rate": 1.4969330270576428e-05,
"loss": 0.5945,
"step": 433
},
{
"epoch": 1.0663390663390664,
"grad_norm": 1.0892962217330933,
"learning_rate": 1.4946287135537571e-05,
"loss": 0.5693,
"step": 434
},
{
"epoch": 1.0687960687960687,
"grad_norm": 1.0893158912658691,
"learning_rate": 1.4923209176758872e-05,
"loss": 0.5715,
"step": 435
},
{
"epoch": 1.0712530712530712,
"grad_norm": 1.0946080684661865,
"learning_rate": 1.4900096556717923e-05,
"loss": 0.5478,
"step": 436
},
{
"epoch": 1.0737100737100738,
"grad_norm": 1.124007225036621,
"learning_rate": 1.4876949438136348e-05,
"loss": 0.5887,
"step": 437
},
{
"epoch": 1.076167076167076,
"grad_norm": 1.1379528045654297,
"learning_rate": 1.485376798397865e-05,
"loss": 0.5656,
"step": 438
},
{
"epoch": 1.0786240786240786,
"grad_norm": 1.1640037298202515,
"learning_rate": 1.4830552357451075e-05,
"loss": 0.6137,
"step": 439
},
{
"epoch": 1.0810810810810811,
"grad_norm": 1.1026787757873535,
"learning_rate": 1.4807302722000447e-05,
"loss": 0.5827,
"step": 440
},
{
"epoch": 1.0835380835380835,
"grad_norm": 1.0457117557525635,
"learning_rate": 1.4784019241313025e-05,
"loss": 0.5224,
"step": 441
},
{
"epoch": 1.085995085995086,
"grad_norm": 1.0897119045257568,
"learning_rate": 1.4760702079313363e-05,
"loss": 0.56,
"step": 442
},
{
"epoch": 1.0884520884520885,
"grad_norm": 1.099949836730957,
"learning_rate": 1.473735140016313e-05,
"loss": 0.5739,
"step": 443
},
{
"epoch": 1.0909090909090908,
"grad_norm": 1.1518844366073608,
"learning_rate": 1.4713967368259981e-05,
"loss": 0.5996,
"step": 444
},
{
"epoch": 1.0933660933660934,
"grad_norm": 1.13741934299469,
"learning_rate": 1.4690550148236371e-05,
"loss": 0.5978,
"step": 445
},
{
"epoch": 1.095823095823096,
"grad_norm": 1.1064966917037964,
"learning_rate": 1.466709990495843e-05,
"loss": 0.5975,
"step": 446
},
{
"epoch": 1.0982800982800982,
"grad_norm": 1.1217718124389648,
"learning_rate": 1.4643616803524778e-05,
"loss": 0.5567,
"step": 447
},
{
"epoch": 1.1007371007371007,
"grad_norm": 1.0240809917449951,
"learning_rate": 1.462010100926536e-05,
"loss": 0.5173,
"step": 448
},
{
"epoch": 1.1031941031941033,
"grad_norm": 1.1165695190429688,
"learning_rate": 1.4596552687740304e-05,
"loss": 0.5505,
"step": 449
},
{
"epoch": 1.1056511056511056,
"grad_norm": 1.2207868099212646,
"learning_rate": 1.4572972004738732e-05,
"loss": 0.6181,
"step": 450
},
{
"epoch": 1.1081081081081081,
"grad_norm": 1.0639880895614624,
"learning_rate": 1.454935912627761e-05,
"loss": 0.5136,
"step": 451
},
{
"epoch": 1.1105651105651106,
"grad_norm": 1.145183801651001,
"learning_rate": 1.4525714218600566e-05,
"loss": 0.5783,
"step": 452
},
{
"epoch": 1.113022113022113,
"grad_norm": 1.0687991380691528,
"learning_rate": 1.4502037448176734e-05,
"loss": 0.5946,
"step": 453
},
{
"epoch": 1.1154791154791155,
"grad_norm": 1.100060224533081,
"learning_rate": 1.4478328981699568e-05,
"loss": 0.5731,
"step": 454
},
{
"epoch": 1.117936117936118,
"grad_norm": 1.0320765972137451,
"learning_rate": 1.4454588986085677e-05,
"loss": 0.5585,
"step": 455
},
{
"epoch": 1.1203931203931203,
"grad_norm": 1.1691075563430786,
"learning_rate": 1.443081762847364e-05,
"loss": 0.5978,
"step": 456
},
{
"epoch": 1.1228501228501229,
"grad_norm": 1.1214145421981812,
"learning_rate": 1.4407015076222845e-05,
"loss": 0.6147,
"step": 457
},
{
"epoch": 1.1253071253071254,
"grad_norm": 1.0933703184127808,
"learning_rate": 1.4383181496912301e-05,
"loss": 0.593,
"step": 458
},
{
"epoch": 1.1277641277641277,
"grad_norm": 1.1798315048217773,
"learning_rate": 1.4359317058339457e-05,
"loss": 0.5715,
"step": 459
},
{
"epoch": 1.1302211302211302,
"grad_norm": 1.0610649585723877,
"learning_rate": 1.4335421928519022e-05,
"loss": 0.5861,
"step": 460
},
{
"epoch": 1.1326781326781328,
"grad_norm": 1.0927037000656128,
"learning_rate": 1.4311496275681785e-05,
"loss": 0.5606,
"step": 461
},
{
"epoch": 1.135135135135135,
"grad_norm": 1.1032183170318604,
"learning_rate": 1.4287540268273428e-05,
"loss": 0.5669,
"step": 462
},
{
"epoch": 1.1375921375921376,
"grad_norm": 1.1331627368927002,
"learning_rate": 1.4263554074953338e-05,
"loss": 0.5704,
"step": 463
},
{
"epoch": 1.1400491400491402,
"grad_norm": 1.0285066366195679,
"learning_rate": 1.4239537864593432e-05,
"loss": 0.585,
"step": 464
},
{
"epoch": 1.1425061425061425,
"grad_norm": 1.1035512685775757,
"learning_rate": 1.4215491806276944e-05,
"loss": 0.5674,
"step": 465
},
{
"epoch": 1.144963144963145,
"grad_norm": 1.1370140314102173,
"learning_rate": 1.4191416069297261e-05,
"loss": 0.5789,
"step": 466
},
{
"epoch": 1.1474201474201475,
"grad_norm": 1.3444671630859375,
"learning_rate": 1.4167310823156713e-05,
"loss": 0.6142,
"step": 467
},
{
"epoch": 1.1498771498771498,
"grad_norm": 1.15607488155365,
"learning_rate": 1.4143176237565386e-05,
"loss": 0.5732,
"step": 468
},
{
"epoch": 1.1523341523341524,
"grad_norm": 1.1212263107299805,
"learning_rate": 1.4119012482439929e-05,
"loss": 0.5674,
"step": 469
},
{
"epoch": 1.154791154791155,
"grad_norm": 1.0720258951187134,
"learning_rate": 1.4094819727902354e-05,
"loss": 0.5752,
"step": 470
},
{
"epoch": 1.1572481572481572,
"grad_norm": 1.1930720806121826,
"learning_rate": 1.407059814427884e-05,
"loss": 0.5781,
"step": 471
},
{
"epoch": 1.1597051597051597,
"grad_norm": 1.1133079528808594,
"learning_rate": 1.4046347902098535e-05,
"loss": 0.57,
"step": 472
},
{
"epoch": 1.1621621621621623,
"grad_norm": 1.2250217199325562,
"learning_rate": 1.4022069172092354e-05,
"loss": 0.5761,
"step": 473
},
{
"epoch": 1.1646191646191646,
"grad_norm": 1.050622582435608,
"learning_rate": 1.3997762125191774e-05,
"loss": 0.5909,
"step": 474
},
{
"epoch": 1.1670761670761671,
"grad_norm": 1.0757161378860474,
"learning_rate": 1.3973426932527637e-05,
"loss": 0.5524,
"step": 475
},
{
"epoch": 1.1695331695331694,
"grad_norm": 1.1241124868392944,
"learning_rate": 1.3949063765428943e-05,
"loss": 0.5881,
"step": 476
},
{
"epoch": 1.171990171990172,
"grad_norm": 1.1652573347091675,
"learning_rate": 1.3924672795421638e-05,
"loss": 0.5778,
"step": 477
},
{
"epoch": 1.1744471744471745,
"grad_norm": 1.065495491027832,
"learning_rate": 1.3900254194227417e-05,
"loss": 0.5632,
"step": 478
},
{
"epoch": 1.1769041769041768,
"grad_norm": 1.2024372816085815,
"learning_rate": 1.38758081337625e-05,
"loss": 0.5675,
"step": 479
},
{
"epoch": 1.1793611793611793,
"grad_norm": 1.121402382850647,
"learning_rate": 1.385133478613644e-05,
"loss": 0.517,
"step": 480
},
{
"epoch": 1.1818181818181819,
"grad_norm": 1.1037131547927856,
"learning_rate": 1.3826834323650899e-05,
"loss": 0.5616,
"step": 481
},
{
"epoch": 1.1842751842751842,
"grad_norm": 1.135852336883545,
"learning_rate": 1.3802306918798435e-05,
"loss": 0.533,
"step": 482
},
{
"epoch": 1.1867321867321867,
"grad_norm": 1.2349828481674194,
"learning_rate": 1.3777752744261295e-05,
"loss": 0.5841,
"step": 483
},
{
"epoch": 1.1891891891891893,
"grad_norm": 1.265579104423523,
"learning_rate": 1.3753171972910191e-05,
"loss": 0.5973,
"step": 484
},
{
"epoch": 1.1916461916461916,
"grad_norm": 1.139418363571167,
"learning_rate": 1.3728564777803089e-05,
"loss": 0.5541,
"step": 485
},
{
"epoch": 1.194103194103194,
"grad_norm": 0.9873649477958679,
"learning_rate": 1.3703931332183987e-05,
"loss": 0.5606,
"step": 486
},
{
"epoch": 1.1965601965601966,
"grad_norm": 1.097970724105835,
"learning_rate": 1.3679271809481693e-05,
"loss": 0.578,
"step": 487
},
{
"epoch": 1.199017199017199,
"grad_norm": 1.1830071210861206,
"learning_rate": 1.3654586383308619e-05,
"loss": 0.5815,
"step": 488
},
{
"epoch": 1.2014742014742015,
"grad_norm": 1.1203513145446777,
"learning_rate": 1.3629875227459532e-05,
"loss": 0.5886,
"step": 489
},
{
"epoch": 1.203931203931204,
"grad_norm": 1.120291829109192,
"learning_rate": 1.3605138515910362e-05,
"loss": 0.582,
"step": 490
},
{
"epoch": 1.2063882063882063,
"grad_norm": 1.167699933052063,
"learning_rate": 1.3580376422816945e-05,
"loss": 0.602,
"step": 491
},
{
"epoch": 1.2088452088452089,
"grad_norm": 1.2335660457611084,
"learning_rate": 1.3555589122513828e-05,
"loss": 0.6206,
"step": 492
},
{
"epoch": 1.2113022113022114,
"grad_norm": 1.15221107006073,
"learning_rate": 1.3530776789513009e-05,
"loss": 0.5953,
"step": 493
},
{
"epoch": 1.2137592137592137,
"grad_norm": 1.0700962543487549,
"learning_rate": 1.3505939598502742e-05,
"loss": 0.5308,
"step": 494
},
{
"epoch": 1.2162162162162162,
"grad_norm": 1.2463740110397339,
"learning_rate": 1.3481077724346279e-05,
"loss": 0.6081,
"step": 495
},
{
"epoch": 1.2186732186732188,
"grad_norm": 1.0911908149719238,
"learning_rate": 1.345619134208066e-05,
"loss": 0.5504,
"step": 496
},
{
"epoch": 1.221130221130221,
"grad_norm": 1.2074313163757324,
"learning_rate": 1.3431280626915466e-05,
"loss": 0.5432,
"step": 497
},
{
"epoch": 1.2235872235872236,
"grad_norm": 1.1220638751983643,
"learning_rate": 1.340634575423159e-05,
"loss": 0.5558,
"step": 498
},
{
"epoch": 1.2260442260442261,
"grad_norm": 0.9794447422027588,
"learning_rate": 1.3381386899580005e-05,
"loss": 0.5194,
"step": 499
},
{
"epoch": 1.2285012285012284,
"grad_norm": 1.2143794298171997,
"learning_rate": 1.3356404238680528e-05,
"loss": 0.5486,
"step": 500
},
{
"epoch": 1.230958230958231,
"grad_norm": 1.2196683883666992,
"learning_rate": 1.3331397947420578e-05,
"loss": 0.5858,
"step": 501
},
{
"epoch": 1.2334152334152333,
"grad_norm": 1.1935386657714844,
"learning_rate": 1.3306368201853941e-05,
"loss": 0.5625,
"step": 502
},
{
"epoch": 1.2358722358722358,
"grad_norm": 1.2700451612472534,
"learning_rate": 1.3281315178199537e-05,
"loss": 0.5968,
"step": 503
},
{
"epoch": 1.2383292383292384,
"grad_norm": 1.1718500852584839,
"learning_rate": 1.3256239052840157e-05,
"loss": 0.5889,
"step": 504
},
{
"epoch": 1.2407862407862407,
"grad_norm": 1.193869948387146,
"learning_rate": 1.3231140002321252e-05,
"loss": 0.6119,
"step": 505
},
{
"epoch": 1.2432432432432432,
"grad_norm": 1.0735101699829102,
"learning_rate": 1.320601820334967e-05,
"loss": 0.5991,
"step": 506
},
{
"epoch": 1.2457002457002457,
"grad_norm": 1.1022897958755493,
"learning_rate": 1.3180873832792417e-05,
"loss": 0.5711,
"step": 507
},
{
"epoch": 1.248157248157248,
"grad_norm": 1.0396614074707031,
"learning_rate": 1.3155707067675408e-05,
"loss": 0.5668,
"step": 508
},
{
"epoch": 1.2506142506142506,
"grad_norm": 1.0049409866333008,
"learning_rate": 1.3130518085182224e-05,
"loss": 0.5679,
"step": 509
},
{
"epoch": 1.253071253071253,
"grad_norm": 1.1597648859024048,
"learning_rate": 1.3105307062652873e-05,
"loss": 0.5354,
"step": 510
},
{
"epoch": 1.2555282555282554,
"grad_norm": 1.121046543121338,
"learning_rate": 1.3080074177582527e-05,
"loss": 0.5465,
"step": 511
},
{
"epoch": 1.257985257985258,
"grad_norm": 1.3277342319488525,
"learning_rate": 1.3054819607620275e-05,
"loss": 0.6109,
"step": 512
},
{
"epoch": 1.2604422604422605,
"grad_norm": 1.156744360923767,
"learning_rate": 1.3029543530567884e-05,
"loss": 0.6064,
"step": 513
},
{
"epoch": 1.2628992628992628,
"grad_norm": 1.1742829084396362,
"learning_rate": 1.3004246124378537e-05,
"loss": 0.6237,
"step": 514
},
{
"epoch": 1.2653562653562653,
"grad_norm": 1.1408833265304565,
"learning_rate": 1.2978927567155575e-05,
"loss": 0.5855,
"step": 515
},
{
"epoch": 1.2678132678132679,
"grad_norm": 1.0413517951965332,
"learning_rate": 1.2953588037151261e-05,
"loss": 0.545,
"step": 516
},
{
"epoch": 1.2702702702702702,
"grad_norm": 1.1523407697677612,
"learning_rate": 1.2928227712765504e-05,
"loss": 0.5932,
"step": 517
},
{
"epoch": 1.2727272727272727,
"grad_norm": 1.1548582315444946,
"learning_rate": 1.2902846772544625e-05,
"loss": 0.5318,
"step": 518
},
{
"epoch": 1.2751842751842752,
"grad_norm": 1.1297783851623535,
"learning_rate": 1.2877445395180077e-05,
"loss": 0.5321,
"step": 519
},
{
"epoch": 1.2776412776412776,
"grad_norm": 1.244998574256897,
"learning_rate": 1.2852023759507204e-05,
"loss": 0.5985,
"step": 520
},
{
"epoch": 1.28009828009828,
"grad_norm": 1.2012943029403687,
"learning_rate": 1.282658204450398e-05,
"loss": 0.6266,
"step": 521
},
{
"epoch": 1.2825552825552826,
"grad_norm": 1.134021520614624,
"learning_rate": 1.2801120429289731e-05,
"loss": 0.5548,
"step": 522
},
{
"epoch": 1.285012285012285,
"grad_norm": 1.203113079071045,
"learning_rate": 1.2775639093123905e-05,
"loss": 0.5569,
"step": 523
},
{
"epoch": 1.2874692874692875,
"grad_norm": 1.2195301055908203,
"learning_rate": 1.2750138215404784e-05,
"loss": 0.5714,
"step": 524
},
{
"epoch": 1.28992628992629,
"grad_norm": 1.056214451789856,
"learning_rate": 1.2724617975668229e-05,
"loss": 0.6026,
"step": 525
},
{
"epoch": 1.2923832923832923,
"grad_norm": 1.014293909072876,
"learning_rate": 1.2699078553586424e-05,
"loss": 0.5393,
"step": 526
},
{
"epoch": 1.2948402948402948,
"grad_norm": 1.0652614831924438,
"learning_rate": 1.2673520128966592e-05,
"loss": 0.5407,
"step": 527
},
{
"epoch": 1.2972972972972974,
"grad_norm": 1.1022783517837524,
"learning_rate": 1.2647942881749756e-05,
"loss": 0.5819,
"step": 528
},
{
"epoch": 1.2997542997542997,
"grad_norm": 1.086804986000061,
"learning_rate": 1.2622346992009447e-05,
"loss": 0.5685,
"step": 529
},
{
"epoch": 1.3022113022113022,
"grad_norm": 1.307706594467163,
"learning_rate": 1.2596732639950444e-05,
"loss": 0.5525,
"step": 530
},
{
"epoch": 1.3046683046683047,
"grad_norm": 1.0837743282318115,
"learning_rate": 1.2571100005907522e-05,
"loss": 0.5333,
"step": 531
},
{
"epoch": 1.307125307125307,
"grad_norm": 1.0788577795028687,
"learning_rate": 1.254544927034415e-05,
"loss": 0.5888,
"step": 532
},
{
"epoch": 1.3095823095823096,
"grad_norm": 1.233123779296875,
"learning_rate": 1.2519780613851254e-05,
"loss": 0.6043,
"step": 533
},
{
"epoch": 1.3120393120393121,
"grad_norm": 1.1542140245437622,
"learning_rate": 1.249409421714592e-05,
"loss": 0.5822,
"step": 534
},
{
"epoch": 1.3144963144963144,
"grad_norm": 1.1778144836425781,
"learning_rate": 1.2468390261070139e-05,
"loss": 0.6057,
"step": 535
},
{
"epoch": 1.316953316953317,
"grad_norm": 1.1264017820358276,
"learning_rate": 1.244266892658952e-05,
"loss": 0.5904,
"step": 536
},
{
"epoch": 1.3194103194103195,
"grad_norm": 1.1247997283935547,
"learning_rate": 1.2416930394792026e-05,
"loss": 0.5753,
"step": 537
},
{
"epoch": 1.3218673218673218,
"grad_norm": 1.2115566730499268,
"learning_rate": 1.2391174846886698e-05,
"loss": 0.5877,
"step": 538
},
{
"epoch": 1.3243243243243243,
"grad_norm": 1.1855846643447876,
"learning_rate": 1.2365402464202369e-05,
"loss": 0.5685,
"step": 539
},
{
"epoch": 1.3267813267813269,
"grad_norm": 1.2449346780776978,
"learning_rate": 1.2339613428186407e-05,
"loss": 0.5672,
"step": 540
},
{
"epoch": 1.3292383292383292,
"grad_norm": 1.0996452569961548,
"learning_rate": 1.2313807920403419e-05,
"loss": 0.5765,
"step": 541
},
{
"epoch": 1.3316953316953317,
"grad_norm": 1.1104735136032104,
"learning_rate": 1.228798612253397e-05,
"loss": 0.5834,
"step": 542
},
{
"epoch": 1.3341523341523343,
"grad_norm": 1.1029411554336548,
"learning_rate": 1.2262148216373333e-05,
"loss": 0.5432,
"step": 543
},
{
"epoch": 1.3366093366093366,
"grad_norm": 1.308183193206787,
"learning_rate": 1.2236294383830177e-05,
"loss": 0.6166,
"step": 544
},
{
"epoch": 1.339066339066339,
"grad_norm": 1.1299169063568115,
"learning_rate": 1.22104248069253e-05,
"loss": 0.592,
"step": 545
},
{
"epoch": 1.3415233415233416,
"grad_norm": 1.0875178575515747,
"learning_rate": 1.2184539667790349e-05,
"loss": 0.5121,
"step": 546
},
{
"epoch": 1.343980343980344,
"grad_norm": 1.1140974760055542,
"learning_rate": 1.2158639148666533e-05,
"loss": 0.5538,
"step": 547
},
{
"epoch": 1.3464373464373465,
"grad_norm": 1.304472804069519,
"learning_rate": 1.2132723431903341e-05,
"loss": 0.635,
"step": 548
},
{
"epoch": 1.348894348894349,
"grad_norm": 1.1797876358032227,
"learning_rate": 1.2106792699957264e-05,
"loss": 0.6401,
"step": 549
},
{
"epoch": 1.3513513513513513,
"grad_norm": 1.2239415645599365,
"learning_rate": 1.2080847135390502e-05,
"loss": 0.5928,
"step": 550
},
{
"epoch": 1.3538083538083538,
"grad_norm": 1.146208643913269,
"learning_rate": 1.2054886920869682e-05,
"loss": 0.564,
"step": 551
},
{
"epoch": 1.3562653562653564,
"grad_norm": 1.2212401628494263,
"learning_rate": 1.202891223916457e-05,
"loss": 0.5695,
"step": 552
},
{
"epoch": 1.3587223587223587,
"grad_norm": 1.1870454549789429,
"learning_rate": 1.2002923273146793e-05,
"loss": 0.5908,
"step": 553
},
{
"epoch": 1.3611793611793612,
"grad_norm": 1.0334968566894531,
"learning_rate": 1.1976920205788542e-05,
"loss": 0.5675,
"step": 554
},
{
"epoch": 1.3636363636363638,
"grad_norm": 1.0874249935150146,
"learning_rate": 1.1950903220161286e-05,
"loss": 0.5755,
"step": 555
},
{
"epoch": 1.366093366093366,
"grad_norm": 1.1364604234695435,
"learning_rate": 1.1924872499434478e-05,
"loss": 0.5464,
"step": 556
},
{
"epoch": 1.3685503685503686,
"grad_norm": 1.2387900352478027,
"learning_rate": 1.1898828226874284e-05,
"loss": 0.5867,
"step": 557
},
{
"epoch": 1.3710073710073711,
"grad_norm": 1.2175296545028687,
"learning_rate": 1.1872770585842273e-05,
"loss": 0.5807,
"step": 558
},
{
"epoch": 1.3734643734643734,
"grad_norm": 1.1570370197296143,
"learning_rate": 1.1846699759794129e-05,
"loss": 0.5783,
"step": 559
},
{
"epoch": 1.375921375921376,
"grad_norm": 1.0780045986175537,
"learning_rate": 1.1820615932278375e-05,
"loss": 0.5679,
"step": 560
},
{
"epoch": 1.3783783783783785,
"grad_norm": 1.045121192932129,
"learning_rate": 1.1794519286935056e-05,
"loss": 0.5759,
"step": 561
},
{
"epoch": 1.3808353808353808,
"grad_norm": 1.117018699645996,
"learning_rate": 1.1768410007494466e-05,
"loss": 0.5849,
"step": 562
},
{
"epoch": 1.3832923832923834,
"grad_norm": 1.085938572883606,
"learning_rate": 1.174228827777585e-05,
"loss": 0.6009,
"step": 563
},
{
"epoch": 1.3857493857493859,
"grad_norm": 1.171752691268921,
"learning_rate": 1.1716154281686105e-05,
"loss": 0.6076,
"step": 564
},
{
"epoch": 1.3882063882063882,
"grad_norm": 1.102292537689209,
"learning_rate": 1.1690008203218493e-05,
"loss": 0.5868,
"step": 565
},
{
"epoch": 1.3906633906633907,
"grad_norm": 1.1972408294677734,
"learning_rate": 1.1663850226451328e-05,
"loss": 0.5649,
"step": 566
},
{
"epoch": 1.393120393120393,
"grad_norm": 1.191436767578125,
"learning_rate": 1.16376805355467e-05,
"loss": 0.5581,
"step": 567
},
{
"epoch": 1.3955773955773956,
"grad_norm": 1.1172912120819092,
"learning_rate": 1.1611499314749177e-05,
"loss": 0.5802,
"step": 568
},
{
"epoch": 1.398034398034398,
"grad_norm": 1.151570439338684,
"learning_rate": 1.158530674838449e-05,
"loss": 0.5652,
"step": 569
},
{
"epoch": 1.4004914004914004,
"grad_norm": 1.2126294374465942,
"learning_rate": 1.155910302085826e-05,
"loss": 0.571,
"step": 570
},
{
"epoch": 1.402948402948403,
"grad_norm": 1.2168971300125122,
"learning_rate": 1.1532888316654675e-05,
"loss": 0.5791,
"step": 571
},
{
"epoch": 1.4054054054054055,
"grad_norm": 1.1153680086135864,
"learning_rate": 1.1506662820335208e-05,
"loss": 0.5781,
"step": 572
},
{
"epoch": 1.4078624078624078,
"grad_norm": 1.1872464418411255,
"learning_rate": 1.1480426716537316e-05,
"loss": 0.5923,
"step": 573
},
{
"epoch": 1.4103194103194103,
"grad_norm": 1.0620458126068115,
"learning_rate": 1.145418018997313e-05,
"loss": 0.5603,
"step": 574
},
{
"epoch": 1.4127764127764126,
"grad_norm": 1.0912894010543823,
"learning_rate": 1.1427923425428165e-05,
"loss": 0.6011,
"step": 575
},
{
"epoch": 1.4152334152334152,
"grad_norm": 1.1529377698898315,
"learning_rate": 1.1401656607760015e-05,
"loss": 0.6114,
"step": 576
},
{
"epoch": 1.4176904176904177,
"grad_norm": 1.2269554138183594,
"learning_rate": 1.1375379921897052e-05,
"loss": 0.5876,
"step": 577
},
{
"epoch": 1.42014742014742,
"grad_norm": 1.1351017951965332,
"learning_rate": 1.134909355283712e-05,
"loss": 0.5898,
"step": 578
},
{
"epoch": 1.4226044226044225,
"grad_norm": 1.1139864921569824,
"learning_rate": 1.1322797685646243e-05,
"loss": 0.5656,
"step": 579
},
{
"epoch": 1.425061425061425,
"grad_norm": 1.0196126699447632,
"learning_rate": 1.1296492505457315e-05,
"loss": 0.5386,
"step": 580
},
{
"epoch": 1.4275184275184274,
"grad_norm": 1.1085504293441772,
"learning_rate": 1.1270178197468788e-05,
"loss": 0.5934,
"step": 581
},
{
"epoch": 1.42997542997543,
"grad_norm": 1.1703898906707764,
"learning_rate": 1.1243854946943389e-05,
"loss": 0.5998,
"step": 582
},
{
"epoch": 1.4324324324324325,
"grad_norm": 1.1272860765457153,
"learning_rate": 1.1217522939206796e-05,
"loss": 0.5733,
"step": 583
},
{
"epoch": 1.4348894348894348,
"grad_norm": 1.3168493509292603,
"learning_rate": 1.1191182359646338e-05,
"loss": 0.5752,
"step": 584
},
{
"epoch": 1.4373464373464373,
"grad_norm": 1.2049705982208252,
"learning_rate": 1.1164833393709707e-05,
"loss": 0.5701,
"step": 585
},
{
"epoch": 1.4398034398034398,
"grad_norm": 1.1225930452346802,
"learning_rate": 1.1138476226903626e-05,
"loss": 0.561,
"step": 586
},
{
"epoch": 1.4422604422604421,
"grad_norm": 1.230978012084961,
"learning_rate": 1.1112111044792557e-05,
"loss": 0.6239,
"step": 587
},
{
"epoch": 1.4447174447174447,
"grad_norm": 1.2035282850265503,
"learning_rate": 1.1085738032997397e-05,
"loss": 0.6059,
"step": 588
},
{
"epoch": 1.4471744471744472,
"grad_norm": 1.062477469444275,
"learning_rate": 1.1059357377194161e-05,
"loss": 0.5714,
"step": 589
},
{
"epoch": 1.4496314496314495,
"grad_norm": 1.0980175733566284,
"learning_rate": 1.103296926311269e-05,
"loss": 0.5742,
"step": 590
},
{
"epoch": 1.452088452088452,
"grad_norm": 1.0794448852539062,
"learning_rate": 1.1006573876535322e-05,
"loss": 0.5433,
"step": 591
},
{
"epoch": 1.4545454545454546,
"grad_norm": 1.2062351703643799,
"learning_rate": 1.098017140329561e-05,
"loss": 0.5712,
"step": 592
},
{
"epoch": 1.457002457002457,
"grad_norm": 1.1053940057754517,
"learning_rate": 1.0953762029276982e-05,
"loss": 0.547,
"step": 593
},
{
"epoch": 1.4594594594594594,
"grad_norm": 1.1719647645950317,
"learning_rate": 1.0927345940411466e-05,
"loss": 0.6152,
"step": 594
},
{
"epoch": 1.461916461916462,
"grad_norm": 1.2481141090393066,
"learning_rate": 1.0900923322678366e-05,
"loss": 0.5968,
"step": 595
},
{
"epoch": 1.4643734643734643,
"grad_norm": 1.1074830293655396,
"learning_rate": 1.0874494362102932e-05,
"loss": 0.5624,
"step": 596
},
{
"epoch": 1.4668304668304668,
"grad_norm": 1.031218409538269,
"learning_rate": 1.0848059244755093e-05,
"loss": 0.5549,
"step": 597
},
{
"epoch": 1.4692874692874693,
"grad_norm": 1.140859842300415,
"learning_rate": 1.082161815674811e-05,
"loss": 0.5655,
"step": 598
},
{
"epoch": 1.4717444717444716,
"grad_norm": 1.1622551679611206,
"learning_rate": 1.0795171284237284e-05,
"loss": 0.6138,
"step": 599
},
{
"epoch": 1.4742014742014742,
"grad_norm": 1.0816534757614136,
"learning_rate": 1.0768718813418643e-05,
"loss": 0.5708,
"step": 600
},
{
"epoch": 1.4766584766584767,
"grad_norm": 1.1631544828414917,
"learning_rate": 1.0742260930527625e-05,
"loss": 0.5568,
"step": 601
},
{
"epoch": 1.479115479115479,
"grad_norm": 1.2862136363983154,
"learning_rate": 1.0715797821837776e-05,
"loss": 0.5371,
"step": 602
},
{
"epoch": 1.4815724815724816,
"grad_norm": 1.1199933290481567,
"learning_rate": 1.068932967365943e-05,
"loss": 0.6247,
"step": 603
},
{
"epoch": 1.484029484029484,
"grad_norm": 1.182577133178711,
"learning_rate": 1.0662856672338398e-05,
"loss": 0.5763,
"step": 604
},
{
"epoch": 1.4864864864864864,
"grad_norm": 1.2508751153945923,
"learning_rate": 1.0636379004254665e-05,
"loss": 0.5975,
"step": 605
},
{
"epoch": 1.488943488943489,
"grad_norm": 1.0829459428787231,
"learning_rate": 1.0609896855821069e-05,
"loss": 0.5828,
"step": 606
},
{
"epoch": 1.4914004914004915,
"grad_norm": 1.1169177293777466,
"learning_rate": 1.0583410413481995e-05,
"loss": 0.5655,
"step": 607
},
{
"epoch": 1.4938574938574938,
"grad_norm": 1.201220154762268,
"learning_rate": 1.0556919863712053e-05,
"loss": 0.5554,
"step": 608
},
{
"epoch": 1.4963144963144963,
"grad_norm": 1.085603952407837,
"learning_rate": 1.0530425393014773e-05,
"loss": 0.5567,
"step": 609
},
{
"epoch": 1.4987714987714988,
"grad_norm": 1.1561790704727173,
"learning_rate": 1.0503927187921291e-05,
"loss": 0.5799,
"step": 610
},
{
"epoch": 1.5012285012285012,
"grad_norm": 1.1585196256637573,
"learning_rate": 1.0477425434989038e-05,
"loss": 0.5606,
"step": 611
},
{
"epoch": 1.5036855036855037,
"grad_norm": 1.2159061431884766,
"learning_rate": 1.045092032080041e-05,
"loss": 0.5641,
"step": 612
},
{
"epoch": 1.5061425061425062,
"grad_norm": 1.1273988485336304,
"learning_rate": 1.0424412031961485e-05,
"loss": 0.5652,
"step": 613
},
{
"epoch": 1.5085995085995085,
"grad_norm": 1.1589583158493042,
"learning_rate": 1.0397900755100678e-05,
"loss": 0.6213,
"step": 614
},
{
"epoch": 1.511056511056511,
"grad_norm": 1.0823664665222168,
"learning_rate": 1.0371386676867447e-05,
"loss": 0.5555,
"step": 615
},
{
"epoch": 1.5135135135135136,
"grad_norm": 1.1465460062026978,
"learning_rate": 1.0344869983930975e-05,
"loss": 0.5761,
"step": 616
},
{
"epoch": 1.515970515970516,
"grad_norm": 1.1848366260528564,
"learning_rate": 1.0318350862978848e-05,
"loss": 0.5642,
"step": 617
},
{
"epoch": 1.5184275184275184,
"grad_norm": 1.058383822441101,
"learning_rate": 1.0291829500715744e-05,
"loss": 0.569,
"step": 618
},
{
"epoch": 1.520884520884521,
"grad_norm": 1.1151319742202759,
"learning_rate": 1.0265306083862135e-05,
"loss": 0.5399,
"step": 619
},
{
"epoch": 1.5233415233415233,
"grad_norm": 1.128150224685669,
"learning_rate": 1.0238780799152939e-05,
"loss": 0.5654,
"step": 620
},
{
"epoch": 1.5257985257985258,
"grad_norm": 1.1646603345870972,
"learning_rate": 1.0212253833336237e-05,
"loss": 0.5588,
"step": 621
},
{
"epoch": 1.5282555282555284,
"grad_norm": 1.0704180002212524,
"learning_rate": 1.0185725373171942e-05,
"loss": 0.5414,
"step": 622
},
{
"epoch": 1.5307125307125307,
"grad_norm": 1.1394037008285522,
"learning_rate": 1.015919560543049e-05,
"loss": 0.526,
"step": 623
},
{
"epoch": 1.5331695331695332,
"grad_norm": 1.2430076599121094,
"learning_rate": 1.013266471689152e-05,
"loss": 0.6033,
"step": 624
},
{
"epoch": 1.5356265356265357,
"grad_norm": 1.1466593742370605,
"learning_rate": 1.0106132894342564e-05,
"loss": 0.5498,
"step": 625
},
{
"epoch": 1.538083538083538,
"grad_norm": 1.121476411819458,
"learning_rate": 1.0079600324577722e-05,
"loss": 0.5734,
"step": 626
},
{
"epoch": 1.5405405405405406,
"grad_norm": 1.135146975517273,
"learning_rate": 1.005306719439637e-05,
"loss": 0.5479,
"step": 627
},
{
"epoch": 1.542997542997543,
"grad_norm": 1.0370548963546753,
"learning_rate": 1.0026533690601815e-05,
"loss": 0.5541,
"step": 628
},
{
"epoch": 1.5454545454545454,
"grad_norm": 1.1773468255996704,
"learning_rate": 1e-05,
"loss": 0.5672,
"step": 629
},
{
"epoch": 1.547911547911548,
"grad_norm": 1.2109761238098145,
"learning_rate": 9.973466309398187e-06,
"loss": 0.598,
"step": 630
},
{
"epoch": 1.5503685503685505,
"grad_norm": 1.1260876655578613,
"learning_rate": 9.946932805603635e-06,
"loss": 0.5986,
"step": 631
},
{
"epoch": 1.5528255528255528,
"grad_norm": 1.2091740369796753,
"learning_rate": 9.92039967542228e-06,
"loss": 0.6053,
"step": 632
},
{
"epoch": 1.5552825552825553,
"grad_norm": 1.1314700841903687,
"learning_rate": 9.89386710565744e-06,
"loss": 0.5463,
"step": 633
},
{
"epoch": 1.5577395577395579,
"grad_norm": 1.1741538047790527,
"learning_rate": 9.867335283108481e-06,
"loss": 0.5285,
"step": 634
},
{
"epoch": 1.5601965601965602,
"grad_norm": 1.104568362236023,
"learning_rate": 9.840804394569512e-06,
"loss": 0.5926,
"step": 635
},
{
"epoch": 1.5626535626535627,
"grad_norm": 1.2032363414764404,
"learning_rate": 9.81427462682806e-06,
"loss": 0.5756,
"step": 636
},
{
"epoch": 1.5651105651105652,
"grad_norm": 1.0761467218399048,
"learning_rate": 9.787746166663765e-06,
"loss": 0.566,
"step": 637
},
{
"epoch": 1.5675675675675675,
"grad_norm": 1.2620768547058105,
"learning_rate": 9.761219200847066e-06,
"loss": 0.6151,
"step": 638
},
{
"epoch": 1.57002457002457,
"grad_norm": 1.1293340921401978,
"learning_rate": 9.734693916137869e-06,
"loss": 0.5631,
"step": 639
},
{
"epoch": 1.5724815724815726,
"grad_norm": 1.150578260421753,
"learning_rate": 9.708170499284256e-06,
"loss": 0.5691,
"step": 640
},
{
"epoch": 1.574938574938575,
"grad_norm": 1.1344029903411865,
"learning_rate": 9.681649137021158e-06,
"loss": 0.5681,
"step": 641
},
{
"epoch": 1.5773955773955772,
"grad_norm": 1.1116544008255005,
"learning_rate": 9.655130016069029e-06,
"loss": 0.5878,
"step": 642
},
{
"epoch": 1.57985257985258,
"grad_norm": 1.0114622116088867,
"learning_rate": 9.628613323132554e-06,
"loss": 0.5139,
"step": 643
},
{
"epoch": 1.5823095823095823,
"grad_norm": 1.0908740758895874,
"learning_rate": 9.602099244899324e-06,
"loss": 0.5706,
"step": 644
},
{
"epoch": 1.5847665847665846,
"grad_norm": 1.139978051185608,
"learning_rate": 9.57558796803852e-06,
"loss": 0.5887,
"step": 645
},
{
"epoch": 1.5872235872235874,
"grad_norm": 1.1135131120681763,
"learning_rate": 9.549079679199592e-06,
"loss": 0.5152,
"step": 646
},
{
"epoch": 1.5896805896805897,
"grad_norm": 1.0935195684432983,
"learning_rate": 9.522574565010964e-06,
"loss": 0.5599,
"step": 647
},
{
"epoch": 1.592137592137592,
"grad_norm": 1.159075140953064,
"learning_rate": 9.496072812078712e-06,
"loss": 0.5657,
"step": 648
},
{
"epoch": 1.5945945945945947,
"grad_norm": 1.1898802518844604,
"learning_rate": 9.46957460698523e-06,
"loss": 0.5783,
"step": 649
},
{
"epoch": 1.597051597051597,
"grad_norm": 1.1011685132980347,
"learning_rate": 9.44308013628795e-06,
"loss": 0.5674,
"step": 650
},
{
"epoch": 1.5995085995085994,
"grad_norm": 1.160726547241211,
"learning_rate": 9.416589586518009e-06,
"loss": 0.6082,
"step": 651
},
{
"epoch": 1.6019656019656021,
"grad_norm": 1.1680805683135986,
"learning_rate": 9.390103144178933e-06,
"loss": 0.5194,
"step": 652
},
{
"epoch": 1.6044226044226044,
"grad_norm": 1.1895872354507446,
"learning_rate": 9.363620995745337e-06,
"loss": 0.5288,
"step": 653
},
{
"epoch": 1.6068796068796067,
"grad_norm": 1.1235671043395996,
"learning_rate": 9.337143327661604e-06,
"loss": 0.5395,
"step": 654
},
{
"epoch": 1.6093366093366095,
"grad_norm": 1.1525102853775024,
"learning_rate": 9.310670326340576e-06,
"loss": 0.5442,
"step": 655
},
{
"epoch": 1.6117936117936118,
"grad_norm": 1.1586118936538696,
"learning_rate": 9.284202178162225e-06,
"loss": 0.547,
"step": 656
},
{
"epoch": 1.6142506142506141,
"grad_norm": 1.177646517753601,
"learning_rate": 9.257739069472375e-06,
"loss": 0.5781,
"step": 657
},
{
"epoch": 1.6167076167076169,
"grad_norm": 1.5798202753067017,
"learning_rate": 9.23128118658136e-06,
"loss": 0.5414,
"step": 658
},
{
"epoch": 1.6191646191646192,
"grad_norm": 1.2702497243881226,
"learning_rate": 9.204828715762719e-06,
"loss": 0.5682,
"step": 659
},
{
"epoch": 1.6216216216216215,
"grad_norm": 1.1953880786895752,
"learning_rate": 9.178381843251892e-06,
"loss": 0.5605,
"step": 660
},
{
"epoch": 1.6240786240786242,
"grad_norm": 1.2357171773910522,
"learning_rate": 9.151940755244912e-06,
"loss": 0.5783,
"step": 661
},
{
"epoch": 1.6265356265356266,
"grad_norm": 1.1717873811721802,
"learning_rate": 9.125505637897072e-06,
"loss": 0.5778,
"step": 662
},
{
"epoch": 1.6289926289926289,
"grad_norm": 1.1912225484848022,
"learning_rate": 9.09907667732164e-06,
"loss": 0.5197,
"step": 663
},
{
"epoch": 1.6314496314496314,
"grad_norm": 1.115591287612915,
"learning_rate": 9.072654059588534e-06,
"loss": 0.5781,
"step": 664
},
{
"epoch": 1.633906633906634,
"grad_norm": 1.177725076675415,
"learning_rate": 9.046237970723022e-06,
"loss": 0.582,
"step": 665
},
{
"epoch": 1.6363636363636362,
"grad_norm": 1.127977728843689,
"learning_rate": 9.019828596704394e-06,
"loss": 0.5365,
"step": 666
},
{
"epoch": 1.6388206388206388,
"grad_norm": 1.211154580116272,
"learning_rate": 8.99342612346468e-06,
"loss": 0.5424,
"step": 667
},
{
"epoch": 1.6412776412776413,
"grad_norm": 1.160064458847046,
"learning_rate": 8.967030736887315e-06,
"loss": 0.5525,
"step": 668
},
{
"epoch": 1.6437346437346436,
"grad_norm": 1.1632789373397827,
"learning_rate": 8.94064262280584e-06,
"loss": 0.5635,
"step": 669
},
{
"epoch": 1.6461916461916462,
"grad_norm": 1.0825740098953247,
"learning_rate": 8.914261967002605e-06,
"loss": 0.5794,
"step": 670
},
{
"epoch": 1.6486486486486487,
"grad_norm": 1.1623291969299316,
"learning_rate": 8.887888955207444e-06,
"loss": 0.5661,
"step": 671
},
{
"epoch": 1.651105651105651,
"grad_norm": 1.3267773389816284,
"learning_rate": 8.861523773096379e-06,
"loss": 0.5771,
"step": 672
},
{
"epoch": 1.6535626535626535,
"grad_norm": 1.2022407054901123,
"learning_rate": 8.835166606290295e-06,
"loss": 0.5586,
"step": 673
},
{
"epoch": 1.656019656019656,
"grad_norm": 1.211409568786621,
"learning_rate": 8.808817640353662e-06,
"loss": 0.574,
"step": 674
},
{
"epoch": 1.6584766584766584,
"grad_norm": 1.0847737789154053,
"learning_rate": 8.782477060793211e-06,
"loss": 0.5778,
"step": 675
},
{
"epoch": 1.660933660933661,
"grad_norm": 1.048552393913269,
"learning_rate": 8.756145053056615e-06,
"loss": 0.5541,
"step": 676
},
{
"epoch": 1.6633906633906634,
"grad_norm": 1.2484380006790161,
"learning_rate": 8.729821802531213e-06,
"loss": 0.6039,
"step": 677
},
{
"epoch": 1.6658476658476657,
"grad_norm": 1.3350811004638672,
"learning_rate": 8.703507494542692e-06,
"loss": 0.5658,
"step": 678
},
{
"epoch": 1.6683046683046683,
"grad_norm": 1.1048815250396729,
"learning_rate": 8.67720231435376e-06,
"loss": 0.5723,
"step": 679
},
{
"epoch": 1.6707616707616708,
"grad_norm": 1.128318190574646,
"learning_rate": 8.650906447162884e-06,
"loss": 0.602,
"step": 680
},
{
"epoch": 1.6732186732186731,
"grad_norm": 1.0732570886611938,
"learning_rate": 8.624620078102952e-06,
"loss": 0.5558,
"step": 681
},
{
"epoch": 1.6756756756756757,
"grad_norm": 1.1415199041366577,
"learning_rate": 8.59834339223999e-06,
"loss": 0.5847,
"step": 682
},
{
"epoch": 1.6781326781326782,
"grad_norm": 1.2452774047851562,
"learning_rate": 8.572076574571838e-06,
"loss": 0.5393,
"step": 683
},
{
"epoch": 1.6805896805896805,
"grad_norm": 1.1395483016967773,
"learning_rate": 8.545819810026871e-06,
"loss": 0.5818,
"step": 684
},
{
"epoch": 1.683046683046683,
"grad_norm": 1.126442551612854,
"learning_rate": 8.519573283462688e-06,
"loss": 0.577,
"step": 685
},
{
"epoch": 1.6855036855036856,
"grad_norm": 1.2619662284851074,
"learning_rate": 8.493337179664794e-06,
"loss": 0.6061,
"step": 686
},
{
"epoch": 1.6879606879606879,
"grad_norm": 1.1817083358764648,
"learning_rate": 8.467111683345327e-06,
"loss": 0.5834,
"step": 687
},
{
"epoch": 1.6904176904176904,
"grad_norm": 1.1162532567977905,
"learning_rate": 8.440896979141743e-06,
"loss": 0.5752,
"step": 688
},
{
"epoch": 1.692874692874693,
"grad_norm": 1.238197922706604,
"learning_rate": 8.414693251615513e-06,
"loss": 0.5541,
"step": 689
},
{
"epoch": 1.6953316953316953,
"grad_norm": 1.0888656377792358,
"learning_rate": 8.388500685250826e-06,
"loss": 0.5887,
"step": 690
},
{
"epoch": 1.6977886977886978,
"grad_norm": 1.2266663312911987,
"learning_rate": 8.362319464453301e-06,
"loss": 0.5747,
"step": 691
},
{
"epoch": 1.7002457002457003,
"grad_norm": 1.132505178451538,
"learning_rate": 8.336149773548679e-06,
"loss": 0.5689,
"step": 692
},
{
"epoch": 1.7027027027027026,
"grad_norm": 1.0732828378677368,
"learning_rate": 8.309991796781512e-06,
"loss": 0.5357,
"step": 693
},
{
"epoch": 1.7051597051597052,
"grad_norm": 1.0756043195724487,
"learning_rate": 8.283845718313894e-06,
"loss": 0.559,
"step": 694
},
{
"epoch": 1.7076167076167077,
"grad_norm": 1.0958280563354492,
"learning_rate": 8.257711722224153e-06,
"loss": 0.5177,
"step": 695
},
{
"epoch": 1.71007371007371,
"grad_norm": 1.0943565368652344,
"learning_rate": 8.231589992505536e-06,
"loss": 0.5725,
"step": 696
},
{
"epoch": 1.7125307125307125,
"grad_norm": 1.1087898015975952,
"learning_rate": 8.205480713064947e-06,
"loss": 0.574,
"step": 697
},
{
"epoch": 1.714987714987715,
"grad_norm": 1.128554344177246,
"learning_rate": 8.17938406772163e-06,
"loss": 0.594,
"step": 698
},
{
"epoch": 1.7174447174447174,
"grad_norm": 1.1335420608520508,
"learning_rate": 8.153300240205874e-06,
"loss": 0.5724,
"step": 699
},
{
"epoch": 1.71990171990172,
"grad_norm": 1.1042388677597046,
"learning_rate": 8.12722941415773e-06,
"loss": 0.573,
"step": 700
},
{
"epoch": 1.7223587223587224,
"grad_norm": 1.1227362155914307,
"learning_rate": 8.101171773125716e-06,
"loss": 0.5123,
"step": 701
},
{
"epoch": 1.7248157248157248,
"grad_norm": 1.2100160121917725,
"learning_rate": 8.075127500565525e-06,
"loss": 0.5836,
"step": 702
},
{
"epoch": 1.7272727272727273,
"grad_norm": 1.1813348531723022,
"learning_rate": 8.04909677983872e-06,
"loss": 0.5789,
"step": 703
},
{
"epoch": 1.7297297297297298,
"grad_norm": 1.216829538345337,
"learning_rate": 8.02307979421146e-06,
"loss": 0.6077,
"step": 704
},
{
"epoch": 1.7321867321867321,
"grad_norm": 1.1228618621826172,
"learning_rate": 7.99707672685321e-06,
"loss": 0.5049,
"step": 705
},
{
"epoch": 1.7346437346437347,
"grad_norm": 1.1116825342178345,
"learning_rate": 7.971087760835434e-06,
"loss": 0.5949,
"step": 706
},
{
"epoch": 1.7371007371007372,
"grad_norm": 1.1956136226654053,
"learning_rate": 7.945113079130323e-06,
"loss": 0.5613,
"step": 707
},
{
"epoch": 1.7395577395577395,
"grad_norm": 1.1992087364196777,
"learning_rate": 7.9191528646095e-06,
"loss": 0.5768,
"step": 708
},
{
"epoch": 1.742014742014742,
"grad_norm": 1.2149180173873901,
"learning_rate": 7.89320730004274e-06,
"loss": 0.5682,
"step": 709
},
{
"epoch": 1.7444717444717446,
"grad_norm": 1.0318468809127808,
"learning_rate": 7.867276568096662e-06,
"loss": 0.5533,
"step": 710
},
{
"epoch": 1.746928746928747,
"grad_norm": 1.1024391651153564,
"learning_rate": 7.84136085133347e-06,
"loss": 0.5543,
"step": 711
},
{
"epoch": 1.7493857493857494,
"grad_norm": 1.1635844707489014,
"learning_rate": 7.815460332209656e-06,
"loss": 0.5954,
"step": 712
},
{
"epoch": 1.751842751842752,
"grad_norm": 1.251796007156372,
"learning_rate": 7.789575193074703e-06,
"loss": 0.5956,
"step": 713
},
{
"epoch": 1.7542997542997543,
"grad_norm": 1.1243458986282349,
"learning_rate": 7.763705616169825e-06,
"loss": 0.5409,
"step": 714
},
{
"epoch": 1.7567567567567568,
"grad_norm": 1.0625543594360352,
"learning_rate": 7.737851783626672e-06,
"loss": 0.5581,
"step": 715
},
{
"epoch": 1.7592137592137593,
"grad_norm": 1.2470191717147827,
"learning_rate": 7.712013877466032e-06,
"loss": 0.6114,
"step": 716
},
{
"epoch": 1.7616707616707616,
"grad_norm": 1.011608362197876,
"learning_rate": 7.686192079596586e-06,
"loss": 0.5524,
"step": 717
},
{
"epoch": 1.7641277641277642,
"grad_norm": 1.0156747102737427,
"learning_rate": 7.660386571813593e-06,
"loss": 0.5425,
"step": 718
},
{
"epoch": 1.7665847665847667,
"grad_norm": 1.1418312788009644,
"learning_rate": 7.634597535797633e-06,
"loss": 0.5418,
"step": 719
},
{
"epoch": 1.769041769041769,
"grad_norm": 1.1201746463775635,
"learning_rate": 7.608825153113305e-06,
"loss": 0.5863,
"step": 720
},
{
"epoch": 1.7714987714987716,
"grad_norm": 1.1697362661361694,
"learning_rate": 7.5830696052079754e-06,
"loss": 0.5746,
"step": 721
},
{
"epoch": 1.773955773955774,
"grad_norm": 1.1003642082214355,
"learning_rate": 7.557331073410486e-06,
"loss": 0.597,
"step": 722
},
{
"epoch": 1.7764127764127764,
"grad_norm": 1.1120378971099854,
"learning_rate": 7.531609738929865e-06,
"loss": 0.5934,
"step": 723
},
{
"epoch": 1.7788697788697787,
"grad_norm": 1.1342380046844482,
"learning_rate": 7.5059057828540815e-06,
"loss": 0.5819,
"step": 724
},
{
"epoch": 1.7813267813267815,
"grad_norm": 1.178702473640442,
"learning_rate": 7.480219386148751e-06,
"loss": 0.5867,
"step": 725
},
{
"epoch": 1.7837837837837838,
"grad_norm": 1.1170891523361206,
"learning_rate": 7.454550729655853e-06,
"loss": 0.5619,
"step": 726
},
{
"epoch": 1.786240786240786,
"grad_norm": 1.1073411703109741,
"learning_rate": 7.428899994092482e-06,
"loss": 0.5536,
"step": 727
},
{
"epoch": 1.7886977886977888,
"grad_norm": 1.0886826515197754,
"learning_rate": 7.403267360049557e-06,
"loss": 0.5302,
"step": 728
},
{
"epoch": 1.7911547911547911,
"grad_norm": 1.1056452989578247,
"learning_rate": 7.377653007990559e-06,
"loss": 0.5706,
"step": 729
},
{
"epoch": 1.7936117936117935,
"grad_norm": 1.1150050163269043,
"learning_rate": 7.3520571182502465e-06,
"loss": 0.5991,
"step": 730
},
{
"epoch": 1.7960687960687962,
"grad_norm": 1.08351731300354,
"learning_rate": 7.326479871033408e-06,
"loss": 0.5642,
"step": 731
},
{
"epoch": 1.7985257985257985,
"grad_norm": 1.0654759407043457,
"learning_rate": 7.300921446413582e-06,
"loss": 0.5203,
"step": 732
},
{
"epoch": 1.8009828009828008,
"grad_norm": 1.209181547164917,
"learning_rate": 7.275382024331773e-06,
"loss": 0.5787,
"step": 733
},
{
"epoch": 1.8034398034398036,
"grad_norm": 1.1766862869262695,
"learning_rate": 7.249861784595218e-06,
"loss": 0.5681,
"step": 734
},
{
"epoch": 1.805896805896806,
"grad_norm": 1.110587477684021,
"learning_rate": 7.2243609068761e-06,
"loss": 0.5994,
"step": 735
},
{
"epoch": 1.8083538083538082,
"grad_norm": 1.2170779705047607,
"learning_rate": 7.198879570710272e-06,
"loss": 0.6055,
"step": 736
},
{
"epoch": 1.810810810810811,
"grad_norm": 1.0853193998336792,
"learning_rate": 7.173417955496025e-06,
"loss": 0.5567,
"step": 737
},
{
"epoch": 1.8132678132678133,
"grad_norm": 1.0811327695846558,
"learning_rate": 7.1479762404927955e-06,
"loss": 0.5671,
"step": 738
},
{
"epoch": 1.8157248157248156,
"grad_norm": 1.1481190919876099,
"learning_rate": 7.122554604819925e-06,
"loss": 0.5699,
"step": 739
},
{
"epoch": 1.8181818181818183,
"grad_norm": 1.2198657989501953,
"learning_rate": 7.097153227455379e-06,
"loss": 0.5912,
"step": 740
},
{
"epoch": 1.8206388206388207,
"grad_norm": 1.1447055339813232,
"learning_rate": 7.071772287234497e-06,
"loss": 0.6129,
"step": 741
},
{
"epoch": 1.823095823095823,
"grad_norm": 1.19182288646698,
"learning_rate": 7.046411962848744e-06,
"loss": 0.5565,
"step": 742
},
{
"epoch": 1.8255528255528255,
"grad_norm": 1.06711745262146,
"learning_rate": 7.021072432844427e-06,
"loss": 0.5472,
"step": 743
},
{
"epoch": 1.828009828009828,
"grad_norm": 1.1729991436004639,
"learning_rate": 6.995753875621465e-06,
"loss": 0.5959,
"step": 744
},
{
"epoch": 1.8304668304668303,
"grad_norm": 1.3030084371566772,
"learning_rate": 6.970456469432116e-06,
"loss": 0.5969,
"step": 745
},
{
"epoch": 1.8329238329238329,
"grad_norm": 1.085019588470459,
"learning_rate": 6.945180392379729e-06,
"loss": 0.5444,
"step": 746
},
{
"epoch": 1.8353808353808354,
"grad_norm": 1.1228212118148804,
"learning_rate": 6.9199258224174774e-06,
"loss": 0.5572,
"step": 747
},
{
"epoch": 1.8378378378378377,
"grad_norm": 1.219850778579712,
"learning_rate": 6.894692937347127e-06,
"loss": 0.5937,
"step": 748
},
{
"epoch": 1.8402948402948403,
"grad_norm": 1.0725655555725098,
"learning_rate": 6.869481914817779e-06,
"loss": 0.5827,
"step": 749
},
{
"epoch": 1.8427518427518428,
"grad_norm": 1.117969036102295,
"learning_rate": 6.844292932324597e-06,
"loss": 0.5589,
"step": 750
},
{
"epoch": 1.845208845208845,
"grad_norm": 1.0720747709274292,
"learning_rate": 6.819126167207586e-06,
"loss": 0.5062,
"step": 751
},
{
"epoch": 1.8476658476658476,
"grad_norm": 1.0815181732177734,
"learning_rate": 6.793981796650333e-06,
"loss": 0.5798,
"step": 752
},
{
"epoch": 1.8501228501228502,
"grad_norm": 1.0702989101409912,
"learning_rate": 6.768859997678751e-06,
"loss": 0.5327,
"step": 753
},
{
"epoch": 1.8525798525798525,
"grad_norm": 1.0632387399673462,
"learning_rate": 6.743760947159847e-06,
"loss": 0.5849,
"step": 754
},
{
"epoch": 1.855036855036855,
"grad_norm": 1.113589882850647,
"learning_rate": 6.718684821800468e-06,
"loss": 0.5947,
"step": 755
},
{
"epoch": 1.8574938574938575,
"grad_norm": 1.2338849306106567,
"learning_rate": 6.693631798146061e-06,
"loss": 0.61,
"step": 756
},
{
"epoch": 1.8599508599508598,
"grad_norm": 1.028483271598816,
"learning_rate": 6.668602052579425e-06,
"loss": 0.5944,
"step": 757
},
{
"epoch": 1.8624078624078624,
"grad_norm": 1.157347321510315,
"learning_rate": 6.643595761319475e-06,
"loss": 0.5437,
"step": 758
},
{
"epoch": 1.864864864864865,
"grad_norm": 1.0857754945755005,
"learning_rate": 6.61861310042e-06,
"loss": 0.5505,
"step": 759
},
{
"epoch": 1.8673218673218672,
"grad_norm": 1.0336072444915771,
"learning_rate": 6.593654245768415e-06,
"loss": 0.5798,
"step": 760
},
{
"epoch": 1.8697788697788698,
"grad_norm": 1.1750551462173462,
"learning_rate": 6.5687193730845375e-06,
"loss": 0.554,
"step": 761
},
{
"epoch": 1.8722358722358723,
"grad_norm": 1.137463927268982,
"learning_rate": 6.543808657919345e-06,
"loss": 0.5542,
"step": 762
},
{
"epoch": 1.8746928746928746,
"grad_norm": 1.2450264692306519,
"learning_rate": 6.518922275653724e-06,
"loss": 0.5759,
"step": 763
},
{
"epoch": 1.8771498771498771,
"grad_norm": 1.132763147354126,
"learning_rate": 6.494060401497262e-06,
"loss": 0.5395,
"step": 764
},
{
"epoch": 1.8796068796068797,
"grad_norm": 1.107035756111145,
"learning_rate": 6.469223210486992e-06,
"loss": 0.5819,
"step": 765
},
{
"epoch": 1.882063882063882,
"grad_norm": 1.2326126098632812,
"learning_rate": 6.444410877486178e-06,
"loss": 0.6089,
"step": 766
},
{
"epoch": 1.8845208845208845,
"grad_norm": 1.1576228141784668,
"learning_rate": 6.419623577183056e-06,
"loss": 0.5568,
"step": 767
},
{
"epoch": 1.886977886977887,
"grad_norm": 1.259473443031311,
"learning_rate": 6.394861484089641e-06,
"loss": 0.5501,
"step": 768
},
{
"epoch": 1.8894348894348894,
"grad_norm": 1.0849790573120117,
"learning_rate": 6.370124772540469e-06,
"loss": 0.5554,
"step": 769
},
{
"epoch": 1.8918918918918919,
"grad_norm": 1.0747138261795044,
"learning_rate": 6.345413616691385e-06,
"loss": 0.5572,
"step": 770
},
{
"epoch": 1.8943488943488944,
"grad_norm": 1.2411308288574219,
"learning_rate": 6.320728190518308e-06,
"loss": 0.5823,
"step": 771
},
{
"epoch": 1.8968058968058967,
"grad_norm": 1.0845237970352173,
"learning_rate": 6.29606866781602e-06,
"loss": 0.5564,
"step": 772
},
{
"epoch": 1.8992628992628993,
"grad_norm": 1.0935763120651245,
"learning_rate": 6.2714352221969155e-06,
"loss": 0.5657,
"step": 773
},
{
"epoch": 1.9017199017199018,
"grad_norm": 1.171252727508545,
"learning_rate": 6.246828027089811e-06,
"loss": 0.5762,
"step": 774
},
{
"epoch": 1.904176904176904,
"grad_norm": 1.205479383468628,
"learning_rate": 6.222247255738706e-06,
"loss": 0.5382,
"step": 775
},
{
"epoch": 1.9066339066339066,
"grad_norm": 1.0936667919158936,
"learning_rate": 6.197693081201568e-06,
"loss": 0.5512,
"step": 776
},
{
"epoch": 1.9090909090909092,
"grad_norm": 1.1234700679779053,
"learning_rate": 6.173165676349103e-06,
"loss": 0.5765,
"step": 777
},
{
"epoch": 1.9115479115479115,
"grad_norm": 1.1193130016326904,
"learning_rate": 6.14866521386356e-06,
"loss": 0.5407,
"step": 778
},
{
"epoch": 1.914004914004914,
"grad_norm": 1.1735546588897705,
"learning_rate": 6.124191866237504e-06,
"loss": 0.5759,
"step": 779
},
{
"epoch": 1.9164619164619165,
"grad_norm": 1.0813647508621216,
"learning_rate": 6.0997458057725875e-06,
"loss": 0.5435,
"step": 780
},
{
"epoch": 1.9189189189189189,
"grad_norm": 1.0532335042953491,
"learning_rate": 6.075327204578363e-06,
"loss": 0.5623,
"step": 781
},
{
"epoch": 1.9213759213759214,
"grad_norm": 1.0584250688552856,
"learning_rate": 6.0509362345710585e-06,
"loss": 0.5828,
"step": 782
},
{
"epoch": 1.923832923832924,
"grad_norm": 1.1264971494674683,
"learning_rate": 6.026573067472366e-06,
"loss": 0.591,
"step": 783
},
{
"epoch": 1.9262899262899262,
"grad_norm": 1.1128865480422974,
"learning_rate": 6.00223787480823e-06,
"loss": 0.5407,
"step": 784
},
{
"epoch": 1.9287469287469288,
"grad_norm": 1.1384304761886597,
"learning_rate": 5.97793082790765e-06,
"loss": 0.5633,
"step": 785
},
{
"epoch": 1.9312039312039313,
"grad_norm": 1.0851879119873047,
"learning_rate": 5.953652097901468e-06,
"loss": 0.5651,
"step": 786
},
{
"epoch": 1.9336609336609336,
"grad_norm": 1.2878016233444214,
"learning_rate": 5.929401855721162e-06,
"loss": 0.5841,
"step": 787
},
{
"epoch": 1.9361179361179361,
"grad_norm": 1.143180251121521,
"learning_rate": 5.905180272097648e-06,
"loss": 0.5518,
"step": 788
},
{
"epoch": 1.9385749385749387,
"grad_norm": 1.147679090499878,
"learning_rate": 5.880987517560075e-06,
"loss": 0.5335,
"step": 789
},
{
"epoch": 1.941031941031941,
"grad_norm": 1.084428071975708,
"learning_rate": 5.856823762434618e-06,
"loss": 0.5145,
"step": 790
},
{
"epoch": 1.9434889434889435,
"grad_norm": 1.1757539510726929,
"learning_rate": 5.832689176843291e-06,
"loss": 0.6105,
"step": 791
},
{
"epoch": 1.945945945945946,
"grad_norm": 1.2322425842285156,
"learning_rate": 5.808583930702739e-06,
"loss": 0.5917,
"step": 792
},
{
"epoch": 1.9484029484029484,
"grad_norm": 1.0495308637619019,
"learning_rate": 5.784508193723058e-06,
"loss": 0.555,
"step": 793
},
{
"epoch": 1.950859950859951,
"grad_norm": 1.2324461936950684,
"learning_rate": 5.7604621354065704e-06,
"loss": 0.5557,
"step": 794
},
{
"epoch": 1.9533169533169534,
"grad_norm": 1.0863914489746094,
"learning_rate": 5.73644592504666e-06,
"loss": 0.5689,
"step": 795
},
{
"epoch": 1.9557739557739557,
"grad_norm": 1.1552340984344482,
"learning_rate": 5.712459731726577e-06,
"loss": 0.5918,
"step": 796
},
{
"epoch": 1.9582309582309583,
"grad_norm": 1.1713007688522339,
"learning_rate": 5.688503724318217e-06,
"loss": 0.5743,
"step": 797
},
{
"epoch": 1.9606879606879608,
"grad_norm": 1.267248272895813,
"learning_rate": 5.6645780714809814e-06,
"loss": 0.5988,
"step": 798
},
{
"epoch": 1.9631449631449631,
"grad_norm": 1.1006453037261963,
"learning_rate": 5.640682941660547e-06,
"loss": 0.5526,
"step": 799
},
{
"epoch": 1.9656019656019657,
"grad_norm": 1.116868257522583,
"learning_rate": 5.616818503087704e-06,
"loss": 0.5802,
"step": 800
},
{
"epoch": 1.9680589680589682,
"grad_norm": 1.2064473628997803,
"learning_rate": 5.592984923777156e-06,
"loss": 0.5626,
"step": 801
},
{
"epoch": 1.9705159705159705,
"grad_norm": 1.1360725164413452,
"learning_rate": 5.5691823715263646e-06,
"loss": 0.5534,
"step": 802
},
{
"epoch": 1.972972972972973,
"grad_norm": 1.1609777212142944,
"learning_rate": 5.545411013914329e-06,
"loss": 0.5672,
"step": 803
},
{
"epoch": 1.9754299754299756,
"grad_norm": 1.1151705980300903,
"learning_rate": 5.521671018300436e-06,
"loss": 0.5639,
"step": 804
},
{
"epoch": 1.9778869778869779,
"grad_norm": 1.1209923028945923,
"learning_rate": 5.497962551823266e-06,
"loss": 0.5873,
"step": 805
},
{
"epoch": 1.9803439803439802,
"grad_norm": 1.110759973526001,
"learning_rate": 5.4742857813994356e-06,
"loss": 0.5441,
"step": 806
},
{
"epoch": 1.982800982800983,
"grad_norm": 1.2078731060028076,
"learning_rate": 5.450640873722395e-06,
"loss": 0.54,
"step": 807
},
{
"epoch": 1.9852579852579852,
"grad_norm": 1.1308197975158691,
"learning_rate": 5.427027995261269e-06,
"loss": 0.5208,
"step": 808
},
{
"epoch": 1.9877149877149876,
"grad_norm": 1.201937198638916,
"learning_rate": 5.403447312259702e-06,
"loss": 0.5978,
"step": 809
},
{
"epoch": 1.9901719901719903,
"grad_norm": 1.1013566255569458,
"learning_rate": 5.379898990734641e-06,
"loss": 0.5676,
"step": 810
},
{
"epoch": 1.9926289926289926,
"grad_norm": 1.2219934463500977,
"learning_rate": 5.356383196475226e-06,
"loss": 0.5878,
"step": 811
},
{
"epoch": 1.995085995085995,
"grad_norm": 1.0953749418258667,
"learning_rate": 5.332900095041568e-06,
"loss": 0.5594,
"step": 812
},
{
"epoch": 1.9975429975429977,
"grad_norm": 1.1119327545166016,
"learning_rate": 5.3094498517636324e-06,
"loss": 0.5875,
"step": 813
},
{
"epoch": 2.0,
"grad_norm": 1.0389142036437988,
"learning_rate": 5.286032631740023e-06,
"loss": 0.4483,
"step": 814
},
{
"epoch": 2.0024570024570023,
"grad_norm": 1.7053855657577515,
"learning_rate": 5.262648599836873e-06,
"loss": 0.3474,
"step": 815
},
{
"epoch": 2.004914004914005,
"grad_norm": 1.522942066192627,
"learning_rate": 5.239297920686641e-06,
"loss": 0.311,
"step": 816
},
{
"epoch": 2.0073710073710074,
"grad_norm": 1.504423975944519,
"learning_rate": 5.215980758686978e-06,
"loss": 0.3016,
"step": 817
},
{
"epoch": 2.0098280098280097,
"grad_norm": 1.269704818725586,
"learning_rate": 5.192697277999557e-06,
"loss": 0.319,
"step": 818
},
{
"epoch": 2.0122850122850124,
"grad_norm": 1.2130259275436401,
"learning_rate": 5.169447642548928e-06,
"loss": 0.3009,
"step": 819
},
{
"epoch": 2.0147420147420148,
"grad_norm": 1.2411495447158813,
"learning_rate": 5.146232016021353e-06,
"loss": 0.3164,
"step": 820
},
{
"epoch": 2.017199017199017,
"grad_norm": 1.1585211753845215,
"learning_rate": 5.1230505618636575e-06,
"loss": 0.2941,
"step": 821
},
{
"epoch": 2.01965601965602,
"grad_norm": 1.374408483505249,
"learning_rate": 5.09990344328208e-06,
"loss": 0.3249,
"step": 822
},
{
"epoch": 2.022113022113022,
"grad_norm": 1.8337355852127075,
"learning_rate": 5.076790823241131e-06,
"loss": 0.3264,
"step": 823
},
{
"epoch": 2.0245700245700244,
"grad_norm": 1.4516721963882446,
"learning_rate": 5.053712864462432e-06,
"loss": 0.2907,
"step": 824
},
{
"epoch": 2.027027027027027,
"grad_norm": 1.3430378437042236,
"learning_rate": 5.030669729423572e-06,
"loss": 0.2621,
"step": 825
},
{
"epoch": 2.0294840294840295,
"grad_norm": 1.6277974843978882,
"learning_rate": 5.0076615803569815e-06,
"loss": 0.3386,
"step": 826
},
{
"epoch": 2.031941031941032,
"grad_norm": 1.6233184337615967,
"learning_rate": 4.984688579248757e-06,
"loss": 0.3282,
"step": 827
},
{
"epoch": 2.0343980343980346,
"grad_norm": 1.2828155755996704,
"learning_rate": 4.961750887837558e-06,
"loss": 0.29,
"step": 828
},
{
"epoch": 2.036855036855037,
"grad_norm": 1.379706621170044,
"learning_rate": 4.938848667613436e-06,
"loss": 0.3222,
"step": 829
},
{
"epoch": 2.039312039312039,
"grad_norm": 1.233864188194275,
"learning_rate": 4.915982079816732e-06,
"loss": 0.2746,
"step": 830
},
{
"epoch": 2.041769041769042,
"grad_norm": 1.2086654901504517,
"learning_rate": 4.893151285436891e-06,
"loss": 0.2961,
"step": 831
},
{
"epoch": 2.0442260442260443,
"grad_norm": 1.1589128971099854,
"learning_rate": 4.870356445211388e-06,
"loss": 0.3014,
"step": 832
},
{
"epoch": 2.0466830466830466,
"grad_norm": 1.0225163698196411,
"learning_rate": 4.84759771962455e-06,
"loss": 0.2605,
"step": 833
},
{
"epoch": 2.0491400491400493,
"grad_norm": 1.1696596145629883,
"learning_rate": 4.82487526890645e-06,
"loss": 0.2958,
"step": 834
},
{
"epoch": 2.0515970515970516,
"grad_norm": 1.2834105491638184,
"learning_rate": 4.802189253031764e-06,
"loss": 0.3001,
"step": 835
},
{
"epoch": 2.054054054054054,
"grad_norm": 1.0881999731063843,
"learning_rate": 4.779539831718668e-06,
"loss": 0.2837,
"step": 836
},
{
"epoch": 2.0565110565110567,
"grad_norm": 1.2137595415115356,
"learning_rate": 4.756927164427685e-06,
"loss": 0.2729,
"step": 837
},
{
"epoch": 2.058968058968059,
"grad_norm": 1.3076894283294678,
"learning_rate": 4.734351410360577e-06,
"loss": 0.2936,
"step": 838
},
{
"epoch": 2.0614250614250613,
"grad_norm": 1.243006944656372,
"learning_rate": 4.711812728459233e-06,
"loss": 0.3079,
"step": 839
},
{
"epoch": 2.063882063882064,
"grad_norm": 1.1716985702514648,
"learning_rate": 4.689311277404529e-06,
"loss": 0.2839,
"step": 840
},
{
"epoch": 2.0663390663390664,
"grad_norm": 1.2072100639343262,
"learning_rate": 4.666847215615225e-06,
"loss": 0.2894,
"step": 841
},
{
"epoch": 2.0687960687960687,
"grad_norm": 1.3369587659835815,
"learning_rate": 4.644420701246847e-06,
"loss": 0.3135,
"step": 842
},
{
"epoch": 2.0712530712530715,
"grad_norm": 1.0535438060760498,
"learning_rate": 4.622031892190579e-06,
"loss": 0.2848,
"step": 843
},
{
"epoch": 2.0737100737100738,
"grad_norm": 1.1448948383331299,
"learning_rate": 4.599680946072127e-06,
"loss": 0.2981,
"step": 844
},
{
"epoch": 2.076167076167076,
"grad_norm": 1.163978099822998,
"learning_rate": 4.57736802025065e-06,
"loss": 0.2698,
"step": 845
},
{
"epoch": 2.078624078624079,
"grad_norm": 1.185007929801941,
"learning_rate": 4.555093271817617e-06,
"loss": 0.2822,
"step": 846
},
{
"epoch": 2.081081081081081,
"grad_norm": 1.2226570844650269,
"learning_rate": 4.532856857595714e-06,
"loss": 0.3142,
"step": 847
},
{
"epoch": 2.0835380835380835,
"grad_norm": 1.1364682912826538,
"learning_rate": 4.51065893413774e-06,
"loss": 0.2684,
"step": 848
},
{
"epoch": 2.085995085995086,
"grad_norm": 1.1746481657028198,
"learning_rate": 4.488499657725511e-06,
"loss": 0.2883,
"step": 849
},
{
"epoch": 2.0884520884520885,
"grad_norm": 1.126842975616455,
"learning_rate": 4.466379184368747e-06,
"loss": 0.2986,
"step": 850
},
{
"epoch": 2.090909090909091,
"grad_norm": 1.199286937713623,
"learning_rate": 4.444297669803981e-06,
"loss": 0.295,
"step": 851
},
{
"epoch": 2.093366093366093,
"grad_norm": 1.1583151817321777,
"learning_rate": 4.422255269493455e-06,
"loss": 0.2661,
"step": 852
},
{
"epoch": 2.095823095823096,
"grad_norm": 1.1596896648406982,
"learning_rate": 4.400252138624047e-06,
"loss": 0.2743,
"step": 853
},
{
"epoch": 2.098280098280098,
"grad_norm": 1.1054956912994385,
"learning_rate": 4.378288432106151e-06,
"loss": 0.2925,
"step": 854
},
{
"epoch": 2.100737100737101,
"grad_norm": 1.0872220993041992,
"learning_rate": 4.356364304572596e-06,
"loss": 0.2562,
"step": 855
},
{
"epoch": 2.1031941031941033,
"grad_norm": 1.1248897314071655,
"learning_rate": 4.334479910377577e-06,
"loss": 0.2926,
"step": 856
},
{
"epoch": 2.1056511056511056,
"grad_norm": 1.1297627687454224,
"learning_rate": 4.312635403595532e-06,
"loss": 0.2948,
"step": 857
},
{
"epoch": 2.108108108108108,
"grad_norm": 1.2067081928253174,
"learning_rate": 4.290830938020087e-06,
"loss": 0.3015,
"step": 858
},
{
"epoch": 2.1105651105651106,
"grad_norm": 1.1445972919464111,
"learning_rate": 4.269066667162956e-06,
"loss": 0.2866,
"step": 859
},
{
"epoch": 2.113022113022113,
"grad_norm": 1.2343792915344238,
"learning_rate": 4.247342744252883e-06,
"loss": 0.2957,
"step": 860
},
{
"epoch": 2.1154791154791153,
"grad_norm": 1.199395775794983,
"learning_rate": 4.2256593222345185e-06,
"loss": 0.2869,
"step": 861
},
{
"epoch": 2.117936117936118,
"grad_norm": 1.1511932611465454,
"learning_rate": 4.2040165537674e-06,
"loss": 0.3021,
"step": 862
},
{
"epoch": 2.1203931203931203,
"grad_norm": 1.146592140197754,
"learning_rate": 4.182414591224834e-06,
"loss": 0.3078,
"step": 863
},
{
"epoch": 2.1228501228501226,
"grad_norm": 1.0966308116912842,
"learning_rate": 4.160853586692839e-06,
"loss": 0.273,
"step": 864
},
{
"epoch": 2.1253071253071254,
"grad_norm": 1.1649693250656128,
"learning_rate": 4.139333691969071e-06,
"loss": 0.287,
"step": 865
},
{
"epoch": 2.1277641277641277,
"grad_norm": 1.0332820415496826,
"learning_rate": 4.1178550585617694e-06,
"loss": 0.2918,
"step": 866
},
{
"epoch": 2.13022113022113,
"grad_norm": 1.2864841222763062,
"learning_rate": 4.096417837688666e-06,
"loss": 0.3119,
"step": 867
},
{
"epoch": 2.1326781326781328,
"grad_norm": 1.1031755208969116,
"learning_rate": 4.075022180275935e-06,
"loss": 0.296,
"step": 868
},
{
"epoch": 2.135135135135135,
"grad_norm": 1.1938380002975464,
"learning_rate": 4.053668236957135e-06,
"loss": 0.2908,
"step": 869
},
{
"epoch": 2.1375921375921374,
"grad_norm": 1.0921452045440674,
"learning_rate": 4.032356158072132e-06,
"loss": 0.2943,
"step": 870
},
{
"epoch": 2.14004914004914,
"grad_norm": 1.2852400541305542,
"learning_rate": 4.011086093666057e-06,
"loss": 0.2911,
"step": 871
},
{
"epoch": 2.1425061425061425,
"grad_norm": 1.3049036264419556,
"learning_rate": 3.9898581934882365e-06,
"loss": 0.2957,
"step": 872
},
{
"epoch": 2.1449631449631448,
"grad_norm": 1.2709492444992065,
"learning_rate": 3.96867260699116e-06,
"loss": 0.3144,
"step": 873
},
{
"epoch": 2.1474201474201475,
"grad_norm": 1.1413925886154175,
"learning_rate": 3.947529483329388e-06,
"loss": 0.262,
"step": 874
},
{
"epoch": 2.14987714987715,
"grad_norm": 1.2879313230514526,
"learning_rate": 3.92642897135855e-06,
"loss": 0.2973,
"step": 875
},
{
"epoch": 2.152334152334152,
"grad_norm": 1.30074942111969,
"learning_rate": 3.905371219634257e-06,
"loss": 0.305,
"step": 876
},
{
"epoch": 2.154791154791155,
"grad_norm": 1.1726915836334229,
"learning_rate": 3.884356376411089e-06,
"loss": 0.2893,
"step": 877
},
{
"epoch": 2.157248157248157,
"grad_norm": 1.1194170713424683,
"learning_rate": 3.863384589641509e-06,
"loss": 0.2986,
"step": 878
},
{
"epoch": 2.1597051597051595,
"grad_norm": 1.1586145162582397,
"learning_rate": 3.8424560069748705e-06,
"loss": 0.2887,
"step": 879
},
{
"epoch": 2.1621621621621623,
"grad_norm": 1.1653716564178467,
"learning_rate": 3.821570775756339e-06,
"loss": 0.2736,
"step": 880
},
{
"epoch": 2.1646191646191646,
"grad_norm": 1.1076536178588867,
"learning_rate": 3.8007290430258712e-06,
"loss": 0.2893,
"step": 881
},
{
"epoch": 2.167076167076167,
"grad_norm": 1.2675201892852783,
"learning_rate": 3.779930955517187e-06,
"loss": 0.2954,
"step": 882
},
{
"epoch": 2.1695331695331697,
"grad_norm": 1.0717624425888062,
"learning_rate": 3.759176659656717e-06,
"loss": 0.289,
"step": 883
},
{
"epoch": 2.171990171990172,
"grad_norm": 1.171949863433838,
"learning_rate": 3.7384663015625856e-06,
"loss": 0.3027,
"step": 884
},
{
"epoch": 2.1744471744471743,
"grad_norm": 1.057793140411377,
"learning_rate": 3.7178000270435765e-06,
"loss": 0.2767,
"step": 885
},
{
"epoch": 2.176904176904177,
"grad_norm": 1.1228421926498413,
"learning_rate": 3.697177981598116e-06,
"loss": 0.2935,
"step": 886
},
{
"epoch": 2.1793611793611793,
"grad_norm": 1.140847086906433,
"learning_rate": 3.6766003104132332e-06,
"loss": 0.2543,
"step": 887
},
{
"epoch": 2.1818181818181817,
"grad_norm": 1.0831724405288696,
"learning_rate": 3.6560671583635467e-06,
"loss": 0.2599,
"step": 888
},
{
"epoch": 2.1842751842751844,
"grad_norm": 1.1893972158432007,
"learning_rate": 3.6355786700102426e-06,
"loss": 0.2868,
"step": 889
},
{
"epoch": 2.1867321867321867,
"grad_norm": 1.1741374731063843,
"learning_rate": 3.6151349896000687e-06,
"loss": 0.2847,
"step": 890
},
{
"epoch": 2.189189189189189,
"grad_norm": 1.1736268997192383,
"learning_rate": 3.5947362610642854e-06,
"loss": 0.2869,
"step": 891
},
{
"epoch": 2.191646191646192,
"grad_norm": 1.2521358728408813,
"learning_rate": 3.5743826280176997e-06,
"loss": 0.3096,
"step": 892
},
{
"epoch": 2.194103194103194,
"grad_norm": 1.271240472793579,
"learning_rate": 3.5540742337576083e-06,
"loss": 0.3035,
"step": 893
},
{
"epoch": 2.1965601965601964,
"grad_norm": 1.2453268766403198,
"learning_rate": 3.533811221262833e-06,
"loss": 0.278,
"step": 894
},
{
"epoch": 2.199017199017199,
"grad_norm": 1.7575854063034058,
"learning_rate": 3.5135937331926595e-06,
"loss": 0.3754,
"step": 895
},
{
"epoch": 2.2014742014742015,
"grad_norm": 1.1458030939102173,
"learning_rate": 3.493421911885894e-06,
"loss": 0.3078,
"step": 896
},
{
"epoch": 2.203931203931204,
"grad_norm": 1.1252243518829346,
"learning_rate": 3.4732958993598153e-06,
"loss": 0.2726,
"step": 897
},
{
"epoch": 2.2063882063882065,
"grad_norm": 1.1965776681900024,
"learning_rate": 3.4532158373091916e-06,
"loss": 0.2874,
"step": 898
},
{
"epoch": 2.208845208845209,
"grad_norm": 1.3262277841567993,
"learning_rate": 3.433181867105291e-06,
"loss": 0.302,
"step": 899
},
{
"epoch": 2.211302211302211,
"grad_norm": 1.0740711688995361,
"learning_rate": 3.413194129794869e-06,
"loss": 0.259,
"step": 900
},
{
"epoch": 2.213759213759214,
"grad_norm": 1.219891905784607,
"learning_rate": 3.3932527660991877e-06,
"loss": 0.3086,
"step": 901
},
{
"epoch": 2.2162162162162162,
"grad_norm": 1.1779321432113647,
"learning_rate": 3.373357916413016e-06,
"loss": 0.2801,
"step": 902
},
{
"epoch": 2.2186732186732185,
"grad_norm": 1.1177716255187988,
"learning_rate": 3.3535097208036584e-06,
"loss": 0.2943,
"step": 903
},
{
"epoch": 2.2211302211302213,
"grad_norm": 1.1561752557754517,
"learning_rate": 3.333708319009945e-06,
"loss": 0.2821,
"step": 904
},
{
"epoch": 2.2235872235872236,
"grad_norm": 1.2509113550186157,
"learning_rate": 3.313953850441266e-06,
"loss": 0.2696,
"step": 905
},
{
"epoch": 2.226044226044226,
"grad_norm": 1.2087299823760986,
"learning_rate": 3.2942464541765775e-06,
"loss": 0.2836,
"step": 906
},
{
"epoch": 2.2285012285012287,
"grad_norm": 1.1378703117370605,
"learning_rate": 3.2745862689634433e-06,
"loss": 0.2844,
"step": 907
},
{
"epoch": 2.230958230958231,
"grad_norm": 1.3601089715957642,
"learning_rate": 3.254973433217021e-06,
"loss": 0.2834,
"step": 908
},
{
"epoch": 2.2334152334152333,
"grad_norm": 1.133008360862732,
"learning_rate": 3.2354080850191328e-06,
"loss": 0.2851,
"step": 909
},
{
"epoch": 2.235872235872236,
"grad_norm": 1.2364482879638672,
"learning_rate": 3.2158903621172556e-06,
"loss": 0.2857,
"step": 910
},
{
"epoch": 2.2383292383292384,
"grad_norm": 1.2216523885726929,
"learning_rate": 3.196420401923567e-06,
"loss": 0.2758,
"step": 911
},
{
"epoch": 2.2407862407862407,
"grad_norm": 1.1590107679367065,
"learning_rate": 3.1769983415139894e-06,
"loss": 0.2787,
"step": 912
},
{
"epoch": 2.2432432432432434,
"grad_norm": 1.1557618379592896,
"learning_rate": 3.157624317627195e-06,
"loss": 0.3007,
"step": 913
},
{
"epoch": 2.2457002457002457,
"grad_norm": 1.2400990724563599,
"learning_rate": 3.1382984666636806e-06,
"loss": 0.2936,
"step": 914
},
{
"epoch": 2.248157248157248,
"grad_norm": 1.131558895111084,
"learning_rate": 3.1190209246847624e-06,
"loss": 0.2745,
"step": 915
},
{
"epoch": 2.250614250614251,
"grad_norm": 1.1964099407196045,
"learning_rate": 3.099791827411668e-06,
"loss": 0.2921,
"step": 916
},
{
"epoch": 2.253071253071253,
"grad_norm": 1.1541016101837158,
"learning_rate": 3.0806113102245395e-06,
"loss": 0.2726,
"step": 917
},
{
"epoch": 2.2555282555282554,
"grad_norm": 1.1907479763031006,
"learning_rate": 3.061479508161502e-06,
"loss": 0.2958,
"step": 918
},
{
"epoch": 2.257985257985258,
"grad_norm": 1.21101713180542,
"learning_rate": 3.042396555917707e-06,
"loss": 0.2923,
"step": 919
},
{
"epoch": 2.2604422604422605,
"grad_norm": 1.1928739547729492,
"learning_rate": 3.023362587844393e-06,
"loss": 0.2854,
"step": 920
},
{
"epoch": 2.262899262899263,
"grad_norm": 1.1127504110336304,
"learning_rate": 3.00437773794791e-06,
"loss": 0.2775,
"step": 921
},
{
"epoch": 2.2653562653562656,
"grad_norm": 1.1492047309875488,
"learning_rate": 2.9854421398888212e-06,
"loss": 0.2756,
"step": 922
},
{
"epoch": 2.267813267813268,
"grad_norm": 1.120092511177063,
"learning_rate": 2.966555926980922e-06,
"loss": 0.2773,
"step": 923
},
{
"epoch": 2.27027027027027,
"grad_norm": 1.2535494565963745,
"learning_rate": 2.947719232190329e-06,
"loss": 0.2991,
"step": 924
},
{
"epoch": 2.2727272727272725,
"grad_norm": 1.1032397747039795,
"learning_rate": 2.9289321881345257e-06,
"loss": 0.2598,
"step": 925
},
{
"epoch": 2.2751842751842752,
"grad_norm": 1.060048222541809,
"learning_rate": 2.9101949270814346e-06,
"loss": 0.2686,
"step": 926
},
{
"epoch": 2.2776412776412776,
"grad_norm": 1.238160490989685,
"learning_rate": 2.8915075809484903e-06,
"loss": 0.289,
"step": 927
},
{
"epoch": 2.2800982800982803,
"grad_norm": 1.0575051307678223,
"learning_rate": 2.872870281301704e-06,
"loss": 0.2595,
"step": 928
},
{
"epoch": 2.2825552825552826,
"grad_norm": 1.2428430318832397,
"learning_rate": 2.8542831593547483e-06,
"loss": 0.278,
"step": 929
},
{
"epoch": 2.285012285012285,
"grad_norm": 1.2655268907546997,
"learning_rate": 2.8357463459680122e-06,
"loss": 0.2877,
"step": 930
},
{
"epoch": 2.2874692874692872,
"grad_norm": 1.1474885940551758,
"learning_rate": 2.8172599716477145e-06,
"loss": 0.2683,
"step": 931
},
{
"epoch": 2.28992628992629,
"grad_norm": 1.2706103324890137,
"learning_rate": 2.7988241665449357e-06,
"loss": 0.2611,
"step": 932
},
{
"epoch": 2.2923832923832923,
"grad_norm": 1.1547375917434692,
"learning_rate": 2.780439060454756e-06,
"loss": 0.2957,
"step": 933
},
{
"epoch": 2.294840294840295,
"grad_norm": 1.1076372861862183,
"learning_rate": 2.7621047828153e-06,
"loss": 0.2787,
"step": 934
},
{
"epoch": 2.2972972972972974,
"grad_norm": 1.2577446699142456,
"learning_rate": 2.7438214627068448e-06,
"loss": 0.2823,
"step": 935
},
{
"epoch": 2.2997542997542997,
"grad_norm": 1.2012608051300049,
"learning_rate": 2.7255892288509044e-06,
"loss": 0.2818,
"step": 936
},
{
"epoch": 2.302211302211302,
"grad_norm": 1.1386785507202148,
"learning_rate": 2.707408209609339e-06,
"loss": 0.2757,
"step": 937
},
{
"epoch": 2.3046683046683047,
"grad_norm": 1.18578040599823,
"learning_rate": 2.6892785329834157e-06,
"loss": 0.3073,
"step": 938
},
{
"epoch": 2.307125307125307,
"grad_norm": 1.1896729469299316,
"learning_rate": 2.6712003266129525e-06,
"loss": 0.2911,
"step": 939
},
{
"epoch": 2.30958230958231,
"grad_norm": 1.308111548423767,
"learning_rate": 2.6531737177753804e-06,
"loss": 0.2859,
"step": 940
},
{
"epoch": 2.312039312039312,
"grad_norm": 1.1311650276184082,
"learning_rate": 2.6351988333848787e-06,
"loss": 0.256,
"step": 941
},
{
"epoch": 2.3144963144963144,
"grad_norm": 1.0582762956619263,
"learning_rate": 2.6172757999914553e-06,
"loss": 0.2848,
"step": 942
},
{
"epoch": 2.3169533169533167,
"grad_norm": 1.1629694700241089,
"learning_rate": 2.5994047437800708e-06,
"loss": 0.2799,
"step": 943
},
{
"epoch": 2.3194103194103195,
"grad_norm": 1.1673933267593384,
"learning_rate": 2.581585790569755e-06,
"loss": 0.2754,
"step": 944
},
{
"epoch": 2.321867321867322,
"grad_norm": 1.0987865924835205,
"learning_rate": 2.5638190658126937e-06,
"loss": 0.2858,
"step": 945
},
{
"epoch": 2.3243243243243246,
"grad_norm": 1.1722525358200073,
"learning_rate": 2.5461046945933855e-06,
"loss": 0.2593,
"step": 946
},
{
"epoch": 2.326781326781327,
"grad_norm": 1.1092591285705566,
"learning_rate": 2.5284428016277284e-06,
"loss": 0.2822,
"step": 947
},
{
"epoch": 2.329238329238329,
"grad_norm": 1.265002965927124,
"learning_rate": 2.510833511262156e-06,
"loss": 0.2801,
"step": 948
},
{
"epoch": 2.3316953316953315,
"grad_norm": 1.2009599208831787,
"learning_rate": 2.493276947472756e-06,
"loss": 0.3001,
"step": 949
},
{
"epoch": 2.3341523341523343,
"grad_norm": 1.1949445009231567,
"learning_rate": 2.4757732338644127e-06,
"loss": 0.2641,
"step": 950
},
{
"epoch": 2.3366093366093366,
"grad_norm": 1.1856027841567993,
"learning_rate": 2.458322493669911e-06,
"loss": 0.2698,
"step": 951
},
{
"epoch": 2.339066339066339,
"grad_norm": 1.2151247262954712,
"learning_rate": 2.4409248497490923e-06,
"loss": 0.3014,
"step": 952
},
{
"epoch": 2.3415233415233416,
"grad_norm": 1.188072919845581,
"learning_rate": 2.4235804245879723e-06,
"loss": 0.2605,
"step": 953
},
{
"epoch": 2.343980343980344,
"grad_norm": 1.2835193872451782,
"learning_rate": 2.406289340297896e-06,
"loss": 0.2931,
"step": 954
},
{
"epoch": 2.3464373464373462,
"grad_norm": 1.1126747131347656,
"learning_rate": 2.3890517186146623e-06,
"loss": 0.2556,
"step": 955
},
{
"epoch": 2.348894348894349,
"grad_norm": 1.2112548351287842,
"learning_rate": 2.3718676808976683e-06,
"loss": 0.2794,
"step": 956
},
{
"epoch": 2.3513513513513513,
"grad_norm": 1.2068251371383667,
"learning_rate": 2.354737348129077e-06,
"loss": 0.3035,
"step": 957
},
{
"epoch": 2.3538083538083536,
"grad_norm": 1.171133041381836,
"learning_rate": 2.337660840912923e-06,
"loss": 0.2557,
"step": 958
},
{
"epoch": 2.3562653562653564,
"grad_norm": 1.235913634300232,
"learning_rate": 2.320638279474312e-06,
"loss": 0.3008,
"step": 959
},
{
"epoch": 2.3587223587223587,
"grad_norm": 1.0996620655059814,
"learning_rate": 2.3036697836585353e-06,
"loss": 0.2694,
"step": 960
},
{
"epoch": 2.361179361179361,
"grad_norm": 1.0999877452850342,
"learning_rate": 2.2867554729302545e-06,
"loss": 0.2786,
"step": 961
},
{
"epoch": 2.3636363636363638,
"grad_norm": 1.1389085054397583,
"learning_rate": 2.26989546637263e-06,
"loss": 0.2814,
"step": 962
},
{
"epoch": 2.366093366093366,
"grad_norm": 1.0945831537246704,
"learning_rate": 2.25308988268652e-06,
"loss": 0.2832,
"step": 963
},
{
"epoch": 2.3685503685503684,
"grad_norm": 1.1268113851547241,
"learning_rate": 2.2363388401896125e-06,
"loss": 0.2806,
"step": 964
},
{
"epoch": 2.371007371007371,
"grad_norm": 1.2085450887680054,
"learning_rate": 2.2196424568156073e-06,
"loss": 0.2877,
"step": 965
},
{
"epoch": 2.3734643734643734,
"grad_norm": 1.0432332754135132,
"learning_rate": 2.2030008501133815e-06,
"loss": 0.2772,
"step": 966
},
{
"epoch": 2.3759213759213758,
"grad_norm": 0.9682121872901917,
"learning_rate": 2.186414137246172e-06,
"loss": 0.2562,
"step": 967
},
{
"epoch": 2.3783783783783785,
"grad_norm": 1.0478007793426514,
"learning_rate": 2.1698824349907344e-06,
"loss": 0.2646,
"step": 968
},
{
"epoch": 2.380835380835381,
"grad_norm": 1.244933843612671,
"learning_rate": 2.1534058597365284e-06,
"loss": 0.2826,
"step": 969
},
{
"epoch": 2.383292383292383,
"grad_norm": 1.1740527153015137,
"learning_rate": 2.136984527484901e-06,
"loss": 0.2891,
"step": 970
},
{
"epoch": 2.385749385749386,
"grad_norm": 1.235903024673462,
"learning_rate": 2.1206185538482704e-06,
"loss": 0.319,
"step": 971
},
{
"epoch": 2.388206388206388,
"grad_norm": 1.3708535432815552,
"learning_rate": 2.1043080540493055e-06,
"loss": 0.342,
"step": 972
},
{
"epoch": 2.3906633906633905,
"grad_norm": 1.1570706367492676,
"learning_rate": 2.0880531429201146e-06,
"loss": 0.2617,
"step": 973
},
{
"epoch": 2.3931203931203933,
"grad_norm": 1.1274479627609253,
"learning_rate": 2.0718539349014544e-06,
"loss": 0.2889,
"step": 974
},
{
"epoch": 2.3955773955773956,
"grad_norm": 1.166656494140625,
"learning_rate": 2.0557105440418902e-06,
"loss": 0.2788,
"step": 975
},
{
"epoch": 2.398034398034398,
"grad_norm": 1.27115797996521,
"learning_rate": 2.039623083997031e-06,
"loss": 0.3151,
"step": 976
},
{
"epoch": 2.4004914004914006,
"grad_norm": 1.1158872842788696,
"learning_rate": 2.0235916680287015e-06,
"loss": 0.2556,
"step": 977
},
{
"epoch": 2.402948402948403,
"grad_norm": 1.1800823211669922,
"learning_rate": 2.007616409004165e-06,
"loss": 0.2862,
"step": 978
},
{
"epoch": 2.4054054054054053,
"grad_norm": 1.3932093381881714,
"learning_rate": 1.991697419395301e-06,
"loss": 0.3104,
"step": 979
},
{
"epoch": 2.407862407862408,
"grad_norm": 1.1910911798477173,
"learning_rate": 1.97583481127785e-06,
"loss": 0.2861,
"step": 980
},
{
"epoch": 2.4103194103194103,
"grad_norm": 1.057436227798462,
"learning_rate": 1.960028696330596e-06,
"loss": 0.2789,
"step": 981
},
{
"epoch": 2.4127764127764126,
"grad_norm": 1.1399625539779663,
"learning_rate": 1.9442791858345887e-06,
"loss": 0.2795,
"step": 982
},
{
"epoch": 2.4152334152334154,
"grad_norm": 1.1812423467636108,
"learning_rate": 1.9285863906723612e-06,
"loss": 0.2795,
"step": 983
},
{
"epoch": 2.4176904176904177,
"grad_norm": 1.2888036966323853,
"learning_rate": 1.9129504213271565e-06,
"loss": 0.2742,
"step": 984
},
{
"epoch": 2.42014742014742,
"grad_norm": 1.102538824081421,
"learning_rate": 1.8973713878821343e-06,
"loss": 0.3167,
"step": 985
},
{
"epoch": 2.4226044226044228,
"grad_norm": 1.266358733177185,
"learning_rate": 1.881849400019602e-06,
"loss": 0.2778,
"step": 986
},
{
"epoch": 2.425061425061425,
"grad_norm": 1.1434144973754883,
"learning_rate": 1.8663845670202562e-06,
"loss": 0.3077,
"step": 987
},
{
"epoch": 2.4275184275184274,
"grad_norm": 1.1956113576889038,
"learning_rate": 1.8509769977623905e-06,
"loss": 0.3028,
"step": 988
},
{
"epoch": 2.42997542997543,
"grad_norm": 1.1281932592391968,
"learning_rate": 1.8356268007211442e-06,
"loss": 0.2734,
"step": 989
},
{
"epoch": 2.4324324324324325,
"grad_norm": 1.0696520805358887,
"learning_rate": 1.8203340839677307e-06,
"loss": 0.266,
"step": 990
},
{
"epoch": 2.4348894348894348,
"grad_norm": 1.0929800271987915,
"learning_rate": 1.8050989551686915e-06,
"loss": 0.2641,
"step": 991
},
{
"epoch": 2.4373464373464375,
"grad_norm": 1.2299550771713257,
"learning_rate": 1.7899215215851084e-06,
"loss": 0.3026,
"step": 992
},
{
"epoch": 2.43980343980344,
"grad_norm": 1.2351603507995605,
"learning_rate": 1.7748018900718856e-06,
"loss": 0.2932,
"step": 993
},
{
"epoch": 2.442260442260442,
"grad_norm": 1.1006183624267578,
"learning_rate": 1.7597401670769688e-06,
"loss": 0.2719,
"step": 994
},
{
"epoch": 2.444717444717445,
"grad_norm": 1.1939946413040161,
"learning_rate": 1.744736458640607e-06,
"loss": 0.2791,
"step": 995
},
{
"epoch": 2.447174447174447,
"grad_norm": 1.0497217178344727,
"learning_rate": 1.729790870394603e-06,
"loss": 0.287,
"step": 996
},
{
"epoch": 2.4496314496314495,
"grad_norm": 1.224005937576294,
"learning_rate": 1.7149035075615795e-06,
"loss": 0.2938,
"step": 997
},
{
"epoch": 2.4520884520884523,
"grad_norm": 1.0267997980117798,
"learning_rate": 1.700074474954221e-06,
"loss": 0.2861,
"step": 998
},
{
"epoch": 2.4545454545454546,
"grad_norm": 1.2167069911956787,
"learning_rate": 1.6853038769745466e-06,
"loss": 0.274,
"step": 999
},
{
"epoch": 2.457002457002457,
"grad_norm": 1.2097947597503662,
"learning_rate": 1.6705918176131807e-06,
"loss": 0.2765,
"step": 1000
},
{
"epoch": 2.4594594594594597,
"grad_norm": 1.2343907356262207,
"learning_rate": 1.6559384004486057e-06,
"loss": 0.3039,
"step": 1001
},
{
"epoch": 2.461916461916462,
"grad_norm": 1.189889669418335,
"learning_rate": 1.6413437286464419e-06,
"loss": 0.2819,
"step": 1002
},
{
"epoch": 2.4643734643734643,
"grad_norm": 1.2207257747650146,
"learning_rate": 1.6268079049587205e-06,
"loss": 0.2932,
"step": 1003
},
{
"epoch": 2.4668304668304666,
"grad_norm": 1.2403969764709473,
"learning_rate": 1.6123310317231644e-06,
"loss": 0.2853,
"step": 1004
},
{
"epoch": 2.4692874692874693,
"grad_norm": 1.2092585563659668,
"learning_rate": 1.5979132108624572e-06,
"loss": 0.3057,
"step": 1005
},
{
"epoch": 2.4717444717444716,
"grad_norm": 1.2463258504867554,
"learning_rate": 1.583554543883532e-06,
"loss": 0.2919,
"step": 1006
},
{
"epoch": 2.4742014742014744,
"grad_norm": 1.1727359294891357,
"learning_rate": 1.5692551318768556e-06,
"loss": 0.2936,
"step": 1007
},
{
"epoch": 2.4766584766584767,
"grad_norm": 1.196444034576416,
"learning_rate": 1.5550150755157267e-06,
"loss": 0.2956,
"step": 1008
},
{
"epoch": 2.479115479115479,
"grad_norm": 1.1921645402908325,
"learning_rate": 1.5408344750555382e-06,
"loss": 0.2894,
"step": 1009
},
{
"epoch": 2.4815724815724813,
"grad_norm": 1.2030874490737915,
"learning_rate": 1.5267134303331122e-06,
"loss": 0.2697,
"step": 1010
},
{
"epoch": 2.484029484029484,
"grad_norm": 1.0682005882263184,
"learning_rate": 1.5126520407659618e-06,
"loss": 0.2678,
"step": 1011
},
{
"epoch": 2.4864864864864864,
"grad_norm": 1.3198879957199097,
"learning_rate": 1.4986504053516105e-06,
"loss": 0.2587,
"step": 1012
},
{
"epoch": 2.488943488943489,
"grad_norm": 1.224260687828064,
"learning_rate": 1.4847086226668871e-06,
"loss": 0.2822,
"step": 1013
},
{
"epoch": 2.4914004914004915,
"grad_norm": 1.1843713521957397,
"learning_rate": 1.4708267908672401e-06,
"loss": 0.2684,
"step": 1014
},
{
"epoch": 2.493857493857494,
"grad_norm": 1.1694753170013428,
"learning_rate": 1.4570050076860343e-06,
"loss": 0.2821,
"step": 1015
},
{
"epoch": 2.496314496314496,
"grad_norm": 1.159956693649292,
"learning_rate": 1.4432433704338723e-06,
"loss": 0.2618,
"step": 1016
},
{
"epoch": 2.498771498771499,
"grad_norm": 1.1955686807632446,
"learning_rate": 1.4295419759979079e-06,
"loss": 0.3257,
"step": 1017
},
{
"epoch": 2.501228501228501,
"grad_norm": 1.0967086553573608,
"learning_rate": 1.4159009208411611e-06,
"loss": 0.2534,
"step": 1018
},
{
"epoch": 2.503685503685504,
"grad_norm": 1.1328158378601074,
"learning_rate": 1.4023203010018393e-06,
"loss": 0.2726,
"step": 1019
},
{
"epoch": 2.506142506142506,
"grad_norm": 1.1212531328201294,
"learning_rate": 1.3888002120926625e-06,
"loss": 0.2834,
"step": 1020
},
{
"epoch": 2.5085995085995085,
"grad_norm": 1.1289138793945312,
"learning_rate": 1.3753407493001968e-06,
"loss": 0.2623,
"step": 1021
},
{
"epoch": 2.511056511056511,
"grad_norm": 1.1258163452148438,
"learning_rate": 1.3619420073841638e-06,
"loss": 0.2946,
"step": 1022
},
{
"epoch": 2.5135135135135136,
"grad_norm": 1.1019937992095947,
"learning_rate": 1.3486040806767997e-06,
"loss": 0.2761,
"step": 1023
},
{
"epoch": 2.515970515970516,
"grad_norm": 1.1924903392791748,
"learning_rate": 1.3353270630821713e-06,
"loss": 0.2674,
"step": 1024
},
{
"epoch": 2.5184275184275187,
"grad_norm": 1.318726658821106,
"learning_rate": 1.3221110480755306e-06,
"loss": 0.2704,
"step": 1025
},
{
"epoch": 2.520884520884521,
"grad_norm": 1.2188078165054321,
"learning_rate": 1.308956128702632e-06,
"loss": 0.2968,
"step": 1026
},
{
"epoch": 2.5233415233415233,
"grad_norm": 1.2859346866607666,
"learning_rate": 1.2958623975791118e-06,
"loss": 0.2819,
"step": 1027
},
{
"epoch": 2.5257985257985256,
"grad_norm": 1.233586311340332,
"learning_rate": 1.2828299468898075e-06,
"loss": 0.2779,
"step": 1028
},
{
"epoch": 2.5282555282555284,
"grad_norm": 1.1495767831802368,
"learning_rate": 1.2698588683881185e-06,
"loss": 0.2845,
"step": 1029
},
{
"epoch": 2.5307125307125307,
"grad_norm": 1.0828988552093506,
"learning_rate": 1.2569492533953664e-06,
"loss": 0.2778,
"step": 1030
},
{
"epoch": 2.5331695331695334,
"grad_norm": 1.1832849979400635,
"learning_rate": 1.2441011928001435e-06,
"loss": 0.289,
"step": 1031
},
{
"epoch": 2.5356265356265357,
"grad_norm": 1.2485005855560303,
"learning_rate": 1.231314777057675e-06,
"loss": 0.301,
"step": 1032
},
{
"epoch": 2.538083538083538,
"grad_norm": 1.331150770187378,
"learning_rate": 1.2185900961891794e-06,
"loss": 0.2788,
"step": 1033
},
{
"epoch": 2.5405405405405403,
"grad_norm": 1.1798036098480225,
"learning_rate": 1.2059272397812494e-06,
"loss": 0.2734,
"step": 1034
},
{
"epoch": 2.542997542997543,
"grad_norm": 1.0681538581848145,
"learning_rate": 1.1933262969851988e-06,
"loss": 0.2526,
"step": 1035
},
{
"epoch": 2.5454545454545454,
"grad_norm": 1.2806833982467651,
"learning_rate": 1.1807873565164507e-06,
"loss": 0.2725,
"step": 1036
},
{
"epoch": 2.547911547911548,
"grad_norm": 1.1350480318069458,
"learning_rate": 1.1683105066539068e-06,
"loss": 0.2657,
"step": 1037
},
{
"epoch": 2.5503685503685505,
"grad_norm": 1.1276003122329712,
"learning_rate": 1.1558958352393334e-06,
"loss": 0.2595,
"step": 1038
},
{
"epoch": 2.552825552825553,
"grad_norm": 1.1399433612823486,
"learning_rate": 1.1435434296767235e-06,
"loss": 0.2911,
"step": 1039
},
{
"epoch": 2.555282555282555,
"grad_norm": 1.2321282625198364,
"learning_rate": 1.1312533769317101e-06,
"loss": 0.2738,
"step": 1040
},
{
"epoch": 2.557739557739558,
"grad_norm": 1.122154712677002,
"learning_rate": 1.1190257635309276e-06,
"loss": 0.2789,
"step": 1041
},
{
"epoch": 2.56019656019656,
"grad_norm": 1.193674921989441,
"learning_rate": 1.1068606755614241e-06,
"loss": 0.2772,
"step": 1042
},
{
"epoch": 2.562653562653563,
"grad_norm": 1.2136112451553345,
"learning_rate": 1.0947581986700307e-06,
"loss": 0.2857,
"step": 1043
},
{
"epoch": 2.5651105651105652,
"grad_norm": 1.168110966682434,
"learning_rate": 1.0827184180627858e-06,
"loss": 0.2967,
"step": 1044
},
{
"epoch": 2.5675675675675675,
"grad_norm": 1.1920348405838013,
"learning_rate": 1.0707414185043163e-06,
"loss": 0.2717,
"step": 1045
},
{
"epoch": 2.57002457002457,
"grad_norm": 1.105496883392334,
"learning_rate": 1.0588272843172454e-06,
"loss": 0.2772,
"step": 1046
},
{
"epoch": 2.5724815724815726,
"grad_norm": 1.213555097579956,
"learning_rate": 1.0469760993816058e-06,
"loss": 0.2833,
"step": 1047
},
{
"epoch": 2.574938574938575,
"grad_norm": 1.1333016157150269,
"learning_rate": 1.0351879471342374e-06,
"loss": 0.2969,
"step": 1048
},
{
"epoch": 2.5773955773955772,
"grad_norm": 1.2033612728118896,
"learning_rate": 1.0234629105682104e-06,
"loss": 0.2883,
"step": 1049
},
{
"epoch": 2.57985257985258,
"grad_norm": 1.0642030239105225,
"learning_rate": 1.0118010722322313e-06,
"loss": 0.2479,
"step": 1050
},
{
"epoch": 2.5823095823095823,
"grad_norm": 1.2464839220046997,
"learning_rate": 1.0002025142300764e-06,
"loss": 0.255,
"step": 1051
},
{
"epoch": 2.5847665847665846,
"grad_norm": 1.2047244310379028,
"learning_rate": 9.886673182199958e-07,
"loss": 0.2825,
"step": 1052
},
{
"epoch": 2.5872235872235874,
"grad_norm": 1.172672152519226,
"learning_rate": 9.771955654141496e-07,
"loss": 0.2931,
"step": 1053
},
{
"epoch": 2.5896805896805897,
"grad_norm": 1.1367522478103638,
"learning_rate": 9.657873365780324e-07,
"loss": 0.2581,
"step": 1054
},
{
"epoch": 2.592137592137592,
"grad_norm": 1.2225159406661987,
"learning_rate": 9.544427120299139e-07,
"loss": 0.2879,
"step": 1055
},
{
"epoch": 2.5945945945945947,
"grad_norm": 1.236801028251648,
"learning_rate": 9.431617716402508e-07,
"loss": 0.3092,
"step": 1056
},
{
"epoch": 2.597051597051597,
"grad_norm": 1.2201226949691772,
"learning_rate": 9.319445948311534e-07,
"loss": 0.2772,
"step": 1057
},
{
"epoch": 2.5995085995085994,
"grad_norm": 1.0961273908615112,
"learning_rate": 9.207912605758052e-07,
"loss": 0.237,
"step": 1058
},
{
"epoch": 2.601965601965602,
"grad_norm": 1.198622465133667,
"learning_rate": 9.097018473979124e-07,
"loss": 0.2972,
"step": 1059
},
{
"epoch": 2.6044226044226044,
"grad_norm": 1.1575977802276611,
"learning_rate": 8.986764333711584e-07,
"loss": 0.2669,
"step": 1060
},
{
"epoch": 2.6068796068796067,
"grad_norm": 1.133432388305664,
"learning_rate": 8.87715096118642e-07,
"loss": 0.2863,
"step": 1061
},
{
"epoch": 2.6093366093366095,
"grad_norm": 1.2548049688339233,
"learning_rate": 8.768179128123456e-07,
"loss": 0.2829,
"step": 1062
},
{
"epoch": 2.611793611793612,
"grad_norm": 1.1735867261886597,
"learning_rate": 8.659849601725701e-07,
"loss": 0.3031,
"step": 1063
},
{
"epoch": 2.614250614250614,
"grad_norm": 1.1587166786193848,
"learning_rate": 8.55216314467422e-07,
"loss": 0.2957,
"step": 1064
},
{
"epoch": 2.616707616707617,
"grad_norm": 1.1736050844192505,
"learning_rate": 8.445120515122552e-07,
"loss": 0.2913,
"step": 1065
},
{
"epoch": 2.619164619164619,
"grad_norm": 1.1523027420043945,
"learning_rate": 8.338722466691451e-07,
"loss": 0.2941,
"step": 1066
},
{
"epoch": 2.6216216216216215,
"grad_norm": 1.1677874326705933,
"learning_rate": 8.232969748463571e-07,
"loss": 0.2821,
"step": 1067
},
{
"epoch": 2.6240786240786242,
"grad_norm": 1.081207513809204,
"learning_rate": 8.127863104978262e-07,
"loss": 0.292,
"step": 1068
},
{
"epoch": 2.6265356265356266,
"grad_norm": 1.1837942600250244,
"learning_rate": 8.023403276226127e-07,
"loss": 0.2864,
"step": 1069
},
{
"epoch": 2.628992628992629,
"grad_norm": 1.287129521369934,
"learning_rate": 7.919590997644111e-07,
"loss": 0.286,
"step": 1070
},
{
"epoch": 2.631449631449631,
"grad_norm": 1.3001762628555298,
"learning_rate": 7.816427000110016e-07,
"loss": 0.2688,
"step": 1071
},
{
"epoch": 2.633906633906634,
"grad_norm": 1.0917863845825195,
"learning_rate": 7.713912009937607e-07,
"loss": 0.2893,
"step": 1072
},
{
"epoch": 2.6363636363636362,
"grad_norm": 1.1937156915664673,
"learning_rate": 7.612046748871327e-07,
"loss": 0.2526,
"step": 1073
},
{
"epoch": 2.638820638820639,
"grad_norm": 1.1753548383712769,
"learning_rate": 7.510831934081309e-07,
"loss": 0.3071,
"step": 1074
},
{
"epoch": 2.6412776412776413,
"grad_norm": 1.205013632774353,
"learning_rate": 7.410268278158273e-07,
"loss": 0.2828,
"step": 1075
},
{
"epoch": 2.6437346437346436,
"grad_norm": 1.2608375549316406,
"learning_rate": 7.310356489108539e-07,
"loss": 0.3012,
"step": 1076
},
{
"epoch": 2.646191646191646,
"grad_norm": 1.3116860389709473,
"learning_rate": 7.211097270349065e-07,
"loss": 0.3083,
"step": 1077
},
{
"epoch": 2.6486486486486487,
"grad_norm": 1.1478344202041626,
"learning_rate": 7.112491320702441e-07,
"loss": 0.2746,
"step": 1078
},
{
"epoch": 2.651105651105651,
"grad_norm": 1.1642433404922485,
"learning_rate": 7.014539334392012e-07,
"loss": 0.2602,
"step": 1079
},
{
"epoch": 2.6535626535626538,
"grad_norm": 1.2262073755264282,
"learning_rate": 6.917242001036916e-07,
"loss": 0.292,
"step": 1080
},
{
"epoch": 2.656019656019656,
"grad_norm": 1.0907559394836426,
"learning_rate": 6.820600005647382e-07,
"loss": 0.2751,
"step": 1081
},
{
"epoch": 2.6584766584766584,
"grad_norm": 1.1675864458084106,
"learning_rate": 6.724614028619736e-07,
"loss": 0.3053,
"step": 1082
},
{
"epoch": 2.6609336609336607,
"grad_norm": 1.0712223052978516,
"learning_rate": 6.629284745731701e-07,
"loss": 0.2692,
"step": 1083
},
{
"epoch": 2.6633906633906634,
"grad_norm": 1.0594029426574707,
"learning_rate": 6.53461282813762e-07,
"loss": 0.2844,
"step": 1084
},
{
"epoch": 2.6658476658476657,
"grad_norm": 1.1743744611740112,
"learning_rate": 6.440598942363796e-07,
"loss": 0.2781,
"step": 1085
},
{
"epoch": 2.6683046683046685,
"grad_norm": 1.264631748199463,
"learning_rate": 6.347243750303623e-07,
"loss": 0.2892,
"step": 1086
},
{
"epoch": 2.670761670761671,
"grad_norm": 1.218990683555603,
"learning_rate": 6.254547909213149e-07,
"loss": 0.2764,
"step": 1087
},
{
"epoch": 2.673218673218673,
"grad_norm": 1.1565907001495361,
"learning_rate": 6.162512071706272e-07,
"loss": 0.3149,
"step": 1088
},
{
"epoch": 2.6756756756756754,
"grad_norm": 1.0758461952209473,
"learning_rate": 6.071136885750272e-07,
"loss": 0.2777,
"step": 1089
},
{
"epoch": 2.678132678132678,
"grad_norm": 1.3106664419174194,
"learning_rate": 5.980422994661139e-07,
"loss": 0.2998,
"step": 1090
},
{
"epoch": 2.6805896805896805,
"grad_norm": 1.1924840211868286,
"learning_rate": 5.890371037099107e-07,
"loss": 0.2812,
"step": 1091
},
{
"epoch": 2.6830466830466833,
"grad_norm": 1.2186545133590698,
"learning_rate": 5.800981647064186e-07,
"loss": 0.2615,
"step": 1092
},
{
"epoch": 2.6855036855036856,
"grad_norm": 1.1541470289230347,
"learning_rate": 5.71225545389158e-07,
"loss": 0.2529,
"step": 1093
},
{
"epoch": 2.687960687960688,
"grad_norm": 1.0920991897583008,
"learning_rate": 5.624193082247431e-07,
"loss": 0.26,
"step": 1094
},
{
"epoch": 2.69041769041769,
"grad_norm": 1.1850959062576294,
"learning_rate": 5.536795152124253e-07,
"loss": 0.2716,
"step": 1095
},
{
"epoch": 2.692874692874693,
"grad_norm": 1.1443251371383667,
"learning_rate": 5.450062278836677e-07,
"loss": 0.2753,
"step": 1096
},
{
"epoch": 2.6953316953316953,
"grad_norm": 1.2090981006622314,
"learning_rate": 5.363995073017047e-07,
"loss": 0.285,
"step": 1097
},
{
"epoch": 2.697788697788698,
"grad_norm": 1.13853120803833,
"learning_rate": 5.278594140611204e-07,
"loss": 0.2684,
"step": 1098
},
{
"epoch": 2.7002457002457003,
"grad_norm": 1.1839234828948975,
"learning_rate": 5.193860082874125e-07,
"loss": 0.2889,
"step": 1099
},
{
"epoch": 2.7027027027027026,
"grad_norm": 1.1700607538223267,
"learning_rate": 5.109793496365767e-07,
"loss": 0.293,
"step": 1100
},
{
"epoch": 2.705159705159705,
"grad_norm": 1.210724115371704,
"learning_rate": 5.026394972946813e-07,
"loss": 0.242,
"step": 1101
},
{
"epoch": 2.7076167076167077,
"grad_norm": 1.1651077270507812,
"learning_rate": 4.943665099774553e-07,
"loss": 0.253,
"step": 1102
},
{
"epoch": 2.71007371007371,
"grad_norm": 1.3025398254394531,
"learning_rate": 4.861604459298697e-07,
"loss": 0.2538,
"step": 1103
},
{
"epoch": 2.7125307125307128,
"grad_norm": 1.3783848285675049,
"learning_rate": 4.780213629257324e-07,
"loss": 0.298,
"step": 1104
},
{
"epoch": 2.714987714987715,
"grad_norm": 1.200514793395996,
"learning_rate": 4.6994931826728094e-07,
"loss": 0.3035,
"step": 1105
},
{
"epoch": 2.7174447174447174,
"grad_norm": 1.2227789163589478,
"learning_rate": 4.6194436878477024e-07,
"loss": 0.2535,
"step": 1106
},
{
"epoch": 2.7199017199017197,
"grad_norm": 1.1504371166229248,
"learning_rate": 4.5400657083608857e-07,
"loss": 0.2884,
"step": 1107
},
{
"epoch": 2.7223587223587224,
"grad_norm": 1.082088828086853,
"learning_rate": 4.4613598030634585e-07,
"loss": 0.2622,
"step": 1108
},
{
"epoch": 2.7248157248157248,
"grad_norm": 1.182896375656128,
"learning_rate": 4.3833265260749157e-07,
"loss": 0.255,
"step": 1109
},
{
"epoch": 2.7272727272727275,
"grad_norm": 1.1281861066818237,
"learning_rate": 4.305966426779118e-07,
"loss": 0.3047,
"step": 1110
},
{
"epoch": 2.72972972972973,
"grad_norm": 1.2978154420852661,
"learning_rate": 4.229280049820561e-07,
"loss": 0.2818,
"step": 1111
},
{
"epoch": 2.732186732186732,
"grad_norm": 1.1349396705627441,
"learning_rate": 4.15326793510048e-07,
"loss": 0.2761,
"step": 1112
},
{
"epoch": 2.7346437346437344,
"grad_norm": 1.2176555395126343,
"learning_rate": 4.077930617773007e-07,
"loss": 0.2848,
"step": 1113
},
{
"epoch": 2.737100737100737,
"grad_norm": 1.2432841062545776,
"learning_rate": 4.0032686282414525e-07,
"loss": 0.27,
"step": 1114
},
{
"epoch": 2.7395577395577395,
"grad_norm": 1.237545132637024,
"learning_rate": 3.929282492154607e-07,
"loss": 0.3215,
"step": 1115
},
{
"epoch": 2.7420147420147423,
"grad_norm": 1.1233947277069092,
"learning_rate": 3.855972730402968e-07,
"loss": 0.2894,
"step": 1116
},
{
"epoch": 2.7444717444717446,
"grad_norm": 1.1773197650909424,
"learning_rate": 3.783339859115065e-07,
"loss": 0.2755,
"step": 1117
},
{
"epoch": 2.746928746928747,
"grad_norm": 1.1950693130493164,
"learning_rate": 3.711384389653916e-07,
"loss": 0.2796,
"step": 1118
},
{
"epoch": 2.749385749385749,
"grad_norm": 1.2141116857528687,
"learning_rate": 3.6401068286133544e-07,
"loss": 0.2647,
"step": 1119
},
{
"epoch": 2.751842751842752,
"grad_norm": 1.0938400030136108,
"learning_rate": 3.569507677814488e-07,
"loss": 0.255,
"step": 1120
},
{
"epoch": 2.7542997542997543,
"grad_norm": 1.1651341915130615,
"learning_rate": 3.49958743430211e-07,
"loss": 0.3147,
"step": 1121
},
{
"epoch": 2.756756756756757,
"grad_norm": 1.1536781787872314,
"learning_rate": 3.430346590341338e-07,
"loss": 0.2439,
"step": 1122
},
{
"epoch": 2.7592137592137593,
"grad_norm": 1.1681103706359863,
"learning_rate": 3.361785633413961e-07,
"loss": 0.302,
"step": 1123
},
{
"epoch": 2.7616707616707616,
"grad_norm": 1.232194423675537,
"learning_rate": 3.2939050462151957e-07,
"loss": 0.2695,
"step": 1124
},
{
"epoch": 2.764127764127764,
"grad_norm": 1.1450867652893066,
"learning_rate": 3.226705306650113e-07,
"loss": 0.2851,
"step": 1125
},
{
"epoch": 2.7665847665847667,
"grad_norm": 1.3288469314575195,
"learning_rate": 3.160186887830441e-07,
"loss": 0.302,
"step": 1126
},
{
"epoch": 2.769041769041769,
"grad_norm": 1.1425598859786987,
"learning_rate": 3.0943502580710773e-07,
"loss": 0.2622,
"step": 1127
},
{
"epoch": 2.7714987714987718,
"grad_norm": 1.1829224824905396,
"learning_rate": 3.029195880886904e-07,
"loss": 0.2634,
"step": 1128
},
{
"epoch": 2.773955773955774,
"grad_norm": 1.1650996208190918,
"learning_rate": 2.9647242149895005e-07,
"loss": 0.2707,
"step": 1129
},
{
"epoch": 2.7764127764127764,
"grad_norm": 1.126253604888916,
"learning_rate": 2.900935714283848e-07,
"loss": 0.2745,
"step": 1130
},
{
"epoch": 2.7788697788697787,
"grad_norm": 1.1476022005081177,
"learning_rate": 2.837830827865229e-07,
"loss": 0.2736,
"step": 1131
},
{
"epoch": 2.7813267813267815,
"grad_norm": 1.1921050548553467,
"learning_rate": 2.775410000016021e-07,
"loss": 0.2842,
"step": 1132
},
{
"epoch": 2.7837837837837838,
"grad_norm": 1.04782235622406,
"learning_rate": 2.7136736702025436e-07,
"loss": 0.2721,
"step": 1133
},
{
"epoch": 2.786240786240786,
"grad_norm": 1.1524274349212646,
"learning_rate": 2.652622273072003e-07,
"loss": 0.2634,
"step": 1134
},
{
"epoch": 2.788697788697789,
"grad_norm": 1.07859206199646,
"learning_rate": 2.5922562384494197e-07,
"loss": 0.2449,
"step": 1135
},
{
"epoch": 2.791154791154791,
"grad_norm": 1.0668470859527588,
"learning_rate": 2.532575991334618e-07,
"loss": 0.2661,
"step": 1136
},
{
"epoch": 2.7936117936117935,
"grad_norm": 1.3293726444244385,
"learning_rate": 2.473581951899184e-07,
"loss": 0.2754,
"step": 1137
},
{
"epoch": 2.796068796068796,
"grad_norm": 1.3514832258224487,
"learning_rate": 2.415274535483547e-07,
"loss": 0.3009,
"step": 1138
},
{
"epoch": 2.7985257985257985,
"grad_norm": 1.1948869228363037,
"learning_rate": 2.3576541525941132e-07,
"loss": 0.2922,
"step": 1139
},
{
"epoch": 2.800982800982801,
"grad_norm": 1.2372150421142578,
"learning_rate": 2.300721208900192e-07,
"loss": 0.2998,
"step": 1140
},
{
"epoch": 2.8034398034398036,
"grad_norm": 1.1227543354034424,
"learning_rate": 2.2444761052313857e-07,
"loss": 0.2711,
"step": 1141
},
{
"epoch": 2.805896805896806,
"grad_norm": 1.199108600616455,
"learning_rate": 2.1889192375745494e-07,
"loss": 0.2866,
"step": 1142
},
{
"epoch": 2.808353808353808,
"grad_norm": 1.3015297651290894,
"learning_rate": 2.1340509970711466e-07,
"loss": 0.2932,
"step": 1143
},
{
"epoch": 2.810810810810811,
"grad_norm": 1.2696152925491333,
"learning_rate": 2.0798717700144078e-07,
"loss": 0.2987,
"step": 1144
},
{
"epoch": 2.8132678132678133,
"grad_norm": 1.140032410621643,
"learning_rate": 2.0263819378466888e-07,
"loss": 0.2619,
"step": 1145
},
{
"epoch": 2.8157248157248156,
"grad_norm": 1.0940791368484497,
"learning_rate": 1.973581877156716e-07,
"loss": 0.2952,
"step": 1146
},
{
"epoch": 2.8181818181818183,
"grad_norm": 1.2078993320465088,
"learning_rate": 1.921471959676957e-07,
"loss": 0.2713,
"step": 1147
},
{
"epoch": 2.8206388206388207,
"grad_norm": 1.1918680667877197,
"learning_rate": 1.870052552281032e-07,
"loss": 0.2639,
"step": 1148
},
{
"epoch": 2.823095823095823,
"grad_norm": 1.1544560194015503,
"learning_rate": 1.8193240169810943e-07,
"loss": 0.2788,
"step": 1149
},
{
"epoch": 2.8255528255528253,
"grad_norm": 1.2254925966262817,
"learning_rate": 1.7692867109252888e-07,
"loss": 0.2613,
"step": 1150
},
{
"epoch": 2.828009828009828,
"grad_norm": 1.1546679735183716,
"learning_rate": 1.719940986395252e-07,
"loss": 0.3007,
"step": 1151
},
{
"epoch": 2.8304668304668303,
"grad_norm": 1.0897902250289917,
"learning_rate": 1.6712871908036387e-07,
"loss": 0.2689,
"step": 1152
},
{
"epoch": 2.832923832923833,
"grad_norm": 1.3028407096862793,
"learning_rate": 1.623325666691644e-07,
"loss": 0.2945,
"step": 1153
},
{
"epoch": 2.8353808353808354,
"grad_norm": 1.1586449146270752,
"learning_rate": 1.5760567517266068e-07,
"loss": 0.2778,
"step": 1154
},
{
"epoch": 2.8378378378378377,
"grad_norm": 1.1948736906051636,
"learning_rate": 1.5294807786996212e-07,
"loss": 0.2989,
"step": 1155
},
{
"epoch": 2.84029484029484,
"grad_norm": 1.1248109340667725,
"learning_rate": 1.4835980755232626e-07,
"loss": 0.2825,
"step": 1156
},
{
"epoch": 2.842751842751843,
"grad_norm": 1.2164751291275024,
"learning_rate": 1.4384089652291544e-07,
"loss": 0.2964,
"step": 1157
},
{
"epoch": 2.845208845208845,
"grad_norm": 1.1897505521774292,
"learning_rate": 1.3939137659658153e-07,
"loss": 0.282,
"step": 1158
},
{
"epoch": 2.847665847665848,
"grad_norm": 1.2657814025878906,
"learning_rate": 1.3501127909963275e-07,
"loss": 0.3165,
"step": 1159
},
{
"epoch": 2.85012285012285,
"grad_norm": 1.1859006881713867,
"learning_rate": 1.3070063486961937e-07,
"loss": 0.2956,
"step": 1160
},
{
"epoch": 2.8525798525798525,
"grad_norm": 1.1239105463027954,
"learning_rate": 1.2645947425511397e-07,
"loss": 0.2981,
"step": 1161
},
{
"epoch": 2.855036855036855,
"grad_norm": 1.2066782712936401,
"learning_rate": 1.2228782711549924e-07,
"loss": 0.2862,
"step": 1162
},
{
"epoch": 2.8574938574938575,
"grad_norm": 1.241547703742981,
"learning_rate": 1.1818572282075392e-07,
"loss": 0.2795,
"step": 1163
},
{
"epoch": 2.85995085995086,
"grad_norm": 1.1320499181747437,
"learning_rate": 1.1415319025124938e-07,
"loss": 0.2577,
"step": 1164
},
{
"epoch": 2.8624078624078626,
"grad_norm": 1.3028688430786133,
"learning_rate": 1.1019025779754666e-07,
"loss": 0.301,
"step": 1165
},
{
"epoch": 2.864864864864865,
"grad_norm": 1.0557419061660767,
"learning_rate": 1.0629695336019763e-07,
"loss": 0.2667,
"step": 1166
},
{
"epoch": 2.8673218673218672,
"grad_norm": 1.2495602369308472,
"learning_rate": 1.0247330434954073e-07,
"loss": 0.2856,
"step": 1167
},
{
"epoch": 2.8697788697788695,
"grad_norm": 1.1358976364135742,
"learning_rate": 9.87193376855189e-08,
"loss": 0.2883,
"step": 1168
},
{
"epoch": 2.8722358722358723,
"grad_norm": 1.2297807931900024,
"learning_rate": 9.503507979748305e-08,
"loss": 0.318,
"step": 1169
},
{
"epoch": 2.8746928746928746,
"grad_norm": 1.1552633047103882,
"learning_rate": 9.142055662400672e-08,
"loss": 0.273,
"step": 1170
},
{
"epoch": 2.8771498771498774,
"grad_norm": 1.249558925628662,
"learning_rate": 8.787579361270616e-08,
"loss": 0.2796,
"step": 1171
},
{
"epoch": 2.8796068796068797,
"grad_norm": 1.194430947303772,
"learning_rate": 8.440081572005931e-08,
"loss": 0.2586,
"step": 1172
},
{
"epoch": 2.882063882063882,
"grad_norm": 1.0738084316253662,
"learning_rate": 8.099564741123167e-08,
"loss": 0.2687,
"step": 1173
},
{
"epoch": 2.8845208845208843,
"grad_norm": 1.1545612812042236,
"learning_rate": 7.766031265989849e-08,
"loss": 0.2653,
"step": 1174
},
{
"epoch": 2.886977886977887,
"grad_norm": 1.1456379890441895,
"learning_rate": 7.439483494808498e-08,
"loss": 0.2829,
"step": 1175
},
{
"epoch": 2.8894348894348894,
"grad_norm": 1.156511902809143,
"learning_rate": 7.119923726599421e-08,
"loss": 0.285,
"step": 1176
},
{
"epoch": 2.891891891891892,
"grad_norm": 1.2387841939926147,
"learning_rate": 6.807354211184613e-08,
"loss": 0.2733,
"step": 1177
},
{
"epoch": 2.8943488943488944,
"grad_norm": 1.1624895334243774,
"learning_rate": 6.501777149172328e-08,
"loss": 0.2839,
"step": 1178
},
{
"epoch": 2.8968058968058967,
"grad_norm": 1.15232253074646,
"learning_rate": 6.203194691940973e-08,
"loss": 0.2959,
"step": 1179
},
{
"epoch": 2.899262899262899,
"grad_norm": 1.06272292137146,
"learning_rate": 5.9116089416246803e-08,
"loss": 0.2555,
"step": 1180
},
{
"epoch": 2.901719901719902,
"grad_norm": 1.2629801034927368,
"learning_rate": 5.6270219510975445e-08,
"loss": 0.2869,
"step": 1181
},
{
"epoch": 2.904176904176904,
"grad_norm": 1.2183952331542969,
"learning_rate": 5.349435723960184e-08,
"loss": 0.3002,
"step": 1182
},
{
"epoch": 2.906633906633907,
"grad_norm": 1.1183772087097168,
"learning_rate": 5.078852214525198e-08,
"loss": 0.2884,
"step": 1183
},
{
"epoch": 2.909090909090909,
"grad_norm": 1.1172285079956055,
"learning_rate": 4.815273327803183e-08,
"loss": 0.2807,
"step": 1184
},
{
"epoch": 2.9115479115479115,
"grad_norm": 1.1453272104263306,
"learning_rate": 4.5587009194894005e-08,
"loss": 0.2751,
"step": 1185
},
{
"epoch": 2.914004914004914,
"grad_norm": 1.0735313892364502,
"learning_rate": 4.309136795951241e-08,
"loss": 0.2466,
"step": 1186
},
{
"epoch": 2.9164619164619165,
"grad_norm": 1.0989115238189697,
"learning_rate": 4.066582714214895e-08,
"loss": 0.2747,
"step": 1187
},
{
"epoch": 2.918918918918919,
"grad_norm": 1.0996559858322144,
"learning_rate": 3.831040381953144e-08,
"loss": 0.2654,
"step": 1188
},
{
"epoch": 2.9213759213759216,
"grad_norm": 1.115569829940796,
"learning_rate": 3.602511457473479e-08,
"loss": 0.2533,
"step": 1189
},
{
"epoch": 2.923832923832924,
"grad_norm": 1.195621371269226,
"learning_rate": 3.380997549706444e-08,
"loss": 0.2631,
"step": 1190
},
{
"epoch": 2.9262899262899262,
"grad_norm": 1.1971734762191772,
"learning_rate": 3.1665002181937575e-08,
"loss": 0.2753,
"step": 1191
},
{
"epoch": 2.9287469287469285,
"grad_norm": 1.1896756887435913,
"learning_rate": 2.9590209730784302e-08,
"loss": 0.2725,
"step": 1192
},
{
"epoch": 2.9312039312039313,
"grad_norm": 1.0904500484466553,
"learning_rate": 2.758561275092886e-08,
"loss": 0.2584,
"step": 1193
},
{
"epoch": 2.9336609336609336,
"grad_norm": 1.1623117923736572,
"learning_rate": 2.5651225355497466e-08,
"loss": 0.2706,
"step": 1194
},
{
"epoch": 2.9361179361179364,
"grad_norm": 1.1518123149871826,
"learning_rate": 2.3787061163309533e-08,
"loss": 0.2932,
"step": 1195
},
{
"epoch": 2.9385749385749387,
"grad_norm": 1.278153419494629,
"learning_rate": 2.1993133298791047e-08,
"loss": 0.2776,
"step": 1196
},
{
"epoch": 2.941031941031941,
"grad_norm": 1.205899715423584,
"learning_rate": 2.0269454391874665e-08,
"loss": 0.278,
"step": 1197
},
{
"epoch": 2.9434889434889433,
"grad_norm": 1.2302438020706177,
"learning_rate": 1.861603657791422e-08,
"loss": 0.2969,
"step": 1198
},
{
"epoch": 2.945945945945946,
"grad_norm": 1.1887091398239136,
"learning_rate": 1.7032891497600347e-08,
"loss": 0.2893,
"step": 1199
},
{
"epoch": 2.9484029484029484,
"grad_norm": 1.1503926515579224,
"learning_rate": 1.5520030296873877e-08,
"loss": 0.2682,
"step": 1200
},
{
"epoch": 2.950859950859951,
"grad_norm": 1.2009198665618896,
"learning_rate": 1.4077463626852584e-08,
"loss": 0.276,
"step": 1201
},
{
"epoch": 2.9533169533169534,
"grad_norm": 1.10666024684906,
"learning_rate": 1.270520164375344e-08,
"loss": 0.2661,
"step": 1202
},
{
"epoch": 2.9557739557739557,
"grad_norm": 1.2643858194351196,
"learning_rate": 1.1403254008822695e-08,
"loss": 0.2768,
"step": 1203
},
{
"epoch": 2.958230958230958,
"grad_norm": 1.1746954917907715,
"learning_rate": 1.0171629888265921e-08,
"loss": 0.2755,
"step": 1204
},
{
"epoch": 2.960687960687961,
"grad_norm": 1.2816386222839355,
"learning_rate": 9.010337953185843e-09,
"loss": 0.3065,
"step": 1205
},
{
"epoch": 2.963144963144963,
"grad_norm": 1.1486769914627075,
"learning_rate": 7.919386379515726e-09,
"loss": 0.2906,
"step": 1206
},
{
"epoch": 2.965601965601966,
"grad_norm": 1.0958787202835083,
"learning_rate": 6.8987828479716304e-09,
"loss": 0.2905,
"step": 1207
},
{
"epoch": 2.968058968058968,
"grad_norm": 1.1250931024551392,
"learning_rate": 5.948534543988027e-09,
"loss": 0.2816,
"step": 1208
},
{
"epoch": 2.9705159705159705,
"grad_norm": 1.084580659866333,
"learning_rate": 5.068648157675604e-09,
"loss": 0.2873,
"step": 1209
},
{
"epoch": 2.972972972972973,
"grad_norm": 1.1390430927276611,
"learning_rate": 4.259129883767976e-09,
"loss": 0.2621,
"step": 1210
},
{
"epoch": 2.9754299754299756,
"grad_norm": 1.2942683696746826,
"learning_rate": 3.5199854215817176e-09,
"loss": 0.2856,
"step": 1211
},
{
"epoch": 2.977886977886978,
"grad_norm": 1.0806019306182861,
"learning_rate": 2.851219974973063e-09,
"loss": 0.3059,
"step": 1212
},
{
"epoch": 2.98034398034398,
"grad_norm": 1.1289433240890503,
"learning_rate": 2.2528382523057115e-09,
"loss": 0.279,
"step": 1213
},
{
"epoch": 2.982800982800983,
"grad_norm": 1.2074559926986694,
"learning_rate": 1.7248444664141882e-09,
"loss": 0.2985,
"step": 1214
},
{
"epoch": 2.9852579852579852,
"grad_norm": 1.0647261142730713,
"learning_rate": 1.2672423345760908e-09,
"loss": 0.258,
"step": 1215
},
{
"epoch": 2.9877149877149876,
"grad_norm": 1.2075697183609009,
"learning_rate": 8.80035078482111e-10,
"loss": 0.2782,
"step": 1216
},
{
"epoch": 2.9901719901719903,
"grad_norm": 1.1851577758789062,
"learning_rate": 5.632254242204926e-10,
"loss": 0.2759,
"step": 1217
},
{
"epoch": 2.9926289926289926,
"grad_norm": 1.156044840812683,
"learning_rate": 3.1681560225038657e-10,
"loss": 0.2875,
"step": 1218
},
{
"epoch": 2.995085995085995,
"grad_norm": 1.169237494468689,
"learning_rate": 1.4080734739074787e-10,
"loss": 0.2794,
"step": 1219
},
{
"epoch": 2.9975429975429977,
"grad_norm": 1.1338391304016113,
"learning_rate": 3.52018988059033e-11,
"loss": 0.286,
"step": 1220
},
{
"epoch": 3.0,
"grad_norm": 1.1234797239303589,
"learning_rate": 0.0,
"loss": 0.2237,
"step": 1221
},
{
"epoch": 3.0,
"step": 1221,
"total_flos": 3.3710113113023447e+18,
"train_loss": 0.6036928490011737,
"train_runtime": 9045.1389,
"train_samples_per_second": 17.247,
"train_steps_per_second": 0.135
}
],
"logging_steps": 1,
"max_steps": 1221,
"num_input_tokens_seen": 0,
"num_train_epochs": 3,
"save_steps": 0,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 3.3710113113023447e+18,
"train_batch_size": 32,
"trial_name": null,
"trial_params": null
}