msinkinst-ep1-ckpt / trainer_state.json
Fizzarolli's picture
Upload folder using huggingface_hub
a2f002c verified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 1.001669449081803,
"eval_steps": 500,
"global_step": 600,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.001669449081803005,
"grad_norm": 0.0,
"learning_rate": 2.0000000000000002e-07,
"loss": 2.0754,
"step": 1
},
{
"epoch": 0.00333889816360601,
"grad_norm": 0.0,
"learning_rate": 4.0000000000000003e-07,
"loss": 1.8508,
"step": 2
},
{
"epoch": 0.005008347245409015,
"grad_norm": 0.0,
"learning_rate": 6.000000000000001e-07,
"loss": 1.7493,
"step": 3
},
{
"epoch": 0.00667779632721202,
"grad_norm": 0.0,
"learning_rate": 8.000000000000001e-07,
"loss": 1.7768,
"step": 4
},
{
"epoch": 0.008347245409015025,
"grad_norm": 0.0,
"learning_rate": 1.0000000000000002e-06,
"loss": 1.8859,
"step": 5
},
{
"epoch": 0.01001669449081803,
"grad_norm": 0.0,
"learning_rate": 1.2000000000000002e-06,
"loss": 2.1114,
"step": 6
},
{
"epoch": 0.011686143572621035,
"grad_norm": 0.0,
"learning_rate": 1.4000000000000001e-06,
"loss": 1.8268,
"step": 7
},
{
"epoch": 0.01335559265442404,
"grad_norm": 0.0,
"learning_rate": 1.6000000000000001e-06,
"loss": 1.427,
"step": 8
},
{
"epoch": 0.015025041736227046,
"grad_norm": 0.0,
"learning_rate": 1.8000000000000001e-06,
"loss": 1.8472,
"step": 9
},
{
"epoch": 0.01669449081803005,
"grad_norm": 0.0,
"learning_rate": 2.0000000000000003e-06,
"loss": 1.7571,
"step": 10
},
{
"epoch": 0.018363939899833055,
"grad_norm": 0.0,
"learning_rate": 2.2e-06,
"loss": 1.8135,
"step": 11
},
{
"epoch": 0.02003338898163606,
"grad_norm": 0.0,
"learning_rate": 2.4000000000000003e-06,
"loss": 1.6594,
"step": 12
},
{
"epoch": 0.021702838063439065,
"grad_norm": 0.0,
"learning_rate": 2.6e-06,
"loss": 1.8615,
"step": 13
},
{
"epoch": 0.02337228714524207,
"grad_norm": 0.0,
"learning_rate": 2.8000000000000003e-06,
"loss": 1.9803,
"step": 14
},
{
"epoch": 0.025041736227045076,
"grad_norm": 0.0,
"learning_rate": 3e-06,
"loss": 1.9604,
"step": 15
},
{
"epoch": 0.02671118530884808,
"grad_norm": 0.0,
"learning_rate": 3.2000000000000003e-06,
"loss": 1.8476,
"step": 16
},
{
"epoch": 0.028380634390651086,
"grad_norm": 0.0,
"learning_rate": 3.4000000000000005e-06,
"loss": 1.71,
"step": 17
},
{
"epoch": 0.03005008347245409,
"grad_norm": 0.0,
"learning_rate": 3.6000000000000003e-06,
"loss": 1.9865,
"step": 18
},
{
"epoch": 0.03171953255425709,
"grad_norm": 0.0,
"learning_rate": 3.8000000000000005e-06,
"loss": 1.9356,
"step": 19
},
{
"epoch": 0.0333889816360601,
"grad_norm": 0.0,
"learning_rate": 4.000000000000001e-06,
"loss": 1.6531,
"step": 20
},
{
"epoch": 0.035058430717863104,
"grad_norm": 0.0,
"learning_rate": 4.2000000000000004e-06,
"loss": 1.6921,
"step": 21
},
{
"epoch": 0.03672787979966611,
"grad_norm": 0.0,
"learning_rate": 4.4e-06,
"loss": 1.7874,
"step": 22
},
{
"epoch": 0.038397328881469114,
"grad_norm": 0.0,
"learning_rate": 4.600000000000001e-06,
"loss": 1.8481,
"step": 23
},
{
"epoch": 0.04006677796327212,
"grad_norm": 0.0,
"learning_rate": 4.800000000000001e-06,
"loss": 1.6141,
"step": 24
},
{
"epoch": 0.041736227045075125,
"grad_norm": 0.0,
"learning_rate": 5e-06,
"loss": 1.621,
"step": 25
},
{
"epoch": 0.04340567612687813,
"grad_norm": 0.0,
"learning_rate": 5.2e-06,
"loss": 1.5873,
"step": 26
},
{
"epoch": 0.045075125208681135,
"grad_norm": 0.0,
"learning_rate": 5.400000000000001e-06,
"loss": 1.6264,
"step": 27
},
{
"epoch": 0.04674457429048414,
"grad_norm": 0.0,
"learning_rate": 5.600000000000001e-06,
"loss": 1.7444,
"step": 28
},
{
"epoch": 0.048414023372287146,
"grad_norm": 0.0,
"learning_rate": 5.8e-06,
"loss": 1.9601,
"step": 29
},
{
"epoch": 0.05008347245409015,
"grad_norm": 0.0,
"learning_rate": 6e-06,
"loss": 1.7666,
"step": 30
},
{
"epoch": 0.05175292153589316,
"grad_norm": 0.0,
"learning_rate": 6.200000000000001e-06,
"loss": 1.973,
"step": 31
},
{
"epoch": 0.05342237061769616,
"grad_norm": 0.0,
"learning_rate": 6.4000000000000006e-06,
"loss": 1.6661,
"step": 32
},
{
"epoch": 0.05509181969949917,
"grad_norm": 0.0,
"learning_rate": 6.600000000000001e-06,
"loss": 1.9688,
"step": 33
},
{
"epoch": 0.05676126878130217,
"grad_norm": 0.0,
"learning_rate": 6.800000000000001e-06,
"loss": 1.9153,
"step": 34
},
{
"epoch": 0.05843071786310518,
"grad_norm": 0.0,
"learning_rate": 7e-06,
"loss": 1.804,
"step": 35
},
{
"epoch": 0.06010016694490818,
"grad_norm": 0.0,
"learning_rate": 7.2000000000000005e-06,
"loss": 1.7318,
"step": 36
},
{
"epoch": 0.06176961602671119,
"grad_norm": 0.0,
"learning_rate": 7.4e-06,
"loss": 2.0672,
"step": 37
},
{
"epoch": 0.06343906510851419,
"grad_norm": 0.0,
"learning_rate": 7.600000000000001e-06,
"loss": 1.8864,
"step": 38
},
{
"epoch": 0.0651085141903172,
"grad_norm": 0.0,
"learning_rate": 7.800000000000002e-06,
"loss": 1.7625,
"step": 39
},
{
"epoch": 0.0667779632721202,
"grad_norm": 0.0,
"learning_rate": 8.000000000000001e-06,
"loss": 1.7466,
"step": 40
},
{
"epoch": 0.06844741235392321,
"grad_norm": 0.0,
"learning_rate": 8.2e-06,
"loss": 1.5579,
"step": 41
},
{
"epoch": 0.07011686143572621,
"grad_norm": 0.0,
"learning_rate": 8.400000000000001e-06,
"loss": 1.83,
"step": 42
},
{
"epoch": 0.07178631051752922,
"grad_norm": 0.0,
"learning_rate": 8.6e-06,
"loss": 1.9467,
"step": 43
},
{
"epoch": 0.07345575959933222,
"grad_norm": 0.0,
"learning_rate": 8.8e-06,
"loss": 1.9879,
"step": 44
},
{
"epoch": 0.07512520868113523,
"grad_norm": 0.0,
"learning_rate": 9e-06,
"loss": 1.9357,
"step": 45
},
{
"epoch": 0.07679465776293823,
"grad_norm": 0.0,
"learning_rate": 9.200000000000002e-06,
"loss": 1.7311,
"step": 46
},
{
"epoch": 0.07846410684474124,
"grad_norm": 0.0,
"learning_rate": 9.4e-06,
"loss": 1.658,
"step": 47
},
{
"epoch": 0.08013355592654424,
"grad_norm": 0.0,
"learning_rate": 9.600000000000001e-06,
"loss": 1.6306,
"step": 48
},
{
"epoch": 0.08180300500834725,
"grad_norm": 0.0,
"learning_rate": 9.800000000000001e-06,
"loss": 1.783,
"step": 49
},
{
"epoch": 0.08347245409015025,
"grad_norm": 0.0,
"learning_rate": 1e-05,
"loss": 1.8001,
"step": 50
},
{
"epoch": 0.08514190317195326,
"grad_norm": 0.0,
"learning_rate": 9.999981277850585e-06,
"loss": 1.6999,
"step": 51
},
{
"epoch": 0.08681135225375626,
"grad_norm": 0.0,
"learning_rate": 9.999925111542544e-06,
"loss": 1.7434,
"step": 52
},
{
"epoch": 0.08848080133555926,
"grad_norm": 0.0,
"learning_rate": 9.999831501496497e-06,
"loss": 1.8493,
"step": 53
},
{
"epoch": 0.09015025041736227,
"grad_norm": 0.0,
"learning_rate": 9.999700448413483e-06,
"loss": 2.01,
"step": 54
},
{
"epoch": 0.09181969949916527,
"grad_norm": 0.0,
"learning_rate": 9.999531953274934e-06,
"loss": 1.8475,
"step": 55
},
{
"epoch": 0.09348914858096828,
"grad_norm": 0.0,
"learning_rate": 9.999326017342688e-06,
"loss": 1.5294,
"step": 56
},
{
"epoch": 0.09515859766277128,
"grad_norm": 0.0,
"learning_rate": 9.999082642158972e-06,
"loss": 1.6968,
"step": 57
},
{
"epoch": 0.09682804674457429,
"grad_norm": 0.0,
"learning_rate": 9.998801829546387e-06,
"loss": 1.6576,
"step": 58
},
{
"epoch": 0.09849749582637729,
"grad_norm": 0.0,
"learning_rate": 9.9984835816079e-06,
"loss": 1.9567,
"step": 59
},
{
"epoch": 0.1001669449081803,
"grad_norm": 0.0,
"learning_rate": 9.998127900726825e-06,
"loss": 1.5722,
"step": 60
},
{
"epoch": 0.1018363939899833,
"grad_norm": 0.0,
"learning_rate": 9.997734789566809e-06,
"loss": 1.5099,
"step": 61
},
{
"epoch": 0.10350584307178631,
"grad_norm": 0.0,
"learning_rate": 9.997304251071802e-06,
"loss": 1.6942,
"step": 62
},
{
"epoch": 0.10517529215358931,
"grad_norm": 0.0,
"learning_rate": 9.996836288466046e-06,
"loss": 1.4786,
"step": 63
},
{
"epoch": 0.10684474123539232,
"grad_norm": 0.0,
"learning_rate": 9.99633090525405e-06,
"loss": 1.7518,
"step": 64
},
{
"epoch": 0.10851419031719532,
"grad_norm": 0.0,
"learning_rate": 9.99578810522056e-06,
"loss": 1.8813,
"step": 65
},
{
"epoch": 0.11018363939899833,
"grad_norm": 0.0,
"learning_rate": 9.995207892430525e-06,
"loss": 1.6325,
"step": 66
},
{
"epoch": 0.11185308848080133,
"grad_norm": 0.0,
"learning_rate": 9.994590271229077e-06,
"loss": 1.6373,
"step": 67
},
{
"epoch": 0.11352253756260434,
"grad_norm": 0.0,
"learning_rate": 9.9939352462415e-06,
"loss": 1.7568,
"step": 68
},
{
"epoch": 0.11519198664440734,
"grad_norm": 0.0,
"learning_rate": 9.993242822373178e-06,
"loss": 1.8238,
"step": 69
},
{
"epoch": 0.11686143572621036,
"grad_norm": 0.0,
"learning_rate": 9.99251300480958e-06,
"loss": 1.8395,
"step": 70
},
{
"epoch": 0.11853088480801335,
"grad_norm": 0.0,
"learning_rate": 9.991745799016206e-06,
"loss": 1.4879,
"step": 71
},
{
"epoch": 0.12020033388981637,
"grad_norm": 0.0,
"learning_rate": 9.990941210738553e-06,
"loss": 1.9031,
"step": 72
},
{
"epoch": 0.12186978297161936,
"grad_norm": 0.0,
"learning_rate": 9.990099246002071e-06,
"loss": 1.7939,
"step": 73
},
{
"epoch": 0.12353923205342238,
"grad_norm": 0.0,
"learning_rate": 9.989219911112114e-06,
"loss": 1.7135,
"step": 74
},
{
"epoch": 0.12520868113522537,
"grad_norm": 0.0,
"learning_rate": 9.988303212653898e-06,
"loss": 1.7721,
"step": 75
},
{
"epoch": 0.12687813021702837,
"grad_norm": 0.0,
"learning_rate": 9.98734915749245e-06,
"loss": 1.7121,
"step": 76
},
{
"epoch": 0.1285475792988314,
"grad_norm": 0.0,
"learning_rate": 9.986357752772555e-06,
"loss": 1.6391,
"step": 77
},
{
"epoch": 0.1302170283806344,
"grad_norm": 0.0,
"learning_rate": 9.985329005918702e-06,
"loss": 1.7974,
"step": 78
},
{
"epoch": 0.1318864774624374,
"grad_norm": 0.0,
"learning_rate": 9.984262924635036e-06,
"loss": 1.5529,
"step": 79
},
{
"epoch": 0.1335559265442404,
"grad_norm": 0.0,
"learning_rate": 9.983159516905287e-06,
"loss": 1.9062,
"step": 80
},
{
"epoch": 0.1352253756260434,
"grad_norm": 0.0,
"learning_rate": 9.982018790992722e-06,
"loss": 1.7188,
"step": 81
},
{
"epoch": 0.13689482470784642,
"grad_norm": 0.0,
"learning_rate": 9.980840755440075e-06,
"loss": 1.6821,
"step": 82
},
{
"epoch": 0.13856427378964942,
"grad_norm": 0.0,
"learning_rate": 9.979625419069495e-06,
"loss": 2.0225,
"step": 83
},
{
"epoch": 0.14023372287145242,
"grad_norm": 0.0,
"learning_rate": 9.978372790982457e-06,
"loss": 1.7347,
"step": 84
},
{
"epoch": 0.1419031719532554,
"grad_norm": 0.0,
"learning_rate": 9.977082880559725e-06,
"loss": 1.7869,
"step": 85
},
{
"epoch": 0.14357262103505844,
"grad_norm": 0.0,
"learning_rate": 9.975755697461254e-06,
"loss": 1.7912,
"step": 86
},
{
"epoch": 0.14524207011686144,
"grad_norm": 0.0,
"learning_rate": 9.974391251626132e-06,
"loss": 1.6523,
"step": 87
},
{
"epoch": 0.14691151919866444,
"grad_norm": 0.0,
"learning_rate": 9.972989553272501e-06,
"loss": 1.7847,
"step": 88
},
{
"epoch": 0.14858096828046743,
"grad_norm": 0.0,
"learning_rate": 9.971550612897487e-06,
"loss": 1.5771,
"step": 89
},
{
"epoch": 0.15025041736227046,
"grad_norm": 0.0,
"learning_rate": 9.970074441277111e-06,
"loss": 1.721,
"step": 90
},
{
"epoch": 0.15191986644407346,
"grad_norm": 0.0,
"learning_rate": 9.968561049466214e-06,
"loss": 1.7414,
"step": 91
},
{
"epoch": 0.15358931552587646,
"grad_norm": 0.0,
"learning_rate": 9.967010448798376e-06,
"loss": 1.6912,
"step": 92
},
{
"epoch": 0.15525876460767946,
"grad_norm": 0.0,
"learning_rate": 9.965422650885829e-06,
"loss": 1.5043,
"step": 93
},
{
"epoch": 0.15692821368948248,
"grad_norm": 0.0,
"learning_rate": 9.963797667619368e-06,
"loss": 1.654,
"step": 94
},
{
"epoch": 0.15859766277128548,
"grad_norm": 0.0,
"learning_rate": 9.962135511168263e-06,
"loss": 1.6316,
"step": 95
},
{
"epoch": 0.16026711185308848,
"grad_norm": 0.0,
"learning_rate": 9.960436193980175e-06,
"loss": 1.5251,
"step": 96
},
{
"epoch": 0.16193656093489148,
"grad_norm": 0.0,
"learning_rate": 9.958699728781046e-06,
"loss": 1.919,
"step": 97
},
{
"epoch": 0.1636060100166945,
"grad_norm": 0.0,
"learning_rate": 9.956926128575026e-06,
"loss": 1.8145,
"step": 98
},
{
"epoch": 0.1652754590984975,
"grad_norm": 0.0,
"learning_rate": 9.955115406644357e-06,
"loss": 1.7591,
"step": 99
},
{
"epoch": 0.1669449081803005,
"grad_norm": 0.0,
"learning_rate": 9.953267576549279e-06,
"loss": 1.7051,
"step": 100
},
{
"epoch": 0.1686143572621035,
"grad_norm": 0.0,
"learning_rate": 9.951382652127935e-06,
"loss": 1.6331,
"step": 101
},
{
"epoch": 0.17028380634390652,
"grad_norm": 0.0,
"learning_rate": 9.949460647496258e-06,
"loss": 1.8922,
"step": 102
},
{
"epoch": 0.17195325542570952,
"grad_norm": 0.0,
"learning_rate": 9.947501577047874e-06,
"loss": 1.9153,
"step": 103
},
{
"epoch": 0.17362270450751252,
"grad_norm": 0.0,
"learning_rate": 9.945505455453983e-06,
"loss": 1.7728,
"step": 104
},
{
"epoch": 0.17529215358931552,
"grad_norm": 0.0,
"learning_rate": 9.943472297663262e-06,
"loss": 1.615,
"step": 105
},
{
"epoch": 0.17696160267111852,
"grad_norm": 0.0,
"learning_rate": 9.941402118901743e-06,
"loss": 1.6637,
"step": 106
},
{
"epoch": 0.17863105175292154,
"grad_norm": 0.0,
"learning_rate": 9.939294934672707e-06,
"loss": 1.6815,
"step": 107
},
{
"epoch": 0.18030050083472454,
"grad_norm": 0.0,
"learning_rate": 9.93715076075656e-06,
"loss": 1.7668,
"step": 108
},
{
"epoch": 0.18196994991652754,
"grad_norm": 0.0,
"learning_rate": 9.934969613210718e-06,
"loss": 1.4658,
"step": 109
},
{
"epoch": 0.18363939899833054,
"grad_norm": 0.0,
"learning_rate": 9.932751508369492e-06,
"loss": 1.7838,
"step": 110
},
{
"epoch": 0.18530884808013356,
"grad_norm": 0.0,
"learning_rate": 9.930496462843954e-06,
"loss": 1.666,
"step": 111
},
{
"epoch": 0.18697829716193656,
"grad_norm": 0.0,
"learning_rate": 9.92820449352183e-06,
"loss": 1.7945,
"step": 112
},
{
"epoch": 0.18864774624373956,
"grad_norm": 0.0,
"learning_rate": 9.92587561756735e-06,
"loss": 1.762,
"step": 113
},
{
"epoch": 0.19031719532554256,
"grad_norm": 0.0,
"learning_rate": 9.923509852421144e-06,
"loss": 1.5722,
"step": 114
},
{
"epoch": 0.19198664440734559,
"grad_norm": 0.0,
"learning_rate": 9.921107215800095e-06,
"loss": 1.7682,
"step": 115
},
{
"epoch": 0.19365609348914858,
"grad_norm": 0.0,
"learning_rate": 9.91866772569721e-06,
"loss": 1.8919,
"step": 116
},
{
"epoch": 0.19532554257095158,
"grad_norm": 0.0,
"learning_rate": 9.91619140038149e-06,
"loss": 1.9279,
"step": 117
},
{
"epoch": 0.19699499165275458,
"grad_norm": 0.0,
"learning_rate": 9.913678258397785e-06,
"loss": 1.784,
"step": 118
},
{
"epoch": 0.1986644407345576,
"grad_norm": 0.0,
"learning_rate": 9.911128318566668e-06,
"loss": 1.978,
"step": 119
},
{
"epoch": 0.2003338898163606,
"grad_norm": 0.0,
"learning_rate": 9.908541599984276e-06,
"loss": 1.6277,
"step": 120
},
{
"epoch": 0.2020033388981636,
"grad_norm": 0.0,
"learning_rate": 9.905918122022183e-06,
"loss": 1.55,
"step": 121
},
{
"epoch": 0.2036727879799666,
"grad_norm": 0.0,
"learning_rate": 9.90325790432725e-06,
"loss": 1.6347,
"step": 122
},
{
"epoch": 0.20534223706176963,
"grad_norm": 0.0,
"learning_rate": 9.90056096682147e-06,
"loss": 1.786,
"step": 123
},
{
"epoch": 0.20701168614357263,
"grad_norm": 0.0,
"learning_rate": 9.897827329701834e-06,
"loss": 1.7375,
"step": 124
},
{
"epoch": 0.20868113522537562,
"grad_norm": 0.0,
"learning_rate": 9.895057013440163e-06,
"loss": 1.6394,
"step": 125
},
{
"epoch": 0.21035058430717862,
"grad_norm": 0.0,
"learning_rate": 9.892250038782972e-06,
"loss": 1.6817,
"step": 126
},
{
"epoch": 0.21202003338898165,
"grad_norm": 0.0,
"learning_rate": 9.889406426751296e-06,
"loss": 1.9121,
"step": 127
},
{
"epoch": 0.21368948247078465,
"grad_norm": 0.0,
"learning_rate": 9.88652619864055e-06,
"loss": 1.4702,
"step": 128
},
{
"epoch": 0.21535893155258765,
"grad_norm": 0.0,
"learning_rate": 9.883609376020356e-06,
"loss": 1.6333,
"step": 129
},
{
"epoch": 0.21702838063439064,
"grad_norm": 0.0,
"learning_rate": 9.880655980734391e-06,
"loss": 1.64,
"step": 130
},
{
"epoch": 0.21869782971619364,
"grad_norm": 0.0,
"learning_rate": 9.877666034900216e-06,
"loss": 1.6464,
"step": 131
},
{
"epoch": 0.22036727879799667,
"grad_norm": 0.0,
"learning_rate": 9.874639560909118e-06,
"loss": 1.7207,
"step": 132
},
{
"epoch": 0.22203672787979967,
"grad_norm": 0.0,
"learning_rate": 9.871576581425937e-06,
"loss": 1.8864,
"step": 133
},
{
"epoch": 0.22370617696160267,
"grad_norm": 0.0,
"learning_rate": 9.868477119388897e-06,
"loss": 1.7744,
"step": 134
},
{
"epoch": 0.22537562604340566,
"grad_norm": 0.0,
"learning_rate": 9.86534119800943e-06,
"loss": 1.592,
"step": 135
},
{
"epoch": 0.2270450751252087,
"grad_norm": 0.0,
"learning_rate": 9.862168840772018e-06,
"loss": 1.7051,
"step": 136
},
{
"epoch": 0.2287145242070117,
"grad_norm": 0.0,
"learning_rate": 9.858960071433994e-06,
"loss": 1.7263,
"step": 137
},
{
"epoch": 0.2303839732888147,
"grad_norm": 0.0,
"learning_rate": 9.855714914025386e-06,
"loss": 1.6824,
"step": 138
},
{
"epoch": 0.23205342237061768,
"grad_norm": 0.0,
"learning_rate": 9.852433392848718e-06,
"loss": 1.8981,
"step": 139
},
{
"epoch": 0.2337228714524207,
"grad_norm": 0.0,
"learning_rate": 9.849115532478848e-06,
"loss": 1.4935,
"step": 140
},
{
"epoch": 0.2353923205342237,
"grad_norm": 0.0,
"learning_rate": 9.84576135776276e-06,
"loss": 1.3096,
"step": 141
},
{
"epoch": 0.2370617696160267,
"grad_norm": 0.0,
"learning_rate": 9.842370893819404e-06,
"loss": 1.963,
"step": 142
},
{
"epoch": 0.2387312186978297,
"grad_norm": 0.0,
"learning_rate": 9.838944166039486e-06,
"loss": 1.7601,
"step": 143
},
{
"epoch": 0.24040066777963273,
"grad_norm": 0.0,
"learning_rate": 9.83548120008529e-06,
"loss": 1.694,
"step": 144
},
{
"epoch": 0.24207011686143573,
"grad_norm": 0.0,
"learning_rate": 9.831982021890483e-06,
"loss": 1.697,
"step": 145
},
{
"epoch": 0.24373956594323873,
"grad_norm": 0.0,
"learning_rate": 9.828446657659919e-06,
"loss": 1.3733,
"step": 146
},
{
"epoch": 0.24540901502504173,
"grad_norm": 0.0,
"learning_rate": 9.824875133869447e-06,
"loss": 1.9497,
"step": 147
},
{
"epoch": 0.24707846410684475,
"grad_norm": 0.0,
"learning_rate": 9.821267477265705e-06,
"loss": 1.8237,
"step": 148
},
{
"epoch": 0.24874791318864775,
"grad_norm": 0.0,
"learning_rate": 9.817623714865931e-06,
"loss": 1.782,
"step": 149
},
{
"epoch": 0.25041736227045075,
"grad_norm": 0.0,
"learning_rate": 9.813943873957748e-06,
"loss": 1.7703,
"step": 150
},
{
"epoch": 0.25208681135225375,
"grad_norm": 0.0,
"learning_rate": 9.810227982098968e-06,
"loss": 1.7999,
"step": 151
},
{
"epoch": 0.25375626043405675,
"grad_norm": 0.0,
"learning_rate": 9.806476067117384e-06,
"loss": 1.5728,
"step": 152
},
{
"epoch": 0.25542570951585974,
"grad_norm": 0.0,
"learning_rate": 9.802688157110564e-06,
"loss": 1.5857,
"step": 153
},
{
"epoch": 0.2570951585976628,
"grad_norm": 0.0,
"learning_rate": 9.798864280445633e-06,
"loss": 1.6758,
"step": 154
},
{
"epoch": 0.2587646076794658,
"grad_norm": 0.0,
"learning_rate": 9.795004465759067e-06,
"loss": 1.5893,
"step": 155
},
{
"epoch": 0.2604340567612688,
"grad_norm": 0.0,
"learning_rate": 9.791108741956476e-06,
"loss": 1.8483,
"step": 156
},
{
"epoch": 0.2621035058430718,
"grad_norm": 0.0,
"learning_rate": 9.787177138212391e-06,
"loss": 1.9552,
"step": 157
},
{
"epoch": 0.2637729549248748,
"grad_norm": 0.0,
"learning_rate": 9.78320968397004e-06,
"loss": 1.7145,
"step": 158
},
{
"epoch": 0.2654424040066778,
"grad_norm": 0.0,
"learning_rate": 9.779206408941131e-06,
"loss": 1.6563,
"step": 159
},
{
"epoch": 0.2671118530884808,
"grad_norm": 0.0,
"learning_rate": 9.77516734310563e-06,
"loss": 1.7699,
"step": 160
},
{
"epoch": 0.2687813021702838,
"grad_norm": 0.0,
"learning_rate": 9.771092516711538e-06,
"loss": 1.6084,
"step": 161
},
{
"epoch": 0.2704507512520868,
"grad_norm": 0.0,
"learning_rate": 9.766981960274653e-06,
"loss": 1.6172,
"step": 162
},
{
"epoch": 0.27212020033388984,
"grad_norm": 0.0,
"learning_rate": 9.76283570457836e-06,
"loss": 1.7309,
"step": 163
},
{
"epoch": 0.27378964941569284,
"grad_norm": 0.0,
"learning_rate": 9.758653780673381e-06,
"loss": 1.8666,
"step": 164
},
{
"epoch": 0.27545909849749584,
"grad_norm": 0.0,
"learning_rate": 9.754436219877564e-06,
"loss": 1.9176,
"step": 165
},
{
"epoch": 0.27712854757929883,
"grad_norm": 0.0,
"learning_rate": 9.750183053775625e-06,
"loss": 1.9063,
"step": 166
},
{
"epoch": 0.27879799666110183,
"grad_norm": 0.0,
"learning_rate": 9.745894314218933e-06,
"loss": 1.8567,
"step": 167
},
{
"epoch": 0.28046744574290483,
"grad_norm": 0.0,
"learning_rate": 9.741570033325254e-06,
"loss": 1.861,
"step": 168
},
{
"epoch": 0.28213689482470783,
"grad_norm": 0.0,
"learning_rate": 9.737210243478522e-06,
"loss": 1.7261,
"step": 169
},
{
"epoch": 0.2838063439065108,
"grad_norm": 0.0,
"learning_rate": 9.732814977328593e-06,
"loss": 1.6596,
"step": 170
},
{
"epoch": 0.2854757929883139,
"grad_norm": 0.0,
"learning_rate": 9.728384267790997e-06,
"loss": 1.9513,
"step": 171
},
{
"epoch": 0.2871452420701169,
"grad_norm": 0.0,
"learning_rate": 9.723918148046696e-06,
"loss": 1.6785,
"step": 172
},
{
"epoch": 0.2888146911519199,
"grad_norm": 0.0,
"learning_rate": 9.719416651541839e-06,
"loss": 1.7828,
"step": 173
},
{
"epoch": 0.2904841402337229,
"grad_norm": 0.0,
"learning_rate": 9.714879811987496e-06,
"loss": 1.9663,
"step": 174
},
{
"epoch": 0.2921535893155259,
"grad_norm": 0.0,
"learning_rate": 9.710307663359426e-06,
"loss": 1.6448,
"step": 175
},
{
"epoch": 0.2938230383973289,
"grad_norm": 0.0,
"learning_rate": 9.705700239897809e-06,
"loss": 1.5589,
"step": 176
},
{
"epoch": 0.29549248747913187,
"grad_norm": 0.0,
"learning_rate": 9.701057576106991e-06,
"loss": 1.9398,
"step": 177
},
{
"epoch": 0.29716193656093487,
"grad_norm": 0.0,
"learning_rate": 9.69637970675523e-06,
"loss": 1.7781,
"step": 178
},
{
"epoch": 0.2988313856427379,
"grad_norm": 0.0,
"learning_rate": 9.691666666874438e-06,
"loss": 1.7034,
"step": 179
},
{
"epoch": 0.3005008347245409,
"grad_norm": 0.0,
"learning_rate": 9.686918491759904e-06,
"loss": 1.9747,
"step": 180
},
{
"epoch": 0.3021702838063439,
"grad_norm": 0.0,
"learning_rate": 9.682135216970048e-06,
"loss": 1.627,
"step": 181
},
{
"epoch": 0.3038397328881469,
"grad_norm": 0.0,
"learning_rate": 9.677316878326144e-06,
"loss": 1.6952,
"step": 182
},
{
"epoch": 0.3055091819699499,
"grad_norm": 0.0,
"learning_rate": 9.672463511912056e-06,
"loss": 1.9787,
"step": 183
},
{
"epoch": 0.3071786310517529,
"grad_norm": 0.0,
"learning_rate": 9.667575154073962e-06,
"loss": 1.7576,
"step": 184
},
{
"epoch": 0.3088480801335559,
"grad_norm": 0.0,
"learning_rate": 9.66265184142009e-06,
"loss": 1.4266,
"step": 185
},
{
"epoch": 0.3105175292153589,
"grad_norm": 0.0,
"learning_rate": 9.657693610820437e-06,
"loss": 1.84,
"step": 186
},
{
"epoch": 0.3121869782971619,
"grad_norm": 0.0,
"learning_rate": 9.652700499406497e-06,
"loss": 1.9044,
"step": 187
},
{
"epoch": 0.31385642737896496,
"grad_norm": 0.0,
"learning_rate": 9.647672544570981e-06,
"loss": 1.9096,
"step": 188
},
{
"epoch": 0.31552587646076796,
"grad_norm": 0.0,
"learning_rate": 9.642609783967539e-06,
"loss": 1.9484,
"step": 189
},
{
"epoch": 0.31719532554257096,
"grad_norm": 0.0,
"learning_rate": 9.637512255510475e-06,
"loss": 1.832,
"step": 190
},
{
"epoch": 0.31886477462437396,
"grad_norm": 0.0,
"learning_rate": 9.632379997374462e-06,
"loss": 1.8468,
"step": 191
},
{
"epoch": 0.32053422370617696,
"grad_norm": 0.0,
"learning_rate": 9.627213047994265e-06,
"loss": 1.5239,
"step": 192
},
{
"epoch": 0.32220367278797996,
"grad_norm": 0.0,
"learning_rate": 9.622011446064439e-06,
"loss": 1.9019,
"step": 193
},
{
"epoch": 0.32387312186978295,
"grad_norm": 0.0,
"learning_rate": 9.616775230539057e-06,
"loss": 1.6285,
"step": 194
},
{
"epoch": 0.32554257095158595,
"grad_norm": 0.0,
"learning_rate": 9.611504440631398e-06,
"loss": 1.7333,
"step": 195
},
{
"epoch": 0.327212020033389,
"grad_norm": 0.0,
"learning_rate": 9.606199115813672e-06,
"loss": 1.4345,
"step": 196
},
{
"epoch": 0.328881469115192,
"grad_norm": 0.0,
"learning_rate": 9.600859295816708e-06,
"loss": 1.7107,
"step": 197
},
{
"epoch": 0.330550918196995,
"grad_norm": 0.0,
"learning_rate": 9.595485020629676e-06,
"loss": 1.7476,
"step": 198
},
{
"epoch": 0.332220367278798,
"grad_norm": 0.0,
"learning_rate": 9.590076330499763e-06,
"loss": 1.8789,
"step": 199
},
{
"epoch": 0.333889816360601,
"grad_norm": 0.0,
"learning_rate": 9.584633265931894e-06,
"loss": 1.8005,
"step": 200
},
{
"epoch": 0.335559265442404,
"grad_norm": 0.0,
"learning_rate": 9.579155867688415e-06,
"loss": 1.9923,
"step": 201
},
{
"epoch": 0.337228714524207,
"grad_norm": 0.0,
"learning_rate": 9.573644176788795e-06,
"loss": 1.827,
"step": 202
},
{
"epoch": 0.33889816360601,
"grad_norm": 0.0,
"learning_rate": 9.568098234509312e-06,
"loss": 1.8459,
"step": 203
},
{
"epoch": 0.34056761268781305,
"grad_norm": 0.0,
"learning_rate": 9.562518082382751e-06,
"loss": 1.6333,
"step": 204
},
{
"epoch": 0.34223706176961605,
"grad_norm": 0.0,
"learning_rate": 9.55690376219809e-06,
"loss": 1.7023,
"step": 205
},
{
"epoch": 0.34390651085141904,
"grad_norm": 0.0,
"learning_rate": 9.551255316000183e-06,
"loss": 1.7017,
"step": 206
},
{
"epoch": 0.34557595993322204,
"grad_norm": 0.0,
"learning_rate": 9.545572786089452e-06,
"loss": 1.7043,
"step": 207
},
{
"epoch": 0.34724540901502504,
"grad_norm": 0.0,
"learning_rate": 9.539856215021568e-06,
"loss": 1.7999,
"step": 208
},
{
"epoch": 0.34891485809682804,
"grad_norm": 0.0,
"learning_rate": 9.53410564560713e-06,
"loss": 1.409,
"step": 209
},
{
"epoch": 0.35058430717863104,
"grad_norm": 0.0,
"learning_rate": 9.528321120911345e-06,
"loss": 1.4238,
"step": 210
},
{
"epoch": 0.35225375626043404,
"grad_norm": 0.0,
"learning_rate": 9.522502684253709e-06,
"loss": 1.958,
"step": 211
},
{
"epoch": 0.35392320534223703,
"grad_norm": 0.0,
"learning_rate": 9.516650379207677e-06,
"loss": 1.8037,
"step": 212
},
{
"epoch": 0.3555926544240401,
"grad_norm": 0.0,
"learning_rate": 9.510764249600339e-06,
"loss": 1.8266,
"step": 213
},
{
"epoch": 0.3572621035058431,
"grad_norm": 0.0,
"learning_rate": 9.504844339512096e-06,
"loss": 1.9888,
"step": 214
},
{
"epoch": 0.3589315525876461,
"grad_norm": 0.0,
"learning_rate": 9.498890693276326e-06,
"loss": 1.8452,
"step": 215
},
{
"epoch": 0.3606010016694491,
"grad_norm": 0.0,
"learning_rate": 9.492903355479047e-06,
"loss": 1.7189,
"step": 216
},
{
"epoch": 0.3622704507512521,
"grad_norm": 0.0,
"learning_rate": 9.486882370958596e-06,
"loss": 1.7222,
"step": 217
},
{
"epoch": 0.3639398998330551,
"grad_norm": 0.0,
"learning_rate": 9.480827784805278e-06,
"loss": 1.8466,
"step": 218
},
{
"epoch": 0.3656093489148581,
"grad_norm": 0.0,
"learning_rate": 9.474739642361043e-06,
"loss": 1.7537,
"step": 219
},
{
"epoch": 0.3672787979966611,
"grad_norm": 0.0,
"learning_rate": 9.468617989219136e-06,
"loss": 1.9107,
"step": 220
},
{
"epoch": 0.36894824707846413,
"grad_norm": 0.0,
"learning_rate": 9.462462871223755e-06,
"loss": 1.5911,
"step": 221
},
{
"epoch": 0.37061769616026713,
"grad_norm": 0.0,
"learning_rate": 9.45627433446972e-06,
"loss": 1.5626,
"step": 222
},
{
"epoch": 0.3722871452420701,
"grad_norm": 0.0,
"learning_rate": 9.450052425302112e-06,
"loss": 1.6464,
"step": 223
},
{
"epoch": 0.3739565943238731,
"grad_norm": 0.0,
"learning_rate": 9.443797190315938e-06,
"loss": 1.8094,
"step": 224
},
{
"epoch": 0.3756260434056761,
"grad_norm": 0.0,
"learning_rate": 9.437508676355774e-06,
"loss": 1.5682,
"step": 225
},
{
"epoch": 0.3772954924874791,
"grad_norm": 0.0,
"learning_rate": 9.431186930515419e-06,
"loss": 1.5277,
"step": 226
},
{
"epoch": 0.3789649415692821,
"grad_norm": 0.0,
"learning_rate": 9.424832000137542e-06,
"loss": 1.8445,
"step": 227
},
{
"epoch": 0.3806343906510851,
"grad_norm": 0.0,
"learning_rate": 9.418443932813328e-06,
"loss": 1.6292,
"step": 228
},
{
"epoch": 0.3823038397328882,
"grad_norm": 0.0,
"learning_rate": 9.412022776382113e-06,
"loss": 2.0086,
"step": 229
},
{
"epoch": 0.38397328881469117,
"grad_norm": 0.0,
"learning_rate": 9.405568578931042e-06,
"loss": 1.9954,
"step": 230
},
{
"epoch": 0.38564273789649417,
"grad_norm": 0.0,
"learning_rate": 9.399081388794688e-06,
"loss": 1.9507,
"step": 231
},
{
"epoch": 0.38731218697829717,
"grad_norm": 0.0,
"learning_rate": 9.392561254554712e-06,
"loss": 1.9047,
"step": 232
},
{
"epoch": 0.38898163606010017,
"grad_norm": 0.0,
"learning_rate": 9.386008225039486e-06,
"loss": 1.81,
"step": 233
},
{
"epoch": 0.39065108514190316,
"grad_norm": 0.0,
"learning_rate": 9.379422349323728e-06,
"loss": 1.5756,
"step": 234
},
{
"epoch": 0.39232053422370616,
"grad_norm": 0.0,
"learning_rate": 9.372803676728138e-06,
"loss": 1.5665,
"step": 235
},
{
"epoch": 0.39398998330550916,
"grad_norm": 0.0,
"learning_rate": 9.366152256819025e-06,
"loss": 1.6799,
"step": 236
},
{
"epoch": 0.39565943238731216,
"grad_norm": 0.0,
"learning_rate": 9.359468139407942e-06,
"loss": 1.4389,
"step": 237
},
{
"epoch": 0.3973288814691152,
"grad_norm": 0.0,
"learning_rate": 9.352751374551305e-06,
"loss": 1.8924,
"step": 238
},
{
"epoch": 0.3989983305509182,
"grad_norm": 0.0,
"learning_rate": 9.346002012550027e-06,
"loss": 1.8713,
"step": 239
},
{
"epoch": 0.4006677796327212,
"grad_norm": 0.0,
"learning_rate": 9.339220103949132e-06,
"loss": 1.7269,
"step": 240
},
{
"epoch": 0.4023372287145242,
"grad_norm": 0.0,
"learning_rate": 9.332405699537382e-06,
"loss": 1.651,
"step": 241
},
{
"epoch": 0.4040066777963272,
"grad_norm": 0.0,
"learning_rate": 9.325558850346897e-06,
"loss": 1.6421,
"step": 242
},
{
"epoch": 0.4056761268781302,
"grad_norm": 0.0,
"learning_rate": 9.318679607652768e-06,
"loss": 1.6585,
"step": 243
},
{
"epoch": 0.4073455759599332,
"grad_norm": 0.0,
"learning_rate": 9.311768022972682e-06,
"loss": 1.8029,
"step": 244
},
{
"epoch": 0.4090150250417362,
"grad_norm": 0.0,
"learning_rate": 9.304824148066526e-06,
"loss": 1.5796,
"step": 245
},
{
"epoch": 0.41068447412353926,
"grad_norm": 0.0,
"learning_rate": 9.297848034936007e-06,
"loss": 1.7035,
"step": 246
},
{
"epoch": 0.41235392320534225,
"grad_norm": 0.0,
"learning_rate": 9.290839735824254e-06,
"loss": 1.8096,
"step": 247
},
{
"epoch": 0.41402337228714525,
"grad_norm": 0.0,
"learning_rate": 9.283799303215442e-06,
"loss": 1.7213,
"step": 248
},
{
"epoch": 0.41569282136894825,
"grad_norm": 0.0,
"learning_rate": 9.276726789834378e-06,
"loss": 1.8537,
"step": 249
},
{
"epoch": 0.41736227045075125,
"grad_norm": 0.0,
"learning_rate": 9.269622248646124e-06,
"loss": 1.7887,
"step": 250
},
{
"epoch": 0.41903171953255425,
"grad_norm": 0.0,
"learning_rate": 9.262485732855597e-06,
"loss": 1.6711,
"step": 251
},
{
"epoch": 0.42070116861435725,
"grad_norm": 0.0,
"learning_rate": 9.255317295907158e-06,
"loss": 1.7755,
"step": 252
},
{
"epoch": 0.42237061769616024,
"grad_norm": 0.0,
"learning_rate": 9.24811699148423e-06,
"loss": 1.8195,
"step": 253
},
{
"epoch": 0.4240400667779633,
"grad_norm": 0.0,
"learning_rate": 9.240884873508876e-06,
"loss": 2.0062,
"step": 254
},
{
"epoch": 0.4257095158597663,
"grad_norm": 0.0,
"learning_rate": 9.233620996141421e-06,
"loss": 1.7525,
"step": 255
},
{
"epoch": 0.4273789649415693,
"grad_norm": 0.0,
"learning_rate": 9.226325413780021e-06,
"loss": 1.338,
"step": 256
},
{
"epoch": 0.4290484140233723,
"grad_norm": 0.0,
"learning_rate": 9.218998181060271e-06,
"loss": 1.7117,
"step": 257
},
{
"epoch": 0.4307178631051753,
"grad_norm": 0.0,
"learning_rate": 9.211639352854786e-06,
"loss": 1.7551,
"step": 258
},
{
"epoch": 0.4323873121869783,
"grad_norm": 0.0,
"learning_rate": 9.204248984272802e-06,
"loss": 1.4557,
"step": 259
},
{
"epoch": 0.4340567612687813,
"grad_norm": 0.0,
"learning_rate": 9.196827130659752e-06,
"loss": 1.6702,
"step": 260
},
{
"epoch": 0.4357262103505843,
"grad_norm": 0.0,
"learning_rate": 9.189373847596853e-06,
"loss": 1.6316,
"step": 261
},
{
"epoch": 0.4373956594323873,
"grad_norm": 0.0,
"learning_rate": 9.181889190900702e-06,
"loss": 1.6081,
"step": 262
},
{
"epoch": 0.43906510851419034,
"grad_norm": 0.0,
"learning_rate": 9.174373216622841e-06,
"loss": 1.4578,
"step": 263
},
{
"epoch": 0.44073455759599334,
"grad_norm": 0.0,
"learning_rate": 9.166825981049345e-06,
"loss": 1.8555,
"step": 264
},
{
"epoch": 0.44240400667779634,
"grad_norm": 0.0,
"learning_rate": 9.15924754070041e-06,
"loss": 1.7215,
"step": 265
},
{
"epoch": 0.44407345575959933,
"grad_norm": 0.0,
"learning_rate": 9.151637952329903e-06,
"loss": 1.8248,
"step": 266
},
{
"epoch": 0.44574290484140233,
"grad_norm": 0.0,
"learning_rate": 9.143997272924974e-06,
"loss": 1.8428,
"step": 267
},
{
"epoch": 0.44741235392320533,
"grad_norm": 0.0,
"learning_rate": 9.136325559705593e-06,
"loss": 1.8746,
"step": 268
},
{
"epoch": 0.44908180300500833,
"grad_norm": 0.0,
"learning_rate": 9.128622870124147e-06,
"loss": 1.7128,
"step": 269
},
{
"epoch": 0.4507512520868113,
"grad_norm": 0.0,
"learning_rate": 9.120889261864999e-06,
"loss": 1.569,
"step": 270
},
{
"epoch": 0.4524207011686144,
"grad_norm": 0.0,
"learning_rate": 9.113124792844053e-06,
"loss": 1.6879,
"step": 271
},
{
"epoch": 0.4540901502504174,
"grad_norm": 0.0,
"learning_rate": 9.105329521208334e-06,
"loss": 1.7053,
"step": 272
},
{
"epoch": 0.4557595993322204,
"grad_norm": 0.0,
"learning_rate": 9.097503505335534e-06,
"loss": 1.763,
"step": 273
},
{
"epoch": 0.4574290484140234,
"grad_norm": 0.0,
"learning_rate": 9.089646803833589e-06,
"loss": 1.6282,
"step": 274
},
{
"epoch": 0.4590984974958264,
"grad_norm": 0.0,
"learning_rate": 9.081759475540236e-06,
"loss": 1.7139,
"step": 275
},
{
"epoch": 0.4607679465776294,
"grad_norm": 0.0,
"learning_rate": 9.073841579522571e-06,
"loss": 1.7466,
"step": 276
},
{
"epoch": 0.46243739565943237,
"grad_norm": 0.0,
"learning_rate": 9.065893175076604e-06,
"loss": 1.7338,
"step": 277
},
{
"epoch": 0.46410684474123537,
"grad_norm": 0.0,
"learning_rate": 9.057914321726824e-06,
"loss": 1.9141,
"step": 278
},
{
"epoch": 0.4657762938230384,
"grad_norm": 0.0,
"learning_rate": 9.049905079225744e-06,
"loss": 1.6576,
"step": 279
},
{
"epoch": 0.4674457429048414,
"grad_norm": 0.0,
"learning_rate": 9.041865507553458e-06,
"loss": 1.7862,
"step": 280
},
{
"epoch": 0.4691151919866444,
"grad_norm": 0.0,
"learning_rate": 9.033795666917191e-06,
"loss": 1.7305,
"step": 281
},
{
"epoch": 0.4707846410684474,
"grad_norm": 0.0,
"learning_rate": 9.025695617750848e-06,
"loss": 1.9305,
"step": 282
},
{
"epoch": 0.4724540901502504,
"grad_norm": 0.0,
"learning_rate": 9.01756542071456e-06,
"loss": 1.6688,
"step": 283
},
{
"epoch": 0.4741235392320534,
"grad_norm": 0.0,
"learning_rate": 9.009405136694234e-06,
"loss": 1.9435,
"step": 284
},
{
"epoch": 0.4757929883138564,
"grad_norm": 0.0,
"learning_rate": 9.001214826801092e-06,
"loss": 2.0056,
"step": 285
},
{
"epoch": 0.4774624373956594,
"grad_norm": 0.0,
"learning_rate": 8.992994552371217e-06,
"loss": 1.5892,
"step": 286
},
{
"epoch": 0.4791318864774624,
"grad_norm": 0.0,
"learning_rate": 8.98474437496509e-06,
"loss": 1.6205,
"step": 287
},
{
"epoch": 0.48080133555926546,
"grad_norm": 0.0,
"learning_rate": 8.976464356367133e-06,
"loss": 1.7236,
"step": 288
},
{
"epoch": 0.48247078464106846,
"grad_norm": 0.0,
"learning_rate": 8.968154558585244e-06,
"loss": 1.646,
"step": 289
},
{
"epoch": 0.48414023372287146,
"grad_norm": 0.0,
"learning_rate": 8.959815043850336e-06,
"loss": 1.5001,
"step": 290
},
{
"epoch": 0.48580968280467446,
"grad_norm": 0.0,
"learning_rate": 8.951445874615862e-06,
"loss": 1.7625,
"step": 291
},
{
"epoch": 0.48747913188647746,
"grad_norm": 0.0,
"learning_rate": 8.943047113557358e-06,
"loss": 1.848,
"step": 292
},
{
"epoch": 0.48914858096828046,
"grad_norm": 0.0,
"learning_rate": 8.934618823571968e-06,
"loss": 1.6042,
"step": 293
},
{
"epoch": 0.49081803005008345,
"grad_norm": 0.0,
"learning_rate": 8.926161067777973e-06,
"loss": 1.6858,
"step": 294
},
{
"epoch": 0.49248747913188645,
"grad_norm": 0.0,
"learning_rate": 8.917673909514321e-06,
"loss": 2.0782,
"step": 295
},
{
"epoch": 0.4941569282136895,
"grad_norm": 0.0,
"learning_rate": 8.90915741234015e-06,
"loss": 1.6056,
"step": 296
},
{
"epoch": 0.4958263772954925,
"grad_norm": 0.0,
"learning_rate": 8.900611640034313e-06,
"loss": 1.9565,
"step": 297
},
{
"epoch": 0.4974958263772955,
"grad_norm": 0.0,
"learning_rate": 8.892036656594898e-06,
"loss": 1.4701,
"step": 298
},
{
"epoch": 0.4991652754590985,
"grad_norm": 0.0,
"learning_rate": 8.883432526238757e-06,
"loss": 1.4169,
"step": 299
},
{
"epoch": 0.5008347245409015,
"grad_norm": 0.0,
"learning_rate": 8.874799313401014e-06,
"loss": 1.6465,
"step": 300
},
{
"epoch": 0.5025041736227045,
"grad_norm": 0.0,
"learning_rate": 8.866137082734591e-06,
"loss": 1.5715,
"step": 301
},
{
"epoch": 0.5041736227045075,
"grad_norm": 0.0,
"learning_rate": 8.857445899109716e-06,
"loss": 1.848,
"step": 302
},
{
"epoch": 0.5058430717863105,
"grad_norm": 0.0,
"learning_rate": 8.848725827613445e-06,
"loss": 1.7313,
"step": 303
},
{
"epoch": 0.5075125208681135,
"grad_norm": 0.0,
"learning_rate": 8.839976933549173e-06,
"loss": 1.8501,
"step": 304
},
{
"epoch": 0.5091819699499165,
"grad_norm": 0.0,
"learning_rate": 8.831199282436136e-06,
"loss": 1.5231,
"step": 305
},
{
"epoch": 0.5108514190317195,
"grad_norm": 0.0,
"learning_rate": 8.822392940008937e-06,
"loss": 1.4616,
"step": 306
},
{
"epoch": 0.5125208681135225,
"grad_norm": 0.0,
"learning_rate": 8.813557972217038e-06,
"loss": 1.5747,
"step": 307
},
{
"epoch": 0.5141903171953256,
"grad_norm": 0.0,
"learning_rate": 8.804694445224274e-06,
"loss": 1.5613,
"step": 308
},
{
"epoch": 0.5158597662771286,
"grad_norm": 0.0,
"learning_rate": 8.795802425408352e-06,
"loss": 1.3077,
"step": 309
},
{
"epoch": 0.5175292153589316,
"grad_norm": 0.0,
"learning_rate": 8.786881979360368e-06,
"loss": 1.5789,
"step": 310
},
{
"epoch": 0.5191986644407346,
"grad_norm": 0.0,
"learning_rate": 8.777933173884288e-06,
"loss": 1.7127,
"step": 311
},
{
"epoch": 0.5208681135225376,
"grad_norm": 0.0,
"learning_rate": 8.76895607599646e-06,
"loss": 1.7422,
"step": 312
},
{
"epoch": 0.5225375626043406,
"grad_norm": 0.0,
"learning_rate": 8.759950752925114e-06,
"loss": 1.6902,
"step": 313
},
{
"epoch": 0.5242070116861436,
"grad_norm": 0.0,
"learning_rate": 8.750917272109849e-06,
"loss": 1.763,
"step": 314
},
{
"epoch": 0.5258764607679466,
"grad_norm": 0.0,
"learning_rate": 8.741855701201138e-06,
"loss": 1.7908,
"step": 315
},
{
"epoch": 0.5275459098497496,
"grad_norm": 0.0,
"learning_rate": 8.732766108059814e-06,
"loss": 1.6755,
"step": 316
},
{
"epoch": 0.5292153589315526,
"grad_norm": 0.0,
"learning_rate": 8.723648560756565e-06,
"loss": 1.7597,
"step": 317
},
{
"epoch": 0.5308848080133556,
"grad_norm": 0.0,
"learning_rate": 8.714503127571425e-06,
"loss": 1.9694,
"step": 318
},
{
"epoch": 0.5325542570951586,
"grad_norm": 0.0,
"learning_rate": 8.705329876993262e-06,
"loss": 1.6419,
"step": 319
},
{
"epoch": 0.5342237061769616,
"grad_norm": 0.0,
"learning_rate": 8.696128877719258e-06,
"loss": 1.8573,
"step": 320
},
{
"epoch": 0.5358931552587646,
"grad_norm": 0.0,
"learning_rate": 8.686900198654413e-06,
"loss": 1.7608,
"step": 321
},
{
"epoch": 0.5375626043405676,
"grad_norm": 0.0,
"learning_rate": 8.677643908911007e-06,
"loss": 2.0053,
"step": 322
},
{
"epoch": 0.5392320534223706,
"grad_norm": 0.0,
"learning_rate": 8.668360077808093e-06,
"loss": 1.8578,
"step": 323
},
{
"epoch": 0.5409015025041736,
"grad_norm": 0.0,
"learning_rate": 8.659048774870986e-06,
"loss": 1.6681,
"step": 324
},
{
"epoch": 0.5425709515859767,
"grad_norm": 0.0,
"learning_rate": 8.649710069830723e-06,
"loss": 1.6123,
"step": 325
},
{
"epoch": 0.5442404006677797,
"grad_norm": 0.0,
"learning_rate": 8.64034403262356e-06,
"loss": 1.7475,
"step": 326
},
{
"epoch": 0.5459098497495827,
"grad_norm": 0.0,
"learning_rate": 8.630950733390434e-06,
"loss": 1.3078,
"step": 327
},
{
"epoch": 0.5475792988313857,
"grad_norm": 0.0,
"learning_rate": 8.621530242476446e-06,
"loss": 1.6937,
"step": 328
},
{
"epoch": 0.5492487479131887,
"grad_norm": 0.0,
"learning_rate": 8.612082630430333e-06,
"loss": 1.9197,
"step": 329
},
{
"epoch": 0.5509181969949917,
"grad_norm": 0.0,
"learning_rate": 8.602607968003935e-06,
"loss": 1.7944,
"step": 330
},
{
"epoch": 0.5525876460767947,
"grad_norm": 0.0,
"learning_rate": 8.593106326151672e-06,
"loss": 1.83,
"step": 331
},
{
"epoch": 0.5542570951585977,
"grad_norm": 0.0,
"learning_rate": 8.583577776030005e-06,
"loss": 2.0895,
"step": 332
},
{
"epoch": 0.5559265442404007,
"grad_norm": 0.0,
"learning_rate": 8.574022388996913e-06,
"loss": 1.6171,
"step": 333
},
{
"epoch": 0.5575959933222037,
"grad_norm": 0.0,
"learning_rate": 8.564440236611344e-06,
"loss": 1.486,
"step": 334
},
{
"epoch": 0.5592654424040067,
"grad_norm": 0.0,
"learning_rate": 8.5548313906327e-06,
"loss": 1.7145,
"step": 335
},
{
"epoch": 0.5609348914858097,
"grad_norm": 0.0,
"learning_rate": 8.545195923020273e-06,
"loss": 1.713,
"step": 336
},
{
"epoch": 0.5626043405676127,
"grad_norm": 0.0,
"learning_rate": 8.535533905932739e-06,
"loss": 1.756,
"step": 337
},
{
"epoch": 0.5642737896494157,
"grad_norm": 0.0,
"learning_rate": 8.525845411727581e-06,
"loss": 1.5199,
"step": 338
},
{
"epoch": 0.5659432387312187,
"grad_norm": 0.0,
"learning_rate": 8.516130512960576e-06,
"loss": 1.7866,
"step": 339
},
{
"epoch": 0.5676126878130217,
"grad_norm": 0.0,
"learning_rate": 8.506389282385242e-06,
"loss": 1.9604,
"step": 340
},
{
"epoch": 0.5692821368948247,
"grad_norm": 0.0,
"learning_rate": 8.49662179295228e-06,
"loss": 1.8083,
"step": 341
},
{
"epoch": 0.5709515859766278,
"grad_norm": 0.0,
"learning_rate": 8.486828117809057e-06,
"loss": 1.8973,
"step": 342
},
{
"epoch": 0.5726210350584308,
"grad_norm": 0.0,
"learning_rate": 8.47700833029903e-06,
"loss": 1.6784,
"step": 343
},
{
"epoch": 0.5742904841402338,
"grad_norm": 0.0,
"learning_rate": 8.467162503961209e-06,
"loss": 1.7076,
"step": 344
},
{
"epoch": 0.5759599332220368,
"grad_norm": 0.0,
"learning_rate": 8.45729071252961e-06,
"loss": 1.4877,
"step": 345
},
{
"epoch": 0.5776293823038398,
"grad_norm": 0.0,
"learning_rate": 8.447393029932692e-06,
"loss": 1.8807,
"step": 346
},
{
"epoch": 0.5792988313856428,
"grad_norm": 0.0,
"learning_rate": 8.43746953029281e-06,
"loss": 1.8924,
"step": 347
},
{
"epoch": 0.5809682804674458,
"grad_norm": 0.0,
"learning_rate": 8.427520287925669e-06,
"loss": 1.7638,
"step": 348
},
{
"epoch": 0.5826377295492488,
"grad_norm": 0.0,
"learning_rate": 8.417545377339739e-06,
"loss": 1.8664,
"step": 349
},
{
"epoch": 0.5843071786310517,
"grad_norm": 0.0,
"learning_rate": 8.407544873235736e-06,
"loss": 1.6039,
"step": 350
},
{
"epoch": 0.5859766277128547,
"grad_norm": 0.0,
"learning_rate": 8.39751885050603e-06,
"loss": 1.8483,
"step": 351
},
{
"epoch": 0.5876460767946577,
"grad_norm": 0.0,
"learning_rate": 8.387467384234096e-06,
"loss": 1.9636,
"step": 352
},
{
"epoch": 0.5893155258764607,
"grad_norm": 0.0,
"learning_rate": 8.377390549693959e-06,
"loss": 1.3699,
"step": 353
},
{
"epoch": 0.5909849749582637,
"grad_norm": 0.0,
"learning_rate": 8.367288422349617e-06,
"loss": 1.7837,
"step": 354
},
{
"epoch": 0.5926544240400667,
"grad_norm": 0.0,
"learning_rate": 8.35716107785449e-06,
"loss": 1.7749,
"step": 355
},
{
"epoch": 0.5943238731218697,
"grad_norm": 0.0,
"learning_rate": 8.347008592050834e-06,
"loss": 1.6986,
"step": 356
},
{
"epoch": 0.5959933222036727,
"grad_norm": 0.0,
"learning_rate": 8.336831040969196e-06,
"loss": 1.7904,
"step": 357
},
{
"epoch": 0.5976627712854758,
"grad_norm": 0.0,
"learning_rate": 8.326628500827826e-06,
"loss": 1.9549,
"step": 358
},
{
"epoch": 0.5993322203672788,
"grad_norm": 0.0,
"learning_rate": 8.316401048032121e-06,
"loss": 1.7828,
"step": 359
},
{
"epoch": 0.6010016694490818,
"grad_norm": 0.0,
"learning_rate": 8.306148759174036e-06,
"loss": 1.7061,
"step": 360
},
{
"epoch": 0.6026711185308848,
"grad_norm": 0.0,
"learning_rate": 8.295871711031527e-06,
"loss": 1.5943,
"step": 361
},
{
"epoch": 0.6043405676126878,
"grad_norm": 0.0,
"learning_rate": 8.285569980567965e-06,
"loss": 1.7353,
"step": 362
},
{
"epoch": 0.6060100166944908,
"grad_norm": 0.0,
"learning_rate": 8.275243644931565e-06,
"loss": 1.7265,
"step": 363
},
{
"epoch": 0.6076794657762938,
"grad_norm": 0.0,
"learning_rate": 8.264892781454807e-06,
"loss": 1.9438,
"step": 364
},
{
"epoch": 0.6093489148580968,
"grad_norm": 0.0,
"learning_rate": 8.254517467653858e-06,
"loss": 1.6295,
"step": 365
},
{
"epoch": 0.6110183639398998,
"grad_norm": 0.0,
"learning_rate": 8.244117781227982e-06,
"loss": 2.0601,
"step": 366
},
{
"epoch": 0.6126878130217028,
"grad_norm": 0.0,
"learning_rate": 8.23369380005898e-06,
"loss": 1.6861,
"step": 367
},
{
"epoch": 0.6143572621035058,
"grad_norm": 0.0,
"learning_rate": 8.22324560221058e-06,
"loss": 1.68,
"step": 368
},
{
"epoch": 0.6160267111853088,
"grad_norm": 0.0,
"learning_rate": 8.21277326592787e-06,
"loss": 1.683,
"step": 369
},
{
"epoch": 0.6176961602671118,
"grad_norm": 0.0,
"learning_rate": 8.202276869636713e-06,
"loss": 1.4055,
"step": 370
},
{
"epoch": 0.6193656093489148,
"grad_norm": 0.0,
"learning_rate": 8.191756491943146e-06,
"loss": 1.7286,
"step": 371
},
{
"epoch": 0.6210350584307178,
"grad_norm": 0.0,
"learning_rate": 8.1812122116328e-06,
"loss": 1.7372,
"step": 372
},
{
"epoch": 0.6227045075125208,
"grad_norm": 0.0,
"learning_rate": 8.170644107670313e-06,
"loss": 1.5938,
"step": 373
},
{
"epoch": 0.6243739565943238,
"grad_norm": 0.0,
"learning_rate": 8.160052259198737e-06,
"loss": 1.5759,
"step": 374
},
{
"epoch": 0.6260434056761269,
"grad_norm": 0.0,
"learning_rate": 8.149436745538934e-06,
"loss": 1.6956,
"step": 375
},
{
"epoch": 0.6277128547579299,
"grad_norm": 0.0,
"learning_rate": 8.138797646189e-06,
"loss": 1.5602,
"step": 376
},
{
"epoch": 0.6293823038397329,
"grad_norm": 0.0,
"learning_rate": 8.128135040823661e-06,
"loss": 1.5855,
"step": 377
},
{
"epoch": 0.6310517529215359,
"grad_norm": 0.0,
"learning_rate": 8.117449009293668e-06,
"loss": 1.9672,
"step": 378
},
{
"epoch": 0.6327212020033389,
"grad_norm": 0.0,
"learning_rate": 8.106739631625216e-06,
"loss": 1.8238,
"step": 379
},
{
"epoch": 0.6343906510851419,
"grad_norm": 0.0,
"learning_rate": 8.096006988019331e-06,
"loss": 1.8443,
"step": 380
},
{
"epoch": 0.6360601001669449,
"grad_norm": 0.0,
"learning_rate": 8.085251158851278e-06,
"loss": 1.7355,
"step": 381
},
{
"epoch": 0.6377295492487479,
"grad_norm": 0.0,
"learning_rate": 8.074472224669952e-06,
"loss": 1.6855,
"step": 382
},
{
"epoch": 0.6393989983305509,
"grad_norm": 0.0,
"learning_rate": 8.063670266197278e-06,
"loss": 1.9201,
"step": 383
},
{
"epoch": 0.6410684474123539,
"grad_norm": 0.0,
"learning_rate": 8.052845364327609e-06,
"loss": 1.6821,
"step": 384
},
{
"epoch": 0.6427378964941569,
"grad_norm": 0.0,
"learning_rate": 8.041997600127118e-06,
"loss": 1.8732,
"step": 385
},
{
"epoch": 0.6444073455759599,
"grad_norm": 0.0,
"learning_rate": 8.031127054833192e-06,
"loss": 1.5744,
"step": 386
},
{
"epoch": 0.6460767946577629,
"grad_norm": 0.0,
"learning_rate": 8.020233809853815e-06,
"loss": 1.6747,
"step": 387
},
{
"epoch": 0.6477462437395659,
"grad_norm": 0.0,
"learning_rate": 8.009317946766975e-06,
"loss": 1.6959,
"step": 388
},
{
"epoch": 0.6494156928213689,
"grad_norm": 0.0,
"learning_rate": 7.998379547320038e-06,
"loss": 1.464,
"step": 389
},
{
"epoch": 0.6510851419031719,
"grad_norm": 0.0,
"learning_rate": 7.987418693429145e-06,
"loss": 1.7419,
"step": 390
},
{
"epoch": 0.6527545909849749,
"grad_norm": 0.0,
"learning_rate": 7.976435467178592e-06,
"loss": 1.801,
"step": 391
},
{
"epoch": 0.654424040066778,
"grad_norm": 0.0,
"learning_rate": 7.965429950820222e-06,
"loss": 1.7309,
"step": 392
},
{
"epoch": 0.656093489148581,
"grad_norm": 0.0,
"learning_rate": 7.954402226772804e-06,
"loss": 1.6584,
"step": 393
},
{
"epoch": 0.657762938230384,
"grad_norm": 0.0,
"learning_rate": 7.943352377621414e-06,
"loss": 1.8112,
"step": 394
},
{
"epoch": 0.659432387312187,
"grad_norm": 0.0,
"learning_rate": 7.932280486116825e-06,
"loss": 1.9561,
"step": 395
},
{
"epoch": 0.66110183639399,
"grad_norm": 0.0,
"learning_rate": 7.92118663517488e-06,
"loss": 1.7664,
"step": 396
},
{
"epoch": 0.662771285475793,
"grad_norm": 0.0,
"learning_rate": 7.910070907875871e-06,
"loss": 1.5639,
"step": 397
},
{
"epoch": 0.664440734557596,
"grad_norm": 0.0,
"learning_rate": 7.898933387463924e-06,
"loss": 1.401,
"step": 398
},
{
"epoch": 0.666110183639399,
"grad_norm": 0.0,
"learning_rate": 7.887774157346365e-06,
"loss": 1.5964,
"step": 399
},
{
"epoch": 0.667779632721202,
"grad_norm": 0.0,
"learning_rate": 7.876593301093104e-06,
"loss": 1.6001,
"step": 400
},
{
"epoch": 0.669449081803005,
"grad_norm": 0.0,
"learning_rate": 7.865390902436005e-06,
"loss": 1.7532,
"step": 401
},
{
"epoch": 0.671118530884808,
"grad_norm": 0.0,
"learning_rate": 7.854167045268265e-06,
"loss": 1.6746,
"step": 402
},
{
"epoch": 0.672787979966611,
"grad_norm": 0.0,
"learning_rate": 7.842921813643767e-06,
"loss": 1.9211,
"step": 403
},
{
"epoch": 0.674457429048414,
"grad_norm": 0.0,
"learning_rate": 7.831655291776484e-06,
"loss": 1.9482,
"step": 404
},
{
"epoch": 0.676126878130217,
"grad_norm": 0.0,
"learning_rate": 7.82036756403981e-06,
"loss": 1.8791,
"step": 405
},
{
"epoch": 0.67779632721202,
"grad_norm": 0.0,
"learning_rate": 7.809058714965962e-06,
"loss": 1.8097,
"step": 406
},
{
"epoch": 0.679465776293823,
"grad_norm": 0.0,
"learning_rate": 7.797728829245321e-06,
"loss": 1.6008,
"step": 407
},
{
"epoch": 0.6811352253756261,
"grad_norm": 0.0,
"learning_rate": 7.786377991725813e-06,
"loss": 1.5568,
"step": 408
},
{
"epoch": 0.6828046744574291,
"grad_norm": 0.0,
"learning_rate": 7.775006287412268e-06,
"loss": 1.5637,
"step": 409
},
{
"epoch": 0.6844741235392321,
"grad_norm": 0.0,
"learning_rate": 7.763613801465785e-06,
"loss": 1.8548,
"step": 410
},
{
"epoch": 0.6861435726210351,
"grad_norm": 0.0,
"learning_rate": 7.752200619203094e-06,
"loss": 1.6665,
"step": 411
},
{
"epoch": 0.6878130217028381,
"grad_norm": 0.0,
"learning_rate": 7.740766826095918e-06,
"loss": 1.8327,
"step": 412
},
{
"epoch": 0.6894824707846411,
"grad_norm": 0.0,
"learning_rate": 7.729312507770326e-06,
"loss": 1.7214,
"step": 413
},
{
"epoch": 0.6911519198664441,
"grad_norm": 0.0,
"learning_rate": 7.717837750006106e-06,
"loss": 1.4923,
"step": 414
},
{
"epoch": 0.6928213689482471,
"grad_norm": 0.0,
"learning_rate": 7.706342638736108e-06,
"loss": 1.9765,
"step": 415
},
{
"epoch": 0.6944908180300501,
"grad_norm": 0.0,
"learning_rate": 7.694827260045608e-06,
"loss": 1.8433,
"step": 416
},
{
"epoch": 0.6961602671118531,
"grad_norm": 0.0,
"learning_rate": 7.683291700171663e-06,
"loss": 1.7273,
"step": 417
},
{
"epoch": 0.6978297161936561,
"grad_norm": 0.0,
"learning_rate": 7.671736045502462e-06,
"loss": 1.9375,
"step": 418
},
{
"epoch": 0.6994991652754591,
"grad_norm": 0.0,
"learning_rate": 7.660160382576683e-06,
"loss": 1.8932,
"step": 419
},
{
"epoch": 0.7011686143572621,
"grad_norm": 0.0,
"learning_rate": 7.648564798082842e-06,
"loss": 1.8344,
"step": 420
},
{
"epoch": 0.7028380634390651,
"grad_norm": 0.0,
"learning_rate": 7.636949378858647e-06,
"loss": 1.888,
"step": 421
},
{
"epoch": 0.7045075125208681,
"grad_norm": 0.0,
"learning_rate": 7.625314211890342e-06,
"loss": 1.6684,
"step": 422
},
{
"epoch": 0.7061769616026711,
"grad_norm": 0.0,
"learning_rate": 7.613659384312062e-06,
"loss": 1.6708,
"step": 423
},
{
"epoch": 0.7078464106844741,
"grad_norm": 0.0,
"learning_rate": 7.601984983405173e-06,
"loss": 1.619,
"step": 424
},
{
"epoch": 0.7095158597662772,
"grad_norm": 0.0,
"learning_rate": 7.590291096597631e-06,
"loss": 1.7182,
"step": 425
},
{
"epoch": 0.7111853088480802,
"grad_norm": 0.0,
"learning_rate": 7.57857781146331e-06,
"loss": 1.8545,
"step": 426
},
{
"epoch": 0.7128547579298832,
"grad_norm": 0.0,
"learning_rate": 7.566845215721362e-06,
"loss": 1.8239,
"step": 427
},
{
"epoch": 0.7145242070116862,
"grad_norm": 0.0,
"learning_rate": 7.555093397235553e-06,
"loss": 1.7683,
"step": 428
},
{
"epoch": 0.7161936560934892,
"grad_norm": 0.0,
"learning_rate": 7.543322444013601e-06,
"loss": 1.8621,
"step": 429
},
{
"epoch": 0.7178631051752922,
"grad_norm": 0.0,
"learning_rate": 7.531532444206524e-06,
"loss": 1.916,
"step": 430
},
{
"epoch": 0.7195325542570952,
"grad_norm": 0.0,
"learning_rate": 7.519723486107977e-06,
"loss": 1.6987,
"step": 431
},
{
"epoch": 0.7212020033388982,
"grad_norm": 0.0,
"learning_rate": 7.507895658153594e-06,
"loss": 1.6218,
"step": 432
},
{
"epoch": 0.7228714524207012,
"grad_norm": 0.0,
"learning_rate": 7.496049048920317e-06,
"loss": 1.6171,
"step": 433
},
{
"epoch": 0.7245409015025042,
"grad_norm": 0.0,
"learning_rate": 7.484183747125743e-06,
"loss": 1.8034,
"step": 434
},
{
"epoch": 0.7262103505843072,
"grad_norm": 0.0,
"learning_rate": 7.472299841627452e-06,
"loss": 1.8408,
"step": 435
},
{
"epoch": 0.7278797996661102,
"grad_norm": 0.0,
"learning_rate": 7.460397421422346e-06,
"loss": 1.857,
"step": 436
},
{
"epoch": 0.7295492487479132,
"grad_norm": 0.0,
"learning_rate": 7.448476575645982e-06,
"loss": 1.6,
"step": 437
},
{
"epoch": 0.7312186978297162,
"grad_norm": 0.0,
"learning_rate": 7.4365373935719e-06,
"loss": 1.9061,
"step": 438
},
{
"epoch": 0.7328881469115192,
"grad_norm": 0.0,
"learning_rate": 7.424579964610963e-06,
"loss": 1.8586,
"step": 439
},
{
"epoch": 0.7345575959933222,
"grad_norm": 0.0,
"learning_rate": 7.412604378310677e-06,
"loss": 2.0079,
"step": 440
},
{
"epoch": 0.7362270450751253,
"grad_norm": 0.0,
"learning_rate": 7.400610724354531e-06,
"loss": 1.7882,
"step": 441
},
{
"epoch": 0.7378964941569283,
"grad_norm": 0.0,
"learning_rate": 7.388599092561315e-06,
"loss": 1.8505,
"step": 442
},
{
"epoch": 0.7395659432387313,
"grad_norm": 0.0,
"learning_rate": 7.376569572884457e-06,
"loss": 1.5792,
"step": 443
},
{
"epoch": 0.7412353923205343,
"grad_norm": 0.0,
"learning_rate": 7.364522255411342e-06,
"loss": 1.7987,
"step": 444
},
{
"epoch": 0.7429048414023373,
"grad_norm": 0.0,
"learning_rate": 7.3524572303626415e-06,
"loss": 1.814,
"step": 445
},
{
"epoch": 0.7445742904841403,
"grad_norm": 0.0,
"learning_rate": 7.340374588091638e-06,
"loss": 1.54,
"step": 446
},
{
"epoch": 0.7462437395659433,
"grad_norm": 0.0,
"learning_rate": 7.328274419083541e-06,
"loss": 1.9338,
"step": 447
},
{
"epoch": 0.7479131886477463,
"grad_norm": 0.0,
"learning_rate": 7.316156813954821e-06,
"loss": 1.81,
"step": 448
},
{
"epoch": 0.7495826377295493,
"grad_norm": 0.0,
"learning_rate": 7.304021863452525e-06,
"loss": 1.7657,
"step": 449
},
{
"epoch": 0.7512520868113522,
"grad_norm": 0.0,
"learning_rate": 7.291869658453594e-06,
"loss": 1.8334,
"step": 450
},
{
"epoch": 0.7529215358931552,
"grad_norm": 0.0,
"learning_rate": 7.279700289964187e-06,
"loss": 1.6198,
"step": 451
},
{
"epoch": 0.7545909849749582,
"grad_norm": 0.0,
"learning_rate": 7.267513849119001e-06,
"loss": 1.8972,
"step": 452
},
{
"epoch": 0.7562604340567612,
"grad_norm": 0.0,
"learning_rate": 7.255310427180579e-06,
"loss": 1.819,
"step": 453
},
{
"epoch": 0.7579298831385642,
"grad_norm": 0.0,
"learning_rate": 7.243090115538639e-06,
"loss": 1.5967,
"step": 454
},
{
"epoch": 0.7595993322203672,
"grad_norm": 0.0,
"learning_rate": 7.230853005709378e-06,
"loss": 1.8483,
"step": 455
},
{
"epoch": 0.7612687813021702,
"grad_norm": 0.0,
"learning_rate": 7.218599189334799e-06,
"loss": 1.7642,
"step": 456
},
{
"epoch": 0.7629382303839732,
"grad_norm": 0.0,
"learning_rate": 7.206328758182013e-06,
"loss": 1.6935,
"step": 457
},
{
"epoch": 0.7646076794657763,
"grad_norm": 0.0,
"learning_rate": 7.194041804142556e-06,
"loss": 1.6039,
"step": 458
},
{
"epoch": 0.7662771285475793,
"grad_norm": 0.0,
"learning_rate": 7.181738419231708e-06,
"loss": 1.4567,
"step": 459
},
{
"epoch": 0.7679465776293823,
"grad_norm": 0.0,
"learning_rate": 7.169418695587791e-06,
"loss": 1.5594,
"step": 460
},
{
"epoch": 0.7696160267111853,
"grad_norm": 0.0,
"learning_rate": 7.157082725471488e-06,
"loss": 1.8546,
"step": 461
},
{
"epoch": 0.7712854757929883,
"grad_norm": 0.0,
"learning_rate": 7.144730601265148e-06,
"loss": 1.8287,
"step": 462
},
{
"epoch": 0.7729549248747913,
"grad_norm": 0.0,
"learning_rate": 7.132362415472099e-06,
"loss": 1.6402,
"step": 463
},
{
"epoch": 0.7746243739565943,
"grad_norm": 0.0,
"learning_rate": 7.1199782607159494e-06,
"loss": 1.549,
"step": 464
},
{
"epoch": 0.7762938230383973,
"grad_norm": 0.0,
"learning_rate": 7.107578229739895e-06,
"loss": 1.5773,
"step": 465
},
{
"epoch": 0.7779632721202003,
"grad_norm": 0.0,
"learning_rate": 7.095162415406034e-06,
"loss": 2.0086,
"step": 466
},
{
"epoch": 0.7796327212020033,
"grad_norm": 0.0,
"learning_rate": 7.082730910694655e-06,
"loss": 1.635,
"step": 467
},
{
"epoch": 0.7813021702838063,
"grad_norm": 0.0,
"learning_rate": 7.070283808703553e-06,
"loss": 1.7928,
"step": 468
},
{
"epoch": 0.7829716193656093,
"grad_norm": 0.0,
"learning_rate": 7.057821202647332e-06,
"loss": 1.839,
"step": 469
},
{
"epoch": 0.7846410684474123,
"grad_norm": 0.0,
"learning_rate": 7.045343185856701e-06,
"loss": 1.6673,
"step": 470
},
{
"epoch": 0.7863105175292153,
"grad_norm": 0.0,
"learning_rate": 7.032849851777774e-06,
"loss": 1.7299,
"step": 471
},
{
"epoch": 0.7879799666110183,
"grad_norm": 0.0,
"learning_rate": 7.020341293971383e-06,
"loss": 1.8863,
"step": 472
},
{
"epoch": 0.7896494156928213,
"grad_norm": 0.0,
"learning_rate": 7.0078176061123595e-06,
"loss": 1.8666,
"step": 473
},
{
"epoch": 0.7913188647746243,
"grad_norm": 0.0,
"learning_rate": 6.995278881988847e-06,
"loss": 1.3643,
"step": 474
},
{
"epoch": 0.7929883138564274,
"grad_norm": 0.0,
"learning_rate": 6.982725215501592e-06,
"loss": 1.6456,
"step": 475
},
{
"epoch": 0.7946577629382304,
"grad_norm": 0.0,
"learning_rate": 6.970156700663244e-06,
"loss": 1.8537,
"step": 476
},
{
"epoch": 0.7963272120200334,
"grad_norm": 0.0,
"learning_rate": 6.957573431597646e-06,
"loss": 1.8776,
"step": 477
},
{
"epoch": 0.7979966611018364,
"grad_norm": 0.0,
"learning_rate": 6.9449755025391355e-06,
"loss": 1.8815,
"step": 478
},
{
"epoch": 0.7996661101836394,
"grad_norm": 0.0,
"learning_rate": 6.932363007831837e-06,
"loss": 1.8574,
"step": 479
},
{
"epoch": 0.8013355592654424,
"grad_norm": 0.0,
"learning_rate": 6.919736041928956e-06,
"loss": 1.8733,
"step": 480
},
{
"epoch": 0.8030050083472454,
"grad_norm": 0.0,
"learning_rate": 6.907094699392066e-06,
"loss": 1.7342,
"step": 481
},
{
"epoch": 0.8046744574290484,
"grad_norm": 0.0,
"learning_rate": 6.894439074890413e-06,
"loss": 1.6841,
"step": 482
},
{
"epoch": 0.8063439065108514,
"grad_norm": 0.0,
"learning_rate": 6.881769263200192e-06,
"loss": 1.6834,
"step": 483
},
{
"epoch": 0.8080133555926544,
"grad_norm": 0.0,
"learning_rate": 6.869085359203844e-06,
"loss": 1.8617,
"step": 484
},
{
"epoch": 0.8096828046744574,
"grad_norm": 0.0,
"learning_rate": 6.8563874578893505e-06,
"loss": 1.8515,
"step": 485
},
{
"epoch": 0.8113522537562604,
"grad_norm": 0.0,
"learning_rate": 6.843675654349513e-06,
"loss": 1.672,
"step": 486
},
{
"epoch": 0.8130217028380634,
"grad_norm": 0.0,
"learning_rate": 6.830950043781245e-06,
"loss": 1.8821,
"step": 487
},
{
"epoch": 0.8146911519198664,
"grad_norm": 0.0,
"learning_rate": 6.818210721484859e-06,
"loss": 1.9313,
"step": 488
},
{
"epoch": 0.8163606010016694,
"grad_norm": 0.0,
"learning_rate": 6.805457782863354e-06,
"loss": 1.8146,
"step": 489
},
{
"epoch": 0.8180300500834724,
"grad_norm": 0.0,
"learning_rate": 6.792691323421698e-06,
"loss": 1.6788,
"step": 490
},
{
"epoch": 0.8196994991652755,
"grad_norm": 0.0,
"learning_rate": 6.779911438766117e-06,
"loss": 1.8528,
"step": 491
},
{
"epoch": 0.8213689482470785,
"grad_norm": 0.0,
"learning_rate": 6.767118224603374e-06,
"loss": 1.6756,
"step": 492
},
{
"epoch": 0.8230383973288815,
"grad_norm": 0.0,
"learning_rate": 6.754311776740057e-06,
"loss": 1.4272,
"step": 493
},
{
"epoch": 0.8247078464106845,
"grad_norm": 0.0,
"learning_rate": 6.741492191081856e-06,
"loss": 1.7705,
"step": 494
},
{
"epoch": 0.8263772954924875,
"grad_norm": 0.0,
"learning_rate": 6.728659563632853e-06,
"loss": 1.5004,
"step": 495
},
{
"epoch": 0.8280467445742905,
"grad_norm": 0.0,
"learning_rate": 6.715813990494793e-06,
"loss": 2.0629,
"step": 496
},
{
"epoch": 0.8297161936560935,
"grad_norm": 0.0,
"learning_rate": 6.702955567866372e-06,
"loss": 2.0329,
"step": 497
},
{
"epoch": 0.8313856427378965,
"grad_norm": 0.0,
"learning_rate": 6.690084392042514e-06,
"loss": 1.7926,
"step": 498
},
{
"epoch": 0.8330550918196995,
"grad_norm": 0.0,
"learning_rate": 6.677200559413652e-06,
"loss": 1.6044,
"step": 499
},
{
"epoch": 0.8347245409015025,
"grad_norm": 0.0,
"learning_rate": 6.664304166465e-06,
"loss": 1.2786,
"step": 500
},
{
"epoch": 0.8363939899833055,
"grad_norm": 0.0,
"learning_rate": 6.651395309775837e-06,
"loss": 1.8032,
"step": 501
},
{
"epoch": 0.8380634390651085,
"grad_norm": 0.0,
"learning_rate": 6.638474086018778e-06,
"loss": 1.807,
"step": 502
},
{
"epoch": 0.8397328881469115,
"grad_norm": 0.0,
"learning_rate": 6.62554059195906e-06,
"loss": 1.5938,
"step": 503
},
{
"epoch": 0.8414023372287145,
"grad_norm": 0.0,
"learning_rate": 6.612594924453801e-06,
"loss": 1.5486,
"step": 504
},
{
"epoch": 0.8430717863105175,
"grad_norm": 0.0,
"learning_rate": 6.599637180451295e-06,
"loss": 1.7077,
"step": 505
},
{
"epoch": 0.8447412353923205,
"grad_norm": 0.0,
"learning_rate": 6.5866674569902676e-06,
"loss": 1.4935,
"step": 506
},
{
"epoch": 0.8464106844741235,
"grad_norm": 0.0,
"learning_rate": 6.5736858511991585e-06,
"loss": 1.5449,
"step": 507
},
{
"epoch": 0.8480801335559266,
"grad_norm": 0.0,
"learning_rate": 6.5606924602953925e-06,
"loss": 1.3814,
"step": 508
},
{
"epoch": 0.8497495826377296,
"grad_norm": 0.0,
"learning_rate": 6.547687381584653e-06,
"loss": 2.0371,
"step": 509
},
{
"epoch": 0.8514190317195326,
"grad_norm": 0.0,
"learning_rate": 6.534670712460151e-06,
"loss": 1.8953,
"step": 510
},
{
"epoch": 0.8530884808013356,
"grad_norm": 0.0,
"learning_rate": 6.521642550401894e-06,
"loss": 1.63,
"step": 511
},
{
"epoch": 0.8547579298831386,
"grad_norm": 0.0,
"learning_rate": 6.508602992975963e-06,
"loss": 1.7057,
"step": 512
},
{
"epoch": 0.8564273789649416,
"grad_norm": 0.0,
"learning_rate": 6.495552137833774e-06,
"loss": 1.6038,
"step": 513
},
{
"epoch": 0.8580968280467446,
"grad_norm": 0.0,
"learning_rate": 6.4824900827113506e-06,
"loss": 1.6273,
"step": 514
},
{
"epoch": 0.8597662771285476,
"grad_norm": 0.0,
"learning_rate": 6.469416925428593e-06,
"loss": 1.8133,
"step": 515
},
{
"epoch": 0.8614357262103506,
"grad_norm": 0.0,
"learning_rate": 6.456332763888544e-06,
"loss": 1.6532,
"step": 516
},
{
"epoch": 0.8631051752921536,
"grad_norm": 0.0,
"learning_rate": 6.443237696076652e-06,
"loss": 1.9142,
"step": 517
},
{
"epoch": 0.8647746243739566,
"grad_norm": 0.0,
"learning_rate": 6.430131820060043e-06,
"loss": 1.7401,
"step": 518
},
{
"epoch": 0.8664440734557596,
"grad_norm": 0.0,
"learning_rate": 6.417015233986786e-06,
"loss": 1.5529,
"step": 519
},
{
"epoch": 0.8681135225375626,
"grad_norm": 0.0,
"learning_rate": 6.403888036085155e-06,
"loss": 1.5511,
"step": 520
},
{
"epoch": 0.8697829716193656,
"grad_norm": 0.0,
"learning_rate": 6.390750324662895e-06,
"loss": 1.7932,
"step": 521
},
{
"epoch": 0.8714524207011686,
"grad_norm": 0.0,
"learning_rate": 6.3776021981064825e-06,
"loss": 1.7507,
"step": 522
},
{
"epoch": 0.8731218697829716,
"grad_norm": 0.0,
"learning_rate": 6.364443754880395e-06,
"loss": 1.8488,
"step": 523
},
{
"epoch": 0.8747913188647746,
"grad_norm": 0.0,
"learning_rate": 6.3512750935263664e-06,
"loss": 1.4568,
"step": 524
},
{
"epoch": 0.8764607679465777,
"grad_norm": 0.0,
"learning_rate": 6.338096312662658e-06,
"loss": 1.8529,
"step": 525
},
{
"epoch": 0.8781302170283807,
"grad_norm": 0.0,
"learning_rate": 6.32490751098331e-06,
"loss": 1.6563,
"step": 526
},
{
"epoch": 0.8797996661101837,
"grad_norm": 0.0,
"learning_rate": 6.311708787257408e-06,
"loss": 1.6838,
"step": 527
},
{
"epoch": 0.8814691151919867,
"grad_norm": 0.0,
"learning_rate": 6.298500240328342e-06,
"loss": 1.7945,
"step": 528
},
{
"epoch": 0.8831385642737897,
"grad_norm": 0.0,
"learning_rate": 6.285281969113072e-06,
"loss": 1.4784,
"step": 529
},
{
"epoch": 0.8848080133555927,
"grad_norm": 0.0,
"learning_rate": 6.272054072601374e-06,
"loss": 1.7104,
"step": 530
},
{
"epoch": 0.8864774624373957,
"grad_norm": 0.0,
"learning_rate": 6.258816649855109e-06,
"loss": 1.5431,
"step": 531
},
{
"epoch": 0.8881469115191987,
"grad_norm": 0.0,
"learning_rate": 6.245569800007484e-06,
"loss": 1.5852,
"step": 532
},
{
"epoch": 0.8898163606010017,
"grad_norm": 0.0,
"learning_rate": 6.232313622262297e-06,
"loss": 1.7488,
"step": 533
},
{
"epoch": 0.8914858096828047,
"grad_norm": 0.0,
"learning_rate": 6.219048215893204e-06,
"loss": 1.6947,
"step": 534
},
{
"epoch": 0.8931552587646077,
"grad_norm": 0.0,
"learning_rate": 6.2057736802429724e-06,
"loss": 1.6526,
"step": 535
},
{
"epoch": 0.8948247078464107,
"grad_norm": 0.0,
"learning_rate": 6.192490114722741e-06,
"loss": 1.4266,
"step": 536
},
{
"epoch": 0.8964941569282137,
"grad_norm": 0.0,
"learning_rate": 6.179197618811267e-06,
"loss": 1.4984,
"step": 537
},
{
"epoch": 0.8981636060100167,
"grad_norm": 0.0,
"learning_rate": 6.1658962920541875e-06,
"loss": 1.7565,
"step": 538
},
{
"epoch": 0.8998330550918197,
"grad_norm": 0.0,
"learning_rate": 6.152586234063277e-06,
"loss": 1.6057,
"step": 539
},
{
"epoch": 0.9015025041736227,
"grad_norm": 0.0,
"learning_rate": 6.139267544515689e-06,
"loss": 1.4978,
"step": 540
},
{
"epoch": 0.9031719532554258,
"grad_norm": 0.0,
"learning_rate": 6.125940323153223e-06,
"loss": 1.5634,
"step": 541
},
{
"epoch": 0.9048414023372288,
"grad_norm": 0.0,
"learning_rate": 6.112604669781572e-06,
"loss": 1.8368,
"step": 542
},
{
"epoch": 0.9065108514190318,
"grad_norm": 0.0,
"learning_rate": 6.0992606842695745e-06,
"loss": 1.4745,
"step": 543
},
{
"epoch": 0.9081803005008348,
"grad_norm": 0.0,
"learning_rate": 6.0859084665484645e-06,
"loss": 1.6676,
"step": 544
},
{
"epoch": 0.9098497495826378,
"grad_norm": 0.0,
"learning_rate": 6.07254811661113e-06,
"loss": 1.7473,
"step": 545
},
{
"epoch": 0.9115191986644408,
"grad_norm": 0.0,
"learning_rate": 6.059179734511357e-06,
"loss": 1.8217,
"step": 546
},
{
"epoch": 0.9131886477462438,
"grad_norm": 0.0,
"learning_rate": 6.045803420363085e-06,
"loss": 2.0666,
"step": 547
},
{
"epoch": 0.9148580968280468,
"grad_norm": 0.0,
"learning_rate": 6.032419274339654e-06,
"loss": 1.7439,
"step": 548
},
{
"epoch": 0.9165275459098498,
"grad_norm": 0.0,
"learning_rate": 6.019027396673058e-06,
"loss": 1.6459,
"step": 549
},
{
"epoch": 0.9181969949916527,
"grad_norm": 0.0,
"learning_rate": 6.005627887653189e-06,
"loss": 1.6537,
"step": 550
},
{
"epoch": 0.9198664440734557,
"grad_norm": 0.0,
"learning_rate": 5.9922208476270914e-06,
"loss": 1.4779,
"step": 551
},
{
"epoch": 0.9215358931552587,
"grad_norm": 0.0,
"learning_rate": 5.978806376998209e-06,
"loss": 1.6659,
"step": 552
},
{
"epoch": 0.9232053422370617,
"grad_norm": 0.0,
"learning_rate": 5.965384576225632e-06,
"loss": 1.8208,
"step": 553
},
{
"epoch": 0.9248747913188647,
"grad_norm": 0.0,
"learning_rate": 5.951955545823342e-06,
"loss": 1.7005,
"step": 554
},
{
"epoch": 0.9265442404006677,
"grad_norm": 0.0,
"learning_rate": 5.938519386359466e-06,
"loss": 1.7261,
"step": 555
},
{
"epoch": 0.9282136894824707,
"grad_norm": 0.0,
"learning_rate": 5.925076198455517e-06,
"loss": 1.8591,
"step": 556
},
{
"epoch": 0.9298831385642737,
"grad_norm": 0.0,
"learning_rate": 5.911626082785644e-06,
"loss": 1.9504,
"step": 557
},
{
"epoch": 0.9315525876460768,
"grad_norm": 0.0,
"learning_rate": 5.898169140075878e-06,
"loss": 1.8146,
"step": 558
},
{
"epoch": 0.9332220367278798,
"grad_norm": 0.0,
"learning_rate": 5.884705471103376e-06,
"loss": 1.7633,
"step": 559
},
{
"epoch": 0.9348914858096828,
"grad_norm": 0.0,
"learning_rate": 5.871235176695664e-06,
"loss": 1.7236,
"step": 560
},
{
"epoch": 0.9365609348914858,
"grad_norm": 0.0,
"learning_rate": 5.857758357729892e-06,
"loss": 1.9265,
"step": 561
},
{
"epoch": 0.9382303839732888,
"grad_norm": 0.0,
"learning_rate": 5.844275115132064e-06,
"loss": 1.5696,
"step": 562
},
{
"epoch": 0.9398998330550918,
"grad_norm": 0.0,
"learning_rate": 5.830785549876296e-06,
"loss": 1.8268,
"step": 563
},
{
"epoch": 0.9415692821368948,
"grad_norm": 0.0,
"learning_rate": 5.817289762984048e-06,
"loss": 1.5622,
"step": 564
},
{
"epoch": 0.9432387312186978,
"grad_norm": 0.0,
"learning_rate": 5.803787855523377e-06,
"loss": 1.7594,
"step": 565
},
{
"epoch": 0.9449081803005008,
"grad_norm": 0.0,
"learning_rate": 5.790279928608173e-06,
"loss": 1.6731,
"step": 566
},
{
"epoch": 0.9465776293823038,
"grad_norm": 0.0,
"learning_rate": 5.776766083397409e-06,
"loss": 1.5842,
"step": 567
},
{
"epoch": 0.9482470784641068,
"grad_norm": 0.0,
"learning_rate": 5.763246421094373e-06,
"loss": 1.8225,
"step": 568
},
{
"epoch": 0.9499165275459098,
"grad_norm": 0.0,
"learning_rate": 5.749721042945924e-06,
"loss": 1.591,
"step": 569
},
{
"epoch": 0.9515859766277128,
"grad_norm": 0.0,
"learning_rate": 5.736190050241719e-06,
"loss": 1.7563,
"step": 570
},
{
"epoch": 0.9532554257095158,
"grad_norm": 0.0,
"learning_rate": 5.722653544313467e-06,
"loss": 1.707,
"step": 571
},
{
"epoch": 0.9549248747913188,
"grad_norm": 0.0,
"learning_rate": 5.709111626534161e-06,
"loss": 1.8462,
"step": 572
},
{
"epoch": 0.9565943238731218,
"grad_norm": 0.0,
"learning_rate": 5.695564398317326e-06,
"loss": 1.6125,
"step": 573
},
{
"epoch": 0.9582637729549248,
"grad_norm": 0.0,
"learning_rate": 5.6820119611162515e-06,
"loss": 1.6431,
"step": 574
},
{
"epoch": 0.9599332220367279,
"grad_norm": 0.0,
"learning_rate": 5.668454416423243e-06,
"loss": 1.9529,
"step": 575
},
{
"epoch": 0.9616026711185309,
"grad_norm": 0.0,
"learning_rate": 5.65489186576885e-06,
"loss": 1.8668,
"step": 576
},
{
"epoch": 0.9632721202003339,
"grad_norm": 0.0,
"learning_rate": 5.64132441072111e-06,
"loss": 1.9664,
"step": 577
},
{
"epoch": 0.9649415692821369,
"grad_norm": 0.0,
"learning_rate": 5.627752152884794e-06,
"loss": 1.6759,
"step": 578
},
{
"epoch": 0.9666110183639399,
"grad_norm": 0.0,
"learning_rate": 5.614175193900639e-06,
"loss": 1.8378,
"step": 579
},
{
"epoch": 0.9682804674457429,
"grad_norm": 0.0,
"learning_rate": 5.600593635444583e-06,
"loss": 1.2707,
"step": 580
},
{
"epoch": 0.9699499165275459,
"grad_norm": 0.0,
"learning_rate": 5.587007579227014e-06,
"loss": 1.6216,
"step": 581
},
{
"epoch": 0.9716193656093489,
"grad_norm": 0.0,
"learning_rate": 5.573417126992004e-06,
"loss": 1.688,
"step": 582
},
{
"epoch": 0.9732888146911519,
"grad_norm": 0.0,
"learning_rate": 5.559822380516539e-06,
"loss": 1.8716,
"step": 583
},
{
"epoch": 0.9749582637729549,
"grad_norm": 0.0,
"learning_rate": 5.546223441609775e-06,
"loss": 1.938,
"step": 584
},
{
"epoch": 0.9766277128547579,
"grad_norm": 0.0,
"learning_rate": 5.532620412112255e-06,
"loss": 1.5521,
"step": 585
},
{
"epoch": 0.9782971619365609,
"grad_norm": 0.0,
"learning_rate": 5.51901339389516e-06,
"loss": 1.7355,
"step": 586
},
{
"epoch": 0.9799666110183639,
"grad_norm": 0.0,
"learning_rate": 5.5054024888595415e-06,
"loss": 1.3318,
"step": 587
},
{
"epoch": 0.9816360601001669,
"grad_norm": 0.0,
"learning_rate": 5.491787798935557e-06,
"loss": 1.7539,
"step": 588
},
{
"epoch": 0.9833055091819699,
"grad_norm": 0.0,
"learning_rate": 5.478169426081712e-06,
"loss": 1.8847,
"step": 589
},
{
"epoch": 0.9849749582637729,
"grad_norm": 0.0,
"learning_rate": 5.464547472284091e-06,
"loss": 1.6883,
"step": 590
},
{
"epoch": 0.986644407345576,
"grad_norm": 0.0,
"learning_rate": 5.450922039555594e-06,
"loss": 1.7047,
"step": 591
},
{
"epoch": 0.988313856427379,
"grad_norm": 0.0,
"learning_rate": 5.437293229935178e-06,
"loss": 1.5539,
"step": 592
},
{
"epoch": 0.989983305509182,
"grad_norm": 0.0,
"learning_rate": 5.4236611454870865e-06,
"loss": 1.6321,
"step": 593
},
{
"epoch": 0.991652754590985,
"grad_norm": 0.0,
"learning_rate": 5.4100258883000874e-06,
"loss": 1.7593,
"step": 594
},
{
"epoch": 0.993322203672788,
"grad_norm": 0.0,
"learning_rate": 5.39638756048671e-06,
"loss": 1.7527,
"step": 595
},
{
"epoch": 0.994991652754591,
"grad_norm": 0.0,
"learning_rate": 5.38274626418248e-06,
"loss": 1.3562,
"step": 596
},
{
"epoch": 0.996661101836394,
"grad_norm": 0.0,
"learning_rate": 5.3691021015451494e-06,
"loss": 1.4055,
"step": 597
},
{
"epoch": 0.998330550918197,
"grad_norm": 0.0,
"learning_rate": 5.355455174753941e-06,
"loss": 1.7539,
"step": 598
},
{
"epoch": 1.0,
"grad_norm": 0.0,
"learning_rate": 5.341805586008778e-06,
"loss": 1.7956,
"step": 599
},
{
"epoch": 1.001669449081803,
"grad_norm": 0.0,
"learning_rate": 5.328153437529512e-06,
"loss": 2.0003,
"step": 600
}
],
"logging_steps": 1,
"max_steps": 1198,
"num_input_tokens_seen": 0,
"num_train_epochs": 2,
"save_steps": 300,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 6.75387247951872e+18,
"train_batch_size": 8,
"trial_name": null,
"trial_params": null
}