{
"best_metric": 1.5342012108859804,
"best_model_checkpoint": "./modernBERT-content-regression/run-2/checkpoint-496",
"epoch": 4.0,
"eval_steps": 500,
"global_step": 496,
"is_hyper_param_search": true,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.008064516129032258,
"grad_norm": 312.814453125,
"learning_rate": 2.4759427123128026e-05,
"loss": 21.3087,
"step": 1
},
{
"epoch": 0.016129032258064516,
"grad_norm": 73.38264465332031,
"learning_rate": 2.4719428048615702e-05,
"loss": 0.7206,
"step": 2
},
{
"epoch": 0.024193548387096774,
"grad_norm": 363.2410583496094,
"learning_rate": 2.467942897410338e-05,
"loss": 135.449,
"step": 3
},
{
"epoch": 0.03225806451612903,
"grad_norm": 64.86761474609375,
"learning_rate": 2.4639429899591056e-05,
"loss": 12.5636,
"step": 4
},
{
"epoch": 0.04032258064516129,
"grad_norm": 86.92879486083984,
"learning_rate": 2.4599430825078733e-05,
"loss": 2.4774,
"step": 5
},
{
"epoch": 0.04838709677419355,
"grad_norm": 271.8641662597656,
"learning_rate": 2.455943175056641e-05,
"loss": 3.729,
"step": 6
},
{
"epoch": 0.056451612903225805,
"grad_norm": 41.483768463134766,
"learning_rate": 2.4519432676054087e-05,
"loss": 5.4716,
"step": 7
},
{
"epoch": 0.06451612903225806,
"grad_norm": 163.63929748535156,
"learning_rate": 2.4479433601541764e-05,
"loss": 11.4921,
"step": 8
},
{
"epoch": 0.07258064516129033,
"grad_norm": 67.34258270263672,
"learning_rate": 2.443943452702944e-05,
"loss": 0.827,
"step": 9
},
{
"epoch": 0.08064516129032258,
"grad_norm": 45.18867874145508,
"learning_rate": 2.4399435452517118e-05,
"loss": 2.3716,
"step": 10
},
{
"epoch": 0.08870967741935484,
"grad_norm": 130.92532348632812,
"learning_rate": 2.4359436378004795e-05,
"loss": 10.3088,
"step": 11
},
{
"epoch": 0.0967741935483871,
"grad_norm": 34.51155471801758,
"learning_rate": 2.4319437303492468e-05,
"loss": 0.7379,
"step": 12
},
{
"epoch": 0.10483870967741936,
"grad_norm": 109.77779388427734,
"learning_rate": 2.427943822898015e-05,
"loss": 1.3991,
"step": 13
},
{
"epoch": 0.11290322580645161,
"grad_norm": 33.590213775634766,
"learning_rate": 2.4239439154467826e-05,
"loss": 1.3007,
"step": 14
},
{
"epoch": 0.12096774193548387,
"grad_norm": 34.60383605957031,
"learning_rate": 2.4199440079955502e-05,
"loss": 0.1627,
"step": 15
},
{
"epoch": 0.12903225806451613,
"grad_norm": 183.3640899658203,
"learning_rate": 2.415944100544318e-05,
"loss": 9.0763,
"step": 16
},
{
"epoch": 0.13709677419354838,
"grad_norm": 27.07301139831543,
"learning_rate": 2.4119441930930853e-05,
"loss": 1.9223,
"step": 17
},
{
"epoch": 0.14516129032258066,
"grad_norm": 106.09356689453125,
"learning_rate": 2.4079442856418533e-05,
"loss": 2.1224,
"step": 18
},
{
"epoch": 0.1532258064516129,
"grad_norm": 32.14213562011719,
"learning_rate": 2.403944378190621e-05,
"loss": 0.84,
"step": 19
},
{
"epoch": 0.16129032258064516,
"grad_norm": 186.9098358154297,
"learning_rate": 2.3999444707393887e-05,
"loss": 6.1026,
"step": 20
},
{
"epoch": 0.1693548387096774,
"grad_norm": 252.75877380371094,
"learning_rate": 2.3959445632881564e-05,
"loss": 4.5225,
"step": 21
},
{
"epoch": 0.1774193548387097,
"grad_norm": 577.6574096679688,
"learning_rate": 2.3919446558369238e-05,
"loss": 13.0971,
"step": 22
},
{
"epoch": 0.18548387096774194,
"grad_norm": 236.11961364746094,
"learning_rate": 2.3879447483856914e-05,
"loss": 12.425,
"step": 23
},
{
"epoch": 0.1935483870967742,
"grad_norm": 329.27984619140625,
"learning_rate": 2.3839448409344595e-05,
"loss": 7.6775,
"step": 24
},
{
"epoch": 0.20161290322580644,
"grad_norm": 100.30821990966797,
"learning_rate": 2.379944933483227e-05,
"loss": 1.4652,
"step": 25
},
{
"epoch": 0.20967741935483872,
"grad_norm": 96.21698760986328,
"learning_rate": 2.3759450260319945e-05,
"loss": 1.4142,
"step": 26
},
{
"epoch": 0.21774193548387097,
"grad_norm": 31.13923454284668,
"learning_rate": 2.3719451185807622e-05,
"loss": 1.5587,
"step": 27
},
{
"epoch": 0.22580645161290322,
"grad_norm": 37.34580612182617,
"learning_rate": 2.36794521112953e-05,
"loss": 0.6072,
"step": 28
},
{
"epoch": 0.23387096774193547,
"grad_norm": 50.031856536865234,
"learning_rate": 2.363945303678298e-05,
"loss": 1.2987,
"step": 29
},
{
"epoch": 0.24193548387096775,
"grad_norm": 65.12112426757812,
"learning_rate": 2.3599453962270656e-05,
"loss": 1.5842,
"step": 30
},
{
"epoch": 0.25,
"grad_norm": 64.7094497680664,
"learning_rate": 2.355945488775833e-05,
"loss": 2.2444,
"step": 31
},
{
"epoch": 0.25806451612903225,
"grad_norm": 85.2149429321289,
"learning_rate": 2.3519455813246007e-05,
"loss": 2.9294,
"step": 32
},
{
"epoch": 0.2661290322580645,
"grad_norm": 20.915496826171875,
"learning_rate": 2.3479456738733684e-05,
"loss": 0.6207,
"step": 33
},
{
"epoch": 0.27419354838709675,
"grad_norm": 54.965335845947266,
"learning_rate": 2.3439457664221364e-05,
"loss": 1.9851,
"step": 34
},
{
"epoch": 0.28225806451612906,
"grad_norm": 187.736328125,
"learning_rate": 2.339945858970904e-05,
"loss": 21.4765,
"step": 35
},
{
"epoch": 0.2903225806451613,
"grad_norm": 65.61473846435547,
"learning_rate": 2.3359459515196714e-05,
"loss": 1.4577,
"step": 36
},
{
"epoch": 0.29838709677419356,
"grad_norm": 32.118228912353516,
"learning_rate": 2.331946044068439e-05,
"loss": 0.4661,
"step": 37
},
{
"epoch": 0.3064516129032258,
"grad_norm": 78.08635711669922,
"learning_rate": 2.3279461366172068e-05,
"loss": 19.0058,
"step": 38
},
{
"epoch": 0.31451612903225806,
"grad_norm": 153.27804565429688,
"learning_rate": 2.3239462291659745e-05,
"loss": 5.0193,
"step": 39
},
{
"epoch": 0.3225806451612903,
"grad_norm": 205.57205200195312,
"learning_rate": 2.3199463217147422e-05,
"loss": 17.3633,
"step": 40
},
{
"epoch": 0.33064516129032256,
"grad_norm": 207.53005981445312,
"learning_rate": 2.31594641426351e-05,
"loss": 7.6631,
"step": 41
},
{
"epoch": 0.3387096774193548,
"grad_norm": 85.1321029663086,
"learning_rate": 2.3119465068122776e-05,
"loss": 19.0139,
"step": 42
},
{
"epoch": 0.3467741935483871,
"grad_norm": 133.19154357910156,
"learning_rate": 2.3079465993610453e-05,
"loss": 21.4597,
"step": 43
},
{
"epoch": 0.3548387096774194,
"grad_norm": 116.96082305908203,
"learning_rate": 2.303946691909813e-05,
"loss": 3.4332,
"step": 44
},
{
"epoch": 0.3629032258064516,
"grad_norm": 99.06409454345703,
"learning_rate": 2.2999467844585807e-05,
"loss": 3.114,
"step": 45
},
{
"epoch": 0.3709677419354839,
"grad_norm": 58.03582763671875,
"learning_rate": 2.2959468770073484e-05,
"loss": 2.1934,
"step": 46
},
{
"epoch": 0.3790322580645161,
"grad_norm": 66.2879409790039,
"learning_rate": 2.291946969556116e-05,
"loss": 3.4978,
"step": 47
},
{
"epoch": 0.3870967741935484,
"grad_norm": 84.04444122314453,
"learning_rate": 2.2879470621048837e-05,
"loss": 2.5483,
"step": 48
},
{
"epoch": 0.3951612903225806,
"grad_norm": 40.6508903503418,
"learning_rate": 2.2839471546536514e-05,
"loss": 0.8297,
"step": 49
},
{
"epoch": 0.4032258064516129,
"grad_norm": 75.82645416259766,
"learning_rate": 2.279947247202419e-05,
"loss": 8.1934,
"step": 50
},
{
"epoch": 0.4112903225806452,
"grad_norm": 24.05048179626465,
"learning_rate": 2.2759473397511868e-05,
"loss": 0.3819,
"step": 51
},
{
"epoch": 0.41935483870967744,
"grad_norm": 39.44225311279297,
"learning_rate": 2.2719474322999545e-05,
"loss": 1.1629,
"step": 52
},
{
"epoch": 0.4274193548387097,
"grad_norm": 195.91845703125,
"learning_rate": 2.2679475248487222e-05,
"loss": 19.0751,
"step": 53
},
{
"epoch": 0.43548387096774194,
"grad_norm": 10.670635223388672,
"learning_rate": 2.26394761739749e-05,
"loss": 0.5451,
"step": 54
},
{
"epoch": 0.4435483870967742,
"grad_norm": 45.76976013183594,
"learning_rate": 2.2599477099462576e-05,
"loss": 5.5405,
"step": 55
},
{
"epoch": 0.45161290322580644,
"grad_norm": 31.259849548339844,
"learning_rate": 2.2559478024950253e-05,
"loss": 1.616,
"step": 56
},
{
"epoch": 0.4596774193548387,
"grad_norm": 16.430635452270508,
"learning_rate": 2.251947895043793e-05,
"loss": 0.2542,
"step": 57
},
{
"epoch": 0.46774193548387094,
"grad_norm": 83.4207992553711,
"learning_rate": 2.2479479875925607e-05,
"loss": 9.8378,
"step": 58
},
{
"epoch": 0.47580645161290325,
"grad_norm": 18.212440490722656,
"learning_rate": 2.2439480801413284e-05,
"loss": 0.2447,
"step": 59
},
{
"epoch": 0.4838709677419355,
"grad_norm": 107.16078186035156,
"learning_rate": 2.239948172690096e-05,
"loss": 14.317,
"step": 60
},
{
"epoch": 0.49193548387096775,
"grad_norm": 32.10540771484375,
"learning_rate": 2.2359482652388637e-05,
"loss": 0.6299,
"step": 61
},
{
"epoch": 0.5,
"grad_norm": 73.37329864501953,
"learning_rate": 2.2319483577876314e-05,
"loss": 21.8458,
"step": 62
},
{
"epoch": 0.5080645161290323,
"grad_norm": 46.12656784057617,
"learning_rate": 2.227948450336399e-05,
"loss": 1.2991,
"step": 63
},
{
"epoch": 0.5161290322580645,
"grad_norm": 18.842960357666016,
"learning_rate": 2.2239485428851665e-05,
"loss": 2.414,
"step": 64
},
{
"epoch": 0.5241935483870968,
"grad_norm": 77.6194839477539,
"learning_rate": 2.2199486354339345e-05,
"loss": 13.3138,
"step": 65
},
{
"epoch": 0.532258064516129,
"grad_norm": 66.23066711425781,
"learning_rate": 2.2159487279827022e-05,
"loss": 1.6535,
"step": 66
},
{
"epoch": 0.5403225806451613,
"grad_norm": 37.77677917480469,
"learning_rate": 2.21194882053147e-05,
"loss": 1.8361,
"step": 67
},
{
"epoch": 0.5483870967741935,
"grad_norm": 46.24187088012695,
"learning_rate": 2.2079489130802376e-05,
"loss": 1.4393,
"step": 68
},
{
"epoch": 0.5564516129032258,
"grad_norm": 80.01097106933594,
"learning_rate": 2.203949005629005e-05,
"loss": 3.5612,
"step": 69
},
{
"epoch": 0.5645161290322581,
"grad_norm": 7.726450443267822,
"learning_rate": 2.199949098177773e-05,
"loss": 0.3842,
"step": 70
},
{
"epoch": 0.5725806451612904,
"grad_norm": 13.420072555541992,
"learning_rate": 2.1959491907265407e-05,
"loss": 2.0311,
"step": 71
},
{
"epoch": 0.5806451612903226,
"grad_norm": 81.66263580322266,
"learning_rate": 2.1919492832753084e-05,
"loss": 15.1423,
"step": 72
},
{
"epoch": 0.5887096774193549,
"grad_norm": 30.341995239257812,
"learning_rate": 2.1879493758240757e-05,
"loss": 0.4867,
"step": 73
},
{
"epoch": 0.5967741935483871,
"grad_norm": 83.49819946289062,
"learning_rate": 2.1839494683728434e-05,
"loss": 3.1285,
"step": 74
},
{
"epoch": 0.6048387096774194,
"grad_norm": 62.19274139404297,
"learning_rate": 2.1799495609216114e-05,
"loss": 6.7166,
"step": 75
},
{
"epoch": 0.6129032258064516,
"grad_norm": 25.30666732788086,
"learning_rate": 2.175949653470379e-05,
"loss": 0.4676,
"step": 76
},
{
"epoch": 0.6209677419354839,
"grad_norm": 33.63331604003906,
"learning_rate": 2.1719497460191468e-05,
"loss": 0.7665,
"step": 77
},
{
"epoch": 0.6290322580645161,
"grad_norm": 42.89381408691406,
"learning_rate": 2.1679498385679142e-05,
"loss": 12.7414,
"step": 78
},
{
"epoch": 0.6370967741935484,
"grad_norm": 87.96309661865234,
"learning_rate": 2.163949931116682e-05,
"loss": 23.3716,
"step": 79
},
{
"epoch": 0.6451612903225806,
"grad_norm": 11.2321195602417,
"learning_rate": 2.1599500236654496e-05,
"loss": 1.0413,
"step": 80
},
{
"epoch": 0.6532258064516129,
"grad_norm": 21.426607131958008,
"learning_rate": 2.1559501162142176e-05,
"loss": 0.4798,
"step": 81
},
{
"epoch": 0.6612903225806451,
"grad_norm": 26.169578552246094,
"learning_rate": 2.1519502087629853e-05,
"loss": 1.9313,
"step": 82
},
{
"epoch": 0.6693548387096774,
"grad_norm": 18.651702880859375,
"learning_rate": 2.1479503013117526e-05,
"loss": 0.3472,
"step": 83
},
{
"epoch": 0.6774193548387096,
"grad_norm": 7.624361038208008,
"learning_rate": 2.1439503938605203e-05,
"loss": 0.6932,
"step": 84
},
{
"epoch": 0.6854838709677419,
"grad_norm": 123.84193420410156,
"learning_rate": 2.139950486409288e-05,
"loss": 17.2375,
"step": 85
},
{
"epoch": 0.6935483870967742,
"grad_norm": 39.71210861206055,
"learning_rate": 2.135950578958056e-05,
"loss": 14.5549,
"step": 86
},
{
"epoch": 0.7016129032258065,
"grad_norm": 23.699077606201172,
"learning_rate": 2.1319506715068234e-05,
"loss": 1.417,
"step": 87
},
{
"epoch": 0.7096774193548387,
"grad_norm": 49.412071228027344,
"learning_rate": 2.127950764055591e-05,
"loss": 1.2961,
"step": 88
},
{
"epoch": 0.717741935483871,
"grad_norm": 60.343544006347656,
"learning_rate": 2.1239508566043588e-05,
"loss": 2.5778,
"step": 89
},
{
"epoch": 0.7258064516129032,
"grad_norm": 62.71953582763672,
"learning_rate": 2.1199509491531265e-05,
"loss": 2.0629,
"step": 90
},
{
"epoch": 0.7338709677419355,
"grad_norm": 120.01128387451172,
"learning_rate": 2.1159510417018945e-05,
"loss": 19.2536,
"step": 91
},
{
"epoch": 0.7419354838709677,
"grad_norm": 59.287296295166016,
"learning_rate": 2.111951134250662e-05,
"loss": 5.4721,
"step": 92
},
{
"epoch": 0.75,
"grad_norm": 11.04751968383789,
"learning_rate": 2.1079512267994296e-05,
"loss": 0.1843,
"step": 93
},
{
"epoch": 0.7580645161290323,
"grad_norm": 101.02423858642578,
"learning_rate": 2.1039513193481973e-05,
"loss": 5.4431,
"step": 94
},
{
"epoch": 0.7661290322580645,
"grad_norm": 72.17565155029297,
"learning_rate": 2.099951411896965e-05,
"loss": 9.7386,
"step": 95
},
{
"epoch": 0.7741935483870968,
"grad_norm": 83.0063705444336,
"learning_rate": 2.0959515044457326e-05,
"loss": 3.1414,
"step": 96
},
{
"epoch": 0.782258064516129,
"grad_norm": 47.760589599609375,
"learning_rate": 2.0919515969945003e-05,
"loss": 1.2319,
"step": 97
},
{
"epoch": 0.7903225806451613,
"grad_norm": 43.6202507019043,
"learning_rate": 2.087951689543268e-05,
"loss": 1.2002,
"step": 98
},
{
"epoch": 0.7983870967741935,
"grad_norm": 42.899200439453125,
"learning_rate": 2.0839517820920357e-05,
"loss": 1.1685,
"step": 99
},
{
"epoch": 0.8064516129032258,
"grad_norm": 116.83399963378906,
"learning_rate": 2.0799518746408034e-05,
"loss": 19.3838,
"step": 100
},
{
"epoch": 0.8145161290322581,
"grad_norm": 16.667503356933594,
"learning_rate": 2.075951967189571e-05,
"loss": 0.8709,
"step": 101
},
{
"epoch": 0.8225806451612904,
"grad_norm": 13.620025634765625,
"learning_rate": 2.0719520597383388e-05,
"loss": 0.3991,
"step": 102
},
{
"epoch": 0.8306451612903226,
"grad_norm": 71.85466766357422,
"learning_rate": 2.0679521522871065e-05,
"loss": 11.6404,
"step": 103
},
{
"epoch": 0.8387096774193549,
"grad_norm": 11.629712104797363,
"learning_rate": 2.0639522448358742e-05,
"loss": 0.9058,
"step": 104
},
{
"epoch": 0.8467741935483871,
"grad_norm": 33.48215866088867,
"learning_rate": 2.059952337384642e-05,
"loss": 0.6932,
"step": 105
},
{
"epoch": 0.8548387096774194,
"grad_norm": 47.76714324951172,
"learning_rate": 2.0559524299334096e-05,
"loss": 2.1864,
"step": 106
},
{
"epoch": 0.8629032258064516,
"grad_norm": 40.71377944946289,
"learning_rate": 2.0519525224821773e-05,
"loss": 2.2803,
"step": 107
},
{
"epoch": 0.8709677419354839,
"grad_norm": 40.90349578857422,
"learning_rate": 2.047952615030945e-05,
"loss": 0.7261,
"step": 108
},
{
"epoch": 0.8790322580645161,
"grad_norm": 61.77216720581055,
"learning_rate": 2.0439527075797126e-05,
"loss": 1.9658,
"step": 109
},
{
"epoch": 0.8870967741935484,
"grad_norm": 15.168683052062988,
"learning_rate": 2.0399528001284803e-05,
"loss": 0.6779,
"step": 110
},
{
"epoch": 0.8951612903225806,
"grad_norm": 77.2820816040039,
"learning_rate": 2.035952892677248e-05,
"loss": 4.008,
"step": 111
},
{
"epoch": 0.9032258064516129,
"grad_norm": 9.020164489746094,
"learning_rate": 2.0319529852260157e-05,
"loss": 0.8001,
"step": 112
},
{
"epoch": 0.9112903225806451,
"grad_norm": 11.099763870239258,
"learning_rate": 2.0279530777747834e-05,
"loss": 0.4358,
"step": 113
},
{
"epoch": 0.9193548387096774,
"grad_norm": 39.36742401123047,
"learning_rate": 2.023953170323551e-05,
"loss": 1.39,
"step": 114
},
{
"epoch": 0.9274193548387096,
"grad_norm": 66.89869689941406,
"learning_rate": 2.0199532628723188e-05,
"loss": 4.703,
"step": 115
},
{
"epoch": 0.9354838709677419,
"grad_norm": 25.90433692932129,
"learning_rate": 2.0159533554210865e-05,
"loss": 1.4303,
"step": 116
},
{
"epoch": 0.9435483870967742,
"grad_norm": 21.590999603271484,
"learning_rate": 2.0119534479698542e-05,
"loss": 0.3786,
"step": 117
},
{
"epoch": 0.9516129032258065,
"grad_norm": 68.37796020507812,
"learning_rate": 2.007953540518622e-05,
"loss": 14.1292,
"step": 118
},
{
"epoch": 0.9596774193548387,
"grad_norm": 18.72194480895996,
"learning_rate": 2.0039536330673896e-05,
"loss": 2.0311,
"step": 119
},
{
"epoch": 0.967741935483871,
"grad_norm": 15.883110046386719,
"learning_rate": 1.999953725616157e-05,
"loss": 0.8801,
"step": 120
},
{
"epoch": 0.9758064516129032,
"grad_norm": 18.568222045898438,
"learning_rate": 1.9959538181649246e-05,
"loss": 0.403,
"step": 121
},
{
"epoch": 0.9838709677419355,
"grad_norm": 17.32382583618164,
"learning_rate": 1.9919539107136926e-05,
"loss": 0.3303,
"step": 122
},
{
"epoch": 0.9919354838709677,
"grad_norm": 10.257896423339844,
"learning_rate": 1.9879540032624603e-05,
"loss": 0.2042,
"step": 123
},
{
"epoch": 1.0,
"grad_norm": 25.707752227783203,
"learning_rate": 1.983954095811228e-05,
"loss": 0.3115,
"step": 124
},
{
"epoch": 1.0,
"eval_loss": 4.090590000152588,
"eval_mae": 1.2299772500991821,
"eval_mse": 4.090590000152588,
"eval_r2": -0.11993241310119629,
"eval_rmse": 2.0225207045052933,
"eval_runtime": 1.3441,
"eval_samples_per_second": 40.92,
"eval_smape": 50.51450729370117,
"eval_steps_per_second": 10.416,
"step": 124
},
{
"epoch": 1.0080645161290323,
"grad_norm": 37.24449920654297,
"learning_rate": 1.9799541883599954e-05,
"loss": 0.6422,
"step": 125
},
{
"epoch": 1.0161290322580645,
"grad_norm": 32.547119140625,
"learning_rate": 1.975954280908763e-05,
"loss": 0.9869,
"step": 126
},
{
"epoch": 1.0241935483870968,
"grad_norm": 70.05367279052734,
"learning_rate": 1.971954373457531e-05,
"loss": 4.6137,
"step": 127
},
{
"epoch": 1.032258064516129,
"grad_norm": 18.101661682128906,
"learning_rate": 1.9679544660062988e-05,
"loss": 0.4156,
"step": 128
},
{
"epoch": 1.0403225806451613,
"grad_norm": 22.2724609375,
"learning_rate": 1.9639545585550665e-05,
"loss": 0.841,
"step": 129
},
{
"epoch": 1.0483870967741935,
"grad_norm": 13.428308486938477,
"learning_rate": 1.959954651103834e-05,
"loss": 2.2846,
"step": 130
},
{
"epoch": 1.0564516129032258,
"grad_norm": 35.4150505065918,
"learning_rate": 1.9559547436526015e-05,
"loss": 0.7014,
"step": 131
},
{
"epoch": 1.064516129032258,
"grad_norm": 31.687740325927734,
"learning_rate": 1.9519548362013696e-05,
"loss": 3.4412,
"step": 132
},
{
"epoch": 1.0725806451612903,
"grad_norm": 54.95043182373047,
"learning_rate": 1.9479549287501373e-05,
"loss": 1.5723,
"step": 133
},
{
"epoch": 1.0806451612903225,
"grad_norm": 43.67780303955078,
"learning_rate": 1.9439550212989046e-05,
"loss": 1.161,
"step": 134
},
{
"epoch": 1.0887096774193548,
"grad_norm": 47.5596809387207,
"learning_rate": 1.9399551138476723e-05,
"loss": 5.3313,
"step": 135
},
{
"epoch": 1.096774193548387,
"grad_norm": 71.37387084960938,
"learning_rate": 1.93595520639644e-05,
"loss": 12.8636,
"step": 136
},
{
"epoch": 1.1048387096774193,
"grad_norm": 80.62567901611328,
"learning_rate": 1.9319552989452077e-05,
"loss": 7.3218,
"step": 137
},
{
"epoch": 1.1129032258064515,
"grad_norm": 59.973960876464844,
"learning_rate": 1.9279553914939757e-05,
"loss": 1.3948,
"step": 138
},
{
"epoch": 1.120967741935484,
"grad_norm": 179.83071899414062,
"learning_rate": 1.923955484042743e-05,
"loss": 34.7136,
"step": 139
},
{
"epoch": 1.129032258064516,
"grad_norm": 84.4344482421875,
"learning_rate": 1.9199555765915108e-05,
"loss": 2.435,
"step": 140
},
{
"epoch": 1.1370967741935485,
"grad_norm": 65.75166320800781,
"learning_rate": 1.9159556691402785e-05,
"loss": 2.0554,
"step": 141
},
{
"epoch": 1.1451612903225807,
"grad_norm": 29.691560745239258,
"learning_rate": 1.911955761689046e-05,
"loss": 0.8479,
"step": 142
},
{
"epoch": 1.153225806451613,
"grad_norm": 28.317535400390625,
"learning_rate": 1.9079558542378142e-05,
"loss": 0.4651,
"step": 143
},
{
"epoch": 1.1612903225806452,
"grad_norm": 28.46364402770996,
"learning_rate": 1.9039559467865815e-05,
"loss": 0.5143,
"step": 144
},
{
"epoch": 1.1693548387096775,
"grad_norm": 14.189237594604492,
"learning_rate": 1.8999560393353492e-05,
"loss": 0.2308,
"step": 145
},
{
"epoch": 1.1774193548387097,
"grad_norm": 16.224409103393555,
"learning_rate": 1.895956131884117e-05,
"loss": 1.8195,
"step": 146
},
{
"epoch": 1.185483870967742,
"grad_norm": 25.36056900024414,
"learning_rate": 1.8919562244328846e-05,
"loss": 0.6262,
"step": 147
},
{
"epoch": 1.1935483870967742,
"grad_norm": 20.145959854125977,
"learning_rate": 1.8879563169816523e-05,
"loss": 0.7281,
"step": 148
},
{
"epoch": 1.2016129032258065,
"grad_norm": 159.1549072265625,
"learning_rate": 1.88395640953042e-05,
"loss": 11.0343,
"step": 149
},
{
"epoch": 1.2096774193548387,
"grad_norm": 162.7301788330078,
"learning_rate": 1.8799565020791877e-05,
"loss": 34.9038,
"step": 150
},
{
"epoch": 1.217741935483871,
"grad_norm": 86.77201080322266,
"learning_rate": 1.8759565946279554e-05,
"loss": 16.0319,
"step": 151
},
{
"epoch": 1.2258064516129032,
"grad_norm": 48.960357666015625,
"learning_rate": 1.871956687176723e-05,
"loss": 1.4731,
"step": 152
},
{
"epoch": 1.2338709677419355,
"grad_norm": 82.25511932373047,
"learning_rate": 1.8679567797254908e-05,
"loss": 7.8374,
"step": 153
},
{
"epoch": 1.2419354838709677,
"grad_norm": 60.74580764770508,
"learning_rate": 1.8639568722742585e-05,
"loss": 10.0002,
"step": 154
},
{
"epoch": 1.25,
"grad_norm": 34.497222900390625,
"learning_rate": 1.859956964823026e-05,
"loss": 0.6464,
"step": 155
},
{
"epoch": 1.2580645161290323,
"grad_norm": 38.092491149902344,
"learning_rate": 1.855957057371794e-05,
"loss": 1.5106,
"step": 156
},
{
"epoch": 1.2661290322580645,
"grad_norm": 19.8438663482666,
"learning_rate": 1.8519571499205615e-05,
"loss": 1.5936,
"step": 157
},
{
"epoch": 1.2741935483870968,
"grad_norm": 69.82159423828125,
"learning_rate": 1.8479572424693292e-05,
"loss": 10.4774,
"step": 158
},
{
"epoch": 1.282258064516129,
"grad_norm": 125.10799407958984,
"learning_rate": 1.843957335018097e-05,
"loss": 15.5645,
"step": 159
},
{
"epoch": 1.2903225806451613,
"grad_norm": 49.95842361450195,
"learning_rate": 1.8399574275668646e-05,
"loss": 7.8997,
"step": 160
},
{
"epoch": 1.2983870967741935,
"grad_norm": 187.53903198242188,
"learning_rate": 1.8359575201156323e-05,
"loss": 19.5171,
"step": 161
},
{
"epoch": 1.3064516129032258,
"grad_norm": 137.54644775390625,
"learning_rate": 1.8319576126644e-05,
"loss": 4.1746,
"step": 162
},
{
"epoch": 1.314516129032258,
"grad_norm": 116.5649185180664,
"learning_rate": 1.8279577052131677e-05,
"loss": 2.607,
"step": 163
},
{
"epoch": 1.3225806451612903,
"grad_norm": 95.47283935546875,
"learning_rate": 1.8239577977619354e-05,
"loss": 5.3841,
"step": 164
},
{
"epoch": 1.3306451612903225,
"grad_norm": 354.6362609863281,
"learning_rate": 1.819957890310703e-05,
"loss": 13.093,
"step": 165
},
{
"epoch": 1.3387096774193548,
"grad_norm": 140.9376220703125,
"learning_rate": 1.8159579828594708e-05,
"loss": 4.0244,
"step": 166
},
{
"epoch": 1.346774193548387,
"grad_norm": 94.6503677368164,
"learning_rate": 1.811958075408238e-05,
"loss": 1.2288,
"step": 167
},
{
"epoch": 1.3548387096774195,
"grad_norm": 25.267553329467773,
"learning_rate": 1.807958167957006e-05,
"loss": 0.5603,
"step": 168
},
{
"epoch": 1.3629032258064515,
"grad_norm": 49.03358840942383,
"learning_rate": 1.803958260505774e-05,
"loss": 1.6284,
"step": 169
},
{
"epoch": 1.370967741935484,
"grad_norm": 369.3719787597656,
"learning_rate": 1.7999583530545415e-05,
"loss": 127.8047,
"step": 170
},
{
"epoch": 1.379032258064516,
"grad_norm": 30.973657608032227,
"learning_rate": 1.7959584456033092e-05,
"loss": 0.6907,
"step": 171
},
{
"epoch": 1.3870967741935485,
"grad_norm": 163.9529266357422,
"learning_rate": 1.7919585381520766e-05,
"loss": 15.7879,
"step": 172
},
{
"epoch": 1.3951612903225805,
"grad_norm": 13.607197761535645,
"learning_rate": 1.7879586307008446e-05,
"loss": 0.3138,
"step": 173
},
{
"epoch": 1.403225806451613,
"grad_norm": 86.09737396240234,
"learning_rate": 1.7839587232496123e-05,
"loss": 9.6102,
"step": 174
},
{
"epoch": 1.4112903225806452,
"grad_norm": 54.644554138183594,
"learning_rate": 1.77995881579838e-05,
"loss": 1.5776,
"step": 175
},
{
"epoch": 1.4193548387096775,
"grad_norm": 190.9008331298828,
"learning_rate": 1.7759589083471477e-05,
"loss": 22.1159,
"step": 176
},
{
"epoch": 1.4274193548387097,
"grad_norm": 50.92055130004883,
"learning_rate": 1.771959000895915e-05,
"loss": 2.3412,
"step": 177
},
{
"epoch": 1.435483870967742,
"grad_norm": 38.6739387512207,
"learning_rate": 1.7679590934446827e-05,
"loss": 0.7342,
"step": 178
},
{
"epoch": 1.4435483870967742,
"grad_norm": 28.455352783203125,
"learning_rate": 1.7639591859934508e-05,
"loss": 1.7024,
"step": 179
},
{
"epoch": 1.4516129032258065,
"grad_norm": 29.965150833129883,
"learning_rate": 1.7599592785422185e-05,
"loss": 0.6395,
"step": 180
},
{
"epoch": 1.4596774193548387,
"grad_norm": 46.12895202636719,
"learning_rate": 1.7559593710909858e-05,
"loss": 0.8689,
"step": 181
},
{
"epoch": 1.467741935483871,
"grad_norm": 21.45783805847168,
"learning_rate": 1.7519594636397535e-05,
"loss": 0.8418,
"step": 182
},
{
"epoch": 1.4758064516129032,
"grad_norm": 24.071561813354492,
"learning_rate": 1.7479595561885212e-05,
"loss": 1.8008,
"step": 183
},
{
"epoch": 1.4838709677419355,
"grad_norm": 35.17493438720703,
"learning_rate": 1.7439596487372892e-05,
"loss": 3.9543,
"step": 184
},
{
"epoch": 1.4919354838709677,
"grad_norm": 28.088899612426758,
"learning_rate": 1.739959741286057e-05,
"loss": 1.3278,
"step": 185
},
{
"epoch": 1.5,
"grad_norm": 15.711658477783203,
"learning_rate": 1.7359598338348243e-05,
"loss": 0.3969,
"step": 186
},
{
"epoch": 1.5080645161290323,
"grad_norm": 37.35240173339844,
"learning_rate": 1.731959926383592e-05,
"loss": 4.7095,
"step": 187
},
{
"epoch": 1.5161290322580645,
"grad_norm": 24.711170196533203,
"learning_rate": 1.7279600189323597e-05,
"loss": 1.3376,
"step": 188
},
{
"epoch": 1.5241935483870968,
"grad_norm": 40.604549407958984,
"learning_rate": 1.7239601114811277e-05,
"loss": 1.7629,
"step": 189
},
{
"epoch": 1.532258064516129,
"grad_norm": 25.869022369384766,
"learning_rate": 1.7199602040298954e-05,
"loss": 0.8421,
"step": 190
},
{
"epoch": 1.5403225806451613,
"grad_norm": 6.451545715332031,
"learning_rate": 1.7159602965786627e-05,
"loss": 0.2054,
"step": 191
},
{
"epoch": 1.5483870967741935,
"grad_norm": 8.05915641784668,
"learning_rate": 1.7119603891274304e-05,
"loss": 0.5445,
"step": 192
},
{
"epoch": 1.5564516129032258,
"grad_norm": 23.62920570373535,
"learning_rate": 1.707960481676198e-05,
"loss": 0.6784,
"step": 193
},
{
"epoch": 1.564516129032258,
"grad_norm": 19.833181381225586,
"learning_rate": 1.7039605742249658e-05,
"loss": 0.5534,
"step": 194
},
{
"epoch": 1.5725806451612905,
"grad_norm": 14.539905548095703,
"learning_rate": 1.6999606667737335e-05,
"loss": 0.4673,
"step": 195
},
{
"epoch": 1.5806451612903225,
"grad_norm": 103.28005981445312,
"learning_rate": 1.6959607593225012e-05,
"loss": 6.914,
"step": 196
},
{
"epoch": 1.588709677419355,
"grad_norm": 39.585941314697266,
"learning_rate": 1.691960851871269e-05,
"loss": 1.0972,
"step": 197
},
{
"epoch": 1.596774193548387,
"grad_norm": 15.518098831176758,
"learning_rate": 1.6879609444200366e-05,
"loss": 0.4614,
"step": 198
},
{
"epoch": 1.6048387096774195,
"grad_norm": 4.58119535446167,
"learning_rate": 1.6839610369688043e-05,
"loss": 0.1082,
"step": 199
},
{
"epoch": 1.6129032258064515,
"grad_norm": 25.006546020507812,
"learning_rate": 1.679961129517572e-05,
"loss": 0.4672,
"step": 200
},
{
"epoch": 1.620967741935484,
"grad_norm": 28.234201431274414,
"learning_rate": 1.6759612220663397e-05,
"loss": 0.5142,
"step": 201
},
{
"epoch": 1.629032258064516,
"grad_norm": 24.016407012939453,
"learning_rate": 1.6719613146151073e-05,
"loss": 0.9564,
"step": 202
},
{
"epoch": 1.6370967741935485,
"grad_norm": 18.26400375366211,
"learning_rate": 1.667961407163875e-05,
"loss": 0.3671,
"step": 203
},
{
"epoch": 1.6451612903225805,
"grad_norm": 12.657865524291992,
"learning_rate": 1.6639614997126427e-05,
"loss": 0.6099,
"step": 204
},
{
"epoch": 1.653225806451613,
"grad_norm": 117.20157623291016,
"learning_rate": 1.6599615922614104e-05,
"loss": 14.7071,
"step": 205
},
{
"epoch": 1.661290322580645,
"grad_norm": 5.277425289154053,
"learning_rate": 1.655961684810178e-05,
"loss": 0.0183,
"step": 206
},
{
"epoch": 1.6693548387096775,
"grad_norm": 8.111205101013184,
"learning_rate": 1.6519617773589458e-05,
"loss": 0.2201,
"step": 207
},
{
"epoch": 1.6774193548387095,
"grad_norm": 15.38776683807373,
"learning_rate": 1.6479618699077135e-05,
"loss": 0.3226,
"step": 208
},
{
"epoch": 1.685483870967742,
"grad_norm": 6.60953950881958,
"learning_rate": 1.6439619624564812e-05,
"loss": 0.3358,
"step": 209
},
{
"epoch": 1.6935483870967742,
"grad_norm": 17.016639709472656,
"learning_rate": 1.639962055005249e-05,
"loss": 0.2497,
"step": 210
},
{
"epoch": 1.7016129032258065,
"grad_norm": 54.830169677734375,
"learning_rate": 1.6359621475540166e-05,
"loss": 1.7451,
"step": 211
},
{
"epoch": 1.7096774193548387,
"grad_norm": 14.282036781311035,
"learning_rate": 1.6319622401027843e-05,
"loss": 1.1429,
"step": 212
},
{
"epoch": 1.717741935483871,
"grad_norm": 4.297520160675049,
"learning_rate": 1.627962332651552e-05,
"loss": 0.1797,
"step": 213
},
{
"epoch": 1.7258064516129032,
"grad_norm": 142.38172912597656,
"learning_rate": 1.6239624252003196e-05,
"loss": 21.8816,
"step": 214
},
{
"epoch": 1.7338709677419355,
"grad_norm": 88.52197265625,
"learning_rate": 1.6199625177490873e-05,
"loss": 15.5891,
"step": 215
},
{
"epoch": 1.7419354838709677,
"grad_norm": 15.318389892578125,
"learning_rate": 1.615962610297855e-05,
"loss": 0.4315,
"step": 216
},
{
"epoch": 1.75,
"grad_norm": 102.21027374267578,
"learning_rate": 1.6119627028466227e-05,
"loss": 6.8871,
"step": 217
},
{
"epoch": 1.7580645161290323,
"grad_norm": 12.100990295410156,
"learning_rate": 1.6079627953953904e-05,
"loss": 0.1098,
"step": 218
},
{
"epoch": 1.7661290322580645,
"grad_norm": 137.26162719726562,
"learning_rate": 1.6039628879441578e-05,
"loss": 12.6184,
"step": 219
},
{
"epoch": 1.7741935483870968,
"grad_norm": 16.48599624633789,
"learning_rate": 1.5999629804929258e-05,
"loss": 0.3973,
"step": 220
},
{
"epoch": 1.782258064516129,
"grad_norm": 23.34356689453125,
"learning_rate": 1.5959630730416935e-05,
"loss": 0.3448,
"step": 221
},
{
"epoch": 1.7903225806451613,
"grad_norm": 7.864492893218994,
"learning_rate": 1.5919631655904612e-05,
"loss": 0.502,
"step": 222
},
{
"epoch": 1.7983870967741935,
"grad_norm": 3.356924295425415,
"learning_rate": 1.587963258139229e-05,
"loss": 0.0997,
"step": 223
},
{
"epoch": 1.8064516129032258,
"grad_norm": 125.84968566894531,
"learning_rate": 1.5839633506879962e-05,
"loss": 13.3317,
"step": 224
},
{
"epoch": 1.814516129032258,
"grad_norm": 77.49720764160156,
"learning_rate": 1.5799634432367643e-05,
"loss": 4.3421,
"step": 225
},
{
"epoch": 1.8225806451612905,
"grad_norm": 24.029203414916992,
"learning_rate": 1.575963535785532e-05,
"loss": 1.8343,
"step": 226
},
{
"epoch": 1.8306451612903225,
"grad_norm": 70.15037536621094,
"learning_rate": 1.5719636283342996e-05,
"loss": 1.469,
"step": 227
},
{
"epoch": 1.838709677419355,
"grad_norm": 69.79930877685547,
"learning_rate": 1.567963720883067e-05,
"loss": 1.8992,
"step": 228
},
{
"epoch": 1.846774193548387,
"grad_norm": 63.507442474365234,
"learning_rate": 1.5639638134318347e-05,
"loss": 1.2837,
"step": 229
},
{
"epoch": 1.8548387096774195,
"grad_norm": 54.0720100402832,
"learning_rate": 1.5599639059806027e-05,
"loss": 3.8239,
"step": 230
},
{
"epoch": 1.8629032258064515,
"grad_norm": 71.55313873291016,
"learning_rate": 1.5559639985293704e-05,
"loss": 2.0285,
"step": 231
},
{
"epoch": 1.870967741935484,
"grad_norm": 9.896157264709473,
"learning_rate": 1.551964091078138e-05,
"loss": 0.329,
"step": 232
},
{
"epoch": 1.879032258064516,
"grad_norm": 19.860387802124023,
"learning_rate": 1.5479641836269055e-05,
"loss": 0.6743,
"step": 233
},
{
"epoch": 1.8870967741935485,
"grad_norm": 5.622892379760742,
"learning_rate": 1.543964276175673e-05,
"loss": 0.1553,
"step": 234
},
{
"epoch": 1.8951612903225805,
"grad_norm": 12.858086585998535,
"learning_rate": 1.539964368724441e-05,
"loss": 0.2809,
"step": 235
},
{
"epoch": 1.903225806451613,
"grad_norm": 108.66515350341797,
"learning_rate": 1.535964461273209e-05,
"loss": 5.8853,
"step": 236
},
{
"epoch": 1.911290322580645,
"grad_norm": 47.20570755004883,
"learning_rate": 1.5319645538219766e-05,
"loss": 4.3275,
"step": 237
},
{
"epoch": 1.9193548387096775,
"grad_norm": 43.51235580444336,
"learning_rate": 1.527964646370744e-05,
"loss": 0.7748,
"step": 238
},
{
"epoch": 1.9274193548387095,
"grad_norm": 27.735137939453125,
"learning_rate": 1.5239647389195118e-05,
"loss": 0.5191,
"step": 239
},
{
"epoch": 1.935483870967742,
"grad_norm": 27.011123657226562,
"learning_rate": 1.5199648314682795e-05,
"loss": 0.9489,
"step": 240
},
{
"epoch": 1.9435483870967742,
"grad_norm": 10.22940731048584,
"learning_rate": 1.5159649240170472e-05,
"loss": 0.3328,
"step": 241
},
{
"epoch": 1.9516129032258065,
"grad_norm": 17.85344886779785,
"learning_rate": 1.5119650165658147e-05,
"loss": 0.3374,
"step": 242
},
{
"epoch": 1.9596774193548387,
"grad_norm": 35.01970291137695,
"learning_rate": 1.5079651091145824e-05,
"loss": 0.7993,
"step": 243
},
{
"epoch": 1.967741935483871,
"grad_norm": 259.75616455078125,
"learning_rate": 1.50396520166335e-05,
"loss": 7.7043,
"step": 244
},
{
"epoch": 1.9758064516129032,
"grad_norm": 75.41777038574219,
"learning_rate": 1.499965294212118e-05,
"loss": 1.8485,
"step": 245
},
{
"epoch": 1.9838709677419355,
"grad_norm": 401.5027770996094,
"learning_rate": 1.4959653867608856e-05,
"loss": 19.4312,
"step": 246
},
{
"epoch": 1.9919354838709677,
"grad_norm": 46.57631301879883,
"learning_rate": 1.4919654793096532e-05,
"loss": 1.3285,
"step": 247
},
{
"epoch": 2.0,
"grad_norm": 110.93121337890625,
"learning_rate": 1.4879655718584208e-05,
"loss": 2.6528,
"step": 248
},
{
"epoch": 2.0,
"eval_loss": 4.852317810058594,
"eval_mae": 1.4657387733459473,
"eval_mse": 4.85231876373291,
"eval_r2": -0.32848048210144043,
"eval_rmse": 2.2027979398330912,
"eval_runtime": 1.3879,
"eval_samples_per_second": 39.629,
"eval_smape": 54.71565127372742,
"eval_steps_per_second": 10.087,
"step": 248
},
{
"epoch": 2.0080645161290325,
"grad_norm": 154.8404998779297,
"learning_rate": 1.4839656644071885e-05,
"loss": 3.6054,
"step": 249
},
{
"epoch": 2.0161290322580645,
"grad_norm": 62.42089080810547,
"learning_rate": 1.4799657569559564e-05,
"loss": 1.8889,
"step": 250
},
{
"epoch": 2.024193548387097,
"grad_norm": 70.86661529541016,
"learning_rate": 1.4759658495047241e-05,
"loss": 2.4493,
"step": 251
},
{
"epoch": 2.032258064516129,
"grad_norm": 51.09244155883789,
"learning_rate": 1.4719659420534916e-05,
"loss": 1.168,
"step": 252
},
{
"epoch": 2.0403225806451615,
"grad_norm": 18.558488845825195,
"learning_rate": 1.4679660346022593e-05,
"loss": 0.2762,
"step": 253
},
{
"epoch": 2.0483870967741935,
"grad_norm": 168.22433471679688,
"learning_rate": 1.463966127151027e-05,
"loss": 1.9643,
"step": 254
},
{
"epoch": 2.056451612903226,
"grad_norm": 40.86513900756836,
"learning_rate": 1.4599662196997949e-05,
"loss": 0.8871,
"step": 255
},
{
"epoch": 2.064516129032258,
"grad_norm": 169.68312072753906,
"learning_rate": 1.4559663122485622e-05,
"loss": 10.1988,
"step": 256
},
{
"epoch": 2.0725806451612905,
"grad_norm": 6.839456081390381,
"learning_rate": 1.45196640479733e-05,
"loss": 0.195,
"step": 257
},
{
"epoch": 2.0806451612903225,
"grad_norm": 29.532468795776367,
"learning_rate": 1.4479664973460978e-05,
"loss": 0.4214,
"step": 258
},
{
"epoch": 2.088709677419355,
"grad_norm": 134.2998504638672,
"learning_rate": 1.4439665898948655e-05,
"loss": 22.2033,
"step": 259
},
{
"epoch": 2.096774193548387,
"grad_norm": 229.64031982421875,
"learning_rate": 1.4399666824436333e-05,
"loss": 0.8783,
"step": 260
},
{
"epoch": 2.1048387096774195,
"grad_norm": 150.3468017578125,
"learning_rate": 1.4359667749924007e-05,
"loss": 22.9731,
"step": 261
},
{
"epoch": 2.1129032258064515,
"grad_norm": 104.23390197753906,
"learning_rate": 1.4319668675411685e-05,
"loss": 1.326,
"step": 262
},
{
"epoch": 2.120967741935484,
"grad_norm": 114.47550964355469,
"learning_rate": 1.4279669600899362e-05,
"loss": 1.3029,
"step": 263
},
{
"epoch": 2.129032258064516,
"grad_norm": 15.87612533569336,
"learning_rate": 1.423967052638704e-05,
"loss": 0.4699,
"step": 264
},
{
"epoch": 2.1370967741935485,
"grad_norm": 46.12168884277344,
"learning_rate": 1.4199671451874716e-05,
"loss": 0.7016,
"step": 265
},
{
"epoch": 2.1451612903225805,
"grad_norm": 279.55279541015625,
"learning_rate": 1.4159672377362391e-05,
"loss": 3.2752,
"step": 266
},
{
"epoch": 2.153225806451613,
"grad_norm": 20.628719329833984,
"learning_rate": 1.4119673302850068e-05,
"loss": 0.2388,
"step": 267
},
{
"epoch": 2.161290322580645,
"grad_norm": 22.857067108154297,
"learning_rate": 1.4079674228337747e-05,
"loss": 0.5555,
"step": 268
},
{
"epoch": 2.1693548387096775,
"grad_norm": 47.45686340332031,
"learning_rate": 1.4039675153825424e-05,
"loss": 2.8467,
"step": 269
},
{
"epoch": 2.1774193548387095,
"grad_norm": 22.756681442260742,
"learning_rate": 1.39996760793131e-05,
"loss": 0.7615,
"step": 270
},
{
"epoch": 2.185483870967742,
"grad_norm": 80.15179443359375,
"learning_rate": 1.3959677004800776e-05,
"loss": 8.9323,
"step": 271
},
{
"epoch": 2.193548387096774,
"grad_norm": 100.86170196533203,
"learning_rate": 1.3919677930288453e-05,
"loss": 12.4829,
"step": 272
},
{
"epoch": 2.2016129032258065,
"grad_norm": 72.2647705078125,
"learning_rate": 1.3879678855776132e-05,
"loss": 9.6687,
"step": 273
},
{
"epoch": 2.2096774193548385,
"grad_norm": 30.535030364990234,
"learning_rate": 1.3839679781263808e-05,
"loss": 0.4308,
"step": 274
},
{
"epoch": 2.217741935483871,
"grad_norm": 54.71157455444336,
"learning_rate": 1.3799680706751484e-05,
"loss": 1.1555,
"step": 275
},
{
"epoch": 2.225806451612903,
"grad_norm": 39.96649932861328,
"learning_rate": 1.375968163223916e-05,
"loss": 0.4534,
"step": 276
},
{
"epoch": 2.2338709677419355,
"grad_norm": 55.768898010253906,
"learning_rate": 1.3719682557726838e-05,
"loss": 0.9371,
"step": 277
},
{
"epoch": 2.241935483870968,
"grad_norm": 83.68522644042969,
"learning_rate": 1.3679683483214516e-05,
"loss": 2.4787,
"step": 278
},
{
"epoch": 2.25,
"grad_norm": 29.500429153442383,
"learning_rate": 1.3639684408702193e-05,
"loss": 1.0634,
"step": 279
},
{
"epoch": 2.258064516129032,
"grad_norm": 100.61076354980469,
"learning_rate": 1.3599685334189868e-05,
"loss": 10.4759,
"step": 280
},
{
"epoch": 2.2661290322580645,
"grad_norm": 182.71800231933594,
"learning_rate": 1.3559686259677545e-05,
"loss": 1.687,
"step": 281
},
{
"epoch": 2.274193548387097,
"grad_norm": 187.59808349609375,
"learning_rate": 1.3519687185165222e-05,
"loss": 15.9992,
"step": 282
},
{
"epoch": 2.282258064516129,
"grad_norm": 105.1393051147461,
"learning_rate": 1.3479688110652899e-05,
"loss": 4.9415,
"step": 283
},
{
"epoch": 2.2903225806451615,
"grad_norm": 115.4957504272461,
"learning_rate": 1.3439689036140578e-05,
"loss": 7.5135,
"step": 284
},
{
"epoch": 2.2983870967741935,
"grad_norm": 42.67521286010742,
"learning_rate": 1.3399689961628251e-05,
"loss": 0.9061,
"step": 285
},
{
"epoch": 2.306451612903226,
"grad_norm": 31.427526473999023,
"learning_rate": 1.335969088711593e-05,
"loss": 0.4332,
"step": 286
},
{
"epoch": 2.314516129032258,
"grad_norm": 47.62785339355469,
"learning_rate": 1.3319691812603607e-05,
"loss": 0.9642,
"step": 287
},
{
"epoch": 2.3225806451612905,
"grad_norm": 15.01259708404541,
"learning_rate": 1.3279692738091284e-05,
"loss": 0.2899,
"step": 288
},
{
"epoch": 2.3306451612903225,
"grad_norm": 37.724735260009766,
"learning_rate": 1.3239693663578959e-05,
"loss": 1.7805,
"step": 289
},
{
"epoch": 2.338709677419355,
"grad_norm": 79.78099060058594,
"learning_rate": 1.3199694589066636e-05,
"loss": 8.5522,
"step": 290
},
{
"epoch": 2.346774193548387,
"grad_norm": 20.72849464416504,
"learning_rate": 1.3159695514554314e-05,
"loss": 0.4305,
"step": 291
},
{
"epoch": 2.3548387096774195,
"grad_norm": 137.8477325439453,
"learning_rate": 1.3119696440041991e-05,
"loss": 5.5653,
"step": 292
},
{
"epoch": 2.3629032258064515,
"grad_norm": 49.036468505859375,
"learning_rate": 1.3079697365529668e-05,
"loss": 0.3396,
"step": 293
},
{
"epoch": 2.370967741935484,
"grad_norm": 77.85248565673828,
"learning_rate": 1.3039698291017344e-05,
"loss": 12.0845,
"step": 294
},
{
"epoch": 2.379032258064516,
"grad_norm": 84.66986846923828,
"learning_rate": 1.299969921650502e-05,
"loss": 13.1287,
"step": 295
},
{
"epoch": 2.3870967741935485,
"grad_norm": 19.693037033081055,
"learning_rate": 1.2959700141992699e-05,
"loss": 0.4963,
"step": 296
},
{
"epoch": 2.3951612903225805,
"grad_norm": 10.413263320922852,
"learning_rate": 1.2919701067480376e-05,
"loss": 0.2287,
"step": 297
},
{
"epoch": 2.403225806451613,
"grad_norm": 64.92196655273438,
"learning_rate": 1.2879701992968053e-05,
"loss": 1.6285,
"step": 298
},
{
"epoch": 2.411290322580645,
"grad_norm": 36.941349029541016,
"learning_rate": 1.2839702918455728e-05,
"loss": 0.8435,
"step": 299
},
{
"epoch": 2.4193548387096775,
"grad_norm": 149.9400177001953,
"learning_rate": 1.2799703843943405e-05,
"loss": 4.2246,
"step": 300
},
{
"epoch": 2.4274193548387095,
"grad_norm": 40.55129623413086,
"learning_rate": 1.2759704769431082e-05,
"loss": 0.628,
"step": 301
},
{
"epoch": 2.435483870967742,
"grad_norm": 39.12997817993164,
"learning_rate": 1.271970569491876e-05,
"loss": 0.4721,
"step": 302
},
{
"epoch": 2.443548387096774,
"grad_norm": 87.25220489501953,
"learning_rate": 1.2679706620406434e-05,
"loss": 6.3045,
"step": 303
},
{
"epoch": 2.4516129032258065,
"grad_norm": 47.91136169433594,
"learning_rate": 1.2639707545894113e-05,
"loss": 0.6415,
"step": 304
},
{
"epoch": 2.4596774193548385,
"grad_norm": 103.10691833496094,
"learning_rate": 1.259970847138179e-05,
"loss": 10.6521,
"step": 305
},
{
"epoch": 2.467741935483871,
"grad_norm": 16.19340705871582,
"learning_rate": 1.2559709396869467e-05,
"loss": 0.2191,
"step": 306
},
{
"epoch": 2.475806451612903,
"grad_norm": 18.110265731811523,
"learning_rate": 1.2519710322357145e-05,
"loss": 0.2425,
"step": 307
},
{
"epoch": 2.4838709677419355,
"grad_norm": 156.45272827148438,
"learning_rate": 1.2479711247844819e-05,
"loss": 4.5986,
"step": 308
},
{
"epoch": 2.491935483870968,
"grad_norm": 16.38495635986328,
"learning_rate": 1.2439712173332497e-05,
"loss": 0.3502,
"step": 309
},
{
"epoch": 2.5,
"grad_norm": 24.752309799194336,
"learning_rate": 1.2399713098820174e-05,
"loss": 0.4216,
"step": 310
},
{
"epoch": 2.508064516129032,
"grad_norm": 22.331012725830078,
"learning_rate": 1.2359714024307851e-05,
"loss": 0.3915,
"step": 311
},
{
"epoch": 2.5161290322580645,
"grad_norm": 4.984405994415283,
"learning_rate": 1.2319714949795528e-05,
"loss": 0.1369,
"step": 312
},
{
"epoch": 2.524193548387097,
"grad_norm": 4.796787261962891,
"learning_rate": 1.2279715875283205e-05,
"loss": 0.1145,
"step": 313
},
{
"epoch": 2.532258064516129,
"grad_norm": 11.541577339172363,
"learning_rate": 1.2239716800770882e-05,
"loss": 0.4872,
"step": 314
},
{
"epoch": 2.540322580645161,
"grad_norm": 117.4135971069336,
"learning_rate": 1.2199717726258559e-05,
"loss": 8.6798,
"step": 315
},
{
"epoch": 2.5483870967741935,
"grad_norm": 11.267465591430664,
"learning_rate": 1.2159718651746234e-05,
"loss": 0.2144,
"step": 316
},
{
"epoch": 2.556451612903226,
"grad_norm": 17.15199851989746,
"learning_rate": 1.2119719577233913e-05,
"loss": 0.4501,
"step": 317
},
{
"epoch": 2.564516129032258,
"grad_norm": 14.21686840057373,
"learning_rate": 1.207972050272159e-05,
"loss": 0.4759,
"step": 318
},
{
"epoch": 2.5725806451612905,
"grad_norm": 52.95610809326172,
"learning_rate": 1.2039721428209267e-05,
"loss": 0.792,
"step": 319
},
{
"epoch": 2.5806451612903225,
"grad_norm": 18.222946166992188,
"learning_rate": 1.1999722353696944e-05,
"loss": 0.9593,
"step": 320
},
{
"epoch": 2.588709677419355,
"grad_norm": 112.54412078857422,
"learning_rate": 1.1959723279184619e-05,
"loss": 5.1768,
"step": 321
},
{
"epoch": 2.596774193548387,
"grad_norm": 5.891172885894775,
"learning_rate": 1.1919724204672297e-05,
"loss": 0.336,
"step": 322
},
{
"epoch": 2.6048387096774195,
"grad_norm": 140.28492736816406,
"learning_rate": 1.1879725130159973e-05,
"loss": 11.1532,
"step": 323
},
{
"epoch": 2.6129032258064515,
"grad_norm": 27.81146812438965,
"learning_rate": 1.183972605564765e-05,
"loss": 0.4171,
"step": 324
},
{
"epoch": 2.620967741935484,
"grad_norm": 13.444504737854004,
"learning_rate": 1.1799726981135328e-05,
"loss": 0.181,
"step": 325
},
{
"epoch": 2.629032258064516,
"grad_norm": 19.829408645629883,
"learning_rate": 1.1759727906623003e-05,
"loss": 0.6601,
"step": 326
},
{
"epoch": 2.6370967741935485,
"grad_norm": 13.109299659729004,
"learning_rate": 1.1719728832110682e-05,
"loss": 0.8257,
"step": 327
},
{
"epoch": 2.6451612903225805,
"grad_norm": 37.20515823364258,
"learning_rate": 1.1679729757598357e-05,
"loss": 0.604,
"step": 328
},
{
"epoch": 2.653225806451613,
"grad_norm": 401.8742980957031,
"learning_rate": 1.1639730683086034e-05,
"loss": 123.0045,
"step": 329
},
{
"epoch": 2.661290322580645,
"grad_norm": 41.35789489746094,
"learning_rate": 1.1599731608573711e-05,
"loss": 0.8282,
"step": 330
},
{
"epoch": 2.6693548387096775,
"grad_norm": 36.715728759765625,
"learning_rate": 1.1559732534061388e-05,
"loss": 0.9222,
"step": 331
},
{
"epoch": 2.6774193548387095,
"grad_norm": 236.11050415039062,
"learning_rate": 1.1519733459549065e-05,
"loss": 2.1798,
"step": 332
},
{
"epoch": 2.685483870967742,
"grad_norm": 9.052068710327148,
"learning_rate": 1.1479734385036742e-05,
"loss": 0.3699,
"step": 333
},
{
"epoch": 2.693548387096774,
"grad_norm": 120.23004150390625,
"learning_rate": 1.1439735310524419e-05,
"loss": 4.8277,
"step": 334
},
{
"epoch": 2.7016129032258065,
"grad_norm": 86.39726257324219,
"learning_rate": 1.1399736236012096e-05,
"loss": 0.7964,
"step": 335
},
{
"epoch": 2.709677419354839,
"grad_norm": 43.46727752685547,
"learning_rate": 1.1359737161499773e-05,
"loss": 0.4678,
"step": 336
},
{
"epoch": 2.717741935483871,
"grad_norm": 92.9560546875,
"learning_rate": 1.131973808698745e-05,
"loss": 1.9784,
"step": 337
},
{
"epoch": 2.725806451612903,
"grad_norm": 72.29792785644531,
"learning_rate": 1.1279739012475126e-05,
"loss": 1.1618,
"step": 338
},
{
"epoch": 2.7338709677419355,
"grad_norm": 108.34342193603516,
"learning_rate": 1.1239739937962803e-05,
"loss": 8.9934,
"step": 339
},
{
"epoch": 2.741935483870968,
"grad_norm": 44.143707275390625,
"learning_rate": 1.119974086345048e-05,
"loss": 0.7319,
"step": 340
},
{
"epoch": 2.75,
"grad_norm": 14.93342113494873,
"learning_rate": 1.1159741788938157e-05,
"loss": 0.2592,
"step": 341
},
{
"epoch": 2.758064516129032,
"grad_norm": 213.9510040283203,
"learning_rate": 1.1119742714425832e-05,
"loss": 12.5033,
"step": 342
},
{
"epoch": 2.7661290322580645,
"grad_norm": 114.5474624633789,
"learning_rate": 1.1079743639913511e-05,
"loss": 1.5119,
"step": 343
},
{
"epoch": 2.774193548387097,
"grad_norm": 22.315725326538086,
"learning_rate": 1.1039744565401188e-05,
"loss": 0.6701,
"step": 344
},
{
"epoch": 2.782258064516129,
"grad_norm": 22.660259246826172,
"learning_rate": 1.0999745490888865e-05,
"loss": 0.7027,
"step": 345
},
{
"epoch": 2.790322580645161,
"grad_norm": 14.709903717041016,
"learning_rate": 1.0959746416376542e-05,
"loss": 0.213,
"step": 346
},
{
"epoch": 2.7983870967741935,
"grad_norm": 89.29466247558594,
"learning_rate": 1.0919747341864217e-05,
"loss": 1.5922,
"step": 347
},
{
"epoch": 2.806451612903226,
"grad_norm": 13.790899276733398,
"learning_rate": 1.0879748267351896e-05,
"loss": 0.4771,
"step": 348
},
{
"epoch": 2.814516129032258,
"grad_norm": 67.785888671875,
"learning_rate": 1.0839749192839571e-05,
"loss": 5.3096,
"step": 349
},
{
"epoch": 2.8225806451612905,
"grad_norm": 70.6512222290039,
"learning_rate": 1.0799750118327248e-05,
"loss": 0.8779,
"step": 350
},
{
"epoch": 2.8306451612903225,
"grad_norm": 51.96946334838867,
"learning_rate": 1.0759751043814926e-05,
"loss": 1.0843,
"step": 351
},
{
"epoch": 2.838709677419355,
"grad_norm": 112.01322937011719,
"learning_rate": 1.0719751969302602e-05,
"loss": 4.3431,
"step": 352
},
{
"epoch": 2.846774193548387,
"grad_norm": 30.747026443481445,
"learning_rate": 1.067975289479028e-05,
"loss": 0.6655,
"step": 353
},
{
"epoch": 2.8548387096774195,
"grad_norm": 152.95237731933594,
"learning_rate": 1.0639753820277956e-05,
"loss": 7.7018,
"step": 354
},
{
"epoch": 2.8629032258064515,
"grad_norm": 97.97068786621094,
"learning_rate": 1.0599754745765632e-05,
"loss": 6.0524,
"step": 355
},
{
"epoch": 2.870967741935484,
"grad_norm": 24.1806583404541,
"learning_rate": 1.055975567125331e-05,
"loss": 0.4385,
"step": 356
},
{
"epoch": 2.879032258064516,
"grad_norm": 125.32524871826172,
"learning_rate": 1.0519756596740986e-05,
"loss": 15.97,
"step": 357
},
{
"epoch": 2.8870967741935485,
"grad_norm": 13.418540000915527,
"learning_rate": 1.0479757522228663e-05,
"loss": 0.1098,
"step": 358
},
{
"epoch": 2.8951612903225805,
"grad_norm": 18.43660545349121,
"learning_rate": 1.043975844771634e-05,
"loss": 0.6929,
"step": 359
},
{
"epoch": 2.903225806451613,
"grad_norm": 14.304896354675293,
"learning_rate": 1.0399759373204017e-05,
"loss": 0.6683,
"step": 360
},
{
"epoch": 2.911290322580645,
"grad_norm": 124.25679016113281,
"learning_rate": 1.0359760298691694e-05,
"loss": 14.7953,
"step": 361
},
{
"epoch": 2.9193548387096775,
"grad_norm": 27.41398048400879,
"learning_rate": 1.0319761224179371e-05,
"loss": 0.4546,
"step": 362
},
{
"epoch": 2.9274193548387095,
"grad_norm": 17.463279724121094,
"learning_rate": 1.0279762149667048e-05,
"loss": 0.2985,
"step": 363
},
{
"epoch": 2.935483870967742,
"grad_norm": 36.6607666015625,
"learning_rate": 1.0239763075154725e-05,
"loss": 0.6629,
"step": 364
},
{
"epoch": 2.943548387096774,
"grad_norm": 36.471588134765625,
"learning_rate": 1.0199764000642402e-05,
"loss": 0.5639,
"step": 365
},
{
"epoch": 2.9516129032258065,
"grad_norm": 17.774343490600586,
"learning_rate": 1.0159764926130079e-05,
"loss": 0.3925,
"step": 366
},
{
"epoch": 2.959677419354839,
"grad_norm": 32.37789535522461,
"learning_rate": 1.0119765851617756e-05,
"loss": 0.5822,
"step": 367
},
{
"epoch": 2.967741935483871,
"grad_norm": 45.42538070678711,
"learning_rate": 1.0079766777105432e-05,
"loss": 0.7533,
"step": 368
},
{
"epoch": 2.975806451612903,
"grad_norm": 26.0896053314209,
"learning_rate": 1.003976770259311e-05,
"loss": 0.3197,
"step": 369
},
{
"epoch": 2.9838709677419355,
"grad_norm": 5.230876445770264,
"learning_rate": 9.999768628080785e-06,
"loss": 0.2765,
"step": 370
},
{
"epoch": 2.991935483870968,
"grad_norm": 50.81292724609375,
"learning_rate": 9.959769553568463e-06,
"loss": 2.345,
"step": 371
},
{
"epoch": 3.0,
"grad_norm": 310.2722473144531,
"learning_rate": 9.91977047905614e-06,
"loss": 24.6264,
"step": 372
},
{
"epoch": 3.0,
"eval_loss": 3.3316104412078857,
"eval_mae": 1.1439374685287476,
"eval_mse": 3.3316097259521484,
"eval_r2": 0.08786314725875854,
"eval_rmse": 1.8252697679938021,
"eval_runtime": 1.3357,
"eval_samples_per_second": 41.176,
"eval_smape": 51.89841985702515,
"eval_steps_per_second": 10.481,
"step": 372
},
{
"epoch": 3.0080645161290325,
"grad_norm": 41.96479415893555,
"learning_rate": 9.879771404543815e-06,
"loss": 1.2152,
"step": 373
},
{
"epoch": 3.0161290322580645,
"grad_norm": 136.83741760253906,
"learning_rate": 9.839772330031494e-06,
"loss": 3.896,
"step": 374
},
{
"epoch": 3.024193548387097,
"grad_norm": 22.60567283630371,
"learning_rate": 9.79977325551917e-06,
"loss": 0.3975,
"step": 375
},
{
"epoch": 3.032258064516129,
"grad_norm": 39.60881805419922,
"learning_rate": 9.759774181006848e-06,
"loss": 1.4481,
"step": 376
},
{
"epoch": 3.0403225806451615,
"grad_norm": 7.505491256713867,
"learning_rate": 9.719775106494523e-06,
"loss": 0.0605,
"step": 377
},
{
"epoch": 3.0483870967741935,
"grad_norm": 53.053001403808594,
"learning_rate": 9.6797760319822e-06,
"loss": 1.7315,
"step": 378
},
{
"epoch": 3.056451612903226,
"grad_norm": 33.12321853637695,
"learning_rate": 9.639776957469879e-06,
"loss": 0.5669,
"step": 379
},
{
"epoch": 3.064516129032258,
"grad_norm": 13.300590515136719,
"learning_rate": 9.599777882957554e-06,
"loss": 0.4431,
"step": 380
},
{
"epoch": 3.0725806451612905,
"grad_norm": 104.96222686767578,
"learning_rate": 9.55977880844523e-06,
"loss": 5.6698,
"step": 381
},
{
"epoch": 3.0806451612903225,
"grad_norm": 49.31301498413086,
"learning_rate": 9.519779733932908e-06,
"loss": 1.3213,
"step": 382
},
{
"epoch": 3.088709677419355,
"grad_norm": 16.676259994506836,
"learning_rate": 9.479780659420585e-06,
"loss": 0.266,
"step": 383
},
{
"epoch": 3.096774193548387,
"grad_norm": 34.20507049560547,
"learning_rate": 9.439781584908261e-06,
"loss": 1.5853,
"step": 384
},
{
"epoch": 3.1048387096774195,
"grad_norm": 39.18606948852539,
"learning_rate": 9.399782510395938e-06,
"loss": 1.0645,
"step": 385
},
{
"epoch": 3.1129032258064515,
"grad_norm": 5.354783535003662,
"learning_rate": 9.359783435883615e-06,
"loss": 0.1131,
"step": 386
},
{
"epoch": 3.120967741935484,
"grad_norm": 41.82234573364258,
"learning_rate": 9.319784361371292e-06,
"loss": 0.5984,
"step": 387
},
{
"epoch": 3.129032258064516,
"grad_norm": 16.759111404418945,
"learning_rate": 9.27978528685897e-06,
"loss": 0.4225,
"step": 388
},
{
"epoch": 3.1370967741935485,
"grad_norm": 24.134700775146484,
"learning_rate": 9.239786212346646e-06,
"loss": 0.434,
"step": 389
},
{
"epoch": 3.1451612903225805,
"grad_norm": 13.111348152160645,
"learning_rate": 9.199787137834323e-06,
"loss": 0.2413,
"step": 390
},
{
"epoch": 3.153225806451613,
"grad_norm": 115.44222259521484,
"learning_rate": 9.159788063322e-06,
"loss": 8.9723,
"step": 391
},
{
"epoch": 3.161290322580645,
"grad_norm": 13.346231460571289,
"learning_rate": 9.119788988809677e-06,
"loss": 0.8709,
"step": 392
},
{
"epoch": 3.1693548387096775,
"grad_norm": 1.1584579944610596,
"learning_rate": 9.079789914297354e-06,
"loss": 0.0055,
"step": 393
},
{
"epoch": 3.1774193548387095,
"grad_norm": 70.0774154663086,
"learning_rate": 9.03979083978503e-06,
"loss": 0.9703,
"step": 394
},
{
"epoch": 3.185483870967742,
"grad_norm": 14.28967571258545,
"learning_rate": 8.999791765272708e-06,
"loss": 0.2817,
"step": 395
},
{
"epoch": 3.193548387096774,
"grad_norm": 4.5178303718566895,
"learning_rate": 8.959792690760383e-06,
"loss": 0.0217,
"step": 396
},
{
"epoch": 3.2016129032258065,
"grad_norm": 9.761589050292969,
"learning_rate": 8.919793616248061e-06,
"loss": 0.4557,
"step": 397
},
{
"epoch": 3.2096774193548385,
"grad_norm": 53.35697555541992,
"learning_rate": 8.879794541735738e-06,
"loss": 0.8752,
"step": 398
},
{
"epoch": 3.217741935483871,
"grad_norm": 165.54457092285156,
"learning_rate": 8.839795467223414e-06,
"loss": 14.6604,
"step": 399
},
{
"epoch": 3.225806451612903,
"grad_norm": 247.7133331298828,
"learning_rate": 8.799796392711092e-06,
"loss": 9.448,
"step": 400
},
{
"epoch": 3.2338709677419355,
"grad_norm": 53.525306701660156,
"learning_rate": 8.759797318198767e-06,
"loss": 0.724,
"step": 401
},
{
"epoch": 3.241935483870968,
"grad_norm": 172.49952697753906,
"learning_rate": 8.719798243686446e-06,
"loss": 6.2532,
"step": 402
},
{
"epoch": 3.25,
"grad_norm": 99.93900299072266,
"learning_rate": 8.679799169174121e-06,
"loss": 2.2522,
"step": 403
},
{
"epoch": 3.258064516129032,
"grad_norm": 15.439353942871094,
"learning_rate": 8.639800094661798e-06,
"loss": 0.1913,
"step": 404
},
{
"epoch": 3.2661290322580645,
"grad_norm": 49.3162956237793,
"learning_rate": 8.599801020149477e-06,
"loss": 0.4811,
"step": 405
},
{
"epoch": 3.274193548387097,
"grad_norm": 184.40086364746094,
"learning_rate": 8.559801945637152e-06,
"loss": 7.606,
"step": 406
},
{
"epoch": 3.282258064516129,
"grad_norm": 132.2401580810547,
"learning_rate": 8.519802871124829e-06,
"loss": 3.4354,
"step": 407
},
{
"epoch": 3.2903225806451615,
"grad_norm": 64.83243560791016,
"learning_rate": 8.479803796612506e-06,
"loss": 2.5171,
"step": 408
},
{
"epoch": 3.2983870967741935,
"grad_norm": 158.75596618652344,
"learning_rate": 8.439804722100183e-06,
"loss": 2.1743,
"step": 409
},
{
"epoch": 3.306451612903226,
"grad_norm": 103.98823547363281,
"learning_rate": 8.39980564758786e-06,
"loss": 1.4439,
"step": 410
},
{
"epoch": 3.314516129032258,
"grad_norm": 43.638450622558594,
"learning_rate": 8.359806573075537e-06,
"loss": 0.573,
"step": 411
},
{
"epoch": 3.3225806451612905,
"grad_norm": 41.889686584472656,
"learning_rate": 8.319807498563214e-06,
"loss": 1.4203,
"step": 412
},
{
"epoch": 3.3306451612903225,
"grad_norm": 6.998268127441406,
"learning_rate": 8.27980842405089e-06,
"loss": 0.0949,
"step": 413
},
{
"epoch": 3.338709677419355,
"grad_norm": 167.7267303466797,
"learning_rate": 8.239809349538567e-06,
"loss": 3.6027,
"step": 414
},
{
"epoch": 3.346774193548387,
"grad_norm": 58.6676025390625,
"learning_rate": 8.199810275026244e-06,
"loss": 1.0112,
"step": 415
},
{
"epoch": 3.3548387096774195,
"grad_norm": 19.077991485595703,
"learning_rate": 8.159811200513921e-06,
"loss": 0.1047,
"step": 416
},
{
"epoch": 3.3629032258064515,
"grad_norm": 61.30634307861328,
"learning_rate": 8.119812126001598e-06,
"loss": 0.3403,
"step": 417
},
{
"epoch": 3.370967741935484,
"grad_norm": 40.68547058105469,
"learning_rate": 8.079813051489275e-06,
"loss": 0.3131,
"step": 418
},
{
"epoch": 3.379032258064516,
"grad_norm": 39.6519660949707,
"learning_rate": 8.039813976976952e-06,
"loss": 0.0895,
"step": 419
},
{
"epoch": 3.3870967741935485,
"grad_norm": 54.81489181518555,
"learning_rate": 7.999814902464629e-06,
"loss": 0.9378,
"step": 420
},
{
"epoch": 3.3951612903225805,
"grad_norm": 43.20634460449219,
"learning_rate": 7.959815827952306e-06,
"loss": 0.5461,
"step": 421
},
{
"epoch": 3.403225806451613,
"grad_norm": 35.164852142333984,
"learning_rate": 7.919816753439981e-06,
"loss": 1.0319,
"step": 422
},
{
"epoch": 3.411290322580645,
"grad_norm": 283.366455078125,
"learning_rate": 7.87981767892766e-06,
"loss": 8.4101,
"step": 423
},
{
"epoch": 3.4193548387096775,
"grad_norm": 507.85150146484375,
"learning_rate": 7.839818604415335e-06,
"loss": 17.8365,
"step": 424
},
{
"epoch": 3.4274193548387095,
"grad_norm": 8.4439697265625,
"learning_rate": 7.799819529903014e-06,
"loss": 0.2679,
"step": 425
},
{
"epoch": 3.435483870967742,
"grad_norm": 40.22681427001953,
"learning_rate": 7.75982045539069e-06,
"loss": 0.3714,
"step": 426
},
{
"epoch": 3.443548387096774,
"grad_norm": 32.92212677001953,
"learning_rate": 7.719821380878366e-06,
"loss": 0.4915,
"step": 427
},
{
"epoch": 3.4516129032258065,
"grad_norm": 26.323545455932617,
"learning_rate": 7.679822306366044e-06,
"loss": 0.4087,
"step": 428
},
{
"epoch": 3.4596774193548385,
"grad_norm": 146.9882049560547,
"learning_rate": 7.63982323185372e-06,
"loss": 0.2726,
"step": 429
},
{
"epoch": 3.467741935483871,
"grad_norm": 4.548903465270996,
"learning_rate": 7.599824157341397e-06,
"loss": 0.1744,
"step": 430
},
{
"epoch": 3.475806451612903,
"grad_norm": 228.15928649902344,
"learning_rate": 7.5598250828290735e-06,
"loss": 10.8116,
"step": 431
},
{
"epoch": 3.4838709677419355,
"grad_norm": 60.10261154174805,
"learning_rate": 7.51982600831675e-06,
"loss": 0.6974,
"step": 432
},
{
"epoch": 3.491935483870968,
"grad_norm": 13.38789176940918,
"learning_rate": 7.479826933804428e-06,
"loss": 0.4782,
"step": 433
},
{
"epoch": 3.5,
"grad_norm": 68.60205841064453,
"learning_rate": 7.439827859292104e-06,
"loss": 1.7533,
"step": 434
},
{
"epoch": 3.508064516129032,
"grad_norm": 6.820003986358643,
"learning_rate": 7.399828784779782e-06,
"loss": 0.0396,
"step": 435
},
{
"epoch": 3.5161290322580645,
"grad_norm": 6.984181880950928,
"learning_rate": 7.359829710267458e-06,
"loss": 0.0715,
"step": 436
},
{
"epoch": 3.524193548387097,
"grad_norm": 26.827526092529297,
"learning_rate": 7.319830635755135e-06,
"loss": 0.1687,
"step": 437
},
{
"epoch": 3.532258064516129,
"grad_norm": 8.610593795776367,
"learning_rate": 7.279831561242811e-06,
"loss": 0.134,
"step": 438
},
{
"epoch": 3.540322580645161,
"grad_norm": 39.13728332519531,
"learning_rate": 7.239832486730489e-06,
"loss": 0.44,
"step": 439
},
{
"epoch": 3.5483870967741935,
"grad_norm": 56.36937713623047,
"learning_rate": 7.199833412218167e-06,
"loss": 3.6674,
"step": 440
},
{
"epoch": 3.556451612903226,
"grad_norm": 38.50959396362305,
"learning_rate": 7.159834337705843e-06,
"loss": 0.7563,
"step": 441
},
{
"epoch": 3.564516129032258,
"grad_norm": 44.96533203125,
"learning_rate": 7.11983526319352e-06,
"loss": 0.7596,
"step": 442
},
{
"epoch": 3.5725806451612905,
"grad_norm": 59.28168487548828,
"learning_rate": 7.079836188681196e-06,
"loss": 0.5658,
"step": 443
},
{
"epoch": 3.5806451612903225,
"grad_norm": 24.149091720581055,
"learning_rate": 7.0398371141688735e-06,
"loss": 0.3909,
"step": 444
},
{
"epoch": 3.588709677419355,
"grad_norm": 44.97443389892578,
"learning_rate": 6.99983803965655e-06,
"loss": 0.8401,
"step": 445
},
{
"epoch": 3.596774193548387,
"grad_norm": 210.90919494628906,
"learning_rate": 6.9598389651442265e-06,
"loss": 8.9542,
"step": 446
},
{
"epoch": 3.6048387096774195,
"grad_norm": 78.13578796386719,
"learning_rate": 6.919839890631904e-06,
"loss": 7.4033,
"step": 447
},
{
"epoch": 3.6129032258064515,
"grad_norm": 54.309566497802734,
"learning_rate": 6.87984081611958e-06,
"loss": 0.8781,
"step": 448
},
{
"epoch": 3.620967741935484,
"grad_norm": 8.429338455200195,
"learning_rate": 6.839841741607258e-06,
"loss": 0.2823,
"step": 449
},
{
"epoch": 3.629032258064516,
"grad_norm": 32.88882827758789,
"learning_rate": 6.799842667094934e-06,
"loss": 1.153,
"step": 450
},
{
"epoch": 3.6370967741935485,
"grad_norm": 3.0679757595062256,
"learning_rate": 6.759843592582611e-06,
"loss": 0.0494,
"step": 451
},
{
"epoch": 3.6451612903225805,
"grad_norm": 17.686809539794922,
"learning_rate": 6.719844518070289e-06,
"loss": 0.1084,
"step": 452
},
{
"epoch": 3.653225806451613,
"grad_norm": 48.20186996459961,
"learning_rate": 6.679845443557965e-06,
"loss": 0.7689,
"step": 453
},
{
"epoch": 3.661290322580645,
"grad_norm": 134.13829040527344,
"learning_rate": 6.639846369045642e-06,
"loss": 12.3841,
"step": 454
},
{
"epoch": 3.6693548387096775,
"grad_norm": 244.57977294921875,
"learning_rate": 6.599847294533318e-06,
"loss": 8.8699,
"step": 455
},
{
"epoch": 3.6774193548387095,
"grad_norm": 218.47779846191406,
"learning_rate": 6.559848220020996e-06,
"loss": 4.3449,
"step": 456
},
{
"epoch": 3.685483870967742,
"grad_norm": 4.526096820831299,
"learning_rate": 6.519849145508672e-06,
"loss": 0.0972,
"step": 457
},
{
"epoch": 3.693548387096774,
"grad_norm": 134.6807861328125,
"learning_rate": 6.4798500709963495e-06,
"loss": 5.5296,
"step": 458
},
{
"epoch": 3.7016129032258065,
"grad_norm": 16.304237365722656,
"learning_rate": 6.4398509964840265e-06,
"loss": 0.2073,
"step": 459
},
{
"epoch": 3.709677419354839,
"grad_norm": 98.8707275390625,
"learning_rate": 6.3998519219717025e-06,
"loss": 0.9317,
"step": 460
},
{
"epoch": 3.717741935483871,
"grad_norm": 38.19011688232422,
"learning_rate": 6.35985284745938e-06,
"loss": 0.5302,
"step": 461
},
{
"epoch": 3.725806451612903,
"grad_norm": 56.57841110229492,
"learning_rate": 6.319853772947056e-06,
"loss": 1.2759,
"step": 462
},
{
"epoch": 3.7338709677419355,
"grad_norm": 24.514759063720703,
"learning_rate": 6.279854698434733e-06,
"loss": 0.2036,
"step": 463
},
{
"epoch": 3.741935483870968,
"grad_norm": 191.78668212890625,
"learning_rate": 6.239855623922409e-06,
"loss": 8.38,
"step": 464
},
{
"epoch": 3.75,
"grad_norm": 7.4319376945495605,
"learning_rate": 6.199856549410087e-06,
"loss": 0.3825,
"step": 465
},
{
"epoch": 3.758064516129032,
"grad_norm": 520.401123046875,
"learning_rate": 6.159857474897764e-06,
"loss": 115.6645,
"step": 466
},
{
"epoch": 3.7661290322580645,
"grad_norm": 46.42934036254883,
"learning_rate": 6.119858400385441e-06,
"loss": 0.5399,
"step": 467
},
{
"epoch": 3.774193548387097,
"grad_norm": 190.69224548339844,
"learning_rate": 6.079859325873117e-06,
"loss": 4.1707,
"step": 468
},
{
"epoch": 3.782258064516129,
"grad_norm": 23.724159240722656,
"learning_rate": 6.039860251360795e-06,
"loss": 0.4132,
"step": 469
},
{
"epoch": 3.790322580645161,
"grad_norm": 13.517512321472168,
"learning_rate": 5.999861176848472e-06,
"loss": 0.0815,
"step": 470
},
{
"epoch": 3.7983870967741935,
"grad_norm": 70.30924987792969,
"learning_rate": 5.959862102336149e-06,
"loss": 2.7397,
"step": 471
},
{
"epoch": 3.806451612903226,
"grad_norm": 16.972890853881836,
"learning_rate": 5.919863027823825e-06,
"loss": 0.0473,
"step": 472
},
{
"epoch": 3.814516129032258,
"grad_norm": 56.665103912353516,
"learning_rate": 5.879863953311502e-06,
"loss": 0.5666,
"step": 473
},
{
"epoch": 3.8225806451612905,
"grad_norm": 38.07454299926758,
"learning_rate": 5.839864878799179e-06,
"loss": 0.3797,
"step": 474
},
{
"epoch": 3.8306451612903225,
"grad_norm": 63.56098937988281,
"learning_rate": 5.7998658042868555e-06,
"loss": 1.4492,
"step": 475
},
{
"epoch": 3.838709677419355,
"grad_norm": 841.0067138671875,
"learning_rate": 5.7598667297745325e-06,
"loss": 8.2394,
"step": 476
},
{
"epoch": 3.846774193548387,
"grad_norm": 77.66548919677734,
"learning_rate": 5.719867655262209e-06,
"loss": 1.4433,
"step": 477
},
{
"epoch": 3.8548387096774195,
"grad_norm": 130.56881713867188,
"learning_rate": 5.679868580749886e-06,
"loss": 2.8763,
"step": 478
},
{
"epoch": 3.8629032258064515,
"grad_norm": 5.675972938537598,
"learning_rate": 5.639869506237563e-06,
"loss": 0.0891,
"step": 479
},
{
"epoch": 3.870967741935484,
"grad_norm": 69.9577407836914,
"learning_rate": 5.59987043172524e-06,
"loss": 2.6558,
"step": 480
},
{
"epoch": 3.879032258064516,
"grad_norm": 14.687697410583496,
"learning_rate": 5.559871357212916e-06,
"loss": 0.1182,
"step": 481
},
{
"epoch": 3.8870967741935485,
"grad_norm": 155.2462921142578,
"learning_rate": 5.519872282700594e-06,
"loss": 2.598,
"step": 482
},
{
"epoch": 3.8951612903225805,
"grad_norm": 31.2712345123291,
"learning_rate": 5.479873208188271e-06,
"loss": 0.7228,
"step": 483
},
{
"epoch": 3.903225806451613,
"grad_norm": 201.28076171875,
"learning_rate": 5.439874133675948e-06,
"loss": 12.3541,
"step": 484
},
{
"epoch": 3.911290322580645,
"grad_norm": 70.37089538574219,
"learning_rate": 5.399875059163624e-06,
"loss": 0.7182,
"step": 485
},
{
"epoch": 3.9193548387096775,
"grad_norm": 29.314970016479492,
"learning_rate": 5.359875984651301e-06,
"loss": 0.1898,
"step": 486
},
{
"epoch": 3.9274193548387095,
"grad_norm": 25.780534744262695,
"learning_rate": 5.319876910138978e-06,
"loss": 0.3334,
"step": 487
},
{
"epoch": 3.935483870967742,
"grad_norm": 8.816149711608887,
"learning_rate": 5.279877835626655e-06,
"loss": 0.3082,
"step": 488
},
{
"epoch": 3.943548387096774,
"grad_norm": 348.0280456542969,
"learning_rate": 5.239878761114332e-06,
"loss": 4.8654,
"step": 489
},
{
"epoch": 3.9516129032258065,
"grad_norm": 42.4134407043457,
"learning_rate": 5.1998796866020085e-06,
"loss": 1.2419,
"step": 490
},
{
"epoch": 3.959677419354839,
"grad_norm": 31.053295135498047,
"learning_rate": 5.1598806120896854e-06,
"loss": 0.6271,
"step": 491
},
{
"epoch": 3.967741935483871,
"grad_norm": 70.98059844970703,
"learning_rate": 5.119881537577362e-06,
"loss": 0.5582,
"step": 492
},
{
"epoch": 3.975806451612903,
"grad_norm": 118.1554946899414,
"learning_rate": 5.079882463065039e-06,
"loss": 1.5444,
"step": 493
},
{
"epoch": 3.9838709677419355,
"grad_norm": 288.1326599121094,
"learning_rate": 5.039883388552716e-06,
"loss": 7.396,
"step": 494
},
{
"epoch": 3.991935483870968,
"grad_norm": 19.853105545043945,
"learning_rate": 4.999884314040392e-06,
"loss": 0.9112,
"step": 495
},
{
"epoch": 4.0,
"grad_norm": 32.062965393066406,
"learning_rate": 4.95988523952807e-06,
"loss": 0.2896,
"step": 496
},
{
"epoch": 4.0,
"eval_loss": 2.353773355484009,
"eval_mae": 1.0791250467300415,
"eval_mse": 2.353773355484009,
"eval_r2": 0.3555777668952942,
"eval_rmse": 1.5342012108859804,
"eval_runtime": 1.3428,
"eval_samples_per_second": 40.961,
"eval_smape": 55.77985644340515,
"eval_steps_per_second": 10.426,
"step": 496
}
],
"logging_steps": 1,
"max_steps": 620,
"num_input_tokens_seen": 0,
"num_train_epochs": 5,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 2687879255015424.0,
"train_batch_size": 4,
"trial_name": null,
"trial_params": {
"learning_rate": 2.479942619764035e-05
}
}