|
{ |
|
"best_metric": 1.5342012108859804, |
|
"best_model_checkpoint": "./modernBERT-content-regression/run-2/checkpoint-496", |
|
"epoch": 4.0, |
|
"eval_steps": 500, |
|
"global_step": 496, |
|
"is_hyper_param_search": true, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.008064516129032258, |
|
"grad_norm": 312.814453125, |
|
"learning_rate": 2.4759427123128026e-05, |
|
"loss": 21.3087, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.016129032258064516, |
|
"grad_norm": 73.38264465332031, |
|
"learning_rate": 2.4719428048615702e-05, |
|
"loss": 0.7206, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 0.024193548387096774, |
|
"grad_norm": 363.2410583496094, |
|
"learning_rate": 2.467942897410338e-05, |
|
"loss": 135.449, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 0.03225806451612903, |
|
"grad_norm": 64.86761474609375, |
|
"learning_rate": 2.4639429899591056e-05, |
|
"loss": 12.5636, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.04032258064516129, |
|
"grad_norm": 86.92879486083984, |
|
"learning_rate": 2.4599430825078733e-05, |
|
"loss": 2.4774, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.04838709677419355, |
|
"grad_norm": 271.8641662597656, |
|
"learning_rate": 2.455943175056641e-05, |
|
"loss": 3.729, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.056451612903225805, |
|
"grad_norm": 41.483768463134766, |
|
"learning_rate": 2.4519432676054087e-05, |
|
"loss": 5.4716, |
|
"step": 7 |
|
}, |
|
{ |
|
"epoch": 0.06451612903225806, |
|
"grad_norm": 163.63929748535156, |
|
"learning_rate": 2.4479433601541764e-05, |
|
"loss": 11.4921, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.07258064516129033, |
|
"grad_norm": 67.34258270263672, |
|
"learning_rate": 2.443943452702944e-05, |
|
"loss": 0.827, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 0.08064516129032258, |
|
"grad_norm": 45.18867874145508, |
|
"learning_rate": 2.4399435452517118e-05, |
|
"loss": 2.3716, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.08870967741935484, |
|
"grad_norm": 130.92532348632812, |
|
"learning_rate": 2.4359436378004795e-05, |
|
"loss": 10.3088, |
|
"step": 11 |
|
}, |
|
{ |
|
"epoch": 0.0967741935483871, |
|
"grad_norm": 34.51155471801758, |
|
"learning_rate": 2.4319437303492468e-05, |
|
"loss": 0.7379, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.10483870967741936, |
|
"grad_norm": 109.77779388427734, |
|
"learning_rate": 2.427943822898015e-05, |
|
"loss": 1.3991, |
|
"step": 13 |
|
}, |
|
{ |
|
"epoch": 0.11290322580645161, |
|
"grad_norm": 33.590213775634766, |
|
"learning_rate": 2.4239439154467826e-05, |
|
"loss": 1.3007, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 0.12096774193548387, |
|
"grad_norm": 34.60383605957031, |
|
"learning_rate": 2.4199440079955502e-05, |
|
"loss": 0.1627, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.12903225806451613, |
|
"grad_norm": 183.3640899658203, |
|
"learning_rate": 2.415944100544318e-05, |
|
"loss": 9.0763, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 0.13709677419354838, |
|
"grad_norm": 27.07301139831543, |
|
"learning_rate": 2.4119441930930853e-05, |
|
"loss": 1.9223, |
|
"step": 17 |
|
}, |
|
{ |
|
"epoch": 0.14516129032258066, |
|
"grad_norm": 106.09356689453125, |
|
"learning_rate": 2.4079442856418533e-05, |
|
"loss": 2.1224, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.1532258064516129, |
|
"grad_norm": 32.14213562011719, |
|
"learning_rate": 2.403944378190621e-05, |
|
"loss": 0.84, |
|
"step": 19 |
|
}, |
|
{ |
|
"epoch": 0.16129032258064516, |
|
"grad_norm": 186.9098358154297, |
|
"learning_rate": 2.3999444707393887e-05, |
|
"loss": 6.1026, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.1693548387096774, |
|
"grad_norm": 252.75877380371094, |
|
"learning_rate": 2.3959445632881564e-05, |
|
"loss": 4.5225, |
|
"step": 21 |
|
}, |
|
{ |
|
"epoch": 0.1774193548387097, |
|
"grad_norm": 577.6574096679688, |
|
"learning_rate": 2.3919446558369238e-05, |
|
"loss": 13.0971, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 0.18548387096774194, |
|
"grad_norm": 236.11961364746094, |
|
"learning_rate": 2.3879447483856914e-05, |
|
"loss": 12.425, |
|
"step": 23 |
|
}, |
|
{ |
|
"epoch": 0.1935483870967742, |
|
"grad_norm": 329.27984619140625, |
|
"learning_rate": 2.3839448409344595e-05, |
|
"loss": 7.6775, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 0.20161290322580644, |
|
"grad_norm": 100.30821990966797, |
|
"learning_rate": 2.379944933483227e-05, |
|
"loss": 1.4652, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.20967741935483872, |
|
"grad_norm": 96.21698760986328, |
|
"learning_rate": 2.3759450260319945e-05, |
|
"loss": 1.4142, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 0.21774193548387097, |
|
"grad_norm": 31.13923454284668, |
|
"learning_rate": 2.3719451185807622e-05, |
|
"loss": 1.5587, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 0.22580645161290322, |
|
"grad_norm": 37.34580612182617, |
|
"learning_rate": 2.36794521112953e-05, |
|
"loss": 0.6072, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 0.23387096774193547, |
|
"grad_norm": 50.031856536865234, |
|
"learning_rate": 2.363945303678298e-05, |
|
"loss": 1.2987, |
|
"step": 29 |
|
}, |
|
{ |
|
"epoch": 0.24193548387096775, |
|
"grad_norm": 65.12112426757812, |
|
"learning_rate": 2.3599453962270656e-05, |
|
"loss": 1.5842, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"grad_norm": 64.7094497680664, |
|
"learning_rate": 2.355945488775833e-05, |
|
"loss": 2.2444, |
|
"step": 31 |
|
}, |
|
{ |
|
"epoch": 0.25806451612903225, |
|
"grad_norm": 85.2149429321289, |
|
"learning_rate": 2.3519455813246007e-05, |
|
"loss": 2.9294, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 0.2661290322580645, |
|
"grad_norm": 20.915496826171875, |
|
"learning_rate": 2.3479456738733684e-05, |
|
"loss": 0.6207, |
|
"step": 33 |
|
}, |
|
{ |
|
"epoch": 0.27419354838709675, |
|
"grad_norm": 54.965335845947266, |
|
"learning_rate": 2.3439457664221364e-05, |
|
"loss": 1.9851, |
|
"step": 34 |
|
}, |
|
{ |
|
"epoch": 0.28225806451612906, |
|
"grad_norm": 187.736328125, |
|
"learning_rate": 2.339945858970904e-05, |
|
"loss": 21.4765, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.2903225806451613, |
|
"grad_norm": 65.61473846435547, |
|
"learning_rate": 2.3359459515196714e-05, |
|
"loss": 1.4577, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 0.29838709677419356, |
|
"grad_norm": 32.118228912353516, |
|
"learning_rate": 2.331946044068439e-05, |
|
"loss": 0.4661, |
|
"step": 37 |
|
}, |
|
{ |
|
"epoch": 0.3064516129032258, |
|
"grad_norm": 78.08635711669922, |
|
"learning_rate": 2.3279461366172068e-05, |
|
"loss": 19.0058, |
|
"step": 38 |
|
}, |
|
{ |
|
"epoch": 0.31451612903225806, |
|
"grad_norm": 153.27804565429688, |
|
"learning_rate": 2.3239462291659745e-05, |
|
"loss": 5.0193, |
|
"step": 39 |
|
}, |
|
{ |
|
"epoch": 0.3225806451612903, |
|
"grad_norm": 205.57205200195312, |
|
"learning_rate": 2.3199463217147422e-05, |
|
"loss": 17.3633, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.33064516129032256, |
|
"grad_norm": 207.53005981445312, |
|
"learning_rate": 2.31594641426351e-05, |
|
"loss": 7.6631, |
|
"step": 41 |
|
}, |
|
{ |
|
"epoch": 0.3387096774193548, |
|
"grad_norm": 85.1321029663086, |
|
"learning_rate": 2.3119465068122776e-05, |
|
"loss": 19.0139, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 0.3467741935483871, |
|
"grad_norm": 133.19154357910156, |
|
"learning_rate": 2.3079465993610453e-05, |
|
"loss": 21.4597, |
|
"step": 43 |
|
}, |
|
{ |
|
"epoch": 0.3548387096774194, |
|
"grad_norm": 116.96082305908203, |
|
"learning_rate": 2.303946691909813e-05, |
|
"loss": 3.4332, |
|
"step": 44 |
|
}, |
|
{ |
|
"epoch": 0.3629032258064516, |
|
"grad_norm": 99.06409454345703, |
|
"learning_rate": 2.2999467844585807e-05, |
|
"loss": 3.114, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.3709677419354839, |
|
"grad_norm": 58.03582763671875, |
|
"learning_rate": 2.2959468770073484e-05, |
|
"loss": 2.1934, |
|
"step": 46 |
|
}, |
|
{ |
|
"epoch": 0.3790322580645161, |
|
"grad_norm": 66.2879409790039, |
|
"learning_rate": 2.291946969556116e-05, |
|
"loss": 3.4978, |
|
"step": 47 |
|
}, |
|
{ |
|
"epoch": 0.3870967741935484, |
|
"grad_norm": 84.04444122314453, |
|
"learning_rate": 2.2879470621048837e-05, |
|
"loss": 2.5483, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 0.3951612903225806, |
|
"grad_norm": 40.6508903503418, |
|
"learning_rate": 2.2839471546536514e-05, |
|
"loss": 0.8297, |
|
"step": 49 |
|
}, |
|
{ |
|
"epoch": 0.4032258064516129, |
|
"grad_norm": 75.82645416259766, |
|
"learning_rate": 2.279947247202419e-05, |
|
"loss": 8.1934, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.4112903225806452, |
|
"grad_norm": 24.05048179626465, |
|
"learning_rate": 2.2759473397511868e-05, |
|
"loss": 0.3819, |
|
"step": 51 |
|
}, |
|
{ |
|
"epoch": 0.41935483870967744, |
|
"grad_norm": 39.44225311279297, |
|
"learning_rate": 2.2719474322999545e-05, |
|
"loss": 1.1629, |
|
"step": 52 |
|
}, |
|
{ |
|
"epoch": 0.4274193548387097, |
|
"grad_norm": 195.91845703125, |
|
"learning_rate": 2.2679475248487222e-05, |
|
"loss": 19.0751, |
|
"step": 53 |
|
}, |
|
{ |
|
"epoch": 0.43548387096774194, |
|
"grad_norm": 10.670635223388672, |
|
"learning_rate": 2.26394761739749e-05, |
|
"loss": 0.5451, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 0.4435483870967742, |
|
"grad_norm": 45.76976013183594, |
|
"learning_rate": 2.2599477099462576e-05, |
|
"loss": 5.5405, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.45161290322580644, |
|
"grad_norm": 31.259849548339844, |
|
"learning_rate": 2.2559478024950253e-05, |
|
"loss": 1.616, |
|
"step": 56 |
|
}, |
|
{ |
|
"epoch": 0.4596774193548387, |
|
"grad_norm": 16.430635452270508, |
|
"learning_rate": 2.251947895043793e-05, |
|
"loss": 0.2542, |
|
"step": 57 |
|
}, |
|
{ |
|
"epoch": 0.46774193548387094, |
|
"grad_norm": 83.4207992553711, |
|
"learning_rate": 2.2479479875925607e-05, |
|
"loss": 9.8378, |
|
"step": 58 |
|
}, |
|
{ |
|
"epoch": 0.47580645161290325, |
|
"grad_norm": 18.212440490722656, |
|
"learning_rate": 2.2439480801413284e-05, |
|
"loss": 0.2447, |
|
"step": 59 |
|
}, |
|
{ |
|
"epoch": 0.4838709677419355, |
|
"grad_norm": 107.16078186035156, |
|
"learning_rate": 2.239948172690096e-05, |
|
"loss": 14.317, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.49193548387096775, |
|
"grad_norm": 32.10540771484375, |
|
"learning_rate": 2.2359482652388637e-05, |
|
"loss": 0.6299, |
|
"step": 61 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"grad_norm": 73.37329864501953, |
|
"learning_rate": 2.2319483577876314e-05, |
|
"loss": 21.8458, |
|
"step": 62 |
|
}, |
|
{ |
|
"epoch": 0.5080645161290323, |
|
"grad_norm": 46.12656784057617, |
|
"learning_rate": 2.227948450336399e-05, |
|
"loss": 1.2991, |
|
"step": 63 |
|
}, |
|
{ |
|
"epoch": 0.5161290322580645, |
|
"grad_norm": 18.842960357666016, |
|
"learning_rate": 2.2239485428851665e-05, |
|
"loss": 2.414, |
|
"step": 64 |
|
}, |
|
{ |
|
"epoch": 0.5241935483870968, |
|
"grad_norm": 77.6194839477539, |
|
"learning_rate": 2.2199486354339345e-05, |
|
"loss": 13.3138, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 0.532258064516129, |
|
"grad_norm": 66.23066711425781, |
|
"learning_rate": 2.2159487279827022e-05, |
|
"loss": 1.6535, |
|
"step": 66 |
|
}, |
|
{ |
|
"epoch": 0.5403225806451613, |
|
"grad_norm": 37.77677917480469, |
|
"learning_rate": 2.21194882053147e-05, |
|
"loss": 1.8361, |
|
"step": 67 |
|
}, |
|
{ |
|
"epoch": 0.5483870967741935, |
|
"grad_norm": 46.24187088012695, |
|
"learning_rate": 2.2079489130802376e-05, |
|
"loss": 1.4393, |
|
"step": 68 |
|
}, |
|
{ |
|
"epoch": 0.5564516129032258, |
|
"grad_norm": 80.01097106933594, |
|
"learning_rate": 2.203949005629005e-05, |
|
"loss": 3.5612, |
|
"step": 69 |
|
}, |
|
{ |
|
"epoch": 0.5645161290322581, |
|
"grad_norm": 7.726450443267822, |
|
"learning_rate": 2.199949098177773e-05, |
|
"loss": 0.3842, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.5725806451612904, |
|
"grad_norm": 13.420072555541992, |
|
"learning_rate": 2.1959491907265407e-05, |
|
"loss": 2.0311, |
|
"step": 71 |
|
}, |
|
{ |
|
"epoch": 0.5806451612903226, |
|
"grad_norm": 81.66263580322266, |
|
"learning_rate": 2.1919492832753084e-05, |
|
"loss": 15.1423, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 0.5887096774193549, |
|
"grad_norm": 30.341995239257812, |
|
"learning_rate": 2.1879493758240757e-05, |
|
"loss": 0.4867, |
|
"step": 73 |
|
}, |
|
{ |
|
"epoch": 0.5967741935483871, |
|
"grad_norm": 83.49819946289062, |
|
"learning_rate": 2.1839494683728434e-05, |
|
"loss": 3.1285, |
|
"step": 74 |
|
}, |
|
{ |
|
"epoch": 0.6048387096774194, |
|
"grad_norm": 62.19274139404297, |
|
"learning_rate": 2.1799495609216114e-05, |
|
"loss": 6.7166, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.6129032258064516, |
|
"grad_norm": 25.30666732788086, |
|
"learning_rate": 2.175949653470379e-05, |
|
"loss": 0.4676, |
|
"step": 76 |
|
}, |
|
{ |
|
"epoch": 0.6209677419354839, |
|
"grad_norm": 33.63331604003906, |
|
"learning_rate": 2.1719497460191468e-05, |
|
"loss": 0.7665, |
|
"step": 77 |
|
}, |
|
{ |
|
"epoch": 0.6290322580645161, |
|
"grad_norm": 42.89381408691406, |
|
"learning_rate": 2.1679498385679142e-05, |
|
"loss": 12.7414, |
|
"step": 78 |
|
}, |
|
{ |
|
"epoch": 0.6370967741935484, |
|
"grad_norm": 87.96309661865234, |
|
"learning_rate": 2.163949931116682e-05, |
|
"loss": 23.3716, |
|
"step": 79 |
|
}, |
|
{ |
|
"epoch": 0.6451612903225806, |
|
"grad_norm": 11.2321195602417, |
|
"learning_rate": 2.1599500236654496e-05, |
|
"loss": 1.0413, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.6532258064516129, |
|
"grad_norm": 21.426607131958008, |
|
"learning_rate": 2.1559501162142176e-05, |
|
"loss": 0.4798, |
|
"step": 81 |
|
}, |
|
{ |
|
"epoch": 0.6612903225806451, |
|
"grad_norm": 26.169578552246094, |
|
"learning_rate": 2.1519502087629853e-05, |
|
"loss": 1.9313, |
|
"step": 82 |
|
}, |
|
{ |
|
"epoch": 0.6693548387096774, |
|
"grad_norm": 18.651702880859375, |
|
"learning_rate": 2.1479503013117526e-05, |
|
"loss": 0.3472, |
|
"step": 83 |
|
}, |
|
{ |
|
"epoch": 0.6774193548387096, |
|
"grad_norm": 7.624361038208008, |
|
"learning_rate": 2.1439503938605203e-05, |
|
"loss": 0.6932, |
|
"step": 84 |
|
}, |
|
{ |
|
"epoch": 0.6854838709677419, |
|
"grad_norm": 123.84193420410156, |
|
"learning_rate": 2.139950486409288e-05, |
|
"loss": 17.2375, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 0.6935483870967742, |
|
"grad_norm": 39.71210861206055, |
|
"learning_rate": 2.135950578958056e-05, |
|
"loss": 14.5549, |
|
"step": 86 |
|
}, |
|
{ |
|
"epoch": 0.7016129032258065, |
|
"grad_norm": 23.699077606201172, |
|
"learning_rate": 2.1319506715068234e-05, |
|
"loss": 1.417, |
|
"step": 87 |
|
}, |
|
{ |
|
"epoch": 0.7096774193548387, |
|
"grad_norm": 49.412071228027344, |
|
"learning_rate": 2.127950764055591e-05, |
|
"loss": 1.2961, |
|
"step": 88 |
|
}, |
|
{ |
|
"epoch": 0.717741935483871, |
|
"grad_norm": 60.343544006347656, |
|
"learning_rate": 2.1239508566043588e-05, |
|
"loss": 2.5778, |
|
"step": 89 |
|
}, |
|
{ |
|
"epoch": 0.7258064516129032, |
|
"grad_norm": 62.71953582763672, |
|
"learning_rate": 2.1199509491531265e-05, |
|
"loss": 2.0629, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.7338709677419355, |
|
"grad_norm": 120.01128387451172, |
|
"learning_rate": 2.1159510417018945e-05, |
|
"loss": 19.2536, |
|
"step": 91 |
|
}, |
|
{ |
|
"epoch": 0.7419354838709677, |
|
"grad_norm": 59.287296295166016, |
|
"learning_rate": 2.111951134250662e-05, |
|
"loss": 5.4721, |
|
"step": 92 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"grad_norm": 11.04751968383789, |
|
"learning_rate": 2.1079512267994296e-05, |
|
"loss": 0.1843, |
|
"step": 93 |
|
}, |
|
{ |
|
"epoch": 0.7580645161290323, |
|
"grad_norm": 101.02423858642578, |
|
"learning_rate": 2.1039513193481973e-05, |
|
"loss": 5.4431, |
|
"step": 94 |
|
}, |
|
{ |
|
"epoch": 0.7661290322580645, |
|
"grad_norm": 72.17565155029297, |
|
"learning_rate": 2.099951411896965e-05, |
|
"loss": 9.7386, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 0.7741935483870968, |
|
"grad_norm": 83.0063705444336, |
|
"learning_rate": 2.0959515044457326e-05, |
|
"loss": 3.1414, |
|
"step": 96 |
|
}, |
|
{ |
|
"epoch": 0.782258064516129, |
|
"grad_norm": 47.760589599609375, |
|
"learning_rate": 2.0919515969945003e-05, |
|
"loss": 1.2319, |
|
"step": 97 |
|
}, |
|
{ |
|
"epoch": 0.7903225806451613, |
|
"grad_norm": 43.6202507019043, |
|
"learning_rate": 2.087951689543268e-05, |
|
"loss": 1.2002, |
|
"step": 98 |
|
}, |
|
{ |
|
"epoch": 0.7983870967741935, |
|
"grad_norm": 42.899200439453125, |
|
"learning_rate": 2.0839517820920357e-05, |
|
"loss": 1.1685, |
|
"step": 99 |
|
}, |
|
{ |
|
"epoch": 0.8064516129032258, |
|
"grad_norm": 116.83399963378906, |
|
"learning_rate": 2.0799518746408034e-05, |
|
"loss": 19.3838, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.8145161290322581, |
|
"grad_norm": 16.667503356933594, |
|
"learning_rate": 2.075951967189571e-05, |
|
"loss": 0.8709, |
|
"step": 101 |
|
}, |
|
{ |
|
"epoch": 0.8225806451612904, |
|
"grad_norm": 13.620025634765625, |
|
"learning_rate": 2.0719520597383388e-05, |
|
"loss": 0.3991, |
|
"step": 102 |
|
}, |
|
{ |
|
"epoch": 0.8306451612903226, |
|
"grad_norm": 71.85466766357422, |
|
"learning_rate": 2.0679521522871065e-05, |
|
"loss": 11.6404, |
|
"step": 103 |
|
}, |
|
{ |
|
"epoch": 0.8387096774193549, |
|
"grad_norm": 11.629712104797363, |
|
"learning_rate": 2.0639522448358742e-05, |
|
"loss": 0.9058, |
|
"step": 104 |
|
}, |
|
{ |
|
"epoch": 0.8467741935483871, |
|
"grad_norm": 33.48215866088867, |
|
"learning_rate": 2.059952337384642e-05, |
|
"loss": 0.6932, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 0.8548387096774194, |
|
"grad_norm": 47.76714324951172, |
|
"learning_rate": 2.0559524299334096e-05, |
|
"loss": 2.1864, |
|
"step": 106 |
|
}, |
|
{ |
|
"epoch": 0.8629032258064516, |
|
"grad_norm": 40.71377944946289, |
|
"learning_rate": 2.0519525224821773e-05, |
|
"loss": 2.2803, |
|
"step": 107 |
|
}, |
|
{ |
|
"epoch": 0.8709677419354839, |
|
"grad_norm": 40.90349578857422, |
|
"learning_rate": 2.047952615030945e-05, |
|
"loss": 0.7261, |
|
"step": 108 |
|
}, |
|
{ |
|
"epoch": 0.8790322580645161, |
|
"grad_norm": 61.77216720581055, |
|
"learning_rate": 2.0439527075797126e-05, |
|
"loss": 1.9658, |
|
"step": 109 |
|
}, |
|
{ |
|
"epoch": 0.8870967741935484, |
|
"grad_norm": 15.168683052062988, |
|
"learning_rate": 2.0399528001284803e-05, |
|
"loss": 0.6779, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.8951612903225806, |
|
"grad_norm": 77.2820816040039, |
|
"learning_rate": 2.035952892677248e-05, |
|
"loss": 4.008, |
|
"step": 111 |
|
}, |
|
{ |
|
"epoch": 0.9032258064516129, |
|
"grad_norm": 9.020164489746094, |
|
"learning_rate": 2.0319529852260157e-05, |
|
"loss": 0.8001, |
|
"step": 112 |
|
}, |
|
{ |
|
"epoch": 0.9112903225806451, |
|
"grad_norm": 11.099763870239258, |
|
"learning_rate": 2.0279530777747834e-05, |
|
"loss": 0.4358, |
|
"step": 113 |
|
}, |
|
{ |
|
"epoch": 0.9193548387096774, |
|
"grad_norm": 39.36742401123047, |
|
"learning_rate": 2.023953170323551e-05, |
|
"loss": 1.39, |
|
"step": 114 |
|
}, |
|
{ |
|
"epoch": 0.9274193548387096, |
|
"grad_norm": 66.89869689941406, |
|
"learning_rate": 2.0199532628723188e-05, |
|
"loss": 4.703, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 0.9354838709677419, |
|
"grad_norm": 25.90433692932129, |
|
"learning_rate": 2.0159533554210865e-05, |
|
"loss": 1.4303, |
|
"step": 116 |
|
}, |
|
{ |
|
"epoch": 0.9435483870967742, |
|
"grad_norm": 21.590999603271484, |
|
"learning_rate": 2.0119534479698542e-05, |
|
"loss": 0.3786, |
|
"step": 117 |
|
}, |
|
{ |
|
"epoch": 0.9516129032258065, |
|
"grad_norm": 68.37796020507812, |
|
"learning_rate": 2.007953540518622e-05, |
|
"loss": 14.1292, |
|
"step": 118 |
|
}, |
|
{ |
|
"epoch": 0.9596774193548387, |
|
"grad_norm": 18.72194480895996, |
|
"learning_rate": 2.0039536330673896e-05, |
|
"loss": 2.0311, |
|
"step": 119 |
|
}, |
|
{ |
|
"epoch": 0.967741935483871, |
|
"grad_norm": 15.883110046386719, |
|
"learning_rate": 1.999953725616157e-05, |
|
"loss": 0.8801, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.9758064516129032, |
|
"grad_norm": 18.568222045898438, |
|
"learning_rate": 1.9959538181649246e-05, |
|
"loss": 0.403, |
|
"step": 121 |
|
}, |
|
{ |
|
"epoch": 0.9838709677419355, |
|
"grad_norm": 17.32382583618164, |
|
"learning_rate": 1.9919539107136926e-05, |
|
"loss": 0.3303, |
|
"step": 122 |
|
}, |
|
{ |
|
"epoch": 0.9919354838709677, |
|
"grad_norm": 10.257896423339844, |
|
"learning_rate": 1.9879540032624603e-05, |
|
"loss": 0.2042, |
|
"step": 123 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 25.707752227783203, |
|
"learning_rate": 1.983954095811228e-05, |
|
"loss": 0.3115, |
|
"step": 124 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_loss": 4.090590000152588, |
|
"eval_mae": 1.2299772500991821, |
|
"eval_mse": 4.090590000152588, |
|
"eval_r2": -0.11993241310119629, |
|
"eval_rmse": 2.0225207045052933, |
|
"eval_runtime": 1.3441, |
|
"eval_samples_per_second": 40.92, |
|
"eval_smape": 50.51450729370117, |
|
"eval_steps_per_second": 10.416, |
|
"step": 124 |
|
}, |
|
{ |
|
"epoch": 1.0080645161290323, |
|
"grad_norm": 37.24449920654297, |
|
"learning_rate": 1.9799541883599954e-05, |
|
"loss": 0.6422, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 1.0161290322580645, |
|
"grad_norm": 32.547119140625, |
|
"learning_rate": 1.975954280908763e-05, |
|
"loss": 0.9869, |
|
"step": 126 |
|
}, |
|
{ |
|
"epoch": 1.0241935483870968, |
|
"grad_norm": 70.05367279052734, |
|
"learning_rate": 1.971954373457531e-05, |
|
"loss": 4.6137, |
|
"step": 127 |
|
}, |
|
{ |
|
"epoch": 1.032258064516129, |
|
"grad_norm": 18.101661682128906, |
|
"learning_rate": 1.9679544660062988e-05, |
|
"loss": 0.4156, |
|
"step": 128 |
|
}, |
|
{ |
|
"epoch": 1.0403225806451613, |
|
"grad_norm": 22.2724609375, |
|
"learning_rate": 1.9639545585550665e-05, |
|
"loss": 0.841, |
|
"step": 129 |
|
}, |
|
{ |
|
"epoch": 1.0483870967741935, |
|
"grad_norm": 13.428308486938477, |
|
"learning_rate": 1.959954651103834e-05, |
|
"loss": 2.2846, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 1.0564516129032258, |
|
"grad_norm": 35.4150505065918, |
|
"learning_rate": 1.9559547436526015e-05, |
|
"loss": 0.7014, |
|
"step": 131 |
|
}, |
|
{ |
|
"epoch": 1.064516129032258, |
|
"grad_norm": 31.687740325927734, |
|
"learning_rate": 1.9519548362013696e-05, |
|
"loss": 3.4412, |
|
"step": 132 |
|
}, |
|
{ |
|
"epoch": 1.0725806451612903, |
|
"grad_norm": 54.95043182373047, |
|
"learning_rate": 1.9479549287501373e-05, |
|
"loss": 1.5723, |
|
"step": 133 |
|
}, |
|
{ |
|
"epoch": 1.0806451612903225, |
|
"grad_norm": 43.67780303955078, |
|
"learning_rate": 1.9439550212989046e-05, |
|
"loss": 1.161, |
|
"step": 134 |
|
}, |
|
{ |
|
"epoch": 1.0887096774193548, |
|
"grad_norm": 47.5596809387207, |
|
"learning_rate": 1.9399551138476723e-05, |
|
"loss": 5.3313, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 1.096774193548387, |
|
"grad_norm": 71.37387084960938, |
|
"learning_rate": 1.93595520639644e-05, |
|
"loss": 12.8636, |
|
"step": 136 |
|
}, |
|
{ |
|
"epoch": 1.1048387096774193, |
|
"grad_norm": 80.62567901611328, |
|
"learning_rate": 1.9319552989452077e-05, |
|
"loss": 7.3218, |
|
"step": 137 |
|
}, |
|
{ |
|
"epoch": 1.1129032258064515, |
|
"grad_norm": 59.973960876464844, |
|
"learning_rate": 1.9279553914939757e-05, |
|
"loss": 1.3948, |
|
"step": 138 |
|
}, |
|
{ |
|
"epoch": 1.120967741935484, |
|
"grad_norm": 179.83071899414062, |
|
"learning_rate": 1.923955484042743e-05, |
|
"loss": 34.7136, |
|
"step": 139 |
|
}, |
|
{ |
|
"epoch": 1.129032258064516, |
|
"grad_norm": 84.4344482421875, |
|
"learning_rate": 1.9199555765915108e-05, |
|
"loss": 2.435, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 1.1370967741935485, |
|
"grad_norm": 65.75166320800781, |
|
"learning_rate": 1.9159556691402785e-05, |
|
"loss": 2.0554, |
|
"step": 141 |
|
}, |
|
{ |
|
"epoch": 1.1451612903225807, |
|
"grad_norm": 29.691560745239258, |
|
"learning_rate": 1.911955761689046e-05, |
|
"loss": 0.8479, |
|
"step": 142 |
|
}, |
|
{ |
|
"epoch": 1.153225806451613, |
|
"grad_norm": 28.317535400390625, |
|
"learning_rate": 1.9079558542378142e-05, |
|
"loss": 0.4651, |
|
"step": 143 |
|
}, |
|
{ |
|
"epoch": 1.1612903225806452, |
|
"grad_norm": 28.46364402770996, |
|
"learning_rate": 1.9039559467865815e-05, |
|
"loss": 0.5143, |
|
"step": 144 |
|
}, |
|
{ |
|
"epoch": 1.1693548387096775, |
|
"grad_norm": 14.189237594604492, |
|
"learning_rate": 1.8999560393353492e-05, |
|
"loss": 0.2308, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 1.1774193548387097, |
|
"grad_norm": 16.224409103393555, |
|
"learning_rate": 1.895956131884117e-05, |
|
"loss": 1.8195, |
|
"step": 146 |
|
}, |
|
{ |
|
"epoch": 1.185483870967742, |
|
"grad_norm": 25.36056900024414, |
|
"learning_rate": 1.8919562244328846e-05, |
|
"loss": 0.6262, |
|
"step": 147 |
|
}, |
|
{ |
|
"epoch": 1.1935483870967742, |
|
"grad_norm": 20.145959854125977, |
|
"learning_rate": 1.8879563169816523e-05, |
|
"loss": 0.7281, |
|
"step": 148 |
|
}, |
|
{ |
|
"epoch": 1.2016129032258065, |
|
"grad_norm": 159.1549072265625, |
|
"learning_rate": 1.88395640953042e-05, |
|
"loss": 11.0343, |
|
"step": 149 |
|
}, |
|
{ |
|
"epoch": 1.2096774193548387, |
|
"grad_norm": 162.7301788330078, |
|
"learning_rate": 1.8799565020791877e-05, |
|
"loss": 34.9038, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 1.217741935483871, |
|
"grad_norm": 86.77201080322266, |
|
"learning_rate": 1.8759565946279554e-05, |
|
"loss": 16.0319, |
|
"step": 151 |
|
}, |
|
{ |
|
"epoch": 1.2258064516129032, |
|
"grad_norm": 48.960357666015625, |
|
"learning_rate": 1.871956687176723e-05, |
|
"loss": 1.4731, |
|
"step": 152 |
|
}, |
|
{ |
|
"epoch": 1.2338709677419355, |
|
"grad_norm": 82.25511932373047, |
|
"learning_rate": 1.8679567797254908e-05, |
|
"loss": 7.8374, |
|
"step": 153 |
|
}, |
|
{ |
|
"epoch": 1.2419354838709677, |
|
"grad_norm": 60.74580764770508, |
|
"learning_rate": 1.8639568722742585e-05, |
|
"loss": 10.0002, |
|
"step": 154 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"grad_norm": 34.497222900390625, |
|
"learning_rate": 1.859956964823026e-05, |
|
"loss": 0.6464, |
|
"step": 155 |
|
}, |
|
{ |
|
"epoch": 1.2580645161290323, |
|
"grad_norm": 38.092491149902344, |
|
"learning_rate": 1.855957057371794e-05, |
|
"loss": 1.5106, |
|
"step": 156 |
|
}, |
|
{ |
|
"epoch": 1.2661290322580645, |
|
"grad_norm": 19.8438663482666, |
|
"learning_rate": 1.8519571499205615e-05, |
|
"loss": 1.5936, |
|
"step": 157 |
|
}, |
|
{ |
|
"epoch": 1.2741935483870968, |
|
"grad_norm": 69.82159423828125, |
|
"learning_rate": 1.8479572424693292e-05, |
|
"loss": 10.4774, |
|
"step": 158 |
|
}, |
|
{ |
|
"epoch": 1.282258064516129, |
|
"grad_norm": 125.10799407958984, |
|
"learning_rate": 1.843957335018097e-05, |
|
"loss": 15.5645, |
|
"step": 159 |
|
}, |
|
{ |
|
"epoch": 1.2903225806451613, |
|
"grad_norm": 49.95842361450195, |
|
"learning_rate": 1.8399574275668646e-05, |
|
"loss": 7.8997, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 1.2983870967741935, |
|
"grad_norm": 187.53903198242188, |
|
"learning_rate": 1.8359575201156323e-05, |
|
"loss": 19.5171, |
|
"step": 161 |
|
}, |
|
{ |
|
"epoch": 1.3064516129032258, |
|
"grad_norm": 137.54644775390625, |
|
"learning_rate": 1.8319576126644e-05, |
|
"loss": 4.1746, |
|
"step": 162 |
|
}, |
|
{ |
|
"epoch": 1.314516129032258, |
|
"grad_norm": 116.5649185180664, |
|
"learning_rate": 1.8279577052131677e-05, |
|
"loss": 2.607, |
|
"step": 163 |
|
}, |
|
{ |
|
"epoch": 1.3225806451612903, |
|
"grad_norm": 95.47283935546875, |
|
"learning_rate": 1.8239577977619354e-05, |
|
"loss": 5.3841, |
|
"step": 164 |
|
}, |
|
{ |
|
"epoch": 1.3306451612903225, |
|
"grad_norm": 354.6362609863281, |
|
"learning_rate": 1.819957890310703e-05, |
|
"loss": 13.093, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 1.3387096774193548, |
|
"grad_norm": 140.9376220703125, |
|
"learning_rate": 1.8159579828594708e-05, |
|
"loss": 4.0244, |
|
"step": 166 |
|
}, |
|
{ |
|
"epoch": 1.346774193548387, |
|
"grad_norm": 94.6503677368164, |
|
"learning_rate": 1.811958075408238e-05, |
|
"loss": 1.2288, |
|
"step": 167 |
|
}, |
|
{ |
|
"epoch": 1.3548387096774195, |
|
"grad_norm": 25.267553329467773, |
|
"learning_rate": 1.807958167957006e-05, |
|
"loss": 0.5603, |
|
"step": 168 |
|
}, |
|
{ |
|
"epoch": 1.3629032258064515, |
|
"grad_norm": 49.03358840942383, |
|
"learning_rate": 1.803958260505774e-05, |
|
"loss": 1.6284, |
|
"step": 169 |
|
}, |
|
{ |
|
"epoch": 1.370967741935484, |
|
"grad_norm": 369.3719787597656, |
|
"learning_rate": 1.7999583530545415e-05, |
|
"loss": 127.8047, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 1.379032258064516, |
|
"grad_norm": 30.973657608032227, |
|
"learning_rate": 1.7959584456033092e-05, |
|
"loss": 0.6907, |
|
"step": 171 |
|
}, |
|
{ |
|
"epoch": 1.3870967741935485, |
|
"grad_norm": 163.9529266357422, |
|
"learning_rate": 1.7919585381520766e-05, |
|
"loss": 15.7879, |
|
"step": 172 |
|
}, |
|
{ |
|
"epoch": 1.3951612903225805, |
|
"grad_norm": 13.607197761535645, |
|
"learning_rate": 1.7879586307008446e-05, |
|
"loss": 0.3138, |
|
"step": 173 |
|
}, |
|
{ |
|
"epoch": 1.403225806451613, |
|
"grad_norm": 86.09737396240234, |
|
"learning_rate": 1.7839587232496123e-05, |
|
"loss": 9.6102, |
|
"step": 174 |
|
}, |
|
{ |
|
"epoch": 1.4112903225806452, |
|
"grad_norm": 54.644554138183594, |
|
"learning_rate": 1.77995881579838e-05, |
|
"loss": 1.5776, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 1.4193548387096775, |
|
"grad_norm": 190.9008331298828, |
|
"learning_rate": 1.7759589083471477e-05, |
|
"loss": 22.1159, |
|
"step": 176 |
|
}, |
|
{ |
|
"epoch": 1.4274193548387097, |
|
"grad_norm": 50.92055130004883, |
|
"learning_rate": 1.771959000895915e-05, |
|
"loss": 2.3412, |
|
"step": 177 |
|
}, |
|
{ |
|
"epoch": 1.435483870967742, |
|
"grad_norm": 38.6739387512207, |
|
"learning_rate": 1.7679590934446827e-05, |
|
"loss": 0.7342, |
|
"step": 178 |
|
}, |
|
{ |
|
"epoch": 1.4435483870967742, |
|
"grad_norm": 28.455352783203125, |
|
"learning_rate": 1.7639591859934508e-05, |
|
"loss": 1.7024, |
|
"step": 179 |
|
}, |
|
{ |
|
"epoch": 1.4516129032258065, |
|
"grad_norm": 29.965150833129883, |
|
"learning_rate": 1.7599592785422185e-05, |
|
"loss": 0.6395, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 1.4596774193548387, |
|
"grad_norm": 46.12895202636719, |
|
"learning_rate": 1.7559593710909858e-05, |
|
"loss": 0.8689, |
|
"step": 181 |
|
}, |
|
{ |
|
"epoch": 1.467741935483871, |
|
"grad_norm": 21.45783805847168, |
|
"learning_rate": 1.7519594636397535e-05, |
|
"loss": 0.8418, |
|
"step": 182 |
|
}, |
|
{ |
|
"epoch": 1.4758064516129032, |
|
"grad_norm": 24.071561813354492, |
|
"learning_rate": 1.7479595561885212e-05, |
|
"loss": 1.8008, |
|
"step": 183 |
|
}, |
|
{ |
|
"epoch": 1.4838709677419355, |
|
"grad_norm": 35.17493438720703, |
|
"learning_rate": 1.7439596487372892e-05, |
|
"loss": 3.9543, |
|
"step": 184 |
|
}, |
|
{ |
|
"epoch": 1.4919354838709677, |
|
"grad_norm": 28.088899612426758, |
|
"learning_rate": 1.739959741286057e-05, |
|
"loss": 1.3278, |
|
"step": 185 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"grad_norm": 15.711658477783203, |
|
"learning_rate": 1.7359598338348243e-05, |
|
"loss": 0.3969, |
|
"step": 186 |
|
}, |
|
{ |
|
"epoch": 1.5080645161290323, |
|
"grad_norm": 37.35240173339844, |
|
"learning_rate": 1.731959926383592e-05, |
|
"loss": 4.7095, |
|
"step": 187 |
|
}, |
|
{ |
|
"epoch": 1.5161290322580645, |
|
"grad_norm": 24.711170196533203, |
|
"learning_rate": 1.7279600189323597e-05, |
|
"loss": 1.3376, |
|
"step": 188 |
|
}, |
|
{ |
|
"epoch": 1.5241935483870968, |
|
"grad_norm": 40.604549407958984, |
|
"learning_rate": 1.7239601114811277e-05, |
|
"loss": 1.7629, |
|
"step": 189 |
|
}, |
|
{ |
|
"epoch": 1.532258064516129, |
|
"grad_norm": 25.869022369384766, |
|
"learning_rate": 1.7199602040298954e-05, |
|
"loss": 0.8421, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 1.5403225806451613, |
|
"grad_norm": 6.451545715332031, |
|
"learning_rate": 1.7159602965786627e-05, |
|
"loss": 0.2054, |
|
"step": 191 |
|
}, |
|
{ |
|
"epoch": 1.5483870967741935, |
|
"grad_norm": 8.05915641784668, |
|
"learning_rate": 1.7119603891274304e-05, |
|
"loss": 0.5445, |
|
"step": 192 |
|
}, |
|
{ |
|
"epoch": 1.5564516129032258, |
|
"grad_norm": 23.62920570373535, |
|
"learning_rate": 1.707960481676198e-05, |
|
"loss": 0.6784, |
|
"step": 193 |
|
}, |
|
{ |
|
"epoch": 1.564516129032258, |
|
"grad_norm": 19.833181381225586, |
|
"learning_rate": 1.7039605742249658e-05, |
|
"loss": 0.5534, |
|
"step": 194 |
|
}, |
|
{ |
|
"epoch": 1.5725806451612905, |
|
"grad_norm": 14.539905548095703, |
|
"learning_rate": 1.6999606667737335e-05, |
|
"loss": 0.4673, |
|
"step": 195 |
|
}, |
|
{ |
|
"epoch": 1.5806451612903225, |
|
"grad_norm": 103.28005981445312, |
|
"learning_rate": 1.6959607593225012e-05, |
|
"loss": 6.914, |
|
"step": 196 |
|
}, |
|
{ |
|
"epoch": 1.588709677419355, |
|
"grad_norm": 39.585941314697266, |
|
"learning_rate": 1.691960851871269e-05, |
|
"loss": 1.0972, |
|
"step": 197 |
|
}, |
|
{ |
|
"epoch": 1.596774193548387, |
|
"grad_norm": 15.518098831176758, |
|
"learning_rate": 1.6879609444200366e-05, |
|
"loss": 0.4614, |
|
"step": 198 |
|
}, |
|
{ |
|
"epoch": 1.6048387096774195, |
|
"grad_norm": 4.58119535446167, |
|
"learning_rate": 1.6839610369688043e-05, |
|
"loss": 0.1082, |
|
"step": 199 |
|
}, |
|
{ |
|
"epoch": 1.6129032258064515, |
|
"grad_norm": 25.006546020507812, |
|
"learning_rate": 1.679961129517572e-05, |
|
"loss": 0.4672, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 1.620967741935484, |
|
"grad_norm": 28.234201431274414, |
|
"learning_rate": 1.6759612220663397e-05, |
|
"loss": 0.5142, |
|
"step": 201 |
|
}, |
|
{ |
|
"epoch": 1.629032258064516, |
|
"grad_norm": 24.016407012939453, |
|
"learning_rate": 1.6719613146151073e-05, |
|
"loss": 0.9564, |
|
"step": 202 |
|
}, |
|
{ |
|
"epoch": 1.6370967741935485, |
|
"grad_norm": 18.26400375366211, |
|
"learning_rate": 1.667961407163875e-05, |
|
"loss": 0.3671, |
|
"step": 203 |
|
}, |
|
{ |
|
"epoch": 1.6451612903225805, |
|
"grad_norm": 12.657865524291992, |
|
"learning_rate": 1.6639614997126427e-05, |
|
"loss": 0.6099, |
|
"step": 204 |
|
}, |
|
{ |
|
"epoch": 1.653225806451613, |
|
"grad_norm": 117.20157623291016, |
|
"learning_rate": 1.6599615922614104e-05, |
|
"loss": 14.7071, |
|
"step": 205 |
|
}, |
|
{ |
|
"epoch": 1.661290322580645, |
|
"grad_norm": 5.277425289154053, |
|
"learning_rate": 1.655961684810178e-05, |
|
"loss": 0.0183, |
|
"step": 206 |
|
}, |
|
{ |
|
"epoch": 1.6693548387096775, |
|
"grad_norm": 8.111205101013184, |
|
"learning_rate": 1.6519617773589458e-05, |
|
"loss": 0.2201, |
|
"step": 207 |
|
}, |
|
{ |
|
"epoch": 1.6774193548387095, |
|
"grad_norm": 15.38776683807373, |
|
"learning_rate": 1.6479618699077135e-05, |
|
"loss": 0.3226, |
|
"step": 208 |
|
}, |
|
{ |
|
"epoch": 1.685483870967742, |
|
"grad_norm": 6.60953950881958, |
|
"learning_rate": 1.6439619624564812e-05, |
|
"loss": 0.3358, |
|
"step": 209 |
|
}, |
|
{ |
|
"epoch": 1.6935483870967742, |
|
"grad_norm": 17.016639709472656, |
|
"learning_rate": 1.639962055005249e-05, |
|
"loss": 0.2497, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 1.7016129032258065, |
|
"grad_norm": 54.830169677734375, |
|
"learning_rate": 1.6359621475540166e-05, |
|
"loss": 1.7451, |
|
"step": 211 |
|
}, |
|
{ |
|
"epoch": 1.7096774193548387, |
|
"grad_norm": 14.282036781311035, |
|
"learning_rate": 1.6319622401027843e-05, |
|
"loss": 1.1429, |
|
"step": 212 |
|
}, |
|
{ |
|
"epoch": 1.717741935483871, |
|
"grad_norm": 4.297520160675049, |
|
"learning_rate": 1.627962332651552e-05, |
|
"loss": 0.1797, |
|
"step": 213 |
|
}, |
|
{ |
|
"epoch": 1.7258064516129032, |
|
"grad_norm": 142.38172912597656, |
|
"learning_rate": 1.6239624252003196e-05, |
|
"loss": 21.8816, |
|
"step": 214 |
|
}, |
|
{ |
|
"epoch": 1.7338709677419355, |
|
"grad_norm": 88.52197265625, |
|
"learning_rate": 1.6199625177490873e-05, |
|
"loss": 15.5891, |
|
"step": 215 |
|
}, |
|
{ |
|
"epoch": 1.7419354838709677, |
|
"grad_norm": 15.318389892578125, |
|
"learning_rate": 1.615962610297855e-05, |
|
"loss": 0.4315, |
|
"step": 216 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"grad_norm": 102.21027374267578, |
|
"learning_rate": 1.6119627028466227e-05, |
|
"loss": 6.8871, |
|
"step": 217 |
|
}, |
|
{ |
|
"epoch": 1.7580645161290323, |
|
"grad_norm": 12.100990295410156, |
|
"learning_rate": 1.6079627953953904e-05, |
|
"loss": 0.1098, |
|
"step": 218 |
|
}, |
|
{ |
|
"epoch": 1.7661290322580645, |
|
"grad_norm": 137.26162719726562, |
|
"learning_rate": 1.6039628879441578e-05, |
|
"loss": 12.6184, |
|
"step": 219 |
|
}, |
|
{ |
|
"epoch": 1.7741935483870968, |
|
"grad_norm": 16.48599624633789, |
|
"learning_rate": 1.5999629804929258e-05, |
|
"loss": 0.3973, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 1.782258064516129, |
|
"grad_norm": 23.34356689453125, |
|
"learning_rate": 1.5959630730416935e-05, |
|
"loss": 0.3448, |
|
"step": 221 |
|
}, |
|
{ |
|
"epoch": 1.7903225806451613, |
|
"grad_norm": 7.864492893218994, |
|
"learning_rate": 1.5919631655904612e-05, |
|
"loss": 0.502, |
|
"step": 222 |
|
}, |
|
{ |
|
"epoch": 1.7983870967741935, |
|
"grad_norm": 3.356924295425415, |
|
"learning_rate": 1.587963258139229e-05, |
|
"loss": 0.0997, |
|
"step": 223 |
|
}, |
|
{ |
|
"epoch": 1.8064516129032258, |
|
"grad_norm": 125.84968566894531, |
|
"learning_rate": 1.5839633506879962e-05, |
|
"loss": 13.3317, |
|
"step": 224 |
|
}, |
|
{ |
|
"epoch": 1.814516129032258, |
|
"grad_norm": 77.49720764160156, |
|
"learning_rate": 1.5799634432367643e-05, |
|
"loss": 4.3421, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 1.8225806451612905, |
|
"grad_norm": 24.029203414916992, |
|
"learning_rate": 1.575963535785532e-05, |
|
"loss": 1.8343, |
|
"step": 226 |
|
}, |
|
{ |
|
"epoch": 1.8306451612903225, |
|
"grad_norm": 70.15037536621094, |
|
"learning_rate": 1.5719636283342996e-05, |
|
"loss": 1.469, |
|
"step": 227 |
|
}, |
|
{ |
|
"epoch": 1.838709677419355, |
|
"grad_norm": 69.79930877685547, |
|
"learning_rate": 1.567963720883067e-05, |
|
"loss": 1.8992, |
|
"step": 228 |
|
}, |
|
{ |
|
"epoch": 1.846774193548387, |
|
"grad_norm": 63.507442474365234, |
|
"learning_rate": 1.5639638134318347e-05, |
|
"loss": 1.2837, |
|
"step": 229 |
|
}, |
|
{ |
|
"epoch": 1.8548387096774195, |
|
"grad_norm": 54.0720100402832, |
|
"learning_rate": 1.5599639059806027e-05, |
|
"loss": 3.8239, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 1.8629032258064515, |
|
"grad_norm": 71.55313873291016, |
|
"learning_rate": 1.5559639985293704e-05, |
|
"loss": 2.0285, |
|
"step": 231 |
|
}, |
|
{ |
|
"epoch": 1.870967741935484, |
|
"grad_norm": 9.896157264709473, |
|
"learning_rate": 1.551964091078138e-05, |
|
"loss": 0.329, |
|
"step": 232 |
|
}, |
|
{ |
|
"epoch": 1.879032258064516, |
|
"grad_norm": 19.860387802124023, |
|
"learning_rate": 1.5479641836269055e-05, |
|
"loss": 0.6743, |
|
"step": 233 |
|
}, |
|
{ |
|
"epoch": 1.8870967741935485, |
|
"grad_norm": 5.622892379760742, |
|
"learning_rate": 1.543964276175673e-05, |
|
"loss": 0.1553, |
|
"step": 234 |
|
}, |
|
{ |
|
"epoch": 1.8951612903225805, |
|
"grad_norm": 12.858086585998535, |
|
"learning_rate": 1.539964368724441e-05, |
|
"loss": 0.2809, |
|
"step": 235 |
|
}, |
|
{ |
|
"epoch": 1.903225806451613, |
|
"grad_norm": 108.66515350341797, |
|
"learning_rate": 1.535964461273209e-05, |
|
"loss": 5.8853, |
|
"step": 236 |
|
}, |
|
{ |
|
"epoch": 1.911290322580645, |
|
"grad_norm": 47.20570755004883, |
|
"learning_rate": 1.5319645538219766e-05, |
|
"loss": 4.3275, |
|
"step": 237 |
|
}, |
|
{ |
|
"epoch": 1.9193548387096775, |
|
"grad_norm": 43.51235580444336, |
|
"learning_rate": 1.527964646370744e-05, |
|
"loss": 0.7748, |
|
"step": 238 |
|
}, |
|
{ |
|
"epoch": 1.9274193548387095, |
|
"grad_norm": 27.735137939453125, |
|
"learning_rate": 1.5239647389195118e-05, |
|
"loss": 0.5191, |
|
"step": 239 |
|
}, |
|
{ |
|
"epoch": 1.935483870967742, |
|
"grad_norm": 27.011123657226562, |
|
"learning_rate": 1.5199648314682795e-05, |
|
"loss": 0.9489, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 1.9435483870967742, |
|
"grad_norm": 10.22940731048584, |
|
"learning_rate": 1.5159649240170472e-05, |
|
"loss": 0.3328, |
|
"step": 241 |
|
}, |
|
{ |
|
"epoch": 1.9516129032258065, |
|
"grad_norm": 17.85344886779785, |
|
"learning_rate": 1.5119650165658147e-05, |
|
"loss": 0.3374, |
|
"step": 242 |
|
}, |
|
{ |
|
"epoch": 1.9596774193548387, |
|
"grad_norm": 35.01970291137695, |
|
"learning_rate": 1.5079651091145824e-05, |
|
"loss": 0.7993, |
|
"step": 243 |
|
}, |
|
{ |
|
"epoch": 1.967741935483871, |
|
"grad_norm": 259.75616455078125, |
|
"learning_rate": 1.50396520166335e-05, |
|
"loss": 7.7043, |
|
"step": 244 |
|
}, |
|
{ |
|
"epoch": 1.9758064516129032, |
|
"grad_norm": 75.41777038574219, |
|
"learning_rate": 1.499965294212118e-05, |
|
"loss": 1.8485, |
|
"step": 245 |
|
}, |
|
{ |
|
"epoch": 1.9838709677419355, |
|
"grad_norm": 401.5027770996094, |
|
"learning_rate": 1.4959653867608856e-05, |
|
"loss": 19.4312, |
|
"step": 246 |
|
}, |
|
{ |
|
"epoch": 1.9919354838709677, |
|
"grad_norm": 46.57631301879883, |
|
"learning_rate": 1.4919654793096532e-05, |
|
"loss": 1.3285, |
|
"step": 247 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"grad_norm": 110.93121337890625, |
|
"learning_rate": 1.4879655718584208e-05, |
|
"loss": 2.6528, |
|
"step": 248 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_loss": 4.852317810058594, |
|
"eval_mae": 1.4657387733459473, |
|
"eval_mse": 4.85231876373291, |
|
"eval_r2": -0.32848048210144043, |
|
"eval_rmse": 2.2027979398330912, |
|
"eval_runtime": 1.3879, |
|
"eval_samples_per_second": 39.629, |
|
"eval_smape": 54.71565127372742, |
|
"eval_steps_per_second": 10.087, |
|
"step": 248 |
|
}, |
|
{ |
|
"epoch": 2.0080645161290325, |
|
"grad_norm": 154.8404998779297, |
|
"learning_rate": 1.4839656644071885e-05, |
|
"loss": 3.6054, |
|
"step": 249 |
|
}, |
|
{ |
|
"epoch": 2.0161290322580645, |
|
"grad_norm": 62.42089080810547, |
|
"learning_rate": 1.4799657569559564e-05, |
|
"loss": 1.8889, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 2.024193548387097, |
|
"grad_norm": 70.86661529541016, |
|
"learning_rate": 1.4759658495047241e-05, |
|
"loss": 2.4493, |
|
"step": 251 |
|
}, |
|
{ |
|
"epoch": 2.032258064516129, |
|
"grad_norm": 51.09244155883789, |
|
"learning_rate": 1.4719659420534916e-05, |
|
"loss": 1.168, |
|
"step": 252 |
|
}, |
|
{ |
|
"epoch": 2.0403225806451615, |
|
"grad_norm": 18.558488845825195, |
|
"learning_rate": 1.4679660346022593e-05, |
|
"loss": 0.2762, |
|
"step": 253 |
|
}, |
|
{ |
|
"epoch": 2.0483870967741935, |
|
"grad_norm": 168.22433471679688, |
|
"learning_rate": 1.463966127151027e-05, |
|
"loss": 1.9643, |
|
"step": 254 |
|
}, |
|
{ |
|
"epoch": 2.056451612903226, |
|
"grad_norm": 40.86513900756836, |
|
"learning_rate": 1.4599662196997949e-05, |
|
"loss": 0.8871, |
|
"step": 255 |
|
}, |
|
{ |
|
"epoch": 2.064516129032258, |
|
"grad_norm": 169.68312072753906, |
|
"learning_rate": 1.4559663122485622e-05, |
|
"loss": 10.1988, |
|
"step": 256 |
|
}, |
|
{ |
|
"epoch": 2.0725806451612905, |
|
"grad_norm": 6.839456081390381, |
|
"learning_rate": 1.45196640479733e-05, |
|
"loss": 0.195, |
|
"step": 257 |
|
}, |
|
{ |
|
"epoch": 2.0806451612903225, |
|
"grad_norm": 29.532468795776367, |
|
"learning_rate": 1.4479664973460978e-05, |
|
"loss": 0.4214, |
|
"step": 258 |
|
}, |
|
{ |
|
"epoch": 2.088709677419355, |
|
"grad_norm": 134.2998504638672, |
|
"learning_rate": 1.4439665898948655e-05, |
|
"loss": 22.2033, |
|
"step": 259 |
|
}, |
|
{ |
|
"epoch": 2.096774193548387, |
|
"grad_norm": 229.64031982421875, |
|
"learning_rate": 1.4399666824436333e-05, |
|
"loss": 0.8783, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 2.1048387096774195, |
|
"grad_norm": 150.3468017578125, |
|
"learning_rate": 1.4359667749924007e-05, |
|
"loss": 22.9731, |
|
"step": 261 |
|
}, |
|
{ |
|
"epoch": 2.1129032258064515, |
|
"grad_norm": 104.23390197753906, |
|
"learning_rate": 1.4319668675411685e-05, |
|
"loss": 1.326, |
|
"step": 262 |
|
}, |
|
{ |
|
"epoch": 2.120967741935484, |
|
"grad_norm": 114.47550964355469, |
|
"learning_rate": 1.4279669600899362e-05, |
|
"loss": 1.3029, |
|
"step": 263 |
|
}, |
|
{ |
|
"epoch": 2.129032258064516, |
|
"grad_norm": 15.87612533569336, |
|
"learning_rate": 1.423967052638704e-05, |
|
"loss": 0.4699, |
|
"step": 264 |
|
}, |
|
{ |
|
"epoch": 2.1370967741935485, |
|
"grad_norm": 46.12168884277344, |
|
"learning_rate": 1.4199671451874716e-05, |
|
"loss": 0.7016, |
|
"step": 265 |
|
}, |
|
{ |
|
"epoch": 2.1451612903225805, |
|
"grad_norm": 279.55279541015625, |
|
"learning_rate": 1.4159672377362391e-05, |
|
"loss": 3.2752, |
|
"step": 266 |
|
}, |
|
{ |
|
"epoch": 2.153225806451613, |
|
"grad_norm": 20.628719329833984, |
|
"learning_rate": 1.4119673302850068e-05, |
|
"loss": 0.2388, |
|
"step": 267 |
|
}, |
|
{ |
|
"epoch": 2.161290322580645, |
|
"grad_norm": 22.857067108154297, |
|
"learning_rate": 1.4079674228337747e-05, |
|
"loss": 0.5555, |
|
"step": 268 |
|
}, |
|
{ |
|
"epoch": 2.1693548387096775, |
|
"grad_norm": 47.45686340332031, |
|
"learning_rate": 1.4039675153825424e-05, |
|
"loss": 2.8467, |
|
"step": 269 |
|
}, |
|
{ |
|
"epoch": 2.1774193548387095, |
|
"grad_norm": 22.756681442260742, |
|
"learning_rate": 1.39996760793131e-05, |
|
"loss": 0.7615, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 2.185483870967742, |
|
"grad_norm": 80.15179443359375, |
|
"learning_rate": 1.3959677004800776e-05, |
|
"loss": 8.9323, |
|
"step": 271 |
|
}, |
|
{ |
|
"epoch": 2.193548387096774, |
|
"grad_norm": 100.86170196533203, |
|
"learning_rate": 1.3919677930288453e-05, |
|
"loss": 12.4829, |
|
"step": 272 |
|
}, |
|
{ |
|
"epoch": 2.2016129032258065, |
|
"grad_norm": 72.2647705078125, |
|
"learning_rate": 1.3879678855776132e-05, |
|
"loss": 9.6687, |
|
"step": 273 |
|
}, |
|
{ |
|
"epoch": 2.2096774193548385, |
|
"grad_norm": 30.535030364990234, |
|
"learning_rate": 1.3839679781263808e-05, |
|
"loss": 0.4308, |
|
"step": 274 |
|
}, |
|
{ |
|
"epoch": 2.217741935483871, |
|
"grad_norm": 54.71157455444336, |
|
"learning_rate": 1.3799680706751484e-05, |
|
"loss": 1.1555, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 2.225806451612903, |
|
"grad_norm": 39.96649932861328, |
|
"learning_rate": 1.375968163223916e-05, |
|
"loss": 0.4534, |
|
"step": 276 |
|
}, |
|
{ |
|
"epoch": 2.2338709677419355, |
|
"grad_norm": 55.768898010253906, |
|
"learning_rate": 1.3719682557726838e-05, |
|
"loss": 0.9371, |
|
"step": 277 |
|
}, |
|
{ |
|
"epoch": 2.241935483870968, |
|
"grad_norm": 83.68522644042969, |
|
"learning_rate": 1.3679683483214516e-05, |
|
"loss": 2.4787, |
|
"step": 278 |
|
}, |
|
{ |
|
"epoch": 2.25, |
|
"grad_norm": 29.500429153442383, |
|
"learning_rate": 1.3639684408702193e-05, |
|
"loss": 1.0634, |
|
"step": 279 |
|
}, |
|
{ |
|
"epoch": 2.258064516129032, |
|
"grad_norm": 100.61076354980469, |
|
"learning_rate": 1.3599685334189868e-05, |
|
"loss": 10.4759, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 2.2661290322580645, |
|
"grad_norm": 182.71800231933594, |
|
"learning_rate": 1.3559686259677545e-05, |
|
"loss": 1.687, |
|
"step": 281 |
|
}, |
|
{ |
|
"epoch": 2.274193548387097, |
|
"grad_norm": 187.59808349609375, |
|
"learning_rate": 1.3519687185165222e-05, |
|
"loss": 15.9992, |
|
"step": 282 |
|
}, |
|
{ |
|
"epoch": 2.282258064516129, |
|
"grad_norm": 105.1393051147461, |
|
"learning_rate": 1.3479688110652899e-05, |
|
"loss": 4.9415, |
|
"step": 283 |
|
}, |
|
{ |
|
"epoch": 2.2903225806451615, |
|
"grad_norm": 115.4957504272461, |
|
"learning_rate": 1.3439689036140578e-05, |
|
"loss": 7.5135, |
|
"step": 284 |
|
}, |
|
{ |
|
"epoch": 2.2983870967741935, |
|
"grad_norm": 42.67521286010742, |
|
"learning_rate": 1.3399689961628251e-05, |
|
"loss": 0.9061, |
|
"step": 285 |
|
}, |
|
{ |
|
"epoch": 2.306451612903226, |
|
"grad_norm": 31.427526473999023, |
|
"learning_rate": 1.335969088711593e-05, |
|
"loss": 0.4332, |
|
"step": 286 |
|
}, |
|
{ |
|
"epoch": 2.314516129032258, |
|
"grad_norm": 47.62785339355469, |
|
"learning_rate": 1.3319691812603607e-05, |
|
"loss": 0.9642, |
|
"step": 287 |
|
}, |
|
{ |
|
"epoch": 2.3225806451612905, |
|
"grad_norm": 15.01259708404541, |
|
"learning_rate": 1.3279692738091284e-05, |
|
"loss": 0.2899, |
|
"step": 288 |
|
}, |
|
{ |
|
"epoch": 2.3306451612903225, |
|
"grad_norm": 37.724735260009766, |
|
"learning_rate": 1.3239693663578959e-05, |
|
"loss": 1.7805, |
|
"step": 289 |
|
}, |
|
{ |
|
"epoch": 2.338709677419355, |
|
"grad_norm": 79.78099060058594, |
|
"learning_rate": 1.3199694589066636e-05, |
|
"loss": 8.5522, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 2.346774193548387, |
|
"grad_norm": 20.72849464416504, |
|
"learning_rate": 1.3159695514554314e-05, |
|
"loss": 0.4305, |
|
"step": 291 |
|
}, |
|
{ |
|
"epoch": 2.3548387096774195, |
|
"grad_norm": 137.8477325439453, |
|
"learning_rate": 1.3119696440041991e-05, |
|
"loss": 5.5653, |
|
"step": 292 |
|
}, |
|
{ |
|
"epoch": 2.3629032258064515, |
|
"grad_norm": 49.036468505859375, |
|
"learning_rate": 1.3079697365529668e-05, |
|
"loss": 0.3396, |
|
"step": 293 |
|
}, |
|
{ |
|
"epoch": 2.370967741935484, |
|
"grad_norm": 77.85248565673828, |
|
"learning_rate": 1.3039698291017344e-05, |
|
"loss": 12.0845, |
|
"step": 294 |
|
}, |
|
{ |
|
"epoch": 2.379032258064516, |
|
"grad_norm": 84.66986846923828, |
|
"learning_rate": 1.299969921650502e-05, |
|
"loss": 13.1287, |
|
"step": 295 |
|
}, |
|
{ |
|
"epoch": 2.3870967741935485, |
|
"grad_norm": 19.693037033081055, |
|
"learning_rate": 1.2959700141992699e-05, |
|
"loss": 0.4963, |
|
"step": 296 |
|
}, |
|
{ |
|
"epoch": 2.3951612903225805, |
|
"grad_norm": 10.413263320922852, |
|
"learning_rate": 1.2919701067480376e-05, |
|
"loss": 0.2287, |
|
"step": 297 |
|
}, |
|
{ |
|
"epoch": 2.403225806451613, |
|
"grad_norm": 64.92196655273438, |
|
"learning_rate": 1.2879701992968053e-05, |
|
"loss": 1.6285, |
|
"step": 298 |
|
}, |
|
{ |
|
"epoch": 2.411290322580645, |
|
"grad_norm": 36.941349029541016, |
|
"learning_rate": 1.2839702918455728e-05, |
|
"loss": 0.8435, |
|
"step": 299 |
|
}, |
|
{ |
|
"epoch": 2.4193548387096775, |
|
"grad_norm": 149.9400177001953, |
|
"learning_rate": 1.2799703843943405e-05, |
|
"loss": 4.2246, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 2.4274193548387095, |
|
"grad_norm": 40.55129623413086, |
|
"learning_rate": 1.2759704769431082e-05, |
|
"loss": 0.628, |
|
"step": 301 |
|
}, |
|
{ |
|
"epoch": 2.435483870967742, |
|
"grad_norm": 39.12997817993164, |
|
"learning_rate": 1.271970569491876e-05, |
|
"loss": 0.4721, |
|
"step": 302 |
|
}, |
|
{ |
|
"epoch": 2.443548387096774, |
|
"grad_norm": 87.25220489501953, |
|
"learning_rate": 1.2679706620406434e-05, |
|
"loss": 6.3045, |
|
"step": 303 |
|
}, |
|
{ |
|
"epoch": 2.4516129032258065, |
|
"grad_norm": 47.91136169433594, |
|
"learning_rate": 1.2639707545894113e-05, |
|
"loss": 0.6415, |
|
"step": 304 |
|
}, |
|
{ |
|
"epoch": 2.4596774193548385, |
|
"grad_norm": 103.10691833496094, |
|
"learning_rate": 1.259970847138179e-05, |
|
"loss": 10.6521, |
|
"step": 305 |
|
}, |
|
{ |
|
"epoch": 2.467741935483871, |
|
"grad_norm": 16.19340705871582, |
|
"learning_rate": 1.2559709396869467e-05, |
|
"loss": 0.2191, |
|
"step": 306 |
|
}, |
|
{ |
|
"epoch": 2.475806451612903, |
|
"grad_norm": 18.110265731811523, |
|
"learning_rate": 1.2519710322357145e-05, |
|
"loss": 0.2425, |
|
"step": 307 |
|
}, |
|
{ |
|
"epoch": 2.4838709677419355, |
|
"grad_norm": 156.45272827148438, |
|
"learning_rate": 1.2479711247844819e-05, |
|
"loss": 4.5986, |
|
"step": 308 |
|
}, |
|
{ |
|
"epoch": 2.491935483870968, |
|
"grad_norm": 16.38495635986328, |
|
"learning_rate": 1.2439712173332497e-05, |
|
"loss": 0.3502, |
|
"step": 309 |
|
}, |
|
{ |
|
"epoch": 2.5, |
|
"grad_norm": 24.752309799194336, |
|
"learning_rate": 1.2399713098820174e-05, |
|
"loss": 0.4216, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 2.508064516129032, |
|
"grad_norm": 22.331012725830078, |
|
"learning_rate": 1.2359714024307851e-05, |
|
"loss": 0.3915, |
|
"step": 311 |
|
}, |
|
{ |
|
"epoch": 2.5161290322580645, |
|
"grad_norm": 4.984405994415283, |
|
"learning_rate": 1.2319714949795528e-05, |
|
"loss": 0.1369, |
|
"step": 312 |
|
}, |
|
{ |
|
"epoch": 2.524193548387097, |
|
"grad_norm": 4.796787261962891, |
|
"learning_rate": 1.2279715875283205e-05, |
|
"loss": 0.1145, |
|
"step": 313 |
|
}, |
|
{ |
|
"epoch": 2.532258064516129, |
|
"grad_norm": 11.541577339172363, |
|
"learning_rate": 1.2239716800770882e-05, |
|
"loss": 0.4872, |
|
"step": 314 |
|
}, |
|
{ |
|
"epoch": 2.540322580645161, |
|
"grad_norm": 117.4135971069336, |
|
"learning_rate": 1.2199717726258559e-05, |
|
"loss": 8.6798, |
|
"step": 315 |
|
}, |
|
{ |
|
"epoch": 2.5483870967741935, |
|
"grad_norm": 11.267465591430664, |
|
"learning_rate": 1.2159718651746234e-05, |
|
"loss": 0.2144, |
|
"step": 316 |
|
}, |
|
{ |
|
"epoch": 2.556451612903226, |
|
"grad_norm": 17.15199851989746, |
|
"learning_rate": 1.2119719577233913e-05, |
|
"loss": 0.4501, |
|
"step": 317 |
|
}, |
|
{ |
|
"epoch": 2.564516129032258, |
|
"grad_norm": 14.21686840057373, |
|
"learning_rate": 1.207972050272159e-05, |
|
"loss": 0.4759, |
|
"step": 318 |
|
}, |
|
{ |
|
"epoch": 2.5725806451612905, |
|
"grad_norm": 52.95610809326172, |
|
"learning_rate": 1.2039721428209267e-05, |
|
"loss": 0.792, |
|
"step": 319 |
|
}, |
|
{ |
|
"epoch": 2.5806451612903225, |
|
"grad_norm": 18.222946166992188, |
|
"learning_rate": 1.1999722353696944e-05, |
|
"loss": 0.9593, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 2.588709677419355, |
|
"grad_norm": 112.54412078857422, |
|
"learning_rate": 1.1959723279184619e-05, |
|
"loss": 5.1768, |
|
"step": 321 |
|
}, |
|
{ |
|
"epoch": 2.596774193548387, |
|
"grad_norm": 5.891172885894775, |
|
"learning_rate": 1.1919724204672297e-05, |
|
"loss": 0.336, |
|
"step": 322 |
|
}, |
|
{ |
|
"epoch": 2.6048387096774195, |
|
"grad_norm": 140.28492736816406, |
|
"learning_rate": 1.1879725130159973e-05, |
|
"loss": 11.1532, |
|
"step": 323 |
|
}, |
|
{ |
|
"epoch": 2.6129032258064515, |
|
"grad_norm": 27.81146812438965, |
|
"learning_rate": 1.183972605564765e-05, |
|
"loss": 0.4171, |
|
"step": 324 |
|
}, |
|
{ |
|
"epoch": 2.620967741935484, |
|
"grad_norm": 13.444504737854004, |
|
"learning_rate": 1.1799726981135328e-05, |
|
"loss": 0.181, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 2.629032258064516, |
|
"grad_norm": 19.829408645629883, |
|
"learning_rate": 1.1759727906623003e-05, |
|
"loss": 0.6601, |
|
"step": 326 |
|
}, |
|
{ |
|
"epoch": 2.6370967741935485, |
|
"grad_norm": 13.109299659729004, |
|
"learning_rate": 1.1719728832110682e-05, |
|
"loss": 0.8257, |
|
"step": 327 |
|
}, |
|
{ |
|
"epoch": 2.6451612903225805, |
|
"grad_norm": 37.20515823364258, |
|
"learning_rate": 1.1679729757598357e-05, |
|
"loss": 0.604, |
|
"step": 328 |
|
}, |
|
{ |
|
"epoch": 2.653225806451613, |
|
"grad_norm": 401.8742980957031, |
|
"learning_rate": 1.1639730683086034e-05, |
|
"loss": 123.0045, |
|
"step": 329 |
|
}, |
|
{ |
|
"epoch": 2.661290322580645, |
|
"grad_norm": 41.35789489746094, |
|
"learning_rate": 1.1599731608573711e-05, |
|
"loss": 0.8282, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 2.6693548387096775, |
|
"grad_norm": 36.715728759765625, |
|
"learning_rate": 1.1559732534061388e-05, |
|
"loss": 0.9222, |
|
"step": 331 |
|
}, |
|
{ |
|
"epoch": 2.6774193548387095, |
|
"grad_norm": 236.11050415039062, |
|
"learning_rate": 1.1519733459549065e-05, |
|
"loss": 2.1798, |
|
"step": 332 |
|
}, |
|
{ |
|
"epoch": 2.685483870967742, |
|
"grad_norm": 9.052068710327148, |
|
"learning_rate": 1.1479734385036742e-05, |
|
"loss": 0.3699, |
|
"step": 333 |
|
}, |
|
{ |
|
"epoch": 2.693548387096774, |
|
"grad_norm": 120.23004150390625, |
|
"learning_rate": 1.1439735310524419e-05, |
|
"loss": 4.8277, |
|
"step": 334 |
|
}, |
|
{ |
|
"epoch": 2.7016129032258065, |
|
"grad_norm": 86.39726257324219, |
|
"learning_rate": 1.1399736236012096e-05, |
|
"loss": 0.7964, |
|
"step": 335 |
|
}, |
|
{ |
|
"epoch": 2.709677419354839, |
|
"grad_norm": 43.46727752685547, |
|
"learning_rate": 1.1359737161499773e-05, |
|
"loss": 0.4678, |
|
"step": 336 |
|
}, |
|
{ |
|
"epoch": 2.717741935483871, |
|
"grad_norm": 92.9560546875, |
|
"learning_rate": 1.131973808698745e-05, |
|
"loss": 1.9784, |
|
"step": 337 |
|
}, |
|
{ |
|
"epoch": 2.725806451612903, |
|
"grad_norm": 72.29792785644531, |
|
"learning_rate": 1.1279739012475126e-05, |
|
"loss": 1.1618, |
|
"step": 338 |
|
}, |
|
{ |
|
"epoch": 2.7338709677419355, |
|
"grad_norm": 108.34342193603516, |
|
"learning_rate": 1.1239739937962803e-05, |
|
"loss": 8.9934, |
|
"step": 339 |
|
}, |
|
{ |
|
"epoch": 2.741935483870968, |
|
"grad_norm": 44.143707275390625, |
|
"learning_rate": 1.119974086345048e-05, |
|
"loss": 0.7319, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 2.75, |
|
"grad_norm": 14.93342113494873, |
|
"learning_rate": 1.1159741788938157e-05, |
|
"loss": 0.2592, |
|
"step": 341 |
|
}, |
|
{ |
|
"epoch": 2.758064516129032, |
|
"grad_norm": 213.9510040283203, |
|
"learning_rate": 1.1119742714425832e-05, |
|
"loss": 12.5033, |
|
"step": 342 |
|
}, |
|
{ |
|
"epoch": 2.7661290322580645, |
|
"grad_norm": 114.5474624633789, |
|
"learning_rate": 1.1079743639913511e-05, |
|
"loss": 1.5119, |
|
"step": 343 |
|
}, |
|
{ |
|
"epoch": 2.774193548387097, |
|
"grad_norm": 22.315725326538086, |
|
"learning_rate": 1.1039744565401188e-05, |
|
"loss": 0.6701, |
|
"step": 344 |
|
}, |
|
{ |
|
"epoch": 2.782258064516129, |
|
"grad_norm": 22.660259246826172, |
|
"learning_rate": 1.0999745490888865e-05, |
|
"loss": 0.7027, |
|
"step": 345 |
|
}, |
|
{ |
|
"epoch": 2.790322580645161, |
|
"grad_norm": 14.709903717041016, |
|
"learning_rate": 1.0959746416376542e-05, |
|
"loss": 0.213, |
|
"step": 346 |
|
}, |
|
{ |
|
"epoch": 2.7983870967741935, |
|
"grad_norm": 89.29466247558594, |
|
"learning_rate": 1.0919747341864217e-05, |
|
"loss": 1.5922, |
|
"step": 347 |
|
}, |
|
{ |
|
"epoch": 2.806451612903226, |
|
"grad_norm": 13.790899276733398, |
|
"learning_rate": 1.0879748267351896e-05, |
|
"loss": 0.4771, |
|
"step": 348 |
|
}, |
|
{ |
|
"epoch": 2.814516129032258, |
|
"grad_norm": 67.785888671875, |
|
"learning_rate": 1.0839749192839571e-05, |
|
"loss": 5.3096, |
|
"step": 349 |
|
}, |
|
{ |
|
"epoch": 2.8225806451612905, |
|
"grad_norm": 70.6512222290039, |
|
"learning_rate": 1.0799750118327248e-05, |
|
"loss": 0.8779, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 2.8306451612903225, |
|
"grad_norm": 51.96946334838867, |
|
"learning_rate": 1.0759751043814926e-05, |
|
"loss": 1.0843, |
|
"step": 351 |
|
}, |
|
{ |
|
"epoch": 2.838709677419355, |
|
"grad_norm": 112.01322937011719, |
|
"learning_rate": 1.0719751969302602e-05, |
|
"loss": 4.3431, |
|
"step": 352 |
|
}, |
|
{ |
|
"epoch": 2.846774193548387, |
|
"grad_norm": 30.747026443481445, |
|
"learning_rate": 1.067975289479028e-05, |
|
"loss": 0.6655, |
|
"step": 353 |
|
}, |
|
{ |
|
"epoch": 2.8548387096774195, |
|
"grad_norm": 152.95237731933594, |
|
"learning_rate": 1.0639753820277956e-05, |
|
"loss": 7.7018, |
|
"step": 354 |
|
}, |
|
{ |
|
"epoch": 2.8629032258064515, |
|
"grad_norm": 97.97068786621094, |
|
"learning_rate": 1.0599754745765632e-05, |
|
"loss": 6.0524, |
|
"step": 355 |
|
}, |
|
{ |
|
"epoch": 2.870967741935484, |
|
"grad_norm": 24.1806583404541, |
|
"learning_rate": 1.055975567125331e-05, |
|
"loss": 0.4385, |
|
"step": 356 |
|
}, |
|
{ |
|
"epoch": 2.879032258064516, |
|
"grad_norm": 125.32524871826172, |
|
"learning_rate": 1.0519756596740986e-05, |
|
"loss": 15.97, |
|
"step": 357 |
|
}, |
|
{ |
|
"epoch": 2.8870967741935485, |
|
"grad_norm": 13.418540000915527, |
|
"learning_rate": 1.0479757522228663e-05, |
|
"loss": 0.1098, |
|
"step": 358 |
|
}, |
|
{ |
|
"epoch": 2.8951612903225805, |
|
"grad_norm": 18.43660545349121, |
|
"learning_rate": 1.043975844771634e-05, |
|
"loss": 0.6929, |
|
"step": 359 |
|
}, |
|
{ |
|
"epoch": 2.903225806451613, |
|
"grad_norm": 14.304896354675293, |
|
"learning_rate": 1.0399759373204017e-05, |
|
"loss": 0.6683, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 2.911290322580645, |
|
"grad_norm": 124.25679016113281, |
|
"learning_rate": 1.0359760298691694e-05, |
|
"loss": 14.7953, |
|
"step": 361 |
|
}, |
|
{ |
|
"epoch": 2.9193548387096775, |
|
"grad_norm": 27.41398048400879, |
|
"learning_rate": 1.0319761224179371e-05, |
|
"loss": 0.4546, |
|
"step": 362 |
|
}, |
|
{ |
|
"epoch": 2.9274193548387095, |
|
"grad_norm": 17.463279724121094, |
|
"learning_rate": 1.0279762149667048e-05, |
|
"loss": 0.2985, |
|
"step": 363 |
|
}, |
|
{ |
|
"epoch": 2.935483870967742, |
|
"grad_norm": 36.6607666015625, |
|
"learning_rate": 1.0239763075154725e-05, |
|
"loss": 0.6629, |
|
"step": 364 |
|
}, |
|
{ |
|
"epoch": 2.943548387096774, |
|
"grad_norm": 36.471588134765625, |
|
"learning_rate": 1.0199764000642402e-05, |
|
"loss": 0.5639, |
|
"step": 365 |
|
}, |
|
{ |
|
"epoch": 2.9516129032258065, |
|
"grad_norm": 17.774343490600586, |
|
"learning_rate": 1.0159764926130079e-05, |
|
"loss": 0.3925, |
|
"step": 366 |
|
}, |
|
{ |
|
"epoch": 2.959677419354839, |
|
"grad_norm": 32.37789535522461, |
|
"learning_rate": 1.0119765851617756e-05, |
|
"loss": 0.5822, |
|
"step": 367 |
|
}, |
|
{ |
|
"epoch": 2.967741935483871, |
|
"grad_norm": 45.42538070678711, |
|
"learning_rate": 1.0079766777105432e-05, |
|
"loss": 0.7533, |
|
"step": 368 |
|
}, |
|
{ |
|
"epoch": 2.975806451612903, |
|
"grad_norm": 26.0896053314209, |
|
"learning_rate": 1.003976770259311e-05, |
|
"loss": 0.3197, |
|
"step": 369 |
|
}, |
|
{ |
|
"epoch": 2.9838709677419355, |
|
"grad_norm": 5.230876445770264, |
|
"learning_rate": 9.999768628080785e-06, |
|
"loss": 0.2765, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 2.991935483870968, |
|
"grad_norm": 50.81292724609375, |
|
"learning_rate": 9.959769553568463e-06, |
|
"loss": 2.345, |
|
"step": 371 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"grad_norm": 310.2722473144531, |
|
"learning_rate": 9.91977047905614e-06, |
|
"loss": 24.6264, |
|
"step": 372 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_loss": 3.3316104412078857, |
|
"eval_mae": 1.1439374685287476, |
|
"eval_mse": 3.3316097259521484, |
|
"eval_r2": 0.08786314725875854, |
|
"eval_rmse": 1.8252697679938021, |
|
"eval_runtime": 1.3357, |
|
"eval_samples_per_second": 41.176, |
|
"eval_smape": 51.89841985702515, |
|
"eval_steps_per_second": 10.481, |
|
"step": 372 |
|
}, |
|
{ |
|
"epoch": 3.0080645161290325, |
|
"grad_norm": 41.96479415893555, |
|
"learning_rate": 9.879771404543815e-06, |
|
"loss": 1.2152, |
|
"step": 373 |
|
}, |
|
{ |
|
"epoch": 3.0161290322580645, |
|
"grad_norm": 136.83741760253906, |
|
"learning_rate": 9.839772330031494e-06, |
|
"loss": 3.896, |
|
"step": 374 |
|
}, |
|
{ |
|
"epoch": 3.024193548387097, |
|
"grad_norm": 22.60567283630371, |
|
"learning_rate": 9.79977325551917e-06, |
|
"loss": 0.3975, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 3.032258064516129, |
|
"grad_norm": 39.60881805419922, |
|
"learning_rate": 9.759774181006848e-06, |
|
"loss": 1.4481, |
|
"step": 376 |
|
}, |
|
{ |
|
"epoch": 3.0403225806451615, |
|
"grad_norm": 7.505491256713867, |
|
"learning_rate": 9.719775106494523e-06, |
|
"loss": 0.0605, |
|
"step": 377 |
|
}, |
|
{ |
|
"epoch": 3.0483870967741935, |
|
"grad_norm": 53.053001403808594, |
|
"learning_rate": 9.6797760319822e-06, |
|
"loss": 1.7315, |
|
"step": 378 |
|
}, |
|
{ |
|
"epoch": 3.056451612903226, |
|
"grad_norm": 33.12321853637695, |
|
"learning_rate": 9.639776957469879e-06, |
|
"loss": 0.5669, |
|
"step": 379 |
|
}, |
|
{ |
|
"epoch": 3.064516129032258, |
|
"grad_norm": 13.300590515136719, |
|
"learning_rate": 9.599777882957554e-06, |
|
"loss": 0.4431, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 3.0725806451612905, |
|
"grad_norm": 104.96222686767578, |
|
"learning_rate": 9.55977880844523e-06, |
|
"loss": 5.6698, |
|
"step": 381 |
|
}, |
|
{ |
|
"epoch": 3.0806451612903225, |
|
"grad_norm": 49.31301498413086, |
|
"learning_rate": 9.519779733932908e-06, |
|
"loss": 1.3213, |
|
"step": 382 |
|
}, |
|
{ |
|
"epoch": 3.088709677419355, |
|
"grad_norm": 16.676259994506836, |
|
"learning_rate": 9.479780659420585e-06, |
|
"loss": 0.266, |
|
"step": 383 |
|
}, |
|
{ |
|
"epoch": 3.096774193548387, |
|
"grad_norm": 34.20507049560547, |
|
"learning_rate": 9.439781584908261e-06, |
|
"loss": 1.5853, |
|
"step": 384 |
|
}, |
|
{ |
|
"epoch": 3.1048387096774195, |
|
"grad_norm": 39.18606948852539, |
|
"learning_rate": 9.399782510395938e-06, |
|
"loss": 1.0645, |
|
"step": 385 |
|
}, |
|
{ |
|
"epoch": 3.1129032258064515, |
|
"grad_norm": 5.354783535003662, |
|
"learning_rate": 9.359783435883615e-06, |
|
"loss": 0.1131, |
|
"step": 386 |
|
}, |
|
{ |
|
"epoch": 3.120967741935484, |
|
"grad_norm": 41.82234573364258, |
|
"learning_rate": 9.319784361371292e-06, |
|
"loss": 0.5984, |
|
"step": 387 |
|
}, |
|
{ |
|
"epoch": 3.129032258064516, |
|
"grad_norm": 16.759111404418945, |
|
"learning_rate": 9.27978528685897e-06, |
|
"loss": 0.4225, |
|
"step": 388 |
|
}, |
|
{ |
|
"epoch": 3.1370967741935485, |
|
"grad_norm": 24.134700775146484, |
|
"learning_rate": 9.239786212346646e-06, |
|
"loss": 0.434, |
|
"step": 389 |
|
}, |
|
{ |
|
"epoch": 3.1451612903225805, |
|
"grad_norm": 13.111348152160645, |
|
"learning_rate": 9.199787137834323e-06, |
|
"loss": 0.2413, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 3.153225806451613, |
|
"grad_norm": 115.44222259521484, |
|
"learning_rate": 9.159788063322e-06, |
|
"loss": 8.9723, |
|
"step": 391 |
|
}, |
|
{ |
|
"epoch": 3.161290322580645, |
|
"grad_norm": 13.346231460571289, |
|
"learning_rate": 9.119788988809677e-06, |
|
"loss": 0.8709, |
|
"step": 392 |
|
}, |
|
{ |
|
"epoch": 3.1693548387096775, |
|
"grad_norm": 1.1584579944610596, |
|
"learning_rate": 9.079789914297354e-06, |
|
"loss": 0.0055, |
|
"step": 393 |
|
}, |
|
{ |
|
"epoch": 3.1774193548387095, |
|
"grad_norm": 70.0774154663086, |
|
"learning_rate": 9.03979083978503e-06, |
|
"loss": 0.9703, |
|
"step": 394 |
|
}, |
|
{ |
|
"epoch": 3.185483870967742, |
|
"grad_norm": 14.28967571258545, |
|
"learning_rate": 8.999791765272708e-06, |
|
"loss": 0.2817, |
|
"step": 395 |
|
}, |
|
{ |
|
"epoch": 3.193548387096774, |
|
"grad_norm": 4.5178303718566895, |
|
"learning_rate": 8.959792690760383e-06, |
|
"loss": 0.0217, |
|
"step": 396 |
|
}, |
|
{ |
|
"epoch": 3.2016129032258065, |
|
"grad_norm": 9.761589050292969, |
|
"learning_rate": 8.919793616248061e-06, |
|
"loss": 0.4557, |
|
"step": 397 |
|
}, |
|
{ |
|
"epoch": 3.2096774193548385, |
|
"grad_norm": 53.35697555541992, |
|
"learning_rate": 8.879794541735738e-06, |
|
"loss": 0.8752, |
|
"step": 398 |
|
}, |
|
{ |
|
"epoch": 3.217741935483871, |
|
"grad_norm": 165.54457092285156, |
|
"learning_rate": 8.839795467223414e-06, |
|
"loss": 14.6604, |
|
"step": 399 |
|
}, |
|
{ |
|
"epoch": 3.225806451612903, |
|
"grad_norm": 247.7133331298828, |
|
"learning_rate": 8.799796392711092e-06, |
|
"loss": 9.448, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 3.2338709677419355, |
|
"grad_norm": 53.525306701660156, |
|
"learning_rate": 8.759797318198767e-06, |
|
"loss": 0.724, |
|
"step": 401 |
|
}, |
|
{ |
|
"epoch": 3.241935483870968, |
|
"grad_norm": 172.49952697753906, |
|
"learning_rate": 8.719798243686446e-06, |
|
"loss": 6.2532, |
|
"step": 402 |
|
}, |
|
{ |
|
"epoch": 3.25, |
|
"grad_norm": 99.93900299072266, |
|
"learning_rate": 8.679799169174121e-06, |
|
"loss": 2.2522, |
|
"step": 403 |
|
}, |
|
{ |
|
"epoch": 3.258064516129032, |
|
"grad_norm": 15.439353942871094, |
|
"learning_rate": 8.639800094661798e-06, |
|
"loss": 0.1913, |
|
"step": 404 |
|
}, |
|
{ |
|
"epoch": 3.2661290322580645, |
|
"grad_norm": 49.3162956237793, |
|
"learning_rate": 8.599801020149477e-06, |
|
"loss": 0.4811, |
|
"step": 405 |
|
}, |
|
{ |
|
"epoch": 3.274193548387097, |
|
"grad_norm": 184.40086364746094, |
|
"learning_rate": 8.559801945637152e-06, |
|
"loss": 7.606, |
|
"step": 406 |
|
}, |
|
{ |
|
"epoch": 3.282258064516129, |
|
"grad_norm": 132.2401580810547, |
|
"learning_rate": 8.519802871124829e-06, |
|
"loss": 3.4354, |
|
"step": 407 |
|
}, |
|
{ |
|
"epoch": 3.2903225806451615, |
|
"grad_norm": 64.83243560791016, |
|
"learning_rate": 8.479803796612506e-06, |
|
"loss": 2.5171, |
|
"step": 408 |
|
}, |
|
{ |
|
"epoch": 3.2983870967741935, |
|
"grad_norm": 158.75596618652344, |
|
"learning_rate": 8.439804722100183e-06, |
|
"loss": 2.1743, |
|
"step": 409 |
|
}, |
|
{ |
|
"epoch": 3.306451612903226, |
|
"grad_norm": 103.98823547363281, |
|
"learning_rate": 8.39980564758786e-06, |
|
"loss": 1.4439, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 3.314516129032258, |
|
"grad_norm": 43.638450622558594, |
|
"learning_rate": 8.359806573075537e-06, |
|
"loss": 0.573, |
|
"step": 411 |
|
}, |
|
{ |
|
"epoch": 3.3225806451612905, |
|
"grad_norm": 41.889686584472656, |
|
"learning_rate": 8.319807498563214e-06, |
|
"loss": 1.4203, |
|
"step": 412 |
|
}, |
|
{ |
|
"epoch": 3.3306451612903225, |
|
"grad_norm": 6.998268127441406, |
|
"learning_rate": 8.27980842405089e-06, |
|
"loss": 0.0949, |
|
"step": 413 |
|
}, |
|
{ |
|
"epoch": 3.338709677419355, |
|
"grad_norm": 167.7267303466797, |
|
"learning_rate": 8.239809349538567e-06, |
|
"loss": 3.6027, |
|
"step": 414 |
|
}, |
|
{ |
|
"epoch": 3.346774193548387, |
|
"grad_norm": 58.6676025390625, |
|
"learning_rate": 8.199810275026244e-06, |
|
"loss": 1.0112, |
|
"step": 415 |
|
}, |
|
{ |
|
"epoch": 3.3548387096774195, |
|
"grad_norm": 19.077991485595703, |
|
"learning_rate": 8.159811200513921e-06, |
|
"loss": 0.1047, |
|
"step": 416 |
|
}, |
|
{ |
|
"epoch": 3.3629032258064515, |
|
"grad_norm": 61.30634307861328, |
|
"learning_rate": 8.119812126001598e-06, |
|
"loss": 0.3403, |
|
"step": 417 |
|
}, |
|
{ |
|
"epoch": 3.370967741935484, |
|
"grad_norm": 40.68547058105469, |
|
"learning_rate": 8.079813051489275e-06, |
|
"loss": 0.3131, |
|
"step": 418 |
|
}, |
|
{ |
|
"epoch": 3.379032258064516, |
|
"grad_norm": 39.6519660949707, |
|
"learning_rate": 8.039813976976952e-06, |
|
"loss": 0.0895, |
|
"step": 419 |
|
}, |
|
{ |
|
"epoch": 3.3870967741935485, |
|
"grad_norm": 54.81489181518555, |
|
"learning_rate": 7.999814902464629e-06, |
|
"loss": 0.9378, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 3.3951612903225805, |
|
"grad_norm": 43.20634460449219, |
|
"learning_rate": 7.959815827952306e-06, |
|
"loss": 0.5461, |
|
"step": 421 |
|
}, |
|
{ |
|
"epoch": 3.403225806451613, |
|
"grad_norm": 35.164852142333984, |
|
"learning_rate": 7.919816753439981e-06, |
|
"loss": 1.0319, |
|
"step": 422 |
|
}, |
|
{ |
|
"epoch": 3.411290322580645, |
|
"grad_norm": 283.366455078125, |
|
"learning_rate": 7.87981767892766e-06, |
|
"loss": 8.4101, |
|
"step": 423 |
|
}, |
|
{ |
|
"epoch": 3.4193548387096775, |
|
"grad_norm": 507.85150146484375, |
|
"learning_rate": 7.839818604415335e-06, |
|
"loss": 17.8365, |
|
"step": 424 |
|
}, |
|
{ |
|
"epoch": 3.4274193548387095, |
|
"grad_norm": 8.4439697265625, |
|
"learning_rate": 7.799819529903014e-06, |
|
"loss": 0.2679, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 3.435483870967742, |
|
"grad_norm": 40.22681427001953, |
|
"learning_rate": 7.75982045539069e-06, |
|
"loss": 0.3714, |
|
"step": 426 |
|
}, |
|
{ |
|
"epoch": 3.443548387096774, |
|
"grad_norm": 32.92212677001953, |
|
"learning_rate": 7.719821380878366e-06, |
|
"loss": 0.4915, |
|
"step": 427 |
|
}, |
|
{ |
|
"epoch": 3.4516129032258065, |
|
"grad_norm": 26.323545455932617, |
|
"learning_rate": 7.679822306366044e-06, |
|
"loss": 0.4087, |
|
"step": 428 |
|
}, |
|
{ |
|
"epoch": 3.4596774193548385, |
|
"grad_norm": 146.9882049560547, |
|
"learning_rate": 7.63982323185372e-06, |
|
"loss": 0.2726, |
|
"step": 429 |
|
}, |
|
{ |
|
"epoch": 3.467741935483871, |
|
"grad_norm": 4.548903465270996, |
|
"learning_rate": 7.599824157341397e-06, |
|
"loss": 0.1744, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 3.475806451612903, |
|
"grad_norm": 228.15928649902344, |
|
"learning_rate": 7.5598250828290735e-06, |
|
"loss": 10.8116, |
|
"step": 431 |
|
}, |
|
{ |
|
"epoch": 3.4838709677419355, |
|
"grad_norm": 60.10261154174805, |
|
"learning_rate": 7.51982600831675e-06, |
|
"loss": 0.6974, |
|
"step": 432 |
|
}, |
|
{ |
|
"epoch": 3.491935483870968, |
|
"grad_norm": 13.38789176940918, |
|
"learning_rate": 7.479826933804428e-06, |
|
"loss": 0.4782, |
|
"step": 433 |
|
}, |
|
{ |
|
"epoch": 3.5, |
|
"grad_norm": 68.60205841064453, |
|
"learning_rate": 7.439827859292104e-06, |
|
"loss": 1.7533, |
|
"step": 434 |
|
}, |
|
{ |
|
"epoch": 3.508064516129032, |
|
"grad_norm": 6.820003986358643, |
|
"learning_rate": 7.399828784779782e-06, |
|
"loss": 0.0396, |
|
"step": 435 |
|
}, |
|
{ |
|
"epoch": 3.5161290322580645, |
|
"grad_norm": 6.984181880950928, |
|
"learning_rate": 7.359829710267458e-06, |
|
"loss": 0.0715, |
|
"step": 436 |
|
}, |
|
{ |
|
"epoch": 3.524193548387097, |
|
"grad_norm": 26.827526092529297, |
|
"learning_rate": 7.319830635755135e-06, |
|
"loss": 0.1687, |
|
"step": 437 |
|
}, |
|
{ |
|
"epoch": 3.532258064516129, |
|
"grad_norm": 8.610593795776367, |
|
"learning_rate": 7.279831561242811e-06, |
|
"loss": 0.134, |
|
"step": 438 |
|
}, |
|
{ |
|
"epoch": 3.540322580645161, |
|
"grad_norm": 39.13728332519531, |
|
"learning_rate": 7.239832486730489e-06, |
|
"loss": 0.44, |
|
"step": 439 |
|
}, |
|
{ |
|
"epoch": 3.5483870967741935, |
|
"grad_norm": 56.36937713623047, |
|
"learning_rate": 7.199833412218167e-06, |
|
"loss": 3.6674, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 3.556451612903226, |
|
"grad_norm": 38.50959396362305, |
|
"learning_rate": 7.159834337705843e-06, |
|
"loss": 0.7563, |
|
"step": 441 |
|
}, |
|
{ |
|
"epoch": 3.564516129032258, |
|
"grad_norm": 44.96533203125, |
|
"learning_rate": 7.11983526319352e-06, |
|
"loss": 0.7596, |
|
"step": 442 |
|
}, |
|
{ |
|
"epoch": 3.5725806451612905, |
|
"grad_norm": 59.28168487548828, |
|
"learning_rate": 7.079836188681196e-06, |
|
"loss": 0.5658, |
|
"step": 443 |
|
}, |
|
{ |
|
"epoch": 3.5806451612903225, |
|
"grad_norm": 24.149091720581055, |
|
"learning_rate": 7.0398371141688735e-06, |
|
"loss": 0.3909, |
|
"step": 444 |
|
}, |
|
{ |
|
"epoch": 3.588709677419355, |
|
"grad_norm": 44.97443389892578, |
|
"learning_rate": 6.99983803965655e-06, |
|
"loss": 0.8401, |
|
"step": 445 |
|
}, |
|
{ |
|
"epoch": 3.596774193548387, |
|
"grad_norm": 210.90919494628906, |
|
"learning_rate": 6.9598389651442265e-06, |
|
"loss": 8.9542, |
|
"step": 446 |
|
}, |
|
{ |
|
"epoch": 3.6048387096774195, |
|
"grad_norm": 78.13578796386719, |
|
"learning_rate": 6.919839890631904e-06, |
|
"loss": 7.4033, |
|
"step": 447 |
|
}, |
|
{ |
|
"epoch": 3.6129032258064515, |
|
"grad_norm": 54.309566497802734, |
|
"learning_rate": 6.87984081611958e-06, |
|
"loss": 0.8781, |
|
"step": 448 |
|
}, |
|
{ |
|
"epoch": 3.620967741935484, |
|
"grad_norm": 8.429338455200195, |
|
"learning_rate": 6.839841741607258e-06, |
|
"loss": 0.2823, |
|
"step": 449 |
|
}, |
|
{ |
|
"epoch": 3.629032258064516, |
|
"grad_norm": 32.88882827758789, |
|
"learning_rate": 6.799842667094934e-06, |
|
"loss": 1.153, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 3.6370967741935485, |
|
"grad_norm": 3.0679757595062256, |
|
"learning_rate": 6.759843592582611e-06, |
|
"loss": 0.0494, |
|
"step": 451 |
|
}, |
|
{ |
|
"epoch": 3.6451612903225805, |
|
"grad_norm": 17.686809539794922, |
|
"learning_rate": 6.719844518070289e-06, |
|
"loss": 0.1084, |
|
"step": 452 |
|
}, |
|
{ |
|
"epoch": 3.653225806451613, |
|
"grad_norm": 48.20186996459961, |
|
"learning_rate": 6.679845443557965e-06, |
|
"loss": 0.7689, |
|
"step": 453 |
|
}, |
|
{ |
|
"epoch": 3.661290322580645, |
|
"grad_norm": 134.13829040527344, |
|
"learning_rate": 6.639846369045642e-06, |
|
"loss": 12.3841, |
|
"step": 454 |
|
}, |
|
{ |
|
"epoch": 3.6693548387096775, |
|
"grad_norm": 244.57977294921875, |
|
"learning_rate": 6.599847294533318e-06, |
|
"loss": 8.8699, |
|
"step": 455 |
|
}, |
|
{ |
|
"epoch": 3.6774193548387095, |
|
"grad_norm": 218.47779846191406, |
|
"learning_rate": 6.559848220020996e-06, |
|
"loss": 4.3449, |
|
"step": 456 |
|
}, |
|
{ |
|
"epoch": 3.685483870967742, |
|
"grad_norm": 4.526096820831299, |
|
"learning_rate": 6.519849145508672e-06, |
|
"loss": 0.0972, |
|
"step": 457 |
|
}, |
|
{ |
|
"epoch": 3.693548387096774, |
|
"grad_norm": 134.6807861328125, |
|
"learning_rate": 6.4798500709963495e-06, |
|
"loss": 5.5296, |
|
"step": 458 |
|
}, |
|
{ |
|
"epoch": 3.7016129032258065, |
|
"grad_norm": 16.304237365722656, |
|
"learning_rate": 6.4398509964840265e-06, |
|
"loss": 0.2073, |
|
"step": 459 |
|
}, |
|
{ |
|
"epoch": 3.709677419354839, |
|
"grad_norm": 98.8707275390625, |
|
"learning_rate": 6.3998519219717025e-06, |
|
"loss": 0.9317, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 3.717741935483871, |
|
"grad_norm": 38.19011688232422, |
|
"learning_rate": 6.35985284745938e-06, |
|
"loss": 0.5302, |
|
"step": 461 |
|
}, |
|
{ |
|
"epoch": 3.725806451612903, |
|
"grad_norm": 56.57841110229492, |
|
"learning_rate": 6.319853772947056e-06, |
|
"loss": 1.2759, |
|
"step": 462 |
|
}, |
|
{ |
|
"epoch": 3.7338709677419355, |
|
"grad_norm": 24.514759063720703, |
|
"learning_rate": 6.279854698434733e-06, |
|
"loss": 0.2036, |
|
"step": 463 |
|
}, |
|
{ |
|
"epoch": 3.741935483870968, |
|
"grad_norm": 191.78668212890625, |
|
"learning_rate": 6.239855623922409e-06, |
|
"loss": 8.38, |
|
"step": 464 |
|
}, |
|
{ |
|
"epoch": 3.75, |
|
"grad_norm": 7.4319376945495605, |
|
"learning_rate": 6.199856549410087e-06, |
|
"loss": 0.3825, |
|
"step": 465 |
|
}, |
|
{ |
|
"epoch": 3.758064516129032, |
|
"grad_norm": 520.401123046875, |
|
"learning_rate": 6.159857474897764e-06, |
|
"loss": 115.6645, |
|
"step": 466 |
|
}, |
|
{ |
|
"epoch": 3.7661290322580645, |
|
"grad_norm": 46.42934036254883, |
|
"learning_rate": 6.119858400385441e-06, |
|
"loss": 0.5399, |
|
"step": 467 |
|
}, |
|
{ |
|
"epoch": 3.774193548387097, |
|
"grad_norm": 190.69224548339844, |
|
"learning_rate": 6.079859325873117e-06, |
|
"loss": 4.1707, |
|
"step": 468 |
|
}, |
|
{ |
|
"epoch": 3.782258064516129, |
|
"grad_norm": 23.724159240722656, |
|
"learning_rate": 6.039860251360795e-06, |
|
"loss": 0.4132, |
|
"step": 469 |
|
}, |
|
{ |
|
"epoch": 3.790322580645161, |
|
"grad_norm": 13.517512321472168, |
|
"learning_rate": 5.999861176848472e-06, |
|
"loss": 0.0815, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 3.7983870967741935, |
|
"grad_norm": 70.30924987792969, |
|
"learning_rate": 5.959862102336149e-06, |
|
"loss": 2.7397, |
|
"step": 471 |
|
}, |
|
{ |
|
"epoch": 3.806451612903226, |
|
"grad_norm": 16.972890853881836, |
|
"learning_rate": 5.919863027823825e-06, |
|
"loss": 0.0473, |
|
"step": 472 |
|
}, |
|
{ |
|
"epoch": 3.814516129032258, |
|
"grad_norm": 56.665103912353516, |
|
"learning_rate": 5.879863953311502e-06, |
|
"loss": 0.5666, |
|
"step": 473 |
|
}, |
|
{ |
|
"epoch": 3.8225806451612905, |
|
"grad_norm": 38.07454299926758, |
|
"learning_rate": 5.839864878799179e-06, |
|
"loss": 0.3797, |
|
"step": 474 |
|
}, |
|
{ |
|
"epoch": 3.8306451612903225, |
|
"grad_norm": 63.56098937988281, |
|
"learning_rate": 5.7998658042868555e-06, |
|
"loss": 1.4492, |
|
"step": 475 |
|
}, |
|
{ |
|
"epoch": 3.838709677419355, |
|
"grad_norm": 841.0067138671875, |
|
"learning_rate": 5.7598667297745325e-06, |
|
"loss": 8.2394, |
|
"step": 476 |
|
}, |
|
{ |
|
"epoch": 3.846774193548387, |
|
"grad_norm": 77.66548919677734, |
|
"learning_rate": 5.719867655262209e-06, |
|
"loss": 1.4433, |
|
"step": 477 |
|
}, |
|
{ |
|
"epoch": 3.8548387096774195, |
|
"grad_norm": 130.56881713867188, |
|
"learning_rate": 5.679868580749886e-06, |
|
"loss": 2.8763, |
|
"step": 478 |
|
}, |
|
{ |
|
"epoch": 3.8629032258064515, |
|
"grad_norm": 5.675972938537598, |
|
"learning_rate": 5.639869506237563e-06, |
|
"loss": 0.0891, |
|
"step": 479 |
|
}, |
|
{ |
|
"epoch": 3.870967741935484, |
|
"grad_norm": 69.9577407836914, |
|
"learning_rate": 5.59987043172524e-06, |
|
"loss": 2.6558, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 3.879032258064516, |
|
"grad_norm": 14.687697410583496, |
|
"learning_rate": 5.559871357212916e-06, |
|
"loss": 0.1182, |
|
"step": 481 |
|
}, |
|
{ |
|
"epoch": 3.8870967741935485, |
|
"grad_norm": 155.2462921142578, |
|
"learning_rate": 5.519872282700594e-06, |
|
"loss": 2.598, |
|
"step": 482 |
|
}, |
|
{ |
|
"epoch": 3.8951612903225805, |
|
"grad_norm": 31.2712345123291, |
|
"learning_rate": 5.479873208188271e-06, |
|
"loss": 0.7228, |
|
"step": 483 |
|
}, |
|
{ |
|
"epoch": 3.903225806451613, |
|
"grad_norm": 201.28076171875, |
|
"learning_rate": 5.439874133675948e-06, |
|
"loss": 12.3541, |
|
"step": 484 |
|
}, |
|
{ |
|
"epoch": 3.911290322580645, |
|
"grad_norm": 70.37089538574219, |
|
"learning_rate": 5.399875059163624e-06, |
|
"loss": 0.7182, |
|
"step": 485 |
|
}, |
|
{ |
|
"epoch": 3.9193548387096775, |
|
"grad_norm": 29.314970016479492, |
|
"learning_rate": 5.359875984651301e-06, |
|
"loss": 0.1898, |
|
"step": 486 |
|
}, |
|
{ |
|
"epoch": 3.9274193548387095, |
|
"grad_norm": 25.780534744262695, |
|
"learning_rate": 5.319876910138978e-06, |
|
"loss": 0.3334, |
|
"step": 487 |
|
}, |
|
{ |
|
"epoch": 3.935483870967742, |
|
"grad_norm": 8.816149711608887, |
|
"learning_rate": 5.279877835626655e-06, |
|
"loss": 0.3082, |
|
"step": 488 |
|
}, |
|
{ |
|
"epoch": 3.943548387096774, |
|
"grad_norm": 348.0280456542969, |
|
"learning_rate": 5.239878761114332e-06, |
|
"loss": 4.8654, |
|
"step": 489 |
|
}, |
|
{ |
|
"epoch": 3.9516129032258065, |
|
"grad_norm": 42.4134407043457, |
|
"learning_rate": 5.1998796866020085e-06, |
|
"loss": 1.2419, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 3.959677419354839, |
|
"grad_norm": 31.053295135498047, |
|
"learning_rate": 5.1598806120896854e-06, |
|
"loss": 0.6271, |
|
"step": 491 |
|
}, |
|
{ |
|
"epoch": 3.967741935483871, |
|
"grad_norm": 70.98059844970703, |
|
"learning_rate": 5.119881537577362e-06, |
|
"loss": 0.5582, |
|
"step": 492 |
|
}, |
|
{ |
|
"epoch": 3.975806451612903, |
|
"grad_norm": 118.1554946899414, |
|
"learning_rate": 5.079882463065039e-06, |
|
"loss": 1.5444, |
|
"step": 493 |
|
}, |
|
{ |
|
"epoch": 3.9838709677419355, |
|
"grad_norm": 288.1326599121094, |
|
"learning_rate": 5.039883388552716e-06, |
|
"loss": 7.396, |
|
"step": 494 |
|
}, |
|
{ |
|
"epoch": 3.991935483870968, |
|
"grad_norm": 19.853105545043945, |
|
"learning_rate": 4.999884314040392e-06, |
|
"loss": 0.9112, |
|
"step": 495 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"grad_norm": 32.062965393066406, |
|
"learning_rate": 4.95988523952807e-06, |
|
"loss": 0.2896, |
|
"step": 496 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_loss": 2.353773355484009, |
|
"eval_mae": 1.0791250467300415, |
|
"eval_mse": 2.353773355484009, |
|
"eval_r2": 0.3555777668952942, |
|
"eval_rmse": 1.5342012108859804, |
|
"eval_runtime": 1.3428, |
|
"eval_samples_per_second": 40.961, |
|
"eval_smape": 55.77985644340515, |
|
"eval_steps_per_second": 10.426, |
|
"step": 496 |
|
} |
|
], |
|
"logging_steps": 1, |
|
"max_steps": 620, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 5, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 2687879255015424.0, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": { |
|
"learning_rate": 2.479942619764035e-05 |
|
} |
|
} |
|
|