|
{ |
|
"best_metric": 0.3927477017364658, |
|
"best_model_checkpoint": "/media/cse/HDD/Shawon/shawon/MY DATA/VideoMAE_WLASL_2000_200_epochs_p20_SR_8_kinetics/checkpoint-28580", |
|
"epoch": 35.005000699888015, |
|
"eval_steps": 500, |
|
"global_step": 64305, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0002799552071668533, |
|
"grad_norm": 20.226421356201172, |
|
"learning_rate": 1.3997760358342666e-07, |
|
"loss": 30.4518, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.0005599104143337066, |
|
"grad_norm": 24.225967407226562, |
|
"learning_rate": 2.799552071668533e-07, |
|
"loss": 30.4528, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.0008398656215005599, |
|
"grad_norm": 20.26617431640625, |
|
"learning_rate": 4.1993281075028e-07, |
|
"loss": 30.5309, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.0011198208286674132, |
|
"grad_norm": 18.84654426574707, |
|
"learning_rate": 5.599104143337066e-07, |
|
"loss": 30.4432, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.0013997760358342665, |
|
"grad_norm": 19.313968658447266, |
|
"learning_rate": 6.998880179171333e-07, |
|
"loss": 30.476, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.0016797312430011197, |
|
"grad_norm": 19.507740020751953, |
|
"learning_rate": 8.3986562150056e-07, |
|
"loss": 30.4617, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.001959686450167973, |
|
"grad_norm": 18.0329647064209, |
|
"learning_rate": 9.798432250839866e-07, |
|
"loss": 30.4795, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.0022396416573348264, |
|
"grad_norm": 17.514469146728516, |
|
"learning_rate": 1.1198208286674133e-06, |
|
"loss": 30.4514, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.0025195968645016797, |
|
"grad_norm": 18.40747833251953, |
|
"learning_rate": 1.25979843225084e-06, |
|
"loss": 30.4258, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.002799552071668533, |
|
"grad_norm": 15.273112297058105, |
|
"learning_rate": 1.3997760358342666e-06, |
|
"loss": 30.4209, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.003079507278835386, |
|
"grad_norm": 16.360700607299805, |
|
"learning_rate": 1.5397536394176933e-06, |
|
"loss": 30.4484, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.0033594624860022394, |
|
"grad_norm": 14.241786003112793, |
|
"learning_rate": 1.67973124300112e-06, |
|
"loss": 30.4332, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.003639417693169093, |
|
"grad_norm": 13.431915283203125, |
|
"learning_rate": 1.8197088465845464e-06, |
|
"loss": 30.4436, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.003919372900335946, |
|
"grad_norm": 12.783411026000977, |
|
"learning_rate": 1.9596864501679732e-06, |
|
"loss": 30.436, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.0041993281075028, |
|
"grad_norm": 12.723618507385254, |
|
"learning_rate": 2.0996640537514e-06, |
|
"loss": 30.4174, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.004479283314669653, |
|
"grad_norm": 14.043514251708984, |
|
"learning_rate": 2.2396416573348266e-06, |
|
"loss": 30.4404, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.004759238521836506, |
|
"grad_norm": 11.425649642944336, |
|
"learning_rate": 2.379619260918253e-06, |
|
"loss": 30.4419, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.005, |
|
"eval_accuracy": 0.0012768130745658835, |
|
"eval_f1": 0.0003286915125900798, |
|
"eval_loss": 7.599897861480713, |
|
"eval_precision": 0.0003945151684672421, |
|
"eval_recall": 0.0012768130745658835, |
|
"eval_runtime": 160.9217, |
|
"eval_samples_per_second": 24.335, |
|
"eval_steps_per_second": 12.167, |
|
"eval_top_10_accuracy": 0.007660878447395302, |
|
"eval_top_1_accuracy": 0.0012768130745658835, |
|
"eval_top_5_accuracy": 0.0040858018386108275, |
|
"step": 1786 |
|
}, |
|
{ |
|
"epoch": 1.0000384938409854, |
|
"grad_norm": 11.807793617248535, |
|
"learning_rate": 2.5181970884658456e-06, |
|
"loss": 30.4173, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 1.0003184490481523, |
|
"grad_norm": 12.485947608947754, |
|
"learning_rate": 2.6581746920492722e-06, |
|
"loss": 30.3847, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 1.000598404255319, |
|
"grad_norm": 12.61637020111084, |
|
"learning_rate": 2.798152295632699e-06, |
|
"loss": 30.3864, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 1.000878359462486, |
|
"grad_norm": 14.853839874267578, |
|
"learning_rate": 2.9381298992161256e-06, |
|
"loss": 30.3646, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 1.0011583146696528, |
|
"grad_norm": 13.900428771972656, |
|
"learning_rate": 3.0781075027995522e-06, |
|
"loss": 30.3906, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 1.0014382698768196, |
|
"grad_norm": 15.140420913696289, |
|
"learning_rate": 3.218085106382979e-06, |
|
"loss": 30.38, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 1.0017182250839867, |
|
"grad_norm": 13.726295471191406, |
|
"learning_rate": 3.358062709966406e-06, |
|
"loss": 30.387, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 1.0019981802911535, |
|
"grad_norm": 14.190909385681152, |
|
"learning_rate": 3.498040313549832e-06, |
|
"loss": 30.3637, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 1.0022781354983203, |
|
"grad_norm": 14.675251960754395, |
|
"learning_rate": 3.638017917133259e-06, |
|
"loss": 30.3778, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 1.0025580907054872, |
|
"grad_norm": 12.957648277282715, |
|
"learning_rate": 3.777995520716685e-06, |
|
"loss": 30.3976, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 1.002838045912654, |
|
"grad_norm": 12.860355377197266, |
|
"learning_rate": 3.917973124300112e-06, |
|
"loss": 30.3693, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 1.0031180011198209, |
|
"grad_norm": 14.768610954284668, |
|
"learning_rate": 4.057950727883539e-06, |
|
"loss": 30.3588, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 1.0033979563269877, |
|
"grad_norm": 16.286272048950195, |
|
"learning_rate": 4.1979283314669655e-06, |
|
"loss": 30.3603, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 1.0036779115341545, |
|
"grad_norm": 18.51091766357422, |
|
"learning_rate": 4.337905935050392e-06, |
|
"loss": 30.3436, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 1.0039578667413214, |
|
"grad_norm": 19.337949752807617, |
|
"learning_rate": 4.477883538633819e-06, |
|
"loss": 30.3448, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 1.0042378219484882, |
|
"grad_norm": 19.713043212890625, |
|
"learning_rate": 4.6178611422172455e-06, |
|
"loss": 30.3127, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 1.004517777155655, |
|
"grad_norm": 20.15603256225586, |
|
"learning_rate": 4.757838745800672e-06, |
|
"loss": 30.3277, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 1.0047977323628219, |
|
"grad_norm": 20.172250747680664, |
|
"learning_rate": 4.897816349384099e-06, |
|
"loss": 30.3231, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 1.0049993001119821, |
|
"eval_accuracy": 0.001787538304392237, |
|
"eval_f1": 0.0005964394765241968, |
|
"eval_loss": 7.570636749267578, |
|
"eval_precision": 0.0006831536678159338, |
|
"eval_recall": 0.001787538304392237, |
|
"eval_runtime": 116.0933, |
|
"eval_samples_per_second": 33.731, |
|
"eval_steps_per_second": 16.866, |
|
"eval_top_10_accuracy": 0.01174668028600613, |
|
"eval_top_1_accuracy": 0.001787538304392237, |
|
"eval_top_5_accuracy": 0.007660878447395302, |
|
"step": 3572 |
|
}, |
|
{ |
|
"epoch": 2.000076987681971, |
|
"grad_norm": 22.496971130371094, |
|
"learning_rate": 5.0377939529675254e-06, |
|
"loss": 30.2773, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 2.000356942889138, |
|
"grad_norm": 29.170331954956055, |
|
"learning_rate": 5.177771556550952e-06, |
|
"loss": 30.1604, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 2.0006368980963045, |
|
"grad_norm": 33.307071685791016, |
|
"learning_rate": 5.3163493840985445e-06, |
|
"loss": 30.077, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 2.0009168533034716, |
|
"grad_norm": 29.411121368408203, |
|
"learning_rate": 5.456326987681971e-06, |
|
"loss": 30.0944, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 2.001196808510638, |
|
"grad_norm": 35.26206588745117, |
|
"learning_rate": 5.596304591265398e-06, |
|
"loss": 30.0089, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 2.0014767637178053, |
|
"grad_norm": 38.77720260620117, |
|
"learning_rate": 5.7362821948488245e-06, |
|
"loss": 29.9731, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 2.001756718924972, |
|
"grad_norm": 33.86006164550781, |
|
"learning_rate": 5.876259798432251e-06, |
|
"loss": 30.0306, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 2.002036674132139, |
|
"grad_norm": 38.09397888183594, |
|
"learning_rate": 6.016237402015678e-06, |
|
"loss": 29.8864, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 2.0023166293393055, |
|
"grad_norm": 39.75291442871094, |
|
"learning_rate": 6.1562150055991044e-06, |
|
"loss": 29.8617, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 2.0025965845464726, |
|
"grad_norm": 41.37080764770508, |
|
"learning_rate": 6.294792833146697e-06, |
|
"loss": 29.9367, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 2.002876539753639, |
|
"grad_norm": 37.2781982421875, |
|
"learning_rate": 6.434770436730124e-06, |
|
"loss": 29.9061, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 2.0031564949608063, |
|
"grad_norm": 41.08019256591797, |
|
"learning_rate": 6.57474804031355e-06, |
|
"loss": 29.779, |
|
"step": 4700 |
|
}, |
|
{ |
|
"epoch": 2.0034364501679733, |
|
"grad_norm": 38.2447624206543, |
|
"learning_rate": 6.714725643896977e-06, |
|
"loss": 29.893, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 2.00371640537514, |
|
"grad_norm": 42.33695983886719, |
|
"learning_rate": 6.854703247480404e-06, |
|
"loss": 29.6515, |
|
"step": 4900 |
|
}, |
|
{ |
|
"epoch": 2.003996360582307, |
|
"grad_norm": 40.0054817199707, |
|
"learning_rate": 6.99468085106383e-06, |
|
"loss": 29.7127, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 2.0042763157894736, |
|
"grad_norm": 39.20315170288086, |
|
"learning_rate": 7.134658454647257e-06, |
|
"loss": 29.6478, |
|
"step": 5100 |
|
}, |
|
{ |
|
"epoch": 2.0045562709966407, |
|
"grad_norm": 39.36686325073242, |
|
"learning_rate": 7.274636058230684e-06, |
|
"loss": 29.506, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 2.0048362262038073, |
|
"grad_norm": 43.698448181152344, |
|
"learning_rate": 7.41461366181411e-06, |
|
"loss": 29.5546, |
|
"step": 5300 |
|
}, |
|
{ |
|
"epoch": 2.0049986002239644, |
|
"eval_accuracy": 0.007405515832482125, |
|
"eval_f1": 0.0019352484689869383, |
|
"eval_loss": 7.384550094604492, |
|
"eval_precision": 0.0016670227951139786, |
|
"eval_recall": 0.007405515832482125, |
|
"eval_runtime": 115.3473, |
|
"eval_samples_per_second": 33.95, |
|
"eval_steps_per_second": 16.975, |
|
"eval_top_10_accuracy": 0.04162410623084781, |
|
"eval_top_1_accuracy": 0.007405515832482125, |
|
"eval_top_5_accuracy": 0.02400408580183861, |
|
"step": 5358 |
|
}, |
|
{ |
|
"epoch": 3.0001154815229563, |
|
"grad_norm": 41.90152359008789, |
|
"learning_rate": 7.554591265397537e-06, |
|
"loss": 29.4201, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 3.0003954367301233, |
|
"grad_norm": 40.79745864868164, |
|
"learning_rate": 7.694568868980963e-06, |
|
"loss": 28.9946, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 3.00067539193729, |
|
"grad_norm": 42.561485290527344, |
|
"learning_rate": 7.83454647256439e-06, |
|
"loss": 28.9452, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 3.000955347144457, |
|
"grad_norm": 43.85258865356445, |
|
"learning_rate": 7.974524076147817e-06, |
|
"loss": 28.9244, |
|
"step": 5700 |
|
}, |
|
{ |
|
"epoch": 3.0012353023516236, |
|
"grad_norm": 42.8238639831543, |
|
"learning_rate": 8.114501679731243e-06, |
|
"loss": 28.8971, |
|
"step": 5800 |
|
}, |
|
{ |
|
"epoch": 3.0015152575587907, |
|
"grad_norm": 42.562843322753906, |
|
"learning_rate": 8.25447928331467e-06, |
|
"loss": 28.8098, |
|
"step": 5900 |
|
}, |
|
{ |
|
"epoch": 3.0017952127659573, |
|
"grad_norm": 49.49258041381836, |
|
"learning_rate": 8.394456886898098e-06, |
|
"loss": 28.7524, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 3.0020751679731243, |
|
"grad_norm": 42.201019287109375, |
|
"learning_rate": 8.534434490481523e-06, |
|
"loss": 28.8442, |
|
"step": 6100 |
|
}, |
|
{ |
|
"epoch": 3.002355123180291, |
|
"grad_norm": 40.5860595703125, |
|
"learning_rate": 8.674412094064951e-06, |
|
"loss": 28.6063, |
|
"step": 6200 |
|
}, |
|
{ |
|
"epoch": 3.002635078387458, |
|
"grad_norm": 42.844871520996094, |
|
"learning_rate": 8.814389697648377e-06, |
|
"loss": 28.6592, |
|
"step": 6300 |
|
}, |
|
{ |
|
"epoch": 3.0029150335946246, |
|
"grad_norm": 39.86319351196289, |
|
"learning_rate": 8.954367301231802e-06, |
|
"loss": 28.5535, |
|
"step": 6400 |
|
}, |
|
{ |
|
"epoch": 3.0031949888017917, |
|
"grad_norm": 46.02956771850586, |
|
"learning_rate": 9.09434490481523e-06, |
|
"loss": 28.6243, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 3.0034749440089588, |
|
"grad_norm": 44.78327560424805, |
|
"learning_rate": 9.232922732362822e-06, |
|
"loss": 28.456, |
|
"step": 6600 |
|
}, |
|
{ |
|
"epoch": 3.0037548992161254, |
|
"grad_norm": 45.030609130859375, |
|
"learning_rate": 9.37290033594625e-06, |
|
"loss": 28.4532, |
|
"step": 6700 |
|
}, |
|
{ |
|
"epoch": 3.0040348544232924, |
|
"grad_norm": 42.88111877441406, |
|
"learning_rate": 9.512877939529676e-06, |
|
"loss": 28.4727, |
|
"step": 6800 |
|
}, |
|
{ |
|
"epoch": 3.004314809630459, |
|
"grad_norm": 40.01094436645508, |
|
"learning_rate": 9.652855543113103e-06, |
|
"loss": 28.3569, |
|
"step": 6900 |
|
}, |
|
{ |
|
"epoch": 3.004594764837626, |
|
"grad_norm": 41.569541931152344, |
|
"learning_rate": 9.792833146696529e-06, |
|
"loss": 28.2736, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 3.0048747200447927, |
|
"grad_norm": 41.992950439453125, |
|
"learning_rate": 9.932810750279955e-06, |
|
"loss": 28.1321, |
|
"step": 7100 |
|
}, |
|
{ |
|
"epoch": 3.005000699888018, |
|
"eval_accuracy": 0.02400408580183861, |
|
"eval_f1": 0.008749887799994431, |
|
"eval_loss": 7.097176551818848, |
|
"eval_precision": 0.008811658496728353, |
|
"eval_recall": 0.02400408580183861, |
|
"eval_runtime": 116.5095, |
|
"eval_samples_per_second": 33.611, |
|
"eval_steps_per_second": 16.806, |
|
"eval_top_10_accuracy": 0.10929519918283963, |
|
"eval_top_1_accuracy": 0.02400408580183861, |
|
"eval_top_5_accuracy": 0.07073544433094994, |
|
"step": 7145 |
|
}, |
|
{ |
|
"epoch": 4.000153975363942, |
|
"grad_norm": 40.83894729614258, |
|
"learning_rate": 1.0072788353863382e-05, |
|
"loss": 27.8419, |
|
"step": 7200 |
|
}, |
|
{ |
|
"epoch": 4.000433930571108, |
|
"grad_norm": 40.60577392578125, |
|
"learning_rate": 1.0212765957446808e-05, |
|
"loss": 27.5321, |
|
"step": 7300 |
|
}, |
|
{ |
|
"epoch": 4.000713885778276, |
|
"grad_norm": 44.68207931518555, |
|
"learning_rate": 1.0352743561030236e-05, |
|
"loss": 27.5769, |
|
"step": 7400 |
|
}, |
|
{ |
|
"epoch": 4.000993840985442, |
|
"grad_norm": 43.748348236083984, |
|
"learning_rate": 1.0492721164613663e-05, |
|
"loss": 27.4798, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 4.001273796192609, |
|
"grad_norm": 40.99502182006836, |
|
"learning_rate": 1.0632698768197089e-05, |
|
"loss": 27.3829, |
|
"step": 7600 |
|
}, |
|
{ |
|
"epoch": 4.001553751399776, |
|
"grad_norm": 42.76571273803711, |
|
"learning_rate": 1.0772676371780516e-05, |
|
"loss": 27.3276, |
|
"step": 7700 |
|
}, |
|
{ |
|
"epoch": 4.001833706606943, |
|
"grad_norm": 45.32221221923828, |
|
"learning_rate": 1.0912653975363942e-05, |
|
"loss": 27.2542, |
|
"step": 7800 |
|
}, |
|
{ |
|
"epoch": 4.00211366181411, |
|
"grad_norm": 44.121070861816406, |
|
"learning_rate": 1.1052631578947368e-05, |
|
"loss": 27.3298, |
|
"step": 7900 |
|
}, |
|
{ |
|
"epoch": 4.002393617021276, |
|
"grad_norm": 42.33357238769531, |
|
"learning_rate": 1.1192609182530796e-05, |
|
"loss": 27.1349, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 4.002673572228444, |
|
"grad_norm": 41.43338394165039, |
|
"learning_rate": 1.1332586786114223e-05, |
|
"loss": 27.1831, |
|
"step": 8100 |
|
}, |
|
{ |
|
"epoch": 4.0029535274356105, |
|
"grad_norm": 43.54603958129883, |
|
"learning_rate": 1.1472564389697649e-05, |
|
"loss": 26.9691, |
|
"step": 8200 |
|
}, |
|
{ |
|
"epoch": 4.003233482642777, |
|
"grad_norm": 40.624549865722656, |
|
"learning_rate": 1.1612541993281076e-05, |
|
"loss": 27.0156, |
|
"step": 8300 |
|
}, |
|
{ |
|
"epoch": 4.003513437849944, |
|
"grad_norm": 43.285614013671875, |
|
"learning_rate": 1.1752519596864502e-05, |
|
"loss": 27.0116, |
|
"step": 8400 |
|
}, |
|
{ |
|
"epoch": 4.003793393057111, |
|
"grad_norm": 40.69886779785156, |
|
"learning_rate": 1.1892497200447928e-05, |
|
"loss": 26.9703, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 4.004073348264278, |
|
"grad_norm": 40.920372009277344, |
|
"learning_rate": 1.2032474804031356e-05, |
|
"loss": 26.9118, |
|
"step": 8600 |
|
}, |
|
{ |
|
"epoch": 4.0043533034714445, |
|
"grad_norm": 43.02329635620117, |
|
"learning_rate": 1.2172452407614781e-05, |
|
"loss": 26.81, |
|
"step": 8700 |
|
}, |
|
{ |
|
"epoch": 4.004633258678611, |
|
"grad_norm": 46.108150482177734, |
|
"learning_rate": 1.2312430011198209e-05, |
|
"loss": 26.6285, |
|
"step": 8800 |
|
}, |
|
{ |
|
"epoch": 4.004913213885779, |
|
"grad_norm": 43.06632995605469, |
|
"learning_rate": 1.2452407614781636e-05, |
|
"loss": 26.7987, |
|
"step": 8900 |
|
}, |
|
{ |
|
"epoch": 4.005, |
|
"eval_accuracy": 0.04647599591419816, |
|
"eval_f1": 0.01843357325169779, |
|
"eval_loss": 6.765562534332275, |
|
"eval_precision": 0.01696919088530181, |
|
"eval_recall": 0.04647599591419816, |
|
"eval_runtime": 114.7714, |
|
"eval_samples_per_second": 34.12, |
|
"eval_steps_per_second": 17.06, |
|
"eval_top_10_accuracy": 0.19126659856996936, |
|
"eval_top_1_accuracy": 0.04647599591419816, |
|
"eval_top_5_accuracy": 0.127170582226762, |
|
"step": 8931 |
|
}, |
|
{ |
|
"epoch": 5.0001924692049275, |
|
"grad_norm": 38.85215377807617, |
|
"learning_rate": 1.2590985442329229e-05, |
|
"loss": 26.275, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 5.000472424412094, |
|
"grad_norm": 44.39185333251953, |
|
"learning_rate": 1.2730963045912656e-05, |
|
"loss": 25.8789, |
|
"step": 9100 |
|
}, |
|
{ |
|
"epoch": 5.000752379619261, |
|
"grad_norm": 42.62602996826172, |
|
"learning_rate": 1.287094064949608e-05, |
|
"loss": 25.7782, |
|
"step": 9200 |
|
}, |
|
{ |
|
"epoch": 5.001032334826427, |
|
"grad_norm": 41.51466751098633, |
|
"learning_rate": 1.3010918253079508e-05, |
|
"loss": 25.8335, |
|
"step": 9300 |
|
}, |
|
{ |
|
"epoch": 5.001312290033595, |
|
"grad_norm": 42.93080139160156, |
|
"learning_rate": 1.3150895856662934e-05, |
|
"loss": 25.7338, |
|
"step": 9400 |
|
}, |
|
{ |
|
"epoch": 5.0015922452407615, |
|
"grad_norm": 42.483642578125, |
|
"learning_rate": 1.3290873460246361e-05, |
|
"loss": 25.7467, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 5.001872200447928, |
|
"grad_norm": 45.41472625732422, |
|
"learning_rate": 1.3430851063829789e-05, |
|
"loss": 25.5684, |
|
"step": 9600 |
|
}, |
|
{ |
|
"epoch": 5.002152155655095, |
|
"grad_norm": 42.98210144042969, |
|
"learning_rate": 1.3570828667413216e-05, |
|
"loss": 25.6864, |
|
"step": 9700 |
|
}, |
|
{ |
|
"epoch": 5.002432110862262, |
|
"grad_norm": 48.234275817871094, |
|
"learning_rate": 1.371080627099664e-05, |
|
"loss": 25.4836, |
|
"step": 9800 |
|
}, |
|
{ |
|
"epoch": 5.002712066069429, |
|
"grad_norm": 43.54291915893555, |
|
"learning_rate": 1.3850783874580068e-05, |
|
"loss": 25.404, |
|
"step": 9900 |
|
}, |
|
{ |
|
"epoch": 5.0029920212765955, |
|
"grad_norm": 44.19446563720703, |
|
"learning_rate": 1.3990761478163494e-05, |
|
"loss": 25.3612, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 5.003271976483763, |
|
"grad_norm": 40.334381103515625, |
|
"learning_rate": 1.4130739081746921e-05, |
|
"loss": 25.5031, |
|
"step": 10100 |
|
}, |
|
{ |
|
"epoch": 5.00355193169093, |
|
"grad_norm": 44.043067932128906, |
|
"learning_rate": 1.4270716685330349e-05, |
|
"loss": 25.2018, |
|
"step": 10200 |
|
}, |
|
{ |
|
"epoch": 5.003831886898096, |
|
"grad_norm": 39.68805694580078, |
|
"learning_rate": 1.4410694288913776e-05, |
|
"loss": 25.3125, |
|
"step": 10300 |
|
}, |
|
{ |
|
"epoch": 5.004111842105263, |
|
"grad_norm": 45.52452850341797, |
|
"learning_rate": 1.45506718924972e-05, |
|
"loss": 25.1784, |
|
"step": 10400 |
|
}, |
|
{ |
|
"epoch": 5.00439179731243, |
|
"grad_norm": 43.4058723449707, |
|
"learning_rate": 1.4690649496080628e-05, |
|
"loss": 25.1405, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 5.004671752519597, |
|
"grad_norm": 43.045166015625, |
|
"learning_rate": 1.4830627099664055e-05, |
|
"loss": 25.0181, |
|
"step": 10600 |
|
}, |
|
{ |
|
"epoch": 5.004951707726764, |
|
"grad_norm": 44.4958381652832, |
|
"learning_rate": 1.4970604703247481e-05, |
|
"loss": 24.7006, |
|
"step": 10700 |
|
}, |
|
{ |
|
"epoch": 5.004999300111982, |
|
"eval_accuracy": 0.0702247191011236, |
|
"eval_f1": 0.03296463425661127, |
|
"eval_loss": 6.407422065734863, |
|
"eval_precision": 0.030481743763272673, |
|
"eval_recall": 0.0702247191011236, |
|
"eval_runtime": 112.2695, |
|
"eval_samples_per_second": 34.88, |
|
"eval_steps_per_second": 17.44, |
|
"eval_top_10_accuracy": 0.28651685393258425, |
|
"eval_top_1_accuracy": 0.0702247191011236, |
|
"eval_top_5_accuracy": 0.19816138917262513, |
|
"step": 10717 |
|
}, |
|
{ |
|
"epoch": 6.0002309630459125, |
|
"grad_norm": 46.9362907409668, |
|
"learning_rate": 1.5110582306830909e-05, |
|
"loss": 24.0997, |
|
"step": 10800 |
|
}, |
|
{ |
|
"epoch": 6.000510918253079, |
|
"grad_norm": 40.902610778808594, |
|
"learning_rate": 1.5250559910414333e-05, |
|
"loss": 24.1422, |
|
"step": 10900 |
|
}, |
|
{ |
|
"epoch": 6.000790873460247, |
|
"grad_norm": 44.164756774902344, |
|
"learning_rate": 1.5389137737961927e-05, |
|
"loss": 23.8297, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 6.001070828667413, |
|
"grad_norm": 46.19126510620117, |
|
"learning_rate": 1.5529115341545353e-05, |
|
"loss": 23.9123, |
|
"step": 11100 |
|
}, |
|
{ |
|
"epoch": 6.00135078387458, |
|
"grad_norm": 42.22810363769531, |
|
"learning_rate": 1.566909294512878e-05, |
|
"loss": 23.7447, |
|
"step": 11200 |
|
}, |
|
{ |
|
"epoch": 6.0016307390817465, |
|
"grad_norm": 43.56826400756836, |
|
"learning_rate": 1.5809070548712208e-05, |
|
"loss": 23.7598, |
|
"step": 11300 |
|
}, |
|
{ |
|
"epoch": 6.001910694288914, |
|
"grad_norm": 44.37257385253906, |
|
"learning_rate": 1.5949048152295633e-05, |
|
"loss": 23.6209, |
|
"step": 11400 |
|
}, |
|
{ |
|
"epoch": 6.002190649496081, |
|
"grad_norm": 47.96746063232422, |
|
"learning_rate": 1.6089025755879063e-05, |
|
"loss": 23.9762, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 6.002470604703247, |
|
"grad_norm": 46.42436599731445, |
|
"learning_rate": 1.6229003359462485e-05, |
|
"loss": 23.7592, |
|
"step": 11600 |
|
}, |
|
{ |
|
"epoch": 6.002750559910415, |
|
"grad_norm": 41.59648895263672, |
|
"learning_rate": 1.6368980963045914e-05, |
|
"loss": 23.4793, |
|
"step": 11700 |
|
}, |
|
{ |
|
"epoch": 6.003030515117581, |
|
"grad_norm": 43.254478454589844, |
|
"learning_rate": 1.650895856662934e-05, |
|
"loss": 23.5082, |
|
"step": 11800 |
|
}, |
|
{ |
|
"epoch": 6.003310470324748, |
|
"grad_norm": 42.162628173828125, |
|
"learning_rate": 1.6648936170212766e-05, |
|
"loss": 23.494, |
|
"step": 11900 |
|
}, |
|
{ |
|
"epoch": 6.003590425531915, |
|
"grad_norm": 47.11122512817383, |
|
"learning_rate": 1.6788913773796195e-05, |
|
"loss": 23.5767, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 6.003870380739082, |
|
"grad_norm": 43.33349609375, |
|
"learning_rate": 1.6928891377379617e-05, |
|
"loss": 23.3579, |
|
"step": 12100 |
|
}, |
|
{ |
|
"epoch": 6.004150335946249, |
|
"grad_norm": 44.804664611816406, |
|
"learning_rate": 1.7068868980963047e-05, |
|
"loss": 23.2691, |
|
"step": 12200 |
|
}, |
|
{ |
|
"epoch": 6.004430291153415, |
|
"grad_norm": 43.124515533447266, |
|
"learning_rate": 1.7208846584546473e-05, |
|
"loss": 23.1194, |
|
"step": 12300 |
|
}, |
|
{ |
|
"epoch": 6.004710246360582, |
|
"grad_norm": 43.7430419921875, |
|
"learning_rate": 1.7348824188129902e-05, |
|
"loss": 23.0302, |
|
"step": 12400 |
|
}, |
|
{ |
|
"epoch": 6.004990201567749, |
|
"grad_norm": 52.04240798950195, |
|
"learning_rate": 1.7488801791713328e-05, |
|
"loss": 22.9951, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 6.004998600223964, |
|
"eval_accuracy": 0.10342185903983657, |
|
"eval_f1": 0.05378757190292093, |
|
"eval_loss": 6.031359672546387, |
|
"eval_precision": 0.05112106716669837, |
|
"eval_recall": 0.10342185903983657, |
|
"eval_runtime": 116.7973, |
|
"eval_samples_per_second": 33.528, |
|
"eval_steps_per_second": 16.764, |
|
"eval_top_10_accuracy": 0.368488253319714, |
|
"eval_top_1_accuracy": 0.10342185903983657, |
|
"eval_top_5_accuracy": 0.26940755873340144, |
|
"step": 12503 |
|
}, |
|
{ |
|
"epoch": 7.000269456886898, |
|
"grad_norm": 44.359230041503906, |
|
"learning_rate": 1.7628779395296753e-05, |
|
"loss": 22.1196, |
|
"step": 12600 |
|
}, |
|
{ |
|
"epoch": 7.000549412094065, |
|
"grad_norm": 48.224517822265625, |
|
"learning_rate": 1.776875699888018e-05, |
|
"loss": 22.1125, |
|
"step": 12700 |
|
}, |
|
{ |
|
"epoch": 7.000829367301232, |
|
"grad_norm": 48.176475524902344, |
|
"learning_rate": 1.7908734602463605e-05, |
|
"loss": 21.6989, |
|
"step": 12800 |
|
}, |
|
{ |
|
"epoch": 7.001109322508398, |
|
"grad_norm": 46.10124588012695, |
|
"learning_rate": 1.8048712206047034e-05, |
|
"loss": 21.7023, |
|
"step": 12900 |
|
}, |
|
{ |
|
"epoch": 7.001389277715566, |
|
"grad_norm": 48.56136703491211, |
|
"learning_rate": 1.818868980963046e-05, |
|
"loss": 21.6049, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 7.001669232922732, |
|
"grad_norm": 45.572750091552734, |
|
"learning_rate": 1.8328667413213886e-05, |
|
"loss": 21.7628, |
|
"step": 13100 |
|
}, |
|
{ |
|
"epoch": 7.001949188129899, |
|
"grad_norm": 46.16699981689453, |
|
"learning_rate": 1.846724524076148e-05, |
|
"loss": 21.8182, |
|
"step": 13200 |
|
}, |
|
{ |
|
"epoch": 7.0022291433370665, |
|
"grad_norm": 45.37190628051758, |
|
"learning_rate": 1.8607222844344906e-05, |
|
"loss": 21.6372, |
|
"step": 13300 |
|
}, |
|
{ |
|
"epoch": 7.002509098544233, |
|
"grad_norm": 48.9770622253418, |
|
"learning_rate": 1.874720044792833e-05, |
|
"loss": 21.6686, |
|
"step": 13400 |
|
}, |
|
{ |
|
"epoch": 7.0027890537514, |
|
"grad_norm": 45.02006530761719, |
|
"learning_rate": 1.8887178051511757e-05, |
|
"loss": 21.6113, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 7.003069008958566, |
|
"grad_norm": 42.47629928588867, |
|
"learning_rate": 1.9027155655095186e-05, |
|
"loss": 21.3825, |
|
"step": 13600 |
|
}, |
|
{ |
|
"epoch": 7.003348964165734, |
|
"grad_norm": 49.70724868774414, |
|
"learning_rate": 1.9167133258678612e-05, |
|
"loss": 21.0877, |
|
"step": 13700 |
|
}, |
|
{ |
|
"epoch": 7.0036289193729, |
|
"grad_norm": 45.634117126464844, |
|
"learning_rate": 1.930711086226204e-05, |
|
"loss": 21.2323, |
|
"step": 13800 |
|
}, |
|
{ |
|
"epoch": 7.003908874580067, |
|
"grad_norm": 48.9789924621582, |
|
"learning_rate": 1.9447088465845464e-05, |
|
"loss": 20.915, |
|
"step": 13900 |
|
}, |
|
{ |
|
"epoch": 7.004188829787234, |
|
"grad_norm": 46.66007614135742, |
|
"learning_rate": 1.9587066069428893e-05, |
|
"loss": 20.9474, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 7.004468784994401, |
|
"grad_norm": 48.38516616821289, |
|
"learning_rate": 1.972704367301232e-05, |
|
"loss": 21.0617, |
|
"step": 14100 |
|
}, |
|
{ |
|
"epoch": 7.004748740201568, |
|
"grad_norm": 52.5923957824707, |
|
"learning_rate": 1.9867021276595745e-05, |
|
"loss": 21.0796, |
|
"step": 14200 |
|
}, |
|
{ |
|
"epoch": 7.005000699888018, |
|
"eval_accuracy": 0.14555669050051073, |
|
"eval_f1": 0.0839986463173031, |
|
"eval_loss": 5.593417644500732, |
|
"eval_precision": 0.08039314901923203, |
|
"eval_recall": 0.14555669050051073, |
|
"eval_runtime": 115.0353, |
|
"eval_samples_per_second": 34.042, |
|
"eval_steps_per_second": 17.021, |
|
"eval_top_10_accuracy": 0.45965270684371806, |
|
"eval_top_1_accuracy": 0.14555669050051073, |
|
"eval_top_5_accuracy": 0.3455056179775281, |
|
"step": 14290 |
|
}, |
|
{ |
|
"epoch": 8.000027995520716, |
|
"grad_norm": 50.10724639892578, |
|
"learning_rate": 2.0006998880179174e-05, |
|
"loss": 20.794, |
|
"step": 14300 |
|
}, |
|
{ |
|
"epoch": 8.000307950727883, |
|
"grad_norm": 47.62452697753906, |
|
"learning_rate": 2.01469764837626e-05, |
|
"loss": 19.6639, |
|
"step": 14400 |
|
}, |
|
{ |
|
"epoch": 8.00058790593505, |
|
"grad_norm": 52.315673828125, |
|
"learning_rate": 2.0286954087346026e-05, |
|
"loss": 19.4833, |
|
"step": 14500 |
|
}, |
|
{ |
|
"epoch": 8.000867861142217, |
|
"grad_norm": 47.47260665893555, |
|
"learning_rate": 2.042693169092945e-05, |
|
"loss": 19.5356, |
|
"step": 14600 |
|
}, |
|
{ |
|
"epoch": 8.001147816349384, |
|
"grad_norm": 49.96697235107422, |
|
"learning_rate": 2.0566909294512877e-05, |
|
"loss": 19.4718, |
|
"step": 14700 |
|
}, |
|
{ |
|
"epoch": 8.001427771556552, |
|
"grad_norm": 52.33249282836914, |
|
"learning_rate": 2.0706886898096306e-05, |
|
"loss": 19.4401, |
|
"step": 14800 |
|
}, |
|
{ |
|
"epoch": 8.001707726763717, |
|
"grad_norm": 51.28528594970703, |
|
"learning_rate": 2.0846864501679732e-05, |
|
"loss": 19.3228, |
|
"step": 14900 |
|
}, |
|
{ |
|
"epoch": 8.001987681970885, |
|
"grad_norm": 55.38609313964844, |
|
"learning_rate": 2.0986842105263158e-05, |
|
"loss": 19.431, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 8.002267637178052, |
|
"grad_norm": 44.709232330322266, |
|
"learning_rate": 2.1126819708846584e-05, |
|
"loss": 19.0222, |
|
"step": 15100 |
|
}, |
|
{ |
|
"epoch": 8.002547592385218, |
|
"grad_norm": 50.02477264404297, |
|
"learning_rate": 2.1266797312430013e-05, |
|
"loss": 19.1266, |
|
"step": 15200 |
|
}, |
|
{ |
|
"epoch": 8.002827547592386, |
|
"grad_norm": 53.93327331542969, |
|
"learning_rate": 2.1405375139977604e-05, |
|
"loss": 19.0785, |
|
"step": 15300 |
|
}, |
|
{ |
|
"epoch": 8.003107502799551, |
|
"grad_norm": 47.67805862426758, |
|
"learning_rate": 2.1545352743561033e-05, |
|
"loss": 19.1712, |
|
"step": 15400 |
|
}, |
|
{ |
|
"epoch": 8.003387458006719, |
|
"grad_norm": 51.58271789550781, |
|
"learning_rate": 2.168533034714446e-05, |
|
"loss": 19.1443, |
|
"step": 15500 |
|
}, |
|
{ |
|
"epoch": 8.003667413213886, |
|
"grad_norm": 50.68724822998047, |
|
"learning_rate": 2.1825307950727885e-05, |
|
"loss": 18.8917, |
|
"step": 15600 |
|
}, |
|
{ |
|
"epoch": 8.003947368421052, |
|
"grad_norm": 56.362327575683594, |
|
"learning_rate": 2.196528555431131e-05, |
|
"loss": 18.6252, |
|
"step": 15700 |
|
}, |
|
{ |
|
"epoch": 8.00422732362822, |
|
"grad_norm": 50.97798538208008, |
|
"learning_rate": 2.2105263157894736e-05, |
|
"loss": 18.6619, |
|
"step": 15800 |
|
}, |
|
{ |
|
"epoch": 8.004507278835387, |
|
"grad_norm": 47.420658111572266, |
|
"learning_rate": 2.2245240761478165e-05, |
|
"loss": 18.7196, |
|
"step": 15900 |
|
}, |
|
{ |
|
"epoch": 8.004787234042553, |
|
"grad_norm": 49.1622428894043, |
|
"learning_rate": 2.238521836506159e-05, |
|
"loss": 18.8279, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 8.005, |
|
"eval_accuracy": 0.17288049029622063, |
|
"eval_f1": 0.10595764407765323, |
|
"eval_loss": 5.153491020202637, |
|
"eval_precision": 0.10202419282977301, |
|
"eval_recall": 0.17288049029622063, |
|
"eval_runtime": 118.0289, |
|
"eval_samples_per_second": 33.178, |
|
"eval_steps_per_second": 16.589, |
|
"eval_top_10_accuracy": 0.5298774259448417, |
|
"eval_top_1_accuracy": 0.17288049029622063, |
|
"eval_top_5_accuracy": 0.4090909090909091, |
|
"step": 16076 |
|
}, |
|
{ |
|
"epoch": 9.000066489361702, |
|
"grad_norm": 55.131221771240234, |
|
"learning_rate": 2.252519596864502e-05, |
|
"loss": 18.3135, |
|
"step": 16100 |
|
}, |
|
{ |
|
"epoch": 9.00034644456887, |
|
"grad_norm": 49.48210144042969, |
|
"learning_rate": 2.2665173572228446e-05, |
|
"loss": 17.3571, |
|
"step": 16200 |
|
}, |
|
{ |
|
"epoch": 9.000626399776035, |
|
"grad_norm": 48.50625228881836, |
|
"learning_rate": 2.280515117581187e-05, |
|
"loss": 17.119, |
|
"step": 16300 |
|
}, |
|
{ |
|
"epoch": 9.000906354983202, |
|
"grad_norm": 51.08823013305664, |
|
"learning_rate": 2.2945128779395298e-05, |
|
"loss": 17.2323, |
|
"step": 16400 |
|
}, |
|
{ |
|
"epoch": 9.00118631019037, |
|
"grad_norm": 47.95245361328125, |
|
"learning_rate": 2.3085106382978724e-05, |
|
"loss": 17.0125, |
|
"step": 16500 |
|
}, |
|
{ |
|
"epoch": 9.001466265397536, |
|
"grad_norm": 54.29073715209961, |
|
"learning_rate": 2.3225083986562153e-05, |
|
"loss": 16.7668, |
|
"step": 16600 |
|
}, |
|
{ |
|
"epoch": 9.001746220604703, |
|
"grad_norm": 48.46099090576172, |
|
"learning_rate": 2.336506159014558e-05, |
|
"loss": 16.9132, |
|
"step": 16700 |
|
}, |
|
{ |
|
"epoch": 9.00202617581187, |
|
"grad_norm": 56.089088439941406, |
|
"learning_rate": 2.3505039193729004e-05, |
|
"loss": 16.6505, |
|
"step": 16800 |
|
}, |
|
{ |
|
"epoch": 9.002306131019036, |
|
"grad_norm": 51.676971435546875, |
|
"learning_rate": 2.364501679731243e-05, |
|
"loss": 16.1727, |
|
"step": 16900 |
|
}, |
|
{ |
|
"epoch": 9.002586086226204, |
|
"grad_norm": 47.286293029785156, |
|
"learning_rate": 2.3784994400895856e-05, |
|
"loss": 16.6123, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 9.002866041433371, |
|
"grad_norm": 50.159095764160156, |
|
"learning_rate": 2.3924972004479285e-05, |
|
"loss": 16.6695, |
|
"step": 17100 |
|
}, |
|
{ |
|
"epoch": 9.003145996640537, |
|
"grad_norm": 54.44445037841797, |
|
"learning_rate": 2.406494960806271e-05, |
|
"loss": 16.6419, |
|
"step": 17200 |
|
}, |
|
{ |
|
"epoch": 9.003425951847705, |
|
"grad_norm": 51.42140579223633, |
|
"learning_rate": 2.420492721164614e-05, |
|
"loss": 16.5902, |
|
"step": 17300 |
|
}, |
|
{ |
|
"epoch": 9.00370590705487, |
|
"grad_norm": 66.35865783691406, |
|
"learning_rate": 2.4344904815229563e-05, |
|
"loss": 16.2157, |
|
"step": 17400 |
|
}, |
|
{ |
|
"epoch": 9.003985862262038, |
|
"grad_norm": 56.74697494506836, |
|
"learning_rate": 2.4484882418812992e-05, |
|
"loss": 16.3898, |
|
"step": 17500 |
|
}, |
|
{ |
|
"epoch": 9.004265817469205, |
|
"grad_norm": 63.02745819091797, |
|
"learning_rate": 2.4624860022396418e-05, |
|
"loss": 16.0808, |
|
"step": 17600 |
|
}, |
|
{ |
|
"epoch": 9.004545772676371, |
|
"grad_norm": 54.27665328979492, |
|
"learning_rate": 2.4764837625979844e-05, |
|
"loss": 15.9748, |
|
"step": 17700 |
|
}, |
|
{ |
|
"epoch": 9.004825727883539, |
|
"grad_norm": 51.662681579589844, |
|
"learning_rate": 2.4904815229563273e-05, |
|
"loss": 16.0168, |
|
"step": 17800 |
|
}, |
|
{ |
|
"epoch": 9.004999300111981, |
|
"eval_accuracy": 0.2270173646578141, |
|
"eval_f1": 0.15350337193318386, |
|
"eval_loss": 4.687228679656982, |
|
"eval_precision": 0.14829049623881735, |
|
"eval_recall": 0.2270173646578141, |
|
"eval_runtime": 116.4833, |
|
"eval_samples_per_second": 33.619, |
|
"eval_steps_per_second": 16.809, |
|
"eval_top_10_accuracy": 0.6123595505617978, |
|
"eval_top_1_accuracy": 0.2270173646578141, |
|
"eval_top_5_accuracy": 0.486976506639428, |
|
"step": 17862 |
|
}, |
|
{ |
|
"epoch": 10.000104983202688, |
|
"grad_norm": 55.48484802246094, |
|
"learning_rate": 2.5043393057110863e-05, |
|
"loss": 15.4913, |
|
"step": 17900 |
|
}, |
|
{ |
|
"epoch": 10.000384938409855, |
|
"grad_norm": 54.82772445678711, |
|
"learning_rate": 2.518337066069429e-05, |
|
"loss": 14.5521, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 10.00066489361702, |
|
"grad_norm": 57.67549514770508, |
|
"learning_rate": 2.532334826427772e-05, |
|
"loss": 14.5236, |
|
"step": 18100 |
|
}, |
|
{ |
|
"epoch": 10.000944848824188, |
|
"grad_norm": 58.726463317871094, |
|
"learning_rate": 2.5463325867861144e-05, |
|
"loss": 14.3662, |
|
"step": 18200 |
|
}, |
|
{ |
|
"epoch": 10.001224804031356, |
|
"grad_norm": 53.089969635009766, |
|
"learning_rate": 2.5603303471444567e-05, |
|
"loss": 14.2559, |
|
"step": 18300 |
|
}, |
|
{ |
|
"epoch": 10.001504759238522, |
|
"grad_norm": 57.39337921142578, |
|
"learning_rate": 2.5743281075027996e-05, |
|
"loss": 14.4143, |
|
"step": 18400 |
|
}, |
|
{ |
|
"epoch": 10.001784714445689, |
|
"grad_norm": 56.75421905517578, |
|
"learning_rate": 2.588325867861142e-05, |
|
"loss": 14.2805, |
|
"step": 18500 |
|
}, |
|
{ |
|
"epoch": 10.002064669652855, |
|
"grad_norm": 52.258358001708984, |
|
"learning_rate": 2.602323628219485e-05, |
|
"loss": 14.1713, |
|
"step": 18600 |
|
}, |
|
{ |
|
"epoch": 10.002344624860022, |
|
"grad_norm": 47.68207550048828, |
|
"learning_rate": 2.6163213885778277e-05, |
|
"loss": 13.8049, |
|
"step": 18700 |
|
}, |
|
{ |
|
"epoch": 10.00262458006719, |
|
"grad_norm": 52.4375, |
|
"learning_rate": 2.6303191489361706e-05, |
|
"loss": 13.8381, |
|
"step": 18800 |
|
}, |
|
{ |
|
"epoch": 10.002904535274356, |
|
"grad_norm": 52.6395378112793, |
|
"learning_rate": 2.644316909294513e-05, |
|
"loss": 13.5344, |
|
"step": 18900 |
|
}, |
|
{ |
|
"epoch": 10.003184490481523, |
|
"grad_norm": 59.29886245727539, |
|
"learning_rate": 2.6583146696528554e-05, |
|
"loss": 13.7621, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 10.00346444568869, |
|
"grad_norm": 49.460113525390625, |
|
"learning_rate": 2.6723124300111983e-05, |
|
"loss": 13.7878, |
|
"step": 19100 |
|
}, |
|
{ |
|
"epoch": 10.003744400895856, |
|
"grad_norm": 56.663902282714844, |
|
"learning_rate": 2.686310190369541e-05, |
|
"loss": 13.856, |
|
"step": 19200 |
|
}, |
|
{ |
|
"epoch": 10.004024356103024, |
|
"grad_norm": 58.83359909057617, |
|
"learning_rate": 2.700307950727884e-05, |
|
"loss": 13.6796, |
|
"step": 19300 |
|
}, |
|
{ |
|
"epoch": 10.00430431131019, |
|
"grad_norm": 60.854942321777344, |
|
"learning_rate": 2.7143057110862264e-05, |
|
"loss": 13.3986, |
|
"step": 19400 |
|
}, |
|
{ |
|
"epoch": 10.004584266517357, |
|
"grad_norm": 48.193965911865234, |
|
"learning_rate": 2.7283034714445687e-05, |
|
"loss": 13.606, |
|
"step": 19500 |
|
}, |
|
{ |
|
"epoch": 10.004864221724524, |
|
"grad_norm": 57.63452911376953, |
|
"learning_rate": 2.742301231802912e-05, |
|
"loss": 13.4662, |
|
"step": 19600 |
|
}, |
|
{ |
|
"epoch": 10.004998600223963, |
|
"eval_accuracy": 0.27068437180796734, |
|
"eval_f1": 0.19264529608572983, |
|
"eval_loss": 4.217360019683838, |
|
"eval_precision": 0.18252703447090352, |
|
"eval_recall": 0.27068437180796734, |
|
"eval_runtime": 119.985, |
|
"eval_samples_per_second": 32.637, |
|
"eval_steps_per_second": 16.319, |
|
"eval_top_10_accuracy": 0.6802860061287027, |
|
"eval_top_1_accuracy": 0.2704290091930541, |
|
"eval_top_5_accuracy": 0.5584780388151175, |
|
"step": 19648 |
|
}, |
|
{ |
|
"epoch": 11.000143477043673, |
|
"grad_norm": 45.823238372802734, |
|
"learning_rate": 2.756298992161254e-05, |
|
"loss": 12.3464, |
|
"step": 19700 |
|
}, |
|
{ |
|
"epoch": 11.00042343225084, |
|
"grad_norm": 63.88374328613281, |
|
"learning_rate": 2.770296752519597e-05, |
|
"loss": 11.6032, |
|
"step": 19800 |
|
}, |
|
{ |
|
"epoch": 11.000703387458007, |
|
"grad_norm": 64.50922393798828, |
|
"learning_rate": 2.7841545352743565e-05, |
|
"loss": 11.5978, |
|
"step": 19900 |
|
}, |
|
{ |
|
"epoch": 11.000983342665174, |
|
"grad_norm": 58.88920211791992, |
|
"learning_rate": 2.7981522956326987e-05, |
|
"loss": 11.3596, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 11.00126329787234, |
|
"grad_norm": 56.350189208984375, |
|
"learning_rate": 2.8121500559910413e-05, |
|
"loss": 11.7526, |
|
"step": 20100 |
|
}, |
|
{ |
|
"epoch": 11.001543253079507, |
|
"grad_norm": 56.470664978027344, |
|
"learning_rate": 2.8261478163493842e-05, |
|
"loss": 11.2894, |
|
"step": 20200 |
|
}, |
|
{ |
|
"epoch": 11.001823208286675, |
|
"grad_norm": 69.60688781738281, |
|
"learning_rate": 2.8401455767077268e-05, |
|
"loss": 11.3725, |
|
"step": 20300 |
|
}, |
|
{ |
|
"epoch": 11.00210316349384, |
|
"grad_norm": 52.4871711730957, |
|
"learning_rate": 2.8541433370660697e-05, |
|
"loss": 11.4314, |
|
"step": 20400 |
|
}, |
|
{ |
|
"epoch": 11.002383118701008, |
|
"grad_norm": 55.38204574584961, |
|
"learning_rate": 2.8681410974244123e-05, |
|
"loss": 11.2979, |
|
"step": 20500 |
|
}, |
|
{ |
|
"epoch": 11.002663073908174, |
|
"grad_norm": 75.11087036132812, |
|
"learning_rate": 2.8821388577827552e-05, |
|
"loss": 11.306, |
|
"step": 20600 |
|
}, |
|
{ |
|
"epoch": 11.002943029115341, |
|
"grad_norm": 57.3330078125, |
|
"learning_rate": 2.8961366181410975e-05, |
|
"loss": 11.0933, |
|
"step": 20700 |
|
}, |
|
{ |
|
"epoch": 11.003222984322509, |
|
"grad_norm": 54.95749282836914, |
|
"learning_rate": 2.91013437849944e-05, |
|
"loss": 11.3495, |
|
"step": 20800 |
|
}, |
|
{ |
|
"epoch": 11.003502939529675, |
|
"grad_norm": 64.22066497802734, |
|
"learning_rate": 2.924132138857783e-05, |
|
"loss": 11.4248, |
|
"step": 20900 |
|
}, |
|
{ |
|
"epoch": 11.003782894736842, |
|
"grad_norm": 59.43482208251953, |
|
"learning_rate": 2.9381298992161256e-05, |
|
"loss": 11.1135, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 11.00406284994401, |
|
"grad_norm": 53.853492736816406, |
|
"learning_rate": 2.9521276595744685e-05, |
|
"loss": 11.0782, |
|
"step": 21100 |
|
}, |
|
{ |
|
"epoch": 11.004342805151175, |
|
"grad_norm": 63.04794692993164, |
|
"learning_rate": 2.966125419932811e-05, |
|
"loss": 10.8959, |
|
"step": 21200 |
|
}, |
|
{ |
|
"epoch": 11.004622760358343, |
|
"grad_norm": 59.868072509765625, |
|
"learning_rate": 2.97998320268757e-05, |
|
"loss": 11.1934, |
|
"step": 21300 |
|
}, |
|
{ |
|
"epoch": 11.00490271556551, |
|
"grad_norm": 62.26585006713867, |
|
"learning_rate": 2.9939809630459127e-05, |
|
"loss": 10.8825, |
|
"step": 21400 |
|
}, |
|
{ |
|
"epoch": 11.005000699888019, |
|
"eval_accuracy": 0.3166496424923391, |
|
"eval_f1": 0.24054554724380248, |
|
"eval_loss": 3.7936577796936035, |
|
"eval_precision": 0.2340502427789698, |
|
"eval_recall": 0.3166496424923391, |
|
"eval_runtime": 113.6495, |
|
"eval_samples_per_second": 34.457, |
|
"eval_steps_per_second": 17.228, |
|
"eval_top_10_accuracy": 0.7288049029622063, |
|
"eval_top_1_accuracy": 0.3158835546475996, |
|
"eval_top_5_accuracy": 0.6182328907048008, |
|
"step": 21435 |
|
}, |
|
{ |
|
"epoch": 12.00018197088466, |
|
"grad_norm": 61.41596984863281, |
|
"learning_rate": 3.0079787234042556e-05, |
|
"loss": 9.9124, |
|
"step": 21500 |
|
}, |
|
{ |
|
"epoch": 12.000461926091825, |
|
"grad_norm": 44.31859588623047, |
|
"learning_rate": 3.021976483762598e-05, |
|
"loss": 9.0461, |
|
"step": 21600 |
|
}, |
|
{ |
|
"epoch": 12.000741881298993, |
|
"grad_norm": 52.90311813354492, |
|
"learning_rate": 3.035974244120941e-05, |
|
"loss": 8.8258, |
|
"step": 21700 |
|
}, |
|
{ |
|
"epoch": 12.001021836506158, |
|
"grad_norm": 54.84385299682617, |
|
"learning_rate": 3.0499720044792834e-05, |
|
"loss": 8.6913, |
|
"step": 21800 |
|
}, |
|
{ |
|
"epoch": 12.001301791713326, |
|
"grad_norm": 56.61054611206055, |
|
"learning_rate": 3.063969764837626e-05, |
|
"loss": 9.0271, |
|
"step": 21900 |
|
}, |
|
{ |
|
"epoch": 12.001581746920493, |
|
"grad_norm": 68.57939147949219, |
|
"learning_rate": 3.077967525195969e-05, |
|
"loss": 9.0834, |
|
"step": 22000 |
|
}, |
|
{ |
|
"epoch": 12.001861702127659, |
|
"grad_norm": 42.94862747192383, |
|
"learning_rate": 3.091965285554311e-05, |
|
"loss": 8.8857, |
|
"step": 22100 |
|
}, |
|
{ |
|
"epoch": 12.002141657334827, |
|
"grad_norm": 58.98466491699219, |
|
"learning_rate": 3.1059630459126544e-05, |
|
"loss": 8.98, |
|
"step": 22200 |
|
}, |
|
{ |
|
"epoch": 12.002421612541994, |
|
"grad_norm": 46.89601135253906, |
|
"learning_rate": 3.119960806270997e-05, |
|
"loss": 8.6856, |
|
"step": 22300 |
|
}, |
|
{ |
|
"epoch": 12.00270156774916, |
|
"grad_norm": 60.492645263671875, |
|
"learning_rate": 3.1339585666293395e-05, |
|
"loss": 8.4309, |
|
"step": 22400 |
|
}, |
|
{ |
|
"epoch": 12.002981522956327, |
|
"grad_norm": 55.55988693237305, |
|
"learning_rate": 3.147956326987682e-05, |
|
"loss": 9.0933, |
|
"step": 22500 |
|
}, |
|
{ |
|
"epoch": 12.003261478163493, |
|
"grad_norm": 59.575557708740234, |
|
"learning_rate": 3.161954087346025e-05, |
|
"loss": 8.7039, |
|
"step": 22600 |
|
}, |
|
{ |
|
"epoch": 12.00354143337066, |
|
"grad_norm": 66.46175384521484, |
|
"learning_rate": 3.175951847704367e-05, |
|
"loss": 8.8195, |
|
"step": 22700 |
|
}, |
|
{ |
|
"epoch": 12.003821388577828, |
|
"grad_norm": 57.742042541503906, |
|
"learning_rate": 3.18994960806271e-05, |
|
"loss": 8.4553, |
|
"step": 22800 |
|
}, |
|
{ |
|
"epoch": 12.004101343784994, |
|
"grad_norm": 56.0836181640625, |
|
"learning_rate": 3.203947368421053e-05, |
|
"loss": 8.6364, |
|
"step": 22900 |
|
}, |
|
{ |
|
"epoch": 12.004381298992161, |
|
"grad_norm": 75.34262084960938, |
|
"learning_rate": 3.217945128779396e-05, |
|
"loss": 8.6335, |
|
"step": 23000 |
|
}, |
|
{ |
|
"epoch": 12.004661254199329, |
|
"grad_norm": 60.73629379272461, |
|
"learning_rate": 3.2319428891377376e-05, |
|
"loss": 8.6328, |
|
"step": 23100 |
|
}, |
|
{ |
|
"epoch": 12.004941209406494, |
|
"grad_norm": 46.728450775146484, |
|
"learning_rate": 3.245940649496081e-05, |
|
"loss": 8.493, |
|
"step": 23200 |
|
}, |
|
{ |
|
"epoch": 12.005, |
|
"eval_accuracy": 0.3406537282941777, |
|
"eval_f1": 0.27469889034445055, |
|
"eval_loss": 3.452850103378296, |
|
"eval_precision": 0.26851533056393956, |
|
"eval_recall": 0.3406537282941777, |
|
"eval_runtime": 114.9185, |
|
"eval_samples_per_second": 34.076, |
|
"eval_steps_per_second": 17.038, |
|
"eval_top_10_accuracy": 0.7602145045965271, |
|
"eval_top_1_accuracy": 0.34014300306435136, |
|
"eval_top_5_accuracy": 0.6618998978549541, |
|
"step": 23221 |
|
}, |
|
{ |
|
"epoch": 13.000220464725643, |
|
"grad_norm": 60.434505462646484, |
|
"learning_rate": 3.2599384098544234e-05, |
|
"loss": 7.3664, |
|
"step": 23300 |
|
}, |
|
{ |
|
"epoch": 13.000500419932811, |
|
"grad_norm": 44.0378532409668, |
|
"learning_rate": 3.273936170212766e-05, |
|
"loss": 6.6296, |
|
"step": 23400 |
|
}, |
|
{ |
|
"epoch": 13.000780375139978, |
|
"grad_norm": 50.515316009521484, |
|
"learning_rate": 3.2879339305711086e-05, |
|
"loss": 6.7951, |
|
"step": 23500 |
|
}, |
|
{ |
|
"epoch": 13.001060330347144, |
|
"grad_norm": 60.511566162109375, |
|
"learning_rate": 3.301931690929451e-05, |
|
"loss": 6.8428, |
|
"step": 23600 |
|
}, |
|
{ |
|
"epoch": 13.001340285554312, |
|
"grad_norm": 50.42853546142578, |
|
"learning_rate": 3.3159294512877944e-05, |
|
"loss": 6.8744, |
|
"step": 23700 |
|
}, |
|
{ |
|
"epoch": 13.001620240761477, |
|
"grad_norm": 53.49345016479492, |
|
"learning_rate": 3.3299272116461363e-05, |
|
"loss": 6.7632, |
|
"step": 23800 |
|
}, |
|
{ |
|
"epoch": 13.001900195968645, |
|
"grad_norm": 61.3757438659668, |
|
"learning_rate": 3.3439249720044796e-05, |
|
"loss": 6.7297, |
|
"step": 23900 |
|
}, |
|
{ |
|
"epoch": 13.002180151175812, |
|
"grad_norm": 53.99778747558594, |
|
"learning_rate": 3.357922732362822e-05, |
|
"loss": 6.6704, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 13.002460106382978, |
|
"grad_norm": 55.42316818237305, |
|
"learning_rate": 3.371920492721165e-05, |
|
"loss": 6.6194, |
|
"step": 24100 |
|
}, |
|
{ |
|
"epoch": 13.002740061590146, |
|
"grad_norm": 52.400909423828125, |
|
"learning_rate": 3.3859182530795074e-05, |
|
"loss": 6.4215, |
|
"step": 24200 |
|
}, |
|
{ |
|
"epoch": 13.003020016797313, |
|
"grad_norm": 52.664127349853516, |
|
"learning_rate": 3.39991601343785e-05, |
|
"loss": 6.8347, |
|
"step": 24300 |
|
}, |
|
{ |
|
"epoch": 13.003299972004479, |
|
"grad_norm": 54.587646484375, |
|
"learning_rate": 3.413913773796193e-05, |
|
"loss": 6.3553, |
|
"step": 24400 |
|
}, |
|
{ |
|
"epoch": 13.003579927211646, |
|
"grad_norm": 57.914894104003906, |
|
"learning_rate": 3.427911534154535e-05, |
|
"loss": 6.9907, |
|
"step": 24500 |
|
}, |
|
{ |
|
"epoch": 13.003859882418814, |
|
"grad_norm": 53.43699645996094, |
|
"learning_rate": 3.4419092945128784e-05, |
|
"loss": 6.2836, |
|
"step": 24600 |
|
}, |
|
{ |
|
"epoch": 13.00413983762598, |
|
"grad_norm": 51.49525833129883, |
|
"learning_rate": 3.455907054871221e-05, |
|
"loss": 6.4987, |
|
"step": 24700 |
|
}, |
|
{ |
|
"epoch": 13.004419792833147, |
|
"grad_norm": 66.59857177734375, |
|
"learning_rate": 3.4699048152295635e-05, |
|
"loss": 6.4633, |
|
"step": 24800 |
|
}, |
|
{ |
|
"epoch": 13.004699748040313, |
|
"grad_norm": 59.3377685546875, |
|
"learning_rate": 3.483902575587906e-05, |
|
"loss": 6.4775, |
|
"step": 24900 |
|
}, |
|
{ |
|
"epoch": 13.00497970324748, |
|
"grad_norm": 53.3898811340332, |
|
"learning_rate": 3.497900335946249e-05, |
|
"loss": 6.3864, |
|
"step": 25000 |
|
}, |
|
{ |
|
"epoch": 13.004999300111981, |
|
"eval_accuracy": 0.3631256384065373, |
|
"eval_f1": 0.3063358878242328, |
|
"eval_loss": 3.142681837081909, |
|
"eval_precision": 0.3058190177087762, |
|
"eval_recall": 0.3631256384065373, |
|
"eval_runtime": 117.2715, |
|
"eval_samples_per_second": 33.393, |
|
"eval_steps_per_second": 16.696, |
|
"eval_top_10_accuracy": 0.7852400408580184, |
|
"eval_top_1_accuracy": 0.3631256384065373, |
|
"eval_top_5_accuracy": 0.6973953013278856, |
|
"step": 25007 |
|
}, |
|
{ |
|
"epoch": 14.00025895856663, |
|
"grad_norm": 56.198509216308594, |
|
"learning_rate": 3.511898096304592e-05, |
|
"loss": 4.7395, |
|
"step": 25100 |
|
}, |
|
{ |
|
"epoch": 14.000538913773797, |
|
"grad_norm": 34.69329833984375, |
|
"learning_rate": 3.525895856662934e-05, |
|
"loss": 4.5337, |
|
"step": 25200 |
|
}, |
|
{ |
|
"epoch": 14.000818868980963, |
|
"grad_norm": 40.5659065246582, |
|
"learning_rate": 3.5398936170212764e-05, |
|
"loss": 4.9053, |
|
"step": 25300 |
|
}, |
|
{ |
|
"epoch": 14.00109882418813, |
|
"grad_norm": 54.99299240112305, |
|
"learning_rate": 3.553751399776036e-05, |
|
"loss": 4.9332, |
|
"step": 25400 |
|
}, |
|
{ |
|
"epoch": 14.001378779395298, |
|
"grad_norm": 65.95709228515625, |
|
"learning_rate": 3.567749160134379e-05, |
|
"loss": 4.9058, |
|
"step": 25500 |
|
}, |
|
{ |
|
"epoch": 14.001658734602463, |
|
"grad_norm": 64.16633605957031, |
|
"learning_rate": 3.581746920492721e-05, |
|
"loss": 4.9348, |
|
"step": 25600 |
|
}, |
|
{ |
|
"epoch": 14.00193868980963, |
|
"grad_norm": 61.57122039794922, |
|
"learning_rate": 3.595744680851064e-05, |
|
"loss": 4.7432, |
|
"step": 25700 |
|
}, |
|
{ |
|
"epoch": 14.002218645016796, |
|
"grad_norm": 47.70122528076172, |
|
"learning_rate": 3.609742441209407e-05, |
|
"loss": 4.7825, |
|
"step": 25800 |
|
}, |
|
{ |
|
"epoch": 14.002498600223964, |
|
"grad_norm": 42.792518615722656, |
|
"learning_rate": 3.623740201567749e-05, |
|
"loss": 4.7859, |
|
"step": 25900 |
|
}, |
|
{ |
|
"epoch": 14.002778555431131, |
|
"grad_norm": 54.17988204956055, |
|
"learning_rate": 3.637737961926092e-05, |
|
"loss": 4.8806, |
|
"step": 26000 |
|
}, |
|
{ |
|
"epoch": 14.003058510638297, |
|
"grad_norm": 55.54975509643555, |
|
"learning_rate": 3.6517357222844346e-05, |
|
"loss": 5.0199, |
|
"step": 26100 |
|
}, |
|
{ |
|
"epoch": 14.003338465845465, |
|
"grad_norm": 53.45595932006836, |
|
"learning_rate": 3.665733482642777e-05, |
|
"loss": 4.8493, |
|
"step": 26200 |
|
}, |
|
{ |
|
"epoch": 14.003618421052632, |
|
"grad_norm": 59.02440643310547, |
|
"learning_rate": 3.67973124300112e-05, |
|
"loss": 5.0224, |
|
"step": 26300 |
|
}, |
|
{ |
|
"epoch": 14.003898376259798, |
|
"grad_norm": 51.76025390625, |
|
"learning_rate": 3.693729003359463e-05, |
|
"loss": 4.7993, |
|
"step": 26400 |
|
}, |
|
{ |
|
"epoch": 14.004178331466965, |
|
"grad_norm": 57.27727508544922, |
|
"learning_rate": 3.7077267637178056e-05, |
|
"loss": 4.6562, |
|
"step": 26500 |
|
}, |
|
{ |
|
"epoch": 14.004458286674133, |
|
"grad_norm": 57.676143646240234, |
|
"learning_rate": 3.7217245240761475e-05, |
|
"loss": 4.848, |
|
"step": 26600 |
|
}, |
|
{ |
|
"epoch": 14.004738241881299, |
|
"grad_norm": 42.8398551940918, |
|
"learning_rate": 3.735722284434491e-05, |
|
"loss": 4.8598, |
|
"step": 26700 |
|
}, |
|
{ |
|
"epoch": 14.004998600223963, |
|
"eval_accuracy": 0.3672114402451481, |
|
"eval_f1": 0.31953414391226087, |
|
"eval_loss": 2.9818193912506104, |
|
"eval_precision": 0.32370179182577785, |
|
"eval_recall": 0.3672114402451481, |
|
"eval_runtime": 116.3065, |
|
"eval_samples_per_second": 33.67, |
|
"eval_steps_per_second": 16.835, |
|
"eval_top_10_accuracy": 0.8008171603677222, |
|
"eval_top_1_accuracy": 0.3669560776302349, |
|
"eval_top_5_accuracy": 0.6996935648621042, |
|
"step": 26793 |
|
}, |
|
{ |
|
"epoch": 15.000017497200448, |
|
"grad_norm": 55.68752670288086, |
|
"learning_rate": 3.749720044792833e-05, |
|
"loss": 4.6942, |
|
"step": 26800 |
|
}, |
|
{ |
|
"epoch": 15.000297452407615, |
|
"grad_norm": 47.778018951416016, |
|
"learning_rate": 3.763717805151176e-05, |
|
"loss": 3.488, |
|
"step": 26900 |
|
}, |
|
{ |
|
"epoch": 15.00057740761478, |
|
"grad_norm": 50.85312271118164, |
|
"learning_rate": 3.7777155655095185e-05, |
|
"loss": 3.2244, |
|
"step": 27000 |
|
}, |
|
{ |
|
"epoch": 15.000857362821948, |
|
"grad_norm": 51.211734771728516, |
|
"learning_rate": 3.791713325867861e-05, |
|
"loss": 3.3155, |
|
"step": 27100 |
|
}, |
|
{ |
|
"epoch": 15.001137318029116, |
|
"grad_norm": 39.02397918701172, |
|
"learning_rate": 3.805711086226204e-05, |
|
"loss": 3.47, |
|
"step": 27200 |
|
}, |
|
{ |
|
"epoch": 15.001417273236282, |
|
"grad_norm": 55.1838264465332, |
|
"learning_rate": 3.819708846584546e-05, |
|
"loss": 3.4093, |
|
"step": 27300 |
|
}, |
|
{ |
|
"epoch": 15.001697228443449, |
|
"grad_norm": 53.85865783691406, |
|
"learning_rate": 3.8335666293393056e-05, |
|
"loss": 3.3777, |
|
"step": 27400 |
|
}, |
|
{ |
|
"epoch": 15.001977183650617, |
|
"grad_norm": 52.40718460083008, |
|
"learning_rate": 3.847564389697649e-05, |
|
"loss": 3.7357, |
|
"step": 27500 |
|
}, |
|
{ |
|
"epoch": 15.002257138857782, |
|
"grad_norm": 42.77007293701172, |
|
"learning_rate": 3.8615621500559915e-05, |
|
"loss": 3.5572, |
|
"step": 27600 |
|
}, |
|
{ |
|
"epoch": 15.00253709406495, |
|
"grad_norm": 75.84447479248047, |
|
"learning_rate": 3.8755599104143334e-05, |
|
"loss": 3.65, |
|
"step": 27700 |
|
}, |
|
{ |
|
"epoch": 15.002817049272117, |
|
"grad_norm": 39.93196105957031, |
|
"learning_rate": 3.8895576707726766e-05, |
|
"loss": 3.3187, |
|
"step": 27800 |
|
}, |
|
{ |
|
"epoch": 15.003097004479283, |
|
"grad_norm": 55.67115020751953, |
|
"learning_rate": 3.903555431131019e-05, |
|
"loss": 3.3455, |
|
"step": 27900 |
|
}, |
|
{ |
|
"epoch": 15.00337695968645, |
|
"grad_norm": 32.676544189453125, |
|
"learning_rate": 3.917553191489362e-05, |
|
"loss": 3.6407, |
|
"step": 28000 |
|
}, |
|
{ |
|
"epoch": 15.003656914893616, |
|
"grad_norm": 51.30035400390625, |
|
"learning_rate": 3.9315509518477044e-05, |
|
"loss": 3.6281, |
|
"step": 28100 |
|
}, |
|
{ |
|
"epoch": 15.003936870100784, |
|
"grad_norm": 77.08026123046875, |
|
"learning_rate": 3.9455487122060476e-05, |
|
"loss": 3.4781, |
|
"step": 28200 |
|
}, |
|
{ |
|
"epoch": 15.004216825307951, |
|
"grad_norm": 41.604736328125, |
|
"learning_rate": 3.95954647256439e-05, |
|
"loss": 3.4542, |
|
"step": 28300 |
|
}, |
|
{ |
|
"epoch": 15.004496780515117, |
|
"grad_norm": 39.74190902709961, |
|
"learning_rate": 3.973544232922732e-05, |
|
"loss": 3.5986, |
|
"step": 28400 |
|
}, |
|
{ |
|
"epoch": 15.004776735722285, |
|
"grad_norm": 44.98125076293945, |
|
"learning_rate": 3.9875419932810754e-05, |
|
"loss": 3.5894, |
|
"step": 28500 |
|
}, |
|
{ |
|
"epoch": 15.005000699888019, |
|
"eval_accuracy": 0.3927477017364658, |
|
"eval_f1": 0.34565750681985735, |
|
"eval_loss": 2.7899587154388428, |
|
"eval_precision": 0.349307388786449, |
|
"eval_recall": 0.3927477017364658, |
|
"eval_runtime": 114.0889, |
|
"eval_samples_per_second": 34.324, |
|
"eval_steps_per_second": 17.162, |
|
"eval_top_10_accuracy": 0.8207354443309499, |
|
"eval_top_1_accuracy": 0.3924923391215526, |
|
"eval_top_5_accuracy": 0.7359550561797753, |
|
"step": 28580 |
|
}, |
|
{ |
|
"epoch": 16.00005599104143, |
|
"grad_norm": 45.47757339477539, |
|
"learning_rate": 4.001539753639418e-05, |
|
"loss": 3.3691, |
|
"step": 28600 |
|
}, |
|
{ |
|
"epoch": 16.0003359462486, |
|
"grad_norm": 52.39859390258789, |
|
"learning_rate": 4.0155375139977605e-05, |
|
"loss": 2.2598, |
|
"step": 28700 |
|
}, |
|
{ |
|
"epoch": 16.000615901455767, |
|
"grad_norm": 42.28738784790039, |
|
"learning_rate": 4.029535274356103e-05, |
|
"loss": 2.4562, |
|
"step": 28800 |
|
}, |
|
{ |
|
"epoch": 16.000895856662932, |
|
"grad_norm": 49.735939025878906, |
|
"learning_rate": 4.043533034714446e-05, |
|
"loss": 2.5948, |
|
"step": 28900 |
|
}, |
|
{ |
|
"epoch": 16.0011758118701, |
|
"grad_norm": 54.75900650024414, |
|
"learning_rate": 4.057530795072789e-05, |
|
"loss": 2.5475, |
|
"step": 29000 |
|
}, |
|
{ |
|
"epoch": 16.001455767077267, |
|
"grad_norm": 54.90763473510742, |
|
"learning_rate": 4.071528555431131e-05, |
|
"loss": 2.519, |
|
"step": 29100 |
|
}, |
|
{ |
|
"epoch": 16.001735722284433, |
|
"grad_norm": 61.103111267089844, |
|
"learning_rate": 4.085526315789474e-05, |
|
"loss": 2.4507, |
|
"step": 29200 |
|
}, |
|
{ |
|
"epoch": 16.002015677491602, |
|
"grad_norm": 47.821693420410156, |
|
"learning_rate": 4.099524076147817e-05, |
|
"loss": 2.5811, |
|
"step": 29300 |
|
}, |
|
{ |
|
"epoch": 16.002295632698768, |
|
"grad_norm": 44.895851135253906, |
|
"learning_rate": 4.113521836506159e-05, |
|
"loss": 2.6464, |
|
"step": 29400 |
|
}, |
|
{ |
|
"epoch": 16.002575587905934, |
|
"grad_norm": 24.25828742980957, |
|
"learning_rate": 4.127379619260918e-05, |
|
"loss": 2.5047, |
|
"step": 29500 |
|
}, |
|
{ |
|
"epoch": 16.002855543113103, |
|
"grad_norm": 54.69134521484375, |
|
"learning_rate": 4.141377379619261e-05, |
|
"loss": 2.6585, |
|
"step": 29600 |
|
}, |
|
{ |
|
"epoch": 16.00313549832027, |
|
"grad_norm": 61.3412971496582, |
|
"learning_rate": 4.155375139977604e-05, |
|
"loss": 2.6409, |
|
"step": 29700 |
|
}, |
|
{ |
|
"epoch": 16.003415453527435, |
|
"grad_norm": 52.66725540161133, |
|
"learning_rate": 4.1693729003359464e-05, |
|
"loss": 2.7054, |
|
"step": 29800 |
|
}, |
|
{ |
|
"epoch": 16.003695408734604, |
|
"grad_norm": 53.61050033569336, |
|
"learning_rate": 4.183370660694289e-05, |
|
"loss": 2.6963, |
|
"step": 29900 |
|
}, |
|
{ |
|
"epoch": 16.00397536394177, |
|
"grad_norm": 53.02050018310547, |
|
"learning_rate": 4.1973684210526316e-05, |
|
"loss": 2.6771, |
|
"step": 30000 |
|
}, |
|
{ |
|
"epoch": 16.004255319148935, |
|
"grad_norm": 61.56930160522461, |
|
"learning_rate": 4.211366181410974e-05, |
|
"loss": 2.7266, |
|
"step": 30100 |
|
}, |
|
{ |
|
"epoch": 16.004535274356105, |
|
"grad_norm": 52.07630920410156, |
|
"learning_rate": 4.225363941769317e-05, |
|
"loss": 2.7177, |
|
"step": 30200 |
|
}, |
|
{ |
|
"epoch": 16.00481522956327, |
|
"grad_norm": 40.4132080078125, |
|
"learning_rate": 4.23936170212766e-05, |
|
"loss": 2.7053, |
|
"step": 30300 |
|
}, |
|
{ |
|
"epoch": 16.005, |
|
"eval_accuracy": 0.39198161389172625, |
|
"eval_f1": 0.348649355978263, |
|
"eval_loss": 2.7045156955718994, |
|
"eval_precision": 0.3579045415485661, |
|
"eval_recall": 0.39198161389172625, |
|
"eval_runtime": 115.0059, |
|
"eval_samples_per_second": 34.05, |
|
"eval_steps_per_second": 17.025, |
|
"eval_top_10_accuracy": 0.8296731358529111, |
|
"eval_top_1_accuracy": 0.39198161389172625, |
|
"eval_top_5_accuracy": 0.7402962206332993, |
|
"step": 30366 |
|
}, |
|
{ |
|
"epoch": 17.000094484882418, |
|
"grad_norm": 53.307334899902344, |
|
"learning_rate": 4.2532194848824194e-05, |
|
"loss": 2.7488, |
|
"step": 30400 |
|
}, |
|
{ |
|
"epoch": 17.000374440089587, |
|
"grad_norm": 52.3875732421875, |
|
"learning_rate": 4.267217245240761e-05, |
|
"loss": 1.6991, |
|
"step": 30500 |
|
}, |
|
{ |
|
"epoch": 17.000654395296753, |
|
"grad_norm": 13.349233627319336, |
|
"learning_rate": 4.2812150055991046e-05, |
|
"loss": 1.7715, |
|
"step": 30600 |
|
}, |
|
{ |
|
"epoch": 17.00093435050392, |
|
"grad_norm": 50.092220306396484, |
|
"learning_rate": 4.295212765957447e-05, |
|
"loss": 1.8627, |
|
"step": 30700 |
|
}, |
|
{ |
|
"epoch": 17.001214305711088, |
|
"grad_norm": 41.946617126464844, |
|
"learning_rate": 4.30921052631579e-05, |
|
"loss": 1.8379, |
|
"step": 30800 |
|
}, |
|
{ |
|
"epoch": 17.001494260918253, |
|
"grad_norm": 31.524587631225586, |
|
"learning_rate": 4.323208286674132e-05, |
|
"loss": 1.7017, |
|
"step": 30900 |
|
}, |
|
{ |
|
"epoch": 17.00177421612542, |
|
"grad_norm": 27.594568252563477, |
|
"learning_rate": 4.337206047032475e-05, |
|
"loss": 1.9809, |
|
"step": 31000 |
|
}, |
|
{ |
|
"epoch": 17.00205417133259, |
|
"grad_norm": 35.60299301147461, |
|
"learning_rate": 4.351203807390818e-05, |
|
"loss": 2.1175, |
|
"step": 31100 |
|
}, |
|
{ |
|
"epoch": 17.002334126539754, |
|
"grad_norm": 53.827823638916016, |
|
"learning_rate": 4.36520156774916e-05, |
|
"loss": 2.0042, |
|
"step": 31200 |
|
}, |
|
{ |
|
"epoch": 17.00261408174692, |
|
"grad_norm": 49.536380767822266, |
|
"learning_rate": 4.379199328107503e-05, |
|
"loss": 1.9332, |
|
"step": 31300 |
|
}, |
|
{ |
|
"epoch": 17.00289403695409, |
|
"grad_norm": 31.038576126098633, |
|
"learning_rate": 4.393197088465846e-05, |
|
"loss": 2.1337, |
|
"step": 31400 |
|
}, |
|
{ |
|
"epoch": 17.003173992161255, |
|
"grad_norm": 62.93170928955078, |
|
"learning_rate": 4.4071948488241885e-05, |
|
"loss": 2.1531, |
|
"step": 31500 |
|
}, |
|
{ |
|
"epoch": 17.00345394736842, |
|
"grad_norm": 66.991455078125, |
|
"learning_rate": 4.421192609182531e-05, |
|
"loss": 2.003, |
|
"step": 31600 |
|
}, |
|
{ |
|
"epoch": 17.003733902575586, |
|
"grad_norm": 44.7550048828125, |
|
"learning_rate": 4.435190369540874e-05, |
|
"loss": 2.3327, |
|
"step": 31700 |
|
}, |
|
{ |
|
"epoch": 17.004013857782756, |
|
"grad_norm": 79.54408264160156, |
|
"learning_rate": 4.449188129899216e-05, |
|
"loss": 2.3132, |
|
"step": 31800 |
|
}, |
|
{ |
|
"epoch": 17.00429381298992, |
|
"grad_norm": 57.92308807373047, |
|
"learning_rate": 4.463185890257559e-05, |
|
"loss": 2.4269, |
|
"step": 31900 |
|
}, |
|
{ |
|
"epoch": 17.004573768197087, |
|
"grad_norm": 36.562435150146484, |
|
"learning_rate": 4.4771836506159014e-05, |
|
"loss": 2.1766, |
|
"step": 32000 |
|
}, |
|
{ |
|
"epoch": 17.004853723404256, |
|
"grad_norm": 45.46358871459961, |
|
"learning_rate": 4.491181410974245e-05, |
|
"loss": 2.0517, |
|
"step": 32100 |
|
}, |
|
{ |
|
"epoch": 17.004999300111983, |
|
"eval_accuracy": 0.3878958120531154, |
|
"eval_f1": 0.34810126375123673, |
|
"eval_loss": 2.733853816986084, |
|
"eval_precision": 0.35983602993306774, |
|
"eval_recall": 0.3878958120531154, |
|
"eval_runtime": 118.3652, |
|
"eval_samples_per_second": 33.084, |
|
"eval_steps_per_second": 16.542, |
|
"eval_top_10_accuracy": 0.8204800817160368, |
|
"eval_top_1_accuracy": 0.3884065372829418, |
|
"eval_top_5_accuracy": 0.7344228804902962, |
|
"step": 32152 |
|
}, |
|
{ |
|
"epoch": 18.000132978723403, |
|
"grad_norm": 26.470172882080078, |
|
"learning_rate": 4.505179171332587e-05, |
|
"loss": 1.7708, |
|
"step": 32200 |
|
}, |
|
{ |
|
"epoch": 18.000412933930573, |
|
"grad_norm": 28.779521942138672, |
|
"learning_rate": 4.51917693169093e-05, |
|
"loss": 1.3716, |
|
"step": 32300 |
|
}, |
|
{ |
|
"epoch": 18.00069288913774, |
|
"grad_norm": 63.082313537597656, |
|
"learning_rate": 4.5331746920492724e-05, |
|
"loss": 1.5498, |
|
"step": 32400 |
|
}, |
|
{ |
|
"epoch": 18.000972844344904, |
|
"grad_norm": 43.071693420410156, |
|
"learning_rate": 4.547172452407615e-05, |
|
"loss": 1.6103, |
|
"step": 32500 |
|
}, |
|
{ |
|
"epoch": 18.00125279955207, |
|
"grad_norm": 46.50571823120117, |
|
"learning_rate": 4.5611702127659576e-05, |
|
"loss": 1.493, |
|
"step": 32600 |
|
}, |
|
{ |
|
"epoch": 18.00153275475924, |
|
"grad_norm": 76.01538848876953, |
|
"learning_rate": 4.5751679731243e-05, |
|
"loss": 1.5295, |
|
"step": 32700 |
|
}, |
|
{ |
|
"epoch": 18.001812709966405, |
|
"grad_norm": 11.543736457824707, |
|
"learning_rate": 4.5891657334826434e-05, |
|
"loss": 1.8705, |
|
"step": 32800 |
|
}, |
|
{ |
|
"epoch": 18.00209266517357, |
|
"grad_norm": 39.0593147277832, |
|
"learning_rate": 4.603163493840986e-05, |
|
"loss": 1.6423, |
|
"step": 32900 |
|
}, |
|
{ |
|
"epoch": 18.00237262038074, |
|
"grad_norm": 46.54063415527344, |
|
"learning_rate": 4.617161254199328e-05, |
|
"loss": 1.6499, |
|
"step": 33000 |
|
}, |
|
{ |
|
"epoch": 18.002652575587906, |
|
"grad_norm": 45.89179992675781, |
|
"learning_rate": 4.631159014557671e-05, |
|
"loss": 1.7307, |
|
"step": 33100 |
|
}, |
|
{ |
|
"epoch": 18.00293253079507, |
|
"grad_norm": 37.720703125, |
|
"learning_rate": 4.645156774916014e-05, |
|
"loss": 1.8645, |
|
"step": 33200 |
|
}, |
|
{ |
|
"epoch": 18.00321248600224, |
|
"grad_norm": 37.555179595947266, |
|
"learning_rate": 4.659154535274356e-05, |
|
"loss": 1.7513, |
|
"step": 33300 |
|
}, |
|
{ |
|
"epoch": 18.003492441209406, |
|
"grad_norm": 30.639272689819336, |
|
"learning_rate": 4.673152295632699e-05, |
|
"loss": 1.8245, |
|
"step": 33400 |
|
}, |
|
{ |
|
"epoch": 18.003772396416572, |
|
"grad_norm": 41.111690521240234, |
|
"learning_rate": 4.6871500559910415e-05, |
|
"loss": 1.7562, |
|
"step": 33500 |
|
}, |
|
{ |
|
"epoch": 18.00405235162374, |
|
"grad_norm": 27.824993133544922, |
|
"learning_rate": 4.701147816349384e-05, |
|
"loss": 1.7368, |
|
"step": 33600 |
|
}, |
|
{ |
|
"epoch": 18.004332306830907, |
|
"grad_norm": 11.345212936401367, |
|
"learning_rate": 4.7151455767077266e-05, |
|
"loss": 1.8328, |
|
"step": 33700 |
|
}, |
|
{ |
|
"epoch": 18.004612262038073, |
|
"grad_norm": 53.33864974975586, |
|
"learning_rate": 4.72914333706607e-05, |
|
"loss": 2.1876, |
|
"step": 33800 |
|
}, |
|
{ |
|
"epoch": 18.004892217245242, |
|
"grad_norm": 58.86853790283203, |
|
"learning_rate": 4.7431410974244125e-05, |
|
"loss": 1.9862, |
|
"step": 33900 |
|
}, |
|
{ |
|
"epoch": 18.004998600223963, |
|
"eval_accuracy": 0.3817671092951992, |
|
"eval_f1": 0.34462121977443727, |
|
"eval_loss": 2.774939775466919, |
|
"eval_precision": 0.3614003286181529, |
|
"eval_recall": 0.3817671092951992, |
|
"eval_runtime": 118.5272, |
|
"eval_samples_per_second": 33.039, |
|
"eval_steps_per_second": 16.519, |
|
"eval_top_10_accuracy": 0.8171603677221655, |
|
"eval_top_1_accuracy": 0.38202247191011235, |
|
"eval_top_5_accuracy": 0.7285495403472931, |
|
"step": 33938 |
|
}, |
|
{ |
|
"epoch": 19.00017147256439, |
|
"grad_norm": 31.951032638549805, |
|
"learning_rate": 4.757138857782755e-05, |
|
"loss": 1.6611, |
|
"step": 34000 |
|
}, |
|
{ |
|
"epoch": 19.000451427771555, |
|
"grad_norm": 17.33244514465332, |
|
"learning_rate": 4.7711366181410976e-05, |
|
"loss": 1.0391, |
|
"step": 34100 |
|
}, |
|
{ |
|
"epoch": 19.000731382978724, |
|
"grad_norm": 46.239593505859375, |
|
"learning_rate": 4.78513437849944e-05, |
|
"loss": 1.2688, |
|
"step": 34200 |
|
}, |
|
{ |
|
"epoch": 19.00101133818589, |
|
"grad_norm": 26.96588706970215, |
|
"learning_rate": 4.799132138857783e-05, |
|
"loss": 1.4947, |
|
"step": 34300 |
|
}, |
|
{ |
|
"epoch": 19.001291293393056, |
|
"grad_norm": 34.70318603515625, |
|
"learning_rate": 4.812989921612542e-05, |
|
"loss": 1.3438, |
|
"step": 34400 |
|
}, |
|
{ |
|
"epoch": 19.001571248600225, |
|
"grad_norm": 14.275991439819336, |
|
"learning_rate": 4.826987681970885e-05, |
|
"loss": 1.5921, |
|
"step": 34500 |
|
}, |
|
{ |
|
"epoch": 19.00185120380739, |
|
"grad_norm": 20.836231231689453, |
|
"learning_rate": 4.840985442329228e-05, |
|
"loss": 1.4238, |
|
"step": 34600 |
|
}, |
|
{ |
|
"epoch": 19.002131159014557, |
|
"grad_norm": 45.50111389160156, |
|
"learning_rate": 4.85498320268757e-05, |
|
"loss": 1.4567, |
|
"step": 34700 |
|
}, |
|
{ |
|
"epoch": 19.002411114221726, |
|
"grad_norm": 17.043527603149414, |
|
"learning_rate": 4.8689809630459125e-05, |
|
"loss": 1.5576, |
|
"step": 34800 |
|
}, |
|
{ |
|
"epoch": 19.00269106942889, |
|
"grad_norm": 71.65300750732422, |
|
"learning_rate": 4.882978723404256e-05, |
|
"loss": 1.5049, |
|
"step": 34900 |
|
}, |
|
{ |
|
"epoch": 19.002971024636057, |
|
"grad_norm": 50.4085807800293, |
|
"learning_rate": 4.8969764837625984e-05, |
|
"loss": 1.671, |
|
"step": 35000 |
|
}, |
|
{ |
|
"epoch": 19.003250979843227, |
|
"grad_norm": 41.303321838378906, |
|
"learning_rate": 4.910974244120941e-05, |
|
"loss": 1.5439, |
|
"step": 35100 |
|
}, |
|
{ |
|
"epoch": 19.003530935050392, |
|
"grad_norm": 61.70454406738281, |
|
"learning_rate": 4.9249720044792835e-05, |
|
"loss": 1.4943, |
|
"step": 35200 |
|
}, |
|
{ |
|
"epoch": 19.003810890257558, |
|
"grad_norm": 37.11140060424805, |
|
"learning_rate": 4.938969764837626e-05, |
|
"loss": 1.9324, |
|
"step": 35300 |
|
}, |
|
{ |
|
"epoch": 19.004090845464727, |
|
"grad_norm": 22.8467960357666, |
|
"learning_rate": 4.952967525195969e-05, |
|
"loss": 1.5832, |
|
"step": 35400 |
|
}, |
|
{ |
|
"epoch": 19.004370800671893, |
|
"grad_norm": 24.005002975463867, |
|
"learning_rate": 4.966965285554311e-05, |
|
"loss": 1.7288, |
|
"step": 35500 |
|
}, |
|
{ |
|
"epoch": 19.00465075587906, |
|
"grad_norm": 68.68001556396484, |
|
"learning_rate": 4.9809630459126545e-05, |
|
"loss": 1.8915, |
|
"step": 35600 |
|
}, |
|
{ |
|
"epoch": 19.004930711086224, |
|
"grad_norm": 40.64888000488281, |
|
"learning_rate": 4.994960806270997e-05, |
|
"loss": 1.9352, |
|
"step": 35700 |
|
}, |
|
{ |
|
"epoch": 19.00500069988802, |
|
"eval_accuracy": 0.36338100102145043, |
|
"eval_f1": 0.32556302652063634, |
|
"eval_loss": 2.815650701522827, |
|
"eval_precision": 0.3396402371887561, |
|
"eval_recall": 0.36338100102145043, |
|
"eval_runtime": 119.1107, |
|
"eval_samples_per_second": 32.877, |
|
"eval_steps_per_second": 16.438, |
|
"eval_top_10_accuracy": 0.8156281920326864, |
|
"eval_top_1_accuracy": 0.36338100102145043, |
|
"eval_top_5_accuracy": 0.7134831460674157, |
|
"step": 35725 |
|
}, |
|
{ |
|
"epoch": 20.000209966405375, |
|
"grad_norm": 37.2190055847168, |
|
"learning_rate": 4.9990046037078515e-05, |
|
"loss": 1.304, |
|
"step": 35800 |
|
}, |
|
{ |
|
"epoch": 20.00048992161254, |
|
"grad_norm": 18.588062286376953, |
|
"learning_rate": 4.997449297001369e-05, |
|
"loss": 1.2178, |
|
"step": 35900 |
|
}, |
|
{ |
|
"epoch": 20.00076987681971, |
|
"grad_norm": 46.44628143310547, |
|
"learning_rate": 4.9958939902948865e-05, |
|
"loss": 1.2133, |
|
"step": 36000 |
|
}, |
|
{ |
|
"epoch": 20.001049832026876, |
|
"grad_norm": 17.273088455200195, |
|
"learning_rate": 4.994338683588404e-05, |
|
"loss": 1.1894, |
|
"step": 36100 |
|
}, |
|
{ |
|
"epoch": 20.00132978723404, |
|
"grad_norm": 52.89341735839844, |
|
"learning_rate": 4.9927833768819216e-05, |
|
"loss": 1.1865, |
|
"step": 36200 |
|
}, |
|
{ |
|
"epoch": 20.00160974244121, |
|
"grad_norm": 35.168704986572266, |
|
"learning_rate": 4.991228070175439e-05, |
|
"loss": 1.4459, |
|
"step": 36300 |
|
}, |
|
{ |
|
"epoch": 20.001889697648377, |
|
"grad_norm": 21.019437789916992, |
|
"learning_rate": 4.989672763468956e-05, |
|
"loss": 1.3725, |
|
"step": 36400 |
|
}, |
|
{ |
|
"epoch": 20.002169652855542, |
|
"grad_norm": 30.197154998779297, |
|
"learning_rate": 4.988133009829539e-05, |
|
"loss": 1.3246, |
|
"step": 36500 |
|
}, |
|
{ |
|
"epoch": 20.00244960806271, |
|
"grad_norm": 76.85697174072266, |
|
"learning_rate": 4.986577703123056e-05, |
|
"loss": 1.3171, |
|
"step": 36600 |
|
}, |
|
{ |
|
"epoch": 20.002729563269877, |
|
"grad_norm": 29.73351287841797, |
|
"learning_rate": 4.985022396416574e-05, |
|
"loss": 1.3113, |
|
"step": 36700 |
|
}, |
|
{ |
|
"epoch": 20.003009518477043, |
|
"grad_norm": 50.711971282958984, |
|
"learning_rate": 4.983467089710091e-05, |
|
"loss": 1.4899, |
|
"step": 36800 |
|
}, |
|
{ |
|
"epoch": 20.00328947368421, |
|
"grad_norm": 34.413719177246094, |
|
"learning_rate": 4.9819117830036085e-05, |
|
"loss": 1.6135, |
|
"step": 36900 |
|
}, |
|
{ |
|
"epoch": 20.003569428891378, |
|
"grad_norm": 51.75621032714844, |
|
"learning_rate": 4.980356476297126e-05, |
|
"loss": 1.4968, |
|
"step": 37000 |
|
}, |
|
{ |
|
"epoch": 20.003849384098544, |
|
"grad_norm": 28.784854888916016, |
|
"learning_rate": 4.9788011695906435e-05, |
|
"loss": 1.5756, |
|
"step": 37100 |
|
}, |
|
{ |
|
"epoch": 20.00412933930571, |
|
"grad_norm": 15.39171314239502, |
|
"learning_rate": 4.9772458628841614e-05, |
|
"loss": 1.3599, |
|
"step": 37200 |
|
}, |
|
{ |
|
"epoch": 20.00440929451288, |
|
"grad_norm": 43.16920852661133, |
|
"learning_rate": 4.9756905561776786e-05, |
|
"loss": 1.3928, |
|
"step": 37300 |
|
}, |
|
{ |
|
"epoch": 20.004689249720045, |
|
"grad_norm": 63.47052001953125, |
|
"learning_rate": 4.974135249471196e-05, |
|
"loss": 1.671, |
|
"step": 37400 |
|
}, |
|
{ |
|
"epoch": 20.00496920492721, |
|
"grad_norm": 30.383426666259766, |
|
"learning_rate": 4.9725799427647136e-05, |
|
"loss": 1.6389, |
|
"step": 37500 |
|
}, |
|
{ |
|
"epoch": 20.005, |
|
"eval_accuracy": 0.3799795709908069, |
|
"eval_f1": 0.3403035730650944, |
|
"eval_loss": 2.796839475631714, |
|
"eval_precision": 0.35319952859534065, |
|
"eval_recall": 0.3799795709908069, |
|
"eval_runtime": 116.2913, |
|
"eval_samples_per_second": 33.674, |
|
"eval_steps_per_second": 16.837, |
|
"eval_top_10_accuracy": 0.8143513789581205, |
|
"eval_top_1_accuracy": 0.3799795709908069, |
|
"eval_top_5_accuracy": 0.7247191011235955, |
|
"step": 37511 |
|
}, |
|
{ |
|
"epoch": 21.00024846024636, |
|
"grad_norm": 7.234004497528076, |
|
"learning_rate": 4.971024636058231e-05, |
|
"loss": 0.9637, |
|
"step": 37600 |
|
}, |
|
{ |
|
"epoch": 21.000528415453527, |
|
"grad_norm": 46.028507232666016, |
|
"learning_rate": 4.969469329351749e-05, |
|
"loss": 0.8584, |
|
"step": 37700 |
|
}, |
|
{ |
|
"epoch": 21.000808370660696, |
|
"grad_norm": 19.228715896606445, |
|
"learning_rate": 4.967914022645266e-05, |
|
"loss": 1.1736, |
|
"step": 37800 |
|
}, |
|
{ |
|
"epoch": 21.001088325867862, |
|
"grad_norm": 38.96889114379883, |
|
"learning_rate": 4.966358715938783e-05, |
|
"loss": 1.135, |
|
"step": 37900 |
|
}, |
|
{ |
|
"epoch": 21.001368281075028, |
|
"grad_norm": 61.55207824707031, |
|
"learning_rate": 4.964803409232301e-05, |
|
"loss": 1.1689, |
|
"step": 38000 |
|
}, |
|
{ |
|
"epoch": 21.001648236282193, |
|
"grad_norm": 24.2206974029541, |
|
"learning_rate": 4.963248102525818e-05, |
|
"loss": 1.0731, |
|
"step": 38100 |
|
}, |
|
{ |
|
"epoch": 21.001928191489363, |
|
"grad_norm": 26.989789962768555, |
|
"learning_rate": 4.961692795819336e-05, |
|
"loss": 1.0659, |
|
"step": 38200 |
|
}, |
|
{ |
|
"epoch": 21.00220814669653, |
|
"grad_norm": 29.301572799682617, |
|
"learning_rate": 4.9601530421799184e-05, |
|
"loss": 1.2649, |
|
"step": 38300 |
|
}, |
|
{ |
|
"epoch": 21.002488101903694, |
|
"grad_norm": 41.62022018432617, |
|
"learning_rate": 4.9585977354734355e-05, |
|
"loss": 1.2247, |
|
"step": 38400 |
|
}, |
|
{ |
|
"epoch": 21.002768057110863, |
|
"grad_norm": 24.48542022705078, |
|
"learning_rate": 4.9570424287669534e-05, |
|
"loss": 1.2741, |
|
"step": 38500 |
|
}, |
|
{ |
|
"epoch": 21.00304801231803, |
|
"grad_norm": 72.86893463134766, |
|
"learning_rate": 4.9554871220604706e-05, |
|
"loss": 1.4002, |
|
"step": 38600 |
|
}, |
|
{ |
|
"epoch": 21.003327967525195, |
|
"grad_norm": 70.68368530273438, |
|
"learning_rate": 4.9539318153539885e-05, |
|
"loss": 1.5305, |
|
"step": 38700 |
|
}, |
|
{ |
|
"epoch": 21.003607922732364, |
|
"grad_norm": 43.46640396118164, |
|
"learning_rate": 4.9523765086475057e-05, |
|
"loss": 1.29, |
|
"step": 38800 |
|
}, |
|
{ |
|
"epoch": 21.00388787793953, |
|
"grad_norm": 63.277320861816406, |
|
"learning_rate": 4.9508212019410235e-05, |
|
"loss": 1.4255, |
|
"step": 38900 |
|
}, |
|
{ |
|
"epoch": 21.004167833146695, |
|
"grad_norm": 35.36826705932617, |
|
"learning_rate": 4.949265895234541e-05, |
|
"loss": 1.3306, |
|
"step": 39000 |
|
}, |
|
{ |
|
"epoch": 21.004447788353865, |
|
"grad_norm": 46.617313385009766, |
|
"learning_rate": 4.947710588528058e-05, |
|
"loss": 1.4394, |
|
"step": 39100 |
|
}, |
|
{ |
|
"epoch": 21.00472774356103, |
|
"grad_norm": 19.11472511291504, |
|
"learning_rate": 4.946155281821576e-05, |
|
"loss": 1.4166, |
|
"step": 39200 |
|
}, |
|
{ |
|
"epoch": 21.004999300111983, |
|
"eval_accuracy": 0.37410623084780387, |
|
"eval_f1": 0.3358086003770376, |
|
"eval_loss": 2.8413541316986084, |
|
"eval_precision": 0.34984856960086785, |
|
"eval_recall": 0.37410623084780387, |
|
"eval_runtime": 115.7697, |
|
"eval_samples_per_second": 33.826, |
|
"eval_steps_per_second": 16.913, |
|
"eval_top_10_accuracy": 0.8066905005107252, |
|
"eval_top_1_accuracy": 0.3738508682328907, |
|
"eval_top_5_accuracy": 0.7132277834525026, |
|
"step": 39297 |
|
}, |
|
{ |
|
"epoch": 22.000006998880178, |
|
"grad_norm": 12.816142082214355, |
|
"learning_rate": 4.944599975115093e-05, |
|
"loss": 1.6177, |
|
"step": 39300 |
|
}, |
|
{ |
|
"epoch": 22.000286954087347, |
|
"grad_norm": 17.605751037597656, |
|
"learning_rate": 4.943044668408611e-05, |
|
"loss": 0.7587, |
|
"step": 39400 |
|
}, |
|
{ |
|
"epoch": 22.000566909294513, |
|
"grad_norm": 73.58073425292969, |
|
"learning_rate": 4.941489361702128e-05, |
|
"loss": 0.9073, |
|
"step": 39500 |
|
}, |
|
{ |
|
"epoch": 22.00084686450168, |
|
"grad_norm": 69.34747314453125, |
|
"learning_rate": 4.939934054995645e-05, |
|
"loss": 0.9082, |
|
"step": 39600 |
|
}, |
|
{ |
|
"epoch": 22.001126819708848, |
|
"grad_norm": 40.95439147949219, |
|
"learning_rate": 4.938378748289163e-05, |
|
"loss": 0.9475, |
|
"step": 39700 |
|
}, |
|
{ |
|
"epoch": 22.001406774916013, |
|
"grad_norm": 37.23710632324219, |
|
"learning_rate": 4.93682344158268e-05, |
|
"loss": 1.0648, |
|
"step": 39800 |
|
}, |
|
{ |
|
"epoch": 22.00168673012318, |
|
"grad_norm": 45.12014389038086, |
|
"learning_rate": 4.9352681348761975e-05, |
|
"loss": 1.1583, |
|
"step": 39900 |
|
}, |
|
{ |
|
"epoch": 22.00196668533035, |
|
"grad_norm": 21.451051712036133, |
|
"learning_rate": 4.933712828169715e-05, |
|
"loss": 1.1223, |
|
"step": 40000 |
|
}, |
|
{ |
|
"epoch": 22.002246640537514, |
|
"grad_norm": 6.596524238586426, |
|
"learning_rate": 4.9321575214632325e-05, |
|
"loss": 1.2806, |
|
"step": 40100 |
|
}, |
|
{ |
|
"epoch": 22.00252659574468, |
|
"grad_norm": 40.44515609741211, |
|
"learning_rate": 4.9306022147567504e-05, |
|
"loss": 1.22, |
|
"step": 40200 |
|
}, |
|
{ |
|
"epoch": 22.00280655095185, |
|
"grad_norm": 47.3612060546875, |
|
"learning_rate": 4.9290469080502676e-05, |
|
"loss": 1.3957, |
|
"step": 40300 |
|
}, |
|
{ |
|
"epoch": 22.003086506159015, |
|
"grad_norm": 9.846660614013672, |
|
"learning_rate": 4.927491601343785e-05, |
|
"loss": 1.2558, |
|
"step": 40400 |
|
}, |
|
{ |
|
"epoch": 22.00336646136618, |
|
"grad_norm": 10.630510330200195, |
|
"learning_rate": 4.9259362946373026e-05, |
|
"loss": 1.2827, |
|
"step": 40500 |
|
}, |
|
{ |
|
"epoch": 22.00364641657335, |
|
"grad_norm": 25.466978073120117, |
|
"learning_rate": 4.92438098793082e-05, |
|
"loss": 1.2335, |
|
"step": 40600 |
|
}, |
|
{ |
|
"epoch": 22.003926371780516, |
|
"grad_norm": 46.57871627807617, |
|
"learning_rate": 4.922825681224338e-05, |
|
"loss": 1.2621, |
|
"step": 40700 |
|
}, |
|
{ |
|
"epoch": 22.00420632698768, |
|
"grad_norm": 38.56644821166992, |
|
"learning_rate": 4.921270374517855e-05, |
|
"loss": 1.3509, |
|
"step": 40800 |
|
}, |
|
{ |
|
"epoch": 22.00448628219485, |
|
"grad_norm": 53.1245002746582, |
|
"learning_rate": 4.919715067811372e-05, |
|
"loss": 1.3637, |
|
"step": 40900 |
|
}, |
|
{ |
|
"epoch": 22.004766237402016, |
|
"grad_norm": 92.77754974365234, |
|
"learning_rate": 4.91815976110489e-05, |
|
"loss": 1.3113, |
|
"step": 41000 |
|
}, |
|
{ |
|
"epoch": 22.004998600223963, |
|
"eval_accuracy": 0.3669560776302349, |
|
"eval_f1": 0.33397568836792535, |
|
"eval_loss": 2.911128282546997, |
|
"eval_precision": 0.35075591826740965, |
|
"eval_recall": 0.3669560776302349, |
|
"eval_runtime": 117.1252, |
|
"eval_samples_per_second": 33.434, |
|
"eval_steps_per_second": 16.717, |
|
"eval_top_10_accuracy": 0.8041368743615934, |
|
"eval_top_1_accuracy": 0.36670071501532175, |
|
"eval_top_5_accuracy": 0.7032686414708886, |
|
"step": 41083 |
|
}, |
|
{ |
|
"epoch": 23.000045492721163, |
|
"grad_norm": 67.09935760498047, |
|
"learning_rate": 4.916604454398407e-05, |
|
"loss": 1.4924, |
|
"step": 41100 |
|
}, |
|
{ |
|
"epoch": 23.000325447928333, |
|
"grad_norm": 65.86184692382812, |
|
"learning_rate": 4.915049147691925e-05, |
|
"loss": 0.8501, |
|
"step": 41200 |
|
}, |
|
{ |
|
"epoch": 23.0006054031355, |
|
"grad_norm": 27.802021026611328, |
|
"learning_rate": 4.913493840985443e-05, |
|
"loss": 0.6177, |
|
"step": 41300 |
|
}, |
|
{ |
|
"epoch": 23.000885358342664, |
|
"grad_norm": 53.507080078125, |
|
"learning_rate": 4.91193853427896e-05, |
|
"loss": 0.921, |
|
"step": 41400 |
|
}, |
|
{ |
|
"epoch": 23.001165313549834, |
|
"grad_norm": 34.499412536621094, |
|
"learning_rate": 4.910383227572478e-05, |
|
"loss": 0.9269, |
|
"step": 41500 |
|
}, |
|
{ |
|
"epoch": 23.001445268757, |
|
"grad_norm": 19.095460891723633, |
|
"learning_rate": 4.908827920865995e-05, |
|
"loss": 0.9606, |
|
"step": 41600 |
|
}, |
|
{ |
|
"epoch": 23.001725223964165, |
|
"grad_norm": 18.78254508972168, |
|
"learning_rate": 4.907272614159513e-05, |
|
"loss": 1.0296, |
|
"step": 41700 |
|
}, |
|
{ |
|
"epoch": 23.002005179171334, |
|
"grad_norm": 51.5577392578125, |
|
"learning_rate": 4.90571730745303e-05, |
|
"loss": 1.0291, |
|
"step": 41800 |
|
}, |
|
{ |
|
"epoch": 23.0022851343785, |
|
"grad_norm": 5.804263114929199, |
|
"learning_rate": 4.9041620007465474e-05, |
|
"loss": 1.1576, |
|
"step": 41900 |
|
}, |
|
{ |
|
"epoch": 23.002565089585666, |
|
"grad_norm": 36.027217864990234, |
|
"learning_rate": 4.902606694040065e-05, |
|
"loss": 1.1973, |
|
"step": 42000 |
|
}, |
|
{ |
|
"epoch": 23.00284504479283, |
|
"grad_norm": 15.984498023986816, |
|
"learning_rate": 4.9010513873335824e-05, |
|
"loss": 1.2031, |
|
"step": 42100 |
|
}, |
|
{ |
|
"epoch": 23.003125, |
|
"grad_norm": 45.6442985534668, |
|
"learning_rate": 4.8994960806271e-05, |
|
"loss": 1.1533, |
|
"step": 42200 |
|
}, |
|
{ |
|
"epoch": 23.003404955207166, |
|
"grad_norm": 49.245113372802734, |
|
"learning_rate": 4.897956326987682e-05, |
|
"loss": 1.239, |
|
"step": 42300 |
|
}, |
|
{ |
|
"epoch": 23.003684910414332, |
|
"grad_norm": 2.6122968196868896, |
|
"learning_rate": 4.8964010202812e-05, |
|
"loss": 1.0613, |
|
"step": 42400 |
|
}, |
|
{ |
|
"epoch": 23.0039648656215, |
|
"grad_norm": 63.001007080078125, |
|
"learning_rate": 4.894845713574717e-05, |
|
"loss": 0.9579, |
|
"step": 42500 |
|
}, |
|
{ |
|
"epoch": 23.004244820828667, |
|
"grad_norm": 47.75780487060547, |
|
"learning_rate": 4.893290406868234e-05, |
|
"loss": 1.444, |
|
"step": 42600 |
|
}, |
|
{ |
|
"epoch": 23.004524776035833, |
|
"grad_norm": 26.68849754333496, |
|
"learning_rate": 4.891735100161752e-05, |
|
"loss": 1.2768, |
|
"step": 42700 |
|
}, |
|
{ |
|
"epoch": 23.004804731243002, |
|
"grad_norm": 31.364978790283203, |
|
"learning_rate": 4.890179793455269e-05, |
|
"loss": 1.4698, |
|
"step": 42800 |
|
}, |
|
{ |
|
"epoch": 23.00500069988802, |
|
"eval_accuracy": 0.36772216547497444, |
|
"eval_f1": 0.32961532849173303, |
|
"eval_loss": 2.9281723499298096, |
|
"eval_precision": 0.34658649431376704, |
|
"eval_recall": 0.36772216547497444, |
|
"eval_runtime": 114.379, |
|
"eval_samples_per_second": 34.237, |
|
"eval_steps_per_second": 17.119, |
|
"eval_top_10_accuracy": 0.7946884576098059, |
|
"eval_top_1_accuracy": 0.3674668028600613, |
|
"eval_top_5_accuracy": 0.7070990806945863, |
|
"step": 42870 |
|
}, |
|
{ |
|
"epoch": 24.00008398656215, |
|
"grad_norm": 29.290653228759766, |
|
"learning_rate": 4.888624486748787e-05, |
|
"loss": 1.3773, |
|
"step": 42900 |
|
}, |
|
{ |
|
"epoch": 24.00036394176932, |
|
"grad_norm": 7.684507369995117, |
|
"learning_rate": 4.8870847331093695e-05, |
|
"loss": 0.7986, |
|
"step": 43000 |
|
}, |
|
{ |
|
"epoch": 24.000643896976484, |
|
"grad_norm": 3.6387572288513184, |
|
"learning_rate": 4.885529426402887e-05, |
|
"loss": 0.889, |
|
"step": 43100 |
|
}, |
|
{ |
|
"epoch": 24.00092385218365, |
|
"grad_norm": 16.366535186767578, |
|
"learning_rate": 4.8839741196964046e-05, |
|
"loss": 0.7856, |
|
"step": 43200 |
|
}, |
|
{ |
|
"epoch": 24.001203807390816, |
|
"grad_norm": 41.7845573425293, |
|
"learning_rate": 4.882418812989922e-05, |
|
"loss": 0.9726, |
|
"step": 43300 |
|
}, |
|
{ |
|
"epoch": 24.001483762597985, |
|
"grad_norm": 15.093663215637207, |
|
"learning_rate": 4.8808635062834396e-05, |
|
"loss": 0.8313, |
|
"step": 43400 |
|
}, |
|
{ |
|
"epoch": 24.00176371780515, |
|
"grad_norm": 53.5677490234375, |
|
"learning_rate": 4.879308199576957e-05, |
|
"loss": 1.0816, |
|
"step": 43500 |
|
}, |
|
{ |
|
"epoch": 24.002043673012317, |
|
"grad_norm": 54.367271423339844, |
|
"learning_rate": 4.877752892870474e-05, |
|
"loss": 1.0475, |
|
"step": 43600 |
|
}, |
|
{ |
|
"epoch": 24.002323628219486, |
|
"grad_norm": 41.49662399291992, |
|
"learning_rate": 4.876197586163992e-05, |
|
"loss": 0.8916, |
|
"step": 43700 |
|
}, |
|
{ |
|
"epoch": 24.00260358342665, |
|
"grad_norm": 16.519433975219727, |
|
"learning_rate": 4.874642279457509e-05, |
|
"loss": 0.8659, |
|
"step": 43800 |
|
}, |
|
{ |
|
"epoch": 24.002883538633817, |
|
"grad_norm": 61.23699188232422, |
|
"learning_rate": 4.873086972751027e-05, |
|
"loss": 1.2549, |
|
"step": 43900 |
|
}, |
|
{ |
|
"epoch": 24.003163493840987, |
|
"grad_norm": 10.837386131286621, |
|
"learning_rate": 4.871531666044544e-05, |
|
"loss": 1.1515, |
|
"step": 44000 |
|
}, |
|
{ |
|
"epoch": 24.003443449048152, |
|
"grad_norm": 27.969383239746094, |
|
"learning_rate": 4.869976359338061e-05, |
|
"loss": 1.1793, |
|
"step": 44100 |
|
}, |
|
{ |
|
"epoch": 24.003723404255318, |
|
"grad_norm": 50.31840133666992, |
|
"learning_rate": 4.868421052631579e-05, |
|
"loss": 1.3247, |
|
"step": 44200 |
|
}, |
|
{ |
|
"epoch": 24.004003359462487, |
|
"grad_norm": 11.990558624267578, |
|
"learning_rate": 4.8668657459250964e-05, |
|
"loss": 1.1745, |
|
"step": 44300 |
|
}, |
|
{ |
|
"epoch": 24.004283314669653, |
|
"grad_norm": 46.31821060180664, |
|
"learning_rate": 4.865310439218614e-05, |
|
"loss": 1.1745, |
|
"step": 44400 |
|
}, |
|
{ |
|
"epoch": 24.00456326987682, |
|
"grad_norm": 8.647761344909668, |
|
"learning_rate": 4.8637551325121314e-05, |
|
"loss": 1.1952, |
|
"step": 44500 |
|
}, |
|
{ |
|
"epoch": 24.004843225083988, |
|
"grad_norm": 9.188798904418945, |
|
"learning_rate": 4.8621998258056486e-05, |
|
"loss": 1.1594, |
|
"step": 44600 |
|
}, |
|
{ |
|
"epoch": 24.005, |
|
"eval_accuracy": 0.38993871297242083, |
|
"eval_f1": 0.35323001094121087, |
|
"eval_loss": 2.9185523986816406, |
|
"eval_precision": 0.36621209910280395, |
|
"eval_recall": 0.38993871297242083, |
|
"eval_runtime": 116.0344, |
|
"eval_samples_per_second": 33.749, |
|
"eval_steps_per_second": 16.874, |
|
"eval_top_10_accuracy": 0.7987742594484167, |
|
"eval_top_1_accuracy": 0.38993871297242083, |
|
"eval_top_5_accuracy": 0.7124616956077631, |
|
"step": 44656 |
|
}, |
|
{ |
|
"epoch": 25.000122480403135, |
|
"grad_norm": 7.127791404724121, |
|
"learning_rate": 4.8606445190991665e-05, |
|
"loss": 0.9656, |
|
"step": 44700 |
|
}, |
|
{ |
|
"epoch": 25.0004024356103, |
|
"grad_norm": 6.424502372741699, |
|
"learning_rate": 4.859089212392684e-05, |
|
"loss": 0.8375, |
|
"step": 44800 |
|
}, |
|
{ |
|
"epoch": 25.00068239081747, |
|
"grad_norm": 56.99434280395508, |
|
"learning_rate": 4.8575339056862015e-05, |
|
"loss": 0.7758, |
|
"step": 44900 |
|
}, |
|
{ |
|
"epoch": 25.000962346024636, |
|
"grad_norm": 29.912485122680664, |
|
"learning_rate": 4.8559785989797194e-05, |
|
"loss": 0.7439, |
|
"step": 45000 |
|
}, |
|
{ |
|
"epoch": 25.0012423012318, |
|
"grad_norm": 35.43897247314453, |
|
"learning_rate": 4.8544232922732366e-05, |
|
"loss": 0.7085, |
|
"step": 45100 |
|
}, |
|
{ |
|
"epoch": 25.00152225643897, |
|
"grad_norm": 9.219779968261719, |
|
"learning_rate": 4.8528679855667544e-05, |
|
"loss": 0.9102, |
|
"step": 45200 |
|
}, |
|
{ |
|
"epoch": 25.001802211646137, |
|
"grad_norm": 4.801015853881836, |
|
"learning_rate": 4.8513126788602716e-05, |
|
"loss": 1.014, |
|
"step": 45300 |
|
}, |
|
{ |
|
"epoch": 25.002082166853302, |
|
"grad_norm": 43.83305740356445, |
|
"learning_rate": 4.8497573721537895e-05, |
|
"loss": 0.8622, |
|
"step": 45400 |
|
}, |
|
{ |
|
"epoch": 25.00236212206047, |
|
"grad_norm": 66.25143432617188, |
|
"learning_rate": 4.848202065447307e-05, |
|
"loss": 0.9431, |
|
"step": 45500 |
|
}, |
|
{ |
|
"epoch": 25.002642077267637, |
|
"grad_norm": 29.633420944213867, |
|
"learning_rate": 4.846646758740824e-05, |
|
"loss": 1.0897, |
|
"step": 45600 |
|
}, |
|
{ |
|
"epoch": 25.002922032474803, |
|
"grad_norm": 33.82661819458008, |
|
"learning_rate": 4.845091452034342e-05, |
|
"loss": 1.117, |
|
"step": 45700 |
|
}, |
|
{ |
|
"epoch": 25.003201987681972, |
|
"grad_norm": 46.27246856689453, |
|
"learning_rate": 4.843536145327859e-05, |
|
"loss": 1.2575, |
|
"step": 45800 |
|
}, |
|
{ |
|
"epoch": 25.003481942889138, |
|
"grad_norm": 18.312467575073242, |
|
"learning_rate": 4.841980838621377e-05, |
|
"loss": 1.2425, |
|
"step": 45900 |
|
}, |
|
{ |
|
"epoch": 25.003761898096304, |
|
"grad_norm": 28.17131233215332, |
|
"learning_rate": 4.840425531914894e-05, |
|
"loss": 1.2341, |
|
"step": 46000 |
|
}, |
|
{ |
|
"epoch": 25.004041853303473, |
|
"grad_norm": 28.144180297851562, |
|
"learning_rate": 4.838870225208411e-05, |
|
"loss": 1.2243, |
|
"step": 46100 |
|
}, |
|
{ |
|
"epoch": 25.00432180851064, |
|
"grad_norm": 16.919340133666992, |
|
"learning_rate": 4.837314918501929e-05, |
|
"loss": 0.9636, |
|
"step": 46200 |
|
}, |
|
{ |
|
"epoch": 25.004601763717805, |
|
"grad_norm": 5.783956527709961, |
|
"learning_rate": 4.835759611795446e-05, |
|
"loss": 1.0324, |
|
"step": 46300 |
|
}, |
|
{ |
|
"epoch": 25.00488171892497, |
|
"grad_norm": 9.570374488830566, |
|
"learning_rate": 4.834204305088964e-05, |
|
"loss": 0.8815, |
|
"step": 46400 |
|
}, |
|
{ |
|
"epoch": 25.004999300111983, |
|
"eval_accuracy": 0.3825331971399387, |
|
"eval_f1": 0.34693060387008295, |
|
"eval_loss": 3.0209546089172363, |
|
"eval_precision": 0.3639688225745427, |
|
"eval_recall": 0.3825331971399387, |
|
"eval_runtime": 115.6367, |
|
"eval_samples_per_second": 33.865, |
|
"eval_steps_per_second": 16.932, |
|
"eval_top_10_accuracy": 0.7964759959141982, |
|
"eval_top_1_accuracy": 0.3827885597548519, |
|
"eval_top_5_accuracy": 0.7053115423901941, |
|
"step": 46442 |
|
}, |
|
{ |
|
"epoch": 26.00016097424412, |
|
"grad_norm": 16.35182762145996, |
|
"learning_rate": 4.832648998382481e-05, |
|
"loss": 0.7362, |
|
"step": 46500 |
|
}, |
|
{ |
|
"epoch": 26.000440929451287, |
|
"grad_norm": 34.7078742980957, |
|
"learning_rate": 4.8310936916759985e-05, |
|
"loss": 0.8688, |
|
"step": 46600 |
|
}, |
|
{ |
|
"epoch": 26.000720884658456, |
|
"grad_norm": 2.940587043762207, |
|
"learning_rate": 4.8295383849695164e-05, |
|
"loss": 0.8609, |
|
"step": 46700 |
|
}, |
|
{ |
|
"epoch": 26.001000839865622, |
|
"grad_norm": 35.10813903808594, |
|
"learning_rate": 4.8279830782630336e-05, |
|
"loss": 0.6674, |
|
"step": 46800 |
|
}, |
|
{ |
|
"epoch": 26.001280795072788, |
|
"grad_norm": 4.418734550476074, |
|
"learning_rate": 4.8264277715565514e-05, |
|
"loss": 0.856, |
|
"step": 46900 |
|
}, |
|
{ |
|
"epoch": 26.001560750279957, |
|
"grad_norm": 37.9604377746582, |
|
"learning_rate": 4.824888017917134e-05, |
|
"loss": 0.7902, |
|
"step": 47000 |
|
}, |
|
{ |
|
"epoch": 26.001840705487123, |
|
"grad_norm": 32.70397186279297, |
|
"learning_rate": 4.823332711210651e-05, |
|
"loss": 0.7813, |
|
"step": 47100 |
|
}, |
|
{ |
|
"epoch": 26.00212066069429, |
|
"grad_norm": 35.73707962036133, |
|
"learning_rate": 4.821777404504169e-05, |
|
"loss": 0.9995, |
|
"step": 47200 |
|
}, |
|
{ |
|
"epoch": 26.002400615901454, |
|
"grad_norm": 9.47586727142334, |
|
"learning_rate": 4.820222097797686e-05, |
|
"loss": 1.0135, |
|
"step": 47300 |
|
}, |
|
{ |
|
"epoch": 26.002680571108623, |
|
"grad_norm": 37.69544982910156, |
|
"learning_rate": 4.818666791091204e-05, |
|
"loss": 0.9796, |
|
"step": 47400 |
|
}, |
|
{ |
|
"epoch": 26.00296052631579, |
|
"grad_norm": 10.6676025390625, |
|
"learning_rate": 4.817111484384721e-05, |
|
"loss": 1.3322, |
|
"step": 47500 |
|
}, |
|
{ |
|
"epoch": 26.003240481522955, |
|
"grad_norm": 40.86244201660156, |
|
"learning_rate": 4.815556177678238e-05, |
|
"loss": 1.1818, |
|
"step": 47600 |
|
}, |
|
{ |
|
"epoch": 26.003520436730124, |
|
"grad_norm": 34.827247619628906, |
|
"learning_rate": 4.814000870971756e-05, |
|
"loss": 0.8761, |
|
"step": 47700 |
|
}, |
|
{ |
|
"epoch": 26.00380039193729, |
|
"grad_norm": 23.81336784362793, |
|
"learning_rate": 4.812445564265273e-05, |
|
"loss": 1.1277, |
|
"step": 47800 |
|
}, |
|
{ |
|
"epoch": 26.004080347144455, |
|
"grad_norm": 44.09092330932617, |
|
"learning_rate": 4.810890257558791e-05, |
|
"loss": 1.4091, |
|
"step": 47900 |
|
}, |
|
{ |
|
"epoch": 26.004360302351625, |
|
"grad_norm": 36.06610107421875, |
|
"learning_rate": 4.8093349508523084e-05, |
|
"loss": 1.1976, |
|
"step": 48000 |
|
}, |
|
{ |
|
"epoch": 26.00464025755879, |
|
"grad_norm": 2.117263078689575, |
|
"learning_rate": 4.8077796441458256e-05, |
|
"loss": 1.1112, |
|
"step": 48100 |
|
}, |
|
{ |
|
"epoch": 26.004920212765956, |
|
"grad_norm": 9.393363952636719, |
|
"learning_rate": 4.806239890506408e-05, |
|
"loss": 1.348, |
|
"step": 48200 |
|
}, |
|
{ |
|
"epoch": 26.004998600223963, |
|
"eval_accuracy": 0.377170582226762, |
|
"eval_f1": 0.33966766691485795, |
|
"eval_loss": 3.026718854904175, |
|
"eval_precision": 0.3537270321042027, |
|
"eval_recall": 0.377170582226762, |
|
"eval_runtime": 117.8101, |
|
"eval_samples_per_second": 33.24, |
|
"eval_steps_per_second": 16.62, |
|
"eval_top_10_accuracy": 0.797752808988764, |
|
"eval_top_1_accuracy": 0.377170582226762, |
|
"eval_top_5_accuracy": 0.7073544433094995, |
|
"step": 48228 |
|
}, |
|
{ |
|
"epoch": 27.000199468085107, |
|
"grad_norm": 48.105777740478516, |
|
"learning_rate": 4.804684583799926e-05, |
|
"loss": 0.9049, |
|
"step": 48300 |
|
}, |
|
{ |
|
"epoch": 27.000479423292273, |
|
"grad_norm": 39.231109619140625, |
|
"learning_rate": 4.803129277093443e-05, |
|
"loss": 0.8953, |
|
"step": 48400 |
|
}, |
|
{ |
|
"epoch": 27.00075937849944, |
|
"grad_norm": 2.2942845821380615, |
|
"learning_rate": 4.801573970386961e-05, |
|
"loss": 0.7185, |
|
"step": 48500 |
|
}, |
|
{ |
|
"epoch": 27.001039333706608, |
|
"grad_norm": 0.9364669322967529, |
|
"learning_rate": 4.800018663680478e-05, |
|
"loss": 0.7835, |
|
"step": 48600 |
|
}, |
|
{ |
|
"epoch": 27.001319288913773, |
|
"grad_norm": 40.069271087646484, |
|
"learning_rate": 4.798463356973996e-05, |
|
"loss": 0.8621, |
|
"step": 48700 |
|
}, |
|
{ |
|
"epoch": 27.00159924412094, |
|
"grad_norm": 11.61902141571045, |
|
"learning_rate": 4.796908050267513e-05, |
|
"loss": 0.9434, |
|
"step": 48800 |
|
}, |
|
{ |
|
"epoch": 27.00187919932811, |
|
"grad_norm": 3.5938429832458496, |
|
"learning_rate": 4.795352743561031e-05, |
|
"loss": 0.9811, |
|
"step": 48900 |
|
}, |
|
{ |
|
"epoch": 27.002159154535274, |
|
"grad_norm": 47.871726989746094, |
|
"learning_rate": 4.793797436854548e-05, |
|
"loss": 0.9615, |
|
"step": 49000 |
|
}, |
|
{ |
|
"epoch": 27.00243910974244, |
|
"grad_norm": 22.639846801757812, |
|
"learning_rate": 4.7922421301480654e-05, |
|
"loss": 1.0804, |
|
"step": 49100 |
|
}, |
|
{ |
|
"epoch": 27.00271906494961, |
|
"grad_norm": 53.62788772583008, |
|
"learning_rate": 4.790686823441583e-05, |
|
"loss": 0.9659, |
|
"step": 49200 |
|
}, |
|
{ |
|
"epoch": 27.002999020156775, |
|
"grad_norm": 18.483760833740234, |
|
"learning_rate": 4.7891315167351004e-05, |
|
"loss": 0.9848, |
|
"step": 49300 |
|
}, |
|
{ |
|
"epoch": 27.00327897536394, |
|
"grad_norm": 30.66340446472168, |
|
"learning_rate": 4.787576210028618e-05, |
|
"loss": 1.042, |
|
"step": 49400 |
|
}, |
|
{ |
|
"epoch": 27.00355893057111, |
|
"grad_norm": 29.91460418701172, |
|
"learning_rate": 4.7860209033221355e-05, |
|
"loss": 0.9379, |
|
"step": 49500 |
|
}, |
|
{ |
|
"epoch": 27.003838885778276, |
|
"grad_norm": 17.159460067749023, |
|
"learning_rate": 4.784465596615653e-05, |
|
"loss": 1.1205, |
|
"step": 49600 |
|
}, |
|
{ |
|
"epoch": 27.00411884098544, |
|
"grad_norm": 46.86989974975586, |
|
"learning_rate": 4.7829102899091705e-05, |
|
"loss": 1.08, |
|
"step": 49700 |
|
}, |
|
{ |
|
"epoch": 27.00439879619261, |
|
"grad_norm": 13.506775856018066, |
|
"learning_rate": 4.781354983202688e-05, |
|
"loss": 1.2232, |
|
"step": 49800 |
|
}, |
|
{ |
|
"epoch": 27.004678751399776, |
|
"grad_norm": 33.25643539428711, |
|
"learning_rate": 4.7797996764962056e-05, |
|
"loss": 1.1331, |
|
"step": 49900 |
|
}, |
|
{ |
|
"epoch": 27.004958706606942, |
|
"grad_norm": 47.94986343383789, |
|
"learning_rate": 4.778244369789723e-05, |
|
"loss": 1.0531, |
|
"step": 50000 |
|
}, |
|
{ |
|
"epoch": 27.00500069988802, |
|
"eval_accuracy": 0.38074565883554645, |
|
"eval_f1": 0.3463921527059934, |
|
"eval_loss": 3.005545139312744, |
|
"eval_precision": 0.36555650965763326, |
|
"eval_recall": 0.38074565883554645, |
|
"eval_runtime": 114.6503, |
|
"eval_samples_per_second": 34.156, |
|
"eval_steps_per_second": 17.078, |
|
"eval_top_10_accuracy": 0.8107763023493361, |
|
"eval_top_1_accuracy": 0.3804902962206333, |
|
"eval_top_5_accuracy": 0.719611848825332, |
|
"step": 50015 |
|
}, |
|
{ |
|
"epoch": 28.000237961926093, |
|
"grad_norm": 6.364796161651611, |
|
"learning_rate": 4.77668906308324e-05, |
|
"loss": 0.5291, |
|
"step": 50100 |
|
}, |
|
{ |
|
"epoch": 28.00051791713326, |
|
"grad_norm": 45.56444549560547, |
|
"learning_rate": 4.775133756376758e-05, |
|
"loss": 0.7383, |
|
"step": 50200 |
|
}, |
|
{ |
|
"epoch": 28.000797872340424, |
|
"grad_norm": 2.9990179538726807, |
|
"learning_rate": 4.773578449670275e-05, |
|
"loss": 0.6026, |
|
"step": 50300 |
|
}, |
|
{ |
|
"epoch": 28.001077827547594, |
|
"grad_norm": 44.893760681152344, |
|
"learning_rate": 4.772023142963793e-05, |
|
"loss": 0.6877, |
|
"step": 50400 |
|
}, |
|
{ |
|
"epoch": 28.00135778275476, |
|
"grad_norm": 4.229214191436768, |
|
"learning_rate": 4.77046783625731e-05, |
|
"loss": 0.9156, |
|
"step": 50500 |
|
}, |
|
{ |
|
"epoch": 28.001637737961925, |
|
"grad_norm": 3.375356912612915, |
|
"learning_rate": 4.768912529550827e-05, |
|
"loss": 0.8209, |
|
"step": 50600 |
|
}, |
|
{ |
|
"epoch": 28.001917693169094, |
|
"grad_norm": 68.21538543701172, |
|
"learning_rate": 4.767357222844345e-05, |
|
"loss": 0.8576, |
|
"step": 50700 |
|
}, |
|
{ |
|
"epoch": 28.00219764837626, |
|
"grad_norm": 36.52906799316406, |
|
"learning_rate": 4.765801916137862e-05, |
|
"loss": 0.8978, |
|
"step": 50800 |
|
}, |
|
{ |
|
"epoch": 28.002477603583426, |
|
"grad_norm": 46.26045227050781, |
|
"learning_rate": 4.76424660943138e-05, |
|
"loss": 1.1163, |
|
"step": 50900 |
|
}, |
|
{ |
|
"epoch": 28.002757558790595, |
|
"grad_norm": 21.620988845825195, |
|
"learning_rate": 4.7626913027248974e-05, |
|
"loss": 0.9089, |
|
"step": 51000 |
|
}, |
|
{ |
|
"epoch": 28.00303751399776, |
|
"grad_norm": 1.2796549797058105, |
|
"learning_rate": 4.7611359960184146e-05, |
|
"loss": 1.1172, |
|
"step": 51100 |
|
}, |
|
{ |
|
"epoch": 28.003317469204926, |
|
"grad_norm": 28.369571685791016, |
|
"learning_rate": 4.7595806893119324e-05, |
|
"loss": 0.9379, |
|
"step": 51200 |
|
}, |
|
{ |
|
"epoch": 28.003597424412096, |
|
"grad_norm": 71.3536148071289, |
|
"learning_rate": 4.7580253826054496e-05, |
|
"loss": 1.0674, |
|
"step": 51300 |
|
}, |
|
{ |
|
"epoch": 28.00387737961926, |
|
"grad_norm": 32.85470199584961, |
|
"learning_rate": 4.7564700758989675e-05, |
|
"loss": 1.1896, |
|
"step": 51400 |
|
}, |
|
{ |
|
"epoch": 28.004157334826427, |
|
"grad_norm": 26.999794006347656, |
|
"learning_rate": 4.754914769192485e-05, |
|
"loss": 0.8049, |
|
"step": 51500 |
|
}, |
|
{ |
|
"epoch": 28.004437290033593, |
|
"grad_norm": 6.688983917236328, |
|
"learning_rate": 4.7533594624860026e-05, |
|
"loss": 1.221, |
|
"step": 51600 |
|
}, |
|
{ |
|
"epoch": 28.004717245240762, |
|
"grad_norm": 31.977493286132812, |
|
"learning_rate": 4.7518041557795204e-05, |
|
"loss": 1.0372, |
|
"step": 51700 |
|
}, |
|
{ |
|
"epoch": 28.004997200447928, |
|
"grad_norm": 45.08998107910156, |
|
"learning_rate": 4.7502488490730376e-05, |
|
"loss": 1.2516, |
|
"step": 51800 |
|
}, |
|
{ |
|
"epoch": 28.005, |
|
"eval_accuracy": 0.3631256384065373, |
|
"eval_f1": 0.3243790610322335, |
|
"eval_loss": 3.1701900959014893, |
|
"eval_precision": 0.3353089935827423, |
|
"eval_recall": 0.3631256384065373, |
|
"eval_runtime": 114.3388, |
|
"eval_samples_per_second": 34.249, |
|
"eval_steps_per_second": 17.125, |
|
"eval_top_10_accuracy": 0.7883043922369765, |
|
"eval_top_1_accuracy": 0.36338100102145043, |
|
"eval_top_5_accuracy": 0.6841164453524005, |
|
"step": 51801 |
|
}, |
|
{ |
|
"epoch": 29.00027645576708, |
|
"grad_norm": 24.97677230834961, |
|
"learning_rate": 4.7486935423665555e-05, |
|
"loss": 0.6765, |
|
"step": 51900 |
|
}, |
|
{ |
|
"epoch": 29.000556410974244, |
|
"grad_norm": 2.3787806034088135, |
|
"learning_rate": 4.747138235660073e-05, |
|
"loss": 0.7211, |
|
"step": 52000 |
|
}, |
|
{ |
|
"epoch": 29.00083636618141, |
|
"grad_norm": 36.792728424072266, |
|
"learning_rate": 4.74558292895359e-05, |
|
"loss": 0.9286, |
|
"step": 52100 |
|
}, |
|
{ |
|
"epoch": 29.00111632138858, |
|
"grad_norm": 39.00687026977539, |
|
"learning_rate": 4.744043175314172e-05, |
|
"loss": 0.7545, |
|
"step": 52200 |
|
}, |
|
{ |
|
"epoch": 29.001396276595745, |
|
"grad_norm": 6.44840669631958, |
|
"learning_rate": 4.7424878686076894e-05, |
|
"loss": 0.6815, |
|
"step": 52300 |
|
}, |
|
{ |
|
"epoch": 29.00167623180291, |
|
"grad_norm": 21.892784118652344, |
|
"learning_rate": 4.740932561901207e-05, |
|
"loss": 0.7581, |
|
"step": 52400 |
|
}, |
|
{ |
|
"epoch": 29.00195618701008, |
|
"grad_norm": 53.59468078613281, |
|
"learning_rate": 4.7393772551947245e-05, |
|
"loss": 0.9388, |
|
"step": 52500 |
|
}, |
|
{ |
|
"epoch": 29.002236142217246, |
|
"grad_norm": 19.31954002380371, |
|
"learning_rate": 4.737821948488242e-05, |
|
"loss": 0.7623, |
|
"step": 52600 |
|
}, |
|
{ |
|
"epoch": 29.00251609742441, |
|
"grad_norm": 56.00475311279297, |
|
"learning_rate": 4.7362666417817595e-05, |
|
"loss": 1.1146, |
|
"step": 52700 |
|
}, |
|
{ |
|
"epoch": 29.002796052631577, |
|
"grad_norm": 69.8625259399414, |
|
"learning_rate": 4.734711335075277e-05, |
|
"loss": 0.8305, |
|
"step": 52800 |
|
}, |
|
{ |
|
"epoch": 29.003076007838747, |
|
"grad_norm": 15.013059616088867, |
|
"learning_rate": 4.7331560283687946e-05, |
|
"loss": 0.9888, |
|
"step": 52900 |
|
}, |
|
{ |
|
"epoch": 29.003355963045912, |
|
"grad_norm": 45.95860290527344, |
|
"learning_rate": 4.731600721662312e-05, |
|
"loss": 0.9892, |
|
"step": 53000 |
|
}, |
|
{ |
|
"epoch": 29.003635918253078, |
|
"grad_norm": 8.254734992980957, |
|
"learning_rate": 4.7300454149558296e-05, |
|
"loss": 0.9714, |
|
"step": 53100 |
|
}, |
|
{ |
|
"epoch": 29.003915873460247, |
|
"grad_norm": 32.338558197021484, |
|
"learning_rate": 4.728490108249347e-05, |
|
"loss": 0.9985, |
|
"step": 53200 |
|
}, |
|
{ |
|
"epoch": 29.004195828667413, |
|
"grad_norm": 43.390541076660156, |
|
"learning_rate": 4.726934801542865e-05, |
|
"loss": 0.948, |
|
"step": 53300 |
|
}, |
|
{ |
|
"epoch": 29.00447578387458, |
|
"grad_norm": 3.346212148666382, |
|
"learning_rate": 4.725379494836382e-05, |
|
"loss": 0.9203, |
|
"step": 53400 |
|
}, |
|
{ |
|
"epoch": 29.004755739081748, |
|
"grad_norm": 53.539886474609375, |
|
"learning_rate": 4.7238241881299e-05, |
|
"loss": 1.2142, |
|
"step": 53500 |
|
}, |
|
{ |
|
"epoch": 29.004999300111983, |
|
"eval_accuracy": 0.37461695607763024, |
|
"eval_f1": 0.34094689004673684, |
|
"eval_loss": 3.153719902038574, |
|
"eval_precision": 0.3627320351592568, |
|
"eval_recall": 0.37461695607763024, |
|
"eval_runtime": 112.1729, |
|
"eval_samples_per_second": 34.91, |
|
"eval_steps_per_second": 17.455, |
|
"eval_top_10_accuracy": 0.7972420837589377, |
|
"eval_top_1_accuracy": 0.3743615934627171, |
|
"eval_top_5_accuracy": 0.6948416751787538, |
|
"step": 53587 |
|
}, |
|
{ |
|
"epoch": 30.000034994400895, |
|
"grad_norm": 1.2987140417099, |
|
"learning_rate": 4.722268881423417e-05, |
|
"loss": 0.994, |
|
"step": 53600 |
|
}, |
|
{ |
|
"epoch": 30.00031494960806, |
|
"grad_norm": 40.64413070678711, |
|
"learning_rate": 4.720713574716935e-05, |
|
"loss": 0.5028, |
|
"step": 53700 |
|
}, |
|
{ |
|
"epoch": 30.00059490481523, |
|
"grad_norm": 5.865800857543945, |
|
"learning_rate": 4.719158268010452e-05, |
|
"loss": 0.7444, |
|
"step": 53800 |
|
}, |
|
{ |
|
"epoch": 30.000874860022396, |
|
"grad_norm": 7.504943370819092, |
|
"learning_rate": 4.71760296130397e-05, |
|
"loss": 0.828, |
|
"step": 53900 |
|
}, |
|
{ |
|
"epoch": 30.00115481522956, |
|
"grad_norm": 7.175074100494385, |
|
"learning_rate": 4.716047654597487e-05, |
|
"loss": 0.6409, |
|
"step": 54000 |
|
}, |
|
{ |
|
"epoch": 30.00143477043673, |
|
"grad_norm": 3.8528194427490234, |
|
"learning_rate": 4.714492347891004e-05, |
|
"loss": 0.7132, |
|
"step": 54100 |
|
}, |
|
{ |
|
"epoch": 30.001714725643897, |
|
"grad_norm": 1.616517424583435, |
|
"learning_rate": 4.7129525942515866e-05, |
|
"loss": 0.7694, |
|
"step": 54200 |
|
}, |
|
{ |
|
"epoch": 30.001994680851062, |
|
"grad_norm": 58.55043411254883, |
|
"learning_rate": 4.711397287545104e-05, |
|
"loss": 1.0431, |
|
"step": 54300 |
|
}, |
|
{ |
|
"epoch": 30.00227463605823, |
|
"grad_norm": 5.023679733276367, |
|
"learning_rate": 4.709841980838622e-05, |
|
"loss": 0.6762, |
|
"step": 54400 |
|
}, |
|
{ |
|
"epoch": 30.002554591265397, |
|
"grad_norm": 21.790382385253906, |
|
"learning_rate": 4.708286674132139e-05, |
|
"loss": 1.0054, |
|
"step": 54500 |
|
}, |
|
{ |
|
"epoch": 30.002834546472563, |
|
"grad_norm": 21.34770965576172, |
|
"learning_rate": 4.706731367425656e-05, |
|
"loss": 0.8381, |
|
"step": 54600 |
|
}, |
|
{ |
|
"epoch": 30.003114501679732, |
|
"grad_norm": 35.34033966064453, |
|
"learning_rate": 4.705176060719174e-05, |
|
"loss": 0.8623, |
|
"step": 54700 |
|
}, |
|
{ |
|
"epoch": 30.003394456886898, |
|
"grad_norm": 45.18914031982422, |
|
"learning_rate": 4.703620754012691e-05, |
|
"loss": 1.0882, |
|
"step": 54800 |
|
}, |
|
{ |
|
"epoch": 30.003674412094064, |
|
"grad_norm": 4.911450386047363, |
|
"learning_rate": 4.702065447306209e-05, |
|
"loss": 0.9956, |
|
"step": 54900 |
|
}, |
|
{ |
|
"epoch": 30.003954367301233, |
|
"grad_norm": 4.490671157836914, |
|
"learning_rate": 4.700510140599726e-05, |
|
"loss": 0.9534, |
|
"step": 55000 |
|
}, |
|
{ |
|
"epoch": 30.0042343225084, |
|
"grad_norm": 0.5849565267562866, |
|
"learning_rate": 4.698954833893244e-05, |
|
"loss": 0.7995, |
|
"step": 55100 |
|
}, |
|
{ |
|
"epoch": 30.004514277715565, |
|
"grad_norm": 1.6884137392044067, |
|
"learning_rate": 4.697399527186762e-05, |
|
"loss": 1.029, |
|
"step": 55200 |
|
}, |
|
{ |
|
"epoch": 30.004794232922734, |
|
"grad_norm": 44.37993240356445, |
|
"learning_rate": 4.695844220480279e-05, |
|
"loss": 1.1783, |
|
"step": 55300 |
|
}, |
|
{ |
|
"epoch": 30.004998600223963, |
|
"eval_accuracy": 0.3659346271705822, |
|
"eval_f1": 0.327783869178149, |
|
"eval_loss": 3.2329089641571045, |
|
"eval_precision": 0.3399942520026789, |
|
"eval_recall": 0.3659346271705822, |
|
"eval_runtime": 111.6292, |
|
"eval_samples_per_second": 35.08, |
|
"eval_steps_per_second": 17.54, |
|
"eval_top_10_accuracy": 0.7916241062308478, |
|
"eval_top_1_accuracy": 0.3659346271705822, |
|
"eval_top_5_accuracy": 0.695097037793667, |
|
"step": 55373 |
|
}, |
|
{ |
|
"epoch": 31.00007348824188, |
|
"grad_norm": 0.8294526934623718, |
|
"learning_rate": 4.694288913773797e-05, |
|
"loss": 0.7754, |
|
"step": 55400 |
|
}, |
|
{ |
|
"epoch": 31.000353443449047, |
|
"grad_norm": 38.75194549560547, |
|
"learning_rate": 4.692733607067314e-05, |
|
"loss": 0.635, |
|
"step": 55500 |
|
}, |
|
{ |
|
"epoch": 31.000633398656216, |
|
"grad_norm": 9.357181549072266, |
|
"learning_rate": 4.6911783003608313e-05, |
|
"loss": 0.5596, |
|
"step": 55600 |
|
}, |
|
{ |
|
"epoch": 31.000913353863382, |
|
"grad_norm": 7.658017158508301, |
|
"learning_rate": 4.689622993654349e-05, |
|
"loss": 0.6122, |
|
"step": 55700 |
|
}, |
|
{ |
|
"epoch": 31.001193309070548, |
|
"grad_norm": 6.124042510986328, |
|
"learning_rate": 4.6880676869478664e-05, |
|
"loss": 0.8859, |
|
"step": 55800 |
|
}, |
|
{ |
|
"epoch": 31.001473264277717, |
|
"grad_norm": 2.957393169403076, |
|
"learning_rate": 4.686512380241384e-05, |
|
"loss": 0.6045, |
|
"step": 55900 |
|
}, |
|
{ |
|
"epoch": 31.001753219484883, |
|
"grad_norm": 11.278263092041016, |
|
"learning_rate": 4.6849570735349015e-05, |
|
"loss": 0.8514, |
|
"step": 56000 |
|
}, |
|
{ |
|
"epoch": 31.00203317469205, |
|
"grad_norm": 32.86345291137695, |
|
"learning_rate": 4.6834017668284186e-05, |
|
"loss": 0.9362, |
|
"step": 56100 |
|
}, |
|
{ |
|
"epoch": 31.002313129899218, |
|
"grad_norm": 37.377540588378906, |
|
"learning_rate": 4.681862013189001e-05, |
|
"loss": 1.0054, |
|
"step": 56200 |
|
}, |
|
{ |
|
"epoch": 31.002593085106383, |
|
"grad_norm": 8.889802932739258, |
|
"learning_rate": 4.680306706482518e-05, |
|
"loss": 0.8472, |
|
"step": 56300 |
|
}, |
|
{ |
|
"epoch": 31.00287304031355, |
|
"grad_norm": 9.699048042297363, |
|
"learning_rate": 4.678751399776036e-05, |
|
"loss": 0.7464, |
|
"step": 56400 |
|
}, |
|
{ |
|
"epoch": 31.00315299552072, |
|
"grad_norm": 62.45649719238281, |
|
"learning_rate": 4.677196093069553e-05, |
|
"loss": 0.892, |
|
"step": 56500 |
|
}, |
|
{ |
|
"epoch": 31.003432950727884, |
|
"grad_norm": 6.620049476623535, |
|
"learning_rate": 4.675640786363071e-05, |
|
"loss": 0.7575, |
|
"step": 56600 |
|
}, |
|
{ |
|
"epoch": 31.00371290593505, |
|
"grad_norm": 0.34919270873069763, |
|
"learning_rate": 4.674085479656588e-05, |
|
"loss": 1.1288, |
|
"step": 56700 |
|
}, |
|
{ |
|
"epoch": 31.003992861142216, |
|
"grad_norm": 73.19112396240234, |
|
"learning_rate": 4.672530172950106e-05, |
|
"loss": 1.1417, |
|
"step": 56800 |
|
}, |
|
{ |
|
"epoch": 31.004272816349385, |
|
"grad_norm": 35.101932525634766, |
|
"learning_rate": 4.6709748662436234e-05, |
|
"loss": 1.1342, |
|
"step": 56900 |
|
}, |
|
{ |
|
"epoch": 31.00455277155655, |
|
"grad_norm": 4.475014686584473, |
|
"learning_rate": 4.669419559537141e-05, |
|
"loss": 1.2289, |
|
"step": 57000 |
|
}, |
|
{ |
|
"epoch": 31.004832726763716, |
|
"grad_norm": 40.93881607055664, |
|
"learning_rate": 4.6678642528306584e-05, |
|
"loss": 1.2075, |
|
"step": 57100 |
|
}, |
|
{ |
|
"epoch": 31.00500069988802, |
|
"eval_accuracy": 0.36925434116445355, |
|
"eval_f1": 0.3347721266424357, |
|
"eval_loss": 3.2250540256500244, |
|
"eval_precision": 0.3521287566422908, |
|
"eval_recall": 0.36925434116445355, |
|
"eval_runtime": 117.4887, |
|
"eval_samples_per_second": 33.331, |
|
"eval_steps_per_second": 16.665, |
|
"eval_top_10_accuracy": 0.7880490296220634, |
|
"eval_top_1_accuracy": 0.3695097037793667, |
|
"eval_top_5_accuracy": 0.6971399387129724, |
|
"step": 57160 |
|
}, |
|
{ |
|
"epoch": 32.00011198208286, |
|
"grad_norm": 1.2258549928665161, |
|
"learning_rate": 4.666308946124176e-05, |
|
"loss": 0.689, |
|
"step": 57200 |
|
}, |
|
{ |
|
"epoch": 32.00039193729003, |
|
"grad_norm": 4.694971084594727, |
|
"learning_rate": 4.6647536394176935e-05, |
|
"loss": 0.5123, |
|
"step": 57300 |
|
}, |
|
{ |
|
"epoch": 32.0006718924972, |
|
"grad_norm": 60.558834075927734, |
|
"learning_rate": 4.6631983327112114e-05, |
|
"loss": 0.4678, |
|
"step": 57400 |
|
}, |
|
{ |
|
"epoch": 32.000951847704364, |
|
"grad_norm": 21.36684799194336, |
|
"learning_rate": 4.6616430260047285e-05, |
|
"loss": 0.6802, |
|
"step": 57500 |
|
}, |
|
{ |
|
"epoch": 32.00123180291153, |
|
"grad_norm": 2.4743974208831787, |
|
"learning_rate": 4.660087719298246e-05, |
|
"loss": 0.7395, |
|
"step": 57600 |
|
}, |
|
{ |
|
"epoch": 32.0015117581187, |
|
"grad_norm": 11.220129013061523, |
|
"learning_rate": 4.6585324125917636e-05, |
|
"loss": 0.7529, |
|
"step": 57700 |
|
}, |
|
{ |
|
"epoch": 32.001791713325865, |
|
"grad_norm": 33.19414138793945, |
|
"learning_rate": 4.656977105885281e-05, |
|
"loss": 0.8596, |
|
"step": 57800 |
|
}, |
|
{ |
|
"epoch": 32.002071668533034, |
|
"grad_norm": 16.343273162841797, |
|
"learning_rate": 4.6554217991787987e-05, |
|
"loss": 0.6243, |
|
"step": 57900 |
|
}, |
|
{ |
|
"epoch": 32.0023516237402, |
|
"grad_norm": 36.327816009521484, |
|
"learning_rate": 4.653866492472316e-05, |
|
"loss": 0.9409, |
|
"step": 58000 |
|
}, |
|
{ |
|
"epoch": 32.002631578947366, |
|
"grad_norm": 41.437198638916016, |
|
"learning_rate": 4.652311185765833e-05, |
|
"loss": 0.8562, |
|
"step": 58100 |
|
}, |
|
{ |
|
"epoch": 32.002911534154535, |
|
"grad_norm": 47.712486267089844, |
|
"learning_rate": 4.6507714321264154e-05, |
|
"loss": 0.504, |
|
"step": 58200 |
|
}, |
|
{ |
|
"epoch": 32.003191489361704, |
|
"grad_norm": 36.536251068115234, |
|
"learning_rate": 4.6492161254199326e-05, |
|
"loss": 1.0219, |
|
"step": 58300 |
|
}, |
|
{ |
|
"epoch": 32.003471444568866, |
|
"grad_norm": 24.974342346191406, |
|
"learning_rate": 4.6476608187134505e-05, |
|
"loss": 1.0403, |
|
"step": 58400 |
|
}, |
|
{ |
|
"epoch": 32.003751399776036, |
|
"grad_norm": 37.766822814941406, |
|
"learning_rate": 4.6461055120069676e-05, |
|
"loss": 0.9055, |
|
"step": 58500 |
|
}, |
|
{ |
|
"epoch": 32.004031354983205, |
|
"grad_norm": 5.955631732940674, |
|
"learning_rate": 4.6445502053004855e-05, |
|
"loss": 0.9022, |
|
"step": 58600 |
|
}, |
|
{ |
|
"epoch": 32.00431131019037, |
|
"grad_norm": 36.062278747558594, |
|
"learning_rate": 4.643010451661068e-05, |
|
"loss": 1.2884, |
|
"step": 58700 |
|
}, |
|
{ |
|
"epoch": 32.004591265397536, |
|
"grad_norm": 42.342796325683594, |
|
"learning_rate": 4.641455144954585e-05, |
|
"loss": 1.1833, |
|
"step": 58800 |
|
}, |
|
{ |
|
"epoch": 32.004871220604706, |
|
"grad_norm": 44.055450439453125, |
|
"learning_rate": 4.639899838248103e-05, |
|
"loss": 1.1369, |
|
"step": 58900 |
|
}, |
|
{ |
|
"epoch": 32.005, |
|
"eval_accuracy": 0.34601634320735447, |
|
"eval_f1": 0.3114462019084082, |
|
"eval_loss": 3.3421857357025146, |
|
"eval_precision": 0.3287510207134824, |
|
"eval_recall": 0.34601634320735447, |
|
"eval_runtime": 114.9634, |
|
"eval_samples_per_second": 34.063, |
|
"eval_steps_per_second": 17.032, |
|
"eval_top_10_accuracy": 0.7737487231869254, |
|
"eval_top_1_accuracy": 0.34576098059244126, |
|
"eval_top_5_accuracy": 0.6802860061287027, |
|
"step": 58946 |
|
}, |
|
{ |
|
"epoch": 33.00015047592385, |
|
"grad_norm": 3.8549163341522217, |
|
"learning_rate": 4.63834453154162e-05, |
|
"loss": 0.8772, |
|
"step": 59000 |
|
}, |
|
{ |
|
"epoch": 33.00043043113102, |
|
"grad_norm": 1.2819772958755493, |
|
"learning_rate": 4.636789224835137e-05, |
|
"loss": 0.5074, |
|
"step": 59100 |
|
}, |
|
{ |
|
"epoch": 33.000710386338184, |
|
"grad_norm": 69.6226806640625, |
|
"learning_rate": 4.635233918128655e-05, |
|
"loss": 0.8393, |
|
"step": 59200 |
|
}, |
|
{ |
|
"epoch": 33.000990341545354, |
|
"grad_norm": 13.237231254577637, |
|
"learning_rate": 4.6336786114221724e-05, |
|
"loss": 0.7097, |
|
"step": 59300 |
|
}, |
|
{ |
|
"epoch": 33.00127029675252, |
|
"grad_norm": 24.04904556274414, |
|
"learning_rate": 4.63212330471569e-05, |
|
"loss": 0.8112, |
|
"step": 59400 |
|
}, |
|
{ |
|
"epoch": 33.001550251959685, |
|
"grad_norm": 36.736419677734375, |
|
"learning_rate": 4.6305679980092074e-05, |
|
"loss": 0.8325, |
|
"step": 59500 |
|
}, |
|
{ |
|
"epoch": 33.001830207166854, |
|
"grad_norm": 1.0379083156585693, |
|
"learning_rate": 4.6290126913027246e-05, |
|
"loss": 0.8391, |
|
"step": 59600 |
|
}, |
|
{ |
|
"epoch": 33.002110162374024, |
|
"grad_norm": 30.109365463256836, |
|
"learning_rate": 4.6274573845962425e-05, |
|
"loss": 0.8687, |
|
"step": 59700 |
|
}, |
|
{ |
|
"epoch": 33.002390117581186, |
|
"grad_norm": 6.3692731857299805, |
|
"learning_rate": 4.62590207788976e-05, |
|
"loss": 0.9515, |
|
"step": 59800 |
|
}, |
|
{ |
|
"epoch": 33.002670072788355, |
|
"grad_norm": 0.17226508259773254, |
|
"learning_rate": 4.6243467711832775e-05, |
|
"loss": 0.7252, |
|
"step": 59900 |
|
}, |
|
{ |
|
"epoch": 33.00295002799552, |
|
"grad_norm": 2.027529716491699, |
|
"learning_rate": 4.622791464476795e-05, |
|
"loss": 0.7717, |
|
"step": 60000 |
|
}, |
|
{ |
|
"epoch": 33.00322998320269, |
|
"grad_norm": 29.886762619018555, |
|
"learning_rate": 4.6212361577703126e-05, |
|
"loss": 1.0916, |
|
"step": 60100 |
|
}, |
|
{ |
|
"epoch": 33.003509938409856, |
|
"grad_norm": 4.087385177612305, |
|
"learning_rate": 4.61968085106383e-05, |
|
"loss": 0.8275, |
|
"step": 60200 |
|
}, |
|
{ |
|
"epoch": 33.00378989361702, |
|
"grad_norm": 12.894125938415527, |
|
"learning_rate": 4.6181255443573477e-05, |
|
"loss": 0.8847, |
|
"step": 60300 |
|
}, |
|
{ |
|
"epoch": 33.00406984882419, |
|
"grad_norm": 44.40382385253906, |
|
"learning_rate": 4.616570237650865e-05, |
|
"loss": 0.9739, |
|
"step": 60400 |
|
}, |
|
{ |
|
"epoch": 33.00434980403136, |
|
"grad_norm": 7.521993160247803, |
|
"learning_rate": 4.615014930944383e-05, |
|
"loss": 0.9827, |
|
"step": 60500 |
|
}, |
|
{ |
|
"epoch": 33.00462975923852, |
|
"grad_norm": 2.905057430267334, |
|
"learning_rate": 4.6134596242379e-05, |
|
"loss": 1.1328, |
|
"step": 60600 |
|
}, |
|
{ |
|
"epoch": 33.00490971444569, |
|
"grad_norm": 9.729948997497559, |
|
"learning_rate": 4.611904317531418e-05, |
|
"loss": 1.1948, |
|
"step": 60700 |
|
}, |
|
{ |
|
"epoch": 33.00499930011198, |
|
"eval_accuracy": 0.36338100102145043, |
|
"eval_f1": 0.3240467396278912, |
|
"eval_loss": 3.34759521484375, |
|
"eval_precision": 0.33750582212022456, |
|
"eval_recall": 0.36338100102145043, |
|
"eval_runtime": 118.2099, |
|
"eval_samples_per_second": 33.128, |
|
"eval_steps_per_second": 16.564, |
|
"eval_top_10_accuracy": 0.7734933605720122, |
|
"eval_top_1_accuracy": 0.36338100102145043, |
|
"eval_top_5_accuracy": 0.6726251276813074, |
|
"step": 60732 |
|
}, |
|
{ |
|
"epoch": 34.000188969764835, |
|
"grad_norm": 1.6296502351760864, |
|
"learning_rate": 4.610349010824935e-05, |
|
"loss": 0.7557, |
|
"step": 60800 |
|
}, |
|
{ |
|
"epoch": 34.000468924972004, |
|
"grad_norm": 5.730977535247803, |
|
"learning_rate": 4.608793704118453e-05, |
|
"loss": 0.6354, |
|
"step": 60900 |
|
}, |
|
{ |
|
"epoch": 34.000748880179174, |
|
"grad_norm": 3.109783172607422, |
|
"learning_rate": 4.60723839741197e-05, |
|
"loss": 0.6765, |
|
"step": 61000 |
|
}, |
|
{ |
|
"epoch": 34.001028835386336, |
|
"grad_norm": 0.2169467806816101, |
|
"learning_rate": 4.605683090705487e-05, |
|
"loss": 0.6485, |
|
"step": 61100 |
|
}, |
|
{ |
|
"epoch": 34.001308790593505, |
|
"grad_norm": 12.250176429748535, |
|
"learning_rate": 4.604127783999005e-05, |
|
"loss": 0.9218, |
|
"step": 61200 |
|
}, |
|
{ |
|
"epoch": 34.001588745800674, |
|
"grad_norm": 28.112674713134766, |
|
"learning_rate": 4.602572477292522e-05, |
|
"loss": 0.6046, |
|
"step": 61300 |
|
}, |
|
{ |
|
"epoch": 34.00186870100784, |
|
"grad_norm": 0.6601081490516663, |
|
"learning_rate": 4.60101717058604e-05, |
|
"loss": 0.8272, |
|
"step": 61400 |
|
}, |
|
{ |
|
"epoch": 34.002148656215006, |
|
"grad_norm": 10.42507266998291, |
|
"learning_rate": 4.599461863879557e-05, |
|
"loss": 0.8769, |
|
"step": 61500 |
|
}, |
|
{ |
|
"epoch": 34.002428611422175, |
|
"grad_norm": 42.463401794433594, |
|
"learning_rate": 4.597922110240139e-05, |
|
"loss": 0.9232, |
|
"step": 61600 |
|
}, |
|
{ |
|
"epoch": 34.00270856662934, |
|
"grad_norm": 0.37827855348587036, |
|
"learning_rate": 4.596366803533657e-05, |
|
"loss": 0.809, |
|
"step": 61700 |
|
}, |
|
{ |
|
"epoch": 34.00298852183651, |
|
"grad_norm": 60.71481704711914, |
|
"learning_rate": 4.594811496827174e-05, |
|
"loss": 1.0087, |
|
"step": 61800 |
|
}, |
|
{ |
|
"epoch": 34.003268477043676, |
|
"grad_norm": 30.099050521850586, |
|
"learning_rate": 4.593256190120692e-05, |
|
"loss": 0.7332, |
|
"step": 61900 |
|
}, |
|
{ |
|
"epoch": 34.00354843225084, |
|
"grad_norm": 37.70903396606445, |
|
"learning_rate": 4.591700883414209e-05, |
|
"loss": 0.9752, |
|
"step": 62000 |
|
}, |
|
{ |
|
"epoch": 34.00382838745801, |
|
"grad_norm": 36.76481246948242, |
|
"learning_rate": 4.590145576707727e-05, |
|
"loss": 1.3231, |
|
"step": 62100 |
|
}, |
|
{ |
|
"epoch": 34.00410834266518, |
|
"grad_norm": 24.240903854370117, |
|
"learning_rate": 4.588590270001245e-05, |
|
"loss": 0.7914, |
|
"step": 62200 |
|
}, |
|
{ |
|
"epoch": 34.00438829787234, |
|
"grad_norm": 1.8038338422775269, |
|
"learning_rate": 4.587034963294762e-05, |
|
"loss": 0.9946, |
|
"step": 62300 |
|
}, |
|
{ |
|
"epoch": 34.00466825307951, |
|
"grad_norm": 27.41489028930664, |
|
"learning_rate": 4.58547965658828e-05, |
|
"loss": 0.9436, |
|
"step": 62400 |
|
}, |
|
{ |
|
"epoch": 34.00494820828668, |
|
"grad_norm": 73.32023620605469, |
|
"learning_rate": 4.583924349881797e-05, |
|
"loss": 1.0164, |
|
"step": 62500 |
|
}, |
|
{ |
|
"epoch": 34.00499860022396, |
|
"eval_accuracy": 0.3687436159346272, |
|
"eval_f1": 0.33117920128900724, |
|
"eval_loss": 3.318777322769165, |
|
"eval_precision": 0.3433794202052629, |
|
"eval_recall": 0.3687436159346272, |
|
"eval_runtime": 115.7117, |
|
"eval_samples_per_second": 33.843, |
|
"eval_steps_per_second": 16.921, |
|
"eval_top_10_accuracy": 0.7854954034729316, |
|
"eval_top_1_accuracy": 0.3687436159346272, |
|
"eval_top_5_accuracy": 0.6968845760980592, |
|
"step": 62518 |
|
}, |
|
{ |
|
"epoch": 35.000227463605825, |
|
"grad_norm": 17.592552185058594, |
|
"learning_rate": 4.582369043175314e-05, |
|
"loss": 0.6981, |
|
"step": 62600 |
|
}, |
|
{ |
|
"epoch": 35.00050741881299, |
|
"grad_norm": 34.496055603027344, |
|
"learning_rate": 4.580813736468832e-05, |
|
"loss": 0.5374, |
|
"step": 62700 |
|
}, |
|
{ |
|
"epoch": 35.000787374020156, |
|
"grad_norm": 48.30421829223633, |
|
"learning_rate": 4.5792584297623494e-05, |
|
"loss": 0.7976, |
|
"step": 62800 |
|
}, |
|
{ |
|
"epoch": 35.001067329227325, |
|
"grad_norm": 36.826202392578125, |
|
"learning_rate": 4.577703123055867e-05, |
|
"loss": 0.6289, |
|
"step": 62900 |
|
}, |
|
{ |
|
"epoch": 35.00134728443449, |
|
"grad_norm": 31.521106719970703, |
|
"learning_rate": 4.5761478163493844e-05, |
|
"loss": 0.7489, |
|
"step": 63000 |
|
}, |
|
{ |
|
"epoch": 35.00162723964166, |
|
"grad_norm": 0.38109827041625977, |
|
"learning_rate": 4.5745925096429016e-05, |
|
"loss": 0.6528, |
|
"step": 63100 |
|
}, |
|
{ |
|
"epoch": 35.001907194848826, |
|
"grad_norm": 0.5344606041908264, |
|
"learning_rate": 4.5730372029364195e-05, |
|
"loss": 0.4075, |
|
"step": 63200 |
|
}, |
|
{ |
|
"epoch": 35.00218715005599, |
|
"grad_norm": 1.7213187217712402, |
|
"learning_rate": 4.5714818962299367e-05, |
|
"loss": 0.9644, |
|
"step": 63300 |
|
}, |
|
{ |
|
"epoch": 35.00246710526316, |
|
"grad_norm": 0.6551365852355957, |
|
"learning_rate": 4.5699265895234545e-05, |
|
"loss": 0.8157, |
|
"step": 63400 |
|
}, |
|
{ |
|
"epoch": 35.00274706047033, |
|
"grad_norm": 27.040754318237305, |
|
"learning_rate": 4.568371282816972e-05, |
|
"loss": 0.797, |
|
"step": 63500 |
|
}, |
|
{ |
|
"epoch": 35.00302701567749, |
|
"grad_norm": 10.12726879119873, |
|
"learning_rate": 4.566815976110489e-05, |
|
"loss": 0.9778, |
|
"step": 63600 |
|
}, |
|
{ |
|
"epoch": 35.00330697088466, |
|
"grad_norm": 43.65861892700195, |
|
"learning_rate": 4.565260669404007e-05, |
|
"loss": 0.9172, |
|
"step": 63700 |
|
}, |
|
{ |
|
"epoch": 35.00358692609183, |
|
"grad_norm": 23.98292350769043, |
|
"learning_rate": 4.563705362697524e-05, |
|
"loss": 0.8713, |
|
"step": 63800 |
|
}, |
|
{ |
|
"epoch": 35.00386688129899, |
|
"grad_norm": 35.00185012817383, |
|
"learning_rate": 4.562150055991042e-05, |
|
"loss": 0.87, |
|
"step": 63900 |
|
}, |
|
{ |
|
"epoch": 35.00414683650616, |
|
"grad_norm": 1.3145835399627686, |
|
"learning_rate": 4.560594749284559e-05, |
|
"loss": 0.9978, |
|
"step": 64000 |
|
}, |
|
{ |
|
"epoch": 35.00442679171333, |
|
"grad_norm": 0.8550679683685303, |
|
"learning_rate": 4.559039442578076e-05, |
|
"loss": 1.0991, |
|
"step": 64100 |
|
}, |
|
{ |
|
"epoch": 35.00470674692049, |
|
"grad_norm": 7.469823360443115, |
|
"learning_rate": 4.557484135871594e-05, |
|
"loss": 1.0481, |
|
"step": 64200 |
|
}, |
|
{ |
|
"epoch": 35.00498670212766, |
|
"grad_norm": 8.4567232131958, |
|
"learning_rate": 4.555928829165111e-05, |
|
"loss": 0.8987, |
|
"step": 64300 |
|
}, |
|
{ |
|
"epoch": 35.005000699888015, |
|
"eval_accuracy": 0.3718079673135853, |
|
"eval_f1": 0.33662947681078426, |
|
"eval_loss": 3.3163700103759766, |
|
"eval_precision": 0.35664473422389664, |
|
"eval_recall": 0.3718079673135853, |
|
"eval_runtime": 118.3976, |
|
"eval_samples_per_second": 33.075, |
|
"eval_steps_per_second": 16.538, |
|
"eval_top_10_accuracy": 0.7911133810010215, |
|
"eval_top_1_accuracy": 0.37206332992849844, |
|
"eval_top_5_accuracy": 0.6986721144024515, |
|
"step": 64305 |
|
}, |
|
{ |
|
"epoch": 35.005000699888015, |
|
"step": 64305, |
|
"total_flos": 6.524018866222793e+20, |
|
"train_loss": 9.042802423402353, |
|
"train_runtime": 31106.056, |
|
"train_samples_per_second": 91.866, |
|
"train_steps_per_second": 11.483 |
|
} |
|
], |
|
"logging_steps": 100, |
|
"max_steps": 357200, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 9223372036854775807, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"EarlyStoppingCallback": { |
|
"args": { |
|
"early_stopping_patience": 20, |
|
"early_stopping_threshold": 0.0 |
|
}, |
|
"attributes": { |
|
"early_stopping_patience_counter": 20 |
|
} |
|
}, |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 6.524018866222793e+20, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|