{
  "best_metric": 0.8285714285714286,
  "best_model_checkpoint": "/home/ubuntu/utah/babylm-24/src/evaluation/results/finetune/DebertaV2-Base-10M_babylm-A/qnli/checkpoint-52372",
  "epoch": 4.0,
  "eval_steps": 500,
  "global_step": 52372,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.038188344917131294,
      "grad_norm": 5.27421760559082,
      "learning_rate": 2.9770869930497214e-05,
      "loss": 0.6892,
      "step": 500
    },
    {
      "epoch": 0.07637668983426259,
      "grad_norm": 4.82838773727417,
      "learning_rate": 2.9541739860994424e-05,
      "loss": 0.6448,
      "step": 1000
    },
    {
      "epoch": 0.11456503475139387,
      "grad_norm": 12.10763931274414,
      "learning_rate": 2.9312609791491637e-05,
      "loss": 0.6037,
      "step": 1500
    },
    {
      "epoch": 0.15275337966852517,
      "grad_norm": 8.465042114257812,
      "learning_rate": 2.9083479721988847e-05,
      "loss": 0.5814,
      "step": 2000
    },
    {
      "epoch": 0.19094172458565645,
      "grad_norm": 11.114578247070312,
      "learning_rate": 2.885434965248606e-05,
      "loss": 0.5602,
      "step": 2500
    },
    {
      "epoch": 0.22913006950278775,
      "grad_norm": 6.436436653137207,
      "learning_rate": 2.8625219582983274e-05,
      "loss": 0.5531,
      "step": 3000
    },
    {
      "epoch": 0.26731841441991905,
      "grad_norm": 7.126890182495117,
      "learning_rate": 2.8396089513480484e-05,
      "loss": 0.5461,
      "step": 3500
    },
    {
      "epoch": 0.30550675933705035,
      "grad_norm": 11.758148193359375,
      "learning_rate": 2.8166959443977697e-05,
      "loss": 0.5357,
      "step": 4000
    },
    {
      "epoch": 0.34369510425418165,
      "grad_norm": 8.311270713806152,
      "learning_rate": 2.7937829374474914e-05,
      "loss": 0.5264,
      "step": 4500
    },
    {
      "epoch": 0.3818834491713129,
      "grad_norm": 19.002521514892578,
      "learning_rate": 2.7708699304972124e-05,
      "loss": 0.508,
      "step": 5000
    },
    {
      "epoch": 0.4200717940884442,
      "grad_norm": 6.31497859954834,
      "learning_rate": 2.7479569235469337e-05,
      "loss": 0.5139,
      "step": 5500
    },
    {
      "epoch": 0.4582601390055755,
      "grad_norm": 5.799989223480225,
      "learning_rate": 2.7250439165966547e-05,
      "loss": 0.5135,
      "step": 6000
    },
    {
      "epoch": 0.4964484839227068,
      "grad_norm": 12.306049346923828,
      "learning_rate": 2.702130909646376e-05,
      "loss": 0.5084,
      "step": 6500
    },
    {
      "epoch": 0.5346368288398381,
      "grad_norm": 5.672856330871582,
      "learning_rate": 2.6792179026960974e-05,
      "loss": 0.5093,
      "step": 7000
    },
    {
      "epoch": 0.5728251737569694,
      "grad_norm": 5.512757778167725,
      "learning_rate": 2.6563048957458184e-05,
      "loss": 0.4886,
      "step": 7500
    },
    {
      "epoch": 0.6110135186741007,
      "grad_norm": 8.749648094177246,
      "learning_rate": 2.6333918887955397e-05,
      "loss": 0.5072,
      "step": 8000
    },
    {
      "epoch": 0.649201863591232,
      "grad_norm": 7.571271896362305,
      "learning_rate": 2.6104788818452607e-05,
      "loss": 0.5016,
      "step": 8500
    },
    {
      "epoch": 0.6873902085083633,
      "grad_norm": 17.182308197021484,
      "learning_rate": 2.587565874894982e-05,
      "loss": 0.5002,
      "step": 9000
    },
    {
      "epoch": 0.7255785534254945,
      "grad_norm": 12.367780685424805,
      "learning_rate": 2.5646528679447034e-05,
      "loss": 0.5002,
      "step": 9500
    },
    {
      "epoch": 0.7637668983426258,
      "grad_norm": 5.078122138977051,
      "learning_rate": 2.5417398609944244e-05,
      "loss": 0.4825,
      "step": 10000
    },
    {
      "epoch": 0.8019552432597571,
      "grad_norm": 5.737787246704102,
      "learning_rate": 2.5188268540441457e-05,
      "loss": 0.4814,
      "step": 10500
    },
    {
      "epoch": 0.8401435881768884,
      "grad_norm": 3.5042452812194824,
      "learning_rate": 2.495913847093867e-05,
      "loss": 0.4748,
      "step": 11000
    },
    {
      "epoch": 0.8783319330940197,
      "grad_norm": 7.273848056793213,
      "learning_rate": 2.473000840143588e-05,
      "loss": 0.4685,
      "step": 11500
    },
    {
      "epoch": 0.916520278011151,
      "grad_norm": 3.022127628326416,
      "learning_rate": 2.4500878331933094e-05,
      "loss": 0.4793,
      "step": 12000
    },
    {
      "epoch": 0.9547086229282823,
      "grad_norm": 8.954468727111816,
      "learning_rate": 2.4271748262430307e-05,
      "loss": 0.4817,
      "step": 12500
    },
    {
      "epoch": 0.9928969678454136,
      "grad_norm": 3.7548470497131348,
      "learning_rate": 2.404261819292752e-05,
      "loss": 0.4805,
      "step": 13000
    },
    {
      "epoch": 1.0,
      "eval_accuracy": 0.8078330755233765,
      "eval_f1": 0.8059149722735675,
      "eval_loss": 0.4447055459022522,
      "eval_mcc": 0.6169132771266066,
      "eval_runtime": 4.5017,
      "eval_samples_per_second": 606.882,
      "eval_steps_per_second": 75.971,
      "step": 13093
    },
    {
      "epoch": 1.0310853127625448,
      "grad_norm": 1.004704475402832,
      "learning_rate": 2.3813488123424734e-05,
      "loss": 0.4283,
      "step": 13500
    },
    {
      "epoch": 1.0692736576796762,
      "grad_norm": 1.3949764966964722,
      "learning_rate": 2.3584358053921944e-05,
      "loss": 0.4103,
      "step": 14000
    },
    {
      "epoch": 1.1074620025968074,
      "grad_norm": 3.696608781814575,
      "learning_rate": 2.3355227984419157e-05,
      "loss": 0.3922,
      "step": 14500
    },
    {
      "epoch": 1.1456503475139388,
      "grad_norm": 6.074676990509033,
      "learning_rate": 2.3126097914916367e-05,
      "loss": 0.422,
      "step": 15000
    },
    {
      "epoch": 1.18383869243107,
      "grad_norm": 15.201245307922363,
      "learning_rate": 2.289696784541358e-05,
      "loss": 0.3991,
      "step": 15500
    },
    {
      "epoch": 1.2220270373482014,
      "grad_norm": 8.428204536437988,
      "learning_rate": 2.2667837775910794e-05,
      "loss": 0.3953,
      "step": 16000
    },
    {
      "epoch": 1.2602153822653326,
      "grad_norm": 8.055763244628906,
      "learning_rate": 2.2438707706408004e-05,
      "loss": 0.4149,
      "step": 16500
    },
    {
      "epoch": 1.298403727182464,
      "grad_norm": 6.819204330444336,
      "learning_rate": 2.2209577636905217e-05,
      "loss": 0.4093,
      "step": 17000
    },
    {
      "epoch": 1.3365920720995952,
      "grad_norm": 11.55788516998291,
      "learning_rate": 2.198044756740243e-05,
      "loss": 0.4205,
      "step": 17500
    },
    {
      "epoch": 1.3747804170167264,
      "grad_norm": 11.068483352661133,
      "learning_rate": 2.175131749789964e-05,
      "loss": 0.4016,
      "step": 18000
    },
    {
      "epoch": 1.4129687619338578,
      "grad_norm": 7.251551151275635,
      "learning_rate": 2.1522187428396854e-05,
      "loss": 0.4117,
      "step": 18500
    },
    {
      "epoch": 1.4511571068509892,
      "grad_norm": 3.3298726081848145,
      "learning_rate": 2.1293057358894064e-05,
      "loss": 0.4165,
      "step": 19000
    },
    {
      "epoch": 1.4893454517681204,
      "grad_norm": 10.534771919250488,
      "learning_rate": 2.1063927289391277e-05,
      "loss": 0.3855,
      "step": 19500
    },
    {
      "epoch": 1.5275337966852516,
      "grad_norm": 2.159674882888794,
      "learning_rate": 2.083479721988849e-05,
      "loss": 0.4037,
      "step": 20000
    },
    {
      "epoch": 1.565722141602383,
      "grad_norm": 17.90852165222168,
      "learning_rate": 2.06056671503857e-05,
      "loss": 0.4016,
      "step": 20500
    },
    {
      "epoch": 1.6039104865195144,
      "grad_norm": 8.16418170928955,
      "learning_rate": 2.0376537080882917e-05,
      "loss": 0.4137,
      "step": 21000
    },
    {
      "epoch": 1.6420988314366456,
      "grad_norm": 12.395405769348145,
      "learning_rate": 2.0147407011380127e-05,
      "loss": 0.4151,
      "step": 21500
    },
    {
      "epoch": 1.6802871763537768,
      "grad_norm": 11.432701110839844,
      "learning_rate": 1.991827694187734e-05,
      "loss": 0.4108,
      "step": 22000
    },
    {
      "epoch": 1.718475521270908,
      "grad_norm": 8.871509552001953,
      "learning_rate": 1.9689146872374554e-05,
      "loss": 0.4162,
      "step": 22500
    },
    {
      "epoch": 1.7566638661880394,
      "grad_norm": 14.043828010559082,
      "learning_rate": 1.9460016802871764e-05,
      "loss": 0.4132,
      "step": 23000
    },
    {
      "epoch": 1.7948522111051708,
      "grad_norm": 2.865736722946167,
      "learning_rate": 1.9230886733368977e-05,
      "loss": 0.4134,
      "step": 23500
    },
    {
      "epoch": 1.833040556022302,
      "grad_norm": 22.327316284179688,
      "learning_rate": 1.900175666386619e-05,
      "loss": 0.4115,
      "step": 24000
    },
    {
      "epoch": 1.8712289009394332,
      "grad_norm": 4.896880626678467,
      "learning_rate": 1.87726265943634e-05,
      "loss": 0.4055,
      "step": 24500
    },
    {
      "epoch": 1.9094172458565646,
      "grad_norm": 7.3645453453063965,
      "learning_rate": 1.8543496524860614e-05,
      "loss": 0.3995,
      "step": 25000
    },
    {
      "epoch": 1.947605590773696,
      "grad_norm": 7.702456951141357,
      "learning_rate": 1.8314366455357823e-05,
      "loss": 0.4007,
      "step": 25500
    },
    {
      "epoch": 1.9857939356908272,
      "grad_norm": 1.3061147928237915,
      "learning_rate": 1.8085236385855037e-05,
      "loss": 0.3896,
      "step": 26000
    },
    {
      "epoch": 2.0,
      "eval_accuracy": 0.8056368827819824,
      "eval_f1": 0.7880239520958083,
      "eval_loss": 0.46099478006362915,
      "eval_mcc": 0.6254887910718564,
      "eval_runtime": 4.3521,
      "eval_samples_per_second": 627.742,
      "eval_steps_per_second": 78.583,
      "step": 26186
    },
    {
      "epoch": 2.0239822806079584,
      "grad_norm": 32.53457260131836,
      "learning_rate": 1.785610631635225e-05,
      "loss": 0.3674,
      "step": 26500
    },
    {
      "epoch": 2.0621706255250896,
      "grad_norm": 28.81082534790039,
      "learning_rate": 1.762697624684946e-05,
      "loss": 0.3316,
      "step": 27000
    },
    {
      "epoch": 2.100358970442221,
      "grad_norm": 30.623323440551758,
      "learning_rate": 1.7397846177346673e-05,
      "loss": 0.353,
      "step": 27500
    },
    {
      "epoch": 2.1385473153593524,
      "grad_norm": 4.29934024810791,
      "learning_rate": 1.7168716107843887e-05,
      "loss": 0.3368,
      "step": 28000
    },
    {
      "epoch": 2.1767356602764836,
      "grad_norm": 16.6721248626709,
      "learning_rate": 1.6939586038341097e-05,
      "loss": 0.3285,
      "step": 28500
    },
    {
      "epoch": 2.2149240051936148,
      "grad_norm": 23.68825912475586,
      "learning_rate": 1.6710455968838313e-05,
      "loss": 0.3423,
      "step": 29000
    },
    {
      "epoch": 2.2531123501107464,
      "grad_norm": 10.835516929626465,
      "learning_rate": 1.6481325899335523e-05,
      "loss": 0.3585,
      "step": 29500
    },
    {
      "epoch": 2.2913006950278776,
      "grad_norm": 9.098734855651855,
      "learning_rate": 1.6252195829832737e-05,
      "loss": 0.3239,
      "step": 30000
    },
    {
      "epoch": 2.3294890399450088,
      "grad_norm": 22.620588302612305,
      "learning_rate": 1.602306576032995e-05,
      "loss": 0.3298,
      "step": 30500
    },
    {
      "epoch": 2.36767738486214,
      "grad_norm": 12.412976264953613,
      "learning_rate": 1.579393569082716e-05,
      "loss": 0.3335,
      "step": 31000
    },
    {
      "epoch": 2.405865729779271,
      "grad_norm": 1.1997746229171753,
      "learning_rate": 1.5564805621324373e-05,
      "loss": 0.3369,
      "step": 31500
    },
    {
      "epoch": 2.444054074696403,
      "grad_norm": 1.9365686178207397,
      "learning_rate": 1.5335675551821583e-05,
      "loss": 0.3513,
      "step": 32000
    },
    {
      "epoch": 2.482242419613534,
      "grad_norm": 1.2392919063568115,
      "learning_rate": 1.5106545482318797e-05,
      "loss": 0.3404,
      "step": 32500
    },
    {
      "epoch": 2.520430764530665,
      "grad_norm": 4.929393291473389,
      "learning_rate": 1.4877415412816008e-05,
      "loss": 0.3532,
      "step": 33000
    },
    {
      "epoch": 2.558619109447797,
      "grad_norm": 9.98500919342041,
      "learning_rate": 1.4648285343313222e-05,
      "loss": 0.3464,
      "step": 33500
    },
    {
      "epoch": 2.596807454364928,
      "grad_norm": 20.652223587036133,
      "learning_rate": 1.4419155273810433e-05,
      "loss": 0.3387,
      "step": 34000
    },
    {
      "epoch": 2.634995799282059,
      "grad_norm": 16.235082626342773,
      "learning_rate": 1.4190025204307645e-05,
      "loss": 0.3416,
      "step": 34500
    },
    {
      "epoch": 2.6731841441991904,
      "grad_norm": 14.930472373962402,
      "learning_rate": 1.3960895134804858e-05,
      "loss": 0.3536,
      "step": 35000
    },
    {
      "epoch": 2.7113724891163216,
      "grad_norm": 5.0768723487854,
      "learning_rate": 1.3731765065302072e-05,
      "loss": 0.3465,
      "step": 35500
    },
    {
      "epoch": 2.7495608340334527,
      "grad_norm": 28.997390747070312,
      "learning_rate": 1.3502634995799283e-05,
      "loss": 0.3315,
      "step": 36000
    },
    {
      "epoch": 2.7877491789505844,
      "grad_norm": 11.442214012145996,
      "learning_rate": 1.3273504926296495e-05,
      "loss": 0.3451,
      "step": 36500
    },
    {
      "epoch": 2.8259375238677156,
      "grad_norm": 0.6783624291419983,
      "learning_rate": 1.3044374856793707e-05,
      "loss": 0.3514,
      "step": 37000
    },
    {
      "epoch": 2.8641258687848468,
      "grad_norm": 37.28209686279297,
      "learning_rate": 1.2815244787290918e-05,
      "loss": 0.3356,
      "step": 37500
    },
    {
      "epoch": 2.9023142137019784,
      "grad_norm": 43.36020278930664,
      "learning_rate": 1.2586114717788132e-05,
      "loss": 0.3429,
      "step": 38000
    },
    {
      "epoch": 2.9405025586191096,
      "grad_norm": 7.131562232971191,
      "learning_rate": 1.2356984648285343e-05,
      "loss": 0.3591,
      "step": 38500
    },
    {
      "epoch": 2.9786909035362408,
      "grad_norm": 9.949460983276367,
      "learning_rate": 1.2127854578782555e-05,
      "loss": 0.3421,
      "step": 39000
    },
    {
      "epoch": 3.0,
      "eval_accuracy": 0.8199121356010437,
      "eval_f1": 0.8107692307692308,
      "eval_loss": 0.5570406317710876,
      "eval_mcc": 0.6463183939049854,
      "eval_runtime": 4.2333,
      "eval_samples_per_second": 645.355,
      "eval_steps_per_second": 80.787,
      "step": 39279
    },
    {
      "epoch": 3.016879248453372,
      "grad_norm": 0.43843379616737366,
      "learning_rate": 1.1898724509279768e-05,
      "loss": 0.3235,
      "step": 39500
    },
    {
      "epoch": 3.055067593370503,
      "grad_norm": 61.34454345703125,
      "learning_rate": 1.1669594439776982e-05,
      "loss": 0.271,
      "step": 40000
    },
    {
      "epoch": 3.093255938287635,
      "grad_norm": 0.9818670153617859,
      "learning_rate": 1.1440464370274193e-05,
      "loss": 0.2757,
      "step": 40500
    },
    {
      "epoch": 3.131444283204766,
      "grad_norm": 0.36656156182289124,
      "learning_rate": 1.1211334300771405e-05,
      "loss": 0.2941,
      "step": 41000
    },
    {
      "epoch": 3.169632628121897,
      "grad_norm": 2.658578634262085,
      "learning_rate": 1.0982204231268616e-05,
      "loss": 0.304,
      "step": 41500
    },
    {
      "epoch": 3.2078209730390284,
      "grad_norm": 62.08125686645508,
      "learning_rate": 1.075307416176583e-05,
      "loss": 0.2656,
      "step": 42000
    },
    {
      "epoch": 3.24600931795616,
      "grad_norm": 1.8512933254241943,
      "learning_rate": 1.0523944092263041e-05,
      "loss": 0.2882,
      "step": 42500
    },
    {
      "epoch": 3.284197662873291,
      "grad_norm": 13.63339614868164,
      "learning_rate": 1.0294814022760253e-05,
      "loss": 0.3025,
      "step": 43000
    },
    {
      "epoch": 3.3223860077904224,
      "grad_norm": 2.177273988723755,
      "learning_rate": 1.0065683953257466e-05,
      "loss": 0.2961,
      "step": 43500
    },
    {
      "epoch": 3.3605743527075536,
      "grad_norm": 1.4219642877578735,
      "learning_rate": 9.836553883754678e-06,
      "loss": 0.2742,
      "step": 44000
    },
    {
      "epoch": 3.3987626976246847,
      "grad_norm": 0.64596027135849,
      "learning_rate": 9.607423814251891e-06,
      "loss": 0.2994,
      "step": 44500
    },
    {
      "epoch": 3.4369510425418164,
      "grad_norm": 77.24871063232422,
      "learning_rate": 9.378293744749103e-06,
      "loss": 0.2916,
      "step": 45000
    },
    {
      "epoch": 3.4751393874589476,
      "grad_norm": 7.46405029296875,
      "learning_rate": 9.149163675246315e-06,
      "loss": 0.3085,
      "step": 45500
    },
    {
      "epoch": 3.5133277323760788,
      "grad_norm": 4.723397254943848,
      "learning_rate": 8.920033605743526e-06,
      "loss": 0.2931,
      "step": 46000
    },
    {
      "epoch": 3.55151607729321,
      "grad_norm": 97.39447784423828,
      "learning_rate": 8.69090353624074e-06,
      "loss": 0.2926,
      "step": 46500
    },
    {
      "epoch": 3.5897044222103416,
      "grad_norm": 0.4084111750125885,
      "learning_rate": 8.461773466737951e-06,
      "loss": 0.2992,
      "step": 47000
    },
    {
      "epoch": 3.6278927671274728,
      "grad_norm": 27.625211715698242,
      "learning_rate": 8.232643397235165e-06,
      "loss": 0.3038,
      "step": 47500
    },
    {
      "epoch": 3.666081112044604,
      "grad_norm": 0.5350797772407532,
      "learning_rate": 8.003513327732376e-06,
      "loss": 0.3016,
      "step": 48000
    },
    {
      "epoch": 3.704269456961735,
      "grad_norm": 24.899646759033203,
      "learning_rate": 7.77438325822959e-06,
      "loss": 0.2929,
      "step": 48500
    },
    {
      "epoch": 3.7424578018788663,
      "grad_norm": 0.9800614714622498,
      "learning_rate": 7.545253188726801e-06,
      "loss": 0.2738,
      "step": 49000
    },
    {
      "epoch": 3.780646146795998,
      "grad_norm": 0.16734516620635986,
      "learning_rate": 7.316123119224013e-06,
      "loss": 0.2904,
      "step": 49500
    },
    {
      "epoch": 3.818834491713129,
      "grad_norm": 10.135973930358887,
      "learning_rate": 7.086993049721225e-06,
      "loss": 0.2886,
      "step": 50000
    },
    {
      "epoch": 3.8570228366302604,
      "grad_norm": 61.60314178466797,
      "learning_rate": 6.857862980218438e-06,
      "loss": 0.2952,
      "step": 50500
    },
    {
      "epoch": 3.895211181547392,
      "grad_norm": 9.588881492614746,
      "learning_rate": 6.6287329107156496e-06,
      "loss": 0.2921,
      "step": 51000
    },
    {
      "epoch": 3.933399526464523,
      "grad_norm": 0.1929616928100586,
      "learning_rate": 6.399602841212862e-06,
      "loss": 0.2709,
      "step": 51500
    },
    {
      "epoch": 3.9715878713816544,
      "grad_norm": 0.6599931716918945,
      "learning_rate": 6.170472771710074e-06,
      "loss": 0.3067,
      "step": 52000
    },
    {
      "epoch": 4.0,
      "eval_accuracy": 0.833089292049408,
      "eval_f1": 0.8285714285714286,
      "eval_loss": 0.6773508191108704,
      "eval_mcc": 0.6692795780687539,
      "eval_runtime": 4.2344,
      "eval_samples_per_second": 645.189,
      "eval_steps_per_second": 80.767,
      "step": 52372
    }
  ],
  "logging_steps": 500,
  "max_steps": 65465,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 5,
  "save_steps": 500,
  "stateful_callbacks": {
    "EarlyStoppingCallback": {
      "args": {
        "early_stopping_patience": 3,
        "early_stopping_threshold": 0.001
      },
      "attributes": {
        "early_stopping_patience_counter": 0
      }
    },
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 2.471840049314611e+16,
  "train_batch_size": 8,
  "trial_name": null,
  "trial_params": null
}