{ "best_metric": 0.8285714285714286, "best_model_checkpoint": "/home/ubuntu/utah/babylm-24/src/evaluation/results/finetune/DebertaV2-Base-10M_babylm-A/qnli/checkpoint-52372", "epoch": 5.0, "eval_steps": 500, "global_step": 65465, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.038188344917131294, "grad_norm": 5.27421760559082, "learning_rate": 2.9770869930497214e-05, "loss": 0.6892, "step": 500 }, { "epoch": 0.07637668983426259, "grad_norm": 4.82838773727417, "learning_rate": 2.9541739860994424e-05, "loss": 0.6448, "step": 1000 }, { "epoch": 0.11456503475139387, "grad_norm": 12.10763931274414, "learning_rate": 2.9312609791491637e-05, "loss": 0.6037, "step": 1500 }, { "epoch": 0.15275337966852517, "grad_norm": 8.465042114257812, "learning_rate": 2.9083479721988847e-05, "loss": 0.5814, "step": 2000 }, { "epoch": 0.19094172458565645, "grad_norm": 11.114578247070312, "learning_rate": 2.885434965248606e-05, "loss": 0.5602, "step": 2500 }, { "epoch": 0.22913006950278775, "grad_norm": 6.436436653137207, "learning_rate": 2.8625219582983274e-05, "loss": 0.5531, "step": 3000 }, { "epoch": 0.26731841441991905, "grad_norm": 7.126890182495117, "learning_rate": 2.8396089513480484e-05, "loss": 0.5461, "step": 3500 }, { "epoch": 0.30550675933705035, "grad_norm": 11.758148193359375, "learning_rate": 2.8166959443977697e-05, "loss": 0.5357, "step": 4000 }, { "epoch": 0.34369510425418165, "grad_norm": 8.311270713806152, "learning_rate": 2.7937829374474914e-05, "loss": 0.5264, "step": 4500 }, { "epoch": 0.3818834491713129, "grad_norm": 19.002521514892578, "learning_rate": 2.7708699304972124e-05, "loss": 0.508, "step": 5000 }, { "epoch": 0.4200717940884442, "grad_norm": 6.31497859954834, "learning_rate": 2.7479569235469337e-05, "loss": 0.5139, "step": 5500 }, { "epoch": 0.4582601390055755, "grad_norm": 5.799989223480225, "learning_rate": 2.7250439165966547e-05, "loss": 0.5135, "step": 6000 }, { "epoch": 0.4964484839227068, "grad_norm": 12.306049346923828, "learning_rate": 2.702130909646376e-05, "loss": 0.5084, "step": 6500 }, { "epoch": 0.5346368288398381, "grad_norm": 5.672856330871582, "learning_rate": 2.6792179026960974e-05, "loss": 0.5093, "step": 7000 }, { "epoch": 0.5728251737569694, "grad_norm": 5.512757778167725, "learning_rate": 2.6563048957458184e-05, "loss": 0.4886, "step": 7500 }, { "epoch": 0.6110135186741007, "grad_norm": 8.749648094177246, "learning_rate": 2.6333918887955397e-05, "loss": 0.5072, "step": 8000 }, { "epoch": 0.649201863591232, "grad_norm": 7.571271896362305, "learning_rate": 2.6104788818452607e-05, "loss": 0.5016, "step": 8500 }, { "epoch": 0.6873902085083633, "grad_norm": 17.182308197021484, "learning_rate": 2.587565874894982e-05, "loss": 0.5002, "step": 9000 }, { "epoch": 0.7255785534254945, "grad_norm": 12.367780685424805, "learning_rate": 2.5646528679447034e-05, "loss": 0.5002, "step": 9500 }, { "epoch": 0.7637668983426258, "grad_norm": 5.078122138977051, "learning_rate": 2.5417398609944244e-05, "loss": 0.4825, "step": 10000 }, { "epoch": 0.8019552432597571, "grad_norm": 5.737787246704102, "learning_rate": 2.5188268540441457e-05, "loss": 0.4814, "step": 10500 }, { "epoch": 0.8401435881768884, "grad_norm": 3.5042452812194824, "learning_rate": 2.495913847093867e-05, "loss": 0.4748, "step": 11000 }, { "epoch": 0.8783319330940197, "grad_norm": 7.273848056793213, "learning_rate": 2.473000840143588e-05, "loss": 0.4685, "step": 11500 }, { "epoch": 0.916520278011151, "grad_norm": 3.022127628326416, "learning_rate": 2.4500878331933094e-05, "loss": 0.4793, "step": 12000 }, { "epoch": 0.9547086229282823, "grad_norm": 8.954468727111816, "learning_rate": 2.4271748262430307e-05, "loss": 0.4817, "step": 12500 }, { "epoch": 0.9928969678454136, "grad_norm": 3.7548470497131348, "learning_rate": 2.404261819292752e-05, "loss": 0.4805, "step": 13000 }, { "epoch": 1.0, "eval_accuracy": 0.8078330755233765, "eval_f1": 0.8059149722735675, "eval_loss": 0.4447055459022522, "eval_mcc": 0.6169132771266066, "eval_runtime": 4.5017, "eval_samples_per_second": 606.882, "eval_steps_per_second": 75.971, "step": 13093 }, { "epoch": 1.0310853127625448, "grad_norm": 1.004704475402832, "learning_rate": 2.3813488123424734e-05, "loss": 0.4283, "step": 13500 }, { "epoch": 1.0692736576796762, "grad_norm": 1.3949764966964722, "learning_rate": 2.3584358053921944e-05, "loss": 0.4103, "step": 14000 }, { "epoch": 1.1074620025968074, "grad_norm": 3.696608781814575, "learning_rate": 2.3355227984419157e-05, "loss": 0.3922, "step": 14500 }, { "epoch": 1.1456503475139388, "grad_norm": 6.074676990509033, "learning_rate": 2.3126097914916367e-05, "loss": 0.422, "step": 15000 }, { "epoch": 1.18383869243107, "grad_norm": 15.201245307922363, "learning_rate": 2.289696784541358e-05, "loss": 0.3991, "step": 15500 }, { "epoch": 1.2220270373482014, "grad_norm": 8.428204536437988, "learning_rate": 2.2667837775910794e-05, "loss": 0.3953, "step": 16000 }, { "epoch": 1.2602153822653326, "grad_norm": 8.055763244628906, "learning_rate": 2.2438707706408004e-05, "loss": 0.4149, "step": 16500 }, { "epoch": 1.298403727182464, "grad_norm": 6.819204330444336, "learning_rate": 2.2209577636905217e-05, "loss": 0.4093, "step": 17000 }, { "epoch": 1.3365920720995952, "grad_norm": 11.55788516998291, "learning_rate": 2.198044756740243e-05, "loss": 0.4205, "step": 17500 }, { "epoch": 1.3747804170167264, "grad_norm": 11.068483352661133, "learning_rate": 2.175131749789964e-05, "loss": 0.4016, "step": 18000 }, { "epoch": 1.4129687619338578, "grad_norm": 7.251551151275635, "learning_rate": 2.1522187428396854e-05, "loss": 0.4117, "step": 18500 }, { "epoch": 1.4511571068509892, "grad_norm": 3.3298726081848145, "learning_rate": 2.1293057358894064e-05, "loss": 0.4165, "step": 19000 }, { "epoch": 1.4893454517681204, "grad_norm": 10.534771919250488, "learning_rate": 2.1063927289391277e-05, "loss": 0.3855, "step": 19500 }, { "epoch": 1.5275337966852516, "grad_norm": 2.159674882888794, "learning_rate": 2.083479721988849e-05, "loss": 0.4037, "step": 20000 }, { "epoch": 1.565722141602383, "grad_norm": 17.90852165222168, "learning_rate": 2.06056671503857e-05, "loss": 0.4016, "step": 20500 }, { "epoch": 1.6039104865195144, "grad_norm": 8.16418170928955, "learning_rate": 2.0376537080882917e-05, "loss": 0.4137, "step": 21000 }, { "epoch": 1.6420988314366456, "grad_norm": 12.395405769348145, "learning_rate": 2.0147407011380127e-05, "loss": 0.4151, "step": 21500 }, { "epoch": 1.6802871763537768, "grad_norm": 11.432701110839844, "learning_rate": 1.991827694187734e-05, "loss": 0.4108, "step": 22000 }, { "epoch": 1.718475521270908, "grad_norm": 8.871509552001953, "learning_rate": 1.9689146872374554e-05, "loss": 0.4162, "step": 22500 }, { "epoch": 1.7566638661880394, "grad_norm": 14.043828010559082, "learning_rate": 1.9460016802871764e-05, "loss": 0.4132, "step": 23000 }, { "epoch": 1.7948522111051708, "grad_norm": 2.865736722946167, "learning_rate": 1.9230886733368977e-05, "loss": 0.4134, "step": 23500 }, { "epoch": 1.833040556022302, "grad_norm": 22.327316284179688, "learning_rate": 1.900175666386619e-05, "loss": 0.4115, "step": 24000 }, { "epoch": 1.8712289009394332, "grad_norm": 4.896880626678467, "learning_rate": 1.87726265943634e-05, "loss": 0.4055, "step": 24500 }, { "epoch": 1.9094172458565646, "grad_norm": 7.3645453453063965, "learning_rate": 1.8543496524860614e-05, "loss": 0.3995, "step": 25000 }, { "epoch": 1.947605590773696, "grad_norm": 7.702456951141357, "learning_rate": 1.8314366455357823e-05, "loss": 0.4007, "step": 25500 }, { "epoch": 1.9857939356908272, "grad_norm": 1.3061147928237915, "learning_rate": 1.8085236385855037e-05, "loss": 0.3896, "step": 26000 }, { "epoch": 2.0, "eval_accuracy": 0.8056368827819824, "eval_f1": 0.7880239520958083, "eval_loss": 0.46099478006362915, "eval_mcc": 0.6254887910718564, "eval_runtime": 4.3521, "eval_samples_per_second": 627.742, "eval_steps_per_second": 78.583, "step": 26186 }, { "epoch": 2.0239822806079584, "grad_norm": 32.53457260131836, "learning_rate": 1.785610631635225e-05, "loss": 0.3674, "step": 26500 }, { "epoch": 2.0621706255250896, "grad_norm": 28.81082534790039, "learning_rate": 1.762697624684946e-05, "loss": 0.3316, "step": 27000 }, { "epoch": 2.100358970442221, "grad_norm": 30.623323440551758, "learning_rate": 1.7397846177346673e-05, "loss": 0.353, "step": 27500 }, { "epoch": 2.1385473153593524, "grad_norm": 4.29934024810791, "learning_rate": 1.7168716107843887e-05, "loss": 0.3368, "step": 28000 }, { "epoch": 2.1767356602764836, "grad_norm": 16.6721248626709, "learning_rate": 1.6939586038341097e-05, "loss": 0.3285, "step": 28500 }, { "epoch": 2.2149240051936148, "grad_norm": 23.68825912475586, "learning_rate": 1.6710455968838313e-05, "loss": 0.3423, "step": 29000 }, { "epoch": 2.2531123501107464, "grad_norm": 10.835516929626465, "learning_rate": 1.6481325899335523e-05, "loss": 0.3585, "step": 29500 }, { "epoch": 2.2913006950278776, "grad_norm": 9.098734855651855, "learning_rate": 1.6252195829832737e-05, "loss": 0.3239, "step": 30000 }, { "epoch": 2.3294890399450088, "grad_norm": 22.620588302612305, "learning_rate": 1.602306576032995e-05, "loss": 0.3298, "step": 30500 }, { "epoch": 2.36767738486214, "grad_norm": 12.412976264953613, "learning_rate": 1.579393569082716e-05, "loss": 0.3335, "step": 31000 }, { "epoch": 2.405865729779271, "grad_norm": 1.1997746229171753, "learning_rate": 1.5564805621324373e-05, "loss": 0.3369, "step": 31500 }, { "epoch": 2.444054074696403, "grad_norm": 1.9365686178207397, "learning_rate": 1.5335675551821583e-05, "loss": 0.3513, "step": 32000 }, { "epoch": 2.482242419613534, "grad_norm": 1.2392919063568115, "learning_rate": 1.5106545482318797e-05, "loss": 0.3404, "step": 32500 }, { "epoch": 2.520430764530665, "grad_norm": 4.929393291473389, "learning_rate": 1.4877415412816008e-05, "loss": 0.3532, "step": 33000 }, { "epoch": 2.558619109447797, "grad_norm": 9.98500919342041, "learning_rate": 1.4648285343313222e-05, "loss": 0.3464, "step": 33500 }, { "epoch": 2.596807454364928, "grad_norm": 20.652223587036133, "learning_rate": 1.4419155273810433e-05, "loss": 0.3387, "step": 34000 }, { "epoch": 2.634995799282059, "grad_norm": 16.235082626342773, "learning_rate": 1.4190025204307645e-05, "loss": 0.3416, "step": 34500 }, { "epoch": 2.6731841441991904, "grad_norm": 14.930472373962402, "learning_rate": 1.3960895134804858e-05, "loss": 0.3536, "step": 35000 }, { "epoch": 2.7113724891163216, "grad_norm": 5.0768723487854, "learning_rate": 1.3731765065302072e-05, "loss": 0.3465, "step": 35500 }, { "epoch": 2.7495608340334527, "grad_norm": 28.997390747070312, "learning_rate": 1.3502634995799283e-05, "loss": 0.3315, "step": 36000 }, { "epoch": 2.7877491789505844, "grad_norm": 11.442214012145996, "learning_rate": 1.3273504926296495e-05, "loss": 0.3451, "step": 36500 }, { "epoch": 2.8259375238677156, "grad_norm": 0.6783624291419983, "learning_rate": 1.3044374856793707e-05, "loss": 0.3514, "step": 37000 }, { "epoch": 2.8641258687848468, "grad_norm": 37.28209686279297, "learning_rate": 1.2815244787290918e-05, "loss": 0.3356, "step": 37500 }, { "epoch": 2.9023142137019784, "grad_norm": 43.36020278930664, "learning_rate": 1.2586114717788132e-05, "loss": 0.3429, "step": 38000 }, { "epoch": 2.9405025586191096, "grad_norm": 7.131562232971191, "learning_rate": 1.2356984648285343e-05, "loss": 0.3591, "step": 38500 }, { "epoch": 2.9786909035362408, "grad_norm": 9.949460983276367, "learning_rate": 1.2127854578782555e-05, "loss": 0.3421, "step": 39000 }, { "epoch": 3.0, "eval_accuracy": 0.8199121356010437, "eval_f1": 0.8107692307692308, "eval_loss": 0.5570406317710876, "eval_mcc": 0.6463183939049854, "eval_runtime": 4.2333, "eval_samples_per_second": 645.355, "eval_steps_per_second": 80.787, "step": 39279 }, { "epoch": 3.016879248453372, "grad_norm": 0.43843379616737366, "learning_rate": 1.1898724509279768e-05, "loss": 0.3235, "step": 39500 }, { "epoch": 3.055067593370503, "grad_norm": 61.34454345703125, "learning_rate": 1.1669594439776982e-05, "loss": 0.271, "step": 40000 }, { "epoch": 3.093255938287635, "grad_norm": 0.9818670153617859, "learning_rate": 1.1440464370274193e-05, "loss": 0.2757, "step": 40500 }, { "epoch": 3.131444283204766, "grad_norm": 0.36656156182289124, "learning_rate": 1.1211334300771405e-05, "loss": 0.2941, "step": 41000 }, { "epoch": 3.169632628121897, "grad_norm": 2.658578634262085, "learning_rate": 1.0982204231268616e-05, "loss": 0.304, "step": 41500 }, { "epoch": 3.2078209730390284, "grad_norm": 62.08125686645508, "learning_rate": 1.075307416176583e-05, "loss": 0.2656, "step": 42000 }, { "epoch": 3.24600931795616, "grad_norm": 1.8512933254241943, "learning_rate": 1.0523944092263041e-05, "loss": 0.2882, "step": 42500 }, { "epoch": 3.284197662873291, "grad_norm": 13.63339614868164, "learning_rate": 1.0294814022760253e-05, "loss": 0.3025, "step": 43000 }, { "epoch": 3.3223860077904224, "grad_norm": 2.177273988723755, "learning_rate": 1.0065683953257466e-05, "loss": 0.2961, "step": 43500 }, { "epoch": 3.3605743527075536, "grad_norm": 1.4219642877578735, "learning_rate": 9.836553883754678e-06, "loss": 0.2742, "step": 44000 }, { "epoch": 3.3987626976246847, "grad_norm": 0.64596027135849, "learning_rate": 9.607423814251891e-06, "loss": 0.2994, "step": 44500 }, { "epoch": 3.4369510425418164, "grad_norm": 77.24871063232422, "learning_rate": 9.378293744749103e-06, "loss": 0.2916, "step": 45000 }, { "epoch": 3.4751393874589476, "grad_norm": 7.46405029296875, "learning_rate": 9.149163675246315e-06, "loss": 0.3085, "step": 45500 }, { "epoch": 3.5133277323760788, "grad_norm": 4.723397254943848, "learning_rate": 8.920033605743526e-06, "loss": 0.2931, "step": 46000 }, { "epoch": 3.55151607729321, "grad_norm": 97.39447784423828, "learning_rate": 8.69090353624074e-06, "loss": 0.2926, "step": 46500 }, { "epoch": 3.5897044222103416, "grad_norm": 0.4084111750125885, "learning_rate": 8.461773466737951e-06, "loss": 0.2992, "step": 47000 }, { "epoch": 3.6278927671274728, "grad_norm": 27.625211715698242, "learning_rate": 8.232643397235165e-06, "loss": 0.3038, "step": 47500 }, { "epoch": 3.666081112044604, "grad_norm": 0.5350797772407532, "learning_rate": 8.003513327732376e-06, "loss": 0.3016, "step": 48000 }, { "epoch": 3.704269456961735, "grad_norm": 24.899646759033203, "learning_rate": 7.77438325822959e-06, "loss": 0.2929, "step": 48500 }, { "epoch": 3.7424578018788663, "grad_norm": 0.9800614714622498, "learning_rate": 7.545253188726801e-06, "loss": 0.2738, "step": 49000 }, { "epoch": 3.780646146795998, "grad_norm": 0.16734516620635986, "learning_rate": 7.316123119224013e-06, "loss": 0.2904, "step": 49500 }, { "epoch": 3.818834491713129, "grad_norm": 10.135973930358887, "learning_rate": 7.086993049721225e-06, "loss": 0.2886, "step": 50000 }, { "epoch": 3.8570228366302604, "grad_norm": 61.60314178466797, "learning_rate": 6.857862980218438e-06, "loss": 0.2952, "step": 50500 }, { "epoch": 3.895211181547392, "grad_norm": 9.588881492614746, "learning_rate": 6.6287329107156496e-06, "loss": 0.2921, "step": 51000 }, { "epoch": 3.933399526464523, "grad_norm": 0.1929616928100586, "learning_rate": 6.399602841212862e-06, "loss": 0.2709, "step": 51500 }, { "epoch": 3.9715878713816544, "grad_norm": 0.6599931716918945, "learning_rate": 6.170472771710074e-06, "loss": 0.3067, "step": 52000 }, { "epoch": 4.0, "eval_accuracy": 0.833089292049408, "eval_f1": 0.8285714285714286, "eval_loss": 0.6773508191108704, "eval_mcc": 0.6692795780687539, "eval_runtime": 4.2344, "eval_samples_per_second": 645.189, "eval_steps_per_second": 80.767, "step": 52372 }, { "epoch": 4.009776216298786, "grad_norm": 0.5341033339500427, "learning_rate": 5.941342702207287e-06, "loss": 0.2573, "step": 52500 }, { "epoch": 4.047964561215917, "grad_norm": 0.10887029767036438, "learning_rate": 5.712212632704499e-06, "loss": 0.2086, "step": 53000 }, { "epoch": 4.086152906133048, "grad_norm": 0.5474696755409241, "learning_rate": 5.483082563201711e-06, "loss": 0.2096, "step": 53500 }, { "epoch": 4.124341251050179, "grad_norm": 0.2599523365497589, "learning_rate": 5.253952493698923e-06, "loss": 0.2287, "step": 54000 }, { "epoch": 4.162529595967311, "grad_norm": 0.21023152768611908, "learning_rate": 5.024822424196136e-06, "loss": 0.2255, "step": 54500 }, { "epoch": 4.200717940884442, "grad_norm": 5.383127689361572, "learning_rate": 4.795692354693348e-06, "loss": 0.2258, "step": 55000 }, { "epoch": 4.238906285801574, "grad_norm": 67.38241577148438, "learning_rate": 4.56656228519056e-06, "loss": 0.2449, "step": 55500 }, { "epoch": 4.277094630718705, "grad_norm": 23.330398559570312, "learning_rate": 4.337432215687772e-06, "loss": 0.2202, "step": 56000 }, { "epoch": 4.315282975635836, "grad_norm": 0.09070462733507156, "learning_rate": 4.1083021461849844e-06, "loss": 0.2381, "step": 56500 }, { "epoch": 4.353471320552967, "grad_norm": 0.6017701029777527, "learning_rate": 3.879172076682197e-06, "loss": 0.2297, "step": 57000 }, { "epoch": 4.391659665470098, "grad_norm": 121.69049072265625, "learning_rate": 3.650042007179409e-06, "loss": 0.2281, "step": 57500 }, { "epoch": 4.4298480103872295, "grad_norm": 8.436279296875, "learning_rate": 3.420911937676621e-06, "loss": 0.2058, "step": 58000 }, { "epoch": 4.468036355304361, "grad_norm": 0.06213747337460518, "learning_rate": 3.1917818681738336e-06, "loss": 0.2357, "step": 58500 }, { "epoch": 4.506224700221493, "grad_norm": 27.62065315246582, "learning_rate": 2.9626517986710456e-06, "loss": 0.2321, "step": 59000 }, { "epoch": 4.544413045138624, "grad_norm": 14.322306632995605, "learning_rate": 2.733521729168258e-06, "loss": 0.2368, "step": 59500 }, { "epoch": 4.582601390055755, "grad_norm": 31.77568244934082, "learning_rate": 2.50439165966547e-06, "loss": 0.1999, "step": 60000 }, { "epoch": 4.620789734972886, "grad_norm": 86.43828582763672, "learning_rate": 2.2752615901626823e-06, "loss": 0.218, "step": 60500 }, { "epoch": 4.6589780798900176, "grad_norm": 0.2887605130672455, "learning_rate": 2.0461315206598943e-06, "loss": 0.2306, "step": 61000 }, { "epoch": 4.697166424807149, "grad_norm": 13.985712051391602, "learning_rate": 1.817001451157107e-06, "loss": 0.2345, "step": 61500 }, { "epoch": 4.73535476972428, "grad_norm": 0.07132984697818756, "learning_rate": 1.5878713816543193e-06, "loss": 0.2151, "step": 62000 }, { "epoch": 4.773543114641411, "grad_norm": 0.13393868505954742, "learning_rate": 1.3587413121515314e-06, "loss": 0.2338, "step": 62500 }, { "epoch": 4.811731459558542, "grad_norm": 0.11832548677921295, "learning_rate": 1.1296112426487437e-06, "loss": 0.1972, "step": 63000 }, { "epoch": 4.849919804475674, "grad_norm": 0.08967792987823486, "learning_rate": 9.004811731459558e-07, "loss": 0.2259, "step": 63500 }, { "epoch": 4.888108149392806, "grad_norm": 18.879093170166016, "learning_rate": 6.713511036431681e-07, "loss": 0.2202, "step": 64000 }, { "epoch": 4.926296494309937, "grad_norm": 3.323814868927002, "learning_rate": 4.4222103414038035e-07, "loss": 0.2104, "step": 64500 }, { "epoch": 4.964484839227068, "grad_norm": 0.28909558057785034, "learning_rate": 2.1309096463759263e-07, "loss": 0.2006, "step": 65000 }, { "epoch": 5.0, "eval_accuracy": 0.8239384889602661, "eval_f1": 0.8211230940870212, "eval_loss": 0.8540835380554199, "eval_mcc": 0.6497022655427117, "eval_runtime": 4.2538, "eval_samples_per_second": 642.254, "eval_steps_per_second": 80.399, "step": 65465 }, { "epoch": 5.0, "step": 65465, "total_flos": 3.089800061643264e+16, "train_loss": 0.35705531047534644, "train_runtime": 3742.5961, "train_samples_per_second": 139.934, "train_steps_per_second": 17.492 } ], "logging_steps": 500, "max_steps": 65465, "num_input_tokens_seen": 0, "num_train_epochs": 5, "save_steps": 500, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 3, "early_stopping_threshold": 0.001 }, "attributes": { "early_stopping_patience_counter": 0 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 3.089800061643264e+16, "train_batch_size": 8, "trial_name": null, "trial_params": null }