{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.9792492422476101, "eval_steps": 300, "global_step": 4200, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0023315458148752623, "grad_norm": 58.996910095214844, "learning_rate": 4.997668454185125e-05, "loss": 2.8188, "step": 10 }, { "epoch": 0.004663091629750525, "grad_norm": 46.450401306152344, "learning_rate": 4.995336908370249e-05, "loss": 2.2969, "step": 20 }, { "epoch": 0.006994637444625787, "grad_norm": 20.279075622558594, "learning_rate": 4.993005362555374e-05, "loss": 2.2492, "step": 30 }, { "epoch": 0.00932618325950105, "grad_norm": 54.02119445800781, "learning_rate": 4.9906738167404995e-05, "loss": 2.0938, "step": 40 }, { "epoch": 0.011657729074376311, "grad_norm": 20.223501205444336, "learning_rate": 4.988342270925624e-05, "loss": 1.9648, "step": 50 }, { "epoch": 0.013989274889251575, "grad_norm": 32.29741668701172, "learning_rate": 4.9860107251107484e-05, "loss": 1.8797, "step": 60 }, { "epoch": 0.016320820704126836, "grad_norm": 36.739723205566406, "learning_rate": 4.983679179295873e-05, "loss": 1.6664, "step": 70 }, { "epoch": 0.0186523665190021, "grad_norm": 53.299015045166016, "learning_rate": 4.981347633480998e-05, "loss": 1.6, "step": 80 }, { "epoch": 0.02098391233387736, "grad_norm": 25.623676300048828, "learning_rate": 4.979016087666123e-05, "loss": 1.6898, "step": 90 }, { "epoch": 0.023315458148752622, "grad_norm": 34.87362289428711, "learning_rate": 4.9766845418512476e-05, "loss": 1.5578, "step": 100 }, { "epoch": 0.025647003963627884, "grad_norm": 25.20810317993164, "learning_rate": 4.974352996036372e-05, "loss": 1.5078, "step": 110 }, { "epoch": 0.02797854977850315, "grad_norm": 43.89160919189453, "learning_rate": 4.972021450221497e-05, "loss": 1.4242, "step": 120 }, { "epoch": 0.03031009559337841, "grad_norm": 43.445186614990234, "learning_rate": 4.969689904406622e-05, "loss": 1.3207, "step": 130 }, { "epoch": 0.03264164140825367, "grad_norm": 49.01000213623047, "learning_rate": 4.967358358591747e-05, "loss": 1.1633, "step": 140 }, { "epoch": 0.034973187223128935, "grad_norm": 32.247344970703125, "learning_rate": 4.9650268127768713e-05, "loss": 0.9754, "step": 150 }, { "epoch": 0.0373047330380042, "grad_norm": 37.74547576904297, "learning_rate": 4.962695266961996e-05, "loss": 0.9918, "step": 160 }, { "epoch": 0.03963627885287946, "grad_norm": 29.272994995117188, "learning_rate": 4.960363721147121e-05, "loss": 1.132, "step": 170 }, { "epoch": 0.04196782466775472, "grad_norm": 34.0861930847168, "learning_rate": 4.9580321753322454e-05, "loss": 1.0285, "step": 180 }, { "epoch": 0.04429937048262998, "grad_norm": 41.01604461669922, "learning_rate": 4.95570062951737e-05, "loss": 0.9484, "step": 190 }, { "epoch": 0.046630916297505244, "grad_norm": 27.387298583984375, "learning_rate": 4.953369083702495e-05, "loss": 0.9852, "step": 200 }, { "epoch": 0.048962462112380506, "grad_norm": 27.99677848815918, "learning_rate": 4.9510375378876195e-05, "loss": 0.6836, "step": 210 }, { "epoch": 0.05129400792725577, "grad_norm": 26.071035385131836, "learning_rate": 4.948705992072745e-05, "loss": 0.8875, "step": 220 }, { "epoch": 0.05362555374213103, "grad_norm": 29.15469741821289, "learning_rate": 4.946374446257869e-05, "loss": 0.893, "step": 230 }, { "epoch": 0.0559570995570063, "grad_norm": 39.19640350341797, "learning_rate": 4.9440429004429936e-05, "loss": 0.8641, "step": 240 }, { "epoch": 0.05828864537188156, "grad_norm": 24.756563186645508, "learning_rate": 4.941711354628119e-05, "loss": 0.9574, "step": 250 }, { "epoch": 0.06062019118675682, "grad_norm": 33.53086471557617, "learning_rate": 4.939379808813244e-05, "loss": 0.8523, "step": 260 }, { "epoch": 0.06295173700163208, "grad_norm": 14.436066627502441, "learning_rate": 4.9370482629983684e-05, "loss": 0.7646, "step": 270 }, { "epoch": 0.06528328281650735, "grad_norm": 20.99724769592285, "learning_rate": 4.934716717183493e-05, "loss": 0.8172, "step": 280 }, { "epoch": 0.0676148286313826, "grad_norm": 27.321861267089844, "learning_rate": 4.932385171368617e-05, "loss": 0.8965, "step": 290 }, { "epoch": 0.06994637444625787, "grad_norm": 14.228981971740723, "learning_rate": 4.9300536255537425e-05, "loss": 0.8838, "step": 300 }, { "epoch": 0.06994637444625787, "eval_accuracy": 0.6251025430680885, "eval_f1": 0.6198441320333742, "eval_loss": 0.9746333956718445, "eval_precision": 0.6288553718864676, "eval_recall": 0.6346714934867023, "eval_runtime": 32.8297, "eval_samples_per_second": 37.131, "eval_steps_per_second": 2.345, "step": 300 }, { "epoch": 0.07227792026113312, "grad_norm": 11.636761665344238, "learning_rate": 4.9277220797388676e-05, "loss": 0.6803, "step": 310 }, { "epoch": 0.0746094660760084, "grad_norm": 28.35625457763672, "learning_rate": 4.925390533923992e-05, "loss": 0.7066, "step": 320 }, { "epoch": 0.07694101189088366, "grad_norm": 34.743988037109375, "learning_rate": 4.9230589881091165e-05, "loss": 0.6387, "step": 330 }, { "epoch": 0.07927255770575892, "grad_norm": 27.67451286315918, "learning_rate": 4.920727442294241e-05, "loss": 0.8309, "step": 340 }, { "epoch": 0.08160410352063419, "grad_norm": 35.13928985595703, "learning_rate": 4.918395896479366e-05, "loss": 0.8715, "step": 350 }, { "epoch": 0.08393564933550944, "grad_norm": 35.64329147338867, "learning_rate": 4.916064350664491e-05, "loss": 0.6961, "step": 360 }, { "epoch": 0.08626719515038471, "grad_norm": 26.17702293395996, "learning_rate": 4.913732804849616e-05, "loss": 0.7254, "step": 370 }, { "epoch": 0.08859874096525996, "grad_norm": 26.370773315429688, "learning_rate": 4.91140125903474e-05, "loss": 0.7217, "step": 380 }, { "epoch": 0.09093028678013523, "grad_norm": 34.7267951965332, "learning_rate": 4.909069713219865e-05, "loss": 0.7297, "step": 390 }, { "epoch": 0.09326183259501049, "grad_norm": 40.13215637207031, "learning_rate": 4.906738167404989e-05, "loss": 0.865, "step": 400 }, { "epoch": 0.09559337840988576, "grad_norm": 31.081972122192383, "learning_rate": 4.904406621590114e-05, "loss": 0.7848, "step": 410 }, { "epoch": 0.09792492422476101, "grad_norm": 29.940229415893555, "learning_rate": 4.9020750757752395e-05, "loss": 0.7045, "step": 420 }, { "epoch": 0.10025647003963628, "grad_norm": 28.462858200073242, "learning_rate": 4.899743529960364e-05, "loss": 0.5851, "step": 430 }, { "epoch": 0.10258801585451154, "grad_norm": 26.055572509765625, "learning_rate": 4.8974119841454884e-05, "loss": 0.6502, "step": 440 }, { "epoch": 0.1049195616693868, "grad_norm": 31.78554344177246, "learning_rate": 4.8950804383306136e-05, "loss": 0.8398, "step": 450 }, { "epoch": 0.10725110748426206, "grad_norm": 27.621715545654297, "learning_rate": 4.892748892515738e-05, "loss": 0.6201, "step": 460 }, { "epoch": 0.10958265329913733, "grad_norm": 46.45154571533203, "learning_rate": 4.890417346700863e-05, "loss": 0.8129, "step": 470 }, { "epoch": 0.1119141991140126, "grad_norm": 30.32659339904785, "learning_rate": 4.8880858008859876e-05, "loss": 0.8564, "step": 480 }, { "epoch": 0.11424574492888785, "grad_norm": 11.306530952453613, "learning_rate": 4.885754255071112e-05, "loss": 0.7492, "step": 490 }, { "epoch": 0.11657729074376312, "grad_norm": 33.427490234375, "learning_rate": 4.883422709256237e-05, "loss": 0.6204, "step": 500 }, { "epoch": 0.11890883655863838, "grad_norm": 28.030242919921875, "learning_rate": 4.881091163441362e-05, "loss": 0.6666, "step": 510 }, { "epoch": 0.12124038237351364, "grad_norm": 26.319486618041992, "learning_rate": 4.878759617626487e-05, "loss": 0.6586, "step": 520 }, { "epoch": 0.1235719281883889, "grad_norm": 25.426727294921875, "learning_rate": 4.8764280718116114e-05, "loss": 0.7229, "step": 530 }, { "epoch": 0.12590347400326415, "grad_norm": 27.011367797851562, "learning_rate": 4.874096525996736e-05, "loss": 0.8982, "step": 540 }, { "epoch": 0.12823501981813942, "grad_norm": 18.395326614379883, "learning_rate": 4.871764980181861e-05, "loss": 0.7205, "step": 550 }, { "epoch": 0.1305665656330147, "grad_norm": 21.085630416870117, "learning_rate": 4.8694334343669854e-05, "loss": 0.582, "step": 560 }, { "epoch": 0.13289811144788996, "grad_norm": 38.923683166503906, "learning_rate": 4.86710188855211e-05, "loss": 0.5496, "step": 570 }, { "epoch": 0.1352296572627652, "grad_norm": 39.22763442993164, "learning_rate": 4.864770342737235e-05, "loss": 0.734, "step": 580 }, { "epoch": 0.13756120307764047, "grad_norm": 22.916170120239258, "learning_rate": 4.8624387969223595e-05, "loss": 0.6928, "step": 590 }, { "epoch": 0.13989274889251574, "grad_norm": 19.733055114746094, "learning_rate": 4.860107251107485e-05, "loss": 0.4986, "step": 600 }, { "epoch": 0.13989274889251574, "eval_accuracy": 0.7456931911402789, "eval_f1": 0.7266139292840452, "eval_loss": 0.7722646594047546, "eval_precision": 0.7451348277162813, "eval_recall": 0.7211806530410576, "eval_runtime": 32.3956, "eval_samples_per_second": 37.629, "eval_steps_per_second": 2.377, "step": 600 }, { "epoch": 0.142224294707391, "grad_norm": 17.900259017944336, "learning_rate": 4.857775705292609e-05, "loss": 0.5705, "step": 610 }, { "epoch": 0.14455584052226625, "grad_norm": 26.365203857421875, "learning_rate": 4.8554441594777336e-05, "loss": 0.5932, "step": 620 }, { "epoch": 0.14688738633714152, "grad_norm": 20.65036392211914, "learning_rate": 4.853112613662859e-05, "loss": 0.6246, "step": 630 }, { "epoch": 0.1492189321520168, "grad_norm": 20.070476531982422, "learning_rate": 4.850781067847984e-05, "loss": 0.7621, "step": 640 }, { "epoch": 0.15155047796689206, "grad_norm": 27.528751373291016, "learning_rate": 4.8484495220331084e-05, "loss": 0.7121, "step": 650 }, { "epoch": 0.15388202378176732, "grad_norm": 30.218090057373047, "learning_rate": 4.846117976218233e-05, "loss": 0.7557, "step": 660 }, { "epoch": 0.15621356959664257, "grad_norm": 32.3998908996582, "learning_rate": 4.843786430403357e-05, "loss": 0.5539, "step": 670 }, { "epoch": 0.15854511541151783, "grad_norm": 26.95191192626953, "learning_rate": 4.8414548845884825e-05, "loss": 0.5303, "step": 680 }, { "epoch": 0.1608766612263931, "grad_norm": 30.945283889770508, "learning_rate": 4.8391233387736076e-05, "loss": 0.6001, "step": 690 }, { "epoch": 0.16320820704126837, "grad_norm": 29.30241584777832, "learning_rate": 4.836791792958732e-05, "loss": 0.7291, "step": 700 }, { "epoch": 0.1655397528561436, "grad_norm": 13.888816833496094, "learning_rate": 4.8344602471438565e-05, "loss": 0.8428, "step": 710 }, { "epoch": 0.16787129867101888, "grad_norm": 29.725255966186523, "learning_rate": 4.832128701328981e-05, "loss": 0.6883, "step": 720 }, { "epoch": 0.17020284448589415, "grad_norm": 42.3590202331543, "learning_rate": 4.829797155514106e-05, "loss": 0.6275, "step": 730 }, { "epoch": 0.17253439030076942, "grad_norm": 23.562644958496094, "learning_rate": 4.827465609699231e-05, "loss": 0.5752, "step": 740 }, { "epoch": 0.17486593611564466, "grad_norm": 32.83530044555664, "learning_rate": 4.825134063884356e-05, "loss": 0.6953, "step": 750 }, { "epoch": 0.17719748193051993, "grad_norm": 12.841109275817871, "learning_rate": 4.82280251806948e-05, "loss": 0.4848, "step": 760 }, { "epoch": 0.1795290277453952, "grad_norm": 22.86924171447754, "learning_rate": 4.820470972254605e-05, "loss": 0.6314, "step": 770 }, { "epoch": 0.18186057356027047, "grad_norm": 16.436222076416016, "learning_rate": 4.81813942643973e-05, "loss": 0.6568, "step": 780 }, { "epoch": 0.18419211937514574, "grad_norm": 13.749951362609863, "learning_rate": 4.815807880624854e-05, "loss": 0.7607, "step": 790 }, { "epoch": 0.18652366519002098, "grad_norm": 42.645729064941406, "learning_rate": 4.8134763348099795e-05, "loss": 0.536, "step": 800 }, { "epoch": 0.18885521100489625, "grad_norm": 25.09123420715332, "learning_rate": 4.811144788995104e-05, "loss": 0.6258, "step": 810 }, { "epoch": 0.19118675681977151, "grad_norm": 24.370229721069336, "learning_rate": 4.8088132431802284e-05, "loss": 0.4913, "step": 820 }, { "epoch": 0.19351830263464678, "grad_norm": 34.05779266357422, "learning_rate": 4.8064816973653536e-05, "loss": 0.8139, "step": 830 }, { "epoch": 0.19584984844952202, "grad_norm": 24.232662200927734, "learning_rate": 4.804150151550478e-05, "loss": 0.7322, "step": 840 }, { "epoch": 0.1981813942643973, "grad_norm": 20.707740783691406, "learning_rate": 4.801818605735603e-05, "loss": 0.591, "step": 850 }, { "epoch": 0.20051294007927256, "grad_norm": 26.865257263183594, "learning_rate": 4.7994870599207277e-05, "loss": 0.6098, "step": 860 }, { "epoch": 0.20284448589414783, "grad_norm": 10.35026741027832, "learning_rate": 4.797155514105852e-05, "loss": 0.6383, "step": 870 }, { "epoch": 0.20517603170902307, "grad_norm": 23.788137435913086, "learning_rate": 4.794823968290977e-05, "loss": 0.7854, "step": 880 }, { "epoch": 0.20750757752389834, "grad_norm": 21.079648971557617, "learning_rate": 4.792492422476102e-05, "loss": 0.6371, "step": 890 }, { "epoch": 0.2098391233387736, "grad_norm": 34.78284454345703, "learning_rate": 4.790160876661227e-05, "loss": 0.6078, "step": 900 }, { "epoch": 0.2098391233387736, "eval_accuracy": 0.689909762100082, "eval_f1": 0.6883721016902948, "eval_loss": 0.8121763467788696, "eval_precision": 0.7020681280584337, "eval_recall": 0.7085581473429932, "eval_runtime": 32.2791, "eval_samples_per_second": 37.764, "eval_steps_per_second": 2.385, "step": 900 }, { "epoch": 0.21217066915364888, "grad_norm": 38.722511291503906, "learning_rate": 4.7878293308463514e-05, "loss": 0.6371, "step": 910 }, { "epoch": 0.21450221496852412, "grad_norm": 30.822816848754883, "learning_rate": 4.785497785031476e-05, "loss": 0.6244, "step": 920 }, { "epoch": 0.2168337607833994, "grad_norm": 46.684818267822266, "learning_rate": 4.783166239216601e-05, "loss": 0.7515, "step": 930 }, { "epoch": 0.21916530659827466, "grad_norm": 23.10223960876465, "learning_rate": 4.7808346934017254e-05, "loss": 0.5434, "step": 940 }, { "epoch": 0.22149685241314993, "grad_norm": 20.43950080871582, "learning_rate": 4.77850314758685e-05, "loss": 0.5672, "step": 950 }, { "epoch": 0.2238283982280252, "grad_norm": 33.358795166015625, "learning_rate": 4.776171601771975e-05, "loss": 0.6576, "step": 960 }, { "epoch": 0.22615994404290043, "grad_norm": 25.824260711669922, "learning_rate": 4.7738400559571e-05, "loss": 0.6646, "step": 970 }, { "epoch": 0.2284914898577757, "grad_norm": 21.079469680786133, "learning_rate": 4.771508510142225e-05, "loss": 0.5052, "step": 980 }, { "epoch": 0.23082303567265097, "grad_norm": 15.477420806884766, "learning_rate": 4.769176964327349e-05, "loss": 0.6392, "step": 990 }, { "epoch": 0.23315458148752624, "grad_norm": 21.481847763061523, "learning_rate": 4.7668454185124736e-05, "loss": 0.6549, "step": 1000 }, { "epoch": 0.23548612730240148, "grad_norm": 14.648496627807617, "learning_rate": 4.764513872697599e-05, "loss": 0.5125, "step": 1010 }, { "epoch": 0.23781767311727675, "grad_norm": 17.860349655151367, "learning_rate": 4.762182326882724e-05, "loss": 0.5947, "step": 1020 }, { "epoch": 0.24014921893215202, "grad_norm": 30.999217987060547, "learning_rate": 4.7598507810678484e-05, "loss": 0.5108, "step": 1030 }, { "epoch": 0.2424807647470273, "grad_norm": 33.350311279296875, "learning_rate": 4.757519235252973e-05, "loss": 0.5957, "step": 1040 }, { "epoch": 0.24481231056190253, "grad_norm": 50.07561111450195, "learning_rate": 4.755187689438097e-05, "loss": 0.6289, "step": 1050 }, { "epoch": 0.2471438563767778, "grad_norm": 20.572126388549805, "learning_rate": 4.7528561436232225e-05, "loss": 0.6101, "step": 1060 }, { "epoch": 0.24947540219165307, "grad_norm": 31.299867630004883, "learning_rate": 4.7505245978083476e-05, "loss": 0.7346, "step": 1070 }, { "epoch": 0.2518069480065283, "grad_norm": 22.430063247680664, "learning_rate": 4.748193051993472e-05, "loss": 0.5229, "step": 1080 }, { "epoch": 0.2541384938214036, "grad_norm": 27.95315933227539, "learning_rate": 4.7458615061785966e-05, "loss": 0.6457, "step": 1090 }, { "epoch": 0.25647003963627885, "grad_norm": 20.998676300048828, "learning_rate": 4.743529960363721e-05, "loss": 0.6852, "step": 1100 }, { "epoch": 0.2588015854511541, "grad_norm": 28.259180068969727, "learning_rate": 4.741198414548846e-05, "loss": 0.6709, "step": 1110 }, { "epoch": 0.2611331312660294, "grad_norm": 29.109024047851562, "learning_rate": 4.738866868733971e-05, "loss": 0.5709, "step": 1120 }, { "epoch": 0.26346467708090465, "grad_norm": 25.32686996459961, "learning_rate": 4.736535322919096e-05, "loss": 0.5139, "step": 1130 }, { "epoch": 0.2657962228957799, "grad_norm": 19.918743133544922, "learning_rate": 4.73420377710422e-05, "loss": 0.6274, "step": 1140 }, { "epoch": 0.2681277687106552, "grad_norm": 27.803632736206055, "learning_rate": 4.731872231289345e-05, "loss": 0.7746, "step": 1150 }, { "epoch": 0.2704593145255304, "grad_norm": 51.257896423339844, "learning_rate": 4.72954068547447e-05, "loss": 0.6581, "step": 1160 }, { "epoch": 0.27279086034040567, "grad_norm": 25.976425170898438, "learning_rate": 4.7272091396595943e-05, "loss": 0.5674, "step": 1170 }, { "epoch": 0.27512240615528094, "grad_norm": 20.14984703063965, "learning_rate": 4.7248775938447195e-05, "loss": 0.5609, "step": 1180 }, { "epoch": 0.2774539519701562, "grad_norm": 21.2273006439209, "learning_rate": 4.722546048029844e-05, "loss": 0.5842, "step": 1190 }, { "epoch": 0.2797854977850315, "grad_norm": 31.50432014465332, "learning_rate": 4.7202145022149684e-05, "loss": 0.5867, "step": 1200 }, { "epoch": 0.2797854977850315, "eval_accuracy": 0.7227235438884332, "eval_f1": 0.7223497656011426, "eval_loss": 0.7796285152435303, "eval_precision": 0.7239042036307864, "eval_recall": 0.7433241953688237, "eval_runtime": 32.4272, "eval_samples_per_second": 37.592, "eval_steps_per_second": 2.375, "step": 1200 }, { "epoch": 0.28211704359990675, "grad_norm": 32.188232421875, "learning_rate": 4.7178829564000936e-05, "loss": 0.6986, "step": 1210 }, { "epoch": 0.284448589414782, "grad_norm": 21.2509708404541, "learning_rate": 4.715551410585218e-05, "loss": 0.5578, "step": 1220 }, { "epoch": 0.2867801352296573, "grad_norm": 36.59361267089844, "learning_rate": 4.713219864770343e-05, "loss": 0.6035, "step": 1230 }, { "epoch": 0.2891116810445325, "grad_norm": 23.820602416992188, "learning_rate": 4.7108883189554677e-05, "loss": 0.672, "step": 1240 }, { "epoch": 0.29144322685940777, "grad_norm": 9.917643547058105, "learning_rate": 4.708556773140592e-05, "loss": 0.5623, "step": 1250 }, { "epoch": 0.29377477267428304, "grad_norm": 23.47327423095703, "learning_rate": 4.706225227325717e-05, "loss": 0.5947, "step": 1260 }, { "epoch": 0.2961063184891583, "grad_norm": 20.891555786132812, "learning_rate": 4.703893681510842e-05, "loss": 0.5065, "step": 1270 }, { "epoch": 0.2984378643040336, "grad_norm": 20.379684448242188, "learning_rate": 4.701562135695967e-05, "loss": 0.4707, "step": 1280 }, { "epoch": 0.30076941011890884, "grad_norm": 23.66413688659668, "learning_rate": 4.6992305898810914e-05, "loss": 0.7354, "step": 1290 }, { "epoch": 0.3031009559337841, "grad_norm": 82.15457916259766, "learning_rate": 4.6968990440662165e-05, "loss": 0.6615, "step": 1300 }, { "epoch": 0.3054325017486594, "grad_norm": 43.44639587402344, "learning_rate": 4.694567498251341e-05, "loss": 0.7217, "step": 1310 }, { "epoch": 0.30776404756353465, "grad_norm": 27.2445125579834, "learning_rate": 4.6922359524364654e-05, "loss": 0.7379, "step": 1320 }, { "epoch": 0.31009559337840986, "grad_norm": 23.97974395751953, "learning_rate": 4.68990440662159e-05, "loss": 0.6117, "step": 1330 }, { "epoch": 0.31242713919328513, "grad_norm": 20.4930362701416, "learning_rate": 4.687572860806715e-05, "loss": 0.6047, "step": 1340 }, { "epoch": 0.3147586850081604, "grad_norm": 36.909305572509766, "learning_rate": 4.68524131499184e-05, "loss": 0.5695, "step": 1350 }, { "epoch": 0.31709023082303567, "grad_norm": 13.219598770141602, "learning_rate": 4.682909769176965e-05, "loss": 0.61, "step": 1360 }, { "epoch": 0.31942177663791094, "grad_norm": 37.49195861816406, "learning_rate": 4.680578223362089e-05, "loss": 0.6959, "step": 1370 }, { "epoch": 0.3217533224527862, "grad_norm": 21.94476890563965, "learning_rate": 4.6782466775472136e-05, "loss": 0.5361, "step": 1380 }, { "epoch": 0.3240848682676615, "grad_norm": 15.76740837097168, "learning_rate": 4.675915131732339e-05, "loss": 0.5799, "step": 1390 }, { "epoch": 0.32641641408253674, "grad_norm": 16.694778442382812, "learning_rate": 4.673583585917464e-05, "loss": 0.5195, "step": 1400 }, { "epoch": 0.32874795989741196, "grad_norm": 17.327856063842773, "learning_rate": 4.6712520401025884e-05, "loss": 0.4577, "step": 1410 }, { "epoch": 0.3310795057122872, "grad_norm": 19.089357376098633, "learning_rate": 4.668920494287713e-05, "loss": 0.5473, "step": 1420 }, { "epoch": 0.3334110515271625, "grad_norm": 20.266950607299805, "learning_rate": 4.666588948472837e-05, "loss": 0.5861, "step": 1430 }, { "epoch": 0.33574259734203776, "grad_norm": 14.822595596313477, "learning_rate": 4.6642574026579625e-05, "loss": 0.4603, "step": 1440 }, { "epoch": 0.33807414315691303, "grad_norm": 14.292266845703125, "learning_rate": 4.6619258568430876e-05, "loss": 0.5209, "step": 1450 }, { "epoch": 0.3404056889717883, "grad_norm": 15.99500560760498, "learning_rate": 4.659594311028212e-05, "loss": 0.5721, "step": 1460 }, { "epoch": 0.34273723478666357, "grad_norm": 15.458304405212402, "learning_rate": 4.6572627652133366e-05, "loss": 0.5798, "step": 1470 }, { "epoch": 0.34506878060153884, "grad_norm": 22.426408767700195, "learning_rate": 4.654931219398461e-05, "loss": 0.6674, "step": 1480 }, { "epoch": 0.3474003264164141, "grad_norm": 16.9567928314209, "learning_rate": 4.652599673583586e-05, "loss": 0.6049, "step": 1490 }, { "epoch": 0.3497318722312893, "grad_norm": 39.44313430786133, "learning_rate": 4.650268127768711e-05, "loss": 0.5684, "step": 1500 }, { "epoch": 0.3497318722312893, "eval_accuracy": 0.7506152584085316, "eval_f1": 0.7356563013468664, "eval_loss": 0.7205380797386169, "eval_precision": 0.7489548577706214, "eval_recall": 0.7325100738137515, "eval_runtime": 32.4911, "eval_samples_per_second": 37.518, "eval_steps_per_second": 2.37, "step": 1500 }, { "epoch": 0.3520634180461646, "grad_norm": 27.17556381225586, "learning_rate": 4.647936581953836e-05, "loss": 0.5555, "step": 1510 }, { "epoch": 0.35439496386103986, "grad_norm": 21.439729690551758, "learning_rate": 4.64560503613896e-05, "loss": 0.6018, "step": 1520 }, { "epoch": 0.3567265096759151, "grad_norm": 27.142850875854492, "learning_rate": 4.643273490324085e-05, "loss": 0.6183, "step": 1530 }, { "epoch": 0.3590580554907904, "grad_norm": 26.709306716918945, "learning_rate": 4.64094194450921e-05, "loss": 0.5238, "step": 1540 }, { "epoch": 0.36138960130566566, "grad_norm": 18.24578857421875, "learning_rate": 4.6386103986943343e-05, "loss": 0.6221, "step": 1550 }, { "epoch": 0.36372114712054093, "grad_norm": 16.884159088134766, "learning_rate": 4.6362788528794595e-05, "loss": 0.577, "step": 1560 }, { "epoch": 0.3660526929354162, "grad_norm": 27.92196273803711, "learning_rate": 4.633947307064584e-05, "loss": 0.4804, "step": 1570 }, { "epoch": 0.36838423875029147, "grad_norm": 19.397260665893555, "learning_rate": 4.6316157612497084e-05, "loss": 0.5586, "step": 1580 }, { "epoch": 0.3707157845651667, "grad_norm": 20.426605224609375, "learning_rate": 4.6292842154348336e-05, "loss": 0.7078, "step": 1590 }, { "epoch": 0.37304733038004195, "grad_norm": 16.057165145874023, "learning_rate": 4.626952669619958e-05, "loss": 0.4367, "step": 1600 }, { "epoch": 0.3753788761949172, "grad_norm": 15.59145450592041, "learning_rate": 4.624621123805083e-05, "loss": 0.4379, "step": 1610 }, { "epoch": 0.3777104220097925, "grad_norm": 11.662853240966797, "learning_rate": 4.622289577990208e-05, "loss": 0.4041, "step": 1620 }, { "epoch": 0.38004196782466776, "grad_norm": 27.15912628173828, "learning_rate": 4.619958032175333e-05, "loss": 0.5678, "step": 1630 }, { "epoch": 0.38237351363954303, "grad_norm": 18.558469772338867, "learning_rate": 4.617626486360457e-05, "loss": 0.5907, "step": 1640 }, { "epoch": 0.3847050594544183, "grad_norm": 32.28955078125, "learning_rate": 4.615294940545582e-05, "loss": 0.592, "step": 1650 }, { "epoch": 0.38703660526929357, "grad_norm": 19.484052658081055, "learning_rate": 4.612963394730707e-05, "loss": 0.5517, "step": 1660 }, { "epoch": 0.3893681510841688, "grad_norm": 19.991775512695312, "learning_rate": 4.6106318489158314e-05, "loss": 0.4574, "step": 1670 }, { "epoch": 0.39169969689904405, "grad_norm": 19.2491455078125, "learning_rate": 4.6083003031009565e-05, "loss": 0.5324, "step": 1680 }, { "epoch": 0.3940312427139193, "grad_norm": 26.1087646484375, "learning_rate": 4.605968757286081e-05, "loss": 0.5405, "step": 1690 }, { "epoch": 0.3963627885287946, "grad_norm": 28.05010986328125, "learning_rate": 4.6036372114712055e-05, "loss": 0.6015, "step": 1700 }, { "epoch": 0.39869433434366985, "grad_norm": 24.019983291625977, "learning_rate": 4.60130566565633e-05, "loss": 0.4885, "step": 1710 }, { "epoch": 0.4010258801585451, "grad_norm": 43.11894607543945, "learning_rate": 4.598974119841455e-05, "loss": 0.6539, "step": 1720 }, { "epoch": 0.4033574259734204, "grad_norm": 23.700422286987305, "learning_rate": 4.59664257402658e-05, "loss": 0.4439, "step": 1730 }, { "epoch": 0.40568897178829566, "grad_norm": 29.97321319580078, "learning_rate": 4.594311028211705e-05, "loss": 0.493, "step": 1740 }, { "epoch": 0.40802051760317093, "grad_norm": 32.946022033691406, "learning_rate": 4.591979482396829e-05, "loss": 0.6301, "step": 1750 }, { "epoch": 0.41035206341804614, "grad_norm": 16.01514434814453, "learning_rate": 4.5896479365819536e-05, "loss": 0.4617, "step": 1760 }, { "epoch": 0.4126836092329214, "grad_norm": 21.685338973999023, "learning_rate": 4.587316390767079e-05, "loss": 0.6289, "step": 1770 }, { "epoch": 0.4150151550477967, "grad_norm": 28.99067497253418, "learning_rate": 4.584984844952204e-05, "loss": 0.4477, "step": 1780 }, { "epoch": 0.41734670086267195, "grad_norm": 35.17900085449219, "learning_rate": 4.5826532991373284e-05, "loss": 0.4637, "step": 1790 }, { "epoch": 0.4196782466775472, "grad_norm": 33.74941635131836, "learning_rate": 4.580321753322453e-05, "loss": 0.6002, "step": 1800 }, { "epoch": 0.4196782466775472, "eval_accuracy": 0.7506152584085316, "eval_f1": 0.7449892374013476, "eval_loss": 0.7239476442337036, "eval_precision": 0.7453346607126952, "eval_recall": 0.7541687656017055, "eval_runtime": 32.6118, "eval_samples_per_second": 37.379, "eval_steps_per_second": 2.361, "step": 1800 }, { "epoch": 0.4220097924924225, "grad_norm": 13.531204223632812, "learning_rate": 4.577990207507577e-05, "loss": 0.5473, "step": 1810 }, { "epoch": 0.42434133830729776, "grad_norm": 16.84659194946289, "learning_rate": 4.5756586616927025e-05, "loss": 0.5308, "step": 1820 }, { "epoch": 0.426672884122173, "grad_norm": 34.70216369628906, "learning_rate": 4.5733271158778276e-05, "loss": 0.7112, "step": 1830 }, { "epoch": 0.42900442993704824, "grad_norm": 18.915586471557617, "learning_rate": 4.570995570062952e-05, "loss": 0.6053, "step": 1840 }, { "epoch": 0.4313359757519235, "grad_norm": 19.790071487426758, "learning_rate": 4.5686640242480766e-05, "loss": 0.4516, "step": 1850 }, { "epoch": 0.4336675215667988, "grad_norm": 19.730384826660156, "learning_rate": 4.566332478433201e-05, "loss": 0.6088, "step": 1860 }, { "epoch": 0.43599906738167404, "grad_norm": 31.787572860717773, "learning_rate": 4.564000932618326e-05, "loss": 0.5709, "step": 1870 }, { "epoch": 0.4383306131965493, "grad_norm": 26.529708862304688, "learning_rate": 4.561669386803451e-05, "loss": 0.5072, "step": 1880 }, { "epoch": 0.4406621590114246, "grad_norm": 29.785123825073242, "learning_rate": 4.559337840988576e-05, "loss": 0.4786, "step": 1890 }, { "epoch": 0.44299370482629985, "grad_norm": 18.451383590698242, "learning_rate": 4.5570062951737e-05, "loss": 0.5377, "step": 1900 }, { "epoch": 0.4453252506411751, "grad_norm": 24.651517868041992, "learning_rate": 4.554674749358825e-05, "loss": 0.4759, "step": 1910 }, { "epoch": 0.4476567964560504, "grad_norm": 23.738956451416016, "learning_rate": 4.55234320354395e-05, "loss": 0.6059, "step": 1920 }, { "epoch": 0.4499883422709256, "grad_norm": 15.957860946655273, "learning_rate": 4.5500116577290743e-05, "loss": 0.5788, "step": 1930 }, { "epoch": 0.45231988808580087, "grad_norm": 18.671892166137695, "learning_rate": 4.5476801119141995e-05, "loss": 0.42, "step": 1940 }, { "epoch": 0.45465143390067614, "grad_norm": 20.396814346313477, "learning_rate": 4.545348566099324e-05, "loss": 0.5062, "step": 1950 }, { "epoch": 0.4569829797155514, "grad_norm": 35.40294647216797, "learning_rate": 4.5430170202844484e-05, "loss": 0.6339, "step": 1960 }, { "epoch": 0.4593145255304267, "grad_norm": 20.99814224243164, "learning_rate": 4.5406854744695736e-05, "loss": 0.4737, "step": 1970 }, { "epoch": 0.46164607134530194, "grad_norm": 19.0653133392334, "learning_rate": 4.538353928654698e-05, "loss": 0.4303, "step": 1980 }, { "epoch": 0.4639776171601772, "grad_norm": 25.659717559814453, "learning_rate": 4.536022382839823e-05, "loss": 0.4544, "step": 1990 }, { "epoch": 0.4663091629750525, "grad_norm": 15.122028350830078, "learning_rate": 4.533690837024948e-05, "loss": 0.4343, "step": 2000 }, { "epoch": 0.4686407087899277, "grad_norm": 18.79733657836914, "learning_rate": 4.531359291210073e-05, "loss": 0.598, "step": 2010 }, { "epoch": 0.47097225460480296, "grad_norm": 21.690399169921875, "learning_rate": 4.529027745395197e-05, "loss": 0.5553, "step": 2020 }, { "epoch": 0.47330380041967823, "grad_norm": 19.833171844482422, "learning_rate": 4.526696199580322e-05, "loss": 0.4684, "step": 2030 }, { "epoch": 0.4756353462345535, "grad_norm": 31.109315872192383, "learning_rate": 4.524364653765447e-05, "loss": 0.5953, "step": 2040 }, { "epoch": 0.47796689204942877, "grad_norm": 14.755363464355469, "learning_rate": 4.5220331079505714e-05, "loss": 0.4521, "step": 2050 }, { "epoch": 0.48029843786430404, "grad_norm": 19.60972785949707, "learning_rate": 4.5197015621356965e-05, "loss": 0.4349, "step": 2060 }, { "epoch": 0.4826299836791793, "grad_norm": 20.63113784790039, "learning_rate": 4.517370016320821e-05, "loss": 0.4893, "step": 2070 }, { "epoch": 0.4849615294940546, "grad_norm": 21.99651336669922, "learning_rate": 4.5150384705059455e-05, "loss": 0.535, "step": 2080 }, { "epoch": 0.48729307530892985, "grad_norm": 13.244401931762695, "learning_rate": 4.51270692469107e-05, "loss": 0.4573, "step": 2090 }, { "epoch": 0.48962462112380506, "grad_norm": 22.323959350585938, "learning_rate": 4.510375378876195e-05, "loss": 0.4974, "step": 2100 }, { "epoch": 0.48962462112380506, "eval_accuracy": 0.7497949138638228, "eval_f1": 0.7429285315686787, "eval_loss": 0.7164492011070251, "eval_precision": 0.7399478118312398, "eval_recall": 0.751867958660305, "eval_runtime": 32.6276, "eval_samples_per_second": 37.361, "eval_steps_per_second": 2.36, "step": 2100 }, { "epoch": 0.49195616693868033, "grad_norm": 14.91236400604248, "learning_rate": 4.50804383306132e-05, "loss": 0.4868, "step": 2110 }, { "epoch": 0.4942877127535556, "grad_norm": 14.522492408752441, "learning_rate": 4.505712287246445e-05, "loss": 0.517, "step": 2120 }, { "epoch": 0.49661925856843087, "grad_norm": 14.661904335021973, "learning_rate": 4.503380741431569e-05, "loss": 0.6969, "step": 2130 }, { "epoch": 0.49895080438330613, "grad_norm": 17.774005889892578, "learning_rate": 4.5010491956166936e-05, "loss": 0.3392, "step": 2140 }, { "epoch": 0.5012823501981813, "grad_norm": 20.338176727294922, "learning_rate": 4.498717649801819e-05, "loss": 0.5478, "step": 2150 }, { "epoch": 0.5036138960130566, "grad_norm": 23.17992401123047, "learning_rate": 4.496386103986944e-05, "loss": 0.4363, "step": 2160 }, { "epoch": 0.5059454418279319, "grad_norm": 26.9781494140625, "learning_rate": 4.4940545581720684e-05, "loss": 0.4991, "step": 2170 }, { "epoch": 0.5082769876428072, "grad_norm": 13.74269962310791, "learning_rate": 4.491723012357193e-05, "loss": 0.4789, "step": 2180 }, { "epoch": 0.5106085334576824, "grad_norm": 9.351542472839355, "learning_rate": 4.489391466542317e-05, "loss": 0.6222, "step": 2190 }, { "epoch": 0.5129400792725577, "grad_norm": 29.69098472595215, "learning_rate": 4.4870599207274425e-05, "loss": 0.5182, "step": 2200 }, { "epoch": 0.515271625087433, "grad_norm": 34.87522506713867, "learning_rate": 4.4847283749125676e-05, "loss": 0.4833, "step": 2210 }, { "epoch": 0.5176031709023082, "grad_norm": 16.882413864135742, "learning_rate": 4.482396829097692e-05, "loss": 0.4985, "step": 2220 }, { "epoch": 0.5199347167171835, "grad_norm": 18.16925621032715, "learning_rate": 4.4800652832828166e-05, "loss": 0.5125, "step": 2230 }, { "epoch": 0.5222662625320588, "grad_norm": 25.316865921020508, "learning_rate": 4.477733737467941e-05, "loss": 0.6061, "step": 2240 }, { "epoch": 0.524597808346934, "grad_norm": 24.0291690826416, "learning_rate": 4.475402191653066e-05, "loss": 0.5563, "step": 2250 }, { "epoch": 0.5269293541618093, "grad_norm": 18.830142974853516, "learning_rate": 4.473070645838191e-05, "loss": 0.34, "step": 2260 }, { "epoch": 0.5292608999766846, "grad_norm": 24.81058692932129, "learning_rate": 4.470739100023316e-05, "loss": 0.4322, "step": 2270 }, { "epoch": 0.5315924457915598, "grad_norm": 25.785091400146484, "learning_rate": 4.46840755420844e-05, "loss": 0.4726, "step": 2280 }, { "epoch": 0.5339239916064351, "grad_norm": 14.79159927368164, "learning_rate": 4.466076008393565e-05, "loss": 0.3366, "step": 2290 }, { "epoch": 0.5362555374213104, "grad_norm": 34.41261672973633, "learning_rate": 4.46374446257869e-05, "loss": 0.5926, "step": 2300 }, { "epoch": 0.5385870832361855, "grad_norm": 34.747901916503906, "learning_rate": 4.4614129167638144e-05, "loss": 0.6057, "step": 2310 }, { "epoch": 0.5409186290510608, "grad_norm": 17.471677780151367, "learning_rate": 4.4590813709489395e-05, "loss": 0.5399, "step": 2320 }, { "epoch": 0.5432501748659361, "grad_norm": 29.014802932739258, "learning_rate": 4.456749825134064e-05, "loss": 0.4637, "step": 2330 }, { "epoch": 0.5455817206808113, "grad_norm": 12.530820846557617, "learning_rate": 4.454418279319189e-05, "loss": 0.4172, "step": 2340 }, { "epoch": 0.5479132664956866, "grad_norm": 15.449395179748535, "learning_rate": 4.4520867335043136e-05, "loss": 0.5294, "step": 2350 }, { "epoch": 0.5502448123105619, "grad_norm": 26.869712829589844, "learning_rate": 4.449755187689438e-05, "loss": 0.5169, "step": 2360 }, { "epoch": 0.5525763581254372, "grad_norm": 17.471458435058594, "learning_rate": 4.447423641874563e-05, "loss": 0.4523, "step": 2370 }, { "epoch": 0.5549079039403124, "grad_norm": 24.529001235961914, "learning_rate": 4.445092096059688e-05, "loss": 0.4501, "step": 2380 }, { "epoch": 0.5572394497551877, "grad_norm": 22.41488265991211, "learning_rate": 4.442760550244813e-05, "loss": 0.5475, "step": 2390 }, { "epoch": 0.559570995570063, "grad_norm": 27.631166458129883, "learning_rate": 4.440429004429937e-05, "loss": 0.5527, "step": 2400 }, { "epoch": 0.559570995570063, "eval_accuracy": 0.7506152584085316, "eval_f1": 0.7474469925712124, "eval_loss": 0.7103798985481262, "eval_precision": 0.7429756390197679, "eval_recall": 0.7637649710650173, "eval_runtime": 32.5095, "eval_samples_per_second": 37.497, "eval_steps_per_second": 2.369, "step": 2400 }, { "epoch": 0.5619025413849382, "grad_norm": 25.045551300048828, "learning_rate": 4.438097458615062e-05, "loss": 0.641, "step": 2410 }, { "epoch": 0.5642340871998135, "grad_norm": 21.757932662963867, "learning_rate": 4.435765912800187e-05, "loss": 0.4971, "step": 2420 }, { "epoch": 0.5665656330146888, "grad_norm": 21.797353744506836, "learning_rate": 4.4334343669853114e-05, "loss": 0.4863, "step": 2430 }, { "epoch": 0.568897178829564, "grad_norm": 24.75421905517578, "learning_rate": 4.4311028211704365e-05, "loss": 0.49, "step": 2440 }, { "epoch": 0.5712287246444393, "grad_norm": 29.258378982543945, "learning_rate": 4.428771275355561e-05, "loss": 0.4736, "step": 2450 }, { "epoch": 0.5735602704593146, "grad_norm": 36.19465255737305, "learning_rate": 4.4264397295406855e-05, "loss": 0.4717, "step": 2460 }, { "epoch": 0.5758918162741898, "grad_norm": 25.283084869384766, "learning_rate": 4.42410818372581e-05, "loss": 0.5374, "step": 2470 }, { "epoch": 0.578223362089065, "grad_norm": 26.333541870117188, "learning_rate": 4.421776637910936e-05, "loss": 0.3847, "step": 2480 }, { "epoch": 0.5805549079039403, "grad_norm": 21.764862060546875, "learning_rate": 4.41944509209606e-05, "loss": 0.4232, "step": 2490 }, { "epoch": 0.5828864537188155, "grad_norm": 11.467122077941895, "learning_rate": 4.417113546281185e-05, "loss": 0.6221, "step": 2500 }, { "epoch": 0.5852179995336908, "grad_norm": 16.913673400878906, "learning_rate": 4.414782000466309e-05, "loss": 0.4062, "step": 2510 }, { "epoch": 0.5875495453485661, "grad_norm": 25.194719314575195, "learning_rate": 4.4124504546514336e-05, "loss": 0.4734, "step": 2520 }, { "epoch": 0.5898810911634413, "grad_norm": 16.23316764831543, "learning_rate": 4.410118908836559e-05, "loss": 0.413, "step": 2530 }, { "epoch": 0.5922126369783166, "grad_norm": 29.319387435913086, "learning_rate": 4.407787363021684e-05, "loss": 0.4903, "step": 2540 }, { "epoch": 0.5945441827931919, "grad_norm": 55.968284606933594, "learning_rate": 4.4054558172068084e-05, "loss": 0.5513, "step": 2550 }, { "epoch": 0.5968757286080671, "grad_norm": 19.242820739746094, "learning_rate": 4.403124271391933e-05, "loss": 0.4908, "step": 2560 }, { "epoch": 0.5992072744229424, "grad_norm": 23.568754196166992, "learning_rate": 4.400792725577057e-05, "loss": 0.585, "step": 2570 }, { "epoch": 0.6015388202378177, "grad_norm": 19.30316925048828, "learning_rate": 4.3984611797621825e-05, "loss": 0.5568, "step": 2580 }, { "epoch": 0.603870366052693, "grad_norm": 11.688234329223633, "learning_rate": 4.3961296339473076e-05, "loss": 0.4393, "step": 2590 }, { "epoch": 0.6062019118675682, "grad_norm": 18.595117568969727, "learning_rate": 4.393798088132432e-05, "loss": 0.3502, "step": 2600 }, { "epoch": 0.6085334576824435, "grad_norm": 30.775352478027344, "learning_rate": 4.3914665423175566e-05, "loss": 0.4952, "step": 2610 }, { "epoch": 0.6108650034973188, "grad_norm": 9.629733085632324, "learning_rate": 4.389134996502681e-05, "loss": 0.3984, "step": 2620 }, { "epoch": 0.613196549312194, "grad_norm": 27.071420669555664, "learning_rate": 4.386803450687806e-05, "loss": 0.5048, "step": 2630 }, { "epoch": 0.6155280951270693, "grad_norm": 18.72870445251465, "learning_rate": 4.384471904872931e-05, "loss": 0.5675, "step": 2640 }, { "epoch": 0.6178596409419445, "grad_norm": 16.282094955444336, "learning_rate": 4.382140359058056e-05, "loss": 0.3591, "step": 2650 }, { "epoch": 0.6201911867568197, "grad_norm": 17.249792098999023, "learning_rate": 4.37980881324318e-05, "loss": 0.4584, "step": 2660 }, { "epoch": 0.622522732571695, "grad_norm": 21.42504119873047, "learning_rate": 4.3774772674283054e-05, "loss": 0.4607, "step": 2670 }, { "epoch": 0.6248542783865703, "grad_norm": 30.91826820373535, "learning_rate": 4.37514572161343e-05, "loss": 0.6725, "step": 2680 }, { "epoch": 0.6271858242014455, "grad_norm": 20.925262451171875, "learning_rate": 4.3728141757985544e-05, "loss": 0.4768, "step": 2690 }, { "epoch": 0.6295173700163208, "grad_norm": 25.807174682617188, "learning_rate": 4.3704826299836795e-05, "loss": 0.4127, "step": 2700 }, { "epoch": 0.6295173700163208, "eval_accuracy": 0.7678424938474159, "eval_f1": 0.7584708782486864, "eval_loss": 0.6823632121086121, "eval_precision": 0.7601462178390429, "eval_recall": 0.7648942677055709, "eval_runtime": 32.4267, "eval_samples_per_second": 37.592, "eval_steps_per_second": 2.375, "step": 2700 }, { "epoch": 0.6318489158311961, "grad_norm": 20.336694717407227, "learning_rate": 4.368151084168804e-05, "loss": 0.4772, "step": 2710 }, { "epoch": 0.6341804616460713, "grad_norm": 15.894454956054688, "learning_rate": 4.365819538353929e-05, "loss": 0.5338, "step": 2720 }, { "epoch": 0.6365120074609466, "grad_norm": 45.088111877441406, "learning_rate": 4.3634879925390536e-05, "loss": 0.6168, "step": 2730 }, { "epoch": 0.6388435532758219, "grad_norm": 31.453920364379883, "learning_rate": 4.361156446724178e-05, "loss": 0.4662, "step": 2740 }, { "epoch": 0.6411750990906971, "grad_norm": 11.898534774780273, "learning_rate": 4.358824900909303e-05, "loss": 0.5345, "step": 2750 }, { "epoch": 0.6435066449055724, "grad_norm": 21.230201721191406, "learning_rate": 4.356493355094428e-05, "loss": 0.4006, "step": 2760 }, { "epoch": 0.6458381907204477, "grad_norm": 25.514484405517578, "learning_rate": 4.354161809279553e-05, "loss": 0.5164, "step": 2770 }, { "epoch": 0.648169736535323, "grad_norm": 20.121109008789062, "learning_rate": 4.351830263464677e-05, "loss": 0.4885, "step": 2780 }, { "epoch": 0.6505012823501982, "grad_norm": 11.797569274902344, "learning_rate": 4.349498717649802e-05, "loss": 0.5272, "step": 2790 }, { "epoch": 0.6528328281650735, "grad_norm": 22.636089324951172, "learning_rate": 4.347167171834927e-05, "loss": 0.5199, "step": 2800 }, { "epoch": 0.6551643739799488, "grad_norm": 29.251462936401367, "learning_rate": 4.3448356260200514e-05, "loss": 0.3558, "step": 2810 }, { "epoch": 0.6574959197948239, "grad_norm": 14.990754127502441, "learning_rate": 4.3425040802051765e-05, "loss": 0.5055, "step": 2820 }, { "epoch": 0.6598274656096992, "grad_norm": 15.994630813598633, "learning_rate": 4.340172534390301e-05, "loss": 0.3577, "step": 2830 }, { "epoch": 0.6621590114245745, "grad_norm": 25.580074310302734, "learning_rate": 4.3378409885754255e-05, "loss": 0.464, "step": 2840 }, { "epoch": 0.6644905572394497, "grad_norm": 19.915939331054688, "learning_rate": 4.33550944276055e-05, "loss": 0.3557, "step": 2850 }, { "epoch": 0.666822103054325, "grad_norm": 17.42690658569336, "learning_rate": 4.333177896945676e-05, "loss": 0.5423, "step": 2860 }, { "epoch": 0.6691536488692003, "grad_norm": 16.17222785949707, "learning_rate": 4.3308463511308e-05, "loss": 0.3552, "step": 2870 }, { "epoch": 0.6714851946840755, "grad_norm": 23.892414093017578, "learning_rate": 4.328514805315925e-05, "loss": 0.4097, "step": 2880 }, { "epoch": 0.6738167404989508, "grad_norm": 23.955047607421875, "learning_rate": 4.326183259501049e-05, "loss": 0.6225, "step": 2890 }, { "epoch": 0.6761482863138261, "grad_norm": 19.446603775024414, "learning_rate": 4.3238517136861736e-05, "loss": 0.5303, "step": 2900 }, { "epoch": 0.6784798321287013, "grad_norm": 15.906410217285156, "learning_rate": 4.321520167871299e-05, "loss": 0.4539, "step": 2910 }, { "epoch": 0.6808113779435766, "grad_norm": 30.294095993041992, "learning_rate": 4.319188622056424e-05, "loss": 0.6016, "step": 2920 }, { "epoch": 0.6831429237584519, "grad_norm": 22.50743865966797, "learning_rate": 4.3168570762415484e-05, "loss": 0.4718, "step": 2930 }, { "epoch": 0.6854744695733271, "grad_norm": 11.781279563903809, "learning_rate": 4.314525530426673e-05, "loss": 0.413, "step": 2940 }, { "epoch": 0.6878060153882024, "grad_norm": 10.89158821105957, "learning_rate": 4.3121939846117973e-05, "loss": 0.507, "step": 2950 }, { "epoch": 0.6901375612030777, "grad_norm": 13.030016899108887, "learning_rate": 4.3098624387969225e-05, "loss": 0.4475, "step": 2960 }, { "epoch": 0.692469107017953, "grad_norm": 15.248382568359375, "learning_rate": 4.3075308929820476e-05, "loss": 0.4741, "step": 2970 }, { "epoch": 0.6948006528328282, "grad_norm": 32.4050407409668, "learning_rate": 4.305199347167172e-05, "loss": 0.5675, "step": 2980 }, { "epoch": 0.6971321986477035, "grad_norm": 36.350406646728516, "learning_rate": 4.3028678013522966e-05, "loss": 0.584, "step": 2990 }, { "epoch": 0.6994637444625786, "grad_norm": 14.917610168457031, "learning_rate": 4.300536255537422e-05, "loss": 0.4201, "step": 3000 }, { "epoch": 0.6994637444625786, "eval_accuracy": 0.7768662838392125, "eval_f1": 0.7673956246919376, "eval_loss": 0.6402276158332825, "eval_precision": 0.7691001302569485, "eval_recall": 0.7679644214691466, "eval_runtime": 32.6299, "eval_samples_per_second": 37.358, "eval_steps_per_second": 2.36, "step": 3000 }, { "epoch": 0.7017952902774539, "grad_norm": 28.404804229736328, "learning_rate": 4.298204709722546e-05, "loss": 0.4424, "step": 3010 }, { "epoch": 0.7041268360923292, "grad_norm": 22.141082763671875, "learning_rate": 4.295873163907671e-05, "loss": 0.5042, "step": 3020 }, { "epoch": 0.7064583819072044, "grad_norm": 26.650156021118164, "learning_rate": 4.293541618092796e-05, "loss": 0.5509, "step": 3030 }, { "epoch": 0.7087899277220797, "grad_norm": 17.86048698425293, "learning_rate": 4.29121007227792e-05, "loss": 0.525, "step": 3040 }, { "epoch": 0.711121473536955, "grad_norm": 31.643362045288086, "learning_rate": 4.2888785264630454e-05, "loss": 0.4592, "step": 3050 }, { "epoch": 0.7134530193518303, "grad_norm": 25.388368606567383, "learning_rate": 4.28654698064817e-05, "loss": 0.6506, "step": 3060 }, { "epoch": 0.7157845651667055, "grad_norm": 20.75798225402832, "learning_rate": 4.2842154348332944e-05, "loss": 0.5776, "step": 3070 }, { "epoch": 0.7181161109815808, "grad_norm": 17.62347412109375, "learning_rate": 4.2818838890184195e-05, "loss": 0.497, "step": 3080 }, { "epoch": 0.7204476567964561, "grad_norm": 21.391613006591797, "learning_rate": 4.279552343203544e-05, "loss": 0.4536, "step": 3090 }, { "epoch": 0.7227792026113313, "grad_norm": 19.84242057800293, "learning_rate": 4.277220797388669e-05, "loss": 0.518, "step": 3100 }, { "epoch": 0.7251107484262066, "grad_norm": 25.691789627075195, "learning_rate": 4.2748892515737936e-05, "loss": 0.4864, "step": 3110 }, { "epoch": 0.7274422942410819, "grad_norm": 19.61354637145996, "learning_rate": 4.272557705758918e-05, "loss": 0.3578, "step": 3120 }, { "epoch": 0.7297738400559571, "grad_norm": 20.281843185424805, "learning_rate": 4.270226159944043e-05, "loss": 0.492, "step": 3130 }, { "epoch": 0.7321053858708324, "grad_norm": 19.12962532043457, "learning_rate": 4.267894614129168e-05, "loss": 0.4072, "step": 3140 }, { "epoch": 0.7344369316857077, "grad_norm": 22.840578079223633, "learning_rate": 4.265563068314293e-05, "loss": 0.5414, "step": 3150 }, { "epoch": 0.7367684775005829, "grad_norm": 30.5288028717041, "learning_rate": 4.263231522499417e-05, "loss": 0.518, "step": 3160 }, { "epoch": 0.7391000233154581, "grad_norm": 15.130803108215332, "learning_rate": 4.260899976684542e-05, "loss": 0.3736, "step": 3170 }, { "epoch": 0.7414315691303334, "grad_norm": 22.197586059570312, "learning_rate": 4.258568430869667e-05, "loss": 0.3704, "step": 3180 }, { "epoch": 0.7437631149452086, "grad_norm": 13.625364303588867, "learning_rate": 4.256236885054792e-05, "loss": 0.3927, "step": 3190 }, { "epoch": 0.7460946607600839, "grad_norm": 23.51502227783203, "learning_rate": 4.2539053392399165e-05, "loss": 0.4915, "step": 3200 }, { "epoch": 0.7484262065749592, "grad_norm": 19.06190299987793, "learning_rate": 4.251573793425041e-05, "loss": 0.5324, "step": 3210 }, { "epoch": 0.7507577523898344, "grad_norm": 16.512483596801758, "learning_rate": 4.2492422476101655e-05, "loss": 0.5011, "step": 3220 }, { "epoch": 0.7530892982047097, "grad_norm": 12.160454750061035, "learning_rate": 4.24691070179529e-05, "loss": 0.5847, "step": 3230 }, { "epoch": 0.755420844019585, "grad_norm": 14.771639823913574, "learning_rate": 4.244579155980416e-05, "loss": 0.3473, "step": 3240 }, { "epoch": 0.7577523898344602, "grad_norm": 25.87384605407715, "learning_rate": 4.24224761016554e-05, "loss": 0.4409, "step": 3250 }, { "epoch": 0.7600839356493355, "grad_norm": 30.41501235961914, "learning_rate": 4.239916064350665e-05, "loss": 0.6514, "step": 3260 }, { "epoch": 0.7624154814642108, "grad_norm": 20.601119995117188, "learning_rate": 4.237584518535789e-05, "loss": 0.5139, "step": 3270 }, { "epoch": 0.7647470272790861, "grad_norm": 12.452898979187012, "learning_rate": 4.2352529727209136e-05, "loss": 0.395, "step": 3280 }, { "epoch": 0.7670785730939613, "grad_norm": 23.974411010742188, "learning_rate": 4.232921426906039e-05, "loss": 0.4344, "step": 3290 }, { "epoch": 0.7694101189088366, "grad_norm": 27.8565616607666, "learning_rate": 4.230589881091164e-05, "loss": 0.4636, "step": 3300 }, { "epoch": 0.7694101189088366, "eval_accuracy": 0.7727645611156686, "eval_f1": 0.7628742718759469, "eval_loss": 0.673406720161438, "eval_precision": 0.7584650322424751, "eval_recall": 0.7751981038943103, "eval_runtime": 32.4265, "eval_samples_per_second": 37.593, "eval_steps_per_second": 2.375, "step": 3300 }, { "epoch": 0.7717416647237119, "grad_norm": 31.938621520996094, "learning_rate": 4.2282583352762884e-05, "loss": 0.5178, "step": 3310 }, { "epoch": 0.7740732105385871, "grad_norm": 14.094200134277344, "learning_rate": 4.225926789461413e-05, "loss": 0.3979, "step": 3320 }, { "epoch": 0.7764047563534624, "grad_norm": 14.13912582397461, "learning_rate": 4.2235952436465373e-05, "loss": 0.4894, "step": 3330 }, { "epoch": 0.7787363021683376, "grad_norm": 16.00527572631836, "learning_rate": 4.2212636978316625e-05, "loss": 0.3421, "step": 3340 }, { "epoch": 0.7810678479832128, "grad_norm": 14.035831451416016, "learning_rate": 4.2189321520167876e-05, "loss": 0.4204, "step": 3350 }, { "epoch": 0.7833993937980881, "grad_norm": 17.89393424987793, "learning_rate": 4.216600606201912e-05, "loss": 0.3081, "step": 3360 }, { "epoch": 0.7857309396129634, "grad_norm": 19.49710464477539, "learning_rate": 4.2142690603870366e-05, "loss": 0.497, "step": 3370 }, { "epoch": 0.7880624854278386, "grad_norm": 32.836326599121094, "learning_rate": 4.211937514572162e-05, "loss": 0.5676, "step": 3380 }, { "epoch": 0.7903940312427139, "grad_norm": 21.465435028076172, "learning_rate": 4.209605968757286e-05, "loss": 0.583, "step": 3390 }, { "epoch": 0.7927255770575892, "grad_norm": 19.0760440826416, "learning_rate": 4.2072744229424113e-05, "loss": 0.4606, "step": 3400 }, { "epoch": 0.7950571228724644, "grad_norm": 34.77079772949219, "learning_rate": 4.204942877127536e-05, "loss": 0.4716, "step": 3410 }, { "epoch": 0.7973886686873397, "grad_norm": 12.543126106262207, "learning_rate": 4.20261133131266e-05, "loss": 0.391, "step": 3420 }, { "epoch": 0.799720214502215, "grad_norm": 15.688311576843262, "learning_rate": 4.2002797854977854e-05, "loss": 0.5113, "step": 3430 }, { "epoch": 0.8020517603170902, "grad_norm": 11.053609848022461, "learning_rate": 4.19794823968291e-05, "loss": 0.3462, "step": 3440 }, { "epoch": 0.8043833061319655, "grad_norm": 21.625019073486328, "learning_rate": 4.1956166938680344e-05, "loss": 0.5507, "step": 3450 }, { "epoch": 0.8067148519468408, "grad_norm": 15.765186309814453, "learning_rate": 4.1932851480531595e-05, "loss": 0.5008, "step": 3460 }, { "epoch": 0.809046397761716, "grad_norm": 32.54380416870117, "learning_rate": 4.190953602238284e-05, "loss": 0.6183, "step": 3470 }, { "epoch": 0.8113779435765913, "grad_norm": 19.001272201538086, "learning_rate": 4.188622056423409e-05, "loss": 0.5258, "step": 3480 }, { "epoch": 0.8137094893914666, "grad_norm": 43.268978118896484, "learning_rate": 4.1862905106085336e-05, "loss": 0.5148, "step": 3490 }, { "epoch": 0.8160410352063419, "grad_norm": 15.338736534118652, "learning_rate": 4.183958964793658e-05, "loss": 0.4623, "step": 3500 }, { "epoch": 0.818372581021217, "grad_norm": 21.353567123413086, "learning_rate": 4.181627418978783e-05, "loss": 0.4635, "step": 3510 }, { "epoch": 0.8207041268360923, "grad_norm": 133.6362762451172, "learning_rate": 4.1792958731639084e-05, "loss": 0.4413, "step": 3520 }, { "epoch": 0.8230356726509676, "grad_norm": 32.99754333496094, "learning_rate": 4.176964327349033e-05, "loss": 0.4504, "step": 3530 }, { "epoch": 0.8253672184658428, "grad_norm": 27.58423614501953, "learning_rate": 4.174632781534157e-05, "loss": 0.6231, "step": 3540 }, { "epoch": 0.8276987642807181, "grad_norm": 25.30926513671875, "learning_rate": 4.172301235719282e-05, "loss": 0.3987, "step": 3550 }, { "epoch": 0.8300303100955934, "grad_norm": 21.798927307128906, "learning_rate": 4.169969689904407e-05, "loss": 0.4207, "step": 3560 }, { "epoch": 0.8323618559104686, "grad_norm": 27.713125228881836, "learning_rate": 4.167638144089532e-05, "loss": 0.5021, "step": 3570 }, { "epoch": 0.8346934017253439, "grad_norm": 12.710470199584961, "learning_rate": 4.1653065982746565e-05, "loss": 0.303, "step": 3580 }, { "epoch": 0.8370249475402192, "grad_norm": 24.33064079284668, "learning_rate": 4.162975052459781e-05, "loss": 0.4564, "step": 3590 }, { "epoch": 0.8393564933550944, "grad_norm": 12.85043716430664, "learning_rate": 4.1606435066449055e-05, "loss": 0.4887, "step": 3600 }, { "epoch": 0.8393564933550944, "eval_accuracy": 0.7637407711238721, "eval_f1": 0.7492356256854388, "eval_loss": 0.6969403624534607, "eval_precision": 0.7486500525258384, "eval_recall": 0.7582037541347775, "eval_runtime": 32.6493, "eval_samples_per_second": 37.336, "eval_steps_per_second": 2.358, "step": 3600 }, { "epoch": 0.8416880391699697, "grad_norm": 16.10417366027832, "learning_rate": 4.15831196083003e-05, "loss": 0.4708, "step": 3610 }, { "epoch": 0.844019584984845, "grad_norm": 23.369497299194336, "learning_rate": 4.155980415015156e-05, "loss": 0.5608, "step": 3620 }, { "epoch": 0.8463511307997202, "grad_norm": 19.515378952026367, "learning_rate": 4.15364886920028e-05, "loss": 0.5755, "step": 3630 }, { "epoch": 0.8486826766145955, "grad_norm": 11.91996955871582, "learning_rate": 4.151317323385405e-05, "loss": 0.3869, "step": 3640 }, { "epoch": 0.8510142224294708, "grad_norm": 21.93692398071289, "learning_rate": 4.148985777570529e-05, "loss": 0.484, "step": 3650 }, { "epoch": 0.853345768244346, "grad_norm": 21.24445343017578, "learning_rate": 4.1466542317556536e-05, "loss": 0.3958, "step": 3660 }, { "epoch": 0.8556773140592213, "grad_norm": 16.571958541870117, "learning_rate": 4.144322685940779e-05, "loss": 0.331, "step": 3670 }, { "epoch": 0.8580088598740965, "grad_norm": 32.600013732910156, "learning_rate": 4.141991140125904e-05, "loss": 0.5633, "step": 3680 }, { "epoch": 0.8603404056889717, "grad_norm": 22.03318977355957, "learning_rate": 4.1396595943110284e-05, "loss": 0.4267, "step": 3690 }, { "epoch": 0.862671951503847, "grad_norm": 22.175247192382812, "learning_rate": 4.137328048496153e-05, "loss": 0.5786, "step": 3700 }, { "epoch": 0.8650034973187223, "grad_norm": 16.111539840698242, "learning_rate": 4.134996502681278e-05, "loss": 0.313, "step": 3710 }, { "epoch": 0.8673350431335975, "grad_norm": 23.84856414794922, "learning_rate": 4.1326649568664025e-05, "loss": 0.5621, "step": 3720 }, { "epoch": 0.8696665889484728, "grad_norm": 27.550527572631836, "learning_rate": 4.1303334110515276e-05, "loss": 0.3678, "step": 3730 }, { "epoch": 0.8719981347633481, "grad_norm": 17.500328063964844, "learning_rate": 4.128001865236652e-05, "loss": 0.4118, "step": 3740 }, { "epoch": 0.8743296805782234, "grad_norm": 20.387914657592773, "learning_rate": 4.1256703194217766e-05, "loss": 0.4329, "step": 3750 }, { "epoch": 0.8766612263930986, "grad_norm": 28.69219970703125, "learning_rate": 4.123338773606902e-05, "loss": 0.5973, "step": 3760 }, { "epoch": 0.8789927722079739, "grad_norm": 67.64371490478516, "learning_rate": 4.121007227792026e-05, "loss": 0.439, "step": 3770 }, { "epoch": 0.8813243180228492, "grad_norm": 11.542470932006836, "learning_rate": 4.1186756819771513e-05, "loss": 0.439, "step": 3780 }, { "epoch": 0.8836558638377244, "grad_norm": 28.455219268798828, "learning_rate": 4.116344136162276e-05, "loss": 0.5973, "step": 3790 }, { "epoch": 0.8859874096525997, "grad_norm": 24.25450325012207, "learning_rate": 4.1140125903474e-05, "loss": 0.5056, "step": 3800 }, { "epoch": 0.888318955467475, "grad_norm": 27.57263946533203, "learning_rate": 4.1116810445325254e-05, "loss": 0.5563, "step": 3810 }, { "epoch": 0.8906505012823502, "grad_norm": 32.943077087402344, "learning_rate": 4.10934949871765e-05, "loss": 0.5166, "step": 3820 }, { "epoch": 0.8929820470972255, "grad_norm": 25.220003128051758, "learning_rate": 4.1070179529027744e-05, "loss": 0.4761, "step": 3830 }, { "epoch": 0.8953135929121008, "grad_norm": 19.925155639648438, "learning_rate": 4.1046864070878995e-05, "loss": 0.3266, "step": 3840 }, { "epoch": 0.8976451387269759, "grad_norm": 39.80344009399414, "learning_rate": 4.102354861273025e-05, "loss": 0.5011, "step": 3850 }, { "epoch": 0.8999766845418512, "grad_norm": 23.42641830444336, "learning_rate": 4.100023315458149e-05, "loss": 0.5182, "step": 3860 }, { "epoch": 0.9023082303567265, "grad_norm": 18.273305892944336, "learning_rate": 4.0976917696432736e-05, "loss": 0.4711, "step": 3870 }, { "epoch": 0.9046397761716017, "grad_norm": 27.585613250732422, "learning_rate": 4.095360223828398e-05, "loss": 0.4976, "step": 3880 }, { "epoch": 0.906971321986477, "grad_norm": 10.163019180297852, "learning_rate": 4.093028678013523e-05, "loss": 0.3424, "step": 3890 }, { "epoch": 0.9093028678013523, "grad_norm": 18.85008430480957, "learning_rate": 4.0906971321986484e-05, "loss": 0.358, "step": 3900 }, { "epoch": 0.9093028678013523, "eval_accuracy": 0.7456931911402789, "eval_f1": 0.7407595301719996, "eval_loss": 0.776879072189331, "eval_precision": 0.7366901226312517, "eval_recall": 0.7566993445706643, "eval_runtime": 32.6742, "eval_samples_per_second": 37.308, "eval_steps_per_second": 2.357, "step": 3900 }, { "epoch": 0.9116344136162275, "grad_norm": 32.68353271484375, "learning_rate": 4.088365586383773e-05, "loss": 0.4426, "step": 3910 }, { "epoch": 0.9139659594311028, "grad_norm": 23.773094177246094, "learning_rate": 4.086034040568897e-05, "loss": 0.6041, "step": 3920 }, { "epoch": 0.9162975052459781, "grad_norm": 18.2379093170166, "learning_rate": 4.083702494754022e-05, "loss": 0.4279, "step": 3930 }, { "epoch": 0.9186290510608534, "grad_norm": 20.479825973510742, "learning_rate": 4.081370948939147e-05, "loss": 0.4433, "step": 3940 }, { "epoch": 0.9209605968757286, "grad_norm": 32.64277648925781, "learning_rate": 4.079039403124272e-05, "loss": 0.4704, "step": 3950 }, { "epoch": 0.9232921426906039, "grad_norm": 23.21099090576172, "learning_rate": 4.0767078573093965e-05, "loss": 0.4129, "step": 3960 }, { "epoch": 0.9256236885054792, "grad_norm": 15.052021026611328, "learning_rate": 4.074376311494521e-05, "loss": 0.4739, "step": 3970 }, { "epoch": 0.9279552343203544, "grad_norm": 14.582944869995117, "learning_rate": 4.0720447656796455e-05, "loss": 0.4743, "step": 3980 }, { "epoch": 0.9302867801352297, "grad_norm": 20.65665626525879, "learning_rate": 4.06971321986477e-05, "loss": 0.465, "step": 3990 }, { "epoch": 0.932618325950105, "grad_norm": 20.397233963012695, "learning_rate": 4.067381674049896e-05, "loss": 0.5524, "step": 4000 }, { "epoch": 0.9349498717649802, "grad_norm": 6.327408313751221, "learning_rate": 4.06505012823502e-05, "loss": 0.3442, "step": 4010 }, { "epoch": 0.9372814175798554, "grad_norm": 9.921250343322754, "learning_rate": 4.062718582420145e-05, "loss": 0.3566, "step": 4020 }, { "epoch": 0.9396129633947307, "grad_norm": 17.692302703857422, "learning_rate": 4.060387036605269e-05, "loss": 0.4596, "step": 4030 }, { "epoch": 0.9419445092096059, "grad_norm": 2.8422834873199463, "learning_rate": 4.058055490790394e-05, "loss": 0.4456, "step": 4040 }, { "epoch": 0.9442760550244812, "grad_norm": 16.632015228271484, "learning_rate": 4.055723944975519e-05, "loss": 0.517, "step": 4050 }, { "epoch": 0.9466076008393565, "grad_norm": 23.965824127197266, "learning_rate": 4.053392399160644e-05, "loss": 0.538, "step": 4060 }, { "epoch": 0.9489391466542317, "grad_norm": 26.93478775024414, "learning_rate": 4.0510608533457684e-05, "loss": 0.6331, "step": 4070 }, { "epoch": 0.951270692469107, "grad_norm": 24.248111724853516, "learning_rate": 4.048729307530893e-05, "loss": 0.4971, "step": 4080 }, { "epoch": 0.9536022382839823, "grad_norm": 19.178695678710938, "learning_rate": 4.046397761716018e-05, "loss": 0.4324, "step": 4090 }, { "epoch": 0.9559337840988575, "grad_norm": 19.033815383911133, "learning_rate": 4.0440662159011425e-05, "loss": 0.4867, "step": 4100 }, { "epoch": 0.9582653299137328, "grad_norm": 30.779146194458008, "learning_rate": 4.0417346700862676e-05, "loss": 0.5852, "step": 4110 }, { "epoch": 0.9605968757286081, "grad_norm": 15.432817459106445, "learning_rate": 4.039403124271392e-05, "loss": 0.5479, "step": 4120 }, { "epoch": 0.9629284215434833, "grad_norm": 20.558475494384766, "learning_rate": 4.0370715784565166e-05, "loss": 0.4594, "step": 4130 }, { "epoch": 0.9652599673583586, "grad_norm": 12.561017990112305, "learning_rate": 4.034740032641642e-05, "loss": 0.4238, "step": 4140 }, { "epoch": 0.9675915131732339, "grad_norm": 20.812999725341797, "learning_rate": 4.032408486826766e-05, "loss": 0.5912, "step": 4150 }, { "epoch": 0.9699230589881092, "grad_norm": 15.587718963623047, "learning_rate": 4.0300769410118914e-05, "loss": 0.3111, "step": 4160 }, { "epoch": 0.9722546048029844, "grad_norm": 21.68885612487793, "learning_rate": 4.027745395197016e-05, "loss": 0.4421, "step": 4170 }, { "epoch": 0.9745861506178597, "grad_norm": 8.09349536895752, "learning_rate": 4.02541384938214e-05, "loss": 0.4447, "step": 4180 }, { "epoch": 0.9769176964327349, "grad_norm": 23.542570114135742, "learning_rate": 4.0230823035672654e-05, "loss": 0.4601, "step": 4190 }, { "epoch": 0.9792492422476101, "grad_norm": 20.75370216369629, "learning_rate": 4.02075075775239e-05, "loss": 0.4899, "step": 4200 }, { "epoch": 0.9792492422476101, "eval_accuracy": 0.7809680065627563, "eval_f1": 0.7694587436976446, "eval_loss": 0.6500813961029053, "eval_precision": 0.7664779373382287, "eval_recall": 0.7765403302363741, "eval_runtime": 32.5513, "eval_samples_per_second": 37.449, "eval_steps_per_second": 2.365, "step": 4200 } ], "logging_steps": 10, "max_steps": 21445, "num_input_tokens_seen": 0, "num_train_epochs": 5, "save_steps": 600, "total_flos": 1.439676695052288e+17, "train_batch_size": 16, "trial_name": null, "trial_params": null }