{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.9996172981247609, "eval_steps": 20, "global_step": 1306, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0, "eval_accuracy": 0.648014440433213, "eval_f1": 0.5578231292517006, "eval_loss": 0.6320980787277222, "eval_precision": 0.5125, "eval_recall": 0.6119402985074627, "eval_runtime": 43.0693, "eval_samples_per_second": 6.989, "eval_steps_per_second": 0.232, "step": 0 }, { "epoch": 0.0007654037504783774, "grad_norm": 2.9013254642486572, "learning_rate": 1.5267175572519085e-07, "loss": 0.7916, "step": 1 }, { "epoch": 0.0015308075009567547, "grad_norm": 2.805720567703247, "learning_rate": 3.053435114503817e-07, "loss": 0.7752, "step": 2 }, { "epoch": 0.002296211251435132, "grad_norm": 2.7805535793304443, "learning_rate": 4.5801526717557257e-07, "loss": 0.7902, "step": 3 }, { "epoch": 0.0030616150019135095, "grad_norm": 3.5079779624938965, "learning_rate": 6.106870229007634e-07, "loss": 0.7335, "step": 4 }, { "epoch": 0.003827018752391887, "grad_norm": 2.8343734741210938, "learning_rate": 7.633587786259543e-07, "loss": 0.7026, "step": 5 }, { "epoch": 0.004592422502870264, "grad_norm": 3.3417398929595947, "learning_rate": 9.160305343511451e-07, "loss": 0.7486, "step": 6 }, { "epoch": 0.005357826253348641, "grad_norm": 3.5592422485351562, "learning_rate": 1.068702290076336e-06, "loss": 0.7085, "step": 7 }, { "epoch": 0.006123230003827019, "grad_norm": 4.005152702331543, "learning_rate": 1.2213740458015268e-06, "loss": 0.7904, "step": 8 }, { "epoch": 0.006888633754305396, "grad_norm": 3.1045947074890137, "learning_rate": 1.3740458015267178e-06, "loss": 0.7956, "step": 9 }, { "epoch": 0.007654037504783774, "grad_norm": 2.570974826812744, "learning_rate": 1.5267175572519086e-06, "loss": 0.7206, "step": 10 }, { "epoch": 0.008419441255262151, "grad_norm": 2.293649911880493, "learning_rate": 1.6793893129770995e-06, "loss": 0.6451, "step": 11 }, { "epoch": 0.009184845005740528, "grad_norm": 2.148493528366089, "learning_rate": 1.8320610687022903e-06, "loss": 0.5701, "step": 12 }, { "epoch": 0.009950248756218905, "grad_norm": 3.934507131576538, "learning_rate": 1.984732824427481e-06, "loss": 0.7146, "step": 13 }, { "epoch": 0.010715652506697282, "grad_norm": 2.8026010990142822, "learning_rate": 2.137404580152672e-06, "loss": 0.7812, "step": 14 }, { "epoch": 0.011481056257175661, "grad_norm": 2.6908481121063232, "learning_rate": 2.2900763358778625e-06, "loss": 0.6653, "step": 15 }, { "epoch": 0.012246460007654038, "grad_norm": 2.2652783393859863, "learning_rate": 2.4427480916030536e-06, "loss": 0.6606, "step": 16 }, { "epoch": 0.013011863758132415, "grad_norm": 3.234048843383789, "learning_rate": 2.595419847328244e-06, "loss": 0.7316, "step": 17 }, { "epoch": 0.013777267508610792, "grad_norm": 2.967696189880371, "learning_rate": 2.7480916030534356e-06, "loss": 0.6979, "step": 18 }, { "epoch": 0.01454267125908917, "grad_norm": 3.8475921154022217, "learning_rate": 2.900763358778626e-06, "loss": 0.8055, "step": 19 }, { "epoch": 0.015308075009567547, "grad_norm": 2.42297101020813, "learning_rate": 3.0534351145038173e-06, "loss": 0.6598, "step": 20 }, { "epoch": 0.015308075009567547, "eval_accuracy": 0.6552346570397112, "eval_f1": 0.5526932084309133, "eval_loss": 0.6283615231513977, "eval_precision": 0.5221238938053098, "eval_recall": 0.5870646766169154, "eval_runtime": 43.704, "eval_samples_per_second": 6.887, "eval_steps_per_second": 0.229, "step": 20 }, { "epoch": 0.016073478760045924, "grad_norm": 2.063398838043213, "learning_rate": 3.206106870229008e-06, "loss": 0.6512, "step": 21 }, { "epoch": 0.016838882510524303, "grad_norm": 2.4448840618133545, "learning_rate": 3.358778625954199e-06, "loss": 0.567, "step": 22 }, { "epoch": 0.017604286261002678, "grad_norm": 3.497593641281128, "learning_rate": 3.5114503816793895e-06, "loss": 0.7674, "step": 23 }, { "epoch": 0.018369690011481057, "grad_norm": 1.899181842803955, "learning_rate": 3.6641221374045806e-06, "loss": 0.6927, "step": 24 }, { "epoch": 0.019135093761959432, "grad_norm": 1.9708737134933472, "learning_rate": 3.816793893129772e-06, "loss": 0.7301, "step": 25 }, { "epoch": 0.01990049751243781, "grad_norm": 2.531264543533325, "learning_rate": 3.969465648854962e-06, "loss": 0.7908, "step": 26 }, { "epoch": 0.02066590126291619, "grad_norm": 3.4307644367218018, "learning_rate": 4.122137404580153e-06, "loss": 0.7273, "step": 27 }, { "epoch": 0.021431305013394564, "grad_norm": 2.6872708797454834, "learning_rate": 4.274809160305344e-06, "loss": 0.7336, "step": 28 }, { "epoch": 0.022196708763872943, "grad_norm": 3.0511341094970703, "learning_rate": 4.427480916030535e-06, "loss": 0.6708, "step": 29 }, { "epoch": 0.022962112514351322, "grad_norm": 2.479977607727051, "learning_rate": 4.580152671755725e-06, "loss": 0.7374, "step": 30 }, { "epoch": 0.023727516264829697, "grad_norm": 3.206690549850464, "learning_rate": 4.732824427480917e-06, "loss": 0.7716, "step": 31 }, { "epoch": 0.024492920015308076, "grad_norm": 2.561702013015747, "learning_rate": 4.885496183206107e-06, "loss": 0.8201, "step": 32 }, { "epoch": 0.02525832376578645, "grad_norm": 2.986666679382324, "learning_rate": 5.038167938931297e-06, "loss": 0.7439, "step": 33 }, { "epoch": 0.02602372751626483, "grad_norm": 1.9290986061096191, "learning_rate": 5.190839694656488e-06, "loss": 0.6164, "step": 34 }, { "epoch": 0.026789131266743208, "grad_norm": 2.402761459350586, "learning_rate": 5.34351145038168e-06, "loss": 0.7119, "step": 35 }, { "epoch": 0.027554535017221583, "grad_norm": 2.68208384513855, "learning_rate": 5.496183206106871e-06, "loss": 0.6692, "step": 36 }, { "epoch": 0.028319938767699962, "grad_norm": 2.0670857429504395, "learning_rate": 5.648854961832062e-06, "loss": 0.6724, "step": 37 }, { "epoch": 0.02908534251817834, "grad_norm": 2.9470982551574707, "learning_rate": 5.801526717557252e-06, "loss": 0.66, "step": 38 }, { "epoch": 0.029850746268656716, "grad_norm": 2.172914981842041, "learning_rate": 5.9541984732824435e-06, "loss": 0.6567, "step": 39 }, { "epoch": 0.030616150019135095, "grad_norm": 2.658773183822632, "learning_rate": 6.1068702290076346e-06, "loss": 0.6948, "step": 40 }, { "epoch": 0.030616150019135095, "eval_accuracy": 0.6787003610108303, "eval_f1": 0.5, "eval_loss": 0.6222203373908997, "eval_precision": 0.5741935483870968, "eval_recall": 0.4427860696517413, "eval_runtime": 43.9963, "eval_samples_per_second": 6.841, "eval_steps_per_second": 0.227, "step": 40 }, { "epoch": 0.03138155376961347, "grad_norm": 2.3062996864318848, "learning_rate": 6.259541984732826e-06, "loss": 0.7169, "step": 41 }, { "epoch": 0.03214695752009185, "grad_norm": 2.899698495864868, "learning_rate": 6.412213740458016e-06, "loss": 0.65, "step": 42 }, { "epoch": 0.03291236127057023, "grad_norm": 2.191978931427002, "learning_rate": 6.564885496183207e-06, "loss": 0.6741, "step": 43 }, { "epoch": 0.033677765021048606, "grad_norm": 2.6911063194274902, "learning_rate": 6.717557251908398e-06, "loss": 0.652, "step": 44 }, { "epoch": 0.03444316877152698, "grad_norm": 2.832355260848999, "learning_rate": 6.870229007633589e-06, "loss": 0.7163, "step": 45 }, { "epoch": 0.035208572522005356, "grad_norm": 2.7549805641174316, "learning_rate": 7.022900763358779e-06, "loss": 0.6482, "step": 46 }, { "epoch": 0.035973976272483735, "grad_norm": 2.533820390701294, "learning_rate": 7.17557251908397e-06, "loss": 0.6657, "step": 47 }, { "epoch": 0.03673938002296211, "grad_norm": 2.542445182800293, "learning_rate": 7.328244274809161e-06, "loss": 0.6803, "step": 48 }, { "epoch": 0.03750478377344049, "grad_norm": 2.254171848297119, "learning_rate": 7.480916030534352e-06, "loss": 0.6628, "step": 49 }, { "epoch": 0.038270187523918864, "grad_norm": 2.2992422580718994, "learning_rate": 7.633587786259543e-06, "loss": 0.6237, "step": 50 }, { "epoch": 0.03903559127439724, "grad_norm": 2.2923660278320312, "learning_rate": 7.786259541984733e-06, "loss": 0.636, "step": 51 }, { "epoch": 0.03980099502487562, "grad_norm": 3.348690986633301, "learning_rate": 7.938931297709924e-06, "loss": 0.7033, "step": 52 }, { "epoch": 0.040566398775354, "grad_norm": 2.794326066970825, "learning_rate": 8.091603053435115e-06, "loss": 0.7764, "step": 53 }, { "epoch": 0.04133180252583238, "grad_norm": 2.662022352218628, "learning_rate": 8.244274809160306e-06, "loss": 0.6787, "step": 54 }, { "epoch": 0.04209720627631076, "grad_norm": 2.623007297515869, "learning_rate": 8.396946564885497e-06, "loss": 0.6481, "step": 55 }, { "epoch": 0.04286261002678913, "grad_norm": 2.626187801361084, "learning_rate": 8.549618320610688e-06, "loss": 0.5648, "step": 56 }, { "epoch": 0.04362801377726751, "grad_norm": 3.5378401279449463, "learning_rate": 8.702290076335879e-06, "loss": 0.7537, "step": 57 }, { "epoch": 0.044393417527745886, "grad_norm": 2.221616744995117, "learning_rate": 8.85496183206107e-06, "loss": 0.5946, "step": 58 }, { "epoch": 0.045158821278224265, "grad_norm": 2.4003000259399414, "learning_rate": 9.007633587786259e-06, "loss": 0.6146, "step": 59 }, { "epoch": 0.045924225028702644, "grad_norm": 3.179100751876831, "learning_rate": 9.16030534351145e-06, "loss": 0.6394, "step": 60 }, { "epoch": 0.045924225028702644, "eval_accuracy": 0.6877256317689531, "eval_f1": 0.4507936507936508, "eval_loss": 0.6186545491218567, "eval_precision": 0.6228070175438597, "eval_recall": 0.35323383084577115, "eval_runtime": 43.6865, "eval_samples_per_second": 6.89, "eval_steps_per_second": 0.229, "step": 60 }, { "epoch": 0.046689628779181015, "grad_norm": 2.6747021675109863, "learning_rate": 9.312977099236641e-06, "loss": 0.6995, "step": 61 }, { "epoch": 0.047455032529659394, "grad_norm": 2.2150821685791016, "learning_rate": 9.465648854961834e-06, "loss": 0.6553, "step": 62 }, { "epoch": 0.04822043628013777, "grad_norm": 4.880781173706055, "learning_rate": 9.618320610687025e-06, "loss": 0.695, "step": 63 }, { "epoch": 0.04898584003061615, "grad_norm": 2.353868007659912, "learning_rate": 9.770992366412214e-06, "loss": 0.6338, "step": 64 }, { "epoch": 0.04975124378109453, "grad_norm": 2.5536489486694336, "learning_rate": 9.923664122137405e-06, "loss": 0.6623, "step": 65 }, { "epoch": 0.0505166475315729, "grad_norm": 2.4389688968658447, "learning_rate": 1.0076335877862595e-05, "loss": 0.676, "step": 66 }, { "epoch": 0.05128205128205128, "grad_norm": 2.0447139739990234, "learning_rate": 1.0229007633587786e-05, "loss": 0.5571, "step": 67 }, { "epoch": 0.05204745503252966, "grad_norm": 2.9738974571228027, "learning_rate": 1.0381679389312977e-05, "loss": 0.7705, "step": 68 }, { "epoch": 0.05281285878300804, "grad_norm": 2.7463932037353516, "learning_rate": 1.0534351145038168e-05, "loss": 0.6645, "step": 69 }, { "epoch": 0.053578262533486416, "grad_norm": 2.617324113845825, "learning_rate": 1.068702290076336e-05, "loss": 0.6211, "step": 70 }, { "epoch": 0.05434366628396479, "grad_norm": 2.629154920578003, "learning_rate": 1.0839694656488552e-05, "loss": 0.6189, "step": 71 }, { "epoch": 0.05510907003444317, "grad_norm": 3.0344226360321045, "learning_rate": 1.0992366412213743e-05, "loss": 0.7318, "step": 72 }, { "epoch": 0.055874473784921545, "grad_norm": 2.5670666694641113, "learning_rate": 1.1145038167938934e-05, "loss": 0.6702, "step": 73 }, { "epoch": 0.056639877535399924, "grad_norm": 2.1741843223571777, "learning_rate": 1.1297709923664125e-05, "loss": 0.6686, "step": 74 }, { "epoch": 0.0574052812858783, "grad_norm": 2.724057674407959, "learning_rate": 1.1450381679389312e-05, "loss": 0.6252, "step": 75 }, { "epoch": 0.05817068503635668, "grad_norm": 2.225811719894409, "learning_rate": 1.1603053435114503e-05, "loss": 0.5888, "step": 76 }, { "epoch": 0.05893608878683505, "grad_norm": 2.8234739303588867, "learning_rate": 1.1755725190839696e-05, "loss": 0.5833, "step": 77 }, { "epoch": 0.05970149253731343, "grad_norm": 2.3463521003723145, "learning_rate": 1.1908396946564887e-05, "loss": 0.6519, "step": 78 }, { "epoch": 0.06046689628779181, "grad_norm": 2.0764167308807373, "learning_rate": 1.2061068702290078e-05, "loss": 0.6022, "step": 79 }, { "epoch": 0.06123230003827019, "grad_norm": 2.889023542404175, "learning_rate": 1.2213740458015269e-05, "loss": 0.6466, "step": 80 }, { "epoch": 0.06123230003827019, "eval_accuracy": 0.7148014440433214, "eval_f1": 0.5752688172043011, "eval_loss": 0.5946354866027832, "eval_precision": 0.6257309941520468, "eval_recall": 0.5323383084577115, "eval_runtime": 43.854, "eval_samples_per_second": 6.864, "eval_steps_per_second": 0.228, "step": 80 }, { "epoch": 0.06199770378874857, "grad_norm": 2.1842238903045654, "learning_rate": 1.236641221374046e-05, "loss": 0.6137, "step": 81 }, { "epoch": 0.06276310753922694, "grad_norm": 2.382739543914795, "learning_rate": 1.2519083969465651e-05, "loss": 0.5988, "step": 82 }, { "epoch": 0.06352851128970533, "grad_norm": 2.536407470703125, "learning_rate": 1.2671755725190839e-05, "loss": 0.6698, "step": 83 }, { "epoch": 0.0642939150401837, "grad_norm": 3.341318130493164, "learning_rate": 1.2824427480916032e-05, "loss": 0.632, "step": 84 }, { "epoch": 0.06505931879066207, "grad_norm": 2.605912446975708, "learning_rate": 1.2977099236641223e-05, "loss": 0.6817, "step": 85 }, { "epoch": 0.06582472254114045, "grad_norm": 2.7820451259613037, "learning_rate": 1.3129770992366414e-05, "loss": 0.5941, "step": 86 }, { "epoch": 0.06659012629161883, "grad_norm": 2.19356632232666, "learning_rate": 1.3282442748091605e-05, "loss": 0.5755, "step": 87 }, { "epoch": 0.06735553004209721, "grad_norm": 2.6070029735565186, "learning_rate": 1.3435114503816796e-05, "loss": 0.6472, "step": 88 }, { "epoch": 0.06812093379257558, "grad_norm": 3.145352363586426, "learning_rate": 1.3587786259541987e-05, "loss": 0.6356, "step": 89 }, { "epoch": 0.06888633754305395, "grad_norm": 2.909482955932617, "learning_rate": 1.3740458015267178e-05, "loss": 0.5926, "step": 90 }, { "epoch": 0.06965174129353234, "grad_norm": 2.7917375564575195, "learning_rate": 1.3893129770992369e-05, "loss": 0.5445, "step": 91 }, { "epoch": 0.07041714504401071, "grad_norm": 2.8729193210601807, "learning_rate": 1.4045801526717558e-05, "loss": 0.6508, "step": 92 }, { "epoch": 0.0711825487944891, "grad_norm": 3.1102027893066406, "learning_rate": 1.4198473282442749e-05, "loss": 0.6189, "step": 93 }, { "epoch": 0.07194795254496747, "grad_norm": 3.4606645107269287, "learning_rate": 1.435114503816794e-05, "loss": 0.5364, "step": 94 }, { "epoch": 0.07271335629544584, "grad_norm": 2.350576877593994, "learning_rate": 1.4503816793893131e-05, "loss": 0.5593, "step": 95 }, { "epoch": 0.07347876004592423, "grad_norm": 3.097646474838257, "learning_rate": 1.4656488549618322e-05, "loss": 0.72, "step": 96 }, { "epoch": 0.0742441637964026, "grad_norm": 2.978395938873291, "learning_rate": 1.4809160305343513e-05, "loss": 0.648, "step": 97 }, { "epoch": 0.07500956754688098, "grad_norm": 3.397475004196167, "learning_rate": 1.4961832061068704e-05, "loss": 0.5215, "step": 98 }, { "epoch": 0.07577497129735936, "grad_norm": 2.8847622871398926, "learning_rate": 1.5114503816793895e-05, "loss": 0.5677, "step": 99 }, { "epoch": 0.07654037504783773, "grad_norm": 2.4467875957489014, "learning_rate": 1.5267175572519086e-05, "loss": 0.5551, "step": 100 }, { "epoch": 0.07654037504783773, "eval_accuracy": 0.7256317689530686, "eval_f1": 0.6346153846153846, "eval_loss": 0.5565891265869141, "eval_precision": 0.6139534883720931, "eval_recall": 0.6567164179104478, "eval_runtime": 44.3839, "eval_samples_per_second": 6.782, "eval_steps_per_second": 0.225, "step": 100 }, { "epoch": 0.07730577879831611, "grad_norm": 2.684727191925049, "learning_rate": 1.5419847328244274e-05, "loss": 0.5596, "step": 101 }, { "epoch": 0.07807118254879448, "grad_norm": 4.310076713562012, "learning_rate": 1.5572519083969465e-05, "loss": 0.5743, "step": 102 }, { "epoch": 0.07883658629927287, "grad_norm": 2.9870874881744385, "learning_rate": 1.5725190839694656e-05, "loss": 0.6095, "step": 103 }, { "epoch": 0.07960199004975124, "grad_norm": 2.954394578933716, "learning_rate": 1.5877862595419847e-05, "loss": 0.6373, "step": 104 }, { "epoch": 0.08036739380022963, "grad_norm": 2.9433109760284424, "learning_rate": 1.6030534351145038e-05, "loss": 0.5258, "step": 105 }, { "epoch": 0.081132797550708, "grad_norm": 2.8164420127868652, "learning_rate": 1.618320610687023e-05, "loss": 0.5401, "step": 106 }, { "epoch": 0.08189820130118637, "grad_norm": 3.0667014122009277, "learning_rate": 1.633587786259542e-05, "loss": 0.5749, "step": 107 }, { "epoch": 0.08266360505166476, "grad_norm": 3.64597749710083, "learning_rate": 1.648854961832061e-05, "loss": 0.5074, "step": 108 }, { "epoch": 0.08342900880214313, "grad_norm": 4.898886680603027, "learning_rate": 1.6641221374045802e-05, "loss": 0.6379, "step": 109 }, { "epoch": 0.08419441255262151, "grad_norm": 3.8319902420043945, "learning_rate": 1.6793893129770993e-05, "loss": 0.5238, "step": 110 }, { "epoch": 0.08495981630309989, "grad_norm": 3.2003352642059326, "learning_rate": 1.6946564885496184e-05, "loss": 0.5893, "step": 111 }, { "epoch": 0.08572522005357826, "grad_norm": 5.576728343963623, "learning_rate": 1.7099236641221375e-05, "loss": 0.4767, "step": 112 }, { "epoch": 0.08649062380405664, "grad_norm": 3.50812029838562, "learning_rate": 1.7251908396946566e-05, "loss": 0.5042, "step": 113 }, { "epoch": 0.08725602755453502, "grad_norm": 3.7311975955963135, "learning_rate": 1.7404580152671757e-05, "loss": 0.6514, "step": 114 }, { "epoch": 0.0880214313050134, "grad_norm": 3.020787477493286, "learning_rate": 1.755725190839695e-05, "loss": 0.5156, "step": 115 }, { "epoch": 0.08878683505549177, "grad_norm": 2.9726624488830566, "learning_rate": 1.770992366412214e-05, "loss": 0.5013, "step": 116 }, { "epoch": 0.08955223880597014, "grad_norm": 4.21381950378418, "learning_rate": 1.786259541984733e-05, "loss": 0.498, "step": 117 }, { "epoch": 0.09031764255644853, "grad_norm": 4.426884174346924, "learning_rate": 1.8015267175572518e-05, "loss": 0.5154, "step": 118 }, { "epoch": 0.0910830463069269, "grad_norm": 4.183692455291748, "learning_rate": 1.816793893129771e-05, "loss": 0.4391, "step": 119 }, { "epoch": 0.09184845005740529, "grad_norm": 4.293137550354004, "learning_rate": 1.83206106870229e-05, "loss": 0.5631, "step": 120 }, { "epoch": 0.09184845005740529, "eval_accuracy": 0.7924187725631769, "eval_f1": 0.6916890080428955, "eval_loss": 0.4903343915939331, "eval_precision": 0.75, "eval_recall": 0.6417910447761194, "eval_runtime": 44.3718, "eval_samples_per_second": 6.784, "eval_steps_per_second": 0.225, "step": 120 }, { "epoch": 0.09261385380788366, "grad_norm": 2.93092942237854, "learning_rate": 1.847328244274809e-05, "loss": 0.4524, "step": 121 }, { "epoch": 0.09337925755836203, "grad_norm": 5.02364444732666, "learning_rate": 1.8625954198473282e-05, "loss": 0.5278, "step": 122 }, { "epoch": 0.09414466130884042, "grad_norm": 4.0692596435546875, "learning_rate": 1.8778625954198473e-05, "loss": 0.434, "step": 123 }, { "epoch": 0.09491006505931879, "grad_norm": 5.78764009475708, "learning_rate": 1.8931297709923668e-05, "loss": 0.6609, "step": 124 }, { "epoch": 0.09567546880979717, "grad_norm": 4.8192458152771, "learning_rate": 1.908396946564886e-05, "loss": 0.5673, "step": 125 }, { "epoch": 0.09644087256027555, "grad_norm": 4.077422618865967, "learning_rate": 1.923664122137405e-05, "loss": 0.4788, "step": 126 }, { "epoch": 0.09720627631075392, "grad_norm": 3.655240058898926, "learning_rate": 1.9389312977099238e-05, "loss": 0.5678, "step": 127 }, { "epoch": 0.0979716800612323, "grad_norm": 4.653315544128418, "learning_rate": 1.954198473282443e-05, "loss": 0.4992, "step": 128 }, { "epoch": 0.09873708381171067, "grad_norm": 11.944878578186035, "learning_rate": 1.969465648854962e-05, "loss": 0.6651, "step": 129 }, { "epoch": 0.09950248756218906, "grad_norm": 4.565611362457275, "learning_rate": 1.984732824427481e-05, "loss": 0.5207, "step": 130 }, { "epoch": 0.10026789131266743, "grad_norm": 6.054805755615234, "learning_rate": 2e-05, "loss": 0.5111, "step": 131 }, { "epoch": 0.1010332950631458, "grad_norm": 4.262387275695801, "learning_rate": 1.9999964256773125e-05, "loss": 0.4818, "step": 132 }, { "epoch": 0.10179869881362419, "grad_norm": 4.246397018432617, "learning_rate": 1.9999857027348008e-05, "loss": 0.5247, "step": 133 }, { "epoch": 0.10256410256410256, "grad_norm": 4.61754846572876, "learning_rate": 1.9999678312491194e-05, "loss": 0.5223, "step": 134 }, { "epoch": 0.10332950631458095, "grad_norm": 5.072839736938477, "learning_rate": 1.999942811348026e-05, "loss": 0.4912, "step": 135 }, { "epoch": 0.10409491006505932, "grad_norm": 3.8653759956359863, "learning_rate": 1.9999106432103785e-05, "loss": 0.4203, "step": 136 }, { "epoch": 0.10486031381553769, "grad_norm": 4.48724365234375, "learning_rate": 1.999871327066135e-05, "loss": 0.4541, "step": 137 }, { "epoch": 0.10562571756601608, "grad_norm": 4.170693874359131, "learning_rate": 1.9998248631963532e-05, "loss": 0.4372, "step": 138 }, { "epoch": 0.10639112131649445, "grad_norm": 4.524763107299805, "learning_rate": 1.999771251933187e-05, "loss": 0.4466, "step": 139 }, { "epoch": 0.10715652506697283, "grad_norm": 6.832329750061035, "learning_rate": 1.999710493659884e-05, "loss": 0.5009, "step": 140 }, { "epoch": 0.10715652506697283, "eval_accuracy": 0.7978339350180506, "eval_f1": 0.7083333333333334, "eval_loss": 0.45516324043273926, "eval_precision": 0.7431693989071039, "eval_recall": 0.6766169154228856, "eval_runtime": 43.9314, "eval_samples_per_second": 6.852, "eval_steps_per_second": 0.228, "step": 140 }, { "epoch": 0.1079219288174512, "grad_norm": 7.247250080108643, "learning_rate": 1.999642588810784e-05, "loss": 0.5023, "step": 141 }, { "epoch": 0.10868733256792958, "grad_norm": 6.123129844665527, "learning_rate": 1.999567537871314e-05, "loss": 0.4237, "step": 142 }, { "epoch": 0.10945273631840796, "grad_norm": 3.4526610374450684, "learning_rate": 1.999485341377987e-05, "loss": 0.3793, "step": 143 }, { "epoch": 0.11021814006888633, "grad_norm": 7.032047271728516, "learning_rate": 1.9993959999183964e-05, "loss": 0.545, "step": 144 }, { "epoch": 0.11098354381936472, "grad_norm": 4.710192680358887, "learning_rate": 1.9992995141312126e-05, "loss": 0.4912, "step": 145 }, { "epoch": 0.11174894756984309, "grad_norm": 5.201582431793213, "learning_rate": 1.9991958847061786e-05, "loss": 0.4327, "step": 146 }, { "epoch": 0.11251435132032148, "grad_norm": 5.157017707824707, "learning_rate": 1.999085112384104e-05, "loss": 0.5494, "step": 147 }, { "epoch": 0.11327975507079985, "grad_norm": 5.805063247680664, "learning_rate": 1.998967197956861e-05, "loss": 0.567, "step": 148 }, { "epoch": 0.11404515882127822, "grad_norm": 5.021705150604248, "learning_rate": 1.998842142267378e-05, "loss": 0.4683, "step": 149 }, { "epoch": 0.1148105625717566, "grad_norm": 5.150928497314453, "learning_rate": 1.9987099462096342e-05, "loss": 0.4673, "step": 150 }, { "epoch": 0.11557596632223498, "grad_norm": 7.4559760093688965, "learning_rate": 1.9985706107286515e-05, "loss": 0.5338, "step": 151 }, { "epoch": 0.11634137007271336, "grad_norm": 4.605138301849365, "learning_rate": 1.9984241368204907e-05, "loss": 0.3907, "step": 152 }, { "epoch": 0.11710677382319173, "grad_norm": 7.63367223739624, "learning_rate": 1.998270525532241e-05, "loss": 0.4603, "step": 153 }, { "epoch": 0.1178721775736701, "grad_norm": 4.527704238891602, "learning_rate": 1.9981097779620156e-05, "loss": 0.4655, "step": 154 }, { "epoch": 0.11863758132414849, "grad_norm": 4.8172407150268555, "learning_rate": 1.9979418952589417e-05, "loss": 0.4778, "step": 155 }, { "epoch": 0.11940298507462686, "grad_norm": 3.8255341053009033, "learning_rate": 1.9977668786231536e-05, "loss": 0.4034, "step": 156 }, { "epoch": 0.12016838882510525, "grad_norm": 7.002188682556152, "learning_rate": 1.9975847293057822e-05, "loss": 0.4578, "step": 157 }, { "epoch": 0.12093379257558362, "grad_norm": 4.92902946472168, "learning_rate": 1.9973954486089494e-05, "loss": 0.442, "step": 158 }, { "epoch": 0.12169919632606199, "grad_norm": 4.575451374053955, "learning_rate": 1.997199037885755e-05, "loss": 0.5066, "step": 159 }, { "epoch": 0.12246460007654038, "grad_norm": 4.702727317810059, "learning_rate": 1.9969954985402702e-05, "loss": 0.4532, "step": 160 }, { "epoch": 0.12246460007654038, "eval_accuracy": 0.8267148014440433, "eval_f1": 0.7318435754189944, "eval_loss": 0.4339619278907776, "eval_precision": 0.8343949044585988, "eval_recall": 0.6517412935323383, "eval_runtime": 42.5179, "eval_samples_per_second": 7.079, "eval_steps_per_second": 0.235, "step": 160 }, { "epoch": 0.12323000382701875, "grad_norm": 6.910041332244873, "learning_rate": 1.9967848320275253e-05, "loss": 0.5374, "step": 161 }, { "epoch": 0.12399540757749714, "grad_norm": 4.21750020980835, "learning_rate": 1.9965670398535004e-05, "loss": 0.3887, "step": 162 }, { "epoch": 0.12476081132797551, "grad_norm": 4.853139877319336, "learning_rate": 1.996342123575115e-05, "loss": 0.4184, "step": 163 }, { "epoch": 0.12552621507845388, "grad_norm": 5.117790699005127, "learning_rate": 1.9961100848002154e-05, "loss": 0.4558, "step": 164 }, { "epoch": 0.12629161882893225, "grad_norm": 5.230927467346191, "learning_rate": 1.9958709251875642e-05, "loss": 0.3832, "step": 165 }, { "epoch": 0.12705702257941065, "grad_norm": 3.984503746032715, "learning_rate": 1.9956246464468294e-05, "loss": 0.3992, "step": 166 }, { "epoch": 0.12782242632988902, "grad_norm": 6.295953273773193, "learning_rate": 1.9953712503385702e-05, "loss": 0.4012, "step": 167 }, { "epoch": 0.1285878300803674, "grad_norm": 6.807311058044434, "learning_rate": 1.995110738674225e-05, "loss": 0.484, "step": 168 }, { "epoch": 0.12935323383084577, "grad_norm": 6.350063323974609, "learning_rate": 1.9948431133160998e-05, "loss": 0.3994, "step": 169 }, { "epoch": 0.13011863758132414, "grad_norm": 5.400058269500732, "learning_rate": 1.9945683761773533e-05, "loss": 0.3999, "step": 170 }, { "epoch": 0.13088404133180254, "grad_norm": 5.8745503425598145, "learning_rate": 1.9942865292219837e-05, "loss": 0.3733, "step": 171 }, { "epoch": 0.1316494450822809, "grad_norm": 7.252079963684082, "learning_rate": 1.9939975744648152e-05, "loss": 0.4508, "step": 172 }, { "epoch": 0.13241484883275928, "grad_norm": 5.198064804077148, "learning_rate": 1.9937015139714825e-05, "loss": 0.4664, "step": 173 }, { "epoch": 0.13318025258323765, "grad_norm": 4.906857013702393, "learning_rate": 1.9933983498584175e-05, "loss": 0.3245, "step": 174 }, { "epoch": 0.13394565633371602, "grad_norm": 5.309024333953857, "learning_rate": 1.9930880842928325e-05, "loss": 0.3559, "step": 175 }, { "epoch": 0.13471106008419442, "grad_norm": 7.003721237182617, "learning_rate": 1.9927707194927067e-05, "loss": 0.4652, "step": 176 }, { "epoch": 0.1354764638346728, "grad_norm": 6.0199408531188965, "learning_rate": 1.9924462577267676e-05, "loss": 0.5003, "step": 177 }, { "epoch": 0.13624186758515117, "grad_norm": 3.719982862472534, "learning_rate": 1.9921147013144782e-05, "loss": 0.3071, "step": 178 }, { "epoch": 0.13700727133562954, "grad_norm": 10.396299362182617, "learning_rate": 1.991776052626017e-05, "loss": 0.5692, "step": 179 }, { "epoch": 0.1377726750861079, "grad_norm": 6.391393661499023, "learning_rate": 1.9914303140822634e-05, "loss": 0.3813, "step": 180 }, { "epoch": 0.1377726750861079, "eval_accuracy": 0.8285198555956679, "eval_f1": 0.7262247838616714, "eval_loss": 0.44141414761543274, "eval_precision": 0.863013698630137, "eval_recall": 0.6268656716417911, "eval_runtime": 43.8443, "eval_samples_per_second": 6.865, "eval_steps_per_second": 0.228, "step": 180 }, { "epoch": 0.1385380788365863, "grad_norm": 6.433442115783691, "learning_rate": 1.9910774881547803e-05, "loss": 0.4277, "step": 181 }, { "epoch": 0.13930348258706468, "grad_norm": 5.727473258972168, "learning_rate": 1.9907175773657945e-05, "loss": 0.4079, "step": 182 }, { "epoch": 0.14006888633754305, "grad_norm": 4.4970011711120605, "learning_rate": 1.990350584288181e-05, "loss": 0.329, "step": 183 }, { "epoch": 0.14083429008802142, "grad_norm": 4.585634708404541, "learning_rate": 1.989976511545443e-05, "loss": 0.2894, "step": 184 }, { "epoch": 0.1415996938384998, "grad_norm": 16.09983253479004, "learning_rate": 1.9895953618116935e-05, "loss": 0.4009, "step": 185 }, { "epoch": 0.1423650975889782, "grad_norm": 8.76262378692627, "learning_rate": 1.9892071378116378e-05, "loss": 0.5451, "step": 186 }, { "epoch": 0.14313050133945657, "grad_norm": 8.696534156799316, "learning_rate": 1.9888118423205504e-05, "loss": 0.467, "step": 187 }, { "epoch": 0.14389590508993494, "grad_norm": 8.314312934875488, "learning_rate": 1.9884094781642592e-05, "loss": 0.4139, "step": 188 }, { "epoch": 0.1446613088404133, "grad_norm": 6.255258560180664, "learning_rate": 1.988000048219123e-05, "loss": 0.4909, "step": 189 }, { "epoch": 0.14542671259089168, "grad_norm": 5.545584678649902, "learning_rate": 1.9875835554120114e-05, "loss": 0.3941, "step": 190 }, { "epoch": 0.14619211634137008, "grad_norm": 6.706730842590332, "learning_rate": 1.987160002720283e-05, "loss": 0.4446, "step": 191 }, { "epoch": 0.14695752009184845, "grad_norm": 6.027093410491943, "learning_rate": 1.9867293931717664e-05, "loss": 0.4009, "step": 192 }, { "epoch": 0.14772292384232683, "grad_norm": 5.547446250915527, "learning_rate": 1.9862917298447365e-05, "loss": 0.406, "step": 193 }, { "epoch": 0.1484883275928052, "grad_norm": 5.960996627807617, "learning_rate": 1.9858470158678932e-05, "loss": 0.3704, "step": 194 }, { "epoch": 0.14925373134328357, "grad_norm": 6.804495334625244, "learning_rate": 1.9853952544203387e-05, "loss": 0.4264, "step": 195 }, { "epoch": 0.15001913509376197, "grad_norm": 6.881677627563477, "learning_rate": 1.984936448731556e-05, "loss": 0.4816, "step": 196 }, { "epoch": 0.15078453884424034, "grad_norm": 9.341668128967285, "learning_rate": 1.9844706020813835e-05, "loss": 0.4415, "step": 197 }, { "epoch": 0.1515499425947187, "grad_norm": 5.255512237548828, "learning_rate": 1.9839977177999942e-05, "loss": 0.3241, "step": 198 }, { "epoch": 0.15231534634519708, "grad_norm": 6.275108337402344, "learning_rate": 1.9835177992678704e-05, "loss": 0.4358, "step": 199 }, { "epoch": 0.15308075009567546, "grad_norm": 5.3615336418151855, "learning_rate": 1.9830308499157787e-05, "loss": 0.3897, "step": 200 }, { "epoch": 0.15308075009567546, "eval_accuracy": 0.8393501805054152, "eval_f1": 0.7449856733524355, "eval_loss": 0.42016080021858215, "eval_precision": 0.8783783783783784, "eval_recall": 0.6467661691542289, "eval_runtime": 43.7492, "eval_samples_per_second": 6.88, "eval_steps_per_second": 0.229, "step": 200 }, { "epoch": 0.15384615384615385, "grad_norm": 5.135117053985596, "learning_rate": 1.982536873224748e-05, "loss": 0.3718, "step": 201 }, { "epoch": 0.15461155759663223, "grad_norm": 9.245200157165527, "learning_rate": 1.982035872726042e-05, "loss": 0.4988, "step": 202 }, { "epoch": 0.1553769613471106, "grad_norm": 5.376220703125, "learning_rate": 1.9815278520011364e-05, "loss": 0.3518, "step": 203 }, { "epoch": 0.15614236509758897, "grad_norm": 5.195625305175781, "learning_rate": 1.98101281468169e-05, "loss": 0.3542, "step": 204 }, { "epoch": 0.15690776884806737, "grad_norm": 3.9978110790252686, "learning_rate": 1.980490764449523e-05, "loss": 0.332, "step": 205 }, { "epoch": 0.15767317259854574, "grad_norm": 6.721541881561279, "learning_rate": 1.979961705036587e-05, "loss": 0.4359, "step": 206 }, { "epoch": 0.1584385763490241, "grad_norm": 6.835299968719482, "learning_rate": 1.9794256402249398e-05, "loss": 0.4853, "step": 207 }, { "epoch": 0.15920398009950248, "grad_norm": 7.488376617431641, "learning_rate": 1.9788825738467194e-05, "loss": 0.4803, "step": 208 }, { "epoch": 0.15996938384998086, "grad_norm": 7.519308567047119, "learning_rate": 1.978332509784114e-05, "loss": 0.3915, "step": 209 }, { "epoch": 0.16073478760045926, "grad_norm": 4.765905380249023, "learning_rate": 1.977775451969337e-05, "loss": 0.4286, "step": 210 }, { "epoch": 0.16150019135093763, "grad_norm": 5.47350549697876, "learning_rate": 1.9772114043845968e-05, "loss": 0.4142, "step": 211 }, { "epoch": 0.162265595101416, "grad_norm": 6.0178375244140625, "learning_rate": 1.97664037106207e-05, "loss": 0.5002, "step": 212 }, { "epoch": 0.16303099885189437, "grad_norm": 5.823141098022461, "learning_rate": 1.9760623560838707e-05, "loss": 0.4094, "step": 213 }, { "epoch": 0.16379640260237274, "grad_norm": 6.595653057098389, "learning_rate": 1.9754773635820236e-05, "loss": 0.4408, "step": 214 }, { "epoch": 0.16456180635285114, "grad_norm": 6.631775379180908, "learning_rate": 1.9748853977384326e-05, "loss": 0.3829, "step": 215 }, { "epoch": 0.1653272101033295, "grad_norm": 8.617928504943848, "learning_rate": 1.974286462784851e-05, "loss": 0.4709, "step": 216 }, { "epoch": 0.16609261385380789, "grad_norm": 5.043487548828125, "learning_rate": 1.973680563002853e-05, "loss": 0.3764, "step": 217 }, { "epoch": 0.16685801760428626, "grad_norm": 5.085770606994629, "learning_rate": 1.973067702723801e-05, "loss": 0.3281, "step": 218 }, { "epoch": 0.16762342135476463, "grad_norm": 6.6135029792785645, "learning_rate": 1.972447886328816e-05, "loss": 0.4335, "step": 219 }, { "epoch": 0.16838882510524303, "grad_norm": 8.10050106048584, "learning_rate": 1.9718211182487455e-05, "loss": 0.427, "step": 220 }, { "epoch": 0.16838882510524303, "eval_accuracy": 0.8429602888086642, "eval_f1": 0.7563025210084033, "eval_loss": 0.4065958261489868, "eval_precision": 0.8653846153846154, "eval_recall": 0.6716417910447762, "eval_runtime": 42.5974, "eval_samples_per_second": 7.066, "eval_steps_per_second": 0.235, "step": 220 }, { "epoch": 0.1691542288557214, "grad_norm": 5.3172526359558105, "learning_rate": 1.971187402964132e-05, "loss": 0.4483, "step": 221 }, { "epoch": 0.16991963260619977, "grad_norm": 5.775417804718018, "learning_rate": 1.970546745005182e-05, "loss": 0.3196, "step": 222 }, { "epoch": 0.17068503635667814, "grad_norm": 5.696174621582031, "learning_rate": 1.969899148951731e-05, "loss": 0.3721, "step": 223 }, { "epoch": 0.17145044010715652, "grad_norm": 4.350657939910889, "learning_rate": 1.9692446194332144e-05, "loss": 0.3112, "step": 224 }, { "epoch": 0.17221584385763491, "grad_norm": 6.253859996795654, "learning_rate": 1.9685831611286312e-05, "loss": 0.3454, "step": 225 }, { "epoch": 0.1729812476081133, "grad_norm": 5.843462944030762, "learning_rate": 1.9679147787665128e-05, "loss": 0.321, "step": 226 }, { "epoch": 0.17374665135859166, "grad_norm": 9.418475151062012, "learning_rate": 1.9672394771248867e-05, "loss": 0.3685, "step": 227 }, { "epoch": 0.17451205510907003, "grad_norm": 5.893159866333008, "learning_rate": 1.966557261031246e-05, "loss": 0.3446, "step": 228 }, { "epoch": 0.1752774588595484, "grad_norm": 6.786052703857422, "learning_rate": 1.9658681353625105e-05, "loss": 0.4436, "step": 229 }, { "epoch": 0.1760428626100268, "grad_norm": 5.613348007202148, "learning_rate": 1.9651721050449964e-05, "loss": 0.3582, "step": 230 }, { "epoch": 0.17680826636050517, "grad_norm": 7.1399827003479, "learning_rate": 1.964469175054377e-05, "loss": 0.3733, "step": 231 }, { "epoch": 0.17757367011098354, "grad_norm": 5.748831748962402, "learning_rate": 1.963759350415649e-05, "loss": 0.3765, "step": 232 }, { "epoch": 0.17833907386146192, "grad_norm": 7.2922539710998535, "learning_rate": 1.9630426362030978e-05, "loss": 0.4476, "step": 233 }, { "epoch": 0.1791044776119403, "grad_norm": 10.018366813659668, "learning_rate": 1.962319037540259e-05, "loss": 0.5118, "step": 234 }, { "epoch": 0.1798698813624187, "grad_norm": 6.774781703948975, "learning_rate": 1.9615885595998825e-05, "loss": 0.3561, "step": 235 }, { "epoch": 0.18063528511289706, "grad_norm": 9.361811637878418, "learning_rate": 1.9608512076038964e-05, "loss": 0.4204, "step": 236 }, { "epoch": 0.18140068886337543, "grad_norm": 4.652408123016357, "learning_rate": 1.9601069868233687e-05, "loss": 0.3521, "step": 237 }, { "epoch": 0.1821660926138538, "grad_norm": 6.97662353515625, "learning_rate": 1.9593559025784692e-05, "loss": 0.464, "step": 238 }, { "epoch": 0.18293149636433217, "grad_norm": 5.046630382537842, "learning_rate": 1.9585979602384334e-05, "loss": 0.4245, "step": 239 }, { "epoch": 0.18369690011481057, "grad_norm": 6.48331356048584, "learning_rate": 1.9578331652215224e-05, "loss": 0.346, "step": 240 }, { "epoch": 0.18369690011481057, "eval_accuracy": 0.8339350180505415, "eval_f1": 0.7722772277227723, "eval_loss": 0.4155955910682678, "eval_precision": 0.7684729064039408, "eval_recall": 0.7761194029850746, "eval_runtime": 44.3642, "eval_samples_per_second": 6.785, "eval_steps_per_second": 0.225, "step": 240 }, { "epoch": 0.18446230386528895, "grad_norm": 5.903677940368652, "learning_rate": 1.9570615229949844e-05, "loss": 0.3271, "step": 241 }, { "epoch": 0.18522770761576732, "grad_norm": 8.325933456420898, "learning_rate": 1.9562830390750157e-05, "loss": 0.3309, "step": 242 }, { "epoch": 0.1859931113662457, "grad_norm": 4.691112518310547, "learning_rate": 1.955497719026722e-05, "loss": 0.4265, "step": 243 }, { "epoch": 0.18675851511672406, "grad_norm": 5.79347562789917, "learning_rate": 1.954705568464078e-05, "loss": 0.3228, "step": 244 }, { "epoch": 0.18752391886720246, "grad_norm": 6.642929553985596, "learning_rate": 1.953906593049887e-05, "loss": 0.3625, "step": 245 }, { "epoch": 0.18828932261768083, "grad_norm": 6.772623538970947, "learning_rate": 1.9531007984957408e-05, "loss": 0.3105, "step": 246 }, { "epoch": 0.1890547263681592, "grad_norm": 6.067032337188721, "learning_rate": 1.9522881905619794e-05, "loss": 0.3793, "step": 247 }, { "epoch": 0.18982013011863758, "grad_norm": 6.046145439147949, "learning_rate": 1.9514687750576483e-05, "loss": 0.439, "step": 248 }, { "epoch": 0.19058553386911595, "grad_norm": 5.100595951080322, "learning_rate": 1.950642557840458e-05, "loss": 0.3456, "step": 249 }, { "epoch": 0.19135093761959435, "grad_norm": 9.742971420288086, "learning_rate": 1.9498095448167435e-05, "loss": 0.5016, "step": 250 }, { "epoch": 0.19211634137007272, "grad_norm": 5.769988536834717, "learning_rate": 1.948969741941418e-05, "loss": 0.4057, "step": 251 }, { "epoch": 0.1928817451205511, "grad_norm": 6.542685031890869, "learning_rate": 1.948123155217936e-05, "loss": 0.3563, "step": 252 }, { "epoch": 0.19364714887102946, "grad_norm": 6.277159690856934, "learning_rate": 1.947269790698245e-05, "loss": 0.3911, "step": 253 }, { "epoch": 0.19441255262150783, "grad_norm": 5.547085285186768, "learning_rate": 1.946409654482745e-05, "loss": 0.3616, "step": 254 }, { "epoch": 0.19517795637198623, "grad_norm": 5.570236682891846, "learning_rate": 1.945542752720245e-05, "loss": 0.2538, "step": 255 }, { "epoch": 0.1959433601224646, "grad_norm": 9.954462051391602, "learning_rate": 1.944669091607919e-05, "loss": 0.4541, "step": 256 }, { "epoch": 0.19670876387294298, "grad_norm": 5.784120082855225, "learning_rate": 1.9437886773912595e-05, "loss": 0.3479, "step": 257 }, { "epoch": 0.19747416762342135, "grad_norm": 4.724707126617432, "learning_rate": 1.9429015163640363e-05, "loss": 0.2393, "step": 258 }, { "epoch": 0.19823957137389972, "grad_norm": 5.0216064453125, "learning_rate": 1.942007614868248e-05, "loss": 0.3678, "step": 259 }, { "epoch": 0.19900497512437812, "grad_norm": 6.818700790405273, "learning_rate": 1.9411069792940803e-05, "loss": 0.3623, "step": 260 }, { "epoch": 0.19900497512437812, "eval_accuracy": 0.8501805054151624, "eval_f1": 0.7688022284122563, "eval_loss": 0.40000924468040466, "eval_precision": 0.8734177215189873, "eval_recall": 0.6865671641791045, "eval_runtime": 43.8991, "eval_samples_per_second": 6.857, "eval_steps_per_second": 0.228, "step": 260 }, { "epoch": 0.1997703788748565, "grad_norm": 5.862509727478027, "learning_rate": 1.9401996160798574e-05, "loss": 0.3671, "step": 261 }, { "epoch": 0.20053578262533486, "grad_norm": 4.842383861541748, "learning_rate": 1.9392855317119966e-05, "loss": 0.3314, "step": 262 }, { "epoch": 0.20130118637581323, "grad_norm": 5.581681728363037, "learning_rate": 1.9383647327249635e-05, "loss": 0.4105, "step": 263 }, { "epoch": 0.2020665901262916, "grad_norm": 5.401704788208008, "learning_rate": 1.937437225701223e-05, "loss": 0.3592, "step": 264 }, { "epoch": 0.20283199387677, "grad_norm": 4.608808517456055, "learning_rate": 1.9365030172711946e-05, "loss": 0.3495, "step": 265 }, { "epoch": 0.20359739762724838, "grad_norm": 4.868807792663574, "learning_rate": 1.9355621141132022e-05, "loss": 0.3881, "step": 266 }, { "epoch": 0.20436280137772675, "grad_norm": 7.819051742553711, "learning_rate": 1.9346145229534295e-05, "loss": 0.4057, "step": 267 }, { "epoch": 0.20512820512820512, "grad_norm": 7.824717044830322, "learning_rate": 1.933660250565869e-05, "loss": 0.3441, "step": 268 }, { "epoch": 0.2058936088786835, "grad_norm": 7.23281192779541, "learning_rate": 1.9326993037722762e-05, "loss": 0.487, "step": 269 }, { "epoch": 0.2066590126291619, "grad_norm": 4.351503849029541, "learning_rate": 1.931731689442119e-05, "loss": 0.3626, "step": 270 }, { "epoch": 0.20742441637964026, "grad_norm": 9.379828453063965, "learning_rate": 1.9307574144925288e-05, "loss": 0.3386, "step": 271 }, { "epoch": 0.20818982013011864, "grad_norm": 5.923501491546631, "learning_rate": 1.9297764858882516e-05, "loss": 0.3378, "step": 272 }, { "epoch": 0.208955223880597, "grad_norm": 6.346581935882568, "learning_rate": 1.9287889106415983e-05, "loss": 0.3057, "step": 273 }, { "epoch": 0.20972062763107538, "grad_norm": 5.1364593505859375, "learning_rate": 1.927794695812394e-05, "loss": 0.2919, "step": 274 }, { "epoch": 0.21048603138155378, "grad_norm": 5.439457893371582, "learning_rate": 1.9267938485079285e-05, "loss": 0.4108, "step": 275 }, { "epoch": 0.21125143513203215, "grad_norm": 4.554882526397705, "learning_rate": 1.9257863758829038e-05, "loss": 0.3876, "step": 276 }, { "epoch": 0.21201683888251052, "grad_norm": 7.16301965713501, "learning_rate": 1.9247722851393838e-05, "loss": 0.399, "step": 277 }, { "epoch": 0.2127822426329889, "grad_norm": 7.004945755004883, "learning_rate": 1.9237515835267447e-05, "loss": 0.3675, "step": 278 }, { "epoch": 0.21354764638346727, "grad_norm": 5.764822959899902, "learning_rate": 1.92272427834162e-05, "loss": 0.3255, "step": 279 }, { "epoch": 0.21431305013394567, "grad_norm": 7.27835750579834, "learning_rate": 1.9216903769278498e-05, "loss": 0.3446, "step": 280 }, { "epoch": 0.21431305013394567, "eval_accuracy": 0.851985559566787, "eval_f1": 0.7747252747252747, "eval_loss": 0.39411845803260803, "eval_precision": 0.8650306748466258, "eval_recall": 0.7014925373134329, "eval_runtime": 42.6819, "eval_samples_per_second": 7.052, "eval_steps_per_second": 0.234, "step": 280 }, { "epoch": 0.21507845388442404, "grad_norm": 9.400500297546387, "learning_rate": 1.920649886676429e-05, "loss": 0.4279, "step": 281 }, { "epoch": 0.2158438576349024, "grad_norm": 6.810932159423828, "learning_rate": 1.9196028150254535e-05, "loss": 0.4324, "step": 282 }, { "epoch": 0.21660926138538078, "grad_norm": 5.471708297729492, "learning_rate": 1.9185491694600668e-05, "loss": 0.333, "step": 283 }, { "epoch": 0.21737466513585915, "grad_norm": 5.115065097808838, "learning_rate": 1.9174889575124077e-05, "loss": 0.3169, "step": 284 }, { "epoch": 0.21814006888633755, "grad_norm": 6.514889717102051, "learning_rate": 1.9164221867615556e-05, "loss": 0.319, "step": 285 }, { "epoch": 0.21890547263681592, "grad_norm": 5.090853214263916, "learning_rate": 1.915348864833476e-05, "loss": 0.4063, "step": 286 }, { "epoch": 0.2196708763872943, "grad_norm": 8.121560096740723, "learning_rate": 1.9142689994009666e-05, "loss": 0.3493, "step": 287 }, { "epoch": 0.22043628013777267, "grad_norm": 8.17483139038086, "learning_rate": 1.913182598183603e-05, "loss": 0.3528, "step": 288 }, { "epoch": 0.22120168388825107, "grad_norm": 6.741644382476807, "learning_rate": 1.9120896689476817e-05, "loss": 0.4365, "step": 289 }, { "epoch": 0.22196708763872944, "grad_norm": 7.430425643920898, "learning_rate": 1.9109902195061666e-05, "loss": 0.3639, "step": 290 }, { "epoch": 0.2227324913892078, "grad_norm": 6.185513496398926, "learning_rate": 1.9098842577186315e-05, "loss": 0.4335, "step": 291 }, { "epoch": 0.22349789513968618, "grad_norm": 9.91633129119873, "learning_rate": 1.9087717914912054e-05, "loss": 0.4059, "step": 292 }, { "epoch": 0.22426329889016455, "grad_norm": 5.951688289642334, "learning_rate": 1.9076528287765145e-05, "loss": 0.2802, "step": 293 }, { "epoch": 0.22502870264064295, "grad_norm": 5.823990821838379, "learning_rate": 1.9065273775736264e-05, "loss": 0.4503, "step": 294 }, { "epoch": 0.22579410639112132, "grad_norm": 9.704663276672363, "learning_rate": 1.9053954459279934e-05, "loss": 0.3731, "step": 295 }, { "epoch": 0.2265595101415997, "grad_norm": 8.522269248962402, "learning_rate": 1.9042570419313927e-05, "loss": 0.4185, "step": 296 }, { "epoch": 0.22732491389207807, "grad_norm": 7.043125152587891, "learning_rate": 1.9031121737218706e-05, "loss": 0.3138, "step": 297 }, { "epoch": 0.22809031764255644, "grad_norm": 6.579213619232178, "learning_rate": 1.9019608494836843e-05, "loss": 0.4797, "step": 298 }, { "epoch": 0.22885572139303484, "grad_norm": 4.732334613800049, "learning_rate": 1.900803077447243e-05, "loss": 0.2618, "step": 299 }, { "epoch": 0.2296211251435132, "grad_norm": 6.821180820465088, "learning_rate": 1.899638865889047e-05, "loss": 0.2533, "step": 300 }, { "epoch": 0.2296211251435132, "eval_accuracy": 0.855595667870036, "eval_f1": 0.7740112994350282, "eval_loss": 0.3807988166809082, "eval_precision": 0.8954248366013072, "eval_recall": 0.681592039800995, "eval_runtime": 43.4459, "eval_samples_per_second": 6.928, "eval_steps_per_second": 0.23, "step": 300 }, { "epoch": 0.23038652889399158, "grad_norm": 7.175292491912842, "learning_rate": 1.8984682231316335e-05, "loss": 0.3254, "step": 301 }, { "epoch": 0.23115193264446995, "grad_norm": 7.32252311706543, "learning_rate": 1.8972911575435112e-05, "loss": 0.4463, "step": 302 }, { "epoch": 0.23191733639494833, "grad_norm": 4.355768203735352, "learning_rate": 1.896107677539105e-05, "loss": 0.3336, "step": 303 }, { "epoch": 0.23268274014542673, "grad_norm": 7.022250652313232, "learning_rate": 1.8949177915786942e-05, "loss": 0.3473, "step": 304 }, { "epoch": 0.2334481438959051, "grad_norm": 7.137199878692627, "learning_rate": 1.893721508168351e-05, "loss": 0.34, "step": 305 }, { "epoch": 0.23421354764638347, "grad_norm": 6.419118404388428, "learning_rate": 1.8925188358598815e-05, "loss": 0.392, "step": 306 }, { "epoch": 0.23497895139686184, "grad_norm": 4.412863731384277, "learning_rate": 1.8913097832507632e-05, "loss": 0.298, "step": 307 }, { "epoch": 0.2357443551473402, "grad_norm": 4.97670316696167, "learning_rate": 1.890094358984085e-05, "loss": 0.2557, "step": 308 }, { "epoch": 0.2365097588978186, "grad_norm": 4.732510566711426, "learning_rate": 1.8888725717484834e-05, "loss": 0.3838, "step": 309 }, { "epoch": 0.23727516264829698, "grad_norm": 5.817479133605957, "learning_rate": 1.8876444302780826e-05, "loss": 0.3199, "step": 310 }, { "epoch": 0.23804056639877536, "grad_norm": 6.594438552856445, "learning_rate": 1.8864099433524302e-05, "loss": 0.306, "step": 311 }, { "epoch": 0.23880597014925373, "grad_norm": 6.091873645782471, "learning_rate": 1.8851691197964356e-05, "loss": 0.43, "step": 312 }, { "epoch": 0.2395713738997321, "grad_norm": 6.488644123077393, "learning_rate": 1.8839219684803057e-05, "loss": 0.2994, "step": 313 }, { "epoch": 0.2403367776502105, "grad_norm": 6.759396076202393, "learning_rate": 1.882668498319484e-05, "loss": 0.4442, "step": 314 }, { "epoch": 0.24110218140068887, "grad_norm": 7.386966228485107, "learning_rate": 1.8814087182745835e-05, "loss": 0.3868, "step": 315 }, { "epoch": 0.24186758515116724, "grad_norm": 10.550067901611328, "learning_rate": 1.880142637351325e-05, "loss": 0.3712, "step": 316 }, { "epoch": 0.2426329889016456, "grad_norm": 6.639855861663818, "learning_rate": 1.8788702646004725e-05, "loss": 0.3205, "step": 317 }, { "epoch": 0.24339839265212398, "grad_norm": 5.728460311889648, "learning_rate": 1.8775916091177674e-05, "loss": 0.344, "step": 318 }, { "epoch": 0.24416379640260238, "grad_norm": 6.587100982666016, "learning_rate": 1.8763066800438638e-05, "loss": 0.3959, "step": 319 }, { "epoch": 0.24492920015308076, "grad_norm": 5.411223888397217, "learning_rate": 1.8750154865642644e-05, "loss": 0.3451, "step": 320 }, { "epoch": 0.24492920015308076, "eval_accuracy": 0.8357400722021661, "eval_f1": 0.7672634271099744, "eval_loss": 0.38974297046661377, "eval_precision": 0.7894736842105263, "eval_recall": 0.746268656716418, "eval_runtime": 43.2303, "eval_samples_per_second": 6.963, "eval_steps_per_second": 0.231, "step": 320 }, { "epoch": 0.24569460390355913, "grad_norm": 6.072524070739746, "learning_rate": 1.8737180379092536e-05, "loss": 0.3165, "step": 321 }, { "epoch": 0.2464600076540375, "grad_norm": 4.054773807525635, "learning_rate": 1.8724143433538317e-05, "loss": 0.2936, "step": 322 }, { "epoch": 0.24722541140451587, "grad_norm": 8.195338249206543, "learning_rate": 1.8711044122176484e-05, "loss": 0.4184, "step": 323 }, { "epoch": 0.24799081515499427, "grad_norm": 5.091484069824219, "learning_rate": 1.8697882538649373e-05, "loss": 0.2534, "step": 324 }, { "epoch": 0.24875621890547264, "grad_norm": 6.942485332489014, "learning_rate": 1.8684658777044478e-05, "loss": 0.3206, "step": 325 }, { "epoch": 0.24952162265595101, "grad_norm": 5.852565288543701, "learning_rate": 1.8671372931893775e-05, "loss": 0.3028, "step": 326 }, { "epoch": 0.2502870264064294, "grad_norm": 12.011276245117188, "learning_rate": 1.865802509817306e-05, "loss": 0.508, "step": 327 }, { "epoch": 0.25105243015690776, "grad_norm": 10.422298431396484, "learning_rate": 1.8644615371301275e-05, "loss": 0.5396, "step": 328 }, { "epoch": 0.25181783390738616, "grad_norm": 9.140241622924805, "learning_rate": 1.8631143847139785e-05, "loss": 0.4273, "step": 329 }, { "epoch": 0.2525832376578645, "grad_norm": 5.860537528991699, "learning_rate": 1.8617610621991753e-05, "loss": 0.3022, "step": 330 }, { "epoch": 0.2533486414083429, "grad_norm": 10.354637145996094, "learning_rate": 1.8604015792601395e-05, "loss": 0.3909, "step": 331 }, { "epoch": 0.2541140451588213, "grad_norm": 5.857511043548584, "learning_rate": 1.8590359456153333e-05, "loss": 0.3242, "step": 332 }, { "epoch": 0.25487944890929964, "grad_norm": 8.596695899963379, "learning_rate": 1.857664171027187e-05, "loss": 0.2872, "step": 333 }, { "epoch": 0.25564485265977804, "grad_norm": 6.899674415588379, "learning_rate": 1.8562862653020306e-05, "loss": 0.3693, "step": 334 }, { "epoch": 0.2564102564102564, "grad_norm": 5.9379048347473145, "learning_rate": 1.854902238290024e-05, "loss": 0.3764, "step": 335 }, { "epoch": 0.2571756601607348, "grad_norm": 4.492950916290283, "learning_rate": 1.853512099885085e-05, "loss": 0.2514, "step": 336 }, { "epoch": 0.2579410639112132, "grad_norm": 6.459884166717529, "learning_rate": 1.85211586002482e-05, "loss": 0.2892, "step": 337 }, { "epoch": 0.25870646766169153, "grad_norm": 9.04910659790039, "learning_rate": 1.8507135286904527e-05, "loss": 0.4386, "step": 338 }, { "epoch": 0.25947187141216993, "grad_norm": 6.688485145568848, "learning_rate": 1.849305115906753e-05, "loss": 0.3654, "step": 339 }, { "epoch": 0.2602372751626483, "grad_norm": 9.276376724243164, "learning_rate": 1.8478906317419644e-05, "loss": 0.3667, "step": 340 }, { "epoch": 0.2602372751626483, "eval_accuracy": 0.8375451263537906, "eval_f1": 0.7761194029850746, "eval_loss": 0.3894501328468323, "eval_precision": 0.7761194029850746, "eval_recall": 0.7761194029850746, "eval_runtime": 42.6114, "eval_samples_per_second": 7.064, "eval_steps_per_second": 0.235, "step": 340 }, { "epoch": 0.2610026789131267, "grad_norm": 8.038447380065918, "learning_rate": 1.8464700863077313e-05, "loss": 0.4252, "step": 341 }, { "epoch": 0.2617680826636051, "grad_norm": 7.8890204429626465, "learning_rate": 1.845043489759031e-05, "loss": 0.3921, "step": 342 }, { "epoch": 0.2625334864140834, "grad_norm": 7.012020111083984, "learning_rate": 1.8436108522940953e-05, "loss": 0.3669, "step": 343 }, { "epoch": 0.2632988901645618, "grad_norm": 5.313587188720703, "learning_rate": 1.8421721841543412e-05, "loss": 0.3989, "step": 344 }, { "epoch": 0.26406429391504016, "grad_norm": 6.152250289916992, "learning_rate": 1.8407274956242983e-05, "loss": 0.328, "step": 345 }, { "epoch": 0.26482969766551856, "grad_norm": 5.080443382263184, "learning_rate": 1.8392767970315314e-05, "loss": 0.336, "step": 346 }, { "epoch": 0.26559510141599696, "grad_norm": 5.631304740905762, "learning_rate": 1.8378200987465704e-05, "loss": 0.3614, "step": 347 }, { "epoch": 0.2663605051664753, "grad_norm": 12.059300422668457, "learning_rate": 1.836357411182835e-05, "loss": 0.4981, "step": 348 }, { "epoch": 0.2671259089169537, "grad_norm": 10.769081115722656, "learning_rate": 1.83488874479656e-05, "loss": 0.4066, "step": 349 }, { "epoch": 0.26789131266743205, "grad_norm": 6.042365550994873, "learning_rate": 1.8334141100867208e-05, "loss": 0.4097, "step": 350 }, { "epoch": 0.26865671641791045, "grad_norm": 4.787807941436768, "learning_rate": 1.831933517594957e-05, "loss": 0.3223, "step": 351 }, { "epoch": 0.26942212016838885, "grad_norm": 6.3437347412109375, "learning_rate": 1.8304469779055e-05, "loss": 0.4679, "step": 352 }, { "epoch": 0.2701875239188672, "grad_norm": 4.660147666931152, "learning_rate": 1.8289545016450953e-05, "loss": 0.2834, "step": 353 }, { "epoch": 0.2709529276693456, "grad_norm": 6.362420082092285, "learning_rate": 1.8274560994829256e-05, "loss": 0.4398, "step": 354 }, { "epoch": 0.27171833141982393, "grad_norm": 5.986339092254639, "learning_rate": 1.825951782130537e-05, "loss": 0.2797, "step": 355 }, { "epoch": 0.27248373517030233, "grad_norm": 4.155045032501221, "learning_rate": 1.8244415603417603e-05, "loss": 0.3659, "step": 356 }, { "epoch": 0.27324913892078073, "grad_norm": 5.72913932800293, "learning_rate": 1.8229254449126365e-05, "loss": 0.3776, "step": 357 }, { "epoch": 0.2740145426712591, "grad_norm": 6.8864827156066895, "learning_rate": 1.821403446681336e-05, "loss": 0.3776, "step": 358 }, { "epoch": 0.2747799464217375, "grad_norm": 8.918042182922363, "learning_rate": 1.819875576528085e-05, "loss": 0.3626, "step": 359 }, { "epoch": 0.2755453501722158, "grad_norm": 6.99332332611084, "learning_rate": 1.818341845375086e-05, "loss": 0.3378, "step": 360 }, { "epoch": 0.2755453501722158, "eval_accuracy": 0.8592057761732852, "eval_f1": 0.7857142857142857, "eval_loss": 0.36907240748405457, "eval_precision": 0.8773006134969326, "eval_recall": 0.7114427860696517, "eval_runtime": 42.2698, "eval_samples_per_second": 7.121, "eval_steps_per_second": 0.237, "step": 360 }, { "epoch": 0.2763107539226942, "grad_norm": 5.717648983001709, "learning_rate": 1.816802264186438e-05, "loss": 0.4399, "step": 361 }, { "epoch": 0.2770761576731726, "grad_norm": 6.995461463928223, "learning_rate": 1.8152568439680612e-05, "loss": 0.345, "step": 362 }, { "epoch": 0.27784156142365096, "grad_norm": 9.210029602050781, "learning_rate": 1.8137055957676172e-05, "loss": 0.4588, "step": 363 }, { "epoch": 0.27860696517412936, "grad_norm": 5.077588081359863, "learning_rate": 1.8121485306744286e-05, "loss": 0.2814, "step": 364 }, { "epoch": 0.2793723689246077, "grad_norm": 4.220605850219727, "learning_rate": 1.8105856598194026e-05, "loss": 0.3045, "step": 365 }, { "epoch": 0.2801377726750861, "grad_norm": 6.491408824920654, "learning_rate": 1.8090169943749477e-05, "loss": 0.4034, "step": 366 }, { "epoch": 0.2809031764255645, "grad_norm": 6.0255866050720215, "learning_rate": 1.8074425455548972e-05, "loss": 0.3329, "step": 367 }, { "epoch": 0.28166858017604285, "grad_norm": 8.101137161254883, "learning_rate": 1.8058623246144274e-05, "loss": 0.4252, "step": 368 }, { "epoch": 0.28243398392652125, "grad_norm": 7.802106857299805, "learning_rate": 1.8042763428499777e-05, "loss": 0.3987, "step": 369 }, { "epoch": 0.2831993876769996, "grad_norm": 10.641077995300293, "learning_rate": 1.802684611599169e-05, "loss": 0.3418, "step": 370 }, { "epoch": 0.283964791427478, "grad_norm": 7.813147068023682, "learning_rate": 1.8010871422407238e-05, "loss": 0.3626, "step": 371 }, { "epoch": 0.2847301951779564, "grad_norm": 5.057185173034668, "learning_rate": 1.7994839461943834e-05, "loss": 0.3448, "step": 372 }, { "epoch": 0.28549559892843474, "grad_norm": 6.039783000946045, "learning_rate": 1.7978750349208284e-05, "loss": 0.3194, "step": 373 }, { "epoch": 0.28626100267891313, "grad_norm": 5.504507064819336, "learning_rate": 1.7962604199215946e-05, "loss": 0.3855, "step": 374 }, { "epoch": 0.2870264064293915, "grad_norm": 5.044504642486572, "learning_rate": 1.7946401127389928e-05, "loss": 0.3235, "step": 375 }, { "epoch": 0.2877918101798699, "grad_norm": 6.590427398681641, "learning_rate": 1.7930141249560235e-05, "loss": 0.3108, "step": 376 }, { "epoch": 0.2885572139303483, "grad_norm": 5.834172248840332, "learning_rate": 1.791382468196297e-05, "loss": 0.3104, "step": 377 }, { "epoch": 0.2893226176808266, "grad_norm": 5.307084560394287, "learning_rate": 1.789745154123949e-05, "loss": 0.2796, "step": 378 }, { "epoch": 0.290088021431305, "grad_norm": 6.264888763427734, "learning_rate": 1.788102194443557e-05, "loss": 0.3827, "step": 379 }, { "epoch": 0.29085342518178336, "grad_norm": 4.5824713706970215, "learning_rate": 1.7864536009000575e-05, "loss": 0.3216, "step": 380 }, { "epoch": 0.29085342518178336, "eval_accuracy": 0.8393501805054152, "eval_f1": 0.7723785166240409, "eval_loss": 0.375131756067276, "eval_precision": 0.7947368421052632, "eval_recall": 0.7512437810945274, "eval_runtime": 43.2663, "eval_samples_per_second": 6.957, "eval_steps_per_second": 0.231, "step": 380 }, { "epoch": 0.29161882893226176, "grad_norm": 5.6450700759887695, "learning_rate": 1.7847993852786612e-05, "loss": 0.3965, "step": 381 }, { "epoch": 0.29238423268274016, "grad_norm": 7.779424667358398, "learning_rate": 1.7831395594047682e-05, "loss": 0.3438, "step": 382 }, { "epoch": 0.2931496364332185, "grad_norm": 8.265360832214355, "learning_rate": 1.7814741351438855e-05, "loss": 0.3091, "step": 383 }, { "epoch": 0.2939150401836969, "grad_norm": 8.733458518981934, "learning_rate": 1.7798031244015406e-05, "loss": 0.3375, "step": 384 }, { "epoch": 0.29468044393417525, "grad_norm": 7.921517848968506, "learning_rate": 1.7781265391231968e-05, "loss": 0.3651, "step": 385 }, { "epoch": 0.29544584768465365, "grad_norm": 8.350987434387207, "learning_rate": 1.7764443912941675e-05, "loss": 0.3825, "step": 386 }, { "epoch": 0.29621125143513205, "grad_norm": 4.965341091156006, "learning_rate": 1.7747566929395307e-05, "loss": 0.3302, "step": 387 }, { "epoch": 0.2969766551856104, "grad_norm": 5.6050896644592285, "learning_rate": 1.7730634561240442e-05, "loss": 0.3173, "step": 388 }, { "epoch": 0.2977420589360888, "grad_norm": 4.471499443054199, "learning_rate": 1.7713646929520568e-05, "loss": 0.2726, "step": 389 }, { "epoch": 0.29850746268656714, "grad_norm": 4.538937091827393, "learning_rate": 1.769660415567424e-05, "loss": 0.2388, "step": 390 }, { "epoch": 0.29927286643704554, "grad_norm": 6.3678483963012695, "learning_rate": 1.7679506361534216e-05, "loss": 0.3751, "step": 391 }, { "epoch": 0.30003827018752394, "grad_norm": 6.761591911315918, "learning_rate": 1.766235366932655e-05, "loss": 0.3698, "step": 392 }, { "epoch": 0.3008036739380023, "grad_norm": 4.421695232391357, "learning_rate": 1.764514620166976e-05, "loss": 0.333, "step": 393 }, { "epoch": 0.3015690776884807, "grad_norm": 6.026480197906494, "learning_rate": 1.762788408157393e-05, "loss": 0.3659, "step": 394 }, { "epoch": 0.302334481438959, "grad_norm": 7.1920623779296875, "learning_rate": 1.7610567432439834e-05, "loss": 0.3202, "step": 395 }, { "epoch": 0.3030998851894374, "grad_norm": 5.5270676612854, "learning_rate": 1.759319637805806e-05, "loss": 0.3482, "step": 396 }, { "epoch": 0.3038652889399158, "grad_norm": 7.092195510864258, "learning_rate": 1.757577104260811e-05, "loss": 0.4268, "step": 397 }, { "epoch": 0.30463069269039417, "grad_norm": 6.13981819152832, "learning_rate": 1.755829155065753e-05, "loss": 0.3942, "step": 398 }, { "epoch": 0.30539609644087257, "grad_norm": 5.976612091064453, "learning_rate": 1.7540758027161014e-05, "loss": 0.4031, "step": 399 }, { "epoch": 0.3061615001913509, "grad_norm": 5.4243855476379395, "learning_rate": 1.7523170597459497e-05, "loss": 0.3109, "step": 400 }, { "epoch": 0.3061615001913509, "eval_accuracy": 0.8537906137184116, "eval_f1": 0.7949367088607595, "eval_loss": 0.37360242009162903, "eval_precision": 0.8092783505154639, "eval_recall": 0.7810945273631841, "eval_runtime": 43.6793, "eval_samples_per_second": 6.891, "eval_steps_per_second": 0.229, "step": 400 }, { "epoch": 0.3069269039418293, "grad_norm": 7.383713722229004, "learning_rate": 1.750552938727928e-05, "loss": 0.4095, "step": 401 }, { "epoch": 0.3076923076923077, "grad_norm": 6.099390506744385, "learning_rate": 1.7487834522731115e-05, "loss": 0.3951, "step": 402 }, { "epoch": 0.30845771144278605, "grad_norm": 5.263575553894043, "learning_rate": 1.747008613030932e-05, "loss": 0.3158, "step": 403 }, { "epoch": 0.30922311519326445, "grad_norm": 4.222282886505127, "learning_rate": 1.7452284336890853e-05, "loss": 0.2905, "step": 404 }, { "epoch": 0.3099885189437428, "grad_norm": 5.008325099945068, "learning_rate": 1.7434429269734426e-05, "loss": 0.2814, "step": 405 }, { "epoch": 0.3107539226942212, "grad_norm": 6.125126838684082, "learning_rate": 1.7416521056479577e-05, "loss": 0.3048, "step": 406 }, { "epoch": 0.3115193264446996, "grad_norm": 4.458704948425293, "learning_rate": 1.7398559825145776e-05, "loss": 0.2961, "step": 407 }, { "epoch": 0.31228473019517794, "grad_norm": 6.859616756439209, "learning_rate": 1.7380545704131496e-05, "loss": 0.34, "step": 408 }, { "epoch": 0.31305013394565634, "grad_norm": 5.362451553344727, "learning_rate": 1.73624788222133e-05, "loss": 0.3612, "step": 409 }, { "epoch": 0.31381553769613474, "grad_norm": 5.342657089233398, "learning_rate": 1.734435930854492e-05, "loss": 0.3438, "step": 410 }, { "epoch": 0.3145809414466131, "grad_norm": 3.9414455890655518, "learning_rate": 1.7326187292656332e-05, "loss": 0.3004, "step": 411 }, { "epoch": 0.3153463451970915, "grad_norm": 5.787232875823975, "learning_rate": 1.7307962904452837e-05, "loss": 0.3853, "step": 412 }, { "epoch": 0.3161117489475698, "grad_norm": 4.210262775421143, "learning_rate": 1.7289686274214116e-05, "loss": 0.2395, "step": 413 }, { "epoch": 0.3168771526980482, "grad_norm": 4.290014743804932, "learning_rate": 1.7271357532593325e-05, "loss": 0.2023, "step": 414 }, { "epoch": 0.3176425564485266, "grad_norm": 7.905555248260498, "learning_rate": 1.7252976810616134e-05, "loss": 0.412, "step": 415 }, { "epoch": 0.31840796019900497, "grad_norm": 5.809239387512207, "learning_rate": 1.7234544239679807e-05, "loss": 0.3266, "step": 416 }, { "epoch": 0.31917336394948337, "grad_norm": 8.394856452941895, "learning_rate": 1.7216059951552256e-05, "loss": 0.3349, "step": 417 }, { "epoch": 0.3199387676999617, "grad_norm": 7.639961242675781, "learning_rate": 1.7197524078371105e-05, "loss": 0.4369, "step": 418 }, { "epoch": 0.3207041714504401, "grad_norm": 4.321413993835449, "learning_rate": 1.7178936752642737e-05, "loss": 0.2755, "step": 419 }, { "epoch": 0.3214695752009185, "grad_norm": 5.398126602172852, "learning_rate": 1.7160298107241347e-05, "loss": 0.2893, "step": 420 }, { "epoch": 0.3214695752009185, "eval_accuracy": 0.8664259927797834, "eval_f1": 0.7989130434782609, "eval_loss": 0.3465858995914459, "eval_precision": 0.8802395209580839, "eval_recall": 0.7313432835820896, "eval_runtime": 42.9072, "eval_samples_per_second": 7.015, "eval_steps_per_second": 0.233, "step": 420 }, { "epoch": 0.32223497895139686, "grad_norm": 7.948053359985352, "learning_rate": 1.714160827540801e-05, "loss": 0.3243, "step": 421 }, { "epoch": 0.32300038270187525, "grad_norm": 6.143364429473877, "learning_rate": 1.7122867390749697e-05, "loss": 0.3382, "step": 422 }, { "epoch": 0.3237657864523536, "grad_norm": 4.757430076599121, "learning_rate": 1.7104075587238353e-05, "loss": 0.2241, "step": 423 }, { "epoch": 0.324531190202832, "grad_norm": 5.464345932006836, "learning_rate": 1.7085232999209915e-05, "loss": 0.2313, "step": 424 }, { "epoch": 0.3252965939533104, "grad_norm": 5.977030277252197, "learning_rate": 1.7066339761363364e-05, "loss": 0.2507, "step": 425 }, { "epoch": 0.32606199770378874, "grad_norm": 6.142635345458984, "learning_rate": 1.7047396008759755e-05, "loss": 0.2431, "step": 426 }, { "epoch": 0.32682740145426714, "grad_norm": 9.327625274658203, "learning_rate": 1.7028401876821257e-05, "loss": 0.4359, "step": 427 }, { "epoch": 0.3275928052047455, "grad_norm": 7.524345397949219, "learning_rate": 1.7009357501330188e-05, "loss": 0.292, "step": 428 }, { "epoch": 0.3283582089552239, "grad_norm": 5.8476243019104, "learning_rate": 1.699026301842803e-05, "loss": 0.3256, "step": 429 }, { "epoch": 0.3291236127057023, "grad_norm": 4.947808742523193, "learning_rate": 1.6971118564614473e-05, "loss": 0.2586, "step": 430 }, { "epoch": 0.32988901645618063, "grad_norm": 8.49828815460205, "learning_rate": 1.6951924276746425e-05, "loss": 0.3808, "step": 431 }, { "epoch": 0.330654420206659, "grad_norm": 12.203619003295898, "learning_rate": 1.6932680292037045e-05, "loss": 0.4431, "step": 432 }, { "epoch": 0.33141982395713737, "grad_norm": 10.268878936767578, "learning_rate": 1.6913386748054757e-05, "loss": 0.3617, "step": 433 }, { "epoch": 0.33218522770761577, "grad_norm": 7.486672878265381, "learning_rate": 1.689404378272226e-05, "loss": 0.2425, "step": 434 }, { "epoch": 0.33295063145809417, "grad_norm": 4.700595378875732, "learning_rate": 1.687465153431556e-05, "loss": 0.2017, "step": 435 }, { "epoch": 0.3337160352085725, "grad_norm": 6.158583641052246, "learning_rate": 1.6855210141462964e-05, "loss": 0.3416, "step": 436 }, { "epoch": 0.3344814389590509, "grad_norm": 6.82537841796875, "learning_rate": 1.683571974314409e-05, "loss": 0.2956, "step": 437 }, { "epoch": 0.33524684270952926, "grad_norm": 6.505055904388428, "learning_rate": 1.6816180478688885e-05, "loss": 0.3231, "step": 438 }, { "epoch": 0.33601224646000766, "grad_norm": 8.432504653930664, "learning_rate": 1.679659248777662e-05, "loss": 0.3563, "step": 439 }, { "epoch": 0.33677765021048606, "grad_norm": 7.021294593811035, "learning_rate": 1.67769559104349e-05, "loss": 0.3635, "step": 440 }, { "epoch": 0.33677765021048606, "eval_accuracy": 0.8610108303249098, "eval_f1": 0.7957559681697612, "eval_loss": 0.34903958439826965, "eval_precision": 0.8522727272727273, "eval_recall": 0.746268656716418, "eval_runtime": 42.5394, "eval_samples_per_second": 7.076, "eval_steps_per_second": 0.235, "step": 440 }, { "epoch": 0.3375430539609644, "grad_norm": 6.332080841064453, "learning_rate": 1.6757270887038653e-05, "loss": 0.4206, "step": 441 }, { "epoch": 0.3383084577114428, "grad_norm": 5.29794979095459, "learning_rate": 1.6737537558309128e-05, "loss": 0.2312, "step": 442 }, { "epoch": 0.33907386146192114, "grad_norm": 5.670821666717529, "learning_rate": 1.6717756065312892e-05, "loss": 0.2835, "step": 443 }, { "epoch": 0.33983926521239954, "grad_norm": 7.677097320556641, "learning_rate": 1.6697926549460826e-05, "loss": 0.4268, "step": 444 }, { "epoch": 0.34060466896287794, "grad_norm": 11.509166717529297, "learning_rate": 1.667804915250711e-05, "loss": 0.5323, "step": 445 }, { "epoch": 0.3413700727133563, "grad_norm": 6.500860214233398, "learning_rate": 1.66581240165482e-05, "loss": 0.2776, "step": 446 }, { "epoch": 0.3421354764638347, "grad_norm": 5.490180492401123, "learning_rate": 1.6638151284021828e-05, "loss": 0.4148, "step": 447 }, { "epoch": 0.34290088021431303, "grad_norm": 5.856701850891113, "learning_rate": 1.661813109770598e-05, "loss": 0.2599, "step": 448 }, { "epoch": 0.34366628396479143, "grad_norm": 9.65992546081543, "learning_rate": 1.6598063600717865e-05, "loss": 0.3004, "step": 449 }, { "epoch": 0.34443168771526983, "grad_norm": 6.811250686645508, "learning_rate": 1.6577948936512905e-05, "loss": 0.4602, "step": 450 }, { "epoch": 0.3451970914657482, "grad_norm": 7.434620380401611, "learning_rate": 1.6557787248883698e-05, "loss": 0.2899, "step": 451 }, { "epoch": 0.3459624952162266, "grad_norm": 5.892435073852539, "learning_rate": 1.6537578681958998e-05, "loss": 0.3457, "step": 452 }, { "epoch": 0.3467278989667049, "grad_norm": 5.33993673324585, "learning_rate": 1.6517323380202693e-05, "loss": 0.3563, "step": 453 }, { "epoch": 0.3474933027171833, "grad_norm": 7.092268466949463, "learning_rate": 1.649702148841274e-05, "loss": 0.3254, "step": 454 }, { "epoch": 0.3482587064676617, "grad_norm": 7.1226701736450195, "learning_rate": 1.647667315172017e-05, "loss": 0.3557, "step": 455 }, { "epoch": 0.34902411021814006, "grad_norm": 8.488228797912598, "learning_rate": 1.6456278515588023e-05, "loss": 0.2825, "step": 456 }, { "epoch": 0.34978951396861846, "grad_norm": 7.586481094360352, "learning_rate": 1.6435837725810326e-05, "loss": 0.363, "step": 457 }, { "epoch": 0.3505549177190968, "grad_norm": 6.680689334869385, "learning_rate": 1.6415350928511037e-05, "loss": 0.3918, "step": 458 }, { "epoch": 0.3513203214695752, "grad_norm": 10.17097282409668, "learning_rate": 1.6394818270142995e-05, "loss": 0.4635, "step": 459 }, { "epoch": 0.3520857252200536, "grad_norm": 5.999205112457275, "learning_rate": 1.63742398974869e-05, "loss": 0.3582, "step": 460 }, { "epoch": 0.3520857252200536, "eval_accuracy": 0.871841155234657, "eval_f1": 0.8086253369272237, "eval_loss": 0.3369763195514679, "eval_precision": 0.8823529411764706, "eval_recall": 0.746268656716418, "eval_runtime": 43.4125, "eval_samples_per_second": 6.933, "eval_steps_per_second": 0.23, "step": 460 }, { "epoch": 0.35285112897053195, "grad_norm": 4.6777215003967285, "learning_rate": 1.635361595765024e-05, "loss": 0.2173, "step": 461 }, { "epoch": 0.35361653272101035, "grad_norm": 6.156562805175781, "learning_rate": 1.6332946598066244e-05, "loss": 0.3245, "step": 462 }, { "epoch": 0.3543819364714887, "grad_norm": 8.194892883300781, "learning_rate": 1.631223196649284e-05, "loss": 0.4433, "step": 463 }, { "epoch": 0.3551473402219671, "grad_norm": 6.393969535827637, "learning_rate": 1.6291472211011575e-05, "loss": 0.3032, "step": 464 }, { "epoch": 0.3559127439724455, "grad_norm": 5.061738967895508, "learning_rate": 1.6270667480026588e-05, "loss": 0.2703, "step": 465 }, { "epoch": 0.35667814772292383, "grad_norm": 4.332313537597656, "learning_rate": 1.6249817922263518e-05, "loss": 0.2336, "step": 466 }, { "epoch": 0.35744355147340223, "grad_norm": 5.894283771514893, "learning_rate": 1.6228923686768458e-05, "loss": 0.3498, "step": 467 }, { "epoch": 0.3582089552238806, "grad_norm": 9.115880966186523, "learning_rate": 1.6207984922906893e-05, "loss": 0.4193, "step": 468 }, { "epoch": 0.358974358974359, "grad_norm": 4.503346920013428, "learning_rate": 1.6187001780362613e-05, "loss": 0.3486, "step": 469 }, { "epoch": 0.3597397627248374, "grad_norm": 5.8561110496521, "learning_rate": 1.6165974409136673e-05, "loss": 0.2452, "step": 470 }, { "epoch": 0.3605051664753157, "grad_norm": 5.894245624542236, "learning_rate": 1.6144902959546286e-05, "loss": 0.2937, "step": 471 }, { "epoch": 0.3612705702257941, "grad_norm": 8.111411094665527, "learning_rate": 1.6123787582223774e-05, "loss": 0.3315, "step": 472 }, { "epoch": 0.36203597397627246, "grad_norm": 6.83114767074585, "learning_rate": 1.610262842811548e-05, "loss": 0.399, "step": 473 }, { "epoch": 0.36280137772675086, "grad_norm": 7.122241497039795, "learning_rate": 1.6081425648480696e-05, "loss": 0.3585, "step": 474 }, { "epoch": 0.36356678147722926, "grad_norm": 6.216829776763916, "learning_rate": 1.6060179394890573e-05, "loss": 0.326, "step": 475 }, { "epoch": 0.3643321852277076, "grad_norm": 8.567191123962402, "learning_rate": 1.6038889819227047e-05, "loss": 0.2917, "step": 476 }, { "epoch": 0.365097588978186, "grad_norm": 9.297383308410645, "learning_rate": 1.601755707368174e-05, "loss": 0.4102, "step": 477 }, { "epoch": 0.36586299272866435, "grad_norm": 8.04216480255127, "learning_rate": 1.5996181310754883e-05, "loss": 0.3136, "step": 478 }, { "epoch": 0.36662839647914275, "grad_norm": 5.950779438018799, "learning_rate": 1.5974762683254232e-05, "loss": 0.3788, "step": 479 }, { "epoch": 0.36739380022962115, "grad_norm": 10.357524871826172, "learning_rate": 1.5953301344293954e-05, "loss": 0.3879, "step": 480 }, { "epoch": 0.36739380022962115, "eval_accuracy": 0.855595667870036, "eval_f1": 0.801980198019802, "eval_loss": 0.35207080841064453, "eval_precision": 0.7980295566502463, "eval_recall": 0.8059701492537313, "eval_runtime": 42.5342, "eval_samples_per_second": 7.077, "eval_steps_per_second": 0.235, "step": 480 }, { "epoch": 0.3681592039800995, "grad_norm": 6.6643171310424805, "learning_rate": 1.5931797447293553e-05, "loss": 0.321, "step": 481 }, { "epoch": 0.3689246077305779, "grad_norm": 14.971054077148438, "learning_rate": 1.5910251145976762e-05, "loss": 0.3322, "step": 482 }, { "epoch": 0.36969001148105624, "grad_norm": 7.498319149017334, "learning_rate": 1.5888662594370448e-05, "loss": 0.3007, "step": 483 }, { "epoch": 0.37045541523153463, "grad_norm": 4.738834381103516, "learning_rate": 1.5867031946803512e-05, "loss": 0.2716, "step": 484 }, { "epoch": 0.37122081898201303, "grad_norm": 4.1388840675354, "learning_rate": 1.584535935790578e-05, "loss": 0.2069, "step": 485 }, { "epoch": 0.3719862227324914, "grad_norm": 10.354584693908691, "learning_rate": 1.5823644982606905e-05, "loss": 0.3032, "step": 486 }, { "epoch": 0.3727516264829698, "grad_norm": 7.267064094543457, "learning_rate": 1.580188897613526e-05, "loss": 0.3127, "step": 487 }, { "epoch": 0.3735170302334481, "grad_norm": 6.989504337310791, "learning_rate": 1.578009149401681e-05, "loss": 0.2764, "step": 488 }, { "epoch": 0.3742824339839265, "grad_norm": 8.135834693908691, "learning_rate": 1.5758252692074036e-05, "loss": 0.3159, "step": 489 }, { "epoch": 0.3750478377344049, "grad_norm": 5.070521831512451, "learning_rate": 1.5736372726424784e-05, "loss": 0.2304, "step": 490 }, { "epoch": 0.37581324148488326, "grad_norm": 10.535805702209473, "learning_rate": 1.571445175348117e-05, "loss": 0.4468, "step": 491 }, { "epoch": 0.37657864523536166, "grad_norm": 8.138240814208984, "learning_rate": 1.5692489929948453e-05, "loss": 0.4031, "step": 492 }, { "epoch": 0.37734404898584, "grad_norm": 7.41883659362793, "learning_rate": 1.5670487412823922e-05, "loss": 0.3837, "step": 493 }, { "epoch": 0.3781094527363184, "grad_norm": 6.058717727661133, "learning_rate": 1.564844435939577e-05, "loss": 0.3819, "step": 494 }, { "epoch": 0.3788748564867968, "grad_norm": 6.363833427429199, "learning_rate": 1.5626360927241974e-05, "loss": 0.32, "step": 495 }, { "epoch": 0.37964026023727515, "grad_norm": 7.919443607330322, "learning_rate": 1.560423727422915e-05, "loss": 0.2573, "step": 496 }, { "epoch": 0.38040566398775355, "grad_norm": 5.187404632568359, "learning_rate": 1.5582073558511452e-05, "loss": 0.2811, "step": 497 }, { "epoch": 0.3811710677382319, "grad_norm": 4.53115177154541, "learning_rate": 1.5559869938529428e-05, "loss": 0.2753, "step": 498 }, { "epoch": 0.3819364714887103, "grad_norm": 5.4234161376953125, "learning_rate": 1.5537626573008878e-05, "loss": 0.2334, "step": 499 }, { "epoch": 0.3827018752391887, "grad_norm": 7.260324478149414, "learning_rate": 1.551534362095973e-05, "loss": 0.3741, "step": 500 }, { "epoch": 0.3827018752391887, "eval_accuracy": 0.868231046931408, "eval_f1": 0.8021680216802168, "eval_loss": 0.3297866880893707, "eval_precision": 0.8809523809523809, "eval_recall": 0.736318407960199, "eval_runtime": 42.7922, "eval_samples_per_second": 7.034, "eval_steps_per_second": 0.234, "step": 500 }, { "epoch": 0.38346727898966704, "grad_norm": 6.959766864776611, "learning_rate": 1.549302124167492e-05, "loss": 0.315, "step": 501 }, { "epoch": 0.38423268274014544, "grad_norm": 5.475060939788818, "learning_rate": 1.547065959472921e-05, "loss": 0.2744, "step": 502 }, { "epoch": 0.3849980864906238, "grad_norm": 8.374768257141113, "learning_rate": 1.544825883997809e-05, "loss": 0.4522, "step": 503 }, { "epoch": 0.3857634902411022, "grad_norm": 6.9955549240112305, "learning_rate": 1.5425819137556605e-05, "loss": 0.343, "step": 504 }, { "epoch": 0.3865288939915806, "grad_norm": 9.89376163482666, "learning_rate": 1.5403340647878234e-05, "loss": 0.3817, "step": 505 }, { "epoch": 0.3872942977420589, "grad_norm": 8.855565071105957, "learning_rate": 1.5380823531633727e-05, "loss": 0.276, "step": 506 }, { "epoch": 0.3880597014925373, "grad_norm": 6.829111099243164, "learning_rate": 1.5358267949789968e-05, "loss": 0.3471, "step": 507 }, { "epoch": 0.38882510524301567, "grad_norm": 5.749798774719238, "learning_rate": 1.5335674063588808e-05, "loss": 0.2977, "step": 508 }, { "epoch": 0.38959050899349407, "grad_norm": 5.0231475830078125, "learning_rate": 1.531304203454593e-05, "loss": 0.3335, "step": 509 }, { "epoch": 0.39035591274397247, "grad_norm": 5.640665054321289, "learning_rate": 1.529037202444968e-05, "loss": 0.3419, "step": 510 }, { "epoch": 0.3911213164944508, "grad_norm": 11.185114860534668, "learning_rate": 1.5267664195359917e-05, "loss": 0.4589, "step": 511 }, { "epoch": 0.3918867202449292, "grad_norm": 5.990738868713379, "learning_rate": 1.524491870960687e-05, "loss": 0.2539, "step": 512 }, { "epoch": 0.39265212399540755, "grad_norm": 4.685416221618652, "learning_rate": 1.5222135729789944e-05, "loss": 0.2603, "step": 513 }, { "epoch": 0.39341752774588595, "grad_norm": 6.87640380859375, "learning_rate": 1.5199315418776584e-05, "loss": 0.3758, "step": 514 }, { "epoch": 0.39418293149636435, "grad_norm": 6.227936744689941, "learning_rate": 1.51764579397011e-05, "loss": 0.311, "step": 515 }, { "epoch": 0.3949483352468427, "grad_norm": 5.835103988647461, "learning_rate": 1.5153563455963501e-05, "loss": 0.3894, "step": 516 }, { "epoch": 0.3957137389973211, "grad_norm": 4.9519267082214355, "learning_rate": 1.5130632131228336e-05, "loss": 0.3444, "step": 517 }, { "epoch": 0.39647914274779944, "grad_norm": 4.969162940979004, "learning_rate": 1.5107664129423513e-05, "loss": 0.3403, "step": 518 }, { "epoch": 0.39724454649827784, "grad_norm": 6.865941047668457, "learning_rate": 1.5084659614739133e-05, "loss": 0.4106, "step": 519 }, { "epoch": 0.39800995024875624, "grad_norm": 4.673407077789307, "learning_rate": 1.506161875162631e-05, "loss": 0.3291, "step": 520 }, { "epoch": 0.39800995024875624, "eval_accuracy": 0.8628158844765343, "eval_f1": 0.7934782608695652, "eval_loss": 0.33473458886146545, "eval_precision": 0.874251497005988, "eval_recall": 0.7263681592039801, "eval_runtime": 43.722, "eval_samples_per_second": 6.884, "eval_steps_per_second": 0.229, "step": 520 }, { "epoch": 0.3987753539992346, "grad_norm": 8.29903507232666, "learning_rate": 1.5038541704796004e-05, "loss": 0.3677, "step": 521 }, { "epoch": 0.399540757749713, "grad_norm": 4.923694610595703, "learning_rate": 1.5015428639217844e-05, "loss": 0.2676, "step": 522 }, { "epoch": 0.4003061615001913, "grad_norm": 6.128358364105225, "learning_rate": 1.4992279720118936e-05, "loss": 0.3667, "step": 523 }, { "epoch": 0.4010715652506697, "grad_norm": 5.727793216705322, "learning_rate": 1.4969095112982692e-05, "loss": 0.3264, "step": 524 }, { "epoch": 0.4018369690011481, "grad_norm": 6.320516109466553, "learning_rate": 1.4945874983547647e-05, "loss": 0.2906, "step": 525 }, { "epoch": 0.40260237275162647, "grad_norm": 5.455401420593262, "learning_rate": 1.4922619497806276e-05, "loss": 0.3035, "step": 526 }, { "epoch": 0.40336777650210487, "grad_norm": 4.919046401977539, "learning_rate": 1.4899328822003796e-05, "loss": 0.3857, "step": 527 }, { "epoch": 0.4041331802525832, "grad_norm": 5.039306163787842, "learning_rate": 1.4876003122636989e-05, "loss": 0.3136, "step": 528 }, { "epoch": 0.4048985840030616, "grad_norm": 8.849163055419922, "learning_rate": 1.4852642566453008e-05, "loss": 0.3516, "step": 529 }, { "epoch": 0.40566398775354, "grad_norm": 4.999965667724609, "learning_rate": 1.4829247320448187e-05, "loss": 0.3408, "step": 530 }, { "epoch": 0.40642939150401836, "grad_norm": 6.8412885665893555, "learning_rate": 1.4805817551866839e-05, "loss": 0.3158, "step": 531 }, { "epoch": 0.40719479525449676, "grad_norm": 6.913996696472168, "learning_rate": 1.4782353428200075e-05, "loss": 0.3999, "step": 532 }, { "epoch": 0.4079601990049751, "grad_norm": 6.748564720153809, "learning_rate": 1.4758855117184591e-05, "loss": 0.2736, "step": 533 }, { "epoch": 0.4087256027554535, "grad_norm": 8.175541877746582, "learning_rate": 1.473532278680148e-05, "loss": 0.3499, "step": 534 }, { "epoch": 0.4094910065059319, "grad_norm": 9.689931869506836, "learning_rate": 1.4711756605275031e-05, "loss": 0.3207, "step": 535 }, { "epoch": 0.41025641025641024, "grad_norm": 7.443689823150635, "learning_rate": 1.4688156741071513e-05, "loss": 0.2849, "step": 536 }, { "epoch": 0.41102181400688864, "grad_norm": 7.436212062835693, "learning_rate": 1.4664523362897991e-05, "loss": 0.2768, "step": 537 }, { "epoch": 0.411787217757367, "grad_norm": 9.211559295654297, "learning_rate": 1.46408566397011e-05, "loss": 0.3629, "step": 538 }, { "epoch": 0.4125526215078454, "grad_norm": 5.4096832275390625, "learning_rate": 1.4617156740665852e-05, "loss": 0.2989, "step": 539 }, { "epoch": 0.4133180252583238, "grad_norm": 7.316256523132324, "learning_rate": 1.4593423835214421e-05, "loss": 0.3697, "step": 540 }, { "epoch": 0.4133180252583238, "eval_accuracy": 0.868231046931408, "eval_f1": 0.8063660477453581, "eval_loss": 0.32355356216430664, "eval_precision": 0.8636363636363636, "eval_recall": 0.7562189054726368, "eval_runtime": 43.4609, "eval_samples_per_second": 6.926, "eval_steps_per_second": 0.23, "step": 540 }, { "epoch": 0.41408342900880213, "grad_norm": 4.19016170501709, "learning_rate": 1.4569658093004935e-05, "loss": 0.2299, "step": 541 }, { "epoch": 0.41484883275928053, "grad_norm": 7.407268047332764, "learning_rate": 1.4545859683930252e-05, "loss": 0.3404, "step": 542 }, { "epoch": 0.41561423650975887, "grad_norm": 5.734637260437012, "learning_rate": 1.4522028778116765e-05, "loss": 0.2609, "step": 543 }, { "epoch": 0.41637964026023727, "grad_norm": 9.505693435668945, "learning_rate": 1.4498165545923167e-05, "loss": 0.3792, "step": 544 }, { "epoch": 0.41714504401071567, "grad_norm": 8.186382293701172, "learning_rate": 1.4474270157939236e-05, "loss": 0.3061, "step": 545 }, { "epoch": 0.417910447761194, "grad_norm": 8.078675270080566, "learning_rate": 1.4450342784984632e-05, "loss": 0.3662, "step": 546 }, { "epoch": 0.4186758515116724, "grad_norm": 6.977138042449951, "learning_rate": 1.4426383598107663e-05, "loss": 0.3395, "step": 547 }, { "epoch": 0.41944125526215076, "grad_norm": 6.912917613983154, "learning_rate": 1.4402392768584053e-05, "loss": 0.2658, "step": 548 }, { "epoch": 0.42020665901262916, "grad_norm": 4.735348224639893, "learning_rate": 1.4378370467915736e-05, "loss": 0.2933, "step": 549 }, { "epoch": 0.42097206276310756, "grad_norm": 9.662302017211914, "learning_rate": 1.4354316867829622e-05, "loss": 0.3847, "step": 550 }, { "epoch": 0.4217374665135859, "grad_norm": 7.782718181610107, "learning_rate": 1.4330232140276365e-05, "loss": 0.3426, "step": 551 }, { "epoch": 0.4225028702640643, "grad_norm": 5.793607711791992, "learning_rate": 1.4306116457429146e-05, "loss": 0.3222, "step": 552 }, { "epoch": 0.42326827401454264, "grad_norm": 7.877765655517578, "learning_rate": 1.4281969991682427e-05, "loss": 0.397, "step": 553 }, { "epoch": 0.42403367776502104, "grad_norm": 5.865022659301758, "learning_rate": 1.4257792915650728e-05, "loss": 0.2545, "step": 554 }, { "epoch": 0.42479908151549944, "grad_norm": 7.93567419052124, "learning_rate": 1.4233585402167394e-05, "loss": 0.3406, "step": 555 }, { "epoch": 0.4255644852659778, "grad_norm": 5.086268424987793, "learning_rate": 1.4209347624283352e-05, "loss": 0.1723, "step": 556 }, { "epoch": 0.4263298890164562, "grad_norm": 8.989797592163086, "learning_rate": 1.418507975526588e-05, "loss": 0.3193, "step": 557 }, { "epoch": 0.42709529276693453, "grad_norm": 8.771771430969238, "learning_rate": 1.4160781968597372e-05, "loss": 0.3112, "step": 558 }, { "epoch": 0.42786069651741293, "grad_norm": 6.325069904327393, "learning_rate": 1.4136454437974086e-05, "loss": 0.3621, "step": 559 }, { "epoch": 0.42862610026789133, "grad_norm": 4.631584644317627, "learning_rate": 1.4112097337304908e-05, "loss": 0.3143, "step": 560 }, { "epoch": 0.42862610026789133, "eval_accuracy": 0.8628158844765343, "eval_f1": 0.7978723404255319, "eval_loss": 0.32944566011428833, "eval_precision": 0.8571428571428571, "eval_recall": 0.746268656716418, "eval_runtime": 43.4755, "eval_samples_per_second": 6.923, "eval_steps_per_second": 0.23, "step": 560 }, { "epoch": 0.4293915040183697, "grad_norm": 5.975313663482666, "learning_rate": 1.408771084071012e-05, "loss": 0.2631, "step": 561 }, { "epoch": 0.4301569077688481, "grad_norm": 6.154519557952881, "learning_rate": 1.406329512252013e-05, "loss": 0.2762, "step": 562 }, { "epoch": 0.4309223115193264, "grad_norm": 6.479941368103027, "learning_rate": 1.4038850357274254e-05, "loss": 0.2319, "step": 563 }, { "epoch": 0.4316877152698048, "grad_norm": 9.675774574279785, "learning_rate": 1.4014376719719454e-05, "loss": 0.2686, "step": 564 }, { "epoch": 0.4324531190202832, "grad_norm": 7.308215141296387, "learning_rate": 1.3989874384809077e-05, "loss": 0.2504, "step": 565 }, { "epoch": 0.43321852277076156, "grad_norm": 6.632358551025391, "learning_rate": 1.3965343527701629e-05, "loss": 0.2506, "step": 566 }, { "epoch": 0.43398392652123996, "grad_norm": 5.3038153648376465, "learning_rate": 1.3940784323759511e-05, "loss": 0.282, "step": 567 }, { "epoch": 0.4347493302717183, "grad_norm": 6.68082332611084, "learning_rate": 1.391619694854776e-05, "loss": 0.3579, "step": 568 }, { "epoch": 0.4355147340221967, "grad_norm": 8.612346649169922, "learning_rate": 1.3891581577832804e-05, "loss": 0.2847, "step": 569 }, { "epoch": 0.4362801377726751, "grad_norm": 4.375307559967041, "learning_rate": 1.3866938387581199e-05, "loss": 0.2066, "step": 570 }, { "epoch": 0.43704554152315345, "grad_norm": 10.40518569946289, "learning_rate": 1.3842267553958373e-05, "loss": 0.407, "step": 571 }, { "epoch": 0.43781094527363185, "grad_norm": 7.533114433288574, "learning_rate": 1.3817569253327363e-05, "loss": 0.2188, "step": 572 }, { "epoch": 0.43857634902411025, "grad_norm": 5.815516948699951, "learning_rate": 1.3792843662247565e-05, "loss": 0.2584, "step": 573 }, { "epoch": 0.4393417527745886, "grad_norm": 4.81285285949707, "learning_rate": 1.3768090957473464e-05, "loss": 0.271, "step": 574 }, { "epoch": 0.440107156525067, "grad_norm": 6.468416213989258, "learning_rate": 1.3743311315953363e-05, "loss": 0.3531, "step": 575 }, { "epoch": 0.44087256027554533, "grad_norm": 6.450528621673584, "learning_rate": 1.3718504914828135e-05, "loss": 0.2993, "step": 576 }, { "epoch": 0.44163796402602373, "grad_norm": 7.386157035827637, "learning_rate": 1.3693671931429941e-05, "loss": 0.2766, "step": 577 }, { "epoch": 0.44240336777650213, "grad_norm": 7.769729137420654, "learning_rate": 1.3668812543280976e-05, "loss": 0.4186, "step": 578 }, { "epoch": 0.4431687715269805, "grad_norm": 5.560869216918945, "learning_rate": 1.3643926928092192e-05, "loss": 0.2456, "step": 579 }, { "epoch": 0.4439341752774589, "grad_norm": 4.490424633026123, "learning_rate": 1.3619015263762028e-05, "loss": 0.2442, "step": 580 }, { "epoch": 0.4439341752774589, "eval_accuracy": 0.8700361010830325, "eval_f1": 0.8032786885245902, "eval_loss": 0.3167375922203064, "eval_precision": 0.8909090909090909, "eval_recall": 0.7313432835820896, "eval_runtime": 44.0242, "eval_samples_per_second": 6.837, "eval_steps_per_second": 0.227, "step": 580 }, { "epoch": 0.4446995790279372, "grad_norm": 6.274224758148193, "learning_rate": 1.3594077728375129e-05, "loss": 0.2817, "step": 581 }, { "epoch": 0.4454649827784156, "grad_norm": 7.068592548370361, "learning_rate": 1.35691145002011e-05, "loss": 0.2949, "step": 582 }, { "epoch": 0.446230386528894, "grad_norm": 5.085371971130371, "learning_rate": 1.3544125757693207e-05, "loss": 0.3137, "step": 583 }, { "epoch": 0.44699579027937236, "grad_norm": 7.372035980224609, "learning_rate": 1.35191116794871e-05, "loss": 0.3648, "step": 584 }, { "epoch": 0.44776119402985076, "grad_norm": 5.603137016296387, "learning_rate": 1.3494072444399566e-05, "loss": 0.218, "step": 585 }, { "epoch": 0.4485265977803291, "grad_norm": 6.575461387634277, "learning_rate": 1.3469008231427207e-05, "loss": 0.2643, "step": 586 }, { "epoch": 0.4492920015308075, "grad_norm": 8.994118690490723, "learning_rate": 1.3443919219745199e-05, "loss": 0.3923, "step": 587 }, { "epoch": 0.4500574052812859, "grad_norm": 5.57374382019043, "learning_rate": 1.3418805588705986e-05, "loss": 0.2228, "step": 588 }, { "epoch": 0.45082280903176425, "grad_norm": 4.125316143035889, "learning_rate": 1.3393667517838012e-05, "loss": 0.2589, "step": 589 }, { "epoch": 0.45158821278224265, "grad_norm": 7.517770290374756, "learning_rate": 1.3368505186844427e-05, "loss": 0.361, "step": 590 }, { "epoch": 0.452353616532721, "grad_norm": 10.626962661743164, "learning_rate": 1.334331877560182e-05, "loss": 0.3936, "step": 591 }, { "epoch": 0.4531190202831994, "grad_norm": 8.18287467956543, "learning_rate": 1.3318108464158907e-05, "loss": 0.435, "step": 592 }, { "epoch": 0.4538844240336778, "grad_norm": 6.347609996795654, "learning_rate": 1.3292874432735268e-05, "loss": 0.3315, "step": 593 }, { "epoch": 0.45464982778415614, "grad_norm": 5.818674564361572, "learning_rate": 1.3267616861720041e-05, "loss": 0.3142, "step": 594 }, { "epoch": 0.45541523153463453, "grad_norm": 9.625964164733887, "learning_rate": 1.3242335931670647e-05, "loss": 0.3171, "step": 595 }, { "epoch": 0.4561806352851129, "grad_norm": 5.231682300567627, "learning_rate": 1.3217031823311488e-05, "loss": 0.3012, "step": 596 }, { "epoch": 0.4569460390355913, "grad_norm": 5.77132511138916, "learning_rate": 1.3191704717532667e-05, "loss": 0.3864, "step": 597 }, { "epoch": 0.4577114427860697, "grad_norm": 6.603677749633789, "learning_rate": 1.3166354795388677e-05, "loss": 0.352, "step": 598 }, { "epoch": 0.458476846536548, "grad_norm": 5.674981594085693, "learning_rate": 1.3140982238097117e-05, "loss": 0.3483, "step": 599 }, { "epoch": 0.4592422502870264, "grad_norm": 6.125768661499023, "learning_rate": 1.3115587227037408e-05, "loss": 0.361, "step": 600 }, { "epoch": 0.4592422502870264, "eval_accuracy": 0.8664259927797834, "eval_f1": 0.8102564102564103, "eval_loss": 0.3246942162513733, "eval_precision": 0.8359788359788359, "eval_recall": 0.7860696517412935, "eval_runtime": 43.5688, "eval_samples_per_second": 6.909, "eval_steps_per_second": 0.23, "step": 600 }, { "epoch": 0.46000765403750477, "grad_norm": 5.956824779510498, "learning_rate": 1.3090169943749475e-05, "loss": 0.2853, "step": 601 }, { "epoch": 0.46077305778798316, "grad_norm": 5.746252536773682, "learning_rate": 1.3064730569932467e-05, "loss": 0.2419, "step": 602 }, { "epoch": 0.46153846153846156, "grad_norm": 6.898614883422852, "learning_rate": 1.3039269287443442e-05, "loss": 0.2599, "step": 603 }, { "epoch": 0.4623038652889399, "grad_norm": 5.138125896453857, "learning_rate": 1.301378627829608e-05, "loss": 0.346, "step": 604 }, { "epoch": 0.4630692690394183, "grad_norm": 5.577975273132324, "learning_rate": 1.2988281724659375e-05, "loss": 0.4313, "step": 605 }, { "epoch": 0.46383467278989665, "grad_norm": 6.451081275939941, "learning_rate": 1.2962755808856341e-05, "loss": 0.3364, "step": 606 }, { "epoch": 0.46460007654037505, "grad_norm": 5.723383903503418, "learning_rate": 1.2937208713362694e-05, "loss": 0.285, "step": 607 }, { "epoch": 0.46536548029085345, "grad_norm": 6.299584865570068, "learning_rate": 1.2911640620805561e-05, "loss": 0.3708, "step": 608 }, { "epoch": 0.4661308840413318, "grad_norm": 5.189462661743164, "learning_rate": 1.2886051713962172e-05, "loss": 0.3041, "step": 609 }, { "epoch": 0.4668962877918102, "grad_norm": 5.592836856842041, "learning_rate": 1.2860442175758543e-05, "loss": 0.3218, "step": 610 }, { "epoch": 0.46766169154228854, "grad_norm": 6.396859645843506, "learning_rate": 1.283481218926818e-05, "loss": 0.3155, "step": 611 }, { "epoch": 0.46842709529276694, "grad_norm": 6.612271785736084, "learning_rate": 1.280916193771077e-05, "loss": 0.4262, "step": 612 }, { "epoch": 0.46919249904324534, "grad_norm": 6.376734256744385, "learning_rate": 1.2783491604450869e-05, "loss": 0.2233, "step": 613 }, { "epoch": 0.4699579027937237, "grad_norm": 5.077390193939209, "learning_rate": 1.2757801372996577e-05, "loss": 0.2412, "step": 614 }, { "epoch": 0.4707233065442021, "grad_norm": 6.0435872077941895, "learning_rate": 1.2732091426998258e-05, "loss": 0.4129, "step": 615 }, { "epoch": 0.4714887102946804, "grad_norm": 6.408792495727539, "learning_rate": 1.270636195024719e-05, "loss": 0.3446, "step": 616 }, { "epoch": 0.4722541140451588, "grad_norm": 5.99134635925293, "learning_rate": 1.2680613126674285e-05, "loss": 0.2502, "step": 617 }, { "epoch": 0.4730195177956372, "grad_norm": 4.678402423858643, "learning_rate": 1.2654845140348746e-05, "loss": 0.3089, "step": 618 }, { "epoch": 0.47378492154611557, "grad_norm": 6.420397758483887, "learning_rate": 1.2629058175476774e-05, "loss": 0.2951, "step": 619 }, { "epoch": 0.47455032529659397, "grad_norm": 7.072854518890381, "learning_rate": 1.2603252416400232e-05, "loss": 0.3877, "step": 620 }, { "epoch": 0.47455032529659397, "eval_accuracy": 0.8700361010830325, "eval_f1": 0.817258883248731, "eval_loss": 0.33253204822540283, "eval_precision": 0.8341968911917098, "eval_recall": 0.8009950248756219, "eval_runtime": 43.8544, "eval_samples_per_second": 6.864, "eval_steps_per_second": 0.228, "step": 620 }, { "epoch": 0.4753157290470723, "grad_norm": 7.795954704284668, "learning_rate": 1.2577428047595343e-05, "loss": 0.4187, "step": 621 }, { "epoch": 0.4760811327975507, "grad_norm": 5.663315773010254, "learning_rate": 1.255158525367136e-05, "loss": 0.3266, "step": 622 }, { "epoch": 0.4768465365480291, "grad_norm": 7.847259998321533, "learning_rate": 1.2525724219369253e-05, "loss": 0.4043, "step": 623 }, { "epoch": 0.47761194029850745, "grad_norm": 5.627171993255615, "learning_rate": 1.2499845129560386e-05, "loss": 0.2725, "step": 624 }, { "epoch": 0.47837734404898585, "grad_norm": 7.48345422744751, "learning_rate": 1.2473948169245196e-05, "loss": 0.3373, "step": 625 }, { "epoch": 0.4791427477994642, "grad_norm": 5.870919704437256, "learning_rate": 1.2448033523551866e-05, "loss": 0.2746, "step": 626 }, { "epoch": 0.4799081515499426, "grad_norm": 5.2100396156311035, "learning_rate": 1.2422101377735007e-05, "loss": 0.3173, "step": 627 }, { "epoch": 0.480673555300421, "grad_norm": 6.408041954040527, "learning_rate": 1.2396151917174335e-05, "loss": 0.3533, "step": 628 }, { "epoch": 0.48143895905089934, "grad_norm": 6.015228748321533, "learning_rate": 1.2370185327373341e-05, "loss": 0.3696, "step": 629 }, { "epoch": 0.48220436280137774, "grad_norm": 7.102441310882568, "learning_rate": 1.234420179395797e-05, "loss": 0.4167, "step": 630 }, { "epoch": 0.4829697665518561, "grad_norm": 6.134483814239502, "learning_rate": 1.2318201502675285e-05, "loss": 0.3047, "step": 631 }, { "epoch": 0.4837351703023345, "grad_norm": 5.107813358306885, "learning_rate": 1.2292184639392146e-05, "loss": 0.21, "step": 632 }, { "epoch": 0.4845005740528129, "grad_norm": 6.076941013336182, "learning_rate": 1.2266151390093887e-05, "loss": 0.3665, "step": 633 }, { "epoch": 0.4852659778032912, "grad_norm": 4.5892863273620605, "learning_rate": 1.224010194088297e-05, "loss": 0.3551, "step": 634 }, { "epoch": 0.4860313815537696, "grad_norm": 3.7130930423736572, "learning_rate": 1.2214036477977675e-05, "loss": 0.2009, "step": 635 }, { "epoch": 0.48679678530424797, "grad_norm": 6.9683918952941895, "learning_rate": 1.2187955187710752e-05, "loss": 0.4284, "step": 636 }, { "epoch": 0.48756218905472637, "grad_norm": 9.758392333984375, "learning_rate": 1.2161858256528092e-05, "loss": 0.3854, "step": 637 }, { "epoch": 0.48832759280520477, "grad_norm": 4.792107105255127, "learning_rate": 1.2135745870987406e-05, "loss": 0.2987, "step": 638 }, { "epoch": 0.4890929965556831, "grad_norm": 4.2573347091674805, "learning_rate": 1.2109618217756876e-05, "loss": 0.2276, "step": 639 }, { "epoch": 0.4898584003061615, "grad_norm": 5.893449306488037, "learning_rate": 1.2083475483613828e-05, "loss": 0.2342, "step": 640 }, { "epoch": 0.4898584003061615, "eval_accuracy": 0.8736462093862816, "eval_f1": 0.8157894736842105, "eval_loss": 0.3177836537361145, "eval_precision": 0.8659217877094972, "eval_recall": 0.7711442786069652, "eval_runtime": 44.0099, "eval_samples_per_second": 6.839, "eval_steps_per_second": 0.227, "step": 640 }, { "epoch": 0.49062380405663986, "grad_norm": 3.7821764945983887, "learning_rate": 1.2057317855443395e-05, "loss": 0.2527, "step": 641 }, { "epoch": 0.49138920780711826, "grad_norm": 6.740549087524414, "learning_rate": 1.2031145520237194e-05, "loss": 0.2847, "step": 642 }, { "epoch": 0.49215461155759666, "grad_norm": 5.346322059631348, "learning_rate": 1.2004958665091964e-05, "loss": 0.2837, "step": 643 }, { "epoch": 0.492920015308075, "grad_norm": 4.715932369232178, "learning_rate": 1.1978757477208242e-05, "loss": 0.2696, "step": 644 }, { "epoch": 0.4936854190585534, "grad_norm": 5.284407615661621, "learning_rate": 1.1952542143889034e-05, "loss": 0.307, "step": 645 }, { "epoch": 0.49445082280903174, "grad_norm": 5.576004505157471, "learning_rate": 1.1926312852538456e-05, "loss": 0.3274, "step": 646 }, { "epoch": 0.49521622655951014, "grad_norm": 4.6278395652771, "learning_rate": 1.1900069790660411e-05, "loss": 0.2723, "step": 647 }, { "epoch": 0.49598163030998854, "grad_norm": 6.354363441467285, "learning_rate": 1.187381314585725e-05, "loss": 0.2892, "step": 648 }, { "epoch": 0.4967470340604669, "grad_norm": 5.980183124542236, "learning_rate": 1.1847543105828404e-05, "loss": 0.3154, "step": 649 }, { "epoch": 0.4975124378109453, "grad_norm": 6.652758598327637, "learning_rate": 1.1821259858369082e-05, "loss": 0.31, "step": 650 }, { "epoch": 0.49827784156142363, "grad_norm": 4.981330394744873, "learning_rate": 1.1794963591368893e-05, "loss": 0.1999, "step": 651 }, { "epoch": 0.49904324531190203, "grad_norm": 7.559998035430908, "learning_rate": 1.1768654492810525e-05, "loss": 0.2701, "step": 652 }, { "epoch": 0.49980864906238043, "grad_norm": 4.401430130004883, "learning_rate": 1.1742332750768402e-05, "loss": 0.2127, "step": 653 }, { "epoch": 0.5005740528128588, "grad_norm": 8.975022315979004, "learning_rate": 1.1715998553407315e-05, "loss": 0.4186, "step": 654 }, { "epoch": 0.5013394565633371, "grad_norm": 8.256175994873047, "learning_rate": 1.1689652088981102e-05, "loss": 0.3824, "step": 655 }, { "epoch": 0.5021048603138155, "grad_norm": 8.785820007324219, "learning_rate": 1.1663293545831302e-05, "loss": 0.4255, "step": 656 }, { "epoch": 0.5028702640642939, "grad_norm": 4.359196662902832, "learning_rate": 1.1636923112385785e-05, "loss": 0.2107, "step": 657 }, { "epoch": 0.5036356678147723, "grad_norm": 9.45128059387207, "learning_rate": 1.161054097715743e-05, "loss": 0.3538, "step": 658 }, { "epoch": 0.5044010715652507, "grad_norm": 6.700775146484375, "learning_rate": 1.1584147328742767e-05, "loss": 0.3089, "step": 659 }, { "epoch": 0.505166475315729, "grad_norm": 5.239867687225342, "learning_rate": 1.155774235582063e-05, "loss": 0.2483, "step": 660 }, { "epoch": 0.505166475315729, "eval_accuracy": 0.871841155234657, "eval_f1": 0.8054794520547945, "eval_loss": 0.3145829141139984, "eval_precision": 0.8963414634146342, "eval_recall": 0.7313432835820896, "eval_runtime": 45.0474, "eval_samples_per_second": 6.682, "eval_steps_per_second": 0.222, "step": 660 }, { "epoch": 0.5059318790662074, "grad_norm": 7.80463171005249, "learning_rate": 1.1531326247150802e-05, "loss": 0.3278, "step": 661 }, { "epoch": 0.5066972828166858, "grad_norm": 5.086753845214844, "learning_rate": 1.1504899191572682e-05, "loss": 0.2196, "step": 662 }, { "epoch": 0.5074626865671642, "grad_norm": 7.081567287445068, "learning_rate": 1.1478461378003913e-05, "loss": 0.3013, "step": 663 }, { "epoch": 0.5082280903176426, "grad_norm": 8.720135688781738, "learning_rate": 1.145201299543905e-05, "loss": 0.4044, "step": 664 }, { "epoch": 0.5089934940681209, "grad_norm": 5.741237640380859, "learning_rate": 1.1425554232948206e-05, "loss": 0.2103, "step": 665 }, { "epoch": 0.5097588978185993, "grad_norm": 10.087435722351074, "learning_rate": 1.1399085279675688e-05, "loss": 0.3157, "step": 666 }, { "epoch": 0.5105243015690777, "grad_norm": 8.691031455993652, "learning_rate": 1.1372606324838651e-05, "loss": 0.2895, "step": 667 }, { "epoch": 0.5112897053195561, "grad_norm": 7.523034572601318, "learning_rate": 1.1346117557725757e-05, "loss": 0.2872, "step": 668 }, { "epoch": 0.5120551090700345, "grad_norm": 5.898047924041748, "learning_rate": 1.1319619167695814e-05, "loss": 0.3007, "step": 669 }, { "epoch": 0.5128205128205128, "grad_norm": 3.7639052867889404, "learning_rate": 1.1293111344176406e-05, "loss": 0.2113, "step": 670 }, { "epoch": 0.5135859165709912, "grad_norm": 7.386530876159668, "learning_rate": 1.126659427666257e-05, "loss": 0.3495, "step": 671 }, { "epoch": 0.5143513203214696, "grad_norm": 5.551990985870361, "learning_rate": 1.1240068154715416e-05, "loss": 0.3122, "step": 672 }, { "epoch": 0.515116724071948, "grad_norm": 7.6228156089782715, "learning_rate": 1.121353316796078e-05, "loss": 0.3292, "step": 673 }, { "epoch": 0.5158821278224264, "grad_norm": 5.472020149230957, "learning_rate": 1.1186989506087876e-05, "loss": 0.3501, "step": 674 }, { "epoch": 0.5166475315729047, "grad_norm": 5.9330525398254395, "learning_rate": 1.116043735884793e-05, "loss": 0.3392, "step": 675 }, { "epoch": 0.5174129353233831, "grad_norm": 6.453945636749268, "learning_rate": 1.1133876916052822e-05, "loss": 0.2199, "step": 676 }, { "epoch": 0.5181783390738615, "grad_norm": 6.126132488250732, "learning_rate": 1.1107308367573744e-05, "loss": 0.2892, "step": 677 }, { "epoch": 0.5189437428243399, "grad_norm": 3.9714488983154297, "learning_rate": 1.1080731903339825e-05, "loss": 0.2664, "step": 678 }, { "epoch": 0.5197091465748183, "grad_norm": 9.137075424194336, "learning_rate": 1.1054147713336782e-05, "loss": 0.3458, "step": 679 }, { "epoch": 0.5204745503252965, "grad_norm": 5.681141376495361, "learning_rate": 1.1027555987605562e-05, "loss": 0.2841, "step": 680 }, { "epoch": 0.5204745503252965, "eval_accuracy": 0.871841155234657, "eval_f1": 0.8011204481792717, "eval_loss": 0.3226017951965332, "eval_precision": 0.9166666666666666, "eval_recall": 0.7114427860696517, "eval_runtime": 44.0834, "eval_samples_per_second": 6.828, "eval_steps_per_second": 0.227, "step": 680 }, { "epoch": 0.521239954075775, "grad_norm": 5.028656005859375, "learning_rate": 1.1000956916240985e-05, "loss": 0.3274, "step": 681 }, { "epoch": 0.5220053578262533, "grad_norm": 4.5937700271606445, "learning_rate": 1.0974350689390376e-05, "loss": 0.2103, "step": 682 }, { "epoch": 0.5227707615767317, "grad_norm": 4.783949375152588, "learning_rate": 1.094773749725222e-05, "loss": 0.2575, "step": 683 }, { "epoch": 0.5235361653272101, "grad_norm": 7.1982502937316895, "learning_rate": 1.0921117530074785e-05, "loss": 0.3264, "step": 684 }, { "epoch": 0.5243015690776884, "grad_norm": 5.304211139678955, "learning_rate": 1.0894490978154777e-05, "loss": 0.3124, "step": 685 }, { "epoch": 0.5250669728281668, "grad_norm": 7.728262424468994, "learning_rate": 1.0867858031835975e-05, "loss": 0.3697, "step": 686 }, { "epoch": 0.5258323765786452, "grad_norm": 5.3696465492248535, "learning_rate": 1.084121888150787e-05, "loss": 0.3055, "step": 687 }, { "epoch": 0.5265977803291236, "grad_norm": 7.408420562744141, "learning_rate": 1.0814573717604295e-05, "loss": 0.3524, "step": 688 }, { "epoch": 0.527363184079602, "grad_norm": 6.884669303894043, "learning_rate": 1.0787922730602083e-05, "loss": 0.2839, "step": 689 }, { "epoch": 0.5281285878300803, "grad_norm": 8.420857429504395, "learning_rate": 1.0761266111019685e-05, "loss": 0.4047, "step": 690 }, { "epoch": 0.5288939915805587, "grad_norm": 4.702800750732422, "learning_rate": 1.0734604049415822e-05, "loss": 0.2042, "step": 691 }, { "epoch": 0.5296593953310371, "grad_norm": 7.388383865356445, "learning_rate": 1.070793673638812e-05, "loss": 0.2754, "step": 692 }, { "epoch": 0.5304247990815155, "grad_norm": 7.465807914733887, "learning_rate": 1.0681264362571744e-05, "loss": 0.2907, "step": 693 }, { "epoch": 0.5311902028319939, "grad_norm": 5.684911251068115, "learning_rate": 1.0654587118638027e-05, "loss": 0.2513, "step": 694 }, { "epoch": 0.5319556065824722, "grad_norm": 5.584875583648682, "learning_rate": 1.0627905195293135e-05, "loss": 0.2658, "step": 695 }, { "epoch": 0.5327210103329506, "grad_norm": 6.920920372009277, "learning_rate": 1.0601218783276673e-05, "loss": 0.3378, "step": 696 }, { "epoch": 0.533486414083429, "grad_norm": 6.030942916870117, "learning_rate": 1.0574528073360333e-05, "loss": 0.2732, "step": 697 }, { "epoch": 0.5342518178339074, "grad_norm": 8.450331687927246, "learning_rate": 1.054783325634654e-05, "loss": 0.3302, "step": 698 }, { "epoch": 0.5350172215843858, "grad_norm": 8.008537292480469, "learning_rate": 1.0521134523067076e-05, "loss": 0.2018, "step": 699 }, { "epoch": 0.5357826253348641, "grad_norm": 5.137029647827148, "learning_rate": 1.0494432064381707e-05, "loss": 0.3065, "step": 700 }, { "epoch": 0.5357826253348641, "eval_accuracy": 0.8844765342960289, "eval_f1": 0.8212290502793296, "eval_loss": 0.31221428513526917, "eval_precision": 0.9363057324840764, "eval_recall": 0.7313432835820896, "eval_runtime": 43.6609, "eval_samples_per_second": 6.894, "eval_steps_per_second": 0.229, "step": 700 }, { "epoch": 0.5365480290853425, "grad_norm": 9.422268867492676, "learning_rate": 1.0467726071176854e-05, "loss": 0.2778, "step": 701 }, { "epoch": 0.5373134328358209, "grad_norm": 4.314672470092773, "learning_rate": 1.044101673436418e-05, "loss": 0.2731, "step": 702 }, { "epoch": 0.5380788365862993, "grad_norm": 3.78764009475708, "learning_rate": 1.041430424487927e-05, "loss": 0.1847, "step": 703 }, { "epoch": 0.5388442403367777, "grad_norm": 10.072879791259766, "learning_rate": 1.0387588793680235e-05, "loss": 0.3702, "step": 704 }, { "epoch": 0.539609644087256, "grad_norm": 6.438011169433594, "learning_rate": 1.0360870571746364e-05, "loss": 0.3182, "step": 705 }, { "epoch": 0.5403750478377344, "grad_norm": 10.780308723449707, "learning_rate": 1.0334149770076747e-05, "loss": 0.3591, "step": 706 }, { "epoch": 0.5411404515882128, "grad_norm": 8.534119606018066, "learning_rate": 1.0307426579688924e-05, "loss": 0.3965, "step": 707 }, { "epoch": 0.5419058553386912, "grad_norm": 7.7759294509887695, "learning_rate": 1.0280701191617502e-05, "loss": 0.341, "step": 708 }, { "epoch": 0.5426712590891696, "grad_norm": 10.711833000183105, "learning_rate": 1.0253973796912801e-05, "loss": 0.4222, "step": 709 }, { "epoch": 0.5434366628396479, "grad_norm": 7.7709736824035645, "learning_rate": 1.0227244586639498e-05, "loss": 0.2426, "step": 710 }, { "epoch": 0.5442020665901263, "grad_norm": 7.218238830566406, "learning_rate": 1.0200513751875227e-05, "loss": 0.3744, "step": 711 }, { "epoch": 0.5449674703406047, "grad_norm": 5.452602863311768, "learning_rate": 1.0173781483709253e-05, "loss": 0.2426, "step": 712 }, { "epoch": 0.5457328740910831, "grad_norm": 9.658629417419434, "learning_rate": 1.0147047973241078e-05, "loss": 0.337, "step": 713 }, { "epoch": 0.5464982778415615, "grad_norm": 7.690628528594971, "learning_rate": 1.012031341157909e-05, "loss": 0.3391, "step": 714 }, { "epoch": 0.5472636815920398, "grad_norm": 5.8382568359375, "learning_rate": 1.009357798983919e-05, "loss": 0.1913, "step": 715 }, { "epoch": 0.5480290853425182, "grad_norm": 8.064395904541016, "learning_rate": 1.0066841899143424e-05, "loss": 0.2635, "step": 716 }, { "epoch": 0.5487944890929966, "grad_norm": 5.719712734222412, "learning_rate": 1.0040105330618624e-05, "loss": 0.2509, "step": 717 }, { "epoch": 0.549559892843475, "grad_norm": 6.939627170562744, "learning_rate": 1.001336847539504e-05, "loss": 0.3044, "step": 718 }, { "epoch": 0.5503252965939534, "grad_norm": 7.947267532348633, "learning_rate": 9.986631524604967e-06, "loss": 0.3369, "step": 719 }, { "epoch": 0.5510907003444316, "grad_norm": 6.54526424407959, "learning_rate": 9.95989466938138e-06, "loss": 0.2231, "step": 720 }, { "epoch": 0.5510907003444316, "eval_accuracy": 0.8808664259927798, "eval_f1": 0.828125, "eval_loss": 0.30751052498817444, "eval_precision": 0.8688524590163934, "eval_recall": 0.7910447761194029, "eval_runtime": 43.8093, "eval_samples_per_second": 6.871, "eval_steps_per_second": 0.228, "step": 720 }, { "epoch": 0.55185610409491, "grad_norm": 8.218890190124512, "learning_rate": 9.93315810085658e-06, "loss": 0.2876, "step": 721 }, { "epoch": 0.5526215078453884, "grad_norm": 6.78832483291626, "learning_rate": 9.906422010160815e-06, "loss": 0.2322, "step": 722 }, { "epoch": 0.5533869115958668, "grad_norm": 5.334784507751465, "learning_rate": 9.879686588420912e-06, "loss": 0.3155, "step": 723 }, { "epoch": 0.5541523153463452, "grad_norm": 7.487120151519775, "learning_rate": 9.852952026758923e-06, "loss": 0.2474, "step": 724 }, { "epoch": 0.5549177190968235, "grad_norm": 6.340409755706787, "learning_rate": 9.826218516290749e-06, "loss": 0.2036, "step": 725 }, { "epoch": 0.5556831228473019, "grad_norm": 7.877002239227295, "learning_rate": 9.799486248124775e-06, "loss": 0.2962, "step": 726 }, { "epoch": 0.5564485265977803, "grad_norm": 12.480257034301758, "learning_rate": 9.772755413360505e-06, "loss": 0.3794, "step": 727 }, { "epoch": 0.5572139303482587, "grad_norm": 8.214043617248535, "learning_rate": 9.746026203087198e-06, "loss": 0.3456, "step": 728 }, { "epoch": 0.5579793340987371, "grad_norm": 7.6310200691223145, "learning_rate": 9.719298808382502e-06, "loss": 0.242, "step": 729 }, { "epoch": 0.5587447378492154, "grad_norm": 5.91217565536499, "learning_rate": 9.69257342031108e-06, "loss": 0.2533, "step": 730 }, { "epoch": 0.5595101415996938, "grad_norm": 8.397479057312012, "learning_rate": 9.665850229923258e-06, "loss": 0.2492, "step": 731 }, { "epoch": 0.5602755453501722, "grad_norm": 6.648274898529053, "learning_rate": 9.639129428253639e-06, "loss": 0.3537, "step": 732 }, { "epoch": 0.5610409491006506, "grad_norm": 8.644059181213379, "learning_rate": 9.612411206319765e-06, "loss": 0.3804, "step": 733 }, { "epoch": 0.561806352851129, "grad_norm": 10.174010276794434, "learning_rate": 9.585695755120735e-06, "loss": 0.3242, "step": 734 }, { "epoch": 0.5625717566016073, "grad_norm": 10.4850435256958, "learning_rate": 9.558983265635822e-06, "loss": 0.347, "step": 735 }, { "epoch": 0.5633371603520857, "grad_norm": 10.979066848754883, "learning_rate": 9.532273928823151e-06, "loss": 0.45, "step": 736 }, { "epoch": 0.5641025641025641, "grad_norm": 7.7759833335876465, "learning_rate": 9.505567935618295e-06, "loss": 0.2547, "step": 737 }, { "epoch": 0.5648679678530425, "grad_norm": 7.15999698638916, "learning_rate": 9.47886547693293e-06, "loss": 0.2776, "step": 738 }, { "epoch": 0.5656333716035209, "grad_norm": 4.63205099105835, "learning_rate": 9.452166743653461e-06, "loss": 0.2385, "step": 739 }, { "epoch": 0.5663987753539992, "grad_norm": 5.939043045043945, "learning_rate": 9.425471926639667e-06, "loss": 0.2701, "step": 740 }, { "epoch": 0.5663987753539992, "eval_accuracy": 0.8808664259927798, "eval_f1": 0.8253968253968254, "eval_loss": 0.3040929138660431, "eval_precision": 0.8813559322033898, "eval_recall": 0.7761194029850746, "eval_runtime": 42.5041, "eval_samples_per_second": 7.082, "eval_steps_per_second": 0.235, "step": 740 }, { "epoch": 0.5671641791044776, "grad_norm": 8.01513957977295, "learning_rate": 9.39878121672333e-06, "loss": 0.4233, "step": 741 }, { "epoch": 0.567929582854956, "grad_norm": 6.426464080810547, "learning_rate": 9.372094804706867e-06, "loss": 0.2706, "step": 742 }, { "epoch": 0.5686949866054344, "grad_norm": 8.642332077026367, "learning_rate": 9.345412881361978e-06, "loss": 0.3164, "step": 743 }, { "epoch": 0.5694603903559128, "grad_norm": 10.497084617614746, "learning_rate": 9.31873563742826e-06, "loss": 0.4347, "step": 744 }, { "epoch": 0.5702257941063911, "grad_norm": 6.028977870941162, "learning_rate": 9.29206326361188e-06, "loss": 0.3735, "step": 745 }, { "epoch": 0.5709911978568695, "grad_norm": 5.886969566345215, "learning_rate": 9.265395950584181e-06, "loss": 0.2942, "step": 746 }, { "epoch": 0.5717566016073479, "grad_norm": 7.176773548126221, "learning_rate": 9.238733888980316e-06, "loss": 0.2876, "step": 747 }, { "epoch": 0.5725220053578263, "grad_norm": 7.8566999435424805, "learning_rate": 9.21207726939792e-06, "loss": 0.3305, "step": 748 }, { "epoch": 0.5732874091083047, "grad_norm": 6.04640531539917, "learning_rate": 9.185426282395707e-06, "loss": 0.3243, "step": 749 }, { "epoch": 0.574052812858783, "grad_norm": 8.753013610839844, "learning_rate": 9.158781118492133e-06, "loss": 0.3785, "step": 750 }, { "epoch": 0.5748182166092614, "grad_norm": 3.3650951385498047, "learning_rate": 9.132141968164026e-06, "loss": 0.239, "step": 751 }, { "epoch": 0.5755836203597398, "grad_norm": 5.48830509185791, "learning_rate": 9.105509021845224e-06, "loss": 0.2733, "step": 752 }, { "epoch": 0.5763490241102182, "grad_norm": 5.107679843902588, "learning_rate": 9.078882469925219e-06, "loss": 0.2973, "step": 753 }, { "epoch": 0.5771144278606966, "grad_norm": 10.316614151000977, "learning_rate": 9.052262502747784e-06, "loss": 0.3675, "step": 754 }, { "epoch": 0.5778798316111748, "grad_norm": 5.080225944519043, "learning_rate": 9.025649310609627e-06, "loss": 0.2482, "step": 755 }, { "epoch": 0.5786452353616532, "grad_norm": 4.110746383666992, "learning_rate": 8.999043083759016e-06, "loss": 0.2488, "step": 756 }, { "epoch": 0.5794106391121316, "grad_norm": 5.712521076202393, "learning_rate": 8.97244401239444e-06, "loss": 0.3355, "step": 757 }, { "epoch": 0.58017604286261, "grad_norm": 5.605413436889648, "learning_rate": 8.945852286663224e-06, "loss": 0.2868, "step": 758 }, { "epoch": 0.5809414466130884, "grad_norm": 6.864566802978516, "learning_rate": 8.919268096660178e-06, "loss": 0.3078, "step": 759 }, { "epoch": 0.5817068503635667, "grad_norm": 7.781955242156982, "learning_rate": 8.89269163242626e-06, "loss": 0.263, "step": 760 }, { "epoch": 0.5817068503635667, "eval_accuracy": 0.8772563176895307, "eval_f1": 0.8219895287958116, "eval_loss": 0.3053552806377411, "eval_precision": 0.8674033149171271, "eval_recall": 0.7810945273631841, "eval_runtime": 43.0199, "eval_samples_per_second": 6.997, "eval_steps_per_second": 0.232, "step": 760 }, { "epoch": 0.5824722541140451, "grad_norm": 7.361171722412109, "learning_rate": 8.866123083947182e-06, "loss": 0.3909, "step": 761 }, { "epoch": 0.5832376578645235, "grad_norm": 5.013346195220947, "learning_rate": 8.839562641152074e-06, "loss": 0.2957, "step": 762 }, { "epoch": 0.5840030616150019, "grad_norm": 5.838186264038086, "learning_rate": 8.813010493912127e-06, "loss": 0.1958, "step": 763 }, { "epoch": 0.5847684653654803, "grad_norm": 5.0725016593933105, "learning_rate": 8.786466832039222e-06, "loss": 0.3002, "step": 764 }, { "epoch": 0.5855338691159586, "grad_norm": 6.6698198318481445, "learning_rate": 8.759931845284589e-06, "loss": 0.208, "step": 765 }, { "epoch": 0.586299272866437, "grad_norm": 8.152987480163574, "learning_rate": 8.733405723337433e-06, "loss": 0.313, "step": 766 }, { "epoch": 0.5870646766169154, "grad_norm": 5.144524574279785, "learning_rate": 8.706888655823594e-06, "loss": 0.2905, "step": 767 }, { "epoch": 0.5878300803673938, "grad_norm": 5.568119525909424, "learning_rate": 8.680380832304189e-06, "loss": 0.2673, "step": 768 }, { "epoch": 0.5885954841178722, "grad_norm": 5.470310688018799, "learning_rate": 8.653882442274243e-06, "loss": 0.2665, "step": 769 }, { "epoch": 0.5893608878683505, "grad_norm": 8.845556259155273, "learning_rate": 8.627393675161354e-06, "loss": 0.3698, "step": 770 }, { "epoch": 0.5901262916188289, "grad_norm": 6.532787322998047, "learning_rate": 8.600914720324315e-06, "loss": 0.3666, "step": 771 }, { "epoch": 0.5908916953693073, "grad_norm": 5.255888938903809, "learning_rate": 8.574445767051794e-06, "loss": 0.2652, "step": 772 }, { "epoch": 0.5916570991197857, "grad_norm": 6.652350425720215, "learning_rate": 8.547987004560952e-06, "loss": 0.3326, "step": 773 }, { "epoch": 0.5924225028702641, "grad_norm": 7.3055524826049805, "learning_rate": 8.521538621996087e-06, "loss": 0.3047, "step": 774 }, { "epoch": 0.5931879066207424, "grad_norm": 9.414663314819336, "learning_rate": 8.495100808427323e-06, "loss": 0.3557, "step": 775 }, { "epoch": 0.5939533103712208, "grad_norm": 5.943358421325684, "learning_rate": 8.468673752849201e-06, "loss": 0.2677, "step": 776 }, { "epoch": 0.5947187141216992, "grad_norm": 8.034760475158691, "learning_rate": 8.442257644179374e-06, "loss": 0.3002, "step": 777 }, { "epoch": 0.5954841178721776, "grad_norm": 7.520342826843262, "learning_rate": 8.415852671257235e-06, "loss": 0.2883, "step": 778 }, { "epoch": 0.596249521622656, "grad_norm": 6.301088333129883, "learning_rate": 8.38945902284257e-06, "loss": 0.3803, "step": 779 }, { "epoch": 0.5970149253731343, "grad_norm": 6.890069007873535, "learning_rate": 8.363076887614218e-06, "loss": 0.3769, "step": 780 }, { "epoch": 0.5970149253731343, "eval_accuracy": 0.8754512635379061, "eval_f1": 0.8179419525065963, "eval_loss": 0.30358046293258667, "eval_precision": 0.8707865168539326, "eval_recall": 0.7711442786069652, "eval_runtime": 43.2535, "eval_samples_per_second": 6.959, "eval_steps_per_second": 0.231, "step": 780 }, { "epoch": 0.5977803291236127, "grad_norm": 5.621861457824707, "learning_rate": 8.336706454168701e-06, "loss": 0.2402, "step": 781 }, { "epoch": 0.5985457328740911, "grad_norm": 5.014660835266113, "learning_rate": 8.3103479110189e-06, "loss": 0.2169, "step": 782 }, { "epoch": 0.5993111366245695, "grad_norm": 10.174066543579102, "learning_rate": 8.284001446592687e-06, "loss": 0.4028, "step": 783 }, { "epoch": 0.6000765403750479, "grad_norm": 4.826257705688477, "learning_rate": 8.2576672492316e-06, "loss": 0.1936, "step": 784 }, { "epoch": 0.6008419441255262, "grad_norm": 5.713893890380859, "learning_rate": 8.231345507189478e-06, "loss": 0.2634, "step": 785 }, { "epoch": 0.6016073478760046, "grad_norm": 8.464000701904297, "learning_rate": 8.20503640863111e-06, "loss": 0.2961, "step": 786 }, { "epoch": 0.602372751626483, "grad_norm": 5.1683220863342285, "learning_rate": 8.178740141630925e-06, "loss": 0.2526, "step": 787 }, { "epoch": 0.6031381553769614, "grad_norm": 8.329268455505371, "learning_rate": 8.1524568941716e-06, "loss": 0.3785, "step": 788 }, { "epoch": 0.6039035591274398, "grad_norm": 5.792215824127197, "learning_rate": 8.126186854142752e-06, "loss": 0.2389, "step": 789 }, { "epoch": 0.604668962877918, "grad_norm": 5.503443717956543, "learning_rate": 8.09993020933959e-06, "loss": 0.2625, "step": 790 }, { "epoch": 0.6054343666283964, "grad_norm": 9.723910331726074, "learning_rate": 8.073687147461548e-06, "loss": 0.3418, "step": 791 }, { "epoch": 0.6061997703788748, "grad_norm": 6.494279384613037, "learning_rate": 8.047457856110972e-06, "loss": 0.2417, "step": 792 }, { "epoch": 0.6069651741293532, "grad_norm": 4.701023101806641, "learning_rate": 8.021242522791761e-06, "loss": 0.2855, "step": 793 }, { "epoch": 0.6077305778798316, "grad_norm": 7.103637218475342, "learning_rate": 7.99504133490804e-06, "loss": 0.3301, "step": 794 }, { "epoch": 0.6084959816303099, "grad_norm": 6.677608966827393, "learning_rate": 7.968854479762807e-06, "loss": 0.3203, "step": 795 }, { "epoch": 0.6092613853807883, "grad_norm": 7.5500264167785645, "learning_rate": 7.942682144556605e-06, "loss": 0.3786, "step": 796 }, { "epoch": 0.6100267891312667, "grad_norm": 9.03438949584961, "learning_rate": 7.916524516386177e-06, "loss": 0.2202, "step": 797 }, { "epoch": 0.6107921928817451, "grad_norm": 7.192146301269531, "learning_rate": 7.890381782243129e-06, "loss": 0.3227, "step": 798 }, { "epoch": 0.6115575966322235, "grad_norm": 6.554990768432617, "learning_rate": 7.864254129012599e-06, "loss": 0.2451, "step": 799 }, { "epoch": 0.6123230003827018, "grad_norm": 5.212791442871094, "learning_rate": 7.838141743471912e-06, "loss": 0.184, "step": 800 }, { "epoch": 0.6123230003827018, "eval_accuracy": 0.8754512635379061, "eval_f1": 0.8226221079691517, "eval_loss": 0.30545830726623535, "eval_precision": 0.851063829787234, "eval_recall": 0.7960199004975125, "eval_runtime": 43.2813, "eval_samples_per_second": 6.954, "eval_steps_per_second": 0.231, "step": 800 }, { "epoch": 0.6130884041331802, "grad_norm": 6.996140480041504, "learning_rate": 7.81204481228925e-06, "loss": 0.3193, "step": 801 }, { "epoch": 0.6138538078836586, "grad_norm": 5.212123394012451, "learning_rate": 7.785963522022328e-06, "loss": 0.3102, "step": 802 }, { "epoch": 0.614619211634137, "grad_norm": 7.367398262023926, "learning_rate": 7.759898059117031e-06, "loss": 0.3493, "step": 803 }, { "epoch": 0.6153846153846154, "grad_norm": 5.806961536407471, "learning_rate": 7.733848609906118e-06, "loss": 0.2261, "step": 804 }, { "epoch": 0.6161500191350937, "grad_norm": 8.282057762145996, "learning_rate": 7.707815360607857e-06, "loss": 0.327, "step": 805 }, { "epoch": 0.6169154228855721, "grad_norm": 7.256002426147461, "learning_rate": 7.681798497324717e-06, "loss": 0.2842, "step": 806 }, { "epoch": 0.6176808266360505, "grad_norm": 6.356285572052002, "learning_rate": 7.655798206042033e-06, "loss": 0.2381, "step": 807 }, { "epoch": 0.6184462303865289, "grad_norm": 7.932591438293457, "learning_rate": 7.629814672626659e-06, "loss": 0.299, "step": 808 }, { "epoch": 0.6192116341370073, "grad_norm": 9.997736930847168, "learning_rate": 7.603848082825667e-06, "loss": 0.455, "step": 809 }, { "epoch": 0.6199770378874856, "grad_norm": 6.560126304626465, "learning_rate": 7.577898622264995e-06, "loss": 0.2781, "step": 810 }, { "epoch": 0.620742441637964, "grad_norm": 5.914036273956299, "learning_rate": 7.55196647644814e-06, "loss": 0.2797, "step": 811 }, { "epoch": 0.6215078453884424, "grad_norm": 5.265806198120117, "learning_rate": 7.526051830754806e-06, "loss": 0.2588, "step": 812 }, { "epoch": 0.6222732491389208, "grad_norm": 9.48167610168457, "learning_rate": 7.500154870439613e-06, "loss": 0.3233, "step": 813 }, { "epoch": 0.6230386528893992, "grad_norm": 9.331697463989258, "learning_rate": 7.474275780630749e-06, "loss": 0.3605, "step": 814 }, { "epoch": 0.6238040566398775, "grad_norm": 5.877933979034424, "learning_rate": 7.4484147463286425e-06, "loss": 0.2063, "step": 815 }, { "epoch": 0.6245694603903559, "grad_norm": 7.0039286613464355, "learning_rate": 7.422571952404662e-06, "loss": 0.2673, "step": 816 }, { "epoch": 0.6253348641408343, "grad_norm": 5.136800289154053, "learning_rate": 7.3967475835997715e-06, "loss": 0.2393, "step": 817 }, { "epoch": 0.6261002678913127, "grad_norm": 7.966847896575928, "learning_rate": 7.37094182452323e-06, "loss": 0.3355, "step": 818 }, { "epoch": 0.6268656716417911, "grad_norm": 5.4302873611450195, "learning_rate": 7.345154859651258e-06, "loss": 0.2687, "step": 819 }, { "epoch": 0.6276310753922695, "grad_norm": 7.903888702392578, "learning_rate": 7.319386873325718e-06, "loss": 0.3339, "step": 820 }, { "epoch": 0.6276310753922695, "eval_accuracy": 0.8772563176895307, "eval_f1": 0.826530612244898, "eval_loss": 0.30791300535202026, "eval_precision": 0.8481675392670157, "eval_recall": 0.8059701492537313, "eval_runtime": 43.2728, "eval_samples_per_second": 6.956, "eval_steps_per_second": 0.231, "step": 820 }, { "epoch": 0.6283964791427478, "grad_norm": 8.574530601501465, "learning_rate": 7.293638049752813e-06, "loss": 0.3461, "step": 821 }, { "epoch": 0.6291618828932262, "grad_norm": 5.353209018707275, "learning_rate": 7.267908573001745e-06, "loss": 0.2579, "step": 822 }, { "epoch": 0.6299272866437046, "grad_norm": 13.138022422790527, "learning_rate": 7.242198627003423e-06, "loss": 0.4717, "step": 823 }, { "epoch": 0.630692690394183, "grad_norm": 6.482840061187744, "learning_rate": 7.216508395549134e-06, "loss": 0.2575, "step": 824 }, { "epoch": 0.6314580941446614, "grad_norm": 6.459270000457764, "learning_rate": 7.19083806228923e-06, "loss": 0.252, "step": 825 }, { "epoch": 0.6322234978951397, "grad_norm": 7.762699127197266, "learning_rate": 7.165187810731824e-06, "loss": 0.2834, "step": 826 }, { "epoch": 0.632988901645618, "grad_norm": 6.184956073760986, "learning_rate": 7.13955782424146e-06, "loss": 0.2233, "step": 827 }, { "epoch": 0.6337543053960965, "grad_norm": 4.162037372589111, "learning_rate": 7.1139482860378325e-06, "loss": 0.1924, "step": 828 }, { "epoch": 0.6345197091465749, "grad_norm": 4.195563316345215, "learning_rate": 7.0883593791944405e-06, "loss": 0.2445, "step": 829 }, { "epoch": 0.6352851128970533, "grad_norm": 7.017816066741943, "learning_rate": 7.062791286637307e-06, "loss": 0.2483, "step": 830 }, { "epoch": 0.6360505166475315, "grad_norm": 5.105776786804199, "learning_rate": 7.037244191143662e-06, "loss": 0.2481, "step": 831 }, { "epoch": 0.6368159203980099, "grad_norm": 4.578166961669922, "learning_rate": 7.011718275340626e-06, "loss": 0.188, "step": 832 }, { "epoch": 0.6375813241484883, "grad_norm": 5.503800868988037, "learning_rate": 6.986213721703925e-06, "loss": 0.2171, "step": 833 }, { "epoch": 0.6383467278989667, "grad_norm": 6.527347564697266, "learning_rate": 6.960730712556561e-06, "loss": 0.3013, "step": 834 }, { "epoch": 0.6391121316494451, "grad_norm": 5.6936354637146, "learning_rate": 6.9352694300675345e-06, "loss": 0.2749, "step": 835 }, { "epoch": 0.6398775353999234, "grad_norm": 7.051584720611572, "learning_rate": 6.909830056250527e-06, "loss": 0.3115, "step": 836 }, { "epoch": 0.6406429391504018, "grad_norm": 8.501811027526855, "learning_rate": 6.884412772962594e-06, "loss": 0.3699, "step": 837 }, { "epoch": 0.6414083429008802, "grad_norm": 7.283932209014893, "learning_rate": 6.859017761902888e-06, "loss": 0.3301, "step": 838 }, { "epoch": 0.6421737466513586, "grad_norm": 7.593969345092773, "learning_rate": 6.8336452046113276e-06, "loss": 0.231, "step": 839 }, { "epoch": 0.642939150401837, "grad_norm": 8.386115074157715, "learning_rate": 6.8082952824673345e-06, "loss": 0.2078, "step": 840 }, { "epoch": 0.642939150401837, "eval_accuracy": 0.8826714801444043, "eval_f1": 0.8302872062663186, "eval_loss": 0.29997462034225464, "eval_precision": 0.8736263736263736, "eval_recall": 0.7910447761194029, "eval_runtime": 43.8127, "eval_samples_per_second": 6.87, "eval_steps_per_second": 0.228, "step": 840 }, { "epoch": 0.6437045541523153, "grad_norm": 6.559789657592773, "learning_rate": 6.782968176688514e-06, "loss": 0.2948, "step": 841 }, { "epoch": 0.6444699579027937, "grad_norm": 6.5288615226745605, "learning_rate": 6.757664068329353e-06, "loss": 0.2972, "step": 842 }, { "epoch": 0.6452353616532721, "grad_norm": 6.198600769042969, "learning_rate": 6.732383138279963e-06, "loss": 0.2211, "step": 843 }, { "epoch": 0.6460007654037505, "grad_norm": 6.554396629333496, "learning_rate": 6.7071255672647366e-06, "loss": 0.3524, "step": 844 }, { "epoch": 0.6467661691542289, "grad_norm": 7.058009624481201, "learning_rate": 6.681891535841094e-06, "loss": 0.3195, "step": 845 }, { "epoch": 0.6475315729047072, "grad_norm": 7.023917198181152, "learning_rate": 6.656681224398182e-06, "loss": 0.3004, "step": 846 }, { "epoch": 0.6482969766551856, "grad_norm": 8.042481422424316, "learning_rate": 6.631494813155574e-06, "loss": 0.3817, "step": 847 }, { "epoch": 0.649062380405664, "grad_norm": 7.734927177429199, "learning_rate": 6.606332482161992e-06, "loss": 0.2658, "step": 848 }, { "epoch": 0.6498277841561424, "grad_norm": 8.582637786865234, "learning_rate": 6.581194411294018e-06, "loss": 0.3253, "step": 849 }, { "epoch": 0.6505931879066208, "grad_norm": 5.6030449867248535, "learning_rate": 6.556080780254805e-06, "loss": 0.2632, "step": 850 }, { "epoch": 0.6513585916570991, "grad_norm": 5.397579193115234, "learning_rate": 6.530991768572794e-06, "loss": 0.3054, "step": 851 }, { "epoch": 0.6521239954075775, "grad_norm": 5.722109794616699, "learning_rate": 6.505927555600435e-06, "loss": 0.2944, "step": 852 }, { "epoch": 0.6528893991580559, "grad_norm": 6.566413402557373, "learning_rate": 6.480888320512901e-06, "loss": 0.3368, "step": 853 }, { "epoch": 0.6536548029085343, "grad_norm": 7.058630466461182, "learning_rate": 6.455874242306795e-06, "loss": 0.2567, "step": 854 }, { "epoch": 0.6544202066590127, "grad_norm": 9.69749641418457, "learning_rate": 6.430885499798903e-06, "loss": 0.2551, "step": 855 }, { "epoch": 0.655185610409491, "grad_norm": 7.507757186889648, "learning_rate": 6.405922271624874e-06, "loss": 0.3295, "step": 856 }, { "epoch": 0.6559510141599694, "grad_norm": 9.954885482788086, "learning_rate": 6.3809847362379765e-06, "loss": 0.4049, "step": 857 }, { "epoch": 0.6567164179104478, "grad_norm": 9.585737228393555, "learning_rate": 6.356073071907809e-06, "loss": 0.2768, "step": 858 }, { "epoch": 0.6574818216609262, "grad_norm": 6.906581878662109, "learning_rate": 6.331187456719023e-06, "loss": 0.2695, "step": 859 }, { "epoch": 0.6582472254114046, "grad_norm": 7.005069255828857, "learning_rate": 6.306328068570062e-06, "loss": 0.3542, "step": 860 }, { "epoch": 0.6582472254114046, "eval_accuracy": 0.8826714801444043, "eval_f1": 0.8293963254593176, "eval_loss": 0.30143383145332336, "eval_precision": 0.8777777777777778, "eval_recall": 0.7860696517412935, "eval_runtime": 43.8172, "eval_samples_per_second": 6.869, "eval_steps_per_second": 0.228, "step": 860 }, { "epoch": 0.6590126291618829, "grad_norm": 5.407721519470215, "learning_rate": 6.2814950851718695e-06, "loss": 0.2574, "step": 861 }, { "epoch": 0.6597780329123613, "grad_norm": 7.781362533569336, "learning_rate": 6.256688684046639e-06, "loss": 0.3767, "step": 862 }, { "epoch": 0.6605434366628397, "grad_norm": 6.320938587188721, "learning_rate": 6.231909042526539e-06, "loss": 0.3684, "step": 863 }, { "epoch": 0.661308840413318, "grad_norm": 4.665534973144531, "learning_rate": 6.207156337752435e-06, "loss": 0.2498, "step": 864 }, { "epoch": 0.6620742441637965, "grad_norm": 6.499063491821289, "learning_rate": 6.1824307466726405e-06, "loss": 0.3129, "step": 865 }, { "epoch": 0.6628396479142747, "grad_norm": 6.318509101867676, "learning_rate": 6.15773244604163e-06, "loss": 0.208, "step": 866 }, { "epoch": 0.6636050516647531, "grad_norm": 7.20699405670166, "learning_rate": 6.133061612418804e-06, "loss": 0.3234, "step": 867 }, { "epoch": 0.6643704554152315, "grad_norm": 5.541223526000977, "learning_rate": 6.108418422167199e-06, "loss": 0.22, "step": 868 }, { "epoch": 0.6651358591657099, "grad_norm": 6.329871654510498, "learning_rate": 6.08380305145224e-06, "loss": 0.3354, "step": 869 }, { "epoch": 0.6659012629161883, "grad_norm": 5.076813697814941, "learning_rate": 6.059215676240493e-06, "loss": 0.2852, "step": 870 }, { "epoch": 0.6666666666666666, "grad_norm": 7.819876670837402, "learning_rate": 6.034656472298374e-06, "loss": 0.395, "step": 871 }, { "epoch": 0.667432070417145, "grad_norm": 8.548850059509277, "learning_rate": 6.0101256151909286e-06, "loss": 0.3352, "step": 872 }, { "epoch": 0.6681974741676234, "grad_norm": 5.550721168518066, "learning_rate": 5.9856232802805505e-06, "loss": 0.1429, "step": 873 }, { "epoch": 0.6689628779181018, "grad_norm": 5.15646505355835, "learning_rate": 5.961149642725745e-06, "loss": 0.2563, "step": 874 }, { "epoch": 0.6697282816685802, "grad_norm": 10.889342308044434, "learning_rate": 5.936704877479872e-06, "loss": 0.3205, "step": 875 }, { "epoch": 0.6704936854190585, "grad_norm": 6.7458600997924805, "learning_rate": 5.912289159289884e-06, "loss": 0.3138, "step": 876 }, { "epoch": 0.6712590891695369, "grad_norm": 4.428853988647461, "learning_rate": 5.887902662695093e-06, "loss": 0.2809, "step": 877 }, { "epoch": 0.6720244929200153, "grad_norm": 8.511722564697266, "learning_rate": 5.863545562025916e-06, "loss": 0.3409, "step": 878 }, { "epoch": 0.6727898966704937, "grad_norm": 7.67958402633667, "learning_rate": 5.839218031402629e-06, "loss": 0.2999, "step": 879 }, { "epoch": 0.6735553004209721, "grad_norm": 4.995622634887695, "learning_rate": 5.814920244734124e-06, "loss": 0.2316, "step": 880 }, { "epoch": 0.6735553004209721, "eval_accuracy": 0.8754512635379061, "eval_f1": 0.8207792207792208, "eval_loss": 0.3073867857456207, "eval_precision": 0.8586956521739131, "eval_recall": 0.7860696517412935, "eval_runtime": 43.9875, "eval_samples_per_second": 6.843, "eval_steps_per_second": 0.227, "step": 880 }, { "epoch": 0.6743207041714504, "grad_norm": 6.208434581756592, "learning_rate": 5.790652375716653e-06, "loss": 0.2957, "step": 881 }, { "epoch": 0.6750861079219288, "grad_norm": 8.62459659576416, "learning_rate": 5.7664145978326095e-06, "loss": 0.4123, "step": 882 }, { "epoch": 0.6758515116724072, "grad_norm": 8.513111114501953, "learning_rate": 5.742207084349274e-06, "loss": 0.2957, "step": 883 }, { "epoch": 0.6766169154228856, "grad_norm": 8.137804985046387, "learning_rate": 5.718030008317578e-06, "loss": 0.2782, "step": 884 }, { "epoch": 0.677382319173364, "grad_norm": 6.080589771270752, "learning_rate": 5.6938835425708575e-06, "loss": 0.2541, "step": 885 }, { "epoch": 0.6781477229238423, "grad_norm": 5.756350040435791, "learning_rate": 5.669767859723636e-06, "loss": 0.2528, "step": 886 }, { "epoch": 0.6789131266743207, "grad_norm": 5.877570629119873, "learning_rate": 5.645683132170384e-06, "loss": 0.3401, "step": 887 }, { "epoch": 0.6796785304247991, "grad_norm": 7.436519622802734, "learning_rate": 5.621629532084265e-06, "loss": 0.293, "step": 888 }, { "epoch": 0.6804439341752775, "grad_norm": 7.50223445892334, "learning_rate": 5.597607231415952e-06, "loss": 0.3169, "step": 889 }, { "epoch": 0.6812093379257559, "grad_norm": 8.692954063415527, "learning_rate": 5.57361640189234e-06, "loss": 0.4175, "step": 890 }, { "epoch": 0.6819747416762342, "grad_norm": 8.41832447052002, "learning_rate": 5.549657215015367e-06, "loss": 0.3262, "step": 891 }, { "epoch": 0.6827401454267126, "grad_norm": 11.16489028930664, "learning_rate": 5.525729842060768e-06, "loss": 0.4013, "step": 892 }, { "epoch": 0.683505549177191, "grad_norm": 6.714212894439697, "learning_rate": 5.501834454076838e-06, "loss": 0.249, "step": 893 }, { "epoch": 0.6842709529276694, "grad_norm": 10.778039932250977, "learning_rate": 5.4779712218832356e-06, "loss": 0.3007, "step": 894 }, { "epoch": 0.6850363566781478, "grad_norm": 6.015567302703857, "learning_rate": 5.454140316069747e-06, "loss": 0.3512, "step": 895 }, { "epoch": 0.6858017604286261, "grad_norm": 8.104532241821289, "learning_rate": 5.430341906995064e-06, "loss": 0.3026, "step": 896 }, { "epoch": 0.6865671641791045, "grad_norm": 6.712876796722412, "learning_rate": 5.406576164785582e-06, "loss": 0.2889, "step": 897 }, { "epoch": 0.6873325679295829, "grad_norm": 5.471017837524414, "learning_rate": 5.382843259334152e-06, "loss": 0.2518, "step": 898 }, { "epoch": 0.6880979716800613, "grad_norm": 8.591350555419922, "learning_rate": 5.3591433602989076e-06, "loss": 0.1854, "step": 899 }, { "epoch": 0.6888633754305397, "grad_norm": 5.255414962768555, "learning_rate": 5.3354766371020106e-06, "loss": 0.2983, "step": 900 }, { "epoch": 0.6888633754305397, "eval_accuracy": 0.8808664259927798, "eval_f1": 0.8263157894736842, "eval_loss": 0.30376458168029785, "eval_precision": 0.8770949720670391, "eval_recall": 0.7810945273631841, "eval_runtime": 44.3497, "eval_samples_per_second": 6.787, "eval_steps_per_second": 0.225, "step": 900 }, { "epoch": 0.689628779181018, "grad_norm": 7.284707546234131, "learning_rate": 5.311843258928489e-06, "loss": 0.2557, "step": 901 }, { "epoch": 0.6903941829314963, "grad_norm": 5.908609867095947, "learning_rate": 5.288243394724971e-06, "loss": 0.2206, "step": 902 }, { "epoch": 0.6911595866819747, "grad_norm": 6.1348466873168945, "learning_rate": 5.264677213198519e-06, "loss": 0.3276, "step": 903 }, { "epoch": 0.6919249904324531, "grad_norm": 6.337802410125732, "learning_rate": 5.241144882815413e-06, "loss": 0.3548, "step": 904 }, { "epoch": 0.6926903941829315, "grad_norm": 5.046546936035156, "learning_rate": 5.217646571799929e-06, "loss": 0.2594, "step": 905 }, { "epoch": 0.6934557979334098, "grad_norm": 6.154454708099365, "learning_rate": 5.194182448133163e-06, "loss": 0.2041, "step": 906 }, { "epoch": 0.6942212016838882, "grad_norm": 4.449357986450195, "learning_rate": 5.170752679551816e-06, "loss": 0.2166, "step": 907 }, { "epoch": 0.6949866054343666, "grad_norm": 11.773411750793457, "learning_rate": 5.147357433546992e-06, "loss": 0.3792, "step": 908 }, { "epoch": 0.695752009184845, "grad_norm": 9.096918106079102, "learning_rate": 5.123996877363015e-06, "loss": 0.3238, "step": 909 }, { "epoch": 0.6965174129353234, "grad_norm": 6.791448593139648, "learning_rate": 5.100671177996206e-06, "loss": 0.4018, "step": 910 }, { "epoch": 0.6972828166858017, "grad_norm": 6.929929256439209, "learning_rate": 5.077380502193725e-06, "loss": 0.3131, "step": 911 }, { "epoch": 0.6980482204362801, "grad_norm": 8.740852355957031, "learning_rate": 5.054125016452352e-06, "loss": 0.422, "step": 912 }, { "epoch": 0.6988136241867585, "grad_norm": 7.577706336975098, "learning_rate": 5.0309048870173074e-06, "loss": 0.2722, "step": 913 }, { "epoch": 0.6995790279372369, "grad_norm": 5.261869430541992, "learning_rate": 5.0077202798810675e-06, "loss": 0.2643, "step": 914 }, { "epoch": 0.7003444316877153, "grad_norm": 6.624474048614502, "learning_rate": 4.984571360782158e-06, "loss": 0.2752, "step": 915 }, { "epoch": 0.7011098354381936, "grad_norm": 4.49412202835083, "learning_rate": 4.961458295203999e-06, "loss": 0.2445, "step": 916 }, { "epoch": 0.701875239188672, "grad_norm": 7.072380542755127, "learning_rate": 4.938381248373695e-06, "loss": 0.2266, "step": 917 }, { "epoch": 0.7026406429391504, "grad_norm": 6.16682767868042, "learning_rate": 4.915340385260871e-06, "loss": 0.1628, "step": 918 }, { "epoch": 0.7034060466896288, "grad_norm": 7.842196941375732, "learning_rate": 4.8923358705764885e-06, "loss": 0.2581, "step": 919 }, { "epoch": 0.7041714504401072, "grad_norm": 5.595624923706055, "learning_rate": 4.869367868771666e-06, "loss": 0.3039, "step": 920 }, { "epoch": 0.7041714504401072, "eval_accuracy": 0.8844765342960289, "eval_f1": 0.8306878306878307, "eval_loss": 0.3023536801338196, "eval_precision": 0.8870056497175142, "eval_recall": 0.7810945273631841, "eval_runtime": 43.7277, "eval_samples_per_second": 6.884, "eval_steps_per_second": 0.229, "step": 920 }, { "epoch": 0.7049368541905855, "grad_norm": 5.825865268707275, "learning_rate": 4.846436544036505e-06, "loss": 0.3092, "step": 921 }, { "epoch": 0.7057022579410639, "grad_norm": 9.2932710647583, "learning_rate": 4.823542060298905e-06, "loss": 0.3468, "step": 922 }, { "epoch": 0.7064676616915423, "grad_norm": 7.464381217956543, "learning_rate": 4.80068458122342e-06, "loss": 0.3004, "step": 923 }, { "epoch": 0.7072330654420207, "grad_norm": 6.114269733428955, "learning_rate": 4.777864270210057e-06, "loss": 0.2287, "step": 924 }, { "epoch": 0.7079984691924991, "grad_norm": 7.069962978363037, "learning_rate": 4.75508129039313e-06, "loss": 0.2774, "step": 925 }, { "epoch": 0.7087638729429774, "grad_norm": 9.016349792480469, "learning_rate": 4.7323358046400844e-06, "loss": 0.293, "step": 926 }, { "epoch": 0.7095292766934558, "grad_norm": 8.230774879455566, "learning_rate": 4.709627975550326e-06, "loss": 0.2374, "step": 927 }, { "epoch": 0.7102946804439342, "grad_norm": 5.313325881958008, "learning_rate": 4.686957965454078e-06, "loss": 0.3027, "step": 928 }, { "epoch": 0.7110600841944126, "grad_norm": 13.65420913696289, "learning_rate": 4.664325936411197e-06, "loss": 0.2676, "step": 929 }, { "epoch": 0.711825487944891, "grad_norm": 5.817005157470703, "learning_rate": 4.641732050210032e-06, "loss": 0.2841, "step": 930 }, { "epoch": 0.7125908916953693, "grad_norm": 5.238630294799805, "learning_rate": 4.619176468366274e-06, "loss": 0.2615, "step": 931 }, { "epoch": 0.7133562954458477, "grad_norm": 4.981099605560303, "learning_rate": 4.596659352121768e-06, "loss": 0.2997, "step": 932 }, { "epoch": 0.7141216991963261, "grad_norm": 8.994159698486328, "learning_rate": 4.574180862443402e-06, "loss": 0.3546, "step": 933 }, { "epoch": 0.7148871029468045, "grad_norm": 6.442971706390381, "learning_rate": 4.551741160021916e-06, "loss": 0.2595, "step": 934 }, { "epoch": 0.7156525066972829, "grad_norm": 6.125987529754639, "learning_rate": 4.529340405270792e-06, "loss": 0.2808, "step": 935 }, { "epoch": 0.7164179104477612, "grad_norm": 8.460797309875488, "learning_rate": 4.5069787583250815e-06, "loss": 0.3772, "step": 936 }, { "epoch": 0.7171833141982396, "grad_norm": 9.00993537902832, "learning_rate": 4.484656379040268e-06, "loss": 0.2304, "step": 937 }, { "epoch": 0.717948717948718, "grad_norm": 7.658569812774658, "learning_rate": 4.4623734269911274e-06, "loss": 0.2673, "step": 938 }, { "epoch": 0.7187141216991964, "grad_norm": 5.94216775894165, "learning_rate": 4.4401300614705765e-06, "loss": 0.3316, "step": 939 }, { "epoch": 0.7194795254496748, "grad_norm": 9.894668579101562, "learning_rate": 4.417926441488553e-06, "loss": 0.311, "step": 940 }, { "epoch": 0.7194795254496748, "eval_accuracy": 0.8826714801444043, "eval_f1": 0.8284960422163589, "eval_loss": 0.3015783131122589, "eval_precision": 0.8820224719101124, "eval_recall": 0.7810945273631841, "eval_runtime": 43.7456, "eval_samples_per_second": 6.881, "eval_steps_per_second": 0.229, "step": 940 }, { "epoch": 0.720244929200153, "grad_norm": 6.326188087463379, "learning_rate": 4.395762725770852e-06, "loss": 0.3407, "step": 941 }, { "epoch": 0.7210103329506314, "grad_norm": 6.0098419189453125, "learning_rate": 4.3736390727580295e-06, "loss": 0.2842, "step": 942 }, { "epoch": 0.7217757367011098, "grad_norm": 5.677011013031006, "learning_rate": 4.351555640604233e-06, "loss": 0.3018, "step": 943 }, { "epoch": 0.7225411404515882, "grad_norm": 10.627213478088379, "learning_rate": 4.329512587176081e-06, "loss": 0.229, "step": 944 }, { "epoch": 0.7233065442020666, "grad_norm": 7.869304180145264, "learning_rate": 4.307510070051554e-06, "loss": 0.3286, "step": 945 }, { "epoch": 0.7240719479525449, "grad_norm": 8.52935791015625, "learning_rate": 4.285548246518837e-06, "loss": 0.3035, "step": 946 }, { "epoch": 0.7248373517030233, "grad_norm": 6.446293830871582, "learning_rate": 4.2636272735752195e-06, "loss": 0.1992, "step": 947 }, { "epoch": 0.7256027554535017, "grad_norm": 12.176380157470703, "learning_rate": 4.241747307925966e-06, "loss": 0.4213, "step": 948 }, { "epoch": 0.7263681592039801, "grad_norm": 5.431862831115723, "learning_rate": 4.21990850598319e-06, "loss": 0.2714, "step": 949 }, { "epoch": 0.7271335629544585, "grad_norm": 8.738656044006348, "learning_rate": 4.198111023864747e-06, "loss": 0.3066, "step": 950 }, { "epoch": 0.7278989667049368, "grad_norm": 12.56904125213623, "learning_rate": 4.176355017393099e-06, "loss": 0.4619, "step": 951 }, { "epoch": 0.7286643704554152, "grad_norm": 7.273378372192383, "learning_rate": 4.154640642094223e-06, "loss": 0.3136, "step": 952 }, { "epoch": 0.7294297742058936, "grad_norm": 7.288893222808838, "learning_rate": 4.1329680531964914e-06, "loss": 0.3274, "step": 953 }, { "epoch": 0.730195177956372, "grad_norm": 7.65816068649292, "learning_rate": 4.111337405629553e-06, "loss": 0.325, "step": 954 }, { "epoch": 0.7309605817068504, "grad_norm": 4.704836368560791, "learning_rate": 4.089748854023241e-06, "loss": 0.2776, "step": 955 }, { "epoch": 0.7317259854573287, "grad_norm": 7.699378490447998, "learning_rate": 4.0682025527064486e-06, "loss": 0.3671, "step": 956 }, { "epoch": 0.7324913892078071, "grad_norm": 6.303155422210693, "learning_rate": 4.04669865570605e-06, "loss": 0.2963, "step": 957 }, { "epoch": 0.7332567929582855, "grad_norm": 5.999407768249512, "learning_rate": 4.025237316745771e-06, "loss": 0.2291, "step": 958 }, { "epoch": 0.7340221967087639, "grad_norm": 6.365483283996582, "learning_rate": 4.003818689245118e-06, "loss": 0.2498, "step": 959 }, { "epoch": 0.7347876004592423, "grad_norm": 8.274271011352539, "learning_rate": 3.982442926318263e-06, "loss": 0.406, "step": 960 }, { "epoch": 0.7347876004592423, "eval_accuracy": 0.8826714801444043, "eval_f1": 0.8329048843187661, "eval_loss": 0.30403637886047363, "eval_precision": 0.8617021276595744, "eval_recall": 0.8059701492537313, "eval_runtime": 43.2929, "eval_samples_per_second": 6.953, "eval_steps_per_second": 0.231, "step": 960 }, { "epoch": 0.7355530042097206, "grad_norm": 7.607824325561523, "learning_rate": 3.961110180772955e-06, "loss": 0.2747, "step": 961 }, { "epoch": 0.736318407960199, "grad_norm": 4.3896803855896, "learning_rate": 3.939820605109429e-06, "loss": 0.285, "step": 962 }, { "epoch": 0.7370838117106774, "grad_norm": 5.973980903625488, "learning_rate": 3.9185743515193065e-06, "loss": 0.2031, "step": 963 }, { "epoch": 0.7378492154611558, "grad_norm": 5.6452226638793945, "learning_rate": 3.897371571884521e-06, "loss": 0.2707, "step": 964 }, { "epoch": 0.7386146192116342, "grad_norm": 3.9997060298919678, "learning_rate": 3.8762124177762285e-06, "loss": 0.1803, "step": 965 }, { "epoch": 0.7393800229621125, "grad_norm": 7.737429141998291, "learning_rate": 3.855097040453715e-06, "loss": 0.2833, "step": 966 }, { "epoch": 0.7401454267125909, "grad_norm": 5.245492935180664, "learning_rate": 3.83402559086333e-06, "loss": 0.2237, "step": 967 }, { "epoch": 0.7409108304630693, "grad_norm": 8.341132164001465, "learning_rate": 3.812998219637387e-06, "loss": 0.355, "step": 968 }, { "epoch": 0.7416762342135477, "grad_norm": 8.247241020202637, "learning_rate": 3.7920150770931095e-06, "loss": 0.301, "step": 969 }, { "epoch": 0.7424416379640261, "grad_norm": 5.903472900390625, "learning_rate": 3.7710763132315455e-06, "loss": 0.2827, "step": 970 }, { "epoch": 0.7432070417145044, "grad_norm": 6.879560470581055, "learning_rate": 3.750182077736486e-06, "loss": 0.2869, "step": 971 }, { "epoch": 0.7439724454649828, "grad_norm": 4.436656951904297, "learning_rate": 3.7293325199734144e-06, "loss": 0.238, "step": 972 }, { "epoch": 0.7447378492154612, "grad_norm": 8.565714836120605, "learning_rate": 3.7085277889884253e-06, "loss": 0.2616, "step": 973 }, { "epoch": 0.7455032529659396, "grad_norm": 4.988437652587891, "learning_rate": 3.6877680335071653e-06, "loss": 0.2541, "step": 974 }, { "epoch": 0.746268656716418, "grad_norm": 6.868668079376221, "learning_rate": 3.667053401933759e-06, "loss": 0.2744, "step": 975 }, { "epoch": 0.7470340604668962, "grad_norm": 7.482735633850098, "learning_rate": 3.6463840423497643e-06, "loss": 0.2063, "step": 976 }, { "epoch": 0.7477994642173746, "grad_norm": 5.769603729248047, "learning_rate": 3.625760102513103e-06, "loss": 0.2843, "step": 977 }, { "epoch": 0.748564867967853, "grad_norm": 7.187550067901611, "learning_rate": 3.6051817298570067e-06, "loss": 0.3898, "step": 978 }, { "epoch": 0.7493302717183314, "grad_norm": 6.445494651794434, "learning_rate": 3.5846490714889694e-06, "loss": 0.351, "step": 979 }, { "epoch": 0.7500956754688098, "grad_norm": 5.746145725250244, "learning_rate": 3.5641622741896742e-06, "loss": 0.2306, "step": 980 }, { "epoch": 0.7500956754688098, "eval_accuracy": 0.8862815884476535, "eval_f1": 0.8301886792452831, "eval_loss": 0.29747819900512695, "eval_precision": 0.9058823529411765, "eval_recall": 0.7661691542288557, "eval_runtime": 43.5344, "eval_samples_per_second": 6.914, "eval_steps_per_second": 0.23, "step": 980 }, { "epoch": 0.7508610792192881, "grad_norm": 5.425146579742432, "learning_rate": 3.543721484411976e-06, "loss": 0.2208, "step": 981 }, { "epoch": 0.7516264829697665, "grad_norm": 5.471285820007324, "learning_rate": 3.5233268482798353e-06, "loss": 0.24, "step": 982 }, { "epoch": 0.7523918867202449, "grad_norm": 7.079029083251953, "learning_rate": 3.5029785115872617e-06, "loss": 0.255, "step": 983 }, { "epoch": 0.7531572904707233, "grad_norm": 5.006328105926514, "learning_rate": 3.4826766197973127e-06, "loss": 0.2167, "step": 984 }, { "epoch": 0.7539226942212017, "grad_norm": 5.058717250823975, "learning_rate": 3.462421318041003e-06, "loss": 0.2135, "step": 985 }, { "epoch": 0.75468809797168, "grad_norm": 5.569031715393066, "learning_rate": 3.442212751116305e-06, "loss": 0.3167, "step": 986 }, { "epoch": 0.7554535017221584, "grad_norm": 6.174429416656494, "learning_rate": 3.4220510634871005e-06, "loss": 0.3092, "step": 987 }, { "epoch": 0.7562189054726368, "grad_norm": 6.917867660522461, "learning_rate": 3.4019363992821386e-06, "loss": 0.2478, "step": 988 }, { "epoch": 0.7569843092231152, "grad_norm": 7.082120895385742, "learning_rate": 3.381868902294023e-06, "loss": 0.3097, "step": 989 }, { "epoch": 0.7577497129735936, "grad_norm": 7.59193754196167, "learning_rate": 3.361848715978173e-06, "loss": 0.2751, "step": 990 }, { "epoch": 0.7585151167240719, "grad_norm": 8.931351661682129, "learning_rate": 3.3418759834518056e-06, "loss": 0.3971, "step": 991 }, { "epoch": 0.7592805204745503, "grad_norm": 4.901859760284424, "learning_rate": 3.321950847492895e-06, "loss": 0.2425, "step": 992 }, { "epoch": 0.7600459242250287, "grad_norm": 7.7956976890563965, "learning_rate": 3.302073450539176e-06, "loss": 0.3153, "step": 993 }, { "epoch": 0.7608113279755071, "grad_norm": 7.031491756439209, "learning_rate": 3.2822439346871127e-06, "loss": 0.3279, "step": 994 }, { "epoch": 0.7615767317259855, "grad_norm": 5.565333843231201, "learning_rate": 3.2624624416908745e-06, "loss": 0.2985, "step": 995 }, { "epoch": 0.7623421354764638, "grad_norm": 10.345918655395508, "learning_rate": 3.2427291129613502e-06, "loss": 0.3259, "step": 996 }, { "epoch": 0.7631075392269422, "grad_norm": 7.010242938995361, "learning_rate": 3.2230440895651006e-06, "loss": 0.2949, "step": 997 }, { "epoch": 0.7638729429774206, "grad_norm": 10.574152946472168, "learning_rate": 3.2034075122233798e-06, "loss": 0.4259, "step": 998 }, { "epoch": 0.764638346727899, "grad_norm": 7.348075866699219, "learning_rate": 3.18381952131112e-06, "loss": 0.2886, "step": 999 }, { "epoch": 0.7654037504783774, "grad_norm": 7.71437931060791, "learning_rate": 3.164280256855914e-06, "loss": 0.3494, "step": 1000 }, { "epoch": 0.7654037504783774, "eval_accuracy": 0.8862815884476535, "eval_f1": 0.8363636363636363, "eval_loss": 0.30092278122901917, "eval_precision": 0.875, "eval_recall": 0.8009950248756219, "eval_runtime": 43.0915, "eval_samples_per_second": 6.985, "eval_steps_per_second": 0.232, "step": 1000 }, { "epoch": 0.7661691542288557, "grad_norm": 5.599081993103027, "learning_rate": 3.1447898585370386e-06, "loss": 0.2121, "step": 1001 }, { "epoch": 0.7669345579793341, "grad_norm": 4.840198516845703, "learning_rate": 3.125348465684439e-06, "loss": 0.1907, "step": 1002 }, { "epoch": 0.7676999617298125, "grad_norm": 7.6106858253479, "learning_rate": 3.105956217277738e-06, "loss": 0.3576, "step": 1003 }, { "epoch": 0.7684653654802909, "grad_norm": 6.351868629455566, "learning_rate": 3.086613251945246e-06, "loss": 0.2508, "step": 1004 }, { "epoch": 0.7692307692307693, "grad_norm": 9.808284759521484, "learning_rate": 3.067319707962957e-06, "loss": 0.4001, "step": 1005 }, { "epoch": 0.7699961729812476, "grad_norm": 7.302840709686279, "learning_rate": 3.0480757232535773e-06, "loss": 0.3344, "step": 1006 }, { "epoch": 0.770761576731726, "grad_norm": 7.3297438621521, "learning_rate": 3.02888143538553e-06, "loss": 0.2596, "step": 1007 }, { "epoch": 0.7715269804822044, "grad_norm": 6.303321838378906, "learning_rate": 3.0097369815719746e-06, "loss": 0.2743, "step": 1008 }, { "epoch": 0.7722923842326828, "grad_norm": 7.253098011016846, "learning_rate": 2.990642498669816e-06, "loss": 0.3192, "step": 1009 }, { "epoch": 0.7730577879831612, "grad_norm": 6.633822441101074, "learning_rate": 2.971598123178744e-06, "loss": 0.2047, "step": 1010 }, { "epoch": 0.7738231917336394, "grad_norm": 6.165762901306152, "learning_rate": 2.9526039912402504e-06, "loss": 0.3222, "step": 1011 }, { "epoch": 0.7745885954841178, "grad_norm": 4.74859094619751, "learning_rate": 2.9336602386366396e-06, "loss": 0.209, "step": 1012 }, { "epoch": 0.7753539992345962, "grad_norm": 7.679808616638184, "learning_rate": 2.9147670007900875e-06, "loss": 0.2636, "step": 1013 }, { "epoch": 0.7761194029850746, "grad_norm": 6.208285331726074, "learning_rate": 2.8959244127616483e-06, "loss": 0.3115, "step": 1014 }, { "epoch": 0.776884806735553, "grad_norm": 7.4330902099609375, "learning_rate": 2.877132609250303e-06, "loss": 0.2613, "step": 1015 }, { "epoch": 0.7776502104860313, "grad_norm": 7.014687538146973, "learning_rate": 2.8583917245919944e-06, "loss": 0.2428, "step": 1016 }, { "epoch": 0.7784156142365097, "grad_norm": 6.197044372558594, "learning_rate": 2.839701892758655e-06, "loss": 0.3242, "step": 1017 }, { "epoch": 0.7791810179869881, "grad_norm": 5.864729404449463, "learning_rate": 2.8210632473572664e-06, "loss": 0.3934, "step": 1018 }, { "epoch": 0.7799464217374665, "grad_norm": 9.904959678649902, "learning_rate": 2.8024759216288953e-06, "loss": 0.4493, "step": 1019 }, { "epoch": 0.7807118254879449, "grad_norm": 7.564253330230713, "learning_rate": 2.783940048447743e-06, "loss": 0.3237, "step": 1020 }, { "epoch": 0.7807118254879449, "eval_accuracy": 0.8898916967509025, "eval_f1": 0.8431876606683805, "eval_loss": 0.3033747375011444, "eval_precision": 0.8723404255319149, "eval_recall": 0.8159203980099502, "eval_runtime": 44.2093, "eval_samples_per_second": 6.809, "eval_steps_per_second": 0.226, "step": 1020 }, { "epoch": 0.7814772292384232, "grad_norm": 7.01906156539917, "learning_rate": 2.765455760320196e-06, "loss": 0.2956, "step": 1021 }, { "epoch": 0.7822426329889016, "grad_norm": 5.770253658294678, "learning_rate": 2.7470231893838684e-06, "loss": 0.2521, "step": 1022 }, { "epoch": 0.78300803673938, "grad_norm": 5.1977410316467285, "learning_rate": 2.728642467406679e-06, "loss": 0.2315, "step": 1023 }, { "epoch": 0.7837734404898584, "grad_norm": 6.585521697998047, "learning_rate": 2.7103137257858867e-06, "loss": 0.266, "step": 1024 }, { "epoch": 0.7845388442403368, "grad_norm": 4.882285118103027, "learning_rate": 2.692037095547164e-06, "loss": 0.2697, "step": 1025 }, { "epoch": 0.7853042479908151, "grad_norm": 5.839199542999268, "learning_rate": 2.6738127073436694e-06, "loss": 0.2287, "step": 1026 }, { "epoch": 0.7860696517412935, "grad_norm": 6.01020622253418, "learning_rate": 2.6556406914550803e-06, "loss": 0.2481, "step": 1027 }, { "epoch": 0.7868350554917719, "grad_norm": 6.746147632598877, "learning_rate": 2.6375211777867015e-06, "loss": 0.307, "step": 1028 }, { "epoch": 0.7876004592422503, "grad_norm": 7.418403148651123, "learning_rate": 2.6194542958685052e-06, "loss": 0.3297, "step": 1029 }, { "epoch": 0.7883658629927287, "grad_norm": 5.111098766326904, "learning_rate": 2.601440174854225e-06, "loss": 0.25, "step": 1030 }, { "epoch": 0.789131266743207, "grad_norm": 5.072177410125732, "learning_rate": 2.5834789435204245e-06, "loss": 0.217, "step": 1031 }, { "epoch": 0.7898966704936854, "grad_norm": 9.598026275634766, "learning_rate": 2.5655707302655766e-06, "loss": 0.2256, "step": 1032 }, { "epoch": 0.7906620742441638, "grad_norm": 5.46431303024292, "learning_rate": 2.5477156631091503e-06, "loss": 0.2236, "step": 1033 }, { "epoch": 0.7914274779946422, "grad_norm": 6.440191268920898, "learning_rate": 2.5299138696906833e-06, "loss": 0.2711, "step": 1034 }, { "epoch": 0.7921928817451206, "grad_norm": 6.173571586608887, "learning_rate": 2.512165477268889e-06, "loss": 0.2715, "step": 1035 }, { "epoch": 0.7929582854955989, "grad_norm": 9.205805778503418, "learning_rate": 2.4944706127207252e-06, "loss": 0.3408, "step": 1036 }, { "epoch": 0.7937236892460773, "grad_norm": 6.677053451538086, "learning_rate": 2.476829402540504e-06, "loss": 0.1972, "step": 1037 }, { "epoch": 0.7944890929965557, "grad_norm": 9.463765144348145, "learning_rate": 2.459241972838988e-06, "loss": 0.2984, "step": 1038 }, { "epoch": 0.7952544967470341, "grad_norm": 6.406791687011719, "learning_rate": 2.4417084493424693e-06, "loss": 0.2565, "step": 1039 }, { "epoch": 0.7960199004975125, "grad_norm": 8.168065071105957, "learning_rate": 2.4242289573918933e-06, "loss": 0.4034, "step": 1040 }, { "epoch": 0.7960199004975125, "eval_accuracy": 0.8898916967509025, "eval_f1": 0.8381962864721485, "eval_loss": 0.2988373935222626, "eval_precision": 0.8977272727272727, "eval_recall": 0.7860696517412935, "eval_runtime": 43.2157, "eval_samples_per_second": 6.965, "eval_steps_per_second": 0.231, "step": 1040 }, { "epoch": 0.7967853042479908, "grad_norm": 5.31250524520874, "learning_rate": 2.4068036219419433e-06, "loss": 0.2661, "step": 1041 }, { "epoch": 0.7975507079984692, "grad_norm": 15.13749885559082, "learning_rate": 2.3894325675601683e-06, "loss": 0.3992, "step": 1042 }, { "epoch": 0.7983161117489476, "grad_norm": 7.951029300689697, "learning_rate": 2.3721159184260733e-06, "loss": 0.3912, "step": 1043 }, { "epoch": 0.799081515499426, "grad_norm": 7.845716953277588, "learning_rate": 2.354853798330242e-06, "loss": 0.2465, "step": 1044 }, { "epoch": 0.7998469192499044, "grad_norm": 10.499722480773926, "learning_rate": 2.3376463306734543e-06, "loss": 0.4171, "step": 1045 }, { "epoch": 0.8006123230003827, "grad_norm": 7.074059009552002, "learning_rate": 2.3204936384657873e-06, "loss": 0.333, "step": 1046 }, { "epoch": 0.801377726750861, "grad_norm": 5.240701198577881, "learning_rate": 2.303395844325761e-06, "loss": 0.2089, "step": 1047 }, { "epoch": 0.8021431305013395, "grad_norm": 6.376049041748047, "learning_rate": 2.2863530704794334e-06, "loss": 0.2528, "step": 1048 }, { "epoch": 0.8029085342518179, "grad_norm": 6.376869201660156, "learning_rate": 2.26936543875956e-06, "loss": 0.2357, "step": 1049 }, { "epoch": 0.8036739380022963, "grad_norm": 7.007830619812012, "learning_rate": 2.252433070604695e-06, "loss": 0.3034, "step": 1050 }, { "epoch": 0.8044393417527745, "grad_norm": 5.573482990264893, "learning_rate": 2.2355560870583283e-06, "loss": 0.2589, "step": 1051 }, { "epoch": 0.8052047455032529, "grad_norm": 7.182730674743652, "learning_rate": 2.2187346087680363e-06, "loss": 0.2601, "step": 1052 }, { "epoch": 0.8059701492537313, "grad_norm": 6.265502452850342, "learning_rate": 2.201968755984596e-06, "loss": 0.2131, "step": 1053 }, { "epoch": 0.8067355530042097, "grad_norm": 6.296632289886475, "learning_rate": 2.185258648561147e-06, "loss": 0.2377, "step": 1054 }, { "epoch": 0.8075009567546881, "grad_norm": 6.393189430236816, "learning_rate": 2.1686044059523192e-06, "loss": 0.3424, "step": 1055 }, { "epoch": 0.8082663605051664, "grad_norm": 5.018173694610596, "learning_rate": 2.1520061472133903e-06, "loss": 0.2154, "step": 1056 }, { "epoch": 0.8090317642556448, "grad_norm": 6.2258687019348145, "learning_rate": 2.1354639909994258e-06, "loss": 0.2585, "step": 1057 }, { "epoch": 0.8097971680061232, "grad_norm": 6.394412040710449, "learning_rate": 2.1189780555644302e-06, "loss": 0.3103, "step": 1058 }, { "epoch": 0.8105625717566016, "grad_norm": 6.619604110717773, "learning_rate": 2.1025484587605115e-06, "loss": 0.4228, "step": 1059 }, { "epoch": 0.81132797550708, "grad_norm": 6.956901550292969, "learning_rate": 2.0861753180370324e-06, "loss": 0.2682, "step": 1060 }, { "epoch": 0.81132797550708, "eval_accuracy": 0.8844765342960289, "eval_f1": 0.8350515463917526, "eval_loss": 0.3001127541065216, "eval_precision": 0.8663101604278075, "eval_recall": 0.8059701492537313, "eval_runtime": 43.5926, "eval_samples_per_second": 6.905, "eval_steps_per_second": 0.229, "step": 1060 }, { "epoch": 0.8120933792575583, "grad_norm": 7.195816993713379, "learning_rate": 2.0698587504397684e-06, "loss": 0.3363, "step": 1061 }, { "epoch": 0.8128587830080367, "grad_norm": 7.6192498207092285, "learning_rate": 2.0535988726100774e-06, "loss": 0.3632, "step": 1062 }, { "epoch": 0.8136241867585151, "grad_norm": 8.436617851257324, "learning_rate": 2.0373958007840545e-06, "loss": 0.3416, "step": 1063 }, { "epoch": 0.8143895905089935, "grad_norm": 8.453132629394531, "learning_rate": 2.0212496507917214e-06, "loss": 0.2447, "step": 1064 }, { "epoch": 0.8151549942594719, "grad_norm": 11.446274757385254, "learning_rate": 2.0051605380561702e-06, "loss": 0.3519, "step": 1065 }, { "epoch": 0.8159203980099502, "grad_norm": 5.6378254890441895, "learning_rate": 1.9891285775927684e-06, "loss": 0.2777, "step": 1066 }, { "epoch": 0.8166858017604286, "grad_norm": 11.645222663879395, "learning_rate": 1.973153884008312e-06, "loss": 0.2887, "step": 1067 }, { "epoch": 0.817451205510907, "grad_norm": 5.4502434730529785, "learning_rate": 1.957236571500224e-06, "loss": 0.2295, "step": 1068 }, { "epoch": 0.8182166092613854, "grad_norm": 5.929233074188232, "learning_rate": 1.941376753855728e-06, "loss": 0.2573, "step": 1069 }, { "epoch": 0.8189820130118638, "grad_norm": 7.02168083190918, "learning_rate": 1.925574544451031e-06, "loss": 0.3507, "step": 1070 }, { "epoch": 0.8197474167623421, "grad_norm": 6.753659248352051, "learning_rate": 1.9098300562505266e-06, "loss": 0.3882, "step": 1071 }, { "epoch": 0.8205128205128205, "grad_norm": 7.252670764923096, "learning_rate": 1.8941434018059779e-06, "loss": 0.3117, "step": 1072 }, { "epoch": 0.8212782242632989, "grad_norm": 4.297603130340576, "learning_rate": 1.878514693255714e-06, "loss": 0.2392, "step": 1073 }, { "epoch": 0.8220436280137773, "grad_norm": 8.13119125366211, "learning_rate": 1.8629440423238333e-06, "loss": 0.3269, "step": 1074 }, { "epoch": 0.8228090317642557, "grad_norm": 8.53504753112793, "learning_rate": 1.8474315603193916e-06, "loss": 0.3209, "step": 1075 }, { "epoch": 0.823574435514734, "grad_norm": 7.265506267547607, "learning_rate": 1.8319773581356248e-06, "loss": 0.3279, "step": 1076 }, { "epoch": 0.8243398392652124, "grad_norm": 6.86147928237915, "learning_rate": 1.8165815462491466e-06, "loss": 0.2692, "step": 1077 }, { "epoch": 0.8251052430156908, "grad_norm": 7.1608686447143555, "learning_rate": 1.8012442347191483e-06, "loss": 0.3593, "step": 1078 }, { "epoch": 0.8258706467661692, "grad_norm": 5.432136535644531, "learning_rate": 1.7859655331866422e-06, "loss": 0.3488, "step": 1079 }, { "epoch": 0.8266360505166476, "grad_norm": 8.375764846801758, "learning_rate": 1.7707455508736381e-06, "loss": 0.2921, "step": 1080 }, { "epoch": 0.8266360505166476, "eval_accuracy": 0.8844765342960289, "eval_f1": 0.8324607329842932, "eval_loss": 0.29819196462631226, "eval_precision": 0.8784530386740331, "eval_recall": 0.7910447761194029, "eval_runtime": 43.9709, "eval_samples_per_second": 6.845, "eval_steps_per_second": 0.227, "step": 1080 }, { "epoch": 0.8274014542671259, "grad_norm": 5.469040870666504, "learning_rate": 1.7555843965823992e-06, "loss": 0.2931, "step": 1081 }, { "epoch": 0.8281668580176043, "grad_norm": 6.6713409423828125, "learning_rate": 1.7404821786946346e-06, "loss": 0.3362, "step": 1082 }, { "epoch": 0.8289322617680827, "grad_norm": 7.332741737365723, "learning_rate": 1.725439005170747e-06, "loss": 0.2507, "step": 1083 }, { "epoch": 0.8296976655185611, "grad_norm": 8.208507537841797, "learning_rate": 1.7104549835490491e-06, "loss": 0.3192, "step": 1084 }, { "epoch": 0.8304630692690395, "grad_norm": 6.0084967613220215, "learning_rate": 1.6955302209449987e-06, "loss": 0.3366, "step": 1085 }, { "epoch": 0.8312284730195177, "grad_norm": 6.15051794052124, "learning_rate": 1.680664824050432e-06, "loss": 0.3023, "step": 1086 }, { "epoch": 0.8319938767699961, "grad_norm": 8.824700355529785, "learning_rate": 1.6658588991327962e-06, "loss": 0.2097, "step": 1087 }, { "epoch": 0.8327592805204745, "grad_norm": 4.843833923339844, "learning_rate": 1.6511125520344007e-06, "loss": 0.2767, "step": 1088 }, { "epoch": 0.8335246842709529, "grad_norm": 4.750216007232666, "learning_rate": 1.636425888171652e-06, "loss": 0.2911, "step": 1089 }, { "epoch": 0.8342900880214313, "grad_norm": 4.159714698791504, "learning_rate": 1.6217990125342964e-06, "loss": 0.2666, "step": 1090 }, { "epoch": 0.8350554917719096, "grad_norm": 9.31843090057373, "learning_rate": 1.6072320296846898e-06, "loss": 0.2472, "step": 1091 }, { "epoch": 0.835820895522388, "grad_norm": 11.698112487792969, "learning_rate": 1.5927250437570197e-06, "loss": 0.2629, "step": 1092 }, { "epoch": 0.8365862992728664, "grad_norm": 6.662525653839111, "learning_rate": 1.5782781584565854e-06, "loss": 0.3005, "step": 1093 }, { "epoch": 0.8373517030233448, "grad_norm": 6.270053386688232, "learning_rate": 1.5638914770590508e-06, "loss": 0.2998, "step": 1094 }, { "epoch": 0.8381171067738232, "grad_norm": 8.17238712310791, "learning_rate": 1.5495651024096925e-06, "loss": 0.4168, "step": 1095 }, { "epoch": 0.8388825105243015, "grad_norm": 7.660524845123291, "learning_rate": 1.5352991369226865e-06, "loss": 0.2949, "step": 1096 }, { "epoch": 0.8396479142747799, "grad_norm": 6.679647922515869, "learning_rate": 1.5210936825803602e-06, "loss": 0.3089, "step": 1097 }, { "epoch": 0.8404133180252583, "grad_norm": 6.309601306915283, "learning_rate": 1.5069488409324696e-06, "loss": 0.2447, "step": 1098 }, { "epoch": 0.8411787217757367, "grad_norm": 6.660057544708252, "learning_rate": 1.4928647130954743e-06, "loss": 0.2332, "step": 1099 }, { "epoch": 0.8419441255262151, "grad_norm": 7.898063659667969, "learning_rate": 1.4788413997518026e-06, "loss": 0.3732, "step": 1100 }, { "epoch": 0.8419441255262151, "eval_accuracy": 0.8790613718411552, "eval_f1": 0.8277634961439588, "eval_loss": 0.3002900779247284, "eval_precision": 0.8563829787234043, "eval_recall": 0.8009950248756219, "eval_runtime": 42.9938, "eval_samples_per_second": 7.001, "eval_steps_per_second": 0.233, "step": 1100 }, { "epoch": 0.8427095292766934, "grad_norm": 5.925868034362793, "learning_rate": 1.4648790011491544e-06, "loss": 0.3003, "step": 1101 }, { "epoch": 0.8434749330271718, "grad_norm": 6.9676995277404785, "learning_rate": 1.4509776170997625e-06, "loss": 0.3329, "step": 1102 }, { "epoch": 0.8442403367776502, "grad_norm": 6.943129539489746, "learning_rate": 1.4371373469796956e-06, "loss": 0.2854, "step": 1103 }, { "epoch": 0.8450057405281286, "grad_norm": 7.517777919769287, "learning_rate": 1.4233582897281328e-06, "loss": 0.2719, "step": 1104 }, { "epoch": 0.845771144278607, "grad_norm": 7.715554237365723, "learning_rate": 1.4096405438466687e-06, "loss": 0.3078, "step": 1105 }, { "epoch": 0.8465365480290853, "grad_norm": 7.961045742034912, "learning_rate": 1.3959842073986085e-06, "loss": 0.2775, "step": 1106 }, { "epoch": 0.8473019517795637, "grad_norm": 5.7302751541137695, "learning_rate": 1.3823893780082508e-06, "loss": 0.2433, "step": 1107 }, { "epoch": 0.8480673555300421, "grad_norm": 7.472646236419678, "learning_rate": 1.368856152860215e-06, "loss": 0.3264, "step": 1108 }, { "epoch": 0.8488327592805205, "grad_norm": 8.17573070526123, "learning_rate": 1.3553846286987271e-06, "loss": 0.2075, "step": 1109 }, { "epoch": 0.8495981630309989, "grad_norm": 8.035270690917969, "learning_rate": 1.3419749018269368e-06, "loss": 0.3511, "step": 1110 }, { "epoch": 0.8503635667814772, "grad_norm": 7.398448467254639, "learning_rate": 1.3286270681062275e-06, "loss": 0.2243, "step": 1111 }, { "epoch": 0.8511289705319556, "grad_norm": 5.270333290100098, "learning_rate": 1.3153412229555251e-06, "loss": 0.2921, "step": 1112 }, { "epoch": 0.851894374282434, "grad_norm": 5.221624374389648, "learning_rate": 1.302117461350627e-06, "loss": 0.3181, "step": 1113 }, { "epoch": 0.8526597780329124, "grad_norm": 8.733942985534668, "learning_rate": 1.2889558778235157e-06, "loss": 0.2652, "step": 1114 }, { "epoch": 0.8534251817833908, "grad_norm": 5.429276466369629, "learning_rate": 1.2758565664616829e-06, "loss": 0.2734, "step": 1115 }, { "epoch": 0.8541905855338691, "grad_norm": 10.059110641479492, "learning_rate": 1.262819620907465e-06, "loss": 0.3404, "step": 1116 }, { "epoch": 0.8549559892843475, "grad_norm": 6.145954608917236, "learning_rate": 1.249845134357357e-06, "loss": 0.3076, "step": 1117 }, { "epoch": 0.8557213930348259, "grad_norm": 5.079444885253906, "learning_rate": 1.2369331995613664e-06, "loss": 0.185, "step": 1118 }, { "epoch": 0.8564867967853043, "grad_norm": 15.14505672454834, "learning_rate": 1.224083908822331e-06, "loss": 0.3866, "step": 1119 }, { "epoch": 0.8572522005357827, "grad_norm": 6.147080421447754, "learning_rate": 1.2112973539952777e-06, "loss": 0.324, "step": 1120 }, { "epoch": 0.8572522005357827, "eval_accuracy": 0.8844765342960289, "eval_f1": 0.8333333333333334, "eval_loss": 0.29969924688339233, "eval_precision": 0.8743169398907104, "eval_recall": 0.7960199004975125, "eval_runtime": 43.0138, "eval_samples_per_second": 6.998, "eval_steps_per_second": 0.232, "step": 1120 }, { "epoch": 0.858017604286261, "grad_norm": 7.136957168579102, "learning_rate": 1.198573626486751e-06, "loss": 0.396, "step": 1121 }, { "epoch": 0.8587830080367393, "grad_norm": 5.081778049468994, "learning_rate": 1.1859128172541668e-06, "loss": 0.2741, "step": 1122 }, { "epoch": 0.8595484117872177, "grad_norm": 5.848927974700928, "learning_rate": 1.1733150168051632e-06, "loss": 0.301, "step": 1123 }, { "epoch": 0.8603138155376961, "grad_norm": 8.139251708984375, "learning_rate": 1.1607803151969443e-06, "loss": 0.3968, "step": 1124 }, { "epoch": 0.8610792192881745, "grad_norm": 11.221075057983398, "learning_rate": 1.148308802035648e-06, "loss": 0.3192, "step": 1125 }, { "epoch": 0.8618446230386528, "grad_norm": 12.196139335632324, "learning_rate": 1.1359005664756994e-06, "loss": 0.3429, "step": 1126 }, { "epoch": 0.8626100267891312, "grad_norm": 7.772244453430176, "learning_rate": 1.123555697219174e-06, "loss": 0.3333, "step": 1127 }, { "epoch": 0.8633754305396096, "grad_norm": 6.083074569702148, "learning_rate": 1.1112742825151669e-06, "loss": 0.2641, "step": 1128 }, { "epoch": 0.864140834290088, "grad_norm": 7.137205123901367, "learning_rate": 1.0990564101591527e-06, "loss": 0.3597, "step": 1129 }, { "epoch": 0.8649062380405664, "grad_norm": 8.642711639404297, "learning_rate": 1.0869021674923708e-06, "loss": 0.2681, "step": 1130 }, { "epoch": 0.8656716417910447, "grad_norm": 6.8455305099487305, "learning_rate": 1.074811641401189e-06, "loss": 0.3558, "step": 1131 }, { "epoch": 0.8664370455415231, "grad_norm": 6.04085636138916, "learning_rate": 1.0627849183164906e-06, "loss": 0.3488, "step": 1132 }, { "epoch": 0.8672024492920015, "grad_norm": 6.025130271911621, "learning_rate": 1.0508220842130602e-06, "loss": 0.3252, "step": 1133 }, { "epoch": 0.8679678530424799, "grad_norm": 5.714728355407715, "learning_rate": 1.0389232246089499e-06, "loss": 0.2859, "step": 1134 }, { "epoch": 0.8687332567929583, "grad_norm": 8.63733959197998, "learning_rate": 1.0270884245648905e-06, "loss": 0.3019, "step": 1135 }, { "epoch": 0.8694986605434366, "grad_norm": 5.155510902404785, "learning_rate": 1.015317768683669e-06, "loss": 0.2339, "step": 1136 }, { "epoch": 0.870264064293915, "grad_norm": 7.10530948638916, "learning_rate": 1.0036113411095304e-06, "loss": 0.2472, "step": 1137 }, { "epoch": 0.8710294680443934, "grad_norm": 7.122653007507324, "learning_rate": 9.919692255275747e-07, "loss": 0.3575, "step": 1138 }, { "epoch": 0.8717948717948718, "grad_norm": 7.5401530265808105, "learning_rate": 9.803915051631574e-07, "loss": 0.2504, "step": 1139 }, { "epoch": 0.8725602755453502, "grad_norm": 6.913841247558594, "learning_rate": 9.688782627812965e-07, "loss": 0.3607, "step": 1140 }, { "epoch": 0.8725602755453502, "eval_accuracy": 0.8826714801444043, "eval_f1": 0.8302872062663186, "eval_loss": 0.2986834943294525, "eval_precision": 0.8736263736263736, "eval_recall": 0.7910447761194029, "eval_runtime": 43.2421, "eval_samples_per_second": 6.961, "eval_steps_per_second": 0.231, "step": 1140 }, { "epoch": 0.8733256792958285, "grad_norm": 4.701968193054199, "learning_rate": 9.574295806860767e-07, "loss": 0.222, "step": 1141 }, { "epoch": 0.8740910830463069, "grad_norm": 6.480667591094971, "learning_rate": 9.460455407200708e-07, "loss": 0.3484, "step": 1142 }, { "epoch": 0.8748564867967853, "grad_norm": 6.2637481689453125, "learning_rate": 9.347262242637345e-07, "loss": 0.3202, "step": 1143 }, { "epoch": 0.8756218905472637, "grad_norm": 7.320901393890381, "learning_rate": 9.234717122348558e-07, "loss": 0.3725, "step": 1144 }, { "epoch": 0.8763872942977421, "grad_norm": 6.01532506942749, "learning_rate": 9.122820850879488e-07, "loss": 0.2905, "step": 1145 }, { "epoch": 0.8771526980482205, "grad_norm": 8.483268737792969, "learning_rate": 9.011574228136866e-07, "loss": 0.4175, "step": 1146 }, { "epoch": 0.8779181017986988, "grad_norm": 7.978078365325928, "learning_rate": 8.90097804938338e-07, "loss": 0.3556, "step": 1147 }, { "epoch": 0.8786835055491772, "grad_norm": 4.914220333099365, "learning_rate": 8.791033105231861e-07, "loss": 0.3226, "step": 1148 }, { "epoch": 0.8794489092996556, "grad_norm": 10.738564491271973, "learning_rate": 8.681740181639731e-07, "loss": 0.325, "step": 1149 }, { "epoch": 0.880214313050134, "grad_norm": 5.5547990798950195, "learning_rate": 8.573100059903349e-07, "loss": 0.2508, "step": 1150 }, { "epoch": 0.8809797168006124, "grad_norm": 9.317310333251953, "learning_rate": 8.465113516652424e-07, "loss": 0.2961, "step": 1151 }, { "epoch": 0.8817451205510907, "grad_norm": 4.832771301269531, "learning_rate": 8.357781323844482e-07, "loss": 0.1824, "step": 1152 }, { "epoch": 0.8825105243015691, "grad_norm": 7.913245677947998, "learning_rate": 8.251104248759256e-07, "loss": 0.3127, "step": 1153 }, { "epoch": 0.8832759280520475, "grad_norm": 6.320401668548584, "learning_rate": 8.145083053993364e-07, "loss": 0.2046, "step": 1154 }, { "epoch": 0.8840413318025259, "grad_norm": 6.381113052368164, "learning_rate": 8.039718497454685e-07, "loss": 0.3374, "step": 1155 }, { "epoch": 0.8848067355530043, "grad_norm": 5.279355049133301, "learning_rate": 7.935011332357113e-07, "loss": 0.2347, "step": 1156 }, { "epoch": 0.8855721393034826, "grad_norm": 5.956709861755371, "learning_rate": 7.83096230721505e-07, "loss": 0.2561, "step": 1157 }, { "epoch": 0.886337543053961, "grad_norm": 8.500905990600586, "learning_rate": 7.727572165838038e-07, "loss": 0.3429, "step": 1158 }, { "epoch": 0.8871029468044394, "grad_norm": 11.31344223022461, "learning_rate": 7.624841647325565e-07, "loss": 0.3175, "step": 1159 }, { "epoch": 0.8878683505549178, "grad_norm": 4.852387428283691, "learning_rate": 7.522771486061642e-07, "loss": 0.2201, "step": 1160 }, { "epoch": 0.8878683505549178, "eval_accuracy": 0.8880866425992779, "eval_f1": 0.8368421052631579, "eval_loss": 0.29599303007125854, "eval_precision": 0.888268156424581, "eval_recall": 0.7910447761194029, "eval_runtime": 42.5979, "eval_samples_per_second": 7.066, "eval_steps_per_second": 0.235, "step": 1160 }, { "epoch": 0.8886337543053962, "grad_norm": 5.681868076324463, "learning_rate": 7.421362411709676e-07, "loss": 0.2797, "step": 1161 }, { "epoch": 0.8893991580558744, "grad_norm": 7.943777561187744, "learning_rate": 7.320615149207177e-07, "loss": 0.2878, "step": 1162 }, { "epoch": 0.8901645618063528, "grad_norm": 7.188109397888184, "learning_rate": 7.220530418760597e-07, "loss": 0.2972, "step": 1163 }, { "epoch": 0.8909299655568312, "grad_norm": 5.419342994689941, "learning_rate": 7.121108935840193e-07, "loss": 0.2502, "step": 1164 }, { "epoch": 0.8916953693073096, "grad_norm": 10.313029289245605, "learning_rate": 7.022351411174866e-07, "loss": 0.4279, "step": 1165 }, { "epoch": 0.892460773057788, "grad_norm": 9.825774192810059, "learning_rate": 6.924258550747154e-07, "loss": 0.3422, "step": 1166 }, { "epoch": 0.8932261768082663, "grad_norm": 7.466933250427246, "learning_rate": 6.826831055788119e-07, "loss": 0.2288, "step": 1167 }, { "epoch": 0.8939915805587447, "grad_norm": 4.190829277038574, "learning_rate": 6.730069622772373e-07, "loss": 0.2315, "step": 1168 }, { "epoch": 0.8947569843092231, "grad_norm": 4.927202224731445, "learning_rate": 6.633974943413113e-07, "loss": 0.2855, "step": 1169 }, { "epoch": 0.8955223880597015, "grad_norm": 5.255453109741211, "learning_rate": 6.538547704657094e-07, "loss": 0.2338, "step": 1170 }, { "epoch": 0.8962877918101799, "grad_norm": 6.458939075469971, "learning_rate": 6.443788588679823e-07, "loss": 0.2398, "step": 1171 }, { "epoch": 0.8970531955606582, "grad_norm": 6.905317783355713, "learning_rate": 6.349698272880588e-07, "loss": 0.2978, "step": 1172 }, { "epoch": 0.8978185993111366, "grad_norm": 7.470308780670166, "learning_rate": 6.256277429877711e-07, "loss": 0.2552, "step": 1173 }, { "epoch": 0.898584003061615, "grad_norm": 9.028374671936035, "learning_rate": 6.163526727503688e-07, "loss": 0.2822, "step": 1174 }, { "epoch": 0.8993494068120934, "grad_norm": 4.99279260635376, "learning_rate": 6.071446828800353e-07, "loss": 0.1629, "step": 1175 }, { "epoch": 0.9001148105625718, "grad_norm": 5.656613349914551, "learning_rate": 5.980038392014309e-07, "loss": 0.2495, "step": 1176 }, { "epoch": 0.9008802143130501, "grad_norm": 4.793300628662109, "learning_rate": 5.889302070591985e-07, "loss": 0.1765, "step": 1177 }, { "epoch": 0.9016456180635285, "grad_norm": 5.23650598526001, "learning_rate": 5.79923851317521e-07, "loss": 0.1807, "step": 1178 }, { "epoch": 0.9024110218140069, "grad_norm": 4.662338733673096, "learning_rate": 5.709848363596404e-07, "loss": 0.2996, "step": 1179 }, { "epoch": 0.9031764255644853, "grad_norm": 6.364925384521484, "learning_rate": 5.621132260874051e-07, "loss": 0.2767, "step": 1180 }, { "epoch": 0.9031764255644853, "eval_accuracy": 0.8898916967509025, "eval_f1": 0.8390501319261213, "eval_loss": 0.2949095368385315, "eval_precision": 0.8932584269662921, "eval_recall": 0.7910447761194029, "eval_runtime": 42.8147, "eval_samples_per_second": 7.03, "eval_steps_per_second": 0.234, "step": 1180 }, { "epoch": 0.9039418293149637, "grad_norm": 8.286806106567383, "learning_rate": 5.533090839208133e-07, "loss": 0.3283, "step": 1181 }, { "epoch": 0.904707233065442, "grad_norm": 5.3382720947265625, "learning_rate": 5.445724727975498e-07, "loss": 0.2489, "step": 1182 }, { "epoch": 0.9054726368159204, "grad_norm": 7.994104862213135, "learning_rate": 5.359034551725517e-07, "loss": 0.3883, "step": 1183 }, { "epoch": 0.9062380405663988, "grad_norm": 10.035967826843262, "learning_rate": 5.273020930175543e-07, "loss": 0.325, "step": 1184 }, { "epoch": 0.9070034443168772, "grad_norm": 5.84358549118042, "learning_rate": 5.187684478206412e-07, "loss": 0.2696, "step": 1185 }, { "epoch": 0.9077688480673556, "grad_norm": 5.898288249969482, "learning_rate": 5.103025805858197e-07, "loss": 0.2285, "step": 1186 }, { "epoch": 0.9085342518178339, "grad_norm": 4.795246601104736, "learning_rate": 5.019045518325693e-07, "loss": 0.2324, "step": 1187 }, { "epoch": 0.9092996555683123, "grad_norm": 13.465359687805176, "learning_rate": 4.935744215954197e-07, "loss": 0.3142, "step": 1188 }, { "epoch": 0.9100650593187907, "grad_norm": 7.159090995788574, "learning_rate": 4.853122494235207e-07, "loss": 0.2966, "step": 1189 }, { "epoch": 0.9108304630692691, "grad_norm": 5.706002235412598, "learning_rate": 4.77118094380209e-07, "loss": 0.2718, "step": 1190 }, { "epoch": 0.9115958668197475, "grad_norm": 5.979389190673828, "learning_rate": 4.6899201504259196e-07, "loss": 0.2746, "step": 1191 }, { "epoch": 0.9123612705702258, "grad_norm": 8.064590454101562, "learning_rate": 4.609340695011311e-07, "loss": 0.2624, "step": 1192 }, { "epoch": 0.9131266743207042, "grad_norm": 4.81801176071167, "learning_rate": 4.5294431535922166e-07, "loss": 0.1888, "step": 1193 }, { "epoch": 0.9138920780711826, "grad_norm": 5.313014030456543, "learning_rate": 4.4502280973278135e-07, "loss": 0.3078, "step": 1194 }, { "epoch": 0.914657481821661, "grad_norm": 6.325895309448242, "learning_rate": 4.3716960924984566e-07, "loss": 0.3188, "step": 1195 }, { "epoch": 0.9154228855721394, "grad_norm": 5.998826026916504, "learning_rate": 4.2938477005015853e-07, "loss": 0.3208, "step": 1196 }, { "epoch": 0.9161882893226176, "grad_norm": 6.817664623260498, "learning_rate": 4.2166834778477717e-07, "loss": 0.2706, "step": 1197 }, { "epoch": 0.916953693073096, "grad_norm": 9.771849632263184, "learning_rate": 4.140203976156665e-07, "loss": 0.3291, "step": 1198 }, { "epoch": 0.9177190968235744, "grad_norm": 7.858504772186279, "learning_rate": 4.064409742153097e-07, "loss": 0.3371, "step": 1199 }, { "epoch": 0.9184845005740528, "grad_norm": 4.8687591552734375, "learning_rate": 3.9893013176631636e-07, "loss": 0.2563, "step": 1200 }, { "epoch": 0.9184845005740528, "eval_accuracy": 0.8898916967509025, "eval_f1": 0.8390501319261213, "eval_loss": 0.293884813785553, "eval_precision": 0.8932584269662921, "eval_recall": 0.7910447761194029, "eval_runtime": 43.9202, "eval_samples_per_second": 6.853, "eval_steps_per_second": 0.228, "step": 1200 }, { "epoch": 0.9192499043245312, "grad_norm": 4.442579746246338, "learning_rate": 3.914879239610392e-07, "loss": 0.186, "step": 1201 }, { "epoch": 0.9200153080750095, "grad_norm": 5.45106315612793, "learning_rate": 3.8411440400117685e-07, "loss": 0.1837, "step": 1202 }, { "epoch": 0.9207807118254879, "grad_norm": 4.747509479522705, "learning_rate": 3.768096245974129e-07, "loss": 0.2562, "step": 1203 }, { "epoch": 0.9215461155759663, "grad_norm": 6.138671398162842, "learning_rate": 3.69573637969024e-07, "loss": 0.3244, "step": 1204 }, { "epoch": 0.9223115193264447, "grad_norm": 7.972070217132568, "learning_rate": 3.6240649584351137e-07, "loss": 0.4027, "step": 1205 }, { "epoch": 0.9230769230769231, "grad_norm": 8.4572172164917, "learning_rate": 3.553082494562354e-07, "loss": 0.4941, "step": 1206 }, { "epoch": 0.9238423268274014, "grad_norm": 9.352378845214844, "learning_rate": 3.4827894955003825e-07, "loss": 0.448, "step": 1207 }, { "epoch": 0.9246077305778798, "grad_norm": 7.637875556945801, "learning_rate": 3.413186463748941e-07, "loss": 0.2718, "step": 1208 }, { "epoch": 0.9253731343283582, "grad_norm": 8.596519470214844, "learning_rate": 3.3442738968754164e-07, "loss": 0.2043, "step": 1209 }, { "epoch": 0.9261385380788366, "grad_norm": 11.038840293884277, "learning_rate": 3.276052287511333e-07, "loss": 0.2731, "step": 1210 }, { "epoch": 0.926903941829315, "grad_norm": 6.149134635925293, "learning_rate": 3.2085221233487564e-07, "loss": 0.3046, "step": 1211 }, { "epoch": 0.9276693455797933, "grad_norm": 5.461088180541992, "learning_rate": 3.1416838871368925e-07, "loss": 0.2553, "step": 1212 }, { "epoch": 0.9284347493302717, "grad_norm": 8.15916919708252, "learning_rate": 3.0755380566785955e-07, "loss": 0.2793, "step": 1213 }, { "epoch": 0.9292001530807501, "grad_norm": 6.028532028198242, "learning_rate": 3.010085104826932e-07, "loss": 0.2108, "step": 1214 }, { "epoch": 0.9299655568312285, "grad_norm": 9.626595497131348, "learning_rate": 2.945325499481855e-07, "loss": 0.2889, "step": 1215 }, { "epoch": 0.9307309605817069, "grad_norm": 8.43061637878418, "learning_rate": 2.881259703586814e-07, "loss": 0.3819, "step": 1216 }, { "epoch": 0.9314963643321852, "grad_norm": 9.330650329589844, "learning_rate": 2.817888175125472e-07, "loss": 0.2979, "step": 1217 }, { "epoch": 0.9322617680826636, "grad_norm": 6.501589775085449, "learning_rate": 2.7552113671184264e-07, "loss": 0.293, "step": 1218 }, { "epoch": 0.933027171833142, "grad_norm": 6.367552757263184, "learning_rate": 2.693229727619906e-07, "loss": 0.3728, "step": 1219 }, { "epoch": 0.9337925755836204, "grad_norm": 6.511219501495361, "learning_rate": 2.631943699714712e-07, "loss": 0.2681, "step": 1220 }, { "epoch": 0.9337925755836204, "eval_accuracy": 0.8898916967509025, "eval_f1": 0.8390501319261213, "eval_loss": 0.2956056296825409, "eval_precision": 0.8932584269662921, "eval_recall": 0.7910447761194029, "eval_runtime": 43.3109, "eval_samples_per_second": 6.95, "eval_steps_per_second": 0.231, "step": 1220 }, { "epoch": 0.9345579793340988, "grad_norm": 5.723000526428223, "learning_rate": 2.571353721514913e-07, "loss": 0.2749, "step": 1221 }, { "epoch": 0.9353233830845771, "grad_norm": 8.66303825378418, "learning_rate": 2.51146022615677e-07, "loss": 0.2631, "step": 1222 }, { "epoch": 0.9360887868350555, "grad_norm": 6.536643981933594, "learning_rate": 2.452263641797659e-07, "loss": 0.2504, "step": 1223 }, { "epoch": 0.9368541905855339, "grad_norm": 5.747756481170654, "learning_rate": 2.3937643916129404e-07, "loss": 0.2857, "step": 1224 }, { "epoch": 0.9376195943360123, "grad_norm": 13.398006439208984, "learning_rate": 2.3359628937930422e-07, "loss": 0.4189, "step": 1225 }, { "epoch": 0.9383849980864907, "grad_norm": 5.998396396636963, "learning_rate": 2.2788595615403475e-07, "loss": 0.3231, "step": 1226 }, { "epoch": 0.939150401836969, "grad_norm": 6.068146705627441, "learning_rate": 2.222454803066332e-07, "loss": 0.3236, "step": 1227 }, { "epoch": 0.9399158055874474, "grad_norm": 5.644654750823975, "learning_rate": 2.16674902158861e-07, "loss": 0.3332, "step": 1228 }, { "epoch": 0.9406812093379258, "grad_norm": 4.82579231262207, "learning_rate": 2.111742615328083e-07, "loss": 0.2132, "step": 1229 }, { "epoch": 0.9414466130884042, "grad_norm": 4.6144256591796875, "learning_rate": 2.057435977506028e-07, "loss": 0.2308, "step": 1230 }, { "epoch": 0.9422120168388826, "grad_norm": 10.00190258026123, "learning_rate": 2.0038294963413251e-07, "loss": 0.373, "step": 1231 }, { "epoch": 0.9429774205893608, "grad_norm": 5.754945755004883, "learning_rate": 1.9509235550477123e-07, "loss": 0.2395, "step": 1232 }, { "epoch": 0.9437428243398392, "grad_norm": 6.360520362854004, "learning_rate": 1.8987185318310009e-07, "loss": 0.1902, "step": 1233 }, { "epoch": 0.9445082280903176, "grad_norm": 9.590492248535156, "learning_rate": 1.8472147998863877e-07, "loss": 0.3155, "step": 1234 }, { "epoch": 0.945273631840796, "grad_norm": 7.996187686920166, "learning_rate": 1.796412727395802e-07, "loss": 0.3433, "step": 1235 }, { "epoch": 0.9460390355912744, "grad_norm": 4.422671794891357, "learning_rate": 1.7463126775252192e-07, "loss": 0.237, "step": 1236 }, { "epoch": 0.9468044393417527, "grad_norm": 6.761044979095459, "learning_rate": 1.6969150084221399e-07, "loss": 0.3662, "step": 1237 }, { "epoch": 0.9475698430922311, "grad_norm": 5.3165411949157715, "learning_rate": 1.6482200732129804e-07, "loss": 0.2149, "step": 1238 }, { "epoch": 0.9483352468427095, "grad_norm": 8.114785194396973, "learning_rate": 1.600228220000577e-07, "loss": 0.3416, "step": 1239 }, { "epoch": 0.9491006505931879, "grad_norm": 10.293120384216309, "learning_rate": 1.552939791861663e-07, "loss": 0.3409, "step": 1240 }, { "epoch": 0.9491006505931879, "eval_accuracy": 0.8880866425992779, "eval_f1": 0.8368421052631579, "eval_loss": 0.29501873254776, "eval_precision": 0.888268156424581, "eval_recall": 0.7910447761194029, "eval_runtime": 43.815, "eval_samples_per_second": 6.87, "eval_steps_per_second": 0.228, "step": 1240 }, { "epoch": 0.9498660543436663, "grad_norm": 6.4339799880981445, "learning_rate": 1.5063551268444275e-07, "loss": 0.3244, "step": 1241 }, { "epoch": 0.9506314580941446, "grad_norm": 5.49373722076416, "learning_rate": 1.4604745579661405e-07, "loss": 0.1764, "step": 1242 }, { "epoch": 0.951396861844623, "grad_norm": 6.4061126708984375, "learning_rate": 1.4152984132106972e-07, "loss": 0.3189, "step": 1243 }, { "epoch": 0.9521622655951014, "grad_norm": 5.936630725860596, "learning_rate": 1.370827015526355e-07, "loss": 0.3355, "step": 1244 }, { "epoch": 0.9529276693455798, "grad_norm": 14.100617408752441, "learning_rate": 1.3270606828233668e-07, "loss": 0.5053, "step": 1245 }, { "epoch": 0.9536930730960582, "grad_norm": 8.441110610961914, "learning_rate": 1.2839997279717075e-07, "loss": 0.274, "step": 1246 }, { "epoch": 0.9544584768465365, "grad_norm": 6.178558826446533, "learning_rate": 1.241644458798885e-07, "loss": 0.2966, "step": 1247 }, { "epoch": 0.9552238805970149, "grad_norm": 6.316476345062256, "learning_rate": 1.1999951780876872e-07, "loss": 0.2785, "step": 1248 }, { "epoch": 0.9559892843474933, "grad_norm": 6.520962238311768, "learning_rate": 1.159052183574072e-07, "loss": 0.2933, "step": 1249 }, { "epoch": 0.9567546880979717, "grad_norm": 6.651547431945801, "learning_rate": 1.1188157679449585e-07, "loss": 0.2775, "step": 1250 }, { "epoch": 0.9575200918484501, "grad_norm": 5.902339935302734, "learning_rate": 1.0792862188362396e-07, "loss": 0.2386, "step": 1251 }, { "epoch": 0.9582854955989284, "grad_norm": 7.483514308929443, "learning_rate": 1.0404638188306504e-07, "loss": 0.2501, "step": 1252 }, { "epoch": 0.9590508993494068, "grad_norm": 6.495910167694092, "learning_rate": 1.002348845455725e-07, "loss": 0.3872, "step": 1253 }, { "epoch": 0.9598163030998852, "grad_norm": 6.121851921081543, "learning_rate": 9.64941571181921e-08, "loss": 0.3186, "step": 1254 }, { "epoch": 0.9605817068503636, "grad_norm": 6.671183109283447, "learning_rate": 9.282422634205645e-08, "loss": 0.2947, "step": 1255 }, { "epoch": 0.961347110600842, "grad_norm": 5.844105243682861, "learning_rate": 8.922511845219972e-08, "loss": 0.2272, "step": 1256 }, { "epoch": 0.9621125143513203, "grad_norm": 6.843101501464844, "learning_rate": 8.569685917736659e-08, "loss": 0.2826, "step": 1257 }, { "epoch": 0.9628779181017987, "grad_norm": 6.810047626495361, "learning_rate": 8.223947373983354e-08, "loss": 0.2737, "step": 1258 }, { "epoch": 0.9636433218522771, "grad_norm": 6.269131660461426, "learning_rate": 7.885298685522235e-08, "loss": 0.3041, "step": 1259 }, { "epoch": 0.9644087256027555, "grad_norm": 7.05451774597168, "learning_rate": 7.553742273232578e-08, "loss": 0.3316, "step": 1260 }, { "epoch": 0.9644087256027555, "eval_accuracy": 0.8898916967509025, "eval_f1": 0.8390501319261213, "eval_loss": 0.2938833236694336, "eval_precision": 0.8932584269662921, "eval_recall": 0.7910447761194029, "eval_runtime": 43.817, "eval_samples_per_second": 6.869, "eval_steps_per_second": 0.228, "step": 1260 }, { "epoch": 0.9651741293532339, "grad_norm": 7.257000923156738, "learning_rate": 7.229280507293657e-08, "loss": 0.3027, "step": 1261 }, { "epoch": 0.9659395331037122, "grad_norm": 8.234956741333008, "learning_rate": 6.911915707167538e-08, "loss": 0.3549, "step": 1262 }, { "epoch": 0.9667049368541906, "grad_norm": 6.89831018447876, "learning_rate": 6.601650141582649e-08, "loss": 0.2276, "step": 1263 }, { "epoch": 0.967470340604669, "grad_norm": 5.264804840087891, "learning_rate": 6.29848602851768e-08, "loss": 0.2677, "step": 1264 }, { "epoch": 0.9682357443551474, "grad_norm": 7.13667631149292, "learning_rate": 6.002425535185041e-08, "loss": 0.3305, "step": 1265 }, { "epoch": 0.9690011481056258, "grad_norm": 5.207520008087158, "learning_rate": 5.713470778016539e-08, "loss": 0.2083, "step": 1266 }, { "epoch": 0.969766551856104, "grad_norm": 5.961206436157227, "learning_rate": 5.4316238226469476e-08, "loss": 0.2633, "step": 1267 }, { "epoch": 0.9705319556065825, "grad_norm": 11.930121421813965, "learning_rate": 5.1568866839003525e-08, "loss": 0.3997, "step": 1268 }, { "epoch": 0.9712973593570609, "grad_norm": 6.59713077545166, "learning_rate": 4.889261325775163e-08, "loss": 0.2437, "step": 1269 }, { "epoch": 0.9720627631075393, "grad_norm": 7.702863693237305, "learning_rate": 4.628749661430121e-08, "loss": 0.3456, "step": 1270 }, { "epoch": 0.9728281668580177, "grad_norm": 7.830643177032471, "learning_rate": 4.375353553170647e-08, "loss": 0.3608, "step": 1271 }, { "epoch": 0.9735935706084959, "grad_norm": 7.027949333190918, "learning_rate": 4.1290748124358513e-08, "loss": 0.2728, "step": 1272 }, { "epoch": 0.9743589743589743, "grad_norm": 9.216780662536621, "learning_rate": 3.889915199784877e-08, "loss": 0.3055, "step": 1273 }, { "epoch": 0.9751243781094527, "grad_norm": 5.373678684234619, "learning_rate": 3.657876424885243e-08, "loss": 0.2806, "step": 1274 }, { "epoch": 0.9758897818599311, "grad_norm": 6.474977970123291, "learning_rate": 3.432960146499631e-08, "loss": 0.3257, "step": 1275 }, { "epoch": 0.9766551856104095, "grad_norm": 8.3179292678833, "learning_rate": 3.2151679724748974e-08, "loss": 0.3389, "step": 1276 }, { "epoch": 0.9774205893608878, "grad_norm": 5.711795806884766, "learning_rate": 3.0045014597299695e-08, "loss": 0.2503, "step": 1277 }, { "epoch": 0.9781859931113662, "grad_norm": 5.385677337646484, "learning_rate": 2.800962114245076e-08, "loss": 0.2485, "step": 1278 }, { "epoch": 0.9789513968618446, "grad_norm": 3.9317917823791504, "learning_rate": 2.6045513910509802e-08, "loss": 0.212, "step": 1279 }, { "epoch": 0.979716800612323, "grad_norm": 4.621948719024658, "learning_rate": 2.415270694217986e-08, "loss": 0.1957, "step": 1280 }, { "epoch": 0.979716800612323, "eval_accuracy": 0.8898916967509025, "eval_f1": 0.8390501319261213, "eval_loss": 0.2945975959300995, "eval_precision": 0.8932584269662921, "eval_recall": 0.7910447761194029, "eval_runtime": 42.919, "eval_samples_per_second": 7.013, "eval_steps_per_second": 0.233, "step": 1280 }, { "epoch": 0.9804822043628014, "grad_norm": 6.141805648803711, "learning_rate": 2.2331213768468363e-08, "loss": 0.2438, "step": 1281 }, { "epoch": 0.9812476081132797, "grad_norm": 5.874077320098877, "learning_rate": 2.0581047410583865e-08, "loss": 0.343, "step": 1282 }, { "epoch": 0.9820130118637581, "grad_norm": 9.686785697937012, "learning_rate": 1.8902220379846125e-08, "loss": 0.4448, "step": 1283 }, { "epoch": 0.9827784156142365, "grad_norm": 6.589422225952148, "learning_rate": 1.7294744677591733e-08, "loss": 0.3774, "step": 1284 }, { "epoch": 0.9835438193647149, "grad_norm": 7.531107425689697, "learning_rate": 1.57586317950964e-08, "loss": 0.2591, "step": 1285 }, { "epoch": 0.9843092231151933, "grad_norm": 6.169864654541016, "learning_rate": 1.4293892713486135e-08, "loss": 0.3366, "step": 1286 }, { "epoch": 0.9850746268656716, "grad_norm": 7.703701496124268, "learning_rate": 1.2900537903660637e-08, "loss": 0.2595, "step": 1287 }, { "epoch": 0.98584003061615, "grad_norm": 5.90448522567749, "learning_rate": 1.157857732622003e-08, "loss": 0.2492, "step": 1288 }, { "epoch": 0.9866054343666284, "grad_norm": 5.025811672210693, "learning_rate": 1.0328020431391583e-08, "loss": 0.2422, "step": 1289 }, { "epoch": 0.9873708381171068, "grad_norm": 5.388332843780518, "learning_rate": 9.148876158961983e-09, "loss": 0.2482, "step": 1290 }, { "epoch": 0.9881362418675852, "grad_norm": 4.219669342041016, "learning_rate": 8.041152938216278e-09, "loss": 0.2682, "step": 1291 }, { "epoch": 0.9889016456180635, "grad_norm": 7.032052516937256, "learning_rate": 7.004858687874594e-09, "loss": 0.2261, "step": 1292 }, { "epoch": 0.9896670493685419, "grad_norm": 5.230202674865723, "learning_rate": 6.040000816037728e-09, "loss": 0.2749, "step": 1293 }, { "epoch": 0.9904324531190203, "grad_norm": 6.469751358032227, "learning_rate": 5.146586220131644e-09, "loss": 0.1947, "step": 1294 }, { "epoch": 0.9911978568694987, "grad_norm": 4.652950286865234, "learning_rate": 4.324621286861952e-09, "loss": 0.1941, "step": 1295 }, { "epoch": 0.9919632606199771, "grad_norm": 9.259235382080078, "learning_rate": 3.5741118921628346e-09, "loss": 0.2713, "step": 1296 }, { "epoch": 0.9927286643704554, "grad_norm": 6.85486364364624, "learning_rate": 2.895063401160414e-09, "loss": 0.3251, "step": 1297 }, { "epoch": 0.9934940681209338, "grad_norm": 9.239498138427734, "learning_rate": 2.2874806681305593e-09, "loss": 0.2696, "step": 1298 }, { "epoch": 0.9942594718714122, "grad_norm": 4.937226295471191, "learning_rate": 1.7513680364689145e-09, "loss": 0.2714, "step": 1299 }, { "epoch": 0.9950248756218906, "grad_norm": 8.691539764404297, "learning_rate": 1.2867293386531476e-09, "loss": 0.2439, "step": 1300 }, { "epoch": 0.9950248756218906, "eval_accuracy": 0.8898916967509025, "eval_f1": 0.8390501319261213, "eval_loss": 0.2946934700012207, "eval_precision": 0.8932584269662921, "eval_recall": 0.7910447761194029, "eval_runtime": 43.3576, "eval_samples_per_second": 6.942, "eval_steps_per_second": 0.231, "step": 1300 }, { "epoch": 0.995790279372369, "grad_norm": 7.007760047912598, "learning_rate": 8.935678962196381e-10, "loss": 0.2438, "step": 1301 }, { "epoch": 0.9965556831228473, "grad_norm": 6.088259696960449, "learning_rate": 5.718865197423817e-10, "loss": 0.3118, "step": 1302 }, { "epoch": 0.9973210868733257, "grad_norm": 8.319058418273926, "learning_rate": 3.2168750880634537e-10, "loss": 0.36, "step": 1303 }, { "epoch": 0.9980864906238041, "grad_norm": 7.24370813369751, "learning_rate": 1.4297265199414434e-10, "loss": 0.3103, "step": 1304 }, { "epoch": 0.9988518943742825, "grad_norm": 5.21471643447876, "learning_rate": 3.57432268771607e-11, "loss": 0.2336, "step": 1305 }, { "epoch": 0.9996172981247609, "grad_norm": 4.997439384460449, "learning_rate": 0.0, "loss": 0.2676, "step": 1306 } ], "logging_steps": 1, "max_steps": 1306, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 100, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 2.006617662863442e+17, "train_batch_size": 8, "trial_name": null, "trial_params": null }