{ "best_metric": 0.9845317725752508, "best_model_checkpoint": "beit-base-patch16-224-pt22k-ft22k-finetuned-lora-medmnistv2/checkpoint-2030", "epoch": 9.98769987699877, "eval_steps": 500, "global_step": 2030, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.05, "grad_norm": 3.528864860534668, "learning_rate": 0.004975369458128079, "loss": 1.7726, "step": 10 }, { "epoch": 0.1, "grad_norm": 3.5074732303619385, "learning_rate": 0.004950738916256157, "loss": 1.1827, "step": 20 }, { "epoch": 0.15, "grad_norm": 3.2094099521636963, "learning_rate": 0.0049261083743842365, "loss": 1.1119, "step": 30 }, { "epoch": 0.2, "grad_norm": 3.396892786026001, "learning_rate": 0.004901477832512316, "loss": 1.005, "step": 40 }, { "epoch": 0.25, "grad_norm": 6.750483512878418, "learning_rate": 0.004876847290640395, "loss": 1.0136, "step": 50 }, { "epoch": 0.3, "grad_norm": 1.9515941143035889, "learning_rate": 0.004852216748768473, "loss": 0.9685, "step": 60 }, { "epoch": 0.34, "grad_norm": 4.040039539337158, "learning_rate": 0.004827586206896552, "loss": 0.8341, "step": 70 }, { "epoch": 0.39, "grad_norm": 3.427025318145752, "learning_rate": 0.004802955665024631, "loss": 0.9924, "step": 80 }, { "epoch": 0.44, "grad_norm": 2.151984691619873, "learning_rate": 0.004778325123152709, "loss": 0.8987, "step": 90 }, { "epoch": 0.49, "grad_norm": 2.4919590950012207, "learning_rate": 0.00475615763546798, "loss": 0.8009, "step": 100 }, { "epoch": 0.54, "grad_norm": 5.044064521789551, "learning_rate": 0.004731527093596059, "loss": 0.8151, "step": 110 }, { "epoch": 0.59, "grad_norm": 1.9818896055221558, "learning_rate": 0.004706896551724138, "loss": 0.8661, "step": 120 }, { "epoch": 0.64, "grad_norm": 3.006596326828003, "learning_rate": 0.004682266009852217, "loss": 0.778, "step": 130 }, { "epoch": 0.69, "grad_norm": 1.7369205951690674, "learning_rate": 0.004657635467980295, "loss": 0.7773, "step": 140 }, { "epoch": 0.74, "grad_norm": 2.791836738586426, "learning_rate": 0.0046330049261083745, "loss": 0.8726, "step": 150 }, { "epoch": 0.79, "grad_norm": 4.622689723968506, "learning_rate": 0.004608374384236454, "loss": 0.8111, "step": 160 }, { "epoch": 0.84, "grad_norm": 1.5161832571029663, "learning_rate": 0.004583743842364533, "loss": 0.8989, "step": 170 }, { "epoch": 0.89, "grad_norm": 1.7710577249526978, "learning_rate": 0.004559113300492611, "loss": 0.7968, "step": 180 }, { "epoch": 0.93, "grad_norm": 2.7612533569335938, "learning_rate": 0.00453448275862069, "loss": 0.7998, "step": 190 }, { "epoch": 0.98, "grad_norm": 3.0397326946258545, "learning_rate": 0.004509852216748769, "loss": 0.8262, "step": 200 }, { "epoch": 1.0, "eval_accuracy": 0.8867056856187291, "eval_f1": 0.8260371939184133, "eval_loss": 0.2666037082672119, "eval_precision": 0.9001689475695723, "eval_recall": 0.8322255195197975, "eval_runtime": 13.7975, "eval_samples_per_second": 173.364, "eval_steps_per_second": 10.872, "step": 203 }, { "epoch": 1.03, "grad_norm": 1.7258822917938232, "learning_rate": 0.004485221674876847, "loss": 0.8387, "step": 210 }, { "epoch": 1.08, "grad_norm": 2.434767484664917, "learning_rate": 0.004460591133004926, "loss": 0.6806, "step": 220 }, { "epoch": 1.13, "grad_norm": 2.592229127883911, "learning_rate": 0.004435960591133005, "loss": 0.7756, "step": 230 }, { "epoch": 1.18, "grad_norm": 1.330603003501892, "learning_rate": 0.004411330049261084, "loss": 0.7266, "step": 240 }, { "epoch": 1.23, "grad_norm": 2.4841859340667725, "learning_rate": 0.004386699507389163, "loss": 0.6754, "step": 250 }, { "epoch": 1.28, "grad_norm": 1.9352140426635742, "learning_rate": 0.004362068965517241, "loss": 0.6658, "step": 260 }, { "epoch": 1.33, "grad_norm": 3.0308997631073, "learning_rate": 0.00433743842364532, "loss": 0.7867, "step": 270 }, { "epoch": 1.38, "grad_norm": 2.155539035797119, "learning_rate": 0.004312807881773399, "loss": 0.7005, "step": 280 }, { "epoch": 1.43, "grad_norm": 1.6944102048873901, "learning_rate": 0.004288177339901478, "loss": 0.6911, "step": 290 }, { "epoch": 1.48, "grad_norm": 2.3200197219848633, "learning_rate": 0.0042635467980295565, "loss": 0.7285, "step": 300 }, { "epoch": 1.53, "grad_norm": 3.5465104579925537, "learning_rate": 0.004238916256157636, "loss": 0.7058, "step": 310 }, { "epoch": 1.57, "grad_norm": 2.2330048084259033, "learning_rate": 0.004214285714285715, "loss": 0.7698, "step": 320 }, { "epoch": 1.62, "grad_norm": 2.0374197959899902, "learning_rate": 0.004189655172413793, "loss": 0.7587, "step": 330 }, { "epoch": 1.67, "grad_norm": 2.1679511070251465, "learning_rate": 0.004165024630541872, "loss": 0.7052, "step": 340 }, { "epoch": 1.72, "grad_norm": 1.7825062274932861, "learning_rate": 0.004140394088669951, "loss": 0.8108, "step": 350 }, { "epoch": 1.77, "grad_norm": 1.918578863143921, "learning_rate": 0.00411576354679803, "loss": 0.716, "step": 360 }, { "epoch": 1.82, "grad_norm": 1.9800987243652344, "learning_rate": 0.004091133004926108, "loss": 0.7882, "step": 370 }, { "epoch": 1.87, "grad_norm": 2.024862289428711, "learning_rate": 0.0040665024630541875, "loss": 0.6802, "step": 380 }, { "epoch": 1.92, "grad_norm": 3.392876386642456, "learning_rate": 0.004041871921182267, "loss": 0.7433, "step": 390 }, { "epoch": 1.97, "grad_norm": 1.8368072509765625, "learning_rate": 0.004017241379310345, "loss": 0.6431, "step": 400 }, { "epoch": 2.0, "eval_accuracy": 0.9535953177257525, "eval_f1": 0.9397119704907577, "eval_loss": 0.15141013264656067, "eval_precision": 0.9486140571898312, "eval_recall": 0.9376925488488177, "eval_runtime": 13.9517, "eval_samples_per_second": 171.449, "eval_steps_per_second": 10.751, "step": 406 }, { "epoch": 2.02, "grad_norm": 2.830169677734375, "learning_rate": 0.003992610837438423, "loss": 0.7549, "step": 410 }, { "epoch": 2.07, "grad_norm": 2.7798097133636475, "learning_rate": 0.003967980295566502, "loss": 0.7119, "step": 420 }, { "epoch": 2.12, "grad_norm": 1.7284464836120605, "learning_rate": 0.003943349753694581, "loss": 0.6626, "step": 430 }, { "epoch": 2.16, "grad_norm": 1.5493806600570679, "learning_rate": 0.00391871921182266, "loss": 0.6571, "step": 440 }, { "epoch": 2.21, "grad_norm": 1.844478964805603, "learning_rate": 0.003894088669950739, "loss": 0.7092, "step": 450 }, { "epoch": 2.26, "grad_norm": 2.4284541606903076, "learning_rate": 0.0038694581280788176, "loss": 0.6968, "step": 460 }, { "epoch": 2.31, "grad_norm": 1.6052756309509277, "learning_rate": 0.0038448275862068967, "loss": 0.6914, "step": 470 }, { "epoch": 2.36, "grad_norm": 3.0184834003448486, "learning_rate": 0.0038201970443349754, "loss": 0.6652, "step": 480 }, { "epoch": 2.41, "grad_norm": 2.026437520980835, "learning_rate": 0.0037955665024630545, "loss": 0.5978, "step": 490 }, { "epoch": 2.46, "grad_norm": 1.569425106048584, "learning_rate": 0.003770935960591133, "loss": 0.5897, "step": 500 }, { "epoch": 2.51, "grad_norm": 1.7113816738128662, "learning_rate": 0.0037463054187192118, "loss": 0.6609, "step": 510 }, { "epoch": 2.56, "grad_norm": 2.4151597023010254, "learning_rate": 0.003721674876847291, "loss": 0.7075, "step": 520 }, { "epoch": 2.61, "grad_norm": 3.4408905506134033, "learning_rate": 0.0036970443349753695, "loss": 0.7336, "step": 530 }, { "epoch": 2.66, "grad_norm": 1.7509477138519287, "learning_rate": 0.0036724137931034486, "loss": 0.7206, "step": 540 }, { "epoch": 2.71, "grad_norm": 1.6178234815597534, "learning_rate": 0.0036477832512315273, "loss": 0.5657, "step": 550 }, { "epoch": 2.76, "grad_norm": 2.5884556770324707, "learning_rate": 0.0036231527093596064, "loss": 0.6937, "step": 560 }, { "epoch": 2.8, "grad_norm": 2.045440196990967, "learning_rate": 0.003598522167487685, "loss": 0.7115, "step": 570 }, { "epoch": 2.85, "grad_norm": 1.657232642173767, "learning_rate": 0.0035738916256157637, "loss": 0.6461, "step": 580 }, { "epoch": 2.9, "grad_norm": 1.475516676902771, "learning_rate": 0.0035492610837438428, "loss": 0.6657, "step": 590 }, { "epoch": 2.95, "grad_norm": 2.4086556434631348, "learning_rate": 0.003524630541871921, "loss": 0.6986, "step": 600 }, { "epoch": 3.0, "eval_accuracy": 0.9765886287625418, "eval_f1": 0.9730141573380251, "eval_loss": 0.11787305772304535, "eval_precision": 0.9709586602228021, "eval_recall": 0.9768791114504076, "eval_runtime": 13.8653, "eval_samples_per_second": 172.517, "eval_steps_per_second": 10.818, "step": 609 }, { "epoch": 3.0, "grad_norm": 2.560819387435913, "learning_rate": 0.0034999999999999996, "loss": 0.6837, "step": 610 }, { "epoch": 3.05, "grad_norm": 1.1871329545974731, "learning_rate": 0.0034753694581280787, "loss": 0.6576, "step": 620 }, { "epoch": 3.1, "grad_norm": 2.1404545307159424, "learning_rate": 0.0034507389162561574, "loss": 0.6749, "step": 630 }, { "epoch": 3.15, "grad_norm": 2.5356063842773438, "learning_rate": 0.0034261083743842365, "loss": 0.6164, "step": 640 }, { "epoch": 3.2, "grad_norm": 1.3809722661972046, "learning_rate": 0.003401477832512315, "loss": 0.6707, "step": 650 }, { "epoch": 3.25, "grad_norm": 1.9669575691223145, "learning_rate": 0.0033768472906403942, "loss": 0.5547, "step": 660 }, { "epoch": 3.3, "grad_norm": 1.718974232673645, "learning_rate": 0.003352216748768473, "loss": 0.6739, "step": 670 }, { "epoch": 3.35, "grad_norm": 1.3080029487609863, "learning_rate": 0.003327586206896552, "loss": 0.5895, "step": 680 }, { "epoch": 3.39, "grad_norm": 1.3631353378295898, "learning_rate": 0.0033029556650246306, "loss": 0.6361, "step": 690 }, { "epoch": 3.44, "grad_norm": 2.3547539710998535, "learning_rate": 0.0032783251231527093, "loss": 0.6679, "step": 700 }, { "epoch": 3.49, "grad_norm": 1.9778302907943726, "learning_rate": 0.0032536945812807884, "loss": 0.6053, "step": 710 }, { "epoch": 3.54, "grad_norm": 2.3876760005950928, "learning_rate": 0.003229064039408867, "loss": 0.6499, "step": 720 }, { "epoch": 3.59, "grad_norm": 1.9629145860671997, "learning_rate": 0.003204433497536946, "loss": 0.5895, "step": 730 }, { "epoch": 3.64, "grad_norm": 2.662309169769287, "learning_rate": 0.0031798029556650248, "loss": 0.6268, "step": 740 }, { "epoch": 3.69, "grad_norm": 1.559710144996643, "learning_rate": 0.003155172413793104, "loss": 0.7095, "step": 750 }, { "epoch": 3.74, "grad_norm": 1.5949896574020386, "learning_rate": 0.0031305418719211825, "loss": 0.5834, "step": 760 }, { "epoch": 3.79, "grad_norm": 1.7590668201446533, "learning_rate": 0.0031059113300492616, "loss": 0.5529, "step": 770 }, { "epoch": 3.84, "grad_norm": 1.73826265335083, "learning_rate": 0.00308128078817734, "loss": 0.6756, "step": 780 }, { "epoch": 3.89, "grad_norm": 2.0228023529052734, "learning_rate": 0.0030566502463054185, "loss": 0.5661, "step": 790 }, { "epoch": 3.94, "grad_norm": 1.5560855865478516, "learning_rate": 0.0030320197044334976, "loss": 0.5913, "step": 800 }, { "epoch": 3.99, "grad_norm": 2.173814058303833, "learning_rate": 0.0030073891625615762, "loss": 0.5797, "step": 810 }, { "epoch": 4.0, "eval_accuracy": 0.9765886287625418, "eval_f1": 0.9757773872830087, "eval_loss": 0.10448675602674484, "eval_precision": 0.9756232675822735, "eval_recall": 0.9768377118491273, "eval_runtime": 13.948, "eval_samples_per_second": 171.494, "eval_steps_per_second": 10.754, "step": 813 }, { "epoch": 4.03, "grad_norm": 1.4235618114471436, "learning_rate": 0.002982758620689655, "loss": 0.6117, "step": 820 }, { "epoch": 4.08, "grad_norm": 1.621345043182373, "learning_rate": 0.002958128078817734, "loss": 0.5411, "step": 830 }, { "epoch": 4.13, "grad_norm": 1.9347153902053833, "learning_rate": 0.0029334975369458127, "loss": 0.544, "step": 840 }, { "epoch": 4.18, "grad_norm": 1.6332886219024658, "learning_rate": 0.0029088669950738917, "loss": 0.6507, "step": 850 }, { "epoch": 4.23, "grad_norm": 1.3586100339889526, "learning_rate": 0.0028842364532019704, "loss": 0.5219, "step": 860 }, { "epoch": 4.28, "grad_norm": 1.4624931812286377, "learning_rate": 0.0028596059113300495, "loss": 0.5906, "step": 870 }, { "epoch": 4.33, "grad_norm": 2.6470000743865967, "learning_rate": 0.002834975369458128, "loss": 0.4967, "step": 880 }, { "epoch": 4.38, "grad_norm": 1.2781357765197754, "learning_rate": 0.002810344827586207, "loss": 0.6435, "step": 890 }, { "epoch": 4.43, "grad_norm": 2.159756898880005, "learning_rate": 0.002785714285714286, "loss": 0.5514, "step": 900 }, { "epoch": 4.48, "grad_norm": 1.7241922616958618, "learning_rate": 0.0027610837438423646, "loss": 0.5388, "step": 910 }, { "epoch": 4.53, "grad_norm": 1.4886691570281982, "learning_rate": 0.0027364532019704436, "loss": 0.5093, "step": 920 }, { "epoch": 4.58, "grad_norm": 1.4995299577713013, "learning_rate": 0.0027118226600985223, "loss": 0.526, "step": 930 }, { "epoch": 4.62, "grad_norm": 1.1041914224624634, "learning_rate": 0.0026871921182266014, "loss": 0.5983, "step": 940 }, { "epoch": 4.67, "grad_norm": 2.8604071140289307, "learning_rate": 0.00266256157635468, "loss": 0.5675, "step": 950 }, { "epoch": 4.72, "grad_norm": 1.5616779327392578, "learning_rate": 0.002637931034482759, "loss": 0.5572, "step": 960 }, { "epoch": 4.77, "grad_norm": 1.478014349937439, "learning_rate": 0.0026133004926108374, "loss": 0.5287, "step": 970 }, { "epoch": 4.82, "grad_norm": 2.3648438453674316, "learning_rate": 0.002588669950738916, "loss": 0.5468, "step": 980 }, { "epoch": 4.87, "grad_norm": 1.8113144636154175, "learning_rate": 0.002564039408866995, "loss": 0.5709, "step": 990 }, { "epoch": 4.92, "grad_norm": 2.2772140502929688, "learning_rate": 0.0025394088669950738, "loss": 0.593, "step": 1000 }, { "epoch": 4.97, "grad_norm": 1.3280140161514282, "learning_rate": 0.0025147783251231524, "loss": 0.5475, "step": 1010 }, { "epoch": 5.0, "eval_accuracy": 0.9707357859531772, "eval_f1": 0.9658684578754092, "eval_loss": 0.1280980110168457, "eval_precision": 0.9676857183807798, "eval_recall": 0.9662304923874783, "eval_runtime": 14.1292, "eval_samples_per_second": 169.295, "eval_steps_per_second": 10.616, "step": 1016 }, { "epoch": 5.02, "grad_norm": 1.8137166500091553, "learning_rate": 0.0024901477832512315, "loss": 0.5047, "step": 1020 }, { "epoch": 5.07, "grad_norm": 1.9502696990966797, "learning_rate": 0.00246551724137931, "loss": 0.5084, "step": 1030 }, { "epoch": 5.12, "grad_norm": 2.115793228149414, "learning_rate": 0.0024408866995073893, "loss": 0.5282, "step": 1040 }, { "epoch": 5.17, "grad_norm": 1.8565967082977295, "learning_rate": 0.002416256157635468, "loss": 0.4794, "step": 1050 }, { "epoch": 5.22, "grad_norm": 1.7492934465408325, "learning_rate": 0.002391625615763547, "loss": 0.531, "step": 1060 }, { "epoch": 5.26, "grad_norm": 1.3985103368759155, "learning_rate": 0.0023669950738916257, "loss": 0.5849, "step": 1070 }, { "epoch": 5.31, "grad_norm": 1.594131588935852, "learning_rate": 0.0023423645320197048, "loss": 0.5398, "step": 1080 }, { "epoch": 5.36, "grad_norm": 1.7168729305267334, "learning_rate": 0.0023177339901477834, "loss": 0.4962, "step": 1090 }, { "epoch": 5.41, "grad_norm": 1.2366892099380493, "learning_rate": 0.002293103448275862, "loss": 0.5225, "step": 1100 }, { "epoch": 5.46, "grad_norm": 1.2104135751724243, "learning_rate": 0.0022684729064039407, "loss": 0.5312, "step": 1110 }, { "epoch": 5.51, "grad_norm": 1.743102788925171, "learning_rate": 0.00224384236453202, "loss": 0.4379, "step": 1120 }, { "epoch": 5.56, "grad_norm": 1.3108042478561401, "learning_rate": 0.0022192118226600985, "loss": 0.484, "step": 1130 }, { "epoch": 5.61, "grad_norm": 1.5817514657974243, "learning_rate": 0.0021945812807881776, "loss": 0.4318, "step": 1140 }, { "epoch": 5.66, "grad_norm": 2.16567063331604, "learning_rate": 0.0021699507389162562, "loss": 0.4647, "step": 1150 }, { "epoch": 5.71, "grad_norm": 1.5236085653305054, "learning_rate": 0.002145320197044335, "loss": 0.5744, "step": 1160 }, { "epoch": 5.76, "grad_norm": 1.4826277494430542, "learning_rate": 0.002120689655172414, "loss": 0.4169, "step": 1170 }, { "epoch": 5.81, "grad_norm": 1.5402677059173584, "learning_rate": 0.0020960591133004926, "loss": 0.589, "step": 1180 }, { "epoch": 5.85, "grad_norm": 1.908897042274475, "learning_rate": 0.0020714285714285717, "loss": 0.4665, "step": 1190 }, { "epoch": 5.9, "grad_norm": 2.138887882232666, "learning_rate": 0.0020467980295566504, "loss": 0.4831, "step": 1200 }, { "epoch": 5.95, "grad_norm": 1.1232534646987915, "learning_rate": 0.002022167487684729, "loss": 0.5518, "step": 1210 }, { "epoch": 6.0, "eval_accuracy": 0.9832775919732442, "eval_f1": 0.9813304673339887, "eval_loss": 0.0764709934592247, "eval_precision": 0.9790793820260529, "eval_recall": 0.984228373175378, "eval_runtime": 13.8821, "eval_samples_per_second": 172.308, "eval_steps_per_second": 10.805, "step": 1219 }, { "epoch": 6.0, "grad_norm": 1.9790089130401611, "learning_rate": 0.0019975369458128077, "loss": 0.4465, "step": 1220 }, { "epoch": 6.05, "grad_norm": 1.8086011409759521, "learning_rate": 0.0019729064039408868, "loss": 0.4783, "step": 1230 }, { "epoch": 6.1, "grad_norm": 1.832200050354004, "learning_rate": 0.0019482758620689657, "loss": 0.4658, "step": 1240 }, { "epoch": 6.15, "grad_norm": 1.0026772022247314, "learning_rate": 0.0019236453201970443, "loss": 0.4357, "step": 1250 }, { "epoch": 6.2, "grad_norm": 1.8719532489776611, "learning_rate": 0.0018990147783251232, "loss": 0.491, "step": 1260 }, { "epoch": 6.25, "grad_norm": 1.3923914432525635, "learning_rate": 0.001874384236453202, "loss": 0.4476, "step": 1270 }, { "epoch": 6.3, "grad_norm": 1.7893891334533691, "learning_rate": 0.001849753694581281, "loss": 0.4684, "step": 1280 }, { "epoch": 6.35, "grad_norm": 1.4994755983352661, "learning_rate": 0.0018251231527093596, "loss": 0.4309, "step": 1290 }, { "epoch": 6.4, "grad_norm": 1.227845549583435, "learning_rate": 0.0018004926108374385, "loss": 0.444, "step": 1300 }, { "epoch": 6.45, "grad_norm": 1.2157208919525146, "learning_rate": 0.0017758620689655171, "loss": 0.4201, "step": 1310 }, { "epoch": 6.49, "grad_norm": 1.3680998086929321, "learning_rate": 0.001751231527093596, "loss": 0.4305, "step": 1320 }, { "epoch": 6.54, "grad_norm": 1.839348554611206, "learning_rate": 0.0017266009852216749, "loss": 0.5415, "step": 1330 }, { "epoch": 6.59, "grad_norm": 1.5122746229171753, "learning_rate": 0.0017019704433497537, "loss": 0.4457, "step": 1340 }, { "epoch": 6.64, "grad_norm": 0.9478976726531982, "learning_rate": 0.0016773399014778326, "loss": 0.4858, "step": 1350 }, { "epoch": 6.69, "grad_norm": 0.8827309012413025, "learning_rate": 0.0016527093596059115, "loss": 0.4649, "step": 1360 }, { "epoch": 6.74, "grad_norm": 1.5809803009033203, "learning_rate": 0.0016280788177339904, "loss": 0.451, "step": 1370 }, { "epoch": 6.79, "grad_norm": 1.2577319145202637, "learning_rate": 0.0016034482758620688, "loss": 0.4865, "step": 1380 }, { "epoch": 6.84, "grad_norm": 1.109689712524414, "learning_rate": 0.0015788177339901477, "loss": 0.4085, "step": 1390 }, { "epoch": 6.89, "grad_norm": 1.1977938413619995, "learning_rate": 0.0015541871921182266, "loss": 0.4371, "step": 1400 }, { "epoch": 6.94, "grad_norm": 1.5450685024261475, "learning_rate": 0.0015295566502463054, "loss": 0.4127, "step": 1410 }, { "epoch": 6.99, "grad_norm": 1.2304378747940063, "learning_rate": 0.0015049261083743843, "loss": 0.5167, "step": 1420 }, { "epoch": 7.0, "eval_accuracy": 0.9724080267558528, "eval_f1": 0.9735373103871435, "eval_loss": 0.106478750705719, "eval_precision": 0.978469583949093, "eval_recall": 0.9701472747924569, "eval_runtime": 13.7702, "eval_samples_per_second": 173.709, "eval_steps_per_second": 10.893, "step": 1422 }, { "epoch": 7.04, "grad_norm": 0.8643027544021606, "learning_rate": 0.0014802955665024632, "loss": 0.3761, "step": 1430 }, { "epoch": 7.08, "grad_norm": 1.3667055368423462, "learning_rate": 0.001455665024630542, "loss": 0.4071, "step": 1440 }, { "epoch": 7.13, "grad_norm": 1.4807791709899902, "learning_rate": 0.0014310344827586207, "loss": 0.4255, "step": 1450 }, { "epoch": 7.18, "grad_norm": 1.441760540008545, "learning_rate": 0.0014064039408866996, "loss": 0.4224, "step": 1460 }, { "epoch": 7.23, "grad_norm": 1.4927411079406738, "learning_rate": 0.0013817733990147782, "loss": 0.4259, "step": 1470 }, { "epoch": 7.28, "grad_norm": 1.20118248462677, "learning_rate": 0.0013571428571428571, "loss": 0.4146, "step": 1480 }, { "epoch": 7.33, "grad_norm": 1.276848316192627, "learning_rate": 0.001332512315270936, "loss": 0.4202, "step": 1490 }, { "epoch": 7.38, "grad_norm": 1.1261953115463257, "learning_rate": 0.0013078817733990149, "loss": 0.4193, "step": 1500 }, { "epoch": 7.43, "grad_norm": 1.2932482957839966, "learning_rate": 0.0012832512315270935, "loss": 0.3777, "step": 1510 }, { "epoch": 7.48, "grad_norm": 1.6696242094039917, "learning_rate": 0.0012586206896551724, "loss": 0.4225, "step": 1520 }, { "epoch": 7.53, "grad_norm": 1.254827618598938, "learning_rate": 0.0012339901477832513, "loss": 0.3798, "step": 1530 }, { "epoch": 7.58, "grad_norm": 1.5830212831497192, "learning_rate": 0.00120935960591133, "loss": 0.4432, "step": 1540 }, { "epoch": 7.63, "grad_norm": 1.2743747234344482, "learning_rate": 0.0011847290640394088, "loss": 0.5163, "step": 1550 }, { "epoch": 7.68, "grad_norm": 1.226605772972107, "learning_rate": 0.0011600985221674877, "loss": 0.3497, "step": 1560 }, { "epoch": 7.72, "grad_norm": 1.7948116064071655, "learning_rate": 0.0011354679802955665, "loss": 0.4091, "step": 1570 }, { "epoch": 7.77, "grad_norm": 1.2246979475021362, "learning_rate": 0.0011108374384236454, "loss": 0.397, "step": 1580 }, { "epoch": 7.82, "grad_norm": 1.289957880973816, "learning_rate": 0.001086206896551724, "loss": 0.4196, "step": 1590 }, { "epoch": 7.87, "grad_norm": 1.0236166715621948, "learning_rate": 0.001061576354679803, "loss": 0.3323, "step": 1600 }, { "epoch": 7.92, "grad_norm": 1.5420043468475342, "learning_rate": 0.0010369458128078818, "loss": 0.3967, "step": 1610 }, { "epoch": 7.97, "grad_norm": 0.9735037684440613, "learning_rate": 0.0010123152709359607, "loss": 0.4417, "step": 1620 }, { "epoch": 8.0, "eval_accuracy": 0.9824414715719063, "eval_f1": 0.9836834235379569, "eval_loss": 0.10265911370515823, "eval_precision": 0.9848070192675991, "eval_recall": 0.9834171420418744, "eval_runtime": 13.836, "eval_samples_per_second": 172.882, "eval_steps_per_second": 10.841, "step": 1626 }, { "epoch": 8.02, "grad_norm": 1.333899736404419, "learning_rate": 0.0009876847290640393, "loss": 0.3266, "step": 1630 }, { "epoch": 8.07, "grad_norm": 1.3696863651275635, "learning_rate": 0.0009630541871921182, "loss": 0.3963, "step": 1640 }, { "epoch": 8.12, "grad_norm": 1.2078107595443726, "learning_rate": 0.0009384236453201971, "loss": 0.4219, "step": 1650 }, { "epoch": 8.17, "grad_norm": 1.5616003274917603, "learning_rate": 0.0009137931034482759, "loss": 0.372, "step": 1660 }, { "epoch": 8.22, "grad_norm": 1.2028272151947021, "learning_rate": 0.0008891625615763547, "loss": 0.3209, "step": 1670 }, { "epoch": 8.27, "grad_norm": 1.6825675964355469, "learning_rate": 0.0008645320197044335, "loss": 0.4092, "step": 1680 }, { "epoch": 8.31, "grad_norm": 1.083766222000122, "learning_rate": 0.0008399014778325123, "loss": 0.3019, "step": 1690 }, { "epoch": 8.36, "grad_norm": 1.9076427221298218, "learning_rate": 0.0008152709359605911, "loss": 0.3971, "step": 1700 }, { "epoch": 8.41, "grad_norm": 1.4987802505493164, "learning_rate": 0.00079064039408867, "loss": 0.347, "step": 1710 }, { "epoch": 8.46, "grad_norm": 1.5171914100646973, "learning_rate": 0.0007660098522167489, "loss": 0.3459, "step": 1720 }, { "epoch": 8.51, "grad_norm": 1.1429753303527832, "learning_rate": 0.0007413793103448275, "loss": 0.3607, "step": 1730 }, { "epoch": 8.56, "grad_norm": 1.2178524732589722, "learning_rate": 0.0007167487684729064, "loss": 0.3352, "step": 1740 }, { "epoch": 8.61, "grad_norm": 1.3182921409606934, "learning_rate": 0.0006921182266009853, "loss": 0.3988, "step": 1750 }, { "epoch": 8.66, "grad_norm": 0.8894291520118713, "learning_rate": 0.0006674876847290641, "loss": 0.3753, "step": 1760 }, { "epoch": 8.71, "grad_norm": 0.9807326793670654, "learning_rate": 0.0006428571428571428, "loss": 0.3989, "step": 1770 }, { "epoch": 8.76, "grad_norm": 1.4324990510940552, "learning_rate": 0.0006182266009852217, "loss": 0.3979, "step": 1780 }, { "epoch": 8.81, "grad_norm": 1.4007656574249268, "learning_rate": 0.0005935960591133005, "loss": 0.3676, "step": 1790 }, { "epoch": 8.86, "grad_norm": 0.8580948710441589, "learning_rate": 0.0005689655172413793, "loss": 0.3373, "step": 1800 }, { "epoch": 8.91, "grad_norm": 1.5153675079345703, "learning_rate": 0.0005443349753694581, "loss": 0.3781, "step": 1810 }, { "epoch": 8.95, "grad_norm": 1.5855642557144165, "learning_rate": 0.000519704433497537, "loss": 0.3555, "step": 1820 }, { "epoch": 9.0, "eval_accuracy": 0.9774247491638796, "eval_f1": 0.9802735042916932, "eval_loss": 0.1285521239042282, "eval_precision": 0.983840002210874, "eval_recall": 0.9777879475418643, "eval_runtime": 14.0882, "eval_samples_per_second": 169.788, "eval_steps_per_second": 10.647, "step": 1829 }, { "epoch": 9.0, "grad_norm": 1.1518791913986206, "learning_rate": 0.0004950738916256157, "loss": 0.3911, "step": 1830 }, { "epoch": 9.05, "grad_norm": 0.8330171704292297, "learning_rate": 0.0004704433497536946, "loss": 0.3378, "step": 1840 }, { "epoch": 9.1, "grad_norm": 0.9390627145767212, "learning_rate": 0.0004458128078817734, "loss": 0.3801, "step": 1850 }, { "epoch": 9.15, "grad_norm": 0.8658188581466675, "learning_rate": 0.0004211822660098522, "loss": 0.3546, "step": 1860 }, { "epoch": 9.2, "grad_norm": 1.1137974262237549, "learning_rate": 0.0003965517241379311, "loss": 0.3132, "step": 1870 }, { "epoch": 9.25, "grad_norm": 0.9071435928344727, "learning_rate": 0.00037192118226600984, "loss": 0.3397, "step": 1880 }, { "epoch": 9.3, "grad_norm": 0.8356829285621643, "learning_rate": 0.0003472906403940887, "loss": 0.3005, "step": 1890 }, { "epoch": 9.35, "grad_norm": 1.6670458316802979, "learning_rate": 0.0003226600985221675, "loss": 0.2935, "step": 1900 }, { "epoch": 9.4, "grad_norm": 0.8949184417724609, "learning_rate": 0.0002980295566502463, "loss": 0.3067, "step": 1910 }, { "epoch": 9.45, "grad_norm": 1.5367650985717773, "learning_rate": 0.0002733990147783251, "loss": 0.3243, "step": 1920 }, { "epoch": 9.5, "grad_norm": 1.526454210281372, "learning_rate": 0.00024876847290640394, "loss": 0.325, "step": 1930 }, { "epoch": 9.54, "grad_norm": 0.9838758707046509, "learning_rate": 0.00022413793103448276, "loss": 0.2937, "step": 1940 }, { "epoch": 9.59, "grad_norm": 0.6418048143386841, "learning_rate": 0.00019950738916256158, "loss": 0.287, "step": 1950 }, { "epoch": 9.64, "grad_norm": 1.5970793962478638, "learning_rate": 0.0001748768472906404, "loss": 0.3064, "step": 1960 }, { "epoch": 9.69, "grad_norm": 1.298159122467041, "learning_rate": 0.00015024630541871922, "loss": 0.3235, "step": 1970 }, { "epoch": 9.74, "grad_norm": 1.0908325910568237, "learning_rate": 0.00012561576354679804, "loss": 0.3621, "step": 1980 }, { "epoch": 9.79, "grad_norm": 1.2317895889282227, "learning_rate": 0.00010098522167487686, "loss": 0.2885, "step": 1990 }, { "epoch": 9.84, "grad_norm": 1.2517650127410889, "learning_rate": 7.635467980295568e-05, "loss": 0.3403, "step": 2000 }, { "epoch": 9.89, "grad_norm": 0.8592343330383301, "learning_rate": 5.172413793103448e-05, "loss": 0.3006, "step": 2010 }, { "epoch": 9.94, "grad_norm": 0.97120600938797, "learning_rate": 2.70935960591133e-05, "loss": 0.314, "step": 2020 }, { "epoch": 9.99, "grad_norm": 1.6330816745758057, "learning_rate": 2.463054187192118e-06, "loss": 0.3552, "step": 2030 }, { "epoch": 9.99, "eval_accuracy": 0.9845317725752508, "eval_f1": 0.9866765497659894, "eval_loss": 0.10463691502809525, "eval_precision": 0.9881728395743647, "eval_recall": 0.9857079239819071, "eval_runtime": 13.8616, "eval_samples_per_second": 172.563, "eval_steps_per_second": 10.821, "step": 2030 }, { "epoch": 9.99, "step": 2030, "total_flos": 1.0128854206748197e+19, "train_loss": 0.5589088313098024, "train_runtime": 1723.9879, "train_samples_per_second": 75.407, "train_steps_per_second": 1.178 } ], "logging_steps": 10, "max_steps": 2030, "num_input_tokens_seen": 0, "num_train_epochs": 10, "save_steps": 500, "total_flos": 1.0128854206748197e+19, "train_batch_size": 16, "trial_name": null, "trial_params": null }