{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.9997629770087698, "eval_steps": 20, "global_step": 2109, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0, "eval_accuracy": 0.9098228663446055, "eval_f1": 0.3, "eval_loss": 0.39109358191490173, "eval_precision": 0.2222222222222222, "eval_recall": 0.46153846153846156, "eval_runtime": 50.0411, "eval_samples_per_second": 5.416, "eval_steps_per_second": 0.18, "step": 0 }, { "epoch": 0.00047404598246029864, "grad_norm": 2.736264228820801, "learning_rate": 9.478672985781992e-08, "loss": 0.6485, "step": 1 }, { "epoch": 0.0009480919649205973, "grad_norm": 2.5090606212615967, "learning_rate": 1.8957345971563984e-07, "loss": 0.6663, "step": 2 }, { "epoch": 0.001422137947380896, "grad_norm": 2.8418514728546143, "learning_rate": 2.843601895734597e-07, "loss": 0.6669, "step": 3 }, { "epoch": 0.0018961839298411946, "grad_norm": 3.081920862197876, "learning_rate": 3.791469194312797e-07, "loss": 0.7195, "step": 4 }, { "epoch": 0.002370229912301493, "grad_norm": 2.9263253211975098, "learning_rate": 4.7393364928909956e-07, "loss": 0.699, "step": 5 }, { "epoch": 0.002844275894761792, "grad_norm": 2.3481531143188477, "learning_rate": 5.687203791469194e-07, "loss": 0.6352, "step": 6 }, { "epoch": 0.0033183218772220905, "grad_norm": 2.840491533279419, "learning_rate": 6.635071090047394e-07, "loss": 0.6859, "step": 7 }, { "epoch": 0.003792367859682389, "grad_norm": 2.8396637439727783, "learning_rate": 7.582938388625594e-07, "loss": 0.6912, "step": 8 }, { "epoch": 0.004266413842142688, "grad_norm": 3.1124157905578613, "learning_rate": 8.530805687203792e-07, "loss": 0.6481, "step": 9 }, { "epoch": 0.004740459824602986, "grad_norm": 2.768177032470703, "learning_rate": 9.478672985781991e-07, "loss": 0.6911, "step": 10 }, { "epoch": 0.0052145058070632855, "grad_norm": 2.4753313064575195, "learning_rate": 1.042654028436019e-06, "loss": 0.5654, "step": 11 }, { "epoch": 0.005688551789523584, "grad_norm": 3.73299241065979, "learning_rate": 1.1374407582938388e-06, "loss": 0.7249, "step": 12 }, { "epoch": 0.006162597771983883, "grad_norm": 2.8647408485412598, "learning_rate": 1.2322274881516587e-06, "loss": 0.571, "step": 13 }, { "epoch": 0.006636643754444181, "grad_norm": 1.9949300289154053, "learning_rate": 1.3270142180094788e-06, "loss": 0.5458, "step": 14 }, { "epoch": 0.00711068973690448, "grad_norm": 2.8095905780792236, "learning_rate": 1.4218009478672987e-06, "loss": 0.6974, "step": 15 }, { "epoch": 0.007584735719364778, "grad_norm": 2.3465747833251953, "learning_rate": 1.5165876777251187e-06, "loss": 0.5798, "step": 16 }, { "epoch": 0.008058781701825076, "grad_norm": 2.2376415729522705, "learning_rate": 1.6113744075829384e-06, "loss": 0.6219, "step": 17 }, { "epoch": 0.008532827684285376, "grad_norm": 2.2321646213531494, "learning_rate": 1.7061611374407585e-06, "loss": 0.6327, "step": 18 }, { "epoch": 0.009006873666745675, "grad_norm": 2.9532177448272705, "learning_rate": 1.8009478672985784e-06, "loss": 0.7287, "step": 19 }, { "epoch": 0.009480919649205973, "grad_norm": 2.7521305084228516, "learning_rate": 1.8957345971563982e-06, "loss": 0.7904, "step": 20 }, { "epoch": 0.009480919649205973, "eval_accuracy": 0.9162640901771336, "eval_f1": 0.3157894736842105, "eval_loss": 0.3854508101940155, "eval_precision": 0.24, "eval_recall": 0.46153846153846156, "eval_runtime": 50.1605, "eval_samples_per_second": 5.403, "eval_steps_per_second": 0.179, "step": 20 }, { "epoch": 
0.009954965631666271, "grad_norm": 2.5812387466430664, "learning_rate": 1.990521327014218e-06, "loss": 0.6582, "step": 21 }, { "epoch": 0.010429011614126571, "grad_norm": 2.5878043174743652, "learning_rate": 2.085308056872038e-06, "loss": 0.5975, "step": 22 }, { "epoch": 0.01090305759658687, "grad_norm": 2.4602837562561035, "learning_rate": 2.180094786729858e-06, "loss": 0.6356, "step": 23 }, { "epoch": 0.011377103579047167, "grad_norm": 2.857377290725708, "learning_rate": 2.2748815165876777e-06, "loss": 0.6933, "step": 24 }, { "epoch": 0.011851149561507466, "grad_norm": 2.478761911392212, "learning_rate": 2.369668246445498e-06, "loss": 0.6806, "step": 25 }, { "epoch": 0.012325195543967766, "grad_norm": 2.6150331497192383, "learning_rate": 2.4644549763033174e-06, "loss": 0.6727, "step": 26 }, { "epoch": 0.012799241526428064, "grad_norm": 2.4646215438842773, "learning_rate": 2.5592417061611373e-06, "loss": 0.7231, "step": 27 }, { "epoch": 0.013273287508888362, "grad_norm": 2.3204421997070312, "learning_rate": 2.6540284360189576e-06, "loss": 0.715, "step": 28 }, { "epoch": 0.01374733349134866, "grad_norm": 2.1901276111602783, "learning_rate": 2.7488151658767775e-06, "loss": 0.6207, "step": 29 }, { "epoch": 0.01422137947380896, "grad_norm": 2.659156322479248, "learning_rate": 2.8436018957345973e-06, "loss": 0.6465, "step": 30 }, { "epoch": 0.014695425456269258, "grad_norm": 3.0104305744171143, "learning_rate": 2.938388625592417e-06, "loss": 0.6848, "step": 31 }, { "epoch": 0.015169471438729557, "grad_norm": 3.2612526416778564, "learning_rate": 3.0331753554502375e-06, "loss": 0.6094, "step": 32 }, { "epoch": 0.015643517421189856, "grad_norm": 2.8630073070526123, "learning_rate": 3.1279620853080574e-06, "loss": 0.6679, "step": 33 }, { "epoch": 0.016117563403650153, "grad_norm": 3.1366546154022217, "learning_rate": 3.222748815165877e-06, "loss": 0.6961, "step": 34 }, { "epoch": 0.016591609386110453, "grad_norm": 2.5289793014526367, "learning_rate": 3.3175355450236967e-06, "loss": 0.6363, "step": 35 }, { "epoch": 0.017065655368570753, "grad_norm": 1.996009111404419, "learning_rate": 3.412322274881517e-06, "loss": 0.6137, "step": 36 }, { "epoch": 0.01753970135103105, "grad_norm": 2.309265613555908, "learning_rate": 3.507109004739337e-06, "loss": 0.5873, "step": 37 }, { "epoch": 0.01801374733349135, "grad_norm": 2.2232859134674072, "learning_rate": 3.6018957345971567e-06, "loss": 0.6081, "step": 38 }, { "epoch": 0.018487793315951646, "grad_norm": 2.608635902404785, "learning_rate": 3.6966824644549766e-06, "loss": 0.5946, "step": 39 }, { "epoch": 0.018961839298411946, "grad_norm": 2.9958667755126953, "learning_rate": 3.7914691943127964e-06, "loss": 0.6496, "step": 40 }, { "epoch": 0.018961839298411946, "eval_accuracy": 0.9299516908212561, "eval_f1": 0.304, "eval_loss": 0.357759952545166, "eval_precision": 0.2602739726027397, "eval_recall": 0.36538461538461536, "eval_runtime": 50.7819, "eval_samples_per_second": 5.337, "eval_steps_per_second": 0.177, "step": 40 }, { "epoch": 0.019435885280872246, "grad_norm": 2.9960222244262695, "learning_rate": 3.886255924170616e-06, "loss": 0.7012, "step": 41 }, { "epoch": 0.019909931263332542, "grad_norm": 2.469219923019409, "learning_rate": 3.981042654028436e-06, "loss": 0.5172, "step": 42 }, { "epoch": 0.020383977245792842, "grad_norm": 2.2367403507232666, "learning_rate": 4.075829383886256e-06, "loss": 0.6086, "step": 43 }, { "epoch": 0.020858023228253142, "grad_norm": 2.455852746963501, "learning_rate": 4.170616113744076e-06, "loss": 0.6616, "step": 44 }, { 
"epoch": 0.02133206921071344, "grad_norm": 2.6048426628112793, "learning_rate": 4.265402843601897e-06, "loss": 0.6319, "step": 45 }, { "epoch": 0.02180611519317374, "grad_norm": 2.633476495742798, "learning_rate": 4.360189573459716e-06, "loss": 0.5807, "step": 46 }, { "epoch": 0.022280161175634035, "grad_norm": 2.525595188140869, "learning_rate": 4.4549763033175355e-06, "loss": 0.566, "step": 47 }, { "epoch": 0.022754207158094335, "grad_norm": 2.116396427154541, "learning_rate": 4.549763033175355e-06, "loss": 0.5222, "step": 48 }, { "epoch": 0.023228253140554635, "grad_norm": 2.2869620323181152, "learning_rate": 4.644549763033176e-06, "loss": 0.6677, "step": 49 }, { "epoch": 0.02370229912301493, "grad_norm": 3.656646966934204, "learning_rate": 4.739336492890996e-06, "loss": 0.6329, "step": 50 }, { "epoch": 0.02417634510547523, "grad_norm": 2.4779574871063232, "learning_rate": 4.834123222748816e-06, "loss": 0.6179, "step": 51 }, { "epoch": 0.02465039108793553, "grad_norm": 2.9239354133605957, "learning_rate": 4.928909952606635e-06, "loss": 0.5679, "step": 52 }, { "epoch": 0.025124437070395828, "grad_norm": 2.596090793609619, "learning_rate": 5.023696682464455e-06, "loss": 0.5907, "step": 53 }, { "epoch": 0.025598483052856127, "grad_norm": 2.4275245666503906, "learning_rate": 5.118483412322275e-06, "loss": 0.5432, "step": 54 }, { "epoch": 0.026072529035316427, "grad_norm": 3.1805362701416016, "learning_rate": 5.213270142180096e-06, "loss": 0.6221, "step": 55 }, { "epoch": 0.026546575017776724, "grad_norm": 2.3142030239105225, "learning_rate": 5.308056872037915e-06, "loss": 0.6459, "step": 56 }, { "epoch": 0.027020621000237024, "grad_norm": 2.3154592514038086, "learning_rate": 5.402843601895735e-06, "loss": 0.5481, "step": 57 }, { "epoch": 0.02749466698269732, "grad_norm": 2.70127272605896, "learning_rate": 5.497630331753555e-06, "loss": 0.5592, "step": 58 }, { "epoch": 0.02796871296515762, "grad_norm": 2.5554442405700684, "learning_rate": 5.592417061611375e-06, "loss": 0.587, "step": 59 }, { "epoch": 0.02844275894761792, "grad_norm": 2.4974448680877686, "learning_rate": 5.687203791469195e-06, "loss": 0.5209, "step": 60 }, { "epoch": 0.02844275894761792, "eval_accuracy": 0.9468599033816425, "eval_f1": 0.23255813953488372, "eval_loss": 0.30114662647247314, "eval_precision": 0.29411764705882354, "eval_recall": 0.19230769230769232, "eval_runtime": 50.6864, "eval_samples_per_second": 5.347, "eval_steps_per_second": 0.178, "step": 60 }, { "epoch": 0.028916804930078217, "grad_norm": 2.1992790699005127, "learning_rate": 5.7819905213270145e-06, "loss": 0.6134, "step": 61 }, { "epoch": 0.029390850912538517, "grad_norm": 2.135422468185425, "learning_rate": 5.876777251184834e-06, "loss": 0.5917, "step": 62 }, { "epoch": 0.029864896894998817, "grad_norm": 1.9710865020751953, "learning_rate": 5.971563981042654e-06, "loss": 0.5341, "step": 63 }, { "epoch": 0.030338942877459113, "grad_norm": 2.6831486225128174, "learning_rate": 6.066350710900475e-06, "loss": 0.5878, "step": 64 }, { "epoch": 0.030812988859919413, "grad_norm": 2.277893543243408, "learning_rate": 6.161137440758295e-06, "loss": 0.5407, "step": 65 }, { "epoch": 0.03128703484237971, "grad_norm": 2.153470993041992, "learning_rate": 6.255924170616115e-06, "loss": 0.5109, "step": 66 }, { "epoch": 0.03176108082484001, "grad_norm": 2.458293914794922, "learning_rate": 6.350710900473935e-06, "loss": 0.5687, "step": 67 }, { "epoch": 0.032235126807300306, "grad_norm": 1.6730012893676758, "learning_rate": 6.445497630331754e-06, "loss": 0.511, "step": 68 }, 
{ "epoch": 0.032709172789760606, "grad_norm": 2.294477939605713, "learning_rate": 6.5402843601895735e-06, "loss": 0.5163, "step": 69 }, { "epoch": 0.033183218772220906, "grad_norm": 1.931765079498291, "learning_rate": 6.635071090047393e-06, "loss": 0.5463, "step": 70 }, { "epoch": 0.033657264754681206, "grad_norm": 1.9582473039627075, "learning_rate": 6.729857819905213e-06, "loss": 0.5404, "step": 71 }, { "epoch": 0.034131310737141506, "grad_norm": 2.352447986602783, "learning_rate": 6.824644549763034e-06, "loss": 0.5004, "step": 72 }, { "epoch": 0.0346053567196018, "grad_norm": 2.5306575298309326, "learning_rate": 6.919431279620854e-06, "loss": 0.491, "step": 73 }, { "epoch": 0.0350794027020621, "grad_norm": 1.954287052154541, "learning_rate": 7.014218009478674e-06, "loss": 0.5096, "step": 74 }, { "epoch": 0.0355534486845224, "grad_norm": 1.7585203647613525, "learning_rate": 7.1090047393364935e-06, "loss": 0.4437, "step": 75 }, { "epoch": 0.0360274946669827, "grad_norm": 1.9448845386505127, "learning_rate": 7.203791469194313e-06, "loss": 0.4902, "step": 76 }, { "epoch": 0.036501540649443, "grad_norm": 2.1417629718780518, "learning_rate": 7.298578199052133e-06, "loss": 0.5599, "step": 77 }, { "epoch": 0.03697558663190329, "grad_norm": 1.9677048921585083, "learning_rate": 7.393364928909953e-06, "loss": 0.5196, "step": 78 }, { "epoch": 0.03744963261436359, "grad_norm": 4.773871421813965, "learning_rate": 7.488151658767773e-06, "loss": 0.7193, "step": 79 }, { "epoch": 0.03792367859682389, "grad_norm": 1.7716329097747803, "learning_rate": 7.582938388625593e-06, "loss": 0.482, "step": 80 }, { "epoch": 0.03792367859682389, "eval_accuracy": 0.9524959742351047, "eval_f1": 0.21333333333333335, "eval_loss": 0.2597336769104004, "eval_precision": 0.34782608695652173, "eval_recall": 0.15384615384615385, "eval_runtime": 50.3051, "eval_samples_per_second": 5.387, "eval_steps_per_second": 0.179, "step": 80 }, { "epoch": 0.03839772457928419, "grad_norm": 1.7248247861862183, "learning_rate": 7.677725118483414e-06, "loss": 0.4988, "step": 81 }, { "epoch": 0.03887177056174449, "grad_norm": 2.6806564331054688, "learning_rate": 7.772511848341233e-06, "loss": 0.6173, "step": 82 }, { "epoch": 0.03934581654420479, "grad_norm": 3.3090500831604004, "learning_rate": 7.867298578199053e-06, "loss": 0.6747, "step": 83 }, { "epoch": 0.039819862526665084, "grad_norm": 1.7768396139144897, "learning_rate": 7.962085308056872e-06, "loss": 0.4246, "step": 84 }, { "epoch": 0.040293908509125384, "grad_norm": 2.553398847579956, "learning_rate": 8.056872037914693e-06, "loss": 0.5885, "step": 85 }, { "epoch": 0.040767954491585684, "grad_norm": 2.223745107650757, "learning_rate": 8.151658767772512e-06, "loss": 0.631, "step": 86 }, { "epoch": 0.041242000474045984, "grad_norm": 2.303098440170288, "learning_rate": 8.246445497630333e-06, "loss": 0.4689, "step": 87 }, { "epoch": 0.041716046456506284, "grad_norm": 1.8970552682876587, "learning_rate": 8.341232227488152e-06, "loss": 0.523, "step": 88 }, { "epoch": 0.04219009243896658, "grad_norm": 2.505955934524536, "learning_rate": 8.436018957345973e-06, "loss": 0.4935, "step": 89 }, { "epoch": 0.04266413842142688, "grad_norm": 1.875301718711853, "learning_rate": 8.530805687203793e-06, "loss": 0.4522, "step": 90 }, { "epoch": 0.04313818440388718, "grad_norm": 1.900534749031067, "learning_rate": 8.625592417061612e-06, "loss": 0.4667, "step": 91 }, { "epoch": 0.04361223038634748, "grad_norm": 3.142495632171631, "learning_rate": 8.720379146919431e-06, "loss": 0.7367, "step": 92 }, { "epoch": 
0.04408627636880778, "grad_norm": 2.096675395965576, "learning_rate": 8.815165876777252e-06, "loss": 0.5286, "step": 93 }, { "epoch": 0.04456032235126807, "grad_norm": 2.4111526012420654, "learning_rate": 8.909952606635071e-06, "loss": 0.4165, "step": 94 }, { "epoch": 0.04503436833372837, "grad_norm": 2.4553468227386475, "learning_rate": 9.004739336492892e-06, "loss": 0.5336, "step": 95 }, { "epoch": 0.04550841431618867, "grad_norm": 2.3772170543670654, "learning_rate": 9.09952606635071e-06, "loss": 0.455, "step": 96 }, { "epoch": 0.04598246029864897, "grad_norm": 2.652953863143921, "learning_rate": 9.194312796208532e-06, "loss": 0.5995, "step": 97 }, { "epoch": 0.04645650628110927, "grad_norm": 1.7384384870529175, "learning_rate": 9.289099526066352e-06, "loss": 0.4296, "step": 98 }, { "epoch": 0.04693055226356957, "grad_norm": 2.1251447200775146, "learning_rate": 9.383886255924171e-06, "loss": 0.5505, "step": 99 }, { "epoch": 0.04740459824602986, "grad_norm": 1.7407325506210327, "learning_rate": 9.478672985781992e-06, "loss": 0.4165, "step": 100 }, { "epoch": 0.04740459824602986, "eval_accuracy": 0.9549114331723028, "eval_f1": 0.40425531914893614, "eval_loss": 0.2602430284023285, "eval_precision": 0.4523809523809524, "eval_recall": 0.36538461538461536, "eval_runtime": 50.5287, "eval_samples_per_second": 5.363, "eval_steps_per_second": 0.178, "step": 100 }, { "epoch": 0.04787864422849016, "grad_norm": 1.908913016319275, "learning_rate": 9.573459715639811e-06, "loss": 0.4844, "step": 101 }, { "epoch": 0.04835269021095046, "grad_norm": 2.47544527053833, "learning_rate": 9.668246445497632e-06, "loss": 0.4939, "step": 102 }, { "epoch": 0.04882673619341076, "grad_norm": 2.693955183029175, "learning_rate": 9.76303317535545e-06, "loss": 0.516, "step": 103 }, { "epoch": 0.04930078217587106, "grad_norm": 2.166332960128784, "learning_rate": 9.85781990521327e-06, "loss": 0.3886, "step": 104 }, { "epoch": 0.049774828158331355, "grad_norm": 1.968274474143982, "learning_rate": 9.95260663507109e-06, "loss": 0.4948, "step": 105 }, { "epoch": 0.050248874140791655, "grad_norm": 2.600205898284912, "learning_rate": 1.004739336492891e-05, "loss": 0.447, "step": 106 }, { "epoch": 0.050722920123251955, "grad_norm": 2.6228041648864746, "learning_rate": 1.0142180094786732e-05, "loss": 0.4832, "step": 107 }, { "epoch": 0.051196966105712255, "grad_norm": 2.470987319946289, "learning_rate": 1.023696682464455e-05, "loss": 0.4968, "step": 108 }, { "epoch": 0.051671012088172555, "grad_norm": 2.0623159408569336, "learning_rate": 1.033175355450237e-05, "loss": 0.4376, "step": 109 }, { "epoch": 0.052145058070632855, "grad_norm": 2.3347840309143066, "learning_rate": 1.0426540284360192e-05, "loss": 0.4794, "step": 110 }, { "epoch": 0.05261910405309315, "grad_norm": 3.5064194202423096, "learning_rate": 1.052132701421801e-05, "loss": 0.4832, "step": 111 }, { "epoch": 0.05309315003555345, "grad_norm": 2.4698598384857178, "learning_rate": 1.061611374407583e-05, "loss": 0.4454, "step": 112 }, { "epoch": 0.05356719601801375, "grad_norm": 3.2061517238616943, "learning_rate": 1.071090047393365e-05, "loss": 0.3793, "step": 113 }, { "epoch": 0.05404124200047405, "grad_norm": 2.628289222717285, "learning_rate": 1.080568720379147e-05, "loss": 0.4649, "step": 114 }, { "epoch": 0.05451528798293435, "grad_norm": 2.606915235519409, "learning_rate": 1.0900473933649289e-05, "loss": 0.4821, "step": 115 }, { "epoch": 0.05498933396539464, "grad_norm": 2.454237461090088, "learning_rate": 1.099526066350711e-05, "loss": 0.4826, "step": 116 }, { 
"epoch": 0.05546337994785494, "grad_norm": 2.233793258666992, "learning_rate": 1.1090047393364929e-05, "loss": 0.4403, "step": 117 }, { "epoch": 0.05593742593031524, "grad_norm": 2.4212899208068848, "learning_rate": 1.118483412322275e-05, "loss": 0.3906, "step": 118 }, { "epoch": 0.05641147191277554, "grad_norm": 2.67380952835083, "learning_rate": 1.127962085308057e-05, "loss": 0.4524, "step": 119 }, { "epoch": 0.05688551789523584, "grad_norm": 3.087235689163208, "learning_rate": 1.137440758293839e-05, "loss": 0.5055, "step": 120 }, { "epoch": 0.05688551789523584, "eval_accuracy": 0.961352657004831, "eval_f1": 0.5636363636363636, "eval_loss": 0.21827787160873413, "eval_precision": 0.5344827586206896, "eval_recall": 0.5961538461538461, "eval_runtime": 50.4106, "eval_samples_per_second": 5.376, "eval_steps_per_second": 0.179, "step": 120 }, { "epoch": 0.05735956387769613, "grad_norm": 3.3387227058410645, "learning_rate": 1.146919431279621e-05, "loss": 0.4667, "step": 121 }, { "epoch": 0.05783360986015643, "grad_norm": 3.6316022872924805, "learning_rate": 1.1563981042654029e-05, "loss": 0.4783, "step": 122 }, { "epoch": 0.05830765584261673, "grad_norm": 2.532358407974243, "learning_rate": 1.165876777251185e-05, "loss": 0.3861, "step": 123 }, { "epoch": 0.05878170182507703, "grad_norm": 2.6193060874938965, "learning_rate": 1.1753554502369669e-05, "loss": 0.4667, "step": 124 }, { "epoch": 0.05925574780753733, "grad_norm": 2.7999274730682373, "learning_rate": 1.184834123222749e-05, "loss": 0.4729, "step": 125 }, { "epoch": 0.05972979378999763, "grad_norm": 4.306354999542236, "learning_rate": 1.1943127962085309e-05, "loss": 0.55, "step": 126 }, { "epoch": 0.060203839772457926, "grad_norm": 3.659555435180664, "learning_rate": 1.203791469194313e-05, "loss": 0.4994, "step": 127 }, { "epoch": 0.060677885754918226, "grad_norm": 2.463505506515503, "learning_rate": 1.213270142180095e-05, "loss": 0.3816, "step": 128 }, { "epoch": 0.061151931737378526, "grad_norm": 3.011713981628418, "learning_rate": 1.2227488151658769e-05, "loss": 0.4302, "step": 129 }, { "epoch": 0.061625977719838826, "grad_norm": 3.642547130584717, "learning_rate": 1.232227488151659e-05, "loss": 0.3334, "step": 130 }, { "epoch": 0.062100023702299126, "grad_norm": 3.2988884449005127, "learning_rate": 1.2417061611374409e-05, "loss": 0.3946, "step": 131 }, { "epoch": 0.06257406968475943, "grad_norm": 3.1273484230041504, "learning_rate": 1.251184834123223e-05, "loss": 0.3477, "step": 132 }, { "epoch": 0.06304811566721973, "grad_norm": 3.406371831893921, "learning_rate": 1.2606635071090047e-05, "loss": 0.5039, "step": 133 }, { "epoch": 0.06352216164968003, "grad_norm": 3.2792601585388184, "learning_rate": 1.270142180094787e-05, "loss": 0.3721, "step": 134 }, { "epoch": 0.06399620763214031, "grad_norm": 3.312288999557495, "learning_rate": 1.2796208530805687e-05, "loss": 0.3462, "step": 135 }, { "epoch": 0.06447025361460061, "grad_norm": 3.004293441772461, "learning_rate": 1.2890995260663507e-05, "loss": 0.3421, "step": 136 }, { "epoch": 0.06494429959706091, "grad_norm": 3.3723042011260986, "learning_rate": 1.298578199052133e-05, "loss": 0.4015, "step": 137 }, { "epoch": 0.06541834557952121, "grad_norm": 5.373427867889404, "learning_rate": 1.3080568720379147e-05, "loss": 0.5164, "step": 138 }, { "epoch": 0.06589239156198151, "grad_norm": 3.394989252090454, "learning_rate": 1.3175355450236968e-05, "loss": 0.3828, "step": 139 }, { "epoch": 0.06636643754444181, "grad_norm": 2.8693277835845947, "learning_rate": 1.3270142180094787e-05, "loss": 
0.2795, "step": 140 }, { "epoch": 0.06636643754444181, "eval_accuracy": 0.9541062801932367, "eval_f1": 0.5777777777777777, "eval_loss": 0.160878986120224, "eval_precision": 0.46987951807228917, "eval_recall": 0.75, "eval_runtime": 50.0867, "eval_samples_per_second": 5.411, "eval_steps_per_second": 0.18, "step": 140 }, { "epoch": 0.06684048352690211, "grad_norm": 4.5176777839660645, "learning_rate": 1.3364928909952607e-05, "loss": 0.4215, "step": 141 }, { "epoch": 0.06731452950936241, "grad_norm": 3.332298517227173, "learning_rate": 1.3459715639810426e-05, "loss": 0.3453, "step": 142 }, { "epoch": 0.06778857549182271, "grad_norm": 4.390758991241455, "learning_rate": 1.3554502369668247e-05, "loss": 0.3583, "step": 143 }, { "epoch": 0.06826262147428301, "grad_norm": 3.537217140197754, "learning_rate": 1.3649289099526068e-05, "loss": 0.3976, "step": 144 }, { "epoch": 0.0687366674567433, "grad_norm": 3.8464837074279785, "learning_rate": 1.3744075829383887e-05, "loss": 0.338, "step": 145 }, { "epoch": 0.0692107134392036, "grad_norm": 3.7537763118743896, "learning_rate": 1.3838862559241708e-05, "loss": 0.3329, "step": 146 }, { "epoch": 0.0696847594216639, "grad_norm": 3.610654592514038, "learning_rate": 1.3933649289099527e-05, "loss": 0.3841, "step": 147 }, { "epoch": 0.0701588054041242, "grad_norm": 3.029467821121216, "learning_rate": 1.4028436018957347e-05, "loss": 0.2706, "step": 148 }, { "epoch": 0.0706328513865845, "grad_norm": 4.385302543640137, "learning_rate": 1.4123222748815166e-05, "loss": 0.3456, "step": 149 }, { "epoch": 0.0711068973690448, "grad_norm": 5.12073278427124, "learning_rate": 1.4218009478672987e-05, "loss": 0.416, "step": 150 }, { "epoch": 0.0715809433515051, "grad_norm": 3.408851146697998, "learning_rate": 1.4312796208530806e-05, "loss": 0.2294, "step": 151 }, { "epoch": 0.0720549893339654, "grad_norm": 5.863509178161621, "learning_rate": 1.4407582938388627e-05, "loss": 0.3959, "step": 152 }, { "epoch": 0.0725290353164257, "grad_norm": 5.222507476806641, "learning_rate": 1.4502369668246448e-05, "loss": 0.3258, "step": 153 }, { "epoch": 0.073003081298886, "grad_norm": 7.751191139221191, "learning_rate": 1.4597156398104267e-05, "loss": 0.2993, "step": 154 }, { "epoch": 0.0734771272813463, "grad_norm": 6.545222759246826, "learning_rate": 1.4691943127962087e-05, "loss": 0.3931, "step": 155 }, { "epoch": 0.07395117326380658, "grad_norm": 7.393334865570068, "learning_rate": 1.4786729857819906e-05, "loss": 0.4186, "step": 156 }, { "epoch": 0.07442521924626688, "grad_norm": 3.8417253494262695, "learning_rate": 1.4881516587677727e-05, "loss": 0.276, "step": 157 }, { "epoch": 0.07489926522872718, "grad_norm": 5.1075873374938965, "learning_rate": 1.4976303317535546e-05, "loss": 0.36, "step": 158 }, { "epoch": 0.07537331121118748, "grad_norm": 4.459785461425781, "learning_rate": 1.5071090047393367e-05, "loss": 0.3414, "step": 159 }, { "epoch": 0.07584735719364778, "grad_norm": 6.1932220458984375, "learning_rate": 1.5165876777251186e-05, "loss": 0.3314, "step": 160 }, { "epoch": 0.07584735719364778, "eval_accuracy": 0.9315619967793881, "eval_f1": 0.5142857142857142, "eval_loss": 0.1573554426431656, "eval_precision": 0.36585365853658536, "eval_recall": 0.8653846153846154, "eval_runtime": 50.8938, "eval_samples_per_second": 5.325, "eval_steps_per_second": 0.177, "step": 160 }, { "epoch": 0.07632140317610808, "grad_norm": 6.182979106903076, "learning_rate": 1.5260663507109007e-05, "loss": 0.3182, "step": 161 }, { "epoch": 0.07679544915856838, "grad_norm": 5.413295269012451, 
"learning_rate": 1.5355450236966827e-05, "loss": 0.3146, "step": 162 }, { "epoch": 0.07726949514102868, "grad_norm": 8.663524627685547, "learning_rate": 1.5450236966824645e-05, "loss": 0.358, "step": 163 }, { "epoch": 0.07774354112348898, "grad_norm": 6.667413711547852, "learning_rate": 1.5545023696682465e-05, "loss": 0.4242, "step": 164 }, { "epoch": 0.07821758710594928, "grad_norm": 5.1349101066589355, "learning_rate": 1.5639810426540286e-05, "loss": 0.3376, "step": 165 }, { "epoch": 0.07869163308840958, "grad_norm": 6.741973400115967, "learning_rate": 1.5734597156398107e-05, "loss": 0.3881, "step": 166 }, { "epoch": 0.07916567907086987, "grad_norm": 7.087311267852783, "learning_rate": 1.5829383886255924e-05, "loss": 0.3073, "step": 167 }, { "epoch": 0.07963972505333017, "grad_norm": 6.722820281982422, "learning_rate": 1.5924170616113745e-05, "loss": 0.2776, "step": 168 }, { "epoch": 0.08011377103579047, "grad_norm": 7.327820777893066, "learning_rate": 1.6018957345971565e-05, "loss": 0.4114, "step": 169 }, { "epoch": 0.08058781701825077, "grad_norm": 5.990736961364746, "learning_rate": 1.6113744075829386e-05, "loss": 0.3591, "step": 170 }, { "epoch": 0.08106186300071107, "grad_norm": 7.22863245010376, "learning_rate": 1.6208530805687207e-05, "loss": 0.3155, "step": 171 }, { "epoch": 0.08153590898317137, "grad_norm": 6.643196105957031, "learning_rate": 1.6303317535545024e-05, "loss": 0.3274, "step": 172 }, { "epoch": 0.08200995496563167, "grad_norm": 6.142556190490723, "learning_rate": 1.6398104265402845e-05, "loss": 0.351, "step": 173 }, { "epoch": 0.08248400094809197, "grad_norm": 5.601083278656006, "learning_rate": 1.6492890995260666e-05, "loss": 0.2545, "step": 174 }, { "epoch": 0.08295804693055227, "grad_norm": 7.0043511390686035, "learning_rate": 1.6587677725118486e-05, "loss": 0.3409, "step": 175 }, { "epoch": 0.08343209291301257, "grad_norm": 6.3572678565979, "learning_rate": 1.6682464454976304e-05, "loss": 0.3188, "step": 176 }, { "epoch": 0.08390613889547287, "grad_norm": 5.977471351623535, "learning_rate": 1.6777251184834124e-05, "loss": 0.3103, "step": 177 }, { "epoch": 0.08438018487793315, "grad_norm": 4.649372577667236, "learning_rate": 1.6872037914691945e-05, "loss": 0.3375, "step": 178 }, { "epoch": 0.08485423086039345, "grad_norm": 3.5389981269836426, "learning_rate": 1.6966824644549766e-05, "loss": 0.2633, "step": 179 }, { "epoch": 0.08532827684285375, "grad_norm": 4.463602066040039, "learning_rate": 1.7061611374407587e-05, "loss": 0.257, "step": 180 }, { "epoch": 0.08532827684285375, "eval_accuracy": 0.9597423510466989, "eval_f1": 0.6323529411764706, "eval_loss": 0.10005601495504379, "eval_precision": 0.5119047619047619, "eval_recall": 0.8269230769230769, "eval_runtime": 50.292, "eval_samples_per_second": 5.389, "eval_steps_per_second": 0.179, "step": 180 }, { "epoch": 0.08580232282531405, "grad_norm": 6.894714832305908, "learning_rate": 1.7156398104265404e-05, "loss": 0.3074, "step": 181 }, { "epoch": 0.08627636880777435, "grad_norm": 6.089742183685303, "learning_rate": 1.7251184834123225e-05, "loss": 0.4012, "step": 182 }, { "epoch": 0.08675041479023465, "grad_norm": 5.172243118286133, "learning_rate": 1.7345971563981042e-05, "loss": 0.3681, "step": 183 }, { "epoch": 0.08722446077269495, "grad_norm": 4.032599925994873, "learning_rate": 1.7440758293838863e-05, "loss": 0.3502, "step": 184 }, { "epoch": 0.08769850675515525, "grad_norm": 4.708069324493408, "learning_rate": 1.7535545023696683e-05, "loss": 0.2372, "step": 185 }, { "epoch": 0.08817255273761555, "grad_norm": 
3.9147534370422363, "learning_rate": 1.7630331753554504e-05, "loss": 0.2758, "step": 186 }, { "epoch": 0.08864659872007585, "grad_norm": 5.272024154663086, "learning_rate": 1.7725118483412325e-05, "loss": 0.2808, "step": 187 }, { "epoch": 0.08912064470253614, "grad_norm": 4.757286071777344, "learning_rate": 1.7819905213270142e-05, "loss": 0.3566, "step": 188 }, { "epoch": 0.08959469068499644, "grad_norm": 6.468900680541992, "learning_rate": 1.7914691943127963e-05, "loss": 0.2969, "step": 189 }, { "epoch": 0.09006873666745674, "grad_norm": 4.515785217285156, "learning_rate": 1.8009478672985784e-05, "loss": 0.2362, "step": 190 }, { "epoch": 0.09054278264991704, "grad_norm": 4.820523262023926, "learning_rate": 1.8104265402843604e-05, "loss": 0.2419, "step": 191 }, { "epoch": 0.09101682863237734, "grad_norm": 4.412991046905518, "learning_rate": 1.819905213270142e-05, "loss": 0.2317, "step": 192 }, { "epoch": 0.09149087461483764, "grad_norm": 5.667988300323486, "learning_rate": 1.8293838862559242e-05, "loss": 0.3614, "step": 193 }, { "epoch": 0.09196492059729794, "grad_norm": 5.581320285797119, "learning_rate": 1.8388625592417063e-05, "loss": 0.2405, "step": 194 }, { "epoch": 0.09243896657975824, "grad_norm": 5.467656135559082, "learning_rate": 1.8483412322274884e-05, "loss": 0.3066, "step": 195 }, { "epoch": 0.09291301256221854, "grad_norm": 4.500129222869873, "learning_rate": 1.8578199052132704e-05, "loss": 0.234, "step": 196 }, { "epoch": 0.09338705854467884, "grad_norm": 6.052828788757324, "learning_rate": 1.8672985781990522e-05, "loss": 0.2656, "step": 197 }, { "epoch": 0.09386110452713914, "grad_norm": 3.196599006652832, "learning_rate": 1.8767772511848342e-05, "loss": 0.213, "step": 198 }, { "epoch": 0.09433515050959942, "grad_norm": 3.0227274894714355, "learning_rate": 1.8862559241706163e-05, "loss": 0.2027, "step": 199 }, { "epoch": 0.09480919649205972, "grad_norm": 7.403257846832275, "learning_rate": 1.8957345971563984e-05, "loss": 0.3089, "step": 200 }, { "epoch": 0.09480919649205972, "eval_accuracy": 0.9750402576489533, "eval_f1": 0.7394957983193278, "eval_loss": 0.07466968148946762, "eval_precision": 0.6567164179104478, "eval_recall": 0.8461538461538461, "eval_runtime": 50.2595, "eval_samples_per_second": 5.392, "eval_steps_per_second": 0.179, "step": 200 }, { "epoch": 0.09528324247452002, "grad_norm": 4.3082594871521, "learning_rate": 1.90521327014218e-05, "loss": 0.2145, "step": 201 }, { "epoch": 0.09575728845698032, "grad_norm": 5.785930156707764, "learning_rate": 1.9146919431279622e-05, "loss": 0.305, "step": 202 }, { "epoch": 0.09623133443944062, "grad_norm": 6.636341571807861, "learning_rate": 1.9241706161137443e-05, "loss": 0.3244, "step": 203 }, { "epoch": 0.09670538042190092, "grad_norm": 4.880903720855713, "learning_rate": 1.9336492890995263e-05, "loss": 0.2325, "step": 204 }, { "epoch": 0.09717942640436122, "grad_norm": 6.563040733337402, "learning_rate": 1.9431279620853084e-05, "loss": 0.2905, "step": 205 }, { "epoch": 0.09765347238682152, "grad_norm": 6.47865629196167, "learning_rate": 1.95260663507109e-05, "loss": 0.2872, "step": 206 }, { "epoch": 0.09812751836928182, "grad_norm": 6.47356653213501, "learning_rate": 1.9620853080568722e-05, "loss": 0.2634, "step": 207 }, { "epoch": 0.09860156435174212, "grad_norm": 5.02006721496582, "learning_rate": 1.971563981042654e-05, "loss": 0.2019, "step": 208 }, { "epoch": 0.09907561033420242, "grad_norm": 6.659644603729248, "learning_rate": 1.9810426540284364e-05, "loss": 0.2409, "step": 209 }, { "epoch": 0.09954965631666271, 
"grad_norm": 5.4866862297058105, "learning_rate": 1.990521327014218e-05, "loss": 0.2173, "step": 210 }, { "epoch": 0.10002370229912301, "grad_norm": 6.1934614181518555, "learning_rate": 2e-05, "loss": 0.248, "step": 211 }, { "epoch": 0.10049774828158331, "grad_norm": 5.64393424987793, "learning_rate": 1.9999986301366886e-05, "loss": 0.2072, "step": 212 }, { "epoch": 0.10097179426404361, "grad_norm": 4.783505439758301, "learning_rate": 1.999994520550506e-05, "loss": 0.2368, "step": 213 }, { "epoch": 0.10144584024650391, "grad_norm": 11.98668384552002, "learning_rate": 1.9999876712527123e-05, "loss": 0.3723, "step": 214 }, { "epoch": 0.10191988622896421, "grad_norm": 9.690125465393066, "learning_rate": 1.9999780822620726e-05, "loss": 0.3454, "step": 215 }, { "epoch": 0.10239393221142451, "grad_norm": 5.787740230560303, "learning_rate": 1.9999657536048575e-05, "loss": 0.168, "step": 216 }, { "epoch": 0.10286797819388481, "grad_norm": 11.37346363067627, "learning_rate": 1.9999506853148447e-05, "loss": 0.2285, "step": 217 }, { "epoch": 0.10334202417634511, "grad_norm": 7.2158660888671875, "learning_rate": 1.999932877433317e-05, "loss": 0.2417, "step": 218 }, { "epoch": 0.10381607015880541, "grad_norm": 8.75778579711914, "learning_rate": 1.9999123300090633e-05, "loss": 0.2378, "step": 219 }, { "epoch": 0.10429011614126571, "grad_norm": 8.147066116333008, "learning_rate": 1.9998890430983776e-05, "loss": 0.329, "step": 220 }, { "epoch": 0.10429011614126571, "eval_accuracy": 0.9742351046698873, "eval_f1": 0.7333333333333333, "eval_loss": 0.07219066470861435, "eval_precision": 0.6470588235294118, "eval_recall": 0.8461538461538461, "eval_runtime": 49.5188, "eval_samples_per_second": 5.473, "eval_steps_per_second": 0.182, "step": 220 }, { "epoch": 0.104764162123726, "grad_norm": 5.941849231719971, "learning_rate": 1.9998630167650603e-05, "loss": 0.2196, "step": 221 }, { "epoch": 0.1052382081061863, "grad_norm": 7.25979471206665, "learning_rate": 1.9998342510804157e-05, "loss": 0.2636, "step": 222 }, { "epoch": 0.1057122540886466, "grad_norm": 5.941128253936768, "learning_rate": 1.9998027461232545e-05, "loss": 0.1794, "step": 223 }, { "epoch": 0.1061863000711069, "grad_norm": 5.614895820617676, "learning_rate": 1.9997685019798913e-05, "loss": 0.2548, "step": 224 }, { "epoch": 0.1066603460535672, "grad_norm": 6.68379545211792, "learning_rate": 1.9997315187441452e-05, "loss": 0.23, "step": 225 }, { "epoch": 0.1071343920360275, "grad_norm": 3.6642472743988037, "learning_rate": 1.9996917965173415e-05, "loss": 0.1773, "step": 226 }, { "epoch": 0.1076084380184878, "grad_norm": 6.345001697540283, "learning_rate": 1.9996493354083074e-05, "loss": 0.1915, "step": 227 }, { "epoch": 0.1080824840009481, "grad_norm": 5.718410015106201, "learning_rate": 1.9996041355333745e-05, "loss": 0.2641, "step": 228 }, { "epoch": 0.1085565299834084, "grad_norm": 5.814205646514893, "learning_rate": 1.9995561970163784e-05, "loss": 0.2935, "step": 229 }, { "epoch": 0.1090305759658687, "grad_norm": 5.936671733856201, "learning_rate": 1.9995055199886577e-05, "loss": 0.2259, "step": 230 }, { "epoch": 0.10950462194832898, "grad_norm": 4.607608318328857, "learning_rate": 1.9994521045890535e-05, "loss": 0.2704, "step": 231 }, { "epoch": 0.10997866793078928, "grad_norm": 5.686770439147949, "learning_rate": 1.9993959509639094e-05, "loss": 0.3073, "step": 232 }, { "epoch": 0.11045271391324958, "grad_norm": 5.333606719970703, "learning_rate": 1.999337059267071e-05, "loss": 0.2245, "step": 233 }, { "epoch": 0.11092675989570988, "grad_norm": 
5.424774169921875, "learning_rate": 1.999275429659885e-05, "loss": 0.2453, "step": 234 }, { "epoch": 0.11140080587817018, "grad_norm": 5.3027424812316895, "learning_rate": 1.9992110623112004e-05, "loss": 0.2461, "step": 235 }, { "epoch": 0.11187485186063048, "grad_norm": 4.529107093811035, "learning_rate": 1.9991439573973655e-05, "loss": 0.2287, "step": 236 }, { "epoch": 0.11234889784309078, "grad_norm": 4.9876203536987305, "learning_rate": 1.9990741151022302e-05, "loss": 0.2031, "step": 237 }, { "epoch": 0.11282294382555108, "grad_norm": 7.0406270027160645, "learning_rate": 1.9990015356171426e-05, "loss": 0.1707, "step": 238 }, { "epoch": 0.11329698980801138, "grad_norm": 5.140834331512451, "learning_rate": 1.9989262191409505e-05, "loss": 0.2308, "step": 239 }, { "epoch": 0.11377103579047168, "grad_norm": 15.711700439453125, "learning_rate": 1.9988481658800012e-05, "loss": 0.4088, "step": 240 }, { "epoch": 0.11377103579047168, "eval_accuracy": 0.9814814814814815, "eval_f1": 0.7927927927927928, "eval_loss": 0.05809846892952919, "eval_precision": 0.7457627118644068, "eval_recall": 0.8461538461538461, "eval_runtime": 49.5702, "eval_samples_per_second": 5.467, "eval_steps_per_second": 0.182, "step": 240 }, { "epoch": 0.11424508177293198, "grad_norm": 6.078300476074219, "learning_rate": 1.9987673760481387e-05, "loss": 0.244, "step": 241 }, { "epoch": 0.11471912775539227, "grad_norm": 4.839064121246338, "learning_rate": 1.9986838498667053e-05, "loss": 0.2577, "step": 242 }, { "epoch": 0.11519317373785257, "grad_norm": 5.210578918457031, "learning_rate": 1.99859758756454e-05, "loss": 0.3084, "step": 243 }, { "epoch": 0.11566721972031287, "grad_norm": 9.506908416748047, "learning_rate": 1.9985085893779776e-05, "loss": 0.2728, "step": 244 }, { "epoch": 0.11614126570277317, "grad_norm": 5.610825538635254, "learning_rate": 1.9984168555508493e-05, "loss": 0.2323, "step": 245 }, { "epoch": 0.11661531168523347, "grad_norm": 4.386003017425537, "learning_rate": 1.99832238633448e-05, "loss": 0.1998, "step": 246 }, { "epoch": 0.11708935766769377, "grad_norm": 5.057579040527344, "learning_rate": 1.99822518198769e-05, "loss": 0.1713, "step": 247 }, { "epoch": 0.11756340365015407, "grad_norm": 6.064876079559326, "learning_rate": 1.998125242776793e-05, "loss": 0.2943, "step": 248 }, { "epoch": 0.11803744963261437, "grad_norm": 6.748780250549316, "learning_rate": 1.998022568975594e-05, "loss": 0.2065, "step": 249 }, { "epoch": 0.11851149561507467, "grad_norm": 7.5252275466918945, "learning_rate": 1.9979171608653926e-05, "loss": 0.202, "step": 250 }, { "epoch": 0.11898554159753497, "grad_norm": 3.4074177742004395, "learning_rate": 1.9978090187349766e-05, "loss": 0.1671, "step": 251 }, { "epoch": 0.11945958757999527, "grad_norm": 6.935286045074463, "learning_rate": 1.9976981428806272e-05, "loss": 0.2896, "step": 252 }, { "epoch": 0.11993363356245555, "grad_norm": 5.655433654785156, "learning_rate": 1.9975845336061134e-05, "loss": 0.2807, "step": 253 }, { "epoch": 0.12040767954491585, "grad_norm": 4.174770355224609, "learning_rate": 1.9974681912226934e-05, "loss": 0.2367, "step": 254 }, { "epoch": 0.12088172552737615, "grad_norm": 4.716368198394775, "learning_rate": 1.9973491160491137e-05, "loss": 0.2013, "step": 255 }, { "epoch": 0.12135577150983645, "grad_norm": 6.766290664672852, "learning_rate": 1.9972273084116082e-05, "loss": 0.222, "step": 256 }, { "epoch": 0.12182981749229675, "grad_norm": 4.768441677093506, "learning_rate": 1.9971027686438956e-05, "loss": 0.2788, "step": 257 }, { "epoch": 
0.12230386347475705, "grad_norm": 4.213677883148193, "learning_rate": 1.9969754970871818e-05, "loss": 0.2942, "step": 258 }, { "epoch": 0.12277790945721735, "grad_norm": 5.842061519622803, "learning_rate": 1.996845494090155e-05, "loss": 0.313, "step": 259 }, { "epoch": 0.12325195543967765, "grad_norm": 4.989866256713867, "learning_rate": 1.9967127600089885e-05, "loss": 0.2598, "step": 260 }, { "epoch": 0.12325195543967765, "eval_accuracy": 0.9814814814814815, "eval_f1": 0.7927927927927928, "eval_loss": 0.05309082940220833, "eval_precision": 0.7457627118644068, "eval_recall": 0.8461538461538461, "eval_runtime": 49.7187, "eval_samples_per_second": 5.451, "eval_steps_per_second": 0.181, "step": 260 }, { "epoch": 0.12372600142213795, "grad_norm": 5.472978115081787, "learning_rate": 1.9965772952073376e-05, "loss": 0.2116, "step": 261 }, { "epoch": 0.12420004740459825, "grad_norm": 6.880504131317139, "learning_rate": 1.9964391000563382e-05, "loss": 0.2416, "step": 262 }, { "epoch": 0.12467409338705855, "grad_norm": 6.8643951416015625, "learning_rate": 1.996298174934608e-05, "loss": 0.224, "step": 263 }, { "epoch": 0.12514813936951885, "grad_norm": 5.625668048858643, "learning_rate": 1.9961545202282427e-05, "loss": 0.2725, "step": 264 }, { "epoch": 0.12562218535197914, "grad_norm": 4.625347137451172, "learning_rate": 1.996008136330817e-05, "loss": 0.1988, "step": 265 }, { "epoch": 0.12609623133443945, "grad_norm": 9.558283805847168, "learning_rate": 1.995859023643383e-05, "loss": 0.2296, "step": 266 }, { "epoch": 0.12657027731689974, "grad_norm": 4.514327526092529, "learning_rate": 1.9957071825744685e-05, "loss": 0.2054, "step": 267 }, { "epoch": 0.12704432329936005, "grad_norm": 8.091195106506348, "learning_rate": 1.9955526135400768e-05, "loss": 0.285, "step": 268 }, { "epoch": 0.12751836928182034, "grad_norm": 5.1086955070495605, "learning_rate": 1.995395316963684e-05, "loss": 0.2608, "step": 269 }, { "epoch": 0.12799241526428062, "grad_norm": 6.730742454528809, "learning_rate": 1.995235293276241e-05, "loss": 0.2596, "step": 270 }, { "epoch": 0.12846646124674094, "grad_norm": 7.361924648284912, "learning_rate": 1.995072542916168e-05, "loss": 0.1556, "step": 271 }, { "epoch": 0.12894050722920122, "grad_norm": 4.560647964477539, "learning_rate": 1.9949070663293567e-05, "loss": 0.1593, "step": 272 }, { "epoch": 0.12941455321166154, "grad_norm": 6.983797550201416, "learning_rate": 1.9947388639691678e-05, "loss": 0.2274, "step": 273 }, { "epoch": 0.12988859919412182, "grad_norm": 11.56694507598877, "learning_rate": 1.9945679362964298e-05, "loss": 0.1783, "step": 274 }, { "epoch": 0.13036264517658214, "grad_norm": 11.798758506774902, "learning_rate": 1.9943942837794378e-05, "loss": 0.2378, "step": 275 }, { "epoch": 0.13083669115904242, "grad_norm": 7.2937421798706055, "learning_rate": 1.994217906893952e-05, "loss": 0.2854, "step": 276 }, { "epoch": 0.13131073714150274, "grad_norm": 6.609043598175049, "learning_rate": 1.994038806123197e-05, "loss": 0.1323, "step": 277 }, { "epoch": 0.13178478312396302, "grad_norm": 8.741644859313965, "learning_rate": 1.9938569819578605e-05, "loss": 0.2774, "step": 278 }, { "epoch": 0.13225882910642334, "grad_norm": 9.836929321289062, "learning_rate": 1.9936724348960902e-05, "loss": 0.2584, "step": 279 }, { "epoch": 0.13273287508888362, "grad_norm": 5.216606140136719, "learning_rate": 1.9934851654434945e-05, "loss": 0.1978, "step": 280 }, { "epoch": 0.13273287508888362, "eval_accuracy": 0.9863123993558777, "eval_f1": 0.8440366972477065, "eval_loss": 
0.04283216968178749, "eval_precision": 0.8070175438596491, "eval_recall": 0.8846153846153846, "eval_runtime": 51.2008, "eval_samples_per_second": 5.293, "eval_steps_per_second": 0.176, "step": 280 }, { "epoch": 0.1332069210713439, "grad_norm": 6.529329776763916, "learning_rate": 1.9932951741131413e-05, "loss": 0.2374, "step": 281 }, { "epoch": 0.13368096705380422, "grad_norm": 7.7884840965271, "learning_rate": 1.9931024614255542e-05, "loss": 0.2425, "step": 282 }, { "epoch": 0.1341550130362645, "grad_norm": 8.573697090148926, "learning_rate": 1.9929070279087137e-05, "loss": 0.2216, "step": 283 }, { "epoch": 0.13462905901872482, "grad_norm": 5.860241413116455, "learning_rate": 1.992708874098054e-05, "loss": 0.2176, "step": 284 }, { "epoch": 0.1351031050011851, "grad_norm": 11.865449905395508, "learning_rate": 1.9925080005364628e-05, "loss": 0.203, "step": 285 }, { "epoch": 0.13557715098364542, "grad_norm": 10.339160919189453, "learning_rate": 1.992304407774278e-05, "loss": 0.3055, "step": 286 }, { "epoch": 0.1360511969661057, "grad_norm": 7.775436878204346, "learning_rate": 1.9920980963692887e-05, "loss": 0.2248, "step": 287 }, { "epoch": 0.13652524294856602, "grad_norm": 5.498456954956055, "learning_rate": 1.9918890668867315e-05, "loss": 0.2034, "step": 288 }, { "epoch": 0.1369992889310263, "grad_norm": 5.207095146179199, "learning_rate": 1.99167731989929e-05, "loss": 0.2228, "step": 289 }, { "epoch": 0.1374733349134866, "grad_norm": 9.163966178894043, "learning_rate": 1.9914628559870933e-05, "loss": 0.1774, "step": 290 }, { "epoch": 0.1379473808959469, "grad_norm": 8.55766487121582, "learning_rate": 1.9912456757377138e-05, "loss": 0.2174, "step": 291 }, { "epoch": 0.1384214268784072, "grad_norm": 7.868490219116211, "learning_rate": 1.991025779746166e-05, "loss": 0.3014, "step": 292 }, { "epoch": 0.1388954728608675, "grad_norm": 7.967019081115723, "learning_rate": 1.9908031686149045e-05, "loss": 0.2616, "step": 293 }, { "epoch": 0.1393695188433278, "grad_norm": 6.3528008460998535, "learning_rate": 1.9905778429538232e-05, "loss": 0.2771, "step": 294 }, { "epoch": 0.1398435648257881, "grad_norm": 6.009703636169434, "learning_rate": 1.990349803380253e-05, "loss": 0.2595, "step": 295 }, { "epoch": 0.1403176108082484, "grad_norm": 7.102011680603027, "learning_rate": 1.9901190505189597e-05, "loss": 0.2498, "step": 296 }, { "epoch": 0.1407916567907087, "grad_norm": 4.991247177124023, "learning_rate": 1.9898855850021436e-05, "loss": 0.1678, "step": 297 }, { "epoch": 0.141265702773169, "grad_norm": 6.850471019744873, "learning_rate": 1.9896494074694352e-05, "loss": 0.3189, "step": 298 }, { "epoch": 0.1417397487556293, "grad_norm": 5.671426296234131, "learning_rate": 1.9894105185678977e-05, "loss": 0.2433, "step": 299 }, { "epoch": 0.1422137947380896, "grad_norm": 3.282658815383911, "learning_rate": 1.9891689189520208e-05, "loss": 0.1661, "step": 300 }, { "epoch": 0.1422137947380896, "eval_accuracy": 0.9838969404186796, "eval_f1": 0.8275862068965517, "eval_loss": 0.04517379775643349, "eval_precision": 0.75, "eval_recall": 0.9230769230769231, "eval_runtime": 49.5687, "eval_samples_per_second": 5.467, "eval_steps_per_second": 0.182, "step": 300 }, { "epoch": 0.14268784072054988, "grad_norm": 3.66693115234375, "learning_rate": 1.9889246092837208e-05, "loss": 0.1176, "step": 301 }, { "epoch": 0.1431618867030102, "grad_norm": 8.140231132507324, "learning_rate": 1.9886775902323405e-05, "loss": 0.2733, "step": 302 }, { "epoch": 0.14363593268547048, "grad_norm": 6.566082000732422, "learning_rate": 
1.9884278624746442e-05, "loss": 0.229, "step": 303 }, { "epoch": 0.1441099786679308, "grad_norm": 8.189047813415527, "learning_rate": 1.9881754266948174e-05, "loss": 0.3327, "step": 304 }, { "epoch": 0.14458402465039108, "grad_norm": 7.237288951873779, "learning_rate": 1.987920283584465e-05, "loss": 0.2679, "step": 305 }, { "epoch": 0.1450580706328514, "grad_norm": 6.233216285705566, "learning_rate": 1.9876624338426103e-05, "loss": 0.237, "step": 306 }, { "epoch": 0.14553211661531168, "grad_norm": 8.068488121032715, "learning_rate": 1.9874018781756898e-05, "loss": 0.2285, "step": 307 }, { "epoch": 0.146006162597772, "grad_norm": 5.228760242462158, "learning_rate": 1.987138617297556e-05, "loss": 0.2345, "step": 308 }, { "epoch": 0.14648020858023228, "grad_norm": 4.717254638671875, "learning_rate": 1.986872651929471e-05, "loss": 0.2223, "step": 309 }, { "epoch": 0.1469542545626926, "grad_norm": 5.439334869384766, "learning_rate": 1.9866039828001075e-05, "loss": 0.2279, "step": 310 }, { "epoch": 0.14742830054515288, "grad_norm": 3.6440024375915527, "learning_rate": 1.986332610645545e-05, "loss": 0.133, "step": 311 }, { "epoch": 0.14790234652761317, "grad_norm": 5.25577974319458, "learning_rate": 1.98605853620927e-05, "loss": 0.2529, "step": 312 }, { "epoch": 0.14837639251007348, "grad_norm": 4.644561290740967, "learning_rate": 1.9857817602421705e-05, "loss": 0.2291, "step": 313 }, { "epoch": 0.14885043849253377, "grad_norm": 5.024320602416992, "learning_rate": 1.9855022835025373e-05, "loss": 0.2105, "step": 314 }, { "epoch": 0.14932448447499408, "grad_norm": 6.0894341468811035, "learning_rate": 1.9852201067560607e-05, "loss": 0.2932, "step": 315 }, { "epoch": 0.14979853045745437, "grad_norm": 7.116171836853027, "learning_rate": 1.9849352307758277e-05, "loss": 0.3021, "step": 316 }, { "epoch": 0.15027257643991468, "grad_norm": 7.116308212280273, "learning_rate": 1.98464765634232e-05, "loss": 0.1849, "step": 317 }, { "epoch": 0.15074662242237497, "grad_norm": 5.270240306854248, "learning_rate": 1.9843573842434135e-05, "loss": 0.2173, "step": 318 }, { "epoch": 0.15122066840483528, "grad_norm": 8.557943344116211, "learning_rate": 1.9840644152743742e-05, "loss": 0.1988, "step": 319 }, { "epoch": 0.15169471438729557, "grad_norm": 5.420962333679199, "learning_rate": 1.983768750237857e-05, "loss": 0.2944, "step": 320 }, { "epoch": 0.15169471438729557, "eval_accuracy": 0.9814814814814815, "eval_f1": 0.8099173553719008, "eval_loss": 0.05109033361077309, "eval_precision": 0.7101449275362319, "eval_recall": 0.9423076923076923, "eval_runtime": 50.8711, "eval_samples_per_second": 5.327, "eval_steps_per_second": 0.177, "step": 320 }, { "epoch": 0.15216876036975588, "grad_norm": 7.241580486297607, "learning_rate": 1.9834703899439035e-05, "loss": 0.2685, "step": 321 }, { "epoch": 0.15264280635221616, "grad_norm": 5.041566848754883, "learning_rate": 1.983169335209939e-05, "loss": 0.1973, "step": 322 }, { "epoch": 0.15311685233467645, "grad_norm": 6.842787742614746, "learning_rate": 1.9828655868607712e-05, "loss": 0.2339, "step": 323 }, { "epoch": 0.15359089831713676, "grad_norm": 7.793763160705566, "learning_rate": 1.982559145728588e-05, "loss": 0.1291, "step": 324 }, { "epoch": 0.15406494429959705, "grad_norm": 3.715549945831299, "learning_rate": 1.9822500126529536e-05, "loss": 0.189, "step": 325 }, { "epoch": 0.15453899028205736, "grad_norm": 6.653814792633057, "learning_rate": 1.981938188480809e-05, "loss": 0.2426, "step": 326 }, { "epoch": 0.15501303626451765, "grad_norm": 6.137849807739258, 
"learning_rate": 1.9816236740664664e-05, "loss": 0.2193, "step": 327 }, { "epoch": 0.15548708224697796, "grad_norm": 4.381377220153809, "learning_rate": 1.9813064702716094e-05, "loss": 0.2413, "step": 328 }, { "epoch": 0.15596112822943825, "grad_norm": 5.145009517669678, "learning_rate": 1.9809865779652902e-05, "loss": 0.2195, "step": 329 }, { "epoch": 0.15643517421189856, "grad_norm": 6.10648775100708, "learning_rate": 1.9806639980239257e-05, "loss": 0.2355, "step": 330 }, { "epoch": 0.15690922019435885, "grad_norm": 7.384158611297607, "learning_rate": 1.9803387313312973e-05, "loss": 0.2139, "step": 331 }, { "epoch": 0.15738326617681916, "grad_norm": 5.342582702636719, "learning_rate": 1.9800107787785462e-05, "loss": 0.2236, "step": 332 }, { "epoch": 0.15785731215927945, "grad_norm": 4.06926965713501, "learning_rate": 1.9796801412641735e-05, "loss": 0.2268, "step": 333 }, { "epoch": 0.15833135814173974, "grad_norm": 7.875843524932861, "learning_rate": 1.979346819694035e-05, "loss": 0.2154, "step": 334 }, { "epoch": 0.15880540412420005, "grad_norm": 9.37292766571045, "learning_rate": 1.9790108149813407e-05, "loss": 0.2205, "step": 335 }, { "epoch": 0.15927945010666034, "grad_norm": 6.9922075271606445, "learning_rate": 1.978672128046652e-05, "loss": 0.151, "step": 336 }, { "epoch": 0.15975349608912065, "grad_norm": 8.968308448791504, "learning_rate": 1.9783307598178784e-05, "loss": 0.2198, "step": 337 }, { "epoch": 0.16022754207158094, "grad_norm": 3.3544418811798096, "learning_rate": 1.977986711230275e-05, "loss": 0.1465, "step": 338 }, { "epoch": 0.16070158805404125, "grad_norm": 15.146354675292969, "learning_rate": 1.9776399832264416e-05, "loss": 0.3098, "step": 339 }, { "epoch": 0.16117563403650154, "grad_norm": 5.6771721839904785, "learning_rate": 1.9772905767563176e-05, "loss": 0.2463, "step": 340 }, { "epoch": 0.16117563403650154, "eval_accuracy": 0.9847020933977456, "eval_f1": 0.8403361344537815, "eval_loss": 0.03791453689336777, "eval_precision": 0.746268656716418, "eval_recall": 0.9615384615384616, "eval_runtime": 49.9027, "eval_samples_per_second": 5.431, "eval_steps_per_second": 0.18, "step": 340 }, { "epoch": 0.16164968001896185, "grad_norm": 8.764738082885742, "learning_rate": 1.976938492777182e-05, "loss": 0.2521, "step": 341 }, { "epoch": 0.16212372600142214, "grad_norm": 10.894025802612305, "learning_rate": 1.9765837322536476e-05, "loss": 0.2435, "step": 342 }, { "epoch": 0.16259777198388245, "grad_norm": 12.002713203430176, "learning_rate": 1.976226296157662e-05, "loss": 0.26, "step": 343 }, { "epoch": 0.16307181796634274, "grad_norm": 8.176495552062988, "learning_rate": 1.975866185468502e-05, "loss": 0.26, "step": 344 }, { "epoch": 0.16354586394880302, "grad_norm": 5.620816230773926, "learning_rate": 1.975503401172773e-05, "loss": 0.1916, "step": 345 }, { "epoch": 0.16401990993126334, "grad_norm": 5.21535587310791, "learning_rate": 1.975137944264404e-05, "loss": 0.2001, "step": 346 }, { "epoch": 0.16449395591372362, "grad_norm": 5.709356307983398, "learning_rate": 1.974769815744648e-05, "loss": 0.2117, "step": 347 }, { "epoch": 0.16496800189618394, "grad_norm": 5.04941987991333, "learning_rate": 1.974399016622075e-05, "loss": 0.1571, "step": 348 }, { "epoch": 0.16544204787864422, "grad_norm": 6.760519981384277, "learning_rate": 1.9740255479125747e-05, "loss": 0.2748, "step": 349 }, { "epoch": 0.16591609386110454, "grad_norm": 3.6168932914733887, "learning_rate": 1.9736494106393488e-05, "loss": 0.1107, "step": 350 }, { "epoch": 0.16639013984356482, "grad_norm": 
8.968782424926758, "learning_rate": 1.9732706058329106e-05, "loss": 0.2859, "step": 351 }, { "epoch": 0.16686418582602514, "grad_norm": 4.959980487823486, "learning_rate": 1.972889134531081e-05, "loss": 0.1641, "step": 352 }, { "epoch": 0.16733823180848542, "grad_norm": 4.210104942321777, "learning_rate": 1.9725049977789883e-05, "loss": 0.1568, "step": 353 }, { "epoch": 0.16781227779094574, "grad_norm": 7.096119403839111, "learning_rate": 1.9721181966290614e-05, "loss": 0.1671, "step": 354 }, { "epoch": 0.16828632377340602, "grad_norm": 12.163064956665039, "learning_rate": 1.97172873214103e-05, "loss": 0.1913, "step": 355 }, { "epoch": 0.1687603697558663, "grad_norm": 4.686140060424805, "learning_rate": 1.9713366053819203e-05, "loss": 0.143, "step": 356 }, { "epoch": 0.16923441573832662, "grad_norm": 6.255122184753418, "learning_rate": 1.9709418174260523e-05, "loss": 0.2272, "step": 357 }, { "epoch": 0.1697084617207869, "grad_norm": 11.309954643249512, "learning_rate": 1.970544369355037e-05, "loss": 0.2876, "step": 358 }, { "epoch": 0.17018250770324722, "grad_norm": 5.268868923187256, "learning_rate": 1.9701442622577736e-05, "loss": 0.0876, "step": 359 }, { "epoch": 0.1706565536857075, "grad_norm": 5.804551601409912, "learning_rate": 1.9697414972304462e-05, "loss": 0.1767, "step": 360 }, { "epoch": 0.1706565536857075, "eval_accuracy": 0.9871175523349437, "eval_f1": 0.8620689655172413, "eval_loss": 0.03791343793272972, "eval_precision": 0.78125, "eval_recall": 0.9615384615384616, "eval_runtime": 51.2049, "eval_samples_per_second": 5.292, "eval_steps_per_second": 0.176, "step": 360 }, { "epoch": 0.17113059966816782, "grad_norm": 5.9536004066467285, "learning_rate": 1.969336075376521e-05, "loss": 0.1479, "step": 361 }, { "epoch": 0.1716046456506281, "grad_norm": 5.082642078399658, "learning_rate": 1.9689279978067427e-05, "loss": 0.1686, "step": 362 }, { "epoch": 0.17207869163308842, "grad_norm": 4.041834354400635, "learning_rate": 1.9685172656391326e-05, "loss": 0.1222, "step": 363 }, { "epoch": 0.1725527376155487, "grad_norm": 5.533458232879639, "learning_rate": 1.9681038799989843e-05, "loss": 0.2355, "step": 364 }, { "epoch": 0.17302678359800902, "grad_norm": 6.991927146911621, "learning_rate": 1.9676878420188618e-05, "loss": 0.1727, "step": 365 }, { "epoch": 0.1735008295804693, "grad_norm": 6.4098944664001465, "learning_rate": 1.9672691528385947e-05, "loss": 0.2689, "step": 366 }, { "epoch": 0.1739748755629296, "grad_norm": 5.587769508361816, "learning_rate": 1.9668478136052776e-05, "loss": 0.2517, "step": 367 }, { "epoch": 0.1744489215453899, "grad_norm": 9.980069160461426, "learning_rate": 1.9664238254732648e-05, "loss": 0.2402, "step": 368 }, { "epoch": 0.1749229675278502, "grad_norm": 8.207301139831543, "learning_rate": 1.9659971896041673e-05, "loss": 0.1865, "step": 369 }, { "epoch": 0.1753970135103105, "grad_norm": 8.306730270385742, "learning_rate": 1.9655679071668516e-05, "loss": 0.2575, "step": 370 }, { "epoch": 0.1758710594927708, "grad_norm": 8.415144920349121, "learning_rate": 1.9651359793374332e-05, "loss": 0.1954, "step": 371 }, { "epoch": 0.1763451054752311, "grad_norm": 5.703969478607178, "learning_rate": 1.9647014072992775e-05, "loss": 0.2631, "step": 372 }, { "epoch": 0.1768191514576914, "grad_norm": 10.404855728149414, "learning_rate": 1.9642641922429918e-05, "loss": 0.2563, "step": 373 }, { "epoch": 0.1772931974401517, "grad_norm": 6.341281890869141, "learning_rate": 1.9638243353664267e-05, "loss": 0.2554, "step": 374 }, { "epoch": 0.177767243422612, "grad_norm": 
4.921003341674805, "learning_rate": 1.9633818378746694e-05, "loss": 0.109, "step": 375 }, { "epoch": 0.17824128940507228, "grad_norm": 7.55433464050293, "learning_rate": 1.962936700980042e-05, "loss": 0.2655, "step": 376 }, { "epoch": 0.1787153353875326, "grad_norm": 6.300623893737793, "learning_rate": 1.9624889259020987e-05, "loss": 0.1777, "step": 377 }, { "epoch": 0.17918938136999288, "grad_norm": 8.185931205749512, "learning_rate": 1.96203851386762e-05, "loss": 0.2699, "step": 378 }, { "epoch": 0.1796634273524532, "grad_norm": 4.677395343780518, "learning_rate": 1.9615854661106116e-05, "loss": 0.1681, "step": 379 }, { "epoch": 0.18013747333491348, "grad_norm": 3.601335048675537, "learning_rate": 1.961129783872301e-05, "loss": 0.1581, "step": 380 }, { "epoch": 0.18013747333491348, "eval_accuracy": 0.9903381642512077, "eval_f1": 0.8888888888888888, "eval_loss": 0.03145228698849678, "eval_precision": 0.8571428571428571, "eval_recall": 0.9230769230769231, "eval_runtime": 49.7191, "eval_samples_per_second": 5.451, "eval_steps_per_second": 0.181, "step": 380 }, { "epoch": 0.1806115193173738, "grad_norm": 3.093956232070923, "learning_rate": 1.9606714684011328e-05, "loss": 0.139, "step": 381 }, { "epoch": 0.18108556529983408, "grad_norm": 8.644227027893066, "learning_rate": 1.960210520952766e-05, "loss": 0.2294, "step": 382 }, { "epoch": 0.1815596112822944, "grad_norm": 5.26225471496582, "learning_rate": 1.9597469427900704e-05, "loss": 0.255, "step": 383 }, { "epoch": 0.18203365726475468, "grad_norm": 9.564775466918945, "learning_rate": 1.9592807351831244e-05, "loss": 0.2538, "step": 384 }, { "epoch": 0.182507703247215, "grad_norm": 4.813824653625488, "learning_rate": 1.9588118994092086e-05, "loss": 0.1689, "step": 385 }, { "epoch": 0.18298174922967528, "grad_norm": 8.66640853881836, "learning_rate": 1.9583404367528044e-05, "loss": 0.1746, "step": 386 }, { "epoch": 0.18345579521213556, "grad_norm": 8.124728202819824, "learning_rate": 1.957866348505592e-05, "loss": 0.1574, "step": 387 }, { "epoch": 0.18392984119459588, "grad_norm": 4.7182841300964355, "learning_rate": 1.957389635966442e-05, "loss": 0.1478, "step": 388 }, { "epoch": 0.18440388717705616, "grad_norm": 6.4295806884765625, "learning_rate": 1.9569103004414174e-05, "loss": 0.2611, "step": 389 }, { "epoch": 0.18487793315951648, "grad_norm": 7.237591743469238, "learning_rate": 1.9564283432437664e-05, "loss": 0.3119, "step": 390 }, { "epoch": 0.18535197914197676, "grad_norm": 9.914932250976562, "learning_rate": 1.9559437656939197e-05, "loss": 0.2644, "step": 391 }, { "epoch": 0.18582602512443708, "grad_norm": 9.77021312713623, "learning_rate": 1.9554565691194875e-05, "loss": 0.2911, "step": 392 }, { "epoch": 0.18630007110689736, "grad_norm": 6.747690677642822, "learning_rate": 1.9549667548552557e-05, "loss": 0.1652, "step": 393 }, { "epoch": 0.18677411708935768, "grad_norm": 4.450872898101807, "learning_rate": 1.9544743242431804e-05, "loss": 0.1541, "step": 394 }, { "epoch": 0.18724816307181796, "grad_norm": 4.8731794357299805, "learning_rate": 1.9539792786323874e-05, "loss": 0.2217, "step": 395 }, { "epoch": 0.18772220905427828, "grad_norm": 7.449690818786621, "learning_rate": 1.9534816193791664e-05, "loss": 0.3818, "step": 396 }, { "epoch": 0.18819625503673856, "grad_norm": 7.16092586517334, "learning_rate": 1.952981347846968e-05, "loss": 0.2265, "step": 397 }, { "epoch": 0.18867030101919885, "grad_norm": 4.920149326324463, "learning_rate": 1.9524784654063988e-05, "loss": 0.3079, "step": 398 }, { "epoch": 0.18914434700165916, 
"grad_norm": 4.092027187347412, "learning_rate": 1.9519729734352196e-05, "loss": 0.1891, "step": 399 }, { "epoch": 0.18961839298411945, "grad_norm": 3.7005388736724854, "learning_rate": 1.95146487331834e-05, "loss": 0.1162, "step": 400 }, { "epoch": 0.18961839298411945, "eval_accuracy": 0.9863123993558777, "eval_f1": 0.8547008547008547, "eval_loss": 0.03776899352669716, "eval_precision": 0.7692307692307693, "eval_recall": 0.9615384615384616, "eval_runtime": 50.1939, "eval_samples_per_second": 5.399, "eval_steps_per_second": 0.179, "step": 400 }, { "epoch": 0.19009243896657976, "grad_norm": 4.189998626708984, "learning_rate": 1.950954166447816e-05, "loss": 0.2315, "step": 401 }, { "epoch": 0.19056648494904005, "grad_norm": 7.759528636932373, "learning_rate": 1.950440854222844e-05, "loss": 0.1772, "step": 402 }, { "epoch": 0.19104053093150036, "grad_norm": 3.0708417892456055, "learning_rate": 1.9499249380497597e-05, "loss": 0.1683, "step": 403 }, { "epoch": 0.19151457691396065, "grad_norm": 6.50071907043457, "learning_rate": 1.949406419342032e-05, "loss": 0.2347, "step": 404 }, { "epoch": 0.19198862289642096, "grad_norm": 7.221626281738281, "learning_rate": 1.948885299520261e-05, "loss": 0.2549, "step": 405 }, { "epoch": 0.19246266887888125, "grad_norm": 2.5001566410064697, "learning_rate": 1.9483615800121717e-05, "loss": 0.099, "step": 406 }, { "epoch": 0.19293671486134156, "grad_norm": 6.132114410400391, "learning_rate": 1.947835262252613e-05, "loss": 0.1272, "step": 407 }, { "epoch": 0.19341076084380185, "grad_norm": 7.621204376220703, "learning_rate": 1.9473063476835518e-05, "loss": 0.1659, "step": 408 }, { "epoch": 0.19388480682626213, "grad_norm": 5.252957820892334, "learning_rate": 1.946774837754069e-05, "loss": 0.1525, "step": 409 }, { "epoch": 0.19435885280872245, "grad_norm": 6.265228271484375, "learning_rate": 1.946240733920356e-05, "loss": 0.304, "step": 410 }, { "epoch": 0.19483289879118273, "grad_norm": 6.704779624938965, "learning_rate": 1.945704037645713e-05, "loss": 0.1892, "step": 411 }, { "epoch": 0.19530694477364305, "grad_norm": 6.787877559661865, "learning_rate": 1.9451647504005394e-05, "loss": 0.2538, "step": 412 }, { "epoch": 0.19578099075610333, "grad_norm": 3.9719345569610596, "learning_rate": 1.9446228736623355e-05, "loss": 0.1344, "step": 413 }, { "epoch": 0.19625503673856365, "grad_norm": 5.928083896636963, "learning_rate": 1.9440784089156955e-05, "loss": 0.2413, "step": 414 }, { "epoch": 0.19672908272102393, "grad_norm": 3.9046196937561035, "learning_rate": 1.9435313576523037e-05, "loss": 0.2113, "step": 415 }, { "epoch": 0.19720312870348425, "grad_norm": 11.968066215515137, "learning_rate": 1.942981721370931e-05, "loss": 0.1943, "step": 416 }, { "epoch": 0.19767717468594453, "grad_norm": 6.351183891296387, "learning_rate": 1.942429501577431e-05, "loss": 0.1533, "step": 417 }, { "epoch": 0.19815122066840485, "grad_norm": 5.750969886779785, "learning_rate": 1.9418746997847347e-05, "loss": 0.2765, "step": 418 }, { "epoch": 0.19862526665086513, "grad_norm": 5.9713850021362305, "learning_rate": 1.9413173175128472e-05, "loss": 0.1987, "step": 419 }, { "epoch": 0.19909931263332542, "grad_norm": 4.042611122131348, "learning_rate": 1.9407573562888437e-05, "loss": 0.1442, "step": 420 }, { "epoch": 0.19909931263332542, "eval_accuracy": 0.9855072463768116, "eval_f1": 0.8448275862068966, "eval_loss": 0.039507586508989334, "eval_precision": 0.765625, "eval_recall": 0.9423076923076923, "eval_runtime": 48.9137, "eval_samples_per_second": 5.54, "eval_steps_per_second": 0.184, 
"step": 420 }, { "epoch": 0.19957335861578573, "grad_norm": 5.3435282707214355, "learning_rate": 1.9401948176468645e-05, "loss": 0.2182, "step": 421 }, { "epoch": 0.20004740459824602, "grad_norm": 7.2359747886657715, "learning_rate": 1.9396297031281124e-05, "loss": 0.2538, "step": 422 }, { "epoch": 0.20052145058070633, "grad_norm": 4.870368480682373, "learning_rate": 1.9390620142808462e-05, "loss": 0.2151, "step": 423 }, { "epoch": 0.20099549656316662, "grad_norm": 4.006406307220459, "learning_rate": 1.9384917526603783e-05, "loss": 0.254, "step": 424 }, { "epoch": 0.20146954254562693, "grad_norm": 5.7069244384765625, "learning_rate": 1.9379189198290697e-05, "loss": 0.2183, "step": 425 }, { "epoch": 0.20194358852808722, "grad_norm": 6.024723529815674, "learning_rate": 1.9373435173563257e-05, "loss": 0.2386, "step": 426 }, { "epoch": 0.20241763451054753, "grad_norm": 9.129636764526367, "learning_rate": 1.9367655468185913e-05, "loss": 0.2554, "step": 427 }, { "epoch": 0.20289168049300782, "grad_norm": 6.194615364074707, "learning_rate": 1.9361850097993487e-05, "loss": 0.2658, "step": 428 }, { "epoch": 0.20336572647546813, "grad_norm": 8.10988712310791, "learning_rate": 1.9356019078891098e-05, "loss": 0.2046, "step": 429 }, { "epoch": 0.20383977245792842, "grad_norm": 3.277400493621826, "learning_rate": 1.9350162426854152e-05, "loss": 0.191, "step": 430 }, { "epoch": 0.2043138184403887, "grad_norm": 4.1480278968811035, "learning_rate": 1.9344280157928265e-05, "loss": 0.1775, "step": 431 }, { "epoch": 0.20478786442284902, "grad_norm": 5.914576530456543, "learning_rate": 1.9338372288229253e-05, "loss": 0.2799, "step": 432 }, { "epoch": 0.2052619104053093, "grad_norm": 4.470582008361816, "learning_rate": 1.9332438833943065e-05, "loss": 0.144, "step": 433 }, { "epoch": 0.20573595638776962, "grad_norm": 5.149271488189697, "learning_rate": 1.9326479811325736e-05, "loss": 0.1882, "step": 434 }, { "epoch": 0.2062100023702299, "grad_norm": 5.199272632598877, "learning_rate": 1.9320495236703366e-05, "loss": 0.2096, "step": 435 }, { "epoch": 0.20668404835269022, "grad_norm": 4.065655708312988, "learning_rate": 1.931448512647205e-05, "loss": 0.1786, "step": 436 }, { "epoch": 0.2071580943351505, "grad_norm": 3.961599111557007, "learning_rate": 1.9308449497097847e-05, "loss": 0.1615, "step": 437 }, { "epoch": 0.20763214031761082, "grad_norm": 3.985469341278076, "learning_rate": 1.9302388365116734e-05, "loss": 0.1311, "step": 438 }, { "epoch": 0.2081061863000711, "grad_norm": 3.8260138034820557, "learning_rate": 1.9296301747134555e-05, "loss": 0.1956, "step": 439 }, { "epoch": 0.20858023228253142, "grad_norm": 4.328072547912598, "learning_rate": 1.9290189659826974e-05, "loss": 0.1957, "step": 440 }, { "epoch": 0.20858023228253142, "eval_accuracy": 0.9903381642512077, "eval_f1": 0.8928571428571429, "eval_loss": 0.03430945798754692, "eval_precision": 0.8333333333333334, "eval_recall": 0.9615384615384616, "eval_runtime": 48.6442, "eval_samples_per_second": 5.571, "eval_steps_per_second": 0.185, "step": 440 }, { "epoch": 0.2090542782649917, "grad_norm": 3.6678905487060547, "learning_rate": 1.9284052119939448e-05, "loss": 0.1329, "step": 441 }, { "epoch": 0.209528324247452, "grad_norm": 3.7596359252929688, "learning_rate": 1.927788914428715e-05, "loss": 0.1907, "step": 442 }, { "epoch": 0.2100023702299123, "grad_norm": 4.306353569030762, "learning_rate": 1.9271700749754954e-05, "loss": 0.1437, "step": 443 }, { "epoch": 0.2104764162123726, "grad_norm": 6.4847636222839355, "learning_rate": 1.9265486953297373e-05, 
"loss": 0.1889, "step": 444 }, { "epoch": 0.2109504621948329, "grad_norm": 7.348166465759277, "learning_rate": 1.92592477719385e-05, "loss": 0.1978, "step": 445 }, { "epoch": 0.2114245081772932, "grad_norm": 7.6974873542785645, "learning_rate": 1.9252983222771996e-05, "loss": 0.296, "step": 446 }, { "epoch": 0.2118985541597535, "grad_norm": 5.88078498840332, "learning_rate": 1.9246693322961002e-05, "loss": 0.1981, "step": 447 }, { "epoch": 0.2123726001422138, "grad_norm": 6.422124862670898, "learning_rate": 1.9240378089738136e-05, "loss": 0.1393, "step": 448 }, { "epoch": 0.2128466461246741, "grad_norm": 6.030878067016602, "learning_rate": 1.9234037540405405e-05, "loss": 0.1384, "step": 449 }, { "epoch": 0.2133206921071344, "grad_norm": 7.562985897064209, "learning_rate": 1.922767169233418e-05, "loss": 0.1907, "step": 450 }, { "epoch": 0.21379473808959468, "grad_norm": 6.239231586456299, "learning_rate": 1.9221280562965145e-05, "loss": 0.1804, "step": 451 }, { "epoch": 0.214268784072055, "grad_norm": 6.161927223205566, "learning_rate": 1.9214864169808252e-05, "loss": 0.1156, "step": 452 }, { "epoch": 0.21474283005451528, "grad_norm": 10.836844444274902, "learning_rate": 1.9208422530442658e-05, "loss": 0.3175, "step": 453 }, { "epoch": 0.2152168760369756, "grad_norm": 6.956441402435303, "learning_rate": 1.9201955662516693e-05, "loss": 0.1713, "step": 454 }, { "epoch": 0.21569092201943588, "grad_norm": 5.461636543273926, "learning_rate": 1.9195463583747814e-05, "loss": 0.1354, "step": 455 }, { "epoch": 0.2161649680018962, "grad_norm": 5.017518997192383, "learning_rate": 1.9188946311922537e-05, "loss": 0.2034, "step": 456 }, { "epoch": 0.21663901398435648, "grad_norm": 4.602156162261963, "learning_rate": 1.9182403864896407e-05, "loss": 0.1813, "step": 457 }, { "epoch": 0.2171130599668168, "grad_norm": 5.34616231918335, "learning_rate": 1.9175836260593937e-05, "loss": 0.1929, "step": 458 }, { "epoch": 0.21758710594927708, "grad_norm": 6.494266986846924, "learning_rate": 1.9169243517008572e-05, "loss": 0.2436, "step": 459 }, { "epoch": 0.2180611519317374, "grad_norm": 10.610939979553223, "learning_rate": 1.916262565220263e-05, "loss": 0.2593, "step": 460 }, { "epoch": 0.2180611519317374, "eval_accuracy": 0.9919484702093397, "eval_f1": 0.9074074074074074, "eval_loss": 0.03075222112238407, "eval_precision": 0.875, "eval_recall": 0.9423076923076923, "eval_runtime": 47.9054, "eval_samples_per_second": 5.657, "eval_steps_per_second": 0.188, "step": 460 }, { "epoch": 0.21853519791419768, "grad_norm": 5.9074788093566895, "learning_rate": 1.915598268430724e-05, "loss": 0.153, "step": 461 }, { "epoch": 0.21900924389665796, "grad_norm": 3.602909803390503, "learning_rate": 1.9149314631522325e-05, "loss": 0.1545, "step": 462 }, { "epoch": 0.21948328987911828, "grad_norm": 4.433838844299316, "learning_rate": 1.914262151211653e-05, "loss": 0.1526, "step": 463 }, { "epoch": 0.21995733586157856, "grad_norm": 8.290746688842773, "learning_rate": 1.9135903344427163e-05, "loss": 0.1552, "step": 464 }, { "epoch": 0.22043138184403888, "grad_norm": 13.321844100952148, "learning_rate": 1.912916014686018e-05, "loss": 0.2367, "step": 465 }, { "epoch": 0.22090542782649916, "grad_norm": 6.072572708129883, "learning_rate": 1.912239193789009e-05, "loss": 0.2007, "step": 466 }, { "epoch": 0.22137947380895948, "grad_norm": 3.8708927631378174, "learning_rate": 1.9115598736059934e-05, "loss": 0.1375, "step": 467 }, { "epoch": 0.22185351979141976, "grad_norm": 4.0472092628479, "learning_rate": 1.910878055998123e-05, "loss": 
0.1327, "step": 468 }, { "epoch": 0.22232756577388008, "grad_norm": 4.479143142700195, "learning_rate": 1.9101937428333918e-05, "loss": 0.1913, "step": 469 }, { "epoch": 0.22280161175634036, "grad_norm": 4.356966495513916, "learning_rate": 1.909506935986631e-05, "loss": 0.1698, "step": 470 }, { "epoch": 0.22327565773880068, "grad_norm": 7.67513370513916, "learning_rate": 1.908817637339503e-05, "loss": 0.2444, "step": 471 }, { "epoch": 0.22374970372126096, "grad_norm": 6.602683067321777, "learning_rate": 1.908125848780498e-05, "loss": 0.2005, "step": 472 }, { "epoch": 0.22422374970372125, "grad_norm": 10.097046852111816, "learning_rate": 1.9074315722049278e-05, "loss": 0.2625, "step": 473 }, { "epoch": 0.22469779568618156, "grad_norm": 9.88956069946289, "learning_rate": 1.90673480951492e-05, "loss": 0.2045, "step": 474 }, { "epoch": 0.22517184166864185, "grad_norm": 6.545369625091553, "learning_rate": 1.906035562619414e-05, "loss": 0.1933, "step": 475 }, { "epoch": 0.22564588765110216, "grad_norm": 5.960228443145752, "learning_rate": 1.9053338334341553e-05, "loss": 0.1812, "step": 476 }, { "epoch": 0.22611993363356245, "grad_norm": 6.84769868850708, "learning_rate": 1.90462962388169e-05, "loss": 0.2436, "step": 477 }, { "epoch": 0.22659397961602276, "grad_norm": 5.503833770751953, "learning_rate": 1.9039229358913594e-05, "loss": 0.192, "step": 478 }, { "epoch": 0.22706802559848305, "grad_norm": 4.82657527923584, "learning_rate": 1.9032137713992957e-05, "loss": 0.1775, "step": 479 }, { "epoch": 0.22754207158094336, "grad_norm": 3.190770387649536, "learning_rate": 1.9025021323484155e-05, "loss": 0.1137, "step": 480 }, { "epoch": 0.22754207158094336, "eval_accuracy": 0.9935587761674718, "eval_f1": 0.9259259259259259, "eval_loss": 0.023625820875167847, "eval_precision": 0.8928571428571429, "eval_recall": 0.9615384615384616, "eval_runtime": 48.7682, "eval_samples_per_second": 5.557, "eval_steps_per_second": 0.185, "step": 480 }, { "epoch": 0.22801611756340365, "grad_norm": 5.586182594299316, "learning_rate": 1.9017880206884157e-05, "loss": 0.257, "step": 481 }, { "epoch": 0.22849016354586396, "grad_norm": 13.091703414916992, "learning_rate": 1.9010714383757668e-05, "loss": 0.2785, "step": 482 }, { "epoch": 0.22896420952832425, "grad_norm": 5.211515426635742, "learning_rate": 1.9003523873737082e-05, "loss": 0.1478, "step": 483 }, { "epoch": 0.22943825551078453, "grad_norm": 3.8805530071258545, "learning_rate": 1.8996308696522435e-05, "loss": 0.1164, "step": 484 }, { "epoch": 0.22991230149324485, "grad_norm": 5.7906293869018555, "learning_rate": 1.8989068871881336e-05, "loss": 0.1905, "step": 485 }, { "epoch": 0.23038634747570513, "grad_norm": 3.8140764236450195, "learning_rate": 1.898180441964893e-05, "loss": 0.2113, "step": 486 }, { "epoch": 0.23086039345816545, "grad_norm": 2.9873130321502686, "learning_rate": 1.897451535972783e-05, "loss": 0.1156, "step": 487 }, { "epoch": 0.23133443944062573, "grad_norm": 4.1001505851745605, "learning_rate": 1.8967201712088056e-05, "loss": 0.1476, "step": 488 }, { "epoch": 0.23180848542308605, "grad_norm": 4.187485694885254, "learning_rate": 1.8959863496767022e-05, "loss": 0.1837, "step": 489 }, { "epoch": 0.23228253140554633, "grad_norm": 7.173356533050537, "learning_rate": 1.8952500733869415e-05, "loss": 0.2964, "step": 490 }, { "epoch": 0.23275657738800665, "grad_norm": 7.43308162689209, "learning_rate": 1.8945113443567202e-05, "loss": 0.2136, "step": 491 }, { "epoch": 0.23323062337046693, "grad_norm": 4.697380542755127, "learning_rate": 
1.8937701646099537e-05, "loss": 0.1953, "step": 492 }, { "epoch": 0.23370466935292725, "grad_norm": 2.4572548866271973, "learning_rate": 1.893026536177272e-05, "loss": 0.1506, "step": 493 }, { "epoch": 0.23417871533538753, "grad_norm": 9.078280448913574, "learning_rate": 1.8922804610960134e-05, "loss": 0.2009, "step": 494 }, { "epoch": 0.23465276131784782, "grad_norm": 3.59165620803833, "learning_rate": 1.8915319414102197e-05, "loss": 0.1611, "step": 495 }, { "epoch": 0.23512680730030813, "grad_norm": 3.31412935256958, "learning_rate": 1.8907809791706304e-05, "loss": 0.1465, "step": 496 }, { "epoch": 0.23560085328276842, "grad_norm": 4.140017986297607, "learning_rate": 1.890027576434677e-05, "loss": 0.1957, "step": 497 }, { "epoch": 0.23607489926522873, "grad_norm": 7.067059516906738, "learning_rate": 1.8892717352664762e-05, "loss": 0.2505, "step": 498 }, { "epoch": 0.23654894524768902, "grad_norm": 4.36193323135376, "learning_rate": 1.8885134577368268e-05, "loss": 0.1392, "step": 499 }, { "epoch": 0.23702299123014933, "grad_norm": 5.218912124633789, "learning_rate": 1.887752745923202e-05, "loss": 0.2213, "step": 500 }, { "epoch": 0.23702299123014933, "eval_accuracy": 0.9879227053140096, "eval_f1": 0.8717948717948718, "eval_loss": 0.02638288587331772, "eval_precision": 0.7846153846153846, "eval_recall": 0.9807692307692307, "eval_runtime": 50.0425, "eval_samples_per_second": 5.415, "eval_steps_per_second": 0.18, "step": 500 }, { "epoch": 0.23749703721260962, "grad_norm": 5.681730270385742, "learning_rate": 1.886989601909744e-05, "loss": 0.1176, "step": 501 }, { "epoch": 0.23797108319506993, "grad_norm": 5.455805778503418, "learning_rate": 1.8862240277872587e-05, "loss": 0.1556, "step": 502 }, { "epoch": 0.23844512917753022, "grad_norm": 6.3678879737854, "learning_rate": 1.8854560256532098e-05, "loss": 0.2133, "step": 503 }, { "epoch": 0.23891917515999053, "grad_norm": 8.105287551879883, "learning_rate": 1.8846855976117137e-05, "loss": 0.1864, "step": 504 }, { "epoch": 0.23939322114245082, "grad_norm": 12.376128196716309, "learning_rate": 1.8839127457735325e-05, "loss": 0.2301, "step": 505 }, { "epoch": 0.2398672671249111, "grad_norm": 4.791072368621826, "learning_rate": 1.8831374722560686e-05, "loss": 0.1711, "step": 506 }, { "epoch": 0.24034131310737142, "grad_norm": 7.560359001159668, "learning_rate": 1.8823597791833595e-05, "loss": 0.2847, "step": 507 }, { "epoch": 0.2408153590898317, "grad_norm": 4.705785751342773, "learning_rate": 1.8815796686860717e-05, "loss": 0.1704, "step": 508 }, { "epoch": 0.24128940507229202, "grad_norm": 4.360641002655029, "learning_rate": 1.880797142901495e-05, "loss": 0.171, "step": 509 }, { "epoch": 0.2417634510547523, "grad_norm": 2.9732701778411865, "learning_rate": 1.880012203973536e-05, "loss": 0.1867, "step": 510 }, { "epoch": 0.24223749703721262, "grad_norm": 5.549304008483887, "learning_rate": 1.8792248540527124e-05, "loss": 0.1999, "step": 511 }, { "epoch": 0.2427115430196729, "grad_norm": 4.155362129211426, "learning_rate": 1.878435095296148e-05, "loss": 0.0973, "step": 512 }, { "epoch": 0.24318558900213322, "grad_norm": 2.743211030960083, "learning_rate": 1.877642929867566e-05, "loss": 0.0951, "step": 513 }, { "epoch": 0.2436596349845935, "grad_norm": 5.166907787322998, "learning_rate": 1.876848359937283e-05, "loss": 0.2062, "step": 514 }, { "epoch": 0.24413368096705382, "grad_norm": 8.175763130187988, "learning_rate": 1.876051387682204e-05, "loss": 0.3424, "step": 515 }, { "epoch": 0.2446077269495141, "grad_norm": 7.783774375915527, 
"learning_rate": 1.8752520152858136e-05, "loss": 0.1457, "step": 516 }, { "epoch": 0.2450817729319744, "grad_norm": 3.428746461868286, "learning_rate": 1.8744502449381748e-05, "loss": 0.1067, "step": 517 }, { "epoch": 0.2455558189144347, "grad_norm": 5.763442039489746, "learning_rate": 1.8736460788359195e-05, "loss": 0.1122, "step": 518 }, { "epoch": 0.246029864896895, "grad_norm": 4.418912887573242, "learning_rate": 1.872839519182242e-05, "loss": 0.1818, "step": 519 }, { "epoch": 0.2465039108793553, "grad_norm": 12.567361831665039, "learning_rate": 1.8720305681868954e-05, "loss": 0.2092, "step": 520 }, { "epoch": 0.2465039108793553, "eval_accuracy": 0.9895330112721417, "eval_f1": 0.8849557522123894, "eval_loss": 0.03357277810573578, "eval_precision": 0.819672131147541, "eval_recall": 0.9615384615384616, "eval_runtime": 50.2265, "eval_samples_per_second": 5.396, "eval_steps_per_second": 0.179, "step": 520 }, { "epoch": 0.2469779568618156, "grad_norm": 4.847149848937988, "learning_rate": 1.871219228066185e-05, "loss": 0.1458, "step": 521 }, { "epoch": 0.2474520028442759, "grad_norm": 14.294929504394531, "learning_rate": 1.8704055010429604e-05, "loss": 0.2378, "step": 522 }, { "epoch": 0.2479260488267362, "grad_norm": 8.848465919494629, "learning_rate": 1.869589389346611e-05, "loss": 0.1957, "step": 523 }, { "epoch": 0.2484000948091965, "grad_norm": 7.963865280151367, "learning_rate": 1.8687708952130602e-05, "loss": 0.2389, "step": 524 }, { "epoch": 0.2488741407916568, "grad_norm": 6.276052951812744, "learning_rate": 1.867950020884758e-05, "loss": 0.1899, "step": 525 }, { "epoch": 0.2493481867741171, "grad_norm": 4.264331340789795, "learning_rate": 1.867126768610676e-05, "loss": 0.1336, "step": 526 }, { "epoch": 0.2498222327565774, "grad_norm": 5.881321907043457, "learning_rate": 1.866301140646299e-05, "loss": 0.1832, "step": 527 }, { "epoch": 0.2502962787390377, "grad_norm": 6.979321002960205, "learning_rate": 1.8654731392536238e-05, "loss": 0.2483, "step": 528 }, { "epoch": 0.25077032472149796, "grad_norm": 6.509945392608643, "learning_rate": 1.8646427667011465e-05, "loss": 0.1578, "step": 529 }, { "epoch": 0.2512443707039583, "grad_norm": 9.517057418823242, "learning_rate": 1.863810025263862e-05, "loss": 0.2089, "step": 530 }, { "epoch": 0.2517184166864186, "grad_norm": 6.159739017486572, "learning_rate": 1.8629749172232528e-05, "loss": 0.1491, "step": 531 }, { "epoch": 0.2521924626688789, "grad_norm": 7.890002727508545, "learning_rate": 1.8621374448672878e-05, "loss": 0.2089, "step": 532 }, { "epoch": 0.25266650865133916, "grad_norm": 5.750835418701172, "learning_rate": 1.861297610490412e-05, "loss": 0.1672, "step": 533 }, { "epoch": 0.2531405546337995, "grad_norm": 8.991978645324707, "learning_rate": 1.8604554163935416e-05, "loss": 0.2807, "step": 534 }, { "epoch": 0.2536146006162598, "grad_norm": 8.729525566101074, "learning_rate": 1.859610864884059e-05, "loss": 0.1845, "step": 535 }, { "epoch": 0.2540886465987201, "grad_norm": 5.400993824005127, "learning_rate": 1.8587639582758032e-05, "loss": 0.1581, "step": 536 }, { "epoch": 0.25456269258118036, "grad_norm": 3.7591798305511475, "learning_rate": 1.8579146988890677e-05, "loss": 0.151, "step": 537 }, { "epoch": 0.2550367385636407, "grad_norm": 4.554786682128906, "learning_rate": 1.857063089050591e-05, "loss": 0.1672, "step": 538 }, { "epoch": 0.255510784546101, "grad_norm": 5.148488998413086, "learning_rate": 1.8562091310935514e-05, "loss": 0.1258, "step": 539 }, { "epoch": 0.25598483052856125, "grad_norm": 7.819247245788574, 
"learning_rate": 1.8553528273575597e-05, "loss": 0.2247, "step": 540 }, { "epoch": 0.25598483052856125, "eval_accuracy": 0.9927536231884058, "eval_f1": 0.9158878504672897, "eval_loss": 0.02866183966398239, "eval_precision": 0.8909090909090909, "eval_recall": 0.9423076923076923, "eval_runtime": 50.5721, "eval_samples_per_second": 5.359, "eval_steps_per_second": 0.178, "step": 540 }, { "epoch": 0.25645887651102156, "grad_norm": 5.182741641998291, "learning_rate": 1.8544941801886543e-05, "loss": 0.1985, "step": 541 }, { "epoch": 0.2569329224934819, "grad_norm": 10.451058387756348, "learning_rate": 1.8536331919392938e-05, "loss": 0.2074, "step": 542 }, { "epoch": 0.2574069684759422, "grad_norm": 4.122173309326172, "learning_rate": 1.8527698649683507e-05, "loss": 0.1409, "step": 543 }, { "epoch": 0.25788101445840245, "grad_norm": 4.434998989105225, "learning_rate": 1.8519042016411045e-05, "loss": 0.1919, "step": 544 }, { "epoch": 0.25835506044086276, "grad_norm": 4.652968406677246, "learning_rate": 1.8510362043292363e-05, "loss": 0.2015, "step": 545 }, { "epoch": 0.2588291064233231, "grad_norm": 2.667080879211426, "learning_rate": 1.850165875410822e-05, "loss": 0.1196, "step": 546 }, { "epoch": 0.2593031524057834, "grad_norm": 4.295355796813965, "learning_rate": 1.8492932172703236e-05, "loss": 0.1693, "step": 547 }, { "epoch": 0.25977719838824365, "grad_norm": 7.52603006362915, "learning_rate": 1.848418232298587e-05, "loss": 0.1776, "step": 548 }, { "epoch": 0.26025124437070396, "grad_norm": 12.561964988708496, "learning_rate": 1.8475409228928314e-05, "loss": 0.2784, "step": 549 }, { "epoch": 0.2607252903531643, "grad_norm": 3.891218423843384, "learning_rate": 1.8466612914566447e-05, "loss": 0.1314, "step": 550 }, { "epoch": 0.26119933633562453, "grad_norm": 4.668924331665039, "learning_rate": 1.8457793403999766e-05, "loss": 0.1323, "step": 551 }, { "epoch": 0.26167338231808485, "grad_norm": 4.252039432525635, "learning_rate": 1.844895072139132e-05, "loss": 0.1404, "step": 552 }, { "epoch": 0.26214742830054516, "grad_norm": 13.645633697509766, "learning_rate": 1.844008489096764e-05, "loss": 0.2463, "step": 553 }, { "epoch": 0.2626214742830055, "grad_norm": 8.817793846130371, "learning_rate": 1.843119593701868e-05, "loss": 0.2191, "step": 554 }, { "epoch": 0.26309552026546573, "grad_norm": 3.518982410430908, "learning_rate": 1.8422283883897744e-05, "loss": 0.1856, "step": 555 }, { "epoch": 0.26356956624792605, "grad_norm": 4.6413044929504395, "learning_rate": 1.8413348756021415e-05, "loss": 0.2101, "step": 556 }, { "epoch": 0.26404361223038636, "grad_norm": 3.4393184185028076, "learning_rate": 1.8404390577869508e-05, "loss": 0.0917, "step": 557 }, { "epoch": 0.2645176582128467, "grad_norm": 7.83014440536499, "learning_rate": 1.839540937398498e-05, "loss": 0.1848, "step": 558 }, { "epoch": 0.26499170419530693, "grad_norm": 5.448614597320557, "learning_rate": 1.8386405168973875e-05, "loss": 0.201, "step": 559 }, { "epoch": 0.26546575017776725, "grad_norm": 9.165091514587402, "learning_rate": 1.8377377987505252e-05, "loss": 0.2684, "step": 560 }, { "epoch": 0.26546575017776725, "eval_accuracy": 0.9919484702093397, "eval_f1": 0.9090909090909091, "eval_loss": 0.028601787984371185, "eval_precision": 0.8620689655172413, "eval_recall": 0.9615384615384616, "eval_runtime": 49.8418, "eval_samples_per_second": 5.437, "eval_steps_per_second": 0.181, "step": 560 }, { "epoch": 0.26593979616022756, "grad_norm": 4.3900065422058105, "learning_rate": 1.836832785431112e-05, "loss": 0.1651, "step": 561 }, { "epoch": 
0.2664138421426878, "grad_norm": 6.591796398162842, "learning_rate": 1.8359254794186368e-05, "loss": 0.2222, "step": 562 }, { "epoch": 0.26688788812514813, "grad_norm": 4.408583641052246, "learning_rate": 1.8350158831988708e-05, "loss": 0.1636, "step": 563 }, { "epoch": 0.26736193410760845, "grad_norm": 4.501555442810059, "learning_rate": 1.8341039992638585e-05, "loss": 0.1883, "step": 564 }, { "epoch": 0.26783598009006876, "grad_norm": 3.782682418823242, "learning_rate": 1.8331898301119123e-05, "loss": 0.1582, "step": 565 }, { "epoch": 0.268310026072529, "grad_norm": 3.5062944889068604, "learning_rate": 1.832273378247606e-05, "loss": 0.1352, "step": 566 }, { "epoch": 0.26878407205498933, "grad_norm": 3.86049747467041, "learning_rate": 1.831354646181767e-05, "loss": 0.106, "step": 567 }, { "epoch": 0.26925811803744965, "grad_norm": 3.148836374282837, "learning_rate": 1.8304336364314707e-05, "loss": 0.1623, "step": 568 }, { "epoch": 0.2697321640199099, "grad_norm": 6.754387855529785, "learning_rate": 1.829510351520031e-05, "loss": 0.2434, "step": 569 }, { "epoch": 0.2702062100023702, "grad_norm": 5.247784614562988, "learning_rate": 1.828584793976997e-05, "loss": 0.1331, "step": 570 }, { "epoch": 0.27068025598483053, "grad_norm": 5.818985939025879, "learning_rate": 1.827656966338143e-05, "loss": 0.1737, "step": 571 }, { "epoch": 0.27115430196729084, "grad_norm": 6.568758487701416, "learning_rate": 1.8267268711454634e-05, "loss": 0.1579, "step": 572 }, { "epoch": 0.2716283479497511, "grad_norm": 3.268862009048462, "learning_rate": 1.825794510947164e-05, "loss": 0.1111, "step": 573 }, { "epoch": 0.2721023939322114, "grad_norm": 6.984981060028076, "learning_rate": 1.8248598882976575e-05, "loss": 0.0992, "step": 574 }, { "epoch": 0.27257643991467173, "grad_norm": 7.59507942199707, "learning_rate": 1.8239230057575542e-05, "loss": 0.214, "step": 575 }, { "epoch": 0.27305048589713204, "grad_norm": 5.1507768630981445, "learning_rate": 1.8229838658936566e-05, "loss": 0.2327, "step": 576 }, { "epoch": 0.2735245318795923, "grad_norm": 5.358564853668213, "learning_rate": 1.8220424712789502e-05, "loss": 0.1408, "step": 577 }, { "epoch": 0.2739985778620526, "grad_norm": 4.9489054679870605, "learning_rate": 1.8210988244926003e-05, "loss": 0.1331, "step": 578 }, { "epoch": 0.27447262384451293, "grad_norm": 3.4182543754577637, "learning_rate": 1.8201529281199398e-05, "loss": 0.1632, "step": 579 }, { "epoch": 0.2749466698269732, "grad_norm": 4.689789772033691, "learning_rate": 1.8192047847524667e-05, "loss": 0.1485, "step": 580 }, { "epoch": 0.2749466698269732, "eval_accuracy": 0.9895330112721417, "eval_f1": 0.8849557522123894, "eval_loss": 0.035417672246694565, "eval_precision": 0.819672131147541, "eval_recall": 0.9615384615384616, "eval_runtime": 50.3007, "eval_samples_per_second": 5.388, "eval_steps_per_second": 0.179, "step": 580 }, { "epoch": 0.2754207158094335, "grad_norm": 10.920886039733887, "learning_rate": 1.818254396987835e-05, "loss": 0.1957, "step": 581 }, { "epoch": 0.2758947617918938, "grad_norm": 5.822461128234863, "learning_rate": 1.8173017674298467e-05, "loss": 0.2047, "step": 582 }, { "epoch": 0.27636880777435413, "grad_norm": 3.932877540588379, "learning_rate": 1.8163468986884466e-05, "loss": 0.1519, "step": 583 }, { "epoch": 0.2768428537568144, "grad_norm": 4.026641368865967, "learning_rate": 1.8153897933797145e-05, "loss": 0.1476, "step": 584 }, { "epoch": 0.2773168997392747, "grad_norm": 7.63388204574585, "learning_rate": 1.8144304541258562e-05, "loss": 0.1801, "step": 585 }, { "epoch": 
0.277790945721735, "grad_norm": 6.409342288970947, "learning_rate": 1.8134688835552003e-05, "loss": 0.1739, "step": 586 }, { "epoch": 0.27826499170419533, "grad_norm": 6.363264560699463, "learning_rate": 1.8125050843021864e-05, "loss": 0.1361, "step": 587 }, { "epoch": 0.2787390376866556, "grad_norm": 4.324347972869873, "learning_rate": 1.8115390590073612e-05, "loss": 0.1366, "step": 588 }, { "epoch": 0.2792130836691159, "grad_norm": 4.929332256317139, "learning_rate": 1.81057081031737e-05, "loss": 0.1702, "step": 589 }, { "epoch": 0.2796871296515762, "grad_norm": 9.115571022033691, "learning_rate": 1.8096003408849494e-05, "loss": 0.2041, "step": 590 }, { "epoch": 0.2801611756340365, "grad_norm": 3.8357152938842773, "learning_rate": 1.8086276533689203e-05, "loss": 0.1817, "step": 591 }, { "epoch": 0.2806352216164968, "grad_norm": 6.761523723602295, "learning_rate": 1.807652750434181e-05, "loss": 0.1802, "step": 592 }, { "epoch": 0.2811092675989571, "grad_norm": 4.541932106018066, "learning_rate": 1.8066756347516985e-05, "loss": 0.19, "step": 593 }, { "epoch": 0.2815833135814174, "grad_norm": 4.590600967407227, "learning_rate": 1.805696308998503e-05, "loss": 0.2543, "step": 594 }, { "epoch": 0.2820573595638777, "grad_norm": 5.389986038208008, "learning_rate": 1.804714775857679e-05, "loss": 0.167, "step": 595 }, { "epoch": 0.282531405546338, "grad_norm": 4.69621467590332, "learning_rate": 1.803731038018359e-05, "loss": 0.1495, "step": 596 }, { "epoch": 0.2830054515287983, "grad_norm": 4.559863090515137, "learning_rate": 1.8027450981757162e-05, "loss": 0.1929, "step": 597 }, { "epoch": 0.2834794975112586, "grad_norm": 7.2930216789245605, "learning_rate": 1.8017569590309564e-05, "loss": 0.1402, "step": 598 }, { "epoch": 0.2839535434937189, "grad_norm": 5.581624507904053, "learning_rate": 1.8007666232913095e-05, "loss": 0.1774, "step": 599 }, { "epoch": 0.2844275894761792, "grad_norm": 6.207181930541992, "learning_rate": 1.799774093670026e-05, "loss": 0.1168, "step": 600 }, { "epoch": 0.2844275894761792, "eval_accuracy": 0.9927536231884058, "eval_f1": 0.9158878504672897, "eval_loss": 0.02737443894147873, "eval_precision": 0.8909090909090909, "eval_recall": 0.9423076923076923, "eval_runtime": 50.6456, "eval_samples_per_second": 5.351, "eval_steps_per_second": 0.178, "step": 600 }, { "epoch": 0.2849016354586395, "grad_norm": 3.393158435821533, "learning_rate": 1.798779372886365e-05, "loss": 0.1518, "step": 601 }, { "epoch": 0.28537568144109976, "grad_norm": 6.664277076721191, "learning_rate": 1.7977824636655897e-05, "loss": 0.2136, "step": 602 }, { "epoch": 0.2858497274235601, "grad_norm": 5.028671741485596, "learning_rate": 1.796783368738959e-05, "loss": 0.1294, "step": 603 }, { "epoch": 0.2863237734060204, "grad_norm": 6.041374683380127, "learning_rate": 1.79578209084372e-05, "loss": 0.1707, "step": 604 }, { "epoch": 0.2867978193884807, "grad_norm": 7.459338665008545, "learning_rate": 1.7947786327230996e-05, "loss": 0.2532, "step": 605 }, { "epoch": 0.28727186537094096, "grad_norm": 4.308289051055908, "learning_rate": 1.7937729971262992e-05, "loss": 0.1856, "step": 606 }, { "epoch": 0.2877459113534013, "grad_norm": 6.290380001068115, "learning_rate": 1.7927651868084857e-05, "loss": 0.183, "step": 607 }, { "epoch": 0.2882199573358616, "grad_norm": 8.412773132324219, "learning_rate": 1.7917552045307838e-05, "loss": 0.2314, "step": 608 }, { "epoch": 0.2886940033183219, "grad_norm": 4.447933197021484, "learning_rate": 1.7907430530602688e-05, "loss": 0.117, "step": 609 }, { "epoch": 
0.28916804930078216, "grad_norm": 5.467257022857666, "learning_rate": 1.7897287351699586e-05, "loss": 0.1482, "step": 610 }, { "epoch": 0.2896420952832425, "grad_norm": 8.908904075622559, "learning_rate": 1.7887122536388073e-05, "loss": 0.2146, "step": 611 }, { "epoch": 0.2901161412657028, "grad_norm": 5.282848834991455, "learning_rate": 1.7876936112516963e-05, "loss": 0.2351, "step": 612 }, { "epoch": 0.29059018724816305, "grad_norm": 3.9758098125457764, "learning_rate": 1.786672810799427e-05, "loss": 0.1641, "step": 613 }, { "epoch": 0.29106423323062336, "grad_norm": 3.870427131652832, "learning_rate": 1.7856498550787144e-05, "loss": 0.1399, "step": 614 }, { "epoch": 0.2915382792130837, "grad_norm": 4.148342132568359, "learning_rate": 1.784624746892177e-05, "loss": 0.1141, "step": 615 }, { "epoch": 0.292012325195544, "grad_norm": 5.640356063842773, "learning_rate": 1.7835974890483307e-05, "loss": 0.2055, "step": 616 }, { "epoch": 0.29248637117800425, "grad_norm": 6.6784467697143555, "learning_rate": 1.782568084361582e-05, "loss": 0.1748, "step": 617 }, { "epoch": 0.29296041716046456, "grad_norm": 5.142460346221924, "learning_rate": 1.781536535652217e-05, "loss": 0.1884, "step": 618 }, { "epoch": 0.2934344631429249, "grad_norm": 5.571662425994873, "learning_rate": 1.7805028457463985e-05, "loss": 0.1912, "step": 619 }, { "epoch": 0.2939085091253852, "grad_norm": 8.899080276489258, "learning_rate": 1.7794670174761537e-05, "loss": 0.1899, "step": 620 }, { "epoch": 0.2939085091253852, "eval_accuracy": 0.9911433172302737, "eval_f1": 0.9009009009009009, "eval_loss": 0.026210227981209755, "eval_precision": 0.847457627118644, "eval_recall": 0.9615384615384616, "eval_runtime": 49.3474, "eval_samples_per_second": 5.492, "eval_steps_per_second": 0.182, "step": 620 }, { "epoch": 0.29438255510784544, "grad_norm": 6.550443649291992, "learning_rate": 1.7784290536793687e-05, "loss": 0.1596, "step": 621 }, { "epoch": 0.29485660109030576, "grad_norm": 4.088603973388672, "learning_rate": 1.777388957199781e-05, "loss": 0.201, "step": 622 }, { "epoch": 0.2953306470727661, "grad_norm": 6.610530853271484, "learning_rate": 1.77634673088697e-05, "loss": 0.2649, "step": 623 }, { "epoch": 0.29580469305522633, "grad_norm": 5.083434581756592, "learning_rate": 1.7753023775963516e-05, "loss": 0.1708, "step": 624 }, { "epoch": 0.29627873903768664, "grad_norm": 4.823783874511719, "learning_rate": 1.774255900189168e-05, "loss": 0.1727, "step": 625 }, { "epoch": 0.29675278502014696, "grad_norm": 4.803427696228027, "learning_rate": 1.773207301532481e-05, "loss": 0.1648, "step": 626 }, { "epoch": 0.2972268310026073, "grad_norm": 3.9158084392547607, "learning_rate": 1.7721565844991643e-05, "loss": 0.1532, "step": 627 }, { "epoch": 0.29770087698506753, "grad_norm": 8.883777618408203, "learning_rate": 1.7711037519678957e-05, "loss": 0.2497, "step": 628 }, { "epoch": 0.29817492296752784, "grad_norm": 6.019445896148682, "learning_rate": 1.770048806823148e-05, "loss": 0.2128, "step": 629 }, { "epoch": 0.29864896894998816, "grad_norm": 3.997671365737915, "learning_rate": 1.7689917519551834e-05, "loss": 0.1713, "step": 630 }, { "epoch": 0.29912301493244847, "grad_norm": 3.6591684818267822, "learning_rate": 1.767932590260042e-05, "loss": 0.1413, "step": 631 }, { "epoch": 0.29959706091490873, "grad_norm": 4.382582664489746, "learning_rate": 1.766871324639538e-05, "loss": 0.2013, "step": 632 }, { "epoch": 0.30007110689736904, "grad_norm": 3.2412827014923096, "learning_rate": 1.7658079580012494e-05, "loss": 0.1503, "step": 633 }, { 
"epoch": 0.30054515287982936, "grad_norm": 3.3120296001434326, "learning_rate": 1.7647424932585093e-05, "loss": 0.1252, "step": 634 }, { "epoch": 0.3010191988622896, "grad_norm": 7.659612655639648, "learning_rate": 1.7636749333304003e-05, "loss": 0.1497, "step": 635 }, { "epoch": 0.30149324484474993, "grad_norm": 5.772290229797363, "learning_rate": 1.762605281141745e-05, "loss": 0.2057, "step": 636 }, { "epoch": 0.30196729082721024, "grad_norm": 3.834017038345337, "learning_rate": 1.7615335396230974e-05, "loss": 0.1563, "step": 637 }, { "epoch": 0.30244133680967056, "grad_norm": 4.182459354400635, "learning_rate": 1.7604597117107363e-05, "loss": 0.155, "step": 638 }, { "epoch": 0.3029153827921308, "grad_norm": 8.041765213012695, "learning_rate": 1.7593838003466572e-05, "loss": 0.255, "step": 639 }, { "epoch": 0.30338942877459113, "grad_norm": 8.001297950744629, "learning_rate": 1.7583058084785626e-05, "loss": 0.1626, "step": 640 }, { "epoch": 0.30338942877459113, "eval_accuracy": 0.9895330112721417, "eval_f1": 0.8849557522123894, "eval_loss": 0.027411378920078278, "eval_precision": 0.819672131147541, "eval_recall": 0.9615384615384616, "eval_runtime": 50.2519, "eval_samples_per_second": 5.393, "eval_steps_per_second": 0.179, "step": 640 }, { "epoch": 0.30386347475705144, "grad_norm": 3.592210054397583, "learning_rate": 1.7572257390598558e-05, "loss": 0.1379, "step": 641 }, { "epoch": 0.30433752073951176, "grad_norm": 3.6556708812713623, "learning_rate": 1.7561435950496318e-05, "loss": 0.2032, "step": 642 }, { "epoch": 0.304811566721972, "grad_norm": 7.336647987365723, "learning_rate": 1.755059379412669e-05, "loss": 0.1737, "step": 643 }, { "epoch": 0.30528561270443233, "grad_norm": 5.175565242767334, "learning_rate": 1.7539730951194215e-05, "loss": 0.1701, "step": 644 }, { "epoch": 0.30575965868689264, "grad_norm": 3.353743553161621, "learning_rate": 1.7528847451460124e-05, "loss": 0.1603, "step": 645 }, { "epoch": 0.3062337046693529, "grad_norm": 4.312347412109375, "learning_rate": 1.7517943324742225e-05, "loss": 0.202, "step": 646 }, { "epoch": 0.3067077506518132, "grad_norm": 7.478756427764893, "learning_rate": 1.7507018600914846e-05, "loss": 0.2627, "step": 647 }, { "epoch": 0.30718179663427353, "grad_norm": 4.026299953460693, "learning_rate": 1.7496073309908738e-05, "loss": 0.211, "step": 648 }, { "epoch": 0.30765584261673384, "grad_norm": 4.048435211181641, "learning_rate": 1.7485107481711014e-05, "loss": 0.2195, "step": 649 }, { "epoch": 0.3081298885991941, "grad_norm": 6.194882869720459, "learning_rate": 1.7474121146365037e-05, "loss": 0.1521, "step": 650 }, { "epoch": 0.3086039345816544, "grad_norm": 4.389416694641113, "learning_rate": 1.7463114333970374e-05, "loss": 0.1671, "step": 651 }, { "epoch": 0.30907798056411473, "grad_norm": 6.922310829162598, "learning_rate": 1.745208707468267e-05, "loss": 0.1738, "step": 652 }, { "epoch": 0.30955202654657504, "grad_norm": 5.426624298095703, "learning_rate": 1.744103939871361e-05, "loss": 0.1777, "step": 653 }, { "epoch": 0.3100260725290353, "grad_norm": 3.606872081756592, "learning_rate": 1.7429971336330798e-05, "loss": 0.1197, "step": 654 }, { "epoch": 0.3105001185114956, "grad_norm": 9.372891426086426, "learning_rate": 1.7418882917857706e-05, "loss": 0.2389, "step": 655 }, { "epoch": 0.31097416449395593, "grad_norm": 3.3423619270324707, "learning_rate": 1.7407774173673567e-05, "loss": 0.1363, "step": 656 }, { "epoch": 0.3114482104764162, "grad_norm": 8.206243515014648, "learning_rate": 1.73966451342133e-05, "loss": 0.2414, "step": 657 
}, { "epoch": 0.3119222564588765, "grad_norm": 9.203423500061035, "learning_rate": 1.7385495829967437e-05, "loss": 0.1649, "step": 658 }, { "epoch": 0.3123963024413368, "grad_norm": 3.244095802307129, "learning_rate": 1.7374326291482016e-05, "loss": 0.1007, "step": 659 }, { "epoch": 0.31287034842379713, "grad_norm": 5.194423198699951, "learning_rate": 1.7363136549358527e-05, "loss": 0.1919, "step": 660 }, { "epoch": 0.31287034842379713, "eval_accuracy": 0.9847020933977456, "eval_f1": 0.8455284552845529, "eval_loss": 0.04351545125246048, "eval_precision": 0.7323943661971831, "eval_recall": 1.0, "eval_runtime": 50.2675, "eval_samples_per_second": 5.391, "eval_steps_per_second": 0.179, "step": 660 }, { "epoch": 0.3133443944062574, "grad_norm": 6.757850170135498, "learning_rate": 1.73519266342538e-05, "loss": 0.1558, "step": 661 }, { "epoch": 0.3138184403887177, "grad_norm": 7.195196628570557, "learning_rate": 1.7340696576879938e-05, "loss": 0.1513, "step": 662 }, { "epoch": 0.314292486371178, "grad_norm": 3.7510671615600586, "learning_rate": 1.7329446408004224e-05, "loss": 0.1257, "step": 663 }, { "epoch": 0.31476653235363833, "grad_norm": 10.250309944152832, "learning_rate": 1.7318176158449057e-05, "loss": 0.2314, "step": 664 }, { "epoch": 0.3152405783360986, "grad_norm": 4.986364364624023, "learning_rate": 1.7306885859091828e-05, "loss": 0.1762, "step": 665 }, { "epoch": 0.3157146243185589, "grad_norm": 5.138982772827148, "learning_rate": 1.7295575540864878e-05, "loss": 0.1269, "step": 666 }, { "epoch": 0.3161886703010192, "grad_norm": 3.664776086807251, "learning_rate": 1.728424523475538e-05, "loss": 0.1825, "step": 667 }, { "epoch": 0.3166627162834795, "grad_norm": 10.553228378295898, "learning_rate": 1.7272894971805283e-05, "loss": 0.2915, "step": 668 }, { "epoch": 0.3171367622659398, "grad_norm": 5.9562883377075195, "learning_rate": 1.7261524783111202e-05, "loss": 0.1399, "step": 669 }, { "epoch": 0.3176108082484001, "grad_norm": 4.256638526916504, "learning_rate": 1.7250134699824343e-05, "loss": 0.1503, "step": 670 }, { "epoch": 0.3180848542308604, "grad_norm": 10.577091217041016, "learning_rate": 1.723872475315042e-05, "loss": 0.1675, "step": 671 }, { "epoch": 0.3185589002133207, "grad_norm": 5.027775287628174, "learning_rate": 1.7227294974349576e-05, "loss": 0.1442, "step": 672 }, { "epoch": 0.319032946195781, "grad_norm": 6.238917827606201, "learning_rate": 1.7215845394736268e-05, "loss": 0.1742, "step": 673 }, { "epoch": 0.3195069921782413, "grad_norm": 4.804416656494141, "learning_rate": 1.7204376045679225e-05, "loss": 0.1374, "step": 674 }, { "epoch": 0.3199810381607016, "grad_norm": 3.9071667194366455, "learning_rate": 1.719288695860132e-05, "loss": 0.0985, "step": 675 }, { "epoch": 0.3204550841431619, "grad_norm": 7.168766021728516, "learning_rate": 1.7181378164979515e-05, "loss": 0.2446, "step": 676 }, { "epoch": 0.3209291301256222, "grad_norm": 4.034299850463867, "learning_rate": 1.7169849696344756e-05, "loss": 0.1403, "step": 677 }, { "epoch": 0.3214031761080825, "grad_norm": 4.557724952697754, "learning_rate": 1.7158301584281898e-05, "loss": 0.164, "step": 678 }, { "epoch": 0.32187722209054276, "grad_norm": 5.220811367034912, "learning_rate": 1.7146733860429614e-05, "loss": 0.2, "step": 679 }, { "epoch": 0.32235126807300307, "grad_norm": 4.494997024536133, "learning_rate": 1.7135146556480294e-05, "loss": 0.1953, "step": 680 }, { "epoch": 0.32235126807300307, "eval_accuracy": 0.9935587761674718, "eval_f1": 0.9259259259259259, "eval_loss": 0.021485736593604088, 
"eval_precision": 0.8928571428571429, "eval_recall": 0.9615384615384616, "eval_runtime": 50.4417, "eval_samples_per_second": 5.373, "eval_steps_per_second": 0.178, "step": 680 }, { "epoch": 0.3228253140554634, "grad_norm": 4.126351833343506, "learning_rate": 1.7123539704179993e-05, "loss": 0.1752, "step": 681 }, { "epoch": 0.3232993600379237, "grad_norm": 10.326355934143066, "learning_rate": 1.711191333532831e-05, "loss": 0.169, "step": 682 }, { "epoch": 0.32377340602038396, "grad_norm": 4.122337341308594, "learning_rate": 1.7100267481778316e-05, "loss": 0.1566, "step": 683 }, { "epoch": 0.32424745200284427, "grad_norm": 3.1678662300109863, "learning_rate": 1.7088602175436468e-05, "loss": 0.1606, "step": 684 }, { "epoch": 0.3247214979853046, "grad_norm": 3.7549214363098145, "learning_rate": 1.7076917448262516e-05, "loss": 0.1603, "step": 685 }, { "epoch": 0.3251955439677649, "grad_norm": 13.620607376098633, "learning_rate": 1.7065213332269422e-05, "loss": 0.3794, "step": 686 }, { "epoch": 0.32566958995022516, "grad_norm": 4.941345691680908, "learning_rate": 1.7053489859523256e-05, "loss": 0.1969, "step": 687 }, { "epoch": 0.32614363593268547, "grad_norm": 4.709161758422852, "learning_rate": 1.7041747062143136e-05, "loss": 0.1313, "step": 688 }, { "epoch": 0.3266176819151458, "grad_norm": 3.6968085765838623, "learning_rate": 1.7029984972301108e-05, "loss": 0.1055, "step": 689 }, { "epoch": 0.32709172789760604, "grad_norm": 2.689204216003418, "learning_rate": 1.701820362222209e-05, "loss": 0.1531, "step": 690 }, { "epoch": 0.32756577388006636, "grad_norm": 8.326744079589844, "learning_rate": 1.7006403044183758e-05, "loss": 0.2377, "step": 691 }, { "epoch": 0.32803981986252667, "grad_norm": 4.975642204284668, "learning_rate": 1.699458327051647e-05, "loss": 0.118, "step": 692 }, { "epoch": 0.328513865844987, "grad_norm": 3.944392204284668, "learning_rate": 1.6982744333603177e-05, "loss": 0.1752, "step": 693 }, { "epoch": 0.32898791182744724, "grad_norm": 6.509869575500488, "learning_rate": 1.6970886265879328e-05, "loss": 0.1625, "step": 694 }, { "epoch": 0.32946195780990756, "grad_norm": 4.543463706970215, "learning_rate": 1.6959009099832787e-05, "loss": 0.1696, "step": 695 }, { "epoch": 0.32993600379236787, "grad_norm": 5.740193843841553, "learning_rate": 1.6947112868003737e-05, "loss": 0.1748, "step": 696 }, { "epoch": 0.3304100497748282, "grad_norm": 8.345794677734375, "learning_rate": 1.6935197602984607e-05, "loss": 0.2829, "step": 697 }, { "epoch": 0.33088409575728844, "grad_norm": 5.2554426193237305, "learning_rate": 1.6923263337419965e-05, "loss": 0.1685, "step": 698 }, { "epoch": 0.33135814173974876, "grad_norm": 4.45180606842041, "learning_rate": 1.6911310104006433e-05, "loss": 0.1581, "step": 699 }, { "epoch": 0.33183218772220907, "grad_norm": 5.367680072784424, "learning_rate": 1.6899337935492607e-05, "loss": 0.1978, "step": 700 }, { "epoch": 0.33183218772220907, "eval_accuracy": 0.9903381642512077, "eval_f1": 0.8928571428571429, "eval_loss": 0.02599775232374668, "eval_precision": 0.8333333333333334, "eval_recall": 0.9615384615384616, "eval_runtime": 50.241, "eval_samples_per_second": 5.394, "eval_steps_per_second": 0.179, "step": 700 }, { "epoch": 0.33230623370466933, "grad_norm": 5.1734700202941895, "learning_rate": 1.6887346864678953e-05, "loss": 0.1721, "step": 701 }, { "epoch": 0.33278027968712964, "grad_norm": 4.870246887207031, "learning_rate": 1.687533692441773e-05, "loss": 0.2229, "step": 702 }, { "epoch": 0.33325432566958996, "grad_norm": 5.313831806182861, 
"learning_rate": 1.686330814761289e-05, "loss": 0.2483, "step": 703 }, { "epoch": 0.33372837165205027, "grad_norm": 5.799126625061035, "learning_rate": 1.685126056721999e-05, "loss": 0.1355, "step": 704 }, { "epoch": 0.33420241763451053, "grad_norm": 11.835933685302734, "learning_rate": 1.683919421624611e-05, "loss": 0.186, "step": 705 }, { "epoch": 0.33467646361697084, "grad_norm": 6.19420862197876, "learning_rate": 1.682710912774975e-05, "loss": 0.1857, "step": 706 }, { "epoch": 0.33515050959943116, "grad_norm": 3.9267659187316895, "learning_rate": 1.681500533484075e-05, "loss": 0.1386, "step": 707 }, { "epoch": 0.33562455558189147, "grad_norm": 5.226176738739014, "learning_rate": 1.680288287068019e-05, "loss": 0.2356, "step": 708 }, { "epoch": 0.33609860156435173, "grad_norm": 8.258098602294922, "learning_rate": 1.6790741768480322e-05, "loss": 0.1536, "step": 709 }, { "epoch": 0.33657264754681204, "grad_norm": 4.574370384216309, "learning_rate": 1.677858206150443e-05, "loss": 0.1709, "step": 710 }, { "epoch": 0.33704669352927236, "grad_norm": 3.74572491645813, "learning_rate": 1.67664037830668e-05, "loss": 0.1429, "step": 711 }, { "epoch": 0.3375207395117326, "grad_norm": 5.610940456390381, "learning_rate": 1.6754206966532575e-05, "loss": 0.209, "step": 712 }, { "epoch": 0.33799478549419293, "grad_norm": 5.318949222564697, "learning_rate": 1.6741991645317698e-05, "loss": 0.2223, "step": 713 }, { "epoch": 0.33846883147665324, "grad_norm": 8.700793266296387, "learning_rate": 1.672975785288882e-05, "loss": 0.2023, "step": 714 }, { "epoch": 0.33894287745911356, "grad_norm": 6.486977577209473, "learning_rate": 1.6717505622763184e-05, "loss": 0.1546, "step": 715 }, { "epoch": 0.3394169234415738, "grad_norm": 8.953361511230469, "learning_rate": 1.6705234988508544e-05, "loss": 0.1262, "step": 716 }, { "epoch": 0.33989096942403413, "grad_norm": 4.526251792907715, "learning_rate": 1.6692945983743093e-05, "loss": 0.1704, "step": 717 }, { "epoch": 0.34036501540649444, "grad_norm": 2.327392101287842, "learning_rate": 1.6680638642135337e-05, "loss": 0.0834, "step": 718 }, { "epoch": 0.34083906138895476, "grad_norm": 4.169843673706055, "learning_rate": 1.666831299740403e-05, "loss": 0.1299, "step": 719 }, { "epoch": 0.341313107371415, "grad_norm": 10.23669719696045, "learning_rate": 1.665596908331807e-05, "loss": 0.311, "step": 720 }, { "epoch": 0.341313107371415, "eval_accuracy": 0.9943639291465378, "eval_f1": 0.9345794392523364, "eval_loss": 0.016849704086780548, "eval_precision": 0.9090909090909091, "eval_recall": 0.9615384615384616, "eval_runtime": 48.7737, "eval_samples_per_second": 5.556, "eval_steps_per_second": 0.185, "step": 720 }, { "epoch": 0.34178715335387533, "grad_norm": 12.211395263671875, "learning_rate": 1.664360693369641e-05, "loss": 0.3005, "step": 721 }, { "epoch": 0.34226119933633564, "grad_norm": 7.851336479187012, "learning_rate": 1.6631226582407954e-05, "loss": 0.2051, "step": 722 }, { "epoch": 0.3427352453187959, "grad_norm": 4.335603713989258, "learning_rate": 1.6618828063371482e-05, "loss": 0.1926, "step": 723 }, { "epoch": 0.3432092913012562, "grad_norm": 3.9641799926757812, "learning_rate": 1.6606411410555552e-05, "loss": 0.1486, "step": 724 }, { "epoch": 0.34368333728371653, "grad_norm": 5.774534702301025, "learning_rate": 1.659397665797839e-05, "loss": 0.1372, "step": 725 }, { "epoch": 0.34415738326617684, "grad_norm": 5.1008620262146, "learning_rate": 1.6581523839707823e-05, "loss": 0.1899, "step": 726 }, { "epoch": 0.3446314292486371, "grad_norm": 7.078022003173828, 
"learning_rate": 1.656905298986117e-05, "loss": 0.1551, "step": 727 }, { "epoch": 0.3451054752310974, "grad_norm": 3.9161665439605713, "learning_rate": 1.655656414260515e-05, "loss": 0.16, "step": 728 }, { "epoch": 0.3455795212135577, "grad_norm": 4.232361793518066, "learning_rate": 1.6544057332155787e-05, "loss": 0.1808, "step": 729 }, { "epoch": 0.34605356719601804, "grad_norm": 5.0609025955200195, "learning_rate": 1.653153259277833e-05, "loss": 0.1838, "step": 730 }, { "epoch": 0.3465276131784783, "grad_norm": 4.360866069793701, "learning_rate": 1.6518989958787126e-05, "loss": 0.1067, "step": 731 }, { "epoch": 0.3470016591609386, "grad_norm": 7.752100944519043, "learning_rate": 1.650642946454558e-05, "loss": 0.2231, "step": 732 }, { "epoch": 0.3474757051433989, "grad_norm": 8.94684886932373, "learning_rate": 1.6493851144466003e-05, "loss": 0.1324, "step": 733 }, { "epoch": 0.3479497511258592, "grad_norm": 6.798895359039307, "learning_rate": 1.6481255033009556e-05, "loss": 0.2331, "step": 734 }, { "epoch": 0.3484237971083195, "grad_norm": 4.9599223136901855, "learning_rate": 1.646864116468614e-05, "loss": 0.1869, "step": 735 }, { "epoch": 0.3488978430907798, "grad_norm": 4.743082523345947, "learning_rate": 1.6456009574054307e-05, "loss": 0.1665, "step": 736 }, { "epoch": 0.3493718890732401, "grad_norm": 6.6829094886779785, "learning_rate": 1.6443360295721163e-05, "loss": 0.1968, "step": 737 }, { "epoch": 0.3498459350557004, "grad_norm": 4.806887626647949, "learning_rate": 1.643069336434227e-05, "loss": 0.1726, "step": 738 }, { "epoch": 0.3503199810381607, "grad_norm": 6.034814834594727, "learning_rate": 1.6418008814621563e-05, "loss": 0.1971, "step": 739 }, { "epoch": 0.350794027020621, "grad_norm": 5.798818588256836, "learning_rate": 1.640530668131123e-05, "loss": 0.226, "step": 740 }, { "epoch": 0.350794027020621, "eval_accuracy": 0.9927536231884058, "eval_f1": 0.9203539823008849, "eval_loss": 0.016567079350352287, "eval_precision": 0.8524590163934426, "eval_recall": 1.0, "eval_runtime": 48.9001, "eval_samples_per_second": 5.542, "eval_steps_per_second": 0.184, "step": 740 }, { "epoch": 0.35126807300308127, "grad_norm": 3.9887707233428955, "learning_rate": 1.6392586999211654e-05, "loss": 0.1785, "step": 741 }, { "epoch": 0.3517421189855416, "grad_norm": 6.046712398529053, "learning_rate": 1.637984980317128e-05, "loss": 0.2305, "step": 742 }, { "epoch": 0.3522161649680019, "grad_norm": 3.9800610542297363, "learning_rate": 1.6367095128086544e-05, "loss": 0.1953, "step": 743 }, { "epoch": 0.3526902109504622, "grad_norm": 6.72821044921875, "learning_rate": 1.6354323008901774e-05, "loss": 0.1355, "step": 744 }, { "epoch": 0.35316425693292247, "grad_norm": 4.552807807922363, "learning_rate": 1.6341533480609082e-05, "loss": 0.1669, "step": 745 }, { "epoch": 0.3536383029153828, "grad_norm": 8.40992259979248, "learning_rate": 1.632872657824828e-05, "loss": 0.1916, "step": 746 }, { "epoch": 0.3541123488978431, "grad_norm": 7.337851524353027, "learning_rate": 1.6315902336906775e-05, "loss": 0.2786, "step": 747 }, { "epoch": 0.3545863948803034, "grad_norm": 3.8782455921173096, "learning_rate": 1.6303060791719487e-05, "loss": 0.1712, "step": 748 }, { "epoch": 0.35506044086276367, "grad_norm": 5.530329704284668, "learning_rate": 1.6290201977868738e-05, "loss": 0.2116, "step": 749 }, { "epoch": 0.355534486845224, "grad_norm": 4.863757133483887, "learning_rate": 1.627732593058416e-05, "loss": 0.1612, "step": 750 }, { "epoch": 0.3560085328276843, "grad_norm": 5.130858421325684, "learning_rate": 
1.6264432685142603e-05, "loss": 0.1998, "step": 751 }, { "epoch": 0.35648257881014456, "grad_norm": 3.9180197715759277, "learning_rate": 1.6251522276868042e-05, "loss": 0.2278, "step": 752 }, { "epoch": 0.35695662479260487, "grad_norm": 7.8527655601501465, "learning_rate": 1.6238594741131458e-05, "loss": 0.1622, "step": 753 }, { "epoch": 0.3574306707750652, "grad_norm": 6.452600002288818, "learning_rate": 1.6225650113350772e-05, "loss": 0.1718, "step": 754 }, { "epoch": 0.3579047167575255, "grad_norm": 6.650462627410889, "learning_rate": 1.6212688428990717e-05, "loss": 0.1814, "step": 755 }, { "epoch": 0.35837876273998576, "grad_norm": 4.804896831512451, "learning_rate": 1.6199709723562772e-05, "loss": 0.1664, "step": 756 }, { "epoch": 0.35885280872244607, "grad_norm": 2.5763094425201416, "learning_rate": 1.6186714032625036e-05, "loss": 0.0896, "step": 757 }, { "epoch": 0.3593268547049064, "grad_norm": 5.129449367523193, "learning_rate": 1.6173701391782154e-05, "loss": 0.227, "step": 758 }, { "epoch": 0.3598009006873667, "grad_norm": 4.2139506340026855, "learning_rate": 1.61606718366852e-05, "loss": 0.2339, "step": 759 }, { "epoch": 0.36027494666982696, "grad_norm": 3.645012378692627, "learning_rate": 1.6147625403031598e-05, "loss": 0.1394, "step": 760 }, { "epoch": 0.36027494666982696, "eval_accuracy": 0.9919484702093397, "eval_f1": 0.9107142857142857, "eval_loss": 0.019461479038000107, "eval_precision": 0.85, "eval_recall": 0.9807692307692307, "eval_runtime": 49.0116, "eval_samples_per_second": 5.529, "eval_steps_per_second": 0.184, "step": 760 }, { "epoch": 0.36074899265228727, "grad_norm": 3.101369619369507, "learning_rate": 1.6134562126565007e-05, "loss": 0.1178, "step": 761 }, { "epoch": 0.3612230386347476, "grad_norm": 5.407668590545654, "learning_rate": 1.6121482043075233e-05, "loss": 0.1898, "step": 762 }, { "epoch": 0.36169708461720784, "grad_norm": 6.148521423339844, "learning_rate": 1.610838518839813e-05, "loss": 0.1782, "step": 763 }, { "epoch": 0.36217113059966816, "grad_norm": 4.2589287757873535, "learning_rate": 1.60952715984155e-05, "loss": 0.1539, "step": 764 }, { "epoch": 0.36264517658212847, "grad_norm": 7.476611137390137, "learning_rate": 1.6082141309054996e-05, "loss": 0.1756, "step": 765 }, { "epoch": 0.3631192225645888, "grad_norm": 9.146824836730957, "learning_rate": 1.6068994356290018e-05, "loss": 0.1856, "step": 766 }, { "epoch": 0.36359326854704904, "grad_norm": 5.721219062805176, "learning_rate": 1.6055830776139622e-05, "loss": 0.1755, "step": 767 }, { "epoch": 0.36406731452950936, "grad_norm": 7.07839822769165, "learning_rate": 1.6042650604668423e-05, "loss": 0.1517, "step": 768 }, { "epoch": 0.36454136051196967, "grad_norm": 4.418078422546387, "learning_rate": 1.602945387798648e-05, "loss": 0.1242, "step": 769 }, { "epoch": 0.36501540649443, "grad_norm": 9.750717163085938, "learning_rate": 1.6016240632249224e-05, "loss": 0.1659, "step": 770 }, { "epoch": 0.36548945247689024, "grad_norm": 4.934391498565674, "learning_rate": 1.6003010903657337e-05, "loss": 0.1674, "step": 771 }, { "epoch": 0.36596349845935056, "grad_norm": 3.9224469661712646, "learning_rate": 1.5989764728456655e-05, "loss": 0.1329, "step": 772 }, { "epoch": 0.36643754444181087, "grad_norm": 8.719375610351562, "learning_rate": 1.5976502142938074e-05, "loss": 0.1879, "step": 773 }, { "epoch": 0.36691159042427113, "grad_norm": 4.978146076202393, "learning_rate": 1.596322318343746e-05, "loss": 0.1067, "step": 774 }, { "epoch": 0.36738563640673144, "grad_norm": 3.350446939468384, "learning_rate": 
1.5949927886335522e-05, "loss": 0.1377, "step": 775 }, { "epoch": 0.36785968238919176, "grad_norm": 10.434601783752441, "learning_rate": 1.593661628805775e-05, "loss": 0.207, "step": 776 }, { "epoch": 0.36833372837165207, "grad_norm": 6.763217449188232, "learning_rate": 1.592328842507428e-05, "loss": 0.1818, "step": 777 }, { "epoch": 0.3688077743541123, "grad_norm": 7.231940746307373, "learning_rate": 1.590994433389981e-05, "loss": 0.1322, "step": 778 }, { "epoch": 0.36928182033657264, "grad_norm": 4.025269985198975, "learning_rate": 1.5896584051093508e-05, "loss": 0.1721, "step": 779 }, { "epoch": 0.36975586631903296, "grad_norm": 9.545405387878418, "learning_rate": 1.5883207613258887e-05, "loss": 0.1702, "step": 780 }, { "epoch": 0.36975586631903296, "eval_accuracy": 0.9855072463768116, "eval_f1": 0.85, "eval_loss": 0.03580058738589287, "eval_precision": 0.75, "eval_recall": 0.9807692307692307, "eval_runtime": 48.9416, "eval_samples_per_second": 5.537, "eval_steps_per_second": 0.184, "step": 780 }, { "epoch": 0.37022991230149327, "grad_norm": 6.435138702392578, "learning_rate": 1.586981505704374e-05, "loss": 0.2205, "step": 781 }, { "epoch": 0.3707039582839535, "grad_norm": 9.83716106414795, "learning_rate": 1.5856406419140006e-05, "loss": 0.1698, "step": 782 }, { "epoch": 0.37117800426641384, "grad_norm": 6.209863662719727, "learning_rate": 1.5842981736283686e-05, "loss": 0.154, "step": 783 }, { "epoch": 0.37165205024887416, "grad_norm": 5.99310302734375, "learning_rate": 1.582954104525474e-05, "loss": 0.1056, "step": 784 }, { "epoch": 0.3721260962313344, "grad_norm": 5.836954593658447, "learning_rate": 1.5816084382876994e-05, "loss": 0.2097, "step": 785 }, { "epoch": 0.3726001422137947, "grad_norm": 6.66615104675293, "learning_rate": 1.5802611786018013e-05, "loss": 0.2048, "step": 786 }, { "epoch": 0.37307418819625504, "grad_norm": 4.270611763000488, "learning_rate": 1.578912329158904e-05, "loss": 0.14, "step": 787 }, { "epoch": 0.37354823417871535, "grad_norm": 2.8829944133758545, "learning_rate": 1.5775618936544853e-05, "loss": 0.1068, "step": 788 }, { "epoch": 0.3740222801611756, "grad_norm": 6.891777515411377, "learning_rate": 1.5762098757883703e-05, "loss": 0.1781, "step": 789 }, { "epoch": 0.3744963261436359, "grad_norm": 6.650791168212891, "learning_rate": 1.5748562792647177e-05, "loss": 0.2406, "step": 790 }, { "epoch": 0.37497037212609624, "grad_norm": 5.970800399780273, "learning_rate": 1.5735011077920117e-05, "loss": 0.0895, "step": 791 }, { "epoch": 0.37544441810855655, "grad_norm": 7.795815944671631, "learning_rate": 1.5721443650830527e-05, "loss": 0.1843, "step": 792 }, { "epoch": 0.3759184640910168, "grad_norm": 5.365365505218506, "learning_rate": 1.5707860548549433e-05, "loss": 0.2567, "step": 793 }, { "epoch": 0.3763925100734771, "grad_norm": 5.363485813140869, "learning_rate": 1.5694261808290836e-05, "loss": 0.1804, "step": 794 }, { "epoch": 0.37686655605593744, "grad_norm": 6.416436672210693, "learning_rate": 1.568064746731156e-05, "loss": 0.2047, "step": 795 }, { "epoch": 0.3773406020383977, "grad_norm": 3.2758121490478516, "learning_rate": 1.566701756291118e-05, "loss": 0.1261, "step": 796 }, { "epoch": 0.377814648020858, "grad_norm": 3.0586371421813965, "learning_rate": 1.5653372132431902e-05, "loss": 0.1208, "step": 797 }, { "epoch": 0.3782886940033183, "grad_norm": 5.383779525756836, "learning_rate": 1.563971121325848e-05, "loss": 0.2128, "step": 798 }, { "epoch": 0.37876273998577864, "grad_norm": 6.047075271606445, "learning_rate": 1.56260348428181e-05, "loss": 
0.151, "step": 799 }, { "epoch": 0.3792367859682389, "grad_norm": 5.230149269104004, "learning_rate": 1.561234305858028e-05, "loss": 0.1413, "step": 800 }, { "epoch": 0.3792367859682389, "eval_accuracy": 0.9911433172302737, "eval_f1": 0.9026548672566371, "eval_loss": 0.026881376281380653, "eval_precision": 0.8360655737704918, "eval_recall": 0.9807692307692307, "eval_runtime": 50.6443, "eval_samples_per_second": 5.351, "eval_steps_per_second": 0.178, "step": 800 }, { "epoch": 0.3797108319506992, "grad_norm": 5.758953094482422, "learning_rate": 1.5598635898056754e-05, "loss": 0.1493, "step": 801 }, { "epoch": 0.3801848779331595, "grad_norm": 6.0659661293029785, "learning_rate": 1.5584913398801407e-05, "loss": 0.2677, "step": 802 }, { "epoch": 0.38065892391561984, "grad_norm": 5.709496974945068, "learning_rate": 1.557117559841013e-05, "loss": 0.1173, "step": 803 }, { "epoch": 0.3811329698980801, "grad_norm": 3.090986728668213, "learning_rate": 1.5557422534520742e-05, "loss": 0.1074, "step": 804 }, { "epoch": 0.3816070158805404, "grad_norm": 4.4366254806518555, "learning_rate": 1.5543654244812873e-05, "loss": 0.1772, "step": 805 }, { "epoch": 0.3820810618630007, "grad_norm": 4.978272914886475, "learning_rate": 1.5529870767007883e-05, "loss": 0.1848, "step": 806 }, { "epoch": 0.382555107845461, "grad_norm": 6.076455116271973, "learning_rate": 1.5516072138868726e-05, "loss": 0.1394, "step": 807 }, { "epoch": 0.3830291538279213, "grad_norm": 4.194643497467041, "learning_rate": 1.5502258398199873e-05, "loss": 0.1906, "step": 808 }, { "epoch": 0.3835031998103816, "grad_norm": 5.475553512573242, "learning_rate": 1.5488429582847194e-05, "loss": 0.1588, "step": 809 }, { "epoch": 0.3839772457928419, "grad_norm": 4.989933967590332, "learning_rate": 1.5474585730697866e-05, "loss": 0.2083, "step": 810 }, { "epoch": 0.3844512917753022, "grad_norm": 5.500510215759277, "learning_rate": 1.546072687968026e-05, "loss": 0.128, "step": 811 }, { "epoch": 0.3849253377577625, "grad_norm": 5.7488884925842285, "learning_rate": 1.5446853067763836e-05, "loss": 0.1925, "step": 812 }, { "epoch": 0.3853993837402228, "grad_norm": 5.636148929595947, "learning_rate": 1.5432964332959047e-05, "loss": 0.1603, "step": 813 }, { "epoch": 0.3858734297226831, "grad_norm": 4.349088191986084, "learning_rate": 1.5419060713317233e-05, "loss": 0.0912, "step": 814 }, { "epoch": 0.3863474757051434, "grad_norm": 5.55099630355835, "learning_rate": 1.5405142246930505e-05, "loss": 0.2131, "step": 815 }, { "epoch": 0.3868215216876037, "grad_norm": 5.109293460845947, "learning_rate": 1.539120897193166e-05, "loss": 0.1662, "step": 816 }, { "epoch": 0.387295567670064, "grad_norm": 5.997527122497559, "learning_rate": 1.537726092649406e-05, "loss": 0.1758, "step": 817 }, { "epoch": 0.38776961365252427, "grad_norm": 7.689452648162842, "learning_rate": 1.536329814883154e-05, "loss": 0.1838, "step": 818 }, { "epoch": 0.3882436596349846, "grad_norm": 4.673219203948975, "learning_rate": 1.5349320677198288e-05, "loss": 0.2374, "step": 819 }, { "epoch": 0.3887177056174449, "grad_norm": 5.657802104949951, "learning_rate": 1.533532854988876e-05, "loss": 0.2015, "step": 820 }, { "epoch": 0.3887177056174449, "eval_accuracy": 0.9911433172302737, "eval_f1": 0.9026548672566371, "eval_loss": 0.02378522790968418, "eval_precision": 0.8360655737704918, "eval_recall": 0.9807692307692307, "eval_runtime": 50.0097, "eval_samples_per_second": 5.419, "eval_steps_per_second": 0.18, "step": 820 }, { "epoch": 0.3891917515999052, "grad_norm": 5.630519390106201, "learning_rate": 
1.532132180523756e-05, "loss": 0.1689, "step": 821 }, { "epoch": 0.38966579758236547, "grad_norm": 5.623863220214844, "learning_rate": 1.5307300481619334e-05, "loss": 0.1882, "step": 822 }, { "epoch": 0.3901398435648258, "grad_norm": 4.633841037750244, "learning_rate": 1.529326461744868e-05, "loss": 0.1186, "step": 823 }, { "epoch": 0.3906138895472861, "grad_norm": 3.7852747440338135, "learning_rate": 1.5279214251180034e-05, "loss": 0.14, "step": 824 }, { "epoch": 0.3910879355297464, "grad_norm": 7.2971510887146, "learning_rate": 1.5265149421307545e-05, "loss": 0.2072, "step": 825 }, { "epoch": 0.39156198151220667, "grad_norm": 4.57223653793335, "learning_rate": 1.525107016636501e-05, "loss": 0.1995, "step": 826 }, { "epoch": 0.392036027494667, "grad_norm": 3.6118946075439453, "learning_rate": 1.5236976524925743e-05, "loss": 0.1314, "step": 827 }, { "epoch": 0.3925100734771273, "grad_norm": 18.717708587646484, "learning_rate": 1.5222868535602455e-05, "loss": 0.2795, "step": 828 }, { "epoch": 0.39298411945958756, "grad_norm": 5.223725318908691, "learning_rate": 1.5208746237047196e-05, "loss": 0.167, "step": 829 }, { "epoch": 0.39345816544204787, "grad_norm": 6.281686782836914, "learning_rate": 1.5194609667951192e-05, "loss": 0.2476, "step": 830 }, { "epoch": 0.3939322114245082, "grad_norm": 5.123955249786377, "learning_rate": 1.5180458867044783e-05, "loss": 0.1243, "step": 831 }, { "epoch": 0.3944062574069685, "grad_norm": 5.037649631500244, "learning_rate": 1.5166293873097293e-05, "loss": 0.1332, "step": 832 }, { "epoch": 0.39488030338942876, "grad_norm": 7.415771961212158, "learning_rate": 1.5152114724916932e-05, "loss": 0.2292, "step": 833 }, { "epoch": 0.39535434937188907, "grad_norm": 4.953770637512207, "learning_rate": 1.5137921461350693e-05, "loss": 0.1386, "step": 834 }, { "epoch": 0.3958283953543494, "grad_norm": 4.328691482543945, "learning_rate": 1.512371412128424e-05, "loss": 0.1527, "step": 835 }, { "epoch": 0.3963024413368097, "grad_norm": 4.749328136444092, "learning_rate": 1.5109492743641791e-05, "loss": 0.1742, "step": 836 }, { "epoch": 0.39677648731926995, "grad_norm": 3.8692128658294678, "learning_rate": 1.509525736738604e-05, "loss": 0.179, "step": 837 }, { "epoch": 0.39725053330173027, "grad_norm": 4.17873477935791, "learning_rate": 1.5081008031518027e-05, "loss": 0.1642, "step": 838 }, { "epoch": 0.3977245792841906, "grad_norm": 3.9594457149505615, "learning_rate": 1.5066744775077036e-05, "loss": 0.1177, "step": 839 }, { "epoch": 0.39819862526665084, "grad_norm": 7.010453224182129, "learning_rate": 1.505246763714049e-05, "loss": 0.1752, "step": 840 }, { "epoch": 0.39819862526665084, "eval_accuracy": 0.9895330112721417, "eval_f1": 0.8888888888888888, "eval_loss": 0.025310639292001724, "eval_precision": 0.8, "eval_recall": 1.0, "eval_runtime": 49.9935, "eval_samples_per_second": 5.421, "eval_steps_per_second": 0.18, "step": 840 }, { "epoch": 0.39867267124911115, "grad_norm": 4.036323070526123, "learning_rate": 1.5038176656823844e-05, "loss": 0.0904, "step": 841 }, { "epoch": 0.39914671723157147, "grad_norm": 3.6895594596862793, "learning_rate": 1.5023871873280475e-05, "loss": 0.1252, "step": 842 }, { "epoch": 0.3996207632140318, "grad_norm": 10.439888000488281, "learning_rate": 1.5009553325701582e-05, "loss": 0.2124, "step": 843 }, { "epoch": 0.40009480919649204, "grad_norm": 4.229457378387451, "learning_rate": 1.499522105331607e-05, "loss": 0.1908, "step": 844 }, { "epoch": 0.40056885517895235, "grad_norm": 6.920833110809326, "learning_rate": 1.4980875095390447e-05, 
"loss": 0.2047, "step": 845 }, { "epoch": 0.40104290116141267, "grad_norm": 2.879361629486084, "learning_rate": 1.4966515491228717e-05, "loss": 0.1008, "step": 846 }, { "epoch": 0.401516947143873, "grad_norm": 5.6754631996154785, "learning_rate": 1.495214228017227e-05, "loss": 0.1698, "step": 847 }, { "epoch": 0.40199099312633324, "grad_norm": 4.049121379852295, "learning_rate": 1.4937755501599774e-05, "loss": 0.2256, "step": 848 }, { "epoch": 0.40246503910879355, "grad_norm": 5.170472145080566, "learning_rate": 1.4923355194927069e-05, "loss": 0.2448, "step": 849 }, { "epoch": 0.40293908509125387, "grad_norm": 4.938206195831299, "learning_rate": 1.490894139960706e-05, "loss": 0.2282, "step": 850 }, { "epoch": 0.4034131310737141, "grad_norm": 6.50287389755249, "learning_rate": 1.4894514155129605e-05, "loss": 0.1854, "step": 851 }, { "epoch": 0.40388717705617444, "grad_norm": 4.345598220825195, "learning_rate": 1.4880073501021407e-05, "loss": 0.1649, "step": 852 }, { "epoch": 0.40436122303863475, "grad_norm": 6.99996280670166, "learning_rate": 1.4865619476845914e-05, "loss": 0.2396, "step": 853 }, { "epoch": 0.40483526902109507, "grad_norm": 4.84694766998291, "learning_rate": 1.4851152122203198e-05, "loss": 0.1963, "step": 854 }, { "epoch": 0.4053093150035553, "grad_norm": 3.466024160385132, "learning_rate": 1.4836671476729862e-05, "loss": 0.1761, "step": 855 }, { "epoch": 0.40578336098601564, "grad_norm": 4.219127178192139, "learning_rate": 1.482217758009891e-05, "loss": 0.129, "step": 856 }, { "epoch": 0.40625740696847595, "grad_norm": 3.5250399112701416, "learning_rate": 1.4807670472019657e-05, "loss": 0.1151, "step": 857 }, { "epoch": 0.40673145295093627, "grad_norm": 4.0963053703308105, "learning_rate": 1.4793150192237615e-05, "loss": 0.1368, "step": 858 }, { "epoch": 0.4072054989333965, "grad_norm": 5.257630348205566, "learning_rate": 1.4778616780534383e-05, "loss": 0.1364, "step": 859 }, { "epoch": 0.40767954491585684, "grad_norm": 5.789881706237793, "learning_rate": 1.4764070276727528e-05, "loss": 0.212, "step": 860 }, { "epoch": 0.40767954491585684, "eval_accuracy": 0.9911433172302737, "eval_f1": 0.9009009009009009, "eval_loss": 0.02501463145017624, "eval_precision": 0.847457627118644, "eval_recall": 0.9615384615384616, "eval_runtime": 50.2315, "eval_samples_per_second": 5.395, "eval_steps_per_second": 0.179, "step": 860 }, { "epoch": 0.40815359089831715, "grad_norm": 5.214051246643066, "learning_rate": 1.4749510720670506e-05, "loss": 0.1629, "step": 861 }, { "epoch": 0.4086276368807774, "grad_norm": 5.269729137420654, "learning_rate": 1.473493815225251e-05, "loss": 0.2231, "step": 862 }, { "epoch": 0.4091016828632377, "grad_norm": 5.787201881408691, "learning_rate": 1.4720352611398394e-05, "loss": 0.237, "step": 863 }, { "epoch": 0.40957572884569804, "grad_norm": 3.796096086502075, "learning_rate": 1.4705754138068557e-05, "loss": 0.1696, "step": 864 }, { "epoch": 0.41004977482815835, "grad_norm": 4.975426197052002, "learning_rate": 1.4691142772258826e-05, "loss": 0.1847, "step": 865 }, { "epoch": 0.4105238208106186, "grad_norm": 3.5773916244506836, "learning_rate": 1.467651855400035e-05, "loss": 0.1927, "step": 866 }, { "epoch": 0.4109978667930789, "grad_norm": 3.7105495929718018, "learning_rate": 1.466188152335948e-05, "loss": 0.1435, "step": 867 }, { "epoch": 0.41147191277553924, "grad_norm": 5.359220027923584, "learning_rate": 1.4647231720437687e-05, "loss": 0.2145, "step": 868 }, { "epoch": 0.41194595875799955, "grad_norm": 3.0884811878204346, "learning_rate": 
1.4632569185371424e-05, "loss": 0.1535, "step": 869 }, { "epoch": 0.4124200047404598, "grad_norm": 8.89234733581543, "learning_rate": 1.4617893958332025e-05, "loss": 0.1519, "step": 870 }, { "epoch": 0.4128940507229201, "grad_norm": 5.38965368270874, "learning_rate": 1.4603206079525607e-05, "loss": 0.1654, "step": 871 }, { "epoch": 0.41336809670538044, "grad_norm": 4.978879928588867, "learning_rate": 1.4588505589192936e-05, "loss": 0.1561, "step": 872 }, { "epoch": 0.4138421426878407, "grad_norm": 5.315103054046631, "learning_rate": 1.4573792527609343e-05, "loss": 0.2338, "step": 873 }, { "epoch": 0.414316188670301, "grad_norm": 3.3019869327545166, "learning_rate": 1.455906693508459e-05, "loss": 0.1383, "step": 874 }, { "epoch": 0.4147902346527613, "grad_norm": 5.54788875579834, "learning_rate": 1.4544328851962774e-05, "loss": 0.1859, "step": 875 }, { "epoch": 0.41526428063522164, "grad_norm": 4.469765663146973, "learning_rate": 1.452957831862222e-05, "loss": 0.1507, "step": 876 }, { "epoch": 0.4157383266176819, "grad_norm": 5.146998405456543, "learning_rate": 1.4514815375475351e-05, "loss": 0.1831, "step": 877 }, { "epoch": 0.4162123726001422, "grad_norm": 4.996446132659912, "learning_rate": 1.4500040062968597e-05, "loss": 0.1769, "step": 878 }, { "epoch": 0.4166864185826025, "grad_norm": 8.464068412780762, "learning_rate": 1.4485252421582274e-05, "loss": 0.3377, "step": 879 }, { "epoch": 0.41716046456506284, "grad_norm": 5.580779075622559, "learning_rate": 1.4470452491830477e-05, "loss": 0.1556, "step": 880 }, { "epoch": 0.41716046456506284, "eval_accuracy": 0.9927536231884058, "eval_f1": 0.9158878504672897, "eval_loss": 0.024685567244887352, "eval_precision": 0.8909090909090909, "eval_recall": 0.9423076923076923, "eval_runtime": 49.595, "eval_samples_per_second": 5.464, "eval_steps_per_second": 0.181, "step": 880 }, { "epoch": 0.4176345105475231, "grad_norm": 4.486780166625977, "learning_rate": 1.4455640314260972e-05, "loss": 0.1155, "step": 881 }, { "epoch": 0.4181085565299834, "grad_norm": 4.462590217590332, "learning_rate": 1.4440815929455072e-05, "loss": 0.2113, "step": 882 }, { "epoch": 0.4185826025124437, "grad_norm": 4.029243469238281, "learning_rate": 1.442597937802754e-05, "loss": 0.1435, "step": 883 }, { "epoch": 0.419056648494904, "grad_norm": 3.141965389251709, "learning_rate": 1.4411130700626467e-05, "loss": 0.117, "step": 884 }, { "epoch": 0.4195306944773643, "grad_norm": 4.770340919494629, "learning_rate": 1.4396269937933176e-05, "loss": 0.1502, "step": 885 }, { "epoch": 0.4200047404598246, "grad_norm": 6.025394916534424, "learning_rate": 1.4381397130662092e-05, "loss": 0.1583, "step": 886 }, { "epoch": 0.4204787864422849, "grad_norm": 4.995217800140381, "learning_rate": 1.4366512319560642e-05, "loss": 0.1963, "step": 887 }, { "epoch": 0.4209528324247452, "grad_norm": 5.447535037994385, "learning_rate": 1.4351615545409137e-05, "loss": 0.1628, "step": 888 }, { "epoch": 0.4214268784072055, "grad_norm": 5.995816230773926, "learning_rate": 1.433670684902066e-05, "loss": 0.157, "step": 889 }, { "epoch": 0.4219009243896658, "grad_norm": 5.347133159637451, "learning_rate": 1.4321786271240975e-05, "loss": 0.1771, "step": 890 }, { "epoch": 0.4223749703721261, "grad_norm": 9.27304458618164, "learning_rate": 1.4306853852948383e-05, "loss": 0.1654, "step": 891 }, { "epoch": 0.4228490163545864, "grad_norm": 4.350383758544922, "learning_rate": 1.4291909635053627e-05, "loss": 0.1535, "step": 892 }, { "epoch": 0.4233230623370467, "grad_norm": 2.4228219985961914, "learning_rate": 
1.4276953658499775e-05, "loss": 0.0949, "step": 893 }, { "epoch": 0.423797108319507, "grad_norm": 6.395793914794922, "learning_rate": 1.4261985964262118e-05, "loss": 0.1808, "step": 894 }, { "epoch": 0.42427115430196727, "grad_norm": 3.162377119064331, "learning_rate": 1.4247006593348042e-05, "loss": 0.1139, "step": 895 }, { "epoch": 0.4247452002844276, "grad_norm": 8.321721076965332, "learning_rate": 1.4232015586796938e-05, "loss": 0.2048, "step": 896 }, { "epoch": 0.4252192462668879, "grad_norm": 2.381802797317505, "learning_rate": 1.4217012985680054e-05, "loss": 0.116, "step": 897 }, { "epoch": 0.4256932922493482, "grad_norm": 4.9117913246154785, "learning_rate": 1.4201998831100424e-05, "loss": 0.1995, "step": 898 }, { "epoch": 0.42616733823180847, "grad_norm": 3.578552484512329, "learning_rate": 1.4186973164192722e-05, "loss": 0.1651, "step": 899 }, { "epoch": 0.4266413842142688, "grad_norm": 3.712299108505249, "learning_rate": 1.417193602612317e-05, "loss": 0.1495, "step": 900 }, { "epoch": 0.4266413842142688, "eval_accuracy": 0.9919484702093397, "eval_f1": 0.9107142857142857, "eval_loss": 0.014072530902922153, "eval_precision": 0.85, "eval_recall": 0.9807692307692307, "eval_runtime": 49.6759, "eval_samples_per_second": 5.455, "eval_steps_per_second": 0.181, "step": 900 }, { "epoch": 0.4271154301967291, "grad_norm": 4.017418384552002, "learning_rate": 1.4156887458089414e-05, "loss": 0.1161, "step": 901 }, { "epoch": 0.42758947617918935, "grad_norm": 5.4440507888793945, "learning_rate": 1.4141827501320422e-05, "loss": 0.1478, "step": 902 }, { "epoch": 0.42806352216164967, "grad_norm": 7.295302867889404, "learning_rate": 1.4126756197076352e-05, "loss": 0.2912, "step": 903 }, { "epoch": 0.42853756814411, "grad_norm": 17.25461196899414, "learning_rate": 1.411167358664846e-05, "loss": 0.3949, "step": 904 }, { "epoch": 0.4290116141265703, "grad_norm": 4.282469272613525, "learning_rate": 1.4096579711358975e-05, "loss": 0.1726, "step": 905 }, { "epoch": 0.42948566010903055, "grad_norm": 3.889463424682617, "learning_rate": 1.4081474612560986e-05, "loss": 0.1042, "step": 906 }, { "epoch": 0.42995970609149087, "grad_norm": 4.046245098114014, "learning_rate": 1.4066358331638344e-05, "loss": 0.1435, "step": 907 }, { "epoch": 0.4304337520739512, "grad_norm": 8.381349563598633, "learning_rate": 1.4051230910005516e-05, "loss": 0.265, "step": 908 }, { "epoch": 0.4309077980564115, "grad_norm": 6.036510467529297, "learning_rate": 1.4036092389107502e-05, "loss": 0.2364, "step": 909 }, { "epoch": 0.43138184403887175, "grad_norm": 5.527349472045898, "learning_rate": 1.402094281041972e-05, "loss": 0.2173, "step": 910 }, { "epoch": 0.43185589002133207, "grad_norm": 6.1843438148498535, "learning_rate": 1.4005782215447865e-05, "loss": 0.1482, "step": 911 }, { "epoch": 0.4323299360037924, "grad_norm": 3.8995513916015625, "learning_rate": 1.3990610645727829e-05, "loss": 0.1989, "step": 912 }, { "epoch": 0.43280398198625264, "grad_norm": 4.9433369636535645, "learning_rate": 1.3975428142825562e-05, "loss": 0.1242, "step": 913 }, { "epoch": 0.43327802796871295, "grad_norm": 6.509253025054932, "learning_rate": 1.396023474833697e-05, "loss": 0.1721, "step": 914 }, { "epoch": 0.43375207395117327, "grad_norm": 5.314474105834961, "learning_rate": 1.3945030503887801e-05, "loss": 0.1053, "step": 915 }, { "epoch": 0.4342261199336336, "grad_norm": 3.406217336654663, "learning_rate": 1.392981545113353e-05, "loss": 0.1666, "step": 916 }, { "epoch": 0.43470016591609384, "grad_norm": 4.486688613891602, "learning_rate": 
1.3914589631759245e-05, "loss": 0.1617, "step": 917 }, { "epoch": 0.43517421189855415, "grad_norm": 3.416886329650879, "learning_rate": 1.3899353087479526e-05, "loss": 0.0978, "step": 918 }, { "epoch": 0.43564825788101447, "grad_norm": 3.7753748893737793, "learning_rate": 1.3884105860038335e-05, "loss": 0.1286, "step": 919 }, { "epoch": 0.4361223038634748, "grad_norm": 5.81754732131958, "learning_rate": 1.386884799120891e-05, "loss": 0.1666, "step": 920 }, { "epoch": 0.4361223038634748, "eval_accuracy": 0.9935587761674718, "eval_f1": 0.9245283018867925, "eval_loss": 0.018749000504612923, "eval_precision": 0.9074074074074074, "eval_recall": 0.9423076923076923, "eval_runtime": 50.0364, "eval_samples_per_second": 5.416, "eval_steps_per_second": 0.18, "step": 920 }, { "epoch": 0.43659634984593504, "grad_norm": 5.7555389404296875, "learning_rate": 1.3853579522793643e-05, "loss": 0.1387, "step": 921 }, { "epoch": 0.43707039582839535, "grad_norm": 4.422433376312256, "learning_rate": 1.383830049662396e-05, "loss": 0.1191, "step": 922 }, { "epoch": 0.43754444181085567, "grad_norm": 10.869050979614258, "learning_rate": 1.382301095456022e-05, "loss": 0.2034, "step": 923 }, { "epoch": 0.4380184877933159, "grad_norm": 8.92359447479248, "learning_rate": 1.3807710938491581e-05, "loss": 0.1927, "step": 924 }, { "epoch": 0.43849253377577624, "grad_norm": 2.743741512298584, "learning_rate": 1.3792400490335911e-05, "loss": 0.1213, "step": 925 }, { "epoch": 0.43896657975823655, "grad_norm": 3.5043816566467285, "learning_rate": 1.3777079652039649e-05, "loss": 0.142, "step": 926 }, { "epoch": 0.43944062574069687, "grad_norm": 6.005928993225098, "learning_rate": 1.3761748465577706e-05, "loss": 0.1826, "step": 927 }, { "epoch": 0.4399146717231571, "grad_norm": 8.12239933013916, "learning_rate": 1.374640697295334e-05, "loss": 0.1672, "step": 928 }, { "epoch": 0.44038871770561744, "grad_norm": 7.5266947746276855, "learning_rate": 1.373105521619805e-05, "loss": 0.1555, "step": 929 }, { "epoch": 0.44086276368807775, "grad_norm": 8.656370162963867, "learning_rate": 1.3715693237371448e-05, "loss": 0.2167, "step": 930 }, { "epoch": 0.44133680967053807, "grad_norm": 4.375191688537598, "learning_rate": 1.3700321078561158e-05, "loss": 0.1318, "step": 931 }, { "epoch": 0.4418108556529983, "grad_norm": 12.426055908203125, "learning_rate": 1.3684938781882692e-05, "loss": 0.2014, "step": 932 }, { "epoch": 0.44228490163545864, "grad_norm": 3.1371965408325195, "learning_rate": 1.3669546389479342e-05, "loss": 0.16, "step": 933 }, { "epoch": 0.44275894761791895, "grad_norm": 5.246861457824707, "learning_rate": 1.3654143943522051e-05, "loss": 0.1569, "step": 934 }, { "epoch": 0.4432329936003792, "grad_norm": 6.054107189178467, "learning_rate": 1.363873148620931e-05, "loss": 0.225, "step": 935 }, { "epoch": 0.4437070395828395, "grad_norm": 5.0818305015563965, "learning_rate": 1.3623309059767043e-05, "loss": 0.2101, "step": 936 }, { "epoch": 0.44418108556529984, "grad_norm": 3.7268683910369873, "learning_rate": 1.3607876706448477e-05, "loss": 0.1511, "step": 937 }, { "epoch": 0.44465513154776015, "grad_norm": 7.065274715423584, "learning_rate": 1.3592434468534046e-05, "loss": 0.1875, "step": 938 }, { "epoch": 0.4451291775302204, "grad_norm": 5.633383750915527, "learning_rate": 1.3576982388331258e-05, "loss": 0.1531, "step": 939 }, { "epoch": 0.4456032235126807, "grad_norm": 4.752358913421631, "learning_rate": 1.3561520508174586e-05, "loss": 0.1475, "step": 940 }, { "epoch": 0.4456032235126807, "eval_accuracy": 0.9911433172302737, 
"eval_f1": 0.9026548672566371, "eval_loss": 0.019943224266171455, "eval_precision": 0.8360655737704918, "eval_recall": 0.9807692307692307, "eval_runtime": 50.4533, "eval_samples_per_second": 5.371, "eval_steps_per_second": 0.178, "step": 940 }, { "epoch": 0.44607726949514104, "grad_norm": 3.1623804569244385, "learning_rate": 1.3546048870425356e-05, "loss": 0.151, "step": 941 }, { "epoch": 0.44655131547760135, "grad_norm": 3.126525402069092, "learning_rate": 1.3530567517471632e-05, "loss": 0.0928, "step": 942 }, { "epoch": 0.4470253614600616, "grad_norm": 3.9661409854888916, "learning_rate": 1.3515076491728079e-05, "loss": 0.1572, "step": 943 }, { "epoch": 0.4474994074425219, "grad_norm": 8.030318260192871, "learning_rate": 1.3499575835635884e-05, "loss": 0.194, "step": 944 }, { "epoch": 0.44797345342498224, "grad_norm": 12.486641883850098, "learning_rate": 1.3484065591662596e-05, "loss": 0.262, "step": 945 }, { "epoch": 0.4484474994074425, "grad_norm": 4.205323696136475, "learning_rate": 1.3468545802302048e-05, "loss": 0.1763, "step": 946 }, { "epoch": 0.4489215453899028, "grad_norm": 4.213166236877441, "learning_rate": 1.3453016510074222e-05, "loss": 0.1301, "step": 947 }, { "epoch": 0.4493955913723631, "grad_norm": 7.569645881652832, "learning_rate": 1.3437477757525131e-05, "loss": 0.1618, "step": 948 }, { "epoch": 0.44986963735482344, "grad_norm": 5.092619895935059, "learning_rate": 1.342192958722671e-05, "loss": 0.1668, "step": 949 }, { "epoch": 0.4503436833372837, "grad_norm": 4.923454284667969, "learning_rate": 1.3406372041776694e-05, "loss": 0.1665, "step": 950 }, { "epoch": 0.450817729319744, "grad_norm": 3.7351295948028564, "learning_rate": 1.3390805163798506e-05, "loss": 0.1616, "step": 951 }, { "epoch": 0.4512917753022043, "grad_norm": 8.060539245605469, "learning_rate": 1.3375228995941135e-05, "loss": 0.1823, "step": 952 }, { "epoch": 0.45176582128466464, "grad_norm": 7.6434855461120605, "learning_rate": 1.3359643580879023e-05, "loss": 0.1558, "step": 953 }, { "epoch": 0.4522398672671249, "grad_norm": 4.453567028045654, "learning_rate": 1.3344048961311947e-05, "loss": 0.0936, "step": 954 }, { "epoch": 0.4527139132495852, "grad_norm": 11.421374320983887, "learning_rate": 1.3328445179964902e-05, "loss": 0.1791, "step": 955 }, { "epoch": 0.4531879592320455, "grad_norm": 4.431858539581299, "learning_rate": 1.3312832279587981e-05, "loss": 0.1444, "step": 956 }, { "epoch": 0.4536620052145058, "grad_norm": 3.108346700668335, "learning_rate": 1.3297210302956263e-05, "loss": 0.0835, "step": 957 }, { "epoch": 0.4541360511969661, "grad_norm": 3.830599784851074, "learning_rate": 1.3281579292869693e-05, "loss": 0.1073, "step": 958 }, { "epoch": 0.4546100971794264, "grad_norm": 3.9181289672851562, "learning_rate": 1.3265939292152971e-05, "loss": 0.1496, "step": 959 }, { "epoch": 0.4550841431618867, "grad_norm": 4.183866500854492, "learning_rate": 1.3250290343655419e-05, "loss": 0.1459, "step": 960 }, { "epoch": 0.4550841431618867, "eval_accuracy": 0.9919484702093397, "eval_f1": 0.9122807017543859, "eval_loss": 0.016616741195321083, "eval_precision": 0.8387096774193549, "eval_recall": 1.0, "eval_runtime": 50.0168, "eval_samples_per_second": 5.418, "eval_steps_per_second": 0.18, "step": 960 }, { "epoch": 0.455558189144347, "grad_norm": 3.235133171081543, "learning_rate": 1.3234632490250875e-05, "loss": 0.1544, "step": 961 }, { "epoch": 0.4560322351268073, "grad_norm": 3.485752582550049, "learning_rate": 1.3218965774837584e-05, "loss": 0.1296, "step": 962 }, { "epoch": 0.4565062811092676, 
"grad_norm": 2.8867838382720947, "learning_rate": 1.3203290240338056e-05, "loss": 0.1596, "step": 963 }, { "epoch": 0.4569803270917279, "grad_norm": 4.817343711853027, "learning_rate": 1.3187605929698975e-05, "loss": 0.1423, "step": 964 }, { "epoch": 0.4574543730741882, "grad_norm": 6.942486763000488, "learning_rate": 1.3171912885891063e-05, "loss": 0.225, "step": 965 }, { "epoch": 0.4579284190566485, "grad_norm": 3.653506278991699, "learning_rate": 1.3156211151908967e-05, "loss": 0.1183, "step": 966 }, { "epoch": 0.4584024650391088, "grad_norm": 9.019591331481934, "learning_rate": 1.314050077077115e-05, "loss": 0.2022, "step": 967 }, { "epoch": 0.45887651102156907, "grad_norm": 3.084385395050049, "learning_rate": 1.312478178551976e-05, "loss": 0.1307, "step": 968 }, { "epoch": 0.4593505570040294, "grad_norm": 4.272575855255127, "learning_rate": 1.310905423922052e-05, "loss": 0.0869, "step": 969 }, { "epoch": 0.4598246029864897, "grad_norm": 3.859203577041626, "learning_rate": 1.3093318174962609e-05, "loss": 0.1234, "step": 970 }, { "epoch": 0.46029864896895, "grad_norm": 7.830322742462158, "learning_rate": 1.3077573635858536e-05, "loss": 0.1374, "step": 971 }, { "epoch": 0.46077269495141027, "grad_norm": 2.7453970909118652, "learning_rate": 1.3061820665044036e-05, "loss": 0.1008, "step": 972 }, { "epoch": 0.4612467409338706, "grad_norm": 2.8466203212738037, "learning_rate": 1.3046059305677944e-05, "loss": 0.0858, "step": 973 }, { "epoch": 0.4617207869163309, "grad_norm": 3.5846686363220215, "learning_rate": 1.3030289600942074e-05, "loss": 0.1682, "step": 974 }, { "epoch": 0.4621948328987912, "grad_norm": 3.473472833633423, "learning_rate": 1.301451159404111e-05, "loss": 0.0976, "step": 975 }, { "epoch": 0.46266887888125147, "grad_norm": 6.576850891113281, "learning_rate": 1.2998725328202473e-05, "loss": 0.2096, "step": 976 }, { "epoch": 0.4631429248637118, "grad_norm": 6.014435768127441, "learning_rate": 1.2982930846676215e-05, "loss": 0.1486, "step": 977 }, { "epoch": 0.4636169708461721, "grad_norm": 3.5731709003448486, "learning_rate": 1.2967128192734903e-05, "loss": 0.1621, "step": 978 }, { "epoch": 0.46409101682863235, "grad_norm": 4.256763935089111, "learning_rate": 1.2951317409673484e-05, "loss": 0.2164, "step": 979 }, { "epoch": 0.46456506281109267, "grad_norm": 6.394613742828369, "learning_rate": 1.2935498540809186e-05, "loss": 0.1264, "step": 980 }, { "epoch": 0.46456506281109267, "eval_accuracy": 0.9935587761674718, "eval_f1": 0.9272727272727272, "eval_loss": 0.01547270268201828, "eval_precision": 0.8793103448275862, "eval_recall": 0.9807692307692307, "eval_runtime": 50.1407, "eval_samples_per_second": 5.405, "eval_steps_per_second": 0.179, "step": 980 }, { "epoch": 0.465039108793553, "grad_norm": 6.473601341247559, "learning_rate": 1.2919671629481383e-05, "loss": 0.1806, "step": 981 }, { "epoch": 0.4655131547760133, "grad_norm": 3.8189542293548584, "learning_rate": 1.2903836719051482e-05, "loss": 0.1563, "step": 982 }, { "epoch": 0.46598720075847355, "grad_norm": 6.113621234893799, "learning_rate": 1.2887993852902811e-05, "loss": 0.1501, "step": 983 }, { "epoch": 0.46646124674093387, "grad_norm": 3.9226691722869873, "learning_rate": 1.287214307444049e-05, "loss": 0.149, "step": 984 }, { "epoch": 0.4669352927233942, "grad_norm": 5.873342990875244, "learning_rate": 1.2856284427091324e-05, "loss": 0.1763, "step": 985 }, { "epoch": 0.4674093387058545, "grad_norm": 4.942972183227539, "learning_rate": 1.284041795430367e-05, "loss": 0.2116, "step": 986 }, { "epoch": 
0.46788338468831475, "grad_norm": 3.7082228660583496, "learning_rate": 1.2824543699547323e-05, "loss": 0.1274, "step": 987 }, { "epoch": 0.46835743067077507, "grad_norm": 4.795472621917725, "learning_rate": 1.2808661706313402e-05, "loss": 0.1163, "step": 988 }, { "epoch": 0.4688314766532354, "grad_norm": 3.9650912284851074, "learning_rate": 1.2792772018114227e-05, "loss": 0.1454, "step": 989 }, { "epoch": 0.46930552263569564, "grad_norm": 6.581302642822266, "learning_rate": 1.2776874678483201e-05, "loss": 0.1112, "step": 990 }, { "epoch": 0.46977956861815595, "grad_norm": 4.1125264167785645, "learning_rate": 1.2760969730974692e-05, "loss": 0.1589, "step": 991 }, { "epoch": 0.47025361460061627, "grad_norm": 4.1806159019470215, "learning_rate": 1.2745057219163898e-05, "loss": 0.1468, "step": 992 }, { "epoch": 0.4707276605830766, "grad_norm": 3.8207151889801025, "learning_rate": 1.272913718664676e-05, "loss": 0.1427, "step": 993 }, { "epoch": 0.47120170656553684, "grad_norm": 3.915806531906128, "learning_rate": 1.2713209677039813e-05, "loss": 0.1802, "step": 994 }, { "epoch": 0.47167575254799715, "grad_norm": 3.6201188564300537, "learning_rate": 1.2697274733980077e-05, "loss": 0.165, "step": 995 }, { "epoch": 0.47214979853045747, "grad_norm": 5.235833644866943, "learning_rate": 1.2681332401124943e-05, "loss": 0.1808, "step": 996 }, { "epoch": 0.4726238445129178, "grad_norm": 2.5903854370117188, "learning_rate": 1.2665382722152042e-05, "loss": 0.1575, "step": 997 }, { "epoch": 0.47309789049537804, "grad_norm": 8.416844367980957, "learning_rate": 1.2649425740759133e-05, "loss": 0.2345, "step": 998 }, { "epoch": 0.47357193647783835, "grad_norm": 11.294881820678711, "learning_rate": 1.2633461500663989e-05, "loss": 0.1582, "step": 999 }, { "epoch": 0.47404598246029866, "grad_norm": 4.148478984832764, "learning_rate": 1.2617490045604256e-05, "loss": 0.1389, "step": 1000 }, { "epoch": 0.47404598246029866, "eval_accuracy": 0.9943639291465378, "eval_f1": 0.9345794392523364, "eval_loss": 0.015172837302088737, "eval_precision": 0.9090909090909091, "eval_recall": 0.9615384615384616, "eval_runtime": 50.6151, "eval_samples_per_second": 5.354, "eval_steps_per_second": 0.178, "step": 1000 }, { "epoch": 0.4745200284427589, "grad_norm": 4.268529891967773, "learning_rate": 1.2601511419337358e-05, "loss": 0.1608, "step": 1001 }, { "epoch": 0.47499407442521924, "grad_norm": 3.095743417739868, "learning_rate": 1.2585525665640364e-05, "loss": 0.135, "step": 1002 }, { "epoch": 0.47546812040767955, "grad_norm": 4.274344444274902, "learning_rate": 1.2569532828309866e-05, "loss": 0.2169, "step": 1003 }, { "epoch": 0.47594216639013986, "grad_norm": 4.288705348968506, "learning_rate": 1.255353295116187e-05, "loss": 0.1557, "step": 1004 }, { "epoch": 0.4764162123726001, "grad_norm": 6.122418403625488, "learning_rate": 1.2537526078031666e-05, "loss": 0.1682, "step": 1005 }, { "epoch": 0.47689025835506044, "grad_norm": 4.8099589347839355, "learning_rate": 1.2521512252773705e-05, "loss": 0.1881, "step": 1006 }, { "epoch": 0.47736430433752075, "grad_norm": 4.638556957244873, "learning_rate": 1.2505491519261495e-05, "loss": 0.1124, "step": 1007 }, { "epoch": 0.47783835031998106, "grad_norm": 3.0793027877807617, "learning_rate": 1.2489463921387461e-05, "loss": 0.1015, "step": 1008 }, { "epoch": 0.4783123963024413, "grad_norm": 5.5951457023620605, "learning_rate": 1.2473429503062846e-05, "loss": 0.1587, "step": 1009 }, { "epoch": 0.47878644228490164, "grad_norm": 6.0107035636901855, "learning_rate": 1.2457388308217565e-05, 
"loss": 0.1981, "step": 1010 }, { "epoch": 0.47926048826736195, "grad_norm": 8.58751392364502, "learning_rate": 1.2441340380800118e-05, "loss": 0.1583, "step": 1011 }, { "epoch": 0.4797345342498222, "grad_norm": 4.693225860595703, "learning_rate": 1.242528576477743e-05, "loss": 0.1211, "step": 1012 }, { "epoch": 0.4802085802322825, "grad_norm": 6.166213512420654, "learning_rate": 1.2409224504134763e-05, "loss": 0.1974, "step": 1013 }, { "epoch": 0.48068262621474284, "grad_norm": 5.781782150268555, "learning_rate": 1.2393156642875579e-05, "loss": 0.1895, "step": 1014 }, { "epoch": 0.48115667219720315, "grad_norm": 3.4138295650482178, "learning_rate": 1.2377082225021426e-05, "loss": 0.1096, "step": 1015 }, { "epoch": 0.4816307181796634, "grad_norm": 6.498300075531006, "learning_rate": 1.2361001294611813e-05, "loss": 0.1807, "step": 1016 }, { "epoch": 0.4821047641621237, "grad_norm": 6.345750331878662, "learning_rate": 1.2344913895704099e-05, "loss": 0.1902, "step": 1017 }, { "epoch": 0.48257881014458404, "grad_norm": 3.625999927520752, "learning_rate": 1.2328820072373354e-05, "loss": 0.1031, "step": 1018 }, { "epoch": 0.48305285612704435, "grad_norm": 3.37025785446167, "learning_rate": 1.2312719868712251e-05, "loss": 0.0962, "step": 1019 }, { "epoch": 0.4835269021095046, "grad_norm": 4.30148458480835, "learning_rate": 1.2296613328830952e-05, "loss": 0.1433, "step": 1020 }, { "epoch": 0.4835269021095046, "eval_accuracy": 0.9943639291465378, "eval_f1": 0.9345794392523364, "eval_loss": 0.014748076908290386, "eval_precision": 0.9090909090909091, "eval_recall": 0.9615384615384616, "eval_runtime": 49.6437, "eval_samples_per_second": 5.459, "eval_steps_per_second": 0.181, "step": 1020 }, { "epoch": 0.4840009480919649, "grad_norm": 6.231382369995117, "learning_rate": 1.2280500496856976e-05, "loss": 0.1807, "step": 1021 }, { "epoch": 0.48447499407442524, "grad_norm": 3.694305896759033, "learning_rate": 1.2264381416935072e-05, "loss": 0.1332, "step": 1022 }, { "epoch": 0.4849490400568855, "grad_norm": 8.44955062866211, "learning_rate": 1.2248256133227113e-05, "loss": 0.1693, "step": 1023 }, { "epoch": 0.4854230860393458, "grad_norm": 3.2484383583068848, "learning_rate": 1.2232124689911968e-05, "loss": 0.104, "step": 1024 }, { "epoch": 0.4858971320218061, "grad_norm": 5.29164457321167, "learning_rate": 1.2215987131185385e-05, "loss": 0.1593, "step": 1025 }, { "epoch": 0.48637117800426644, "grad_norm": 4.244653224945068, "learning_rate": 1.219984350125986e-05, "loss": 0.1739, "step": 1026 }, { "epoch": 0.4868452239867267, "grad_norm": 5.924075126647949, "learning_rate": 1.2183693844364527e-05, "loss": 0.1883, "step": 1027 }, { "epoch": 0.487319269969187, "grad_norm": 4.187386512756348, "learning_rate": 1.2167538204745032e-05, "loss": 0.178, "step": 1028 }, { "epoch": 0.4877933159516473, "grad_norm": 7.9537482261657715, "learning_rate": 1.2151376626663407e-05, "loss": 0.1713, "step": 1029 }, { "epoch": 0.48826736193410764, "grad_norm": 5.777340888977051, "learning_rate": 1.2135209154397962e-05, "loss": 0.1812, "step": 1030 }, { "epoch": 0.4887414079165679, "grad_norm": 4.7019195556640625, "learning_rate": 1.2119035832243148e-05, "loss": 0.1215, "step": 1031 }, { "epoch": 0.4892154538990282, "grad_norm": 11.10740852355957, "learning_rate": 1.210285670450945e-05, "loss": 0.2011, "step": 1032 }, { "epoch": 0.4896894998814885, "grad_norm": 7.827641487121582, "learning_rate": 1.2086671815523257e-05, "loss": 0.2163, "step": 1033 }, { "epoch": 0.4901635458639488, "grad_norm": 5.775716781616211, "learning_rate": 
1.2070481209626731e-05, "loss": 0.1644, "step": 1034 }, { "epoch": 0.4906375918464091, "grad_norm": 4.169939994812012, "learning_rate": 1.205428493117771e-05, "loss": 0.1293, "step": 1035 }, { "epoch": 0.4911116378288694, "grad_norm": 3.2122132778167725, "learning_rate": 1.2038083024549574e-05, "loss": 0.0987, "step": 1036 }, { "epoch": 0.4915856838113297, "grad_norm": 3.7689731121063232, "learning_rate": 1.2021875534131114e-05, "loss": 0.1896, "step": 1037 }, { "epoch": 0.49205972979379, "grad_norm": 3.5483527183532715, "learning_rate": 1.2005662504326426e-05, "loss": 0.1217, "step": 1038 }, { "epoch": 0.4925337757762503, "grad_norm": 5.175573825836182, "learning_rate": 1.1989443979554774e-05, "loss": 0.1447, "step": 1039 }, { "epoch": 0.4930078217587106, "grad_norm": 5.255129814147949, "learning_rate": 1.1973220004250483e-05, "loss": 0.1389, "step": 1040 }, { "epoch": 0.4930078217587106, "eval_accuracy": 0.9967793880837359, "eval_f1": 0.9622641509433962, "eval_loss": 0.01255769096314907, "eval_precision": 0.9444444444444444, "eval_recall": 0.9807692307692307, "eval_runtime": 51.4883, "eval_samples_per_second": 5.263, "eval_steps_per_second": 0.175, "step": 1040 }, { "epoch": 0.4934818677411709, "grad_norm": 6.275894641876221, "learning_rate": 1.1956990622862813e-05, "loss": 0.1295, "step": 1041 }, { "epoch": 0.4939559137236312, "grad_norm": 9.091854095458984, "learning_rate": 1.1940755879855834e-05, "loss": 0.2263, "step": 1042 }, { "epoch": 0.4944299597060915, "grad_norm": 3.2177767753601074, "learning_rate": 1.19245158197083e-05, "loss": 0.1189, "step": 1043 }, { "epoch": 0.4949040056885518, "grad_norm": 4.994039058685303, "learning_rate": 1.1908270486913538e-05, "loss": 0.193, "step": 1044 }, { "epoch": 0.49537805167101207, "grad_norm": 4.388595104217529, "learning_rate": 1.1892019925979317e-05, "loss": 0.1579, "step": 1045 }, { "epoch": 0.4958520976534724, "grad_norm": 7.059874057769775, "learning_rate": 1.187576418142773e-05, "loss": 0.231, "step": 1046 }, { "epoch": 0.4963261436359327, "grad_norm": 3.518444061279297, "learning_rate": 1.185950329779508e-05, "loss": 0.1306, "step": 1047 }, { "epoch": 0.496800189618393, "grad_norm": 6.214210033416748, "learning_rate": 1.1843237319631737e-05, "loss": 0.2263, "step": 1048 }, { "epoch": 0.49727423560085326, "grad_norm": 2.9103147983551025, "learning_rate": 1.1826966291502036e-05, "loss": 0.1021, "step": 1049 }, { "epoch": 0.4977482815833136, "grad_norm": 10.44170093536377, "learning_rate": 1.1810690257984145e-05, "loss": 0.1261, "step": 1050 }, { "epoch": 0.4982223275657739, "grad_norm": 5.4244794845581055, "learning_rate": 1.1794409263669948e-05, "loss": 0.1622, "step": 1051 }, { "epoch": 0.4986963735482342, "grad_norm": 3.7462995052337646, "learning_rate": 1.1778123353164917e-05, "loss": 0.1358, "step": 1052 }, { "epoch": 0.49917041953069446, "grad_norm": 3.7800092697143555, "learning_rate": 1.1761832571087994e-05, "loss": 0.1411, "step": 1053 }, { "epoch": 0.4996444655131548, "grad_norm": 3.845872402191162, "learning_rate": 1.1745536962071471e-05, "loss": 0.113, "step": 1054 }, { "epoch": 0.500118511495615, "grad_norm": 5.4136786460876465, "learning_rate": 1.172923657076086e-05, "loss": 0.1868, "step": 1055 }, { "epoch": 0.5005925574780754, "grad_norm": 5.000911235809326, "learning_rate": 1.1712931441814776e-05, "loss": 0.2072, "step": 1056 }, { "epoch": 0.5010666034605357, "grad_norm": 3.6361448764801025, "learning_rate": 1.169662161990482e-05, "loss": 0.1279, "step": 1057 }, { "epoch": 0.5015406494429959, "grad_norm": 
5.903532981872559, "learning_rate": 1.168030714971544e-05, "loss": 0.2141, "step": 1058 }, { "epoch": 0.5020146954254563, "grad_norm": 4.509527683258057, "learning_rate": 1.1663988075943828e-05, "loss": 0.1327, "step": 1059 }, { "epoch": 0.5024887414079166, "grad_norm": 4.647515773773193, "learning_rate": 1.164766444329978e-05, "loss": 0.1452, "step": 1060 }, { "epoch": 0.5024887414079166, "eval_accuracy": 0.9903381642512077, "eval_f1": 0.896551724137931, "eval_loss": 0.022998766973614693, "eval_precision": 0.8125, "eval_recall": 1.0, "eval_runtime": 50.1283, "eval_samples_per_second": 5.406, "eval_steps_per_second": 0.18, "step": 1060 }, { "epoch": 0.5029627873903769, "grad_norm": 7.983517646789551, "learning_rate": 1.163133629650559e-05, "loss": 0.161, "step": 1061 }, { "epoch": 0.5034368333728372, "grad_norm": 7.8180131912231445, "learning_rate": 1.1615003680295917e-05, "loss": 0.1329, "step": 1062 }, { "epoch": 0.5039108793552974, "grad_norm": 10.5711669921875, "learning_rate": 1.1598666639417664e-05, "loss": 0.2081, "step": 1063 }, { "epoch": 0.5043849253377578, "grad_norm": 3.753976583480835, "learning_rate": 1.158232521862986e-05, "loss": 0.1605, "step": 1064 }, { "epoch": 0.5048589713202181, "grad_norm": 3.544933795928955, "learning_rate": 1.1565979462703525e-05, "loss": 0.0951, "step": 1065 }, { "epoch": 0.5053330173026783, "grad_norm": 6.760609149932861, "learning_rate": 1.1549629416421561e-05, "loss": 0.1136, "step": 1066 }, { "epoch": 0.5058070632851387, "grad_norm": 3.890387773513794, "learning_rate": 1.1533275124578631e-05, "loss": 0.1401, "step": 1067 }, { "epoch": 0.506281109267599, "grad_norm": 5.5880818367004395, "learning_rate": 1.1516916631981021e-05, "loss": 0.1523, "step": 1068 }, { "epoch": 0.5067551552500592, "grad_norm": 4.130406379699707, "learning_rate": 1.1500553983446527e-05, "loss": 0.1365, "step": 1069 }, { "epoch": 0.5072292012325196, "grad_norm": 5.780043125152588, "learning_rate": 1.1484187223804337e-05, "loss": 0.1468, "step": 1070 }, { "epoch": 0.5077032472149798, "grad_norm": 4.496865749359131, "learning_rate": 1.1467816397894893e-05, "loss": 0.2138, "step": 1071 }, { "epoch": 0.5081772931974402, "grad_norm": 6.2288923263549805, "learning_rate": 1.1451441550569787e-05, "loss": 0.0839, "step": 1072 }, { "epoch": 0.5086513391799005, "grad_norm": 3.994375705718994, "learning_rate": 1.1435062726691622e-05, "loss": 0.1458, "step": 1073 }, { "epoch": 0.5091253851623607, "grad_norm": 9.764244079589844, "learning_rate": 1.14186799711339e-05, "loss": 0.2082, "step": 1074 }, { "epoch": 0.5095994311448211, "grad_norm": 4.505104064941406, "learning_rate": 1.1402293328780887e-05, "loss": 0.1548, "step": 1075 }, { "epoch": 0.5100734771272813, "grad_norm": 7.453891277313232, "learning_rate": 1.1385902844527508e-05, "loss": 0.2104, "step": 1076 }, { "epoch": 0.5105475231097416, "grad_norm": 4.776457786560059, "learning_rate": 1.1369508563279207e-05, "loss": 0.1753, "step": 1077 }, { "epoch": 0.511021569092202, "grad_norm": 4.367084503173828, "learning_rate": 1.1353110529951836e-05, "loss": 0.1376, "step": 1078 }, { "epoch": 0.5114956150746622, "grad_norm": 7.010676860809326, "learning_rate": 1.1336708789471522e-05, "loss": 0.1964, "step": 1079 }, { "epoch": 0.5119696610571225, "grad_norm": 4.374360084533691, "learning_rate": 1.1320303386774546e-05, "loss": 0.1623, "step": 1080 }, { "epoch": 0.5119696610571225, "eval_accuracy": 0.9951690821256038, "eval_f1": 0.9433962264150944, "eval_loss": 0.012818964198231697, "eval_precision": 0.9259259259259259, "eval_recall": 
0.9615384615384616, "eval_runtime": 51.2178, "eval_samples_per_second": 5.291, "eval_steps_per_second": 0.176, "step": 1080 }, { "epoch": 0.5124437070395829, "grad_norm": 4.532538414001465, "learning_rate": 1.1303894366807234e-05, "loss": 0.1986, "step": 1081 }, { "epoch": 0.5129177530220431, "grad_norm": 6.917778968811035, "learning_rate": 1.128748177452581e-05, "loss": 0.1567, "step": 1082 }, { "epoch": 0.5133917990045035, "grad_norm": 4.427739143371582, "learning_rate": 1.127106565489629e-05, "loss": 0.1958, "step": 1083 }, { "epoch": 0.5138658449869637, "grad_norm": 9.344569206237793, "learning_rate": 1.1254646052894353e-05, "loss": 0.115, "step": 1084 }, { "epoch": 0.514339890969424, "grad_norm": 2.4701592922210693, "learning_rate": 1.1238223013505227e-05, "loss": 0.1011, "step": 1085 }, { "epoch": 0.5148139369518844, "grad_norm": 4.596907615661621, "learning_rate": 1.1221796581723543e-05, "loss": 0.1347, "step": 1086 }, { "epoch": 0.5152879829343446, "grad_norm": 5.753283500671387, "learning_rate": 1.1205366802553231e-05, "loss": 0.1835, "step": 1087 }, { "epoch": 0.5157620289168049, "grad_norm": 4.703371047973633, "learning_rate": 1.1188933721007402e-05, "loss": 0.1329, "step": 1088 }, { "epoch": 0.5162360748992653, "grad_norm": 6.252144813537598, "learning_rate": 1.1172497382108203e-05, "loss": 0.1797, "step": 1089 }, { "epoch": 0.5167101208817255, "grad_norm": 5.656299591064453, "learning_rate": 1.1156057830886713e-05, "loss": 0.2235, "step": 1090 }, { "epoch": 0.5171841668641858, "grad_norm": 4.578910827636719, "learning_rate": 1.1139615112382804e-05, "loss": 0.2132, "step": 1091 }, { "epoch": 0.5176582128466461, "grad_norm": 11.287896156311035, "learning_rate": 1.112316927164503e-05, "loss": 0.1917, "step": 1092 }, { "epoch": 0.5181322588291064, "grad_norm": 4.847326755523682, "learning_rate": 1.11067203537305e-05, "loss": 0.1787, "step": 1093 }, { "epoch": 0.5186063048115668, "grad_norm": 2.949026584625244, "learning_rate": 1.1090268403704751e-05, "loss": 0.0892, "step": 1094 }, { "epoch": 0.519080350794027, "grad_norm": 4.3202595710754395, "learning_rate": 1.1073813466641633e-05, "loss": 0.1221, "step": 1095 }, { "epoch": 0.5195543967764873, "grad_norm": 4.129481315612793, "learning_rate": 1.1057355587623168e-05, "loss": 0.1316, "step": 1096 }, { "epoch": 0.5200284427589477, "grad_norm": 4.425373077392578, "learning_rate": 1.1040894811739449e-05, "loss": 0.147, "step": 1097 }, { "epoch": 0.5205024887414079, "grad_norm": 5.3371710777282715, "learning_rate": 1.1024431184088505e-05, "loss": 0.1778, "step": 1098 }, { "epoch": 0.5209765347238682, "grad_norm": 6.158993721008301, "learning_rate": 1.1007964749776167e-05, "loss": 0.1638, "step": 1099 }, { "epoch": 0.5214505807063285, "grad_norm": 4.83911657333374, "learning_rate": 1.0991495553915974e-05, "loss": 0.1179, "step": 1100 }, { "epoch": 0.5214505807063285, "eval_accuracy": 0.9951690821256038, "eval_f1": 0.9454545454545454, "eval_loss": 0.015559504739940166, "eval_precision": 0.896551724137931, "eval_recall": 1.0, "eval_runtime": 50.8122, "eval_samples_per_second": 5.333, "eval_steps_per_second": 0.177, "step": 1100 }, { "epoch": 0.5219246266887888, "grad_norm": 4.8156046867370605, "learning_rate": 1.0975023641629013e-05, "loss": 0.1646, "step": 1101 }, { "epoch": 0.5223986726712491, "grad_norm": 7.469467639923096, "learning_rate": 1.0958549058043821e-05, "loss": 0.2287, "step": 1102 }, { "epoch": 0.5228727186537094, "grad_norm": 6.2885847091674805, "learning_rate": 1.0942071848296257e-05, "loss": 0.1084, "step": 1103 }, { 
"epoch": 0.5233467646361697, "grad_norm": 5.288135051727295, "learning_rate": 1.0925592057529364e-05, "loss": 0.134, "step": 1104 }, { "epoch": 0.5238208106186301, "grad_norm": 5.3313212394714355, "learning_rate": 1.0909109730893273e-05, "loss": 0.1674, "step": 1105 }, { "epoch": 0.5242948566010903, "grad_norm": 4.348062515258789, "learning_rate": 1.0892624913545046e-05, "loss": 0.1363, "step": 1106 }, { "epoch": 0.5247689025835506, "grad_norm": 3.602989673614502, "learning_rate": 1.0876137650648579e-05, "loss": 0.0938, "step": 1107 }, { "epoch": 0.525242948566011, "grad_norm": 7.360906600952148, "learning_rate": 1.0859647987374467e-05, "loss": 0.2281, "step": 1108 }, { "epoch": 0.5257169945484712, "grad_norm": 6.693387508392334, "learning_rate": 1.0843155968899875e-05, "loss": 0.2278, "step": 1109 }, { "epoch": 0.5261910405309315, "grad_norm": 7.876137733459473, "learning_rate": 1.0826661640408427e-05, "loss": 0.2026, "step": 1110 }, { "epoch": 0.5266650865133918, "grad_norm": 14.165900230407715, "learning_rate": 1.0810165047090076e-05, "loss": 0.1884, "step": 1111 }, { "epoch": 0.5271391324958521, "grad_norm": 2.937747001647949, "learning_rate": 1.0793666234140974e-05, "loss": 0.0771, "step": 1112 }, { "epoch": 0.5276131784783124, "grad_norm": 5.6442999839782715, "learning_rate": 1.0777165246763357e-05, "loss": 0.1532, "step": 1113 }, { "epoch": 0.5280872244607727, "grad_norm": 5.834621429443359, "learning_rate": 1.0760662130165426e-05, "loss": 0.1136, "step": 1114 }, { "epoch": 0.528561270443233, "grad_norm": 3.5276947021484375, "learning_rate": 1.0744156929561206e-05, "loss": 0.0837, "step": 1115 }, { "epoch": 0.5290353164256933, "grad_norm": 7.095658779144287, "learning_rate": 1.0727649690170434e-05, "loss": 0.1891, "step": 1116 }, { "epoch": 0.5295093624081536, "grad_norm": 6.207774639129639, "learning_rate": 1.0711140457218435e-05, "loss": 0.1539, "step": 1117 }, { "epoch": 0.5299834083906139, "grad_norm": 4.916074275970459, "learning_rate": 1.0694629275935989e-05, "loss": 0.2181, "step": 1118 }, { "epoch": 0.5304574543730742, "grad_norm": 4.461488723754883, "learning_rate": 1.0678116191559222e-05, "loss": 0.1505, "step": 1119 }, { "epoch": 0.5309315003555345, "grad_norm": 3.7093873023986816, "learning_rate": 1.0661601249329472e-05, "loss": 0.1256, "step": 1120 }, { "epoch": 0.5309315003555345, "eval_accuracy": 0.9951690821256038, "eval_f1": 0.9444444444444444, "eval_loss": 0.01748965121805668, "eval_precision": 0.9107142857142857, "eval_recall": 0.9807692307692307, "eval_runtime": 50.2232, "eval_samples_per_second": 5.396, "eval_steps_per_second": 0.179, "step": 1120 }, { "epoch": 0.5314055463379947, "grad_norm": 6.480965614318848, "learning_rate": 1.0645084494493166e-05, "loss": 0.2021, "step": 1121 }, { "epoch": 0.5318795923204551, "grad_norm": 2.9335877895355225, "learning_rate": 1.0628565972301694e-05, "loss": 0.0458, "step": 1122 }, { "epoch": 0.5323536383029154, "grad_norm": 3.652796983718872, "learning_rate": 1.0612045728011294e-05, "loss": 0.0939, "step": 1123 }, { "epoch": 0.5328276842853756, "grad_norm": 2.967013359069824, "learning_rate": 1.0595523806882916e-05, "loss": 0.1155, "step": 1124 }, { "epoch": 0.533301730267836, "grad_norm": 6.951047897338867, "learning_rate": 1.0579000254182112e-05, "loss": 0.2415, "step": 1125 }, { "epoch": 0.5337757762502963, "grad_norm": 4.515738010406494, "learning_rate": 1.0562475115178896e-05, "loss": 0.1584, "step": 1126 }, { "epoch": 0.5342498222327566, "grad_norm": 5.67434549331665, "learning_rate": 1.0545948435147633e-05, "loss": 
0.1609, "step": 1127 }, { "epoch": 0.5347238682152169, "grad_norm": 3.1618642807006836, "learning_rate": 1.0529420259366907e-05, "loss": 0.1435, "step": 1128 }, { "epoch": 0.5351979141976771, "grad_norm": 5.694845676422119, "learning_rate": 1.05128906331194e-05, "loss": 0.1783, "step": 1129 }, { "epoch": 0.5356719601801375, "grad_norm": 5.417916774749756, "learning_rate": 1.0496359601691768e-05, "loss": 0.1319, "step": 1130 }, { "epoch": 0.5361460061625978, "grad_norm": 3.616260290145874, "learning_rate": 1.0479827210374525e-05, "loss": 0.1023, "step": 1131 }, { "epoch": 0.536620052145058, "grad_norm": 5.539340496063232, "learning_rate": 1.0463293504461898e-05, "loss": 0.1199, "step": 1132 }, { "epoch": 0.5370940981275184, "grad_norm": 9.369730949401855, "learning_rate": 1.044675852925172e-05, "loss": 0.1834, "step": 1133 }, { "epoch": 0.5375681441099787, "grad_norm": 4.001905918121338, "learning_rate": 1.0430222330045306e-05, "loss": 0.1696, "step": 1134 }, { "epoch": 0.5380421900924389, "grad_norm": 3.1338417530059814, "learning_rate": 1.041368495214732e-05, "loss": 0.1639, "step": 1135 }, { "epoch": 0.5385162360748993, "grad_norm": 5.778520107269287, "learning_rate": 1.0397146440865658e-05, "loss": 0.243, "step": 1136 }, { "epoch": 0.5389902820573595, "grad_norm": 4.286486625671387, "learning_rate": 1.038060684151132e-05, "loss": 0.1057, "step": 1137 }, { "epoch": 0.5394643280398198, "grad_norm": 5.017862796783447, "learning_rate": 1.0364066199398285e-05, "loss": 0.1923, "step": 1138 }, { "epoch": 0.5399383740222802, "grad_norm": 7.659731388092041, "learning_rate": 1.0347524559843385e-05, "loss": 0.1946, "step": 1139 }, { "epoch": 0.5404124200047404, "grad_norm": 5.743497848510742, "learning_rate": 1.03309819681662e-05, "loss": 0.1536, "step": 1140 }, { "epoch": 0.5404124200047404, "eval_accuracy": 0.9919484702093397, "eval_f1": 0.9107142857142857, "eval_loss": 0.022149212658405304, "eval_precision": 0.85, "eval_recall": 0.9807692307692307, "eval_runtime": 50.3591, "eval_samples_per_second": 5.381, "eval_steps_per_second": 0.179, "step": 1140 }, { "epoch": 0.5408864659872008, "grad_norm": 5.815116882324219, "learning_rate": 1.0314438469688905e-05, "loss": 0.1734, "step": 1141 }, { "epoch": 0.5413605119696611, "grad_norm": 6.610226631164551, "learning_rate": 1.0297894109736165e-05, "loss": 0.1349, "step": 1142 }, { "epoch": 0.5418345579521213, "grad_norm": 8.291587829589844, "learning_rate": 1.0281348933634997e-05, "loss": 0.1959, "step": 1143 }, { "epoch": 0.5423086039345817, "grad_norm": 7.731545448303223, "learning_rate": 1.026480298671467e-05, "loss": 0.1686, "step": 1144 }, { "epoch": 0.542782649917042, "grad_norm": 5.443119525909424, "learning_rate": 1.0248256314306547e-05, "loss": 0.2093, "step": 1145 }, { "epoch": 0.5432566958995022, "grad_norm": 3.96079683303833, "learning_rate": 1.0231708961743991e-05, "loss": 0.1552, "step": 1146 }, { "epoch": 0.5437307418819626, "grad_norm": 6.89942741394043, "learning_rate": 1.0215160974362224e-05, "loss": 0.1644, "step": 1147 }, { "epoch": 0.5442047878644228, "grad_norm": 3.82415771484375, "learning_rate": 1.0198612397498207e-05, "loss": 0.1146, "step": 1148 }, { "epoch": 0.5446788338468831, "grad_norm": 7.8120436668396, "learning_rate": 1.0182063276490515e-05, "loss": 0.2138, "step": 1149 }, { "epoch": 0.5451528798293435, "grad_norm": 2.5428757667541504, "learning_rate": 1.016551365667922e-05, "loss": 0.114, "step": 1150 }, { "epoch": 0.5456269258118037, "grad_norm": 10.939910888671875, "learning_rate": 1.0148963583405751e-05, "loss": 
0.207, "step": 1151 }, { "epoch": 0.5461009717942641, "grad_norm": 4.319568157196045, "learning_rate": 1.0132413102012788e-05, "loss": 0.1531, "step": 1152 }, { "epoch": 0.5465750177767243, "grad_norm": 6.863345146179199, "learning_rate": 1.0115862257844124e-05, "loss": 0.1224, "step": 1153 }, { "epoch": 0.5470490637591846, "grad_norm": 7.866089820861816, "learning_rate": 1.0099311096244549e-05, "loss": 0.156, "step": 1154 }, { "epoch": 0.547523109741645, "grad_norm": 3.0218968391418457, "learning_rate": 1.0082759662559718e-05, "loss": 0.1336, "step": 1155 }, { "epoch": 0.5479971557241052, "grad_norm": 3.61643123626709, "learning_rate": 1.0066208002136033e-05, "loss": 0.0917, "step": 1156 }, { "epoch": 0.5484712017065655, "grad_norm": 4.540095329284668, "learning_rate": 1.0049656160320524e-05, "loss": 0.1543, "step": 1157 }, { "epoch": 0.5489452476890259, "grad_norm": 3.9380855560302734, "learning_rate": 1.0033104182460714e-05, "loss": 0.1557, "step": 1158 }, { "epoch": 0.5494192936714861, "grad_norm": 6.315436363220215, "learning_rate": 1.0016552113904492e-05, "loss": 0.1729, "step": 1159 }, { "epoch": 0.5498933396539464, "grad_norm": 6.182584762573242, "learning_rate": 1e-05, "loss": 0.1873, "step": 1160 }, { "epoch": 0.5498933396539464, "eval_accuracy": 0.9903381642512077, "eval_f1": 0.896551724137931, "eval_loss": 0.02337927743792534, "eval_precision": 0.8125, "eval_recall": 1.0, "eval_runtime": 50.2532, "eval_samples_per_second": 5.393, "eval_steps_per_second": 0.179, "step": 1160 }, { "epoch": 0.5503673856364067, "grad_norm": 4.766855716705322, "learning_rate": 9.983447886095512e-06, "loss": 0.1541, "step": 1161 }, { "epoch": 0.550841431618867, "grad_norm": 6.353938102722168, "learning_rate": 9.966895817539288e-06, "loss": 0.1841, "step": 1162 }, { "epoch": 0.5513154776013274, "grad_norm": 6.427106857299805, "learning_rate": 9.950343839679478e-06, "loss": 0.1741, "step": 1163 }, { "epoch": 0.5517895235837876, "grad_norm": 3.5585386753082275, "learning_rate": 9.93379199786397e-06, "loss": 0.1323, "step": 1164 }, { "epoch": 0.5522635695662479, "grad_norm": 4.5427141189575195, "learning_rate": 9.917240337440288e-06, "loss": 0.2179, "step": 1165 }, { "epoch": 0.5527376155487083, "grad_norm": 5.13054084777832, "learning_rate": 9.900688903755456e-06, "loss": 0.174, "step": 1166 }, { "epoch": 0.5532116615311685, "grad_norm": 3.7031896114349365, "learning_rate": 9.88413774215588e-06, "loss": 0.1615, "step": 1167 }, { "epoch": 0.5536857075136288, "grad_norm": 3.9363503456115723, "learning_rate": 9.867586897987214e-06, "loss": 0.1123, "step": 1168 }, { "epoch": 0.5541597534960891, "grad_norm": 2.7204768657684326, "learning_rate": 9.851036416594249e-06, "loss": 0.1168, "step": 1169 }, { "epoch": 0.5546337994785494, "grad_norm": 7.216203689575195, "learning_rate": 9.834486343320782e-06, "loss": 0.2236, "step": 1170 }, { "epoch": 0.5551078454610097, "grad_norm": 5.118651866912842, "learning_rate": 9.817936723509485e-06, "loss": 0.1727, "step": 1171 }, { "epoch": 0.55558189144347, "grad_norm": 7.6894354820251465, "learning_rate": 9.801387602501795e-06, "loss": 0.1546, "step": 1172 }, { "epoch": 0.5560559374259303, "grad_norm": 4.37026309967041, "learning_rate": 9.78483902563778e-06, "loss": 0.1381, "step": 1173 }, { "epoch": 0.5565299834083907, "grad_norm": 4.4173736572265625, "learning_rate": 9.76829103825601e-06, "loss": 0.1251, "step": 1174 }, { "epoch": 0.5570040293908509, "grad_norm": 4.060060024261475, "learning_rate": 9.751743685693455e-06, "loss": 0.1184, "step": 1175 }, { "epoch": 
0.5574780753733112, "grad_norm": 8.306419372558594, "learning_rate": 9.735197013285334e-06, "loss": 0.269, "step": 1176 }, { "epoch": 0.5579521213557715, "grad_norm": 3.8779776096343994, "learning_rate": 9.718651066365004e-06, "loss": 0.1348, "step": 1177 }, { "epoch": 0.5584261673382318, "grad_norm": 3.9062576293945312, "learning_rate": 9.702105890263839e-06, "loss": 0.1759, "step": 1178 }, { "epoch": 0.5589002133206921, "grad_norm": 3.3265230655670166, "learning_rate": 9.685561530311098e-06, "loss": 0.0996, "step": 1179 }, { "epoch": 0.5593742593031524, "grad_norm": 3.3085429668426514, "learning_rate": 9.669018031833803e-06, "loss": 0.1234, "step": 1180 }, { "epoch": 0.5593742593031524, "eval_accuracy": 0.9943639291465378, "eval_f1": 0.9345794392523364, "eval_loss": 0.017351333051919937, "eval_precision": 0.9090909090909091, "eval_recall": 0.9615384615384616, "eval_runtime": 49.0649, "eval_samples_per_second": 5.523, "eval_steps_per_second": 0.183, "step": 1180 }, { "epoch": 0.5598483052856127, "grad_norm": 6.352241039276123, "learning_rate": 9.652475440156618e-06, "loss": 0.2167, "step": 1181 }, { "epoch": 0.560322351268073, "grad_norm": 3.4842302799224854, "learning_rate": 9.635933800601722e-06, "loss": 0.0942, "step": 1182 }, { "epoch": 0.5607963972505333, "grad_norm": 3.8137149810791016, "learning_rate": 9.619393158488684e-06, "loss": 0.0993, "step": 1183 }, { "epoch": 0.5612704432329936, "grad_norm": 5.45013427734375, "learning_rate": 9.602853559134345e-06, "loss": 0.1589, "step": 1184 }, { "epoch": 0.561744489215454, "grad_norm": 4.813329219818115, "learning_rate": 9.586315047852685e-06, "loss": 0.117, "step": 1185 }, { "epoch": 0.5622185351979142, "grad_norm": 4.4595627784729, "learning_rate": 9.569777669954694e-06, "loss": 0.1529, "step": 1186 }, { "epoch": 0.5626925811803745, "grad_norm": 3.629746437072754, "learning_rate": 9.553241470748282e-06, "loss": 0.1445, "step": 1187 }, { "epoch": 0.5631666271628348, "grad_norm": 4.730684280395508, "learning_rate": 9.536706495538106e-06, "loss": 0.1823, "step": 1188 }, { "epoch": 0.5636406731452951, "grad_norm": 5.676868438720703, "learning_rate": 9.520172789625478e-06, "loss": 0.159, "step": 1189 }, { "epoch": 0.5641147191277553, "grad_norm": 4.608530521392822, "learning_rate": 9.503640398308232e-06, "loss": 0.1431, "step": 1190 }, { "epoch": 0.5645887651102157, "grad_norm": 5.198445796966553, "learning_rate": 9.487109366880604e-06, "loss": 0.174, "step": 1191 }, { "epoch": 0.565062811092676, "grad_norm": 3.781919240951538, "learning_rate": 9.470579740633096e-06, "loss": 0.1408, "step": 1192 }, { "epoch": 0.5655368570751362, "grad_norm": 6.478548526763916, "learning_rate": 9.454051564852368e-06, "loss": 0.1658, "step": 1193 }, { "epoch": 0.5660109030575966, "grad_norm": 7.026795864105225, "learning_rate": 9.437524884821106e-06, "loss": 0.1533, "step": 1194 }, { "epoch": 0.5664849490400569, "grad_norm": 6.0873703956604, "learning_rate": 9.420999745817891e-06, "loss": 0.2185, "step": 1195 }, { "epoch": 0.5669589950225172, "grad_norm": 5.5675950050354, "learning_rate": 9.404476193117085e-06, "loss": 0.1663, "step": 1196 }, { "epoch": 0.5674330410049775, "grad_norm": 3.557807445526123, "learning_rate": 9.38795427198871e-06, "loss": 0.1022, "step": 1197 }, { "epoch": 0.5679070869874377, "grad_norm": 4.164217948913574, "learning_rate": 9.371434027698309e-06, "loss": 0.1131, "step": 1198 }, { "epoch": 0.5683811329698981, "grad_norm": 6.597793102264404, "learning_rate": 9.354915505506839e-06, "loss": 0.1452, "step": 1199 }, { "epoch": 
0.5688551789523584, "grad_norm": 6.633536338806152, "learning_rate": 9.338398750670533e-06, "loss": 0.205, "step": 1200 }, { "epoch": 0.5688551789523584, "eval_accuracy": 0.9943639291465378, "eval_f1": 0.9369369369369369, "eval_loss": 0.016158046200871468, "eval_precision": 0.8813559322033898, "eval_recall": 1.0, "eval_runtime": 49.1353, "eval_samples_per_second": 5.515, "eval_steps_per_second": 0.183, "step": 1200 }, { "epoch": 0.5693292249348186, "grad_norm": 4.861762523651123, "learning_rate": 9.321883808440784e-06, "loss": 0.1574, "step": 1201 }, { "epoch": 0.569803270917279, "grad_norm": 4.254230976104736, "learning_rate": 9.305370724064016e-06, "loss": 0.1325, "step": 1202 }, { "epoch": 0.5702773168997393, "grad_norm": 4.238177299499512, "learning_rate": 9.288859542781572e-06, "loss": 0.1199, "step": 1203 }, { "epoch": 0.5707513628821995, "grad_norm": 3.961149215698242, "learning_rate": 9.272350309829568e-06, "loss": 0.1335, "step": 1204 }, { "epoch": 0.5712254088646599, "grad_norm": 4.954134941101074, "learning_rate": 9.255843070438795e-06, "loss": 0.1497, "step": 1205 }, { "epoch": 0.5716994548471201, "grad_norm": 5.570772647857666, "learning_rate": 9.239337869834574e-06, "loss": 0.1159, "step": 1206 }, { "epoch": 0.5721735008295805, "grad_norm": 5.868134021759033, "learning_rate": 9.222834753236643e-06, "loss": 0.1348, "step": 1207 }, { "epoch": 0.5726475468120408, "grad_norm": 4.09353494644165, "learning_rate": 9.20633376585903e-06, "loss": 0.1998, "step": 1208 }, { "epoch": 0.573121592794501, "grad_norm": 3.01023268699646, "learning_rate": 9.189834952909927e-06, "loss": 0.1293, "step": 1209 }, { "epoch": 0.5735956387769614, "grad_norm": 5.966034889221191, "learning_rate": 9.173338359591578e-06, "loss": 0.2143, "step": 1210 }, { "epoch": 0.5740696847594217, "grad_norm": 3.319744110107422, "learning_rate": 9.15684403110013e-06, "loss": 0.0837, "step": 1211 }, { "epoch": 0.5745437307418819, "grad_norm": 7.259401798248291, "learning_rate": 9.140352012625538e-06, "loss": 0.2666, "step": 1212 }, { "epoch": 0.5750177767243423, "grad_norm": 3.2887086868286133, "learning_rate": 9.123862349351423e-06, "loss": 0.135, "step": 1213 }, { "epoch": 0.5754918227068025, "grad_norm": 4.192896366119385, "learning_rate": 9.107375086454956e-06, "loss": 0.179, "step": 1214 }, { "epoch": 0.5759658686892628, "grad_norm": 2.307966947555542, "learning_rate": 9.09089026910673e-06, "loss": 0.0965, "step": 1215 }, { "epoch": 0.5764399146717232, "grad_norm": 4.469087600708008, "learning_rate": 9.07440794247064e-06, "loss": 0.1703, "step": 1216 }, { "epoch": 0.5769139606541834, "grad_norm": 4.343681812286377, "learning_rate": 9.05792815170375e-06, "loss": 0.1194, "step": 1217 }, { "epoch": 0.5773880066366438, "grad_norm": 4.0815839767456055, "learning_rate": 9.041450941956184e-06, "loss": 0.1497, "step": 1218 }, { "epoch": 0.5778620526191041, "grad_norm": 4.294894218444824, "learning_rate": 9.024976358370992e-06, "loss": 0.0954, "step": 1219 }, { "epoch": 0.5783360986015643, "grad_norm": 4.8514909744262695, "learning_rate": 9.00850444608403e-06, "loss": 0.1975, "step": 1220 }, { "epoch": 0.5783360986015643, "eval_accuracy": 0.9943639291465378, "eval_f1": 0.9345794392523364, "eval_loss": 0.015528516843914986, "eval_precision": 0.9090909090909091, "eval_recall": 0.9615384615384616, "eval_runtime": 49.0072, "eval_samples_per_second": 5.53, "eval_steps_per_second": 0.184, "step": 1220 }, { "epoch": 0.5788101445840247, "grad_norm": 3.158719062805176, "learning_rate": 8.992035250223831e-06, "loss": 0.1038, "step": 
1221 }, { "epoch": 0.579284190566485, "grad_norm": 5.278292179107666, "learning_rate": 8.975568815911497e-06, "loss": 0.155, "step": 1222 }, { "epoch": 0.5797582365489452, "grad_norm": 5.992212772369385, "learning_rate": 8.95910518826055e-06, "loss": 0.2075, "step": 1223 }, { "epoch": 0.5802322825314056, "grad_norm": 4.989220142364502, "learning_rate": 8.942644412376833e-06, "loss": 0.1216, "step": 1224 }, { "epoch": 0.5807063285138658, "grad_norm": 3.9594602584838867, "learning_rate": 8.92618653335837e-06, "loss": 0.1658, "step": 1225 }, { "epoch": 0.5811803744963261, "grad_norm": 6.822264671325684, "learning_rate": 8.90973159629525e-06, "loss": 0.1859, "step": 1226 }, { "epoch": 0.5816544204787865, "grad_norm": 5.479275226593018, "learning_rate": 8.893279646269502e-06, "loss": 0.1323, "step": 1227 }, { "epoch": 0.5821284664612467, "grad_norm": 4.39878511428833, "learning_rate": 8.876830728354973e-06, "loss": 0.1554, "step": 1228 }, { "epoch": 0.5826025124437071, "grad_norm": 6.936598300933838, "learning_rate": 8.860384887617198e-06, "loss": 0.2093, "step": 1229 }, { "epoch": 0.5830765584261673, "grad_norm": 3.4033169746398926, "learning_rate": 8.843942169113289e-06, "loss": 0.1684, "step": 1230 }, { "epoch": 0.5835506044086276, "grad_norm": 6.939477443695068, "learning_rate": 8.827502617891799e-06, "loss": 0.2766, "step": 1231 }, { "epoch": 0.584024650391088, "grad_norm": 3.5070977210998535, "learning_rate": 8.8110662789926e-06, "loss": 0.1527, "step": 1232 }, { "epoch": 0.5844986963735482, "grad_norm": 2.4979946613311768, "learning_rate": 8.79463319744677e-06, "loss": 0.0714, "step": 1233 }, { "epoch": 0.5849727423560085, "grad_norm": 4.240323543548584, "learning_rate": 8.778203418276463e-06, "loss": 0.1527, "step": 1234 }, { "epoch": 0.5854467883384689, "grad_norm": 5.190901279449463, "learning_rate": 8.761776986494778e-06, "loss": 0.13, "step": 1235 }, { "epoch": 0.5859208343209291, "grad_norm": 3.0019147396087646, "learning_rate": 8.745353947105649e-06, "loss": 0.1092, "step": 1236 }, { "epoch": 0.5863948803033894, "grad_norm": 8.624896049499512, "learning_rate": 8.728934345103715e-06, "loss": 0.1767, "step": 1237 }, { "epoch": 0.5868689262858497, "grad_norm": 3.439699172973633, "learning_rate": 8.712518225474191e-06, "loss": 0.1247, "step": 1238 }, { "epoch": 0.58734297226831, "grad_norm": 3.3309326171875, "learning_rate": 8.696105633192766e-06, "loss": 0.1385, "step": 1239 }, { "epoch": 0.5878170182507704, "grad_norm": 3.2595584392547607, "learning_rate": 8.679696613225452e-06, "loss": 0.1302, "step": 1240 }, { "epoch": 0.5878170182507704, "eval_accuracy": 0.9959742351046699, "eval_f1": 0.9541284403669725, "eval_loss": 0.013090784661471844, "eval_precision": 0.9122807017543859, "eval_recall": 1.0, "eval_runtime": 48.7699, "eval_samples_per_second": 5.557, "eval_steps_per_second": 0.185, "step": 1240 }, { "epoch": 0.5882910642332306, "grad_norm": 2.58913516998291, "learning_rate": 8.663291210528481e-06, "loss": 0.098, "step": 1241 }, { "epoch": 0.5887651102156909, "grad_norm": 3.9105873107910156, "learning_rate": 8.646889470048166e-06, "loss": 0.1949, "step": 1242 }, { "epoch": 0.5892391561981513, "grad_norm": 6.584453582763672, "learning_rate": 8.630491436720794e-06, "loss": 0.3064, "step": 1243 }, { "epoch": 0.5897132021806115, "grad_norm": 4.178667068481445, "learning_rate": 8.614097155472496e-06, "loss": 0.1363, "step": 1244 }, { "epoch": 0.5901872481630718, "grad_norm": 4.628752708435059, "learning_rate": 8.597706671219116e-06, "loss": 0.1645, "step": 1245 }, { "epoch": 
0.5906612941455321, "grad_norm": 4.536274433135986, "learning_rate": 8.581320028866105e-06, "loss": 0.1557, "step": 1246 }, { "epoch": 0.5911353401279924, "grad_norm": 5.495678901672363, "learning_rate": 8.564937273308382e-06, "loss": 0.2236, "step": 1247 }, { "epoch": 0.5916093861104527, "grad_norm": 3.747703790664673, "learning_rate": 8.548558449430217e-06, "loss": 0.1116, "step": 1248 }, { "epoch": 0.592083432092913, "grad_norm": 7.915394306182861, "learning_rate": 8.53218360210511e-06, "loss": 0.2076, "step": 1249 }, { "epoch": 0.5925574780753733, "grad_norm": 4.242615699768066, "learning_rate": 8.515812776195667e-06, "loss": 0.147, "step": 1250 }, { "epoch": 0.5930315240578337, "grad_norm": 5.444154739379883, "learning_rate": 8.499446016553475e-06, "loss": 0.1733, "step": 1251 }, { "epoch": 0.5935055700402939, "grad_norm": 4.993553161621094, "learning_rate": 8.483083368018984e-06, "loss": 0.1496, "step": 1252 }, { "epoch": 0.5939796160227542, "grad_norm": 6.0502400398254395, "learning_rate": 8.466724875421374e-06, "loss": 0.1944, "step": 1253 }, { "epoch": 0.5944536620052145, "grad_norm": 5.558779716491699, "learning_rate": 8.450370583578444e-06, "loss": 0.2226, "step": 1254 }, { "epoch": 0.5949277079876748, "grad_norm": 4.802134037017822, "learning_rate": 8.434020537296477e-06, "loss": 0.1447, "step": 1255 }, { "epoch": 0.5954017539701351, "grad_norm": 3.6929593086242676, "learning_rate": 8.417674781370143e-06, "loss": 0.1181, "step": 1256 }, { "epoch": 0.5958757999525954, "grad_norm": 3.9746272563934326, "learning_rate": 8.401333360582336e-06, "loss": 0.1379, "step": 1257 }, { "epoch": 0.5963498459350557, "grad_norm": 7.822659492492676, "learning_rate": 8.384996319704084e-06, "loss": 0.2003, "step": 1258 }, { "epoch": 0.596823891917516, "grad_norm": 7.521732330322266, "learning_rate": 8.36866370349441e-06, "loss": 0.1, "step": 1259 }, { "epoch": 0.5972979378999763, "grad_norm": 9.920129776000977, "learning_rate": 8.352335556700221e-06, "loss": 0.2996, "step": 1260 }, { "epoch": 0.5972979378999763, "eval_accuracy": 0.9943639291465378, "eval_f1": 0.9369369369369369, "eval_loss": 0.017237627878785133, "eval_precision": 0.8813559322033898, "eval_recall": 1.0, "eval_runtime": 49.3888, "eval_samples_per_second": 5.487, "eval_steps_per_second": 0.182, "step": 1260 }, { "epoch": 0.5977719838824366, "grad_norm": 3.91894268989563, "learning_rate": 8.336011924056175e-06, "loss": 0.0938, "step": 1261 }, { "epoch": 0.5982460298648969, "grad_norm": 7.406013011932373, "learning_rate": 8.319692850284564e-06, "loss": 0.2363, "step": 1262 }, { "epoch": 0.5987200758473572, "grad_norm": 5.263122081756592, "learning_rate": 8.303378380095184e-06, "loss": 0.1539, "step": 1263 }, { "epoch": 0.5991941218298175, "grad_norm": 5.790138244628906, "learning_rate": 8.287068558185225e-06, "loss": 0.2055, "step": 1264 }, { "epoch": 0.5996681678122778, "grad_norm": 6.286695957183838, "learning_rate": 8.270763429239144e-06, "loss": 0.201, "step": 1265 }, { "epoch": 0.6001422137947381, "grad_norm": 6.946226119995117, "learning_rate": 8.254463037928534e-06, "loss": 0.16, "step": 1266 }, { "epoch": 0.6006162597771983, "grad_norm": 2.566415548324585, "learning_rate": 8.238167428912009e-06, "loss": 0.1239, "step": 1267 }, { "epoch": 0.6010903057596587, "grad_norm": 5.948537349700928, "learning_rate": 8.22187664683509e-06, "loss": 0.196, "step": 1268 }, { "epoch": 0.601564351742119, "grad_norm": 3.735405445098877, "learning_rate": 8.205590736330058e-06, "loss": 0.1026, "step": 1269 }, { "epoch": 0.6020383977245792, 
"grad_norm": 3.433272123336792, "learning_rate": 8.18930974201586e-06, "loss": 0.0932, "step": 1270 }, { "epoch": 0.6025124437070396, "grad_norm": 7.357568740844727, "learning_rate": 8.173033708497968e-06, "loss": 0.1722, "step": 1271 }, { "epoch": 0.6029864896894999, "grad_norm": 5.364899158477783, "learning_rate": 8.156762680368267e-06, "loss": 0.2159, "step": 1272 }, { "epoch": 0.6034605356719602, "grad_norm": 3.7120745182037354, "learning_rate": 8.140496702204921e-06, "loss": 0.1332, "step": 1273 }, { "epoch": 0.6039345816544205, "grad_norm": 3.985130786895752, "learning_rate": 8.124235818572268e-06, "loss": 0.1074, "step": 1274 }, { "epoch": 0.6044086276368807, "grad_norm": 3.8434746265411377, "learning_rate": 8.107980074020684e-06, "loss": 0.2165, "step": 1275 }, { "epoch": 0.6048826736193411, "grad_norm": 7.933750629425049, "learning_rate": 8.091729513086462e-06, "loss": 0.1933, "step": 1276 }, { "epoch": 0.6053567196018014, "grad_norm": 6.892407417297363, "learning_rate": 8.075484180291702e-06, "loss": 0.2022, "step": 1277 }, { "epoch": 0.6058307655842616, "grad_norm": 3.9653921127319336, "learning_rate": 8.059244120144167e-06, "loss": 0.1633, "step": 1278 }, { "epoch": 0.606304811566722, "grad_norm": 5.983907222747803, "learning_rate": 8.043009377137188e-06, "loss": 0.18, "step": 1279 }, { "epoch": 0.6067788575491823, "grad_norm": 5.66121244430542, "learning_rate": 8.026779995749519e-06, "loss": 0.1381, "step": 1280 }, { "epoch": 0.6067788575491823, "eval_accuracy": 0.9935587761674718, "eval_f1": 0.9259259259259259, "eval_loss": 0.014485717751085758, "eval_precision": 0.8928571428571429, "eval_recall": 0.9615384615384616, "eval_runtime": 48.9363, "eval_samples_per_second": 5.538, "eval_steps_per_second": 0.184, "step": 1280 }, { "epoch": 0.6072529035316425, "grad_norm": 2.993018865585327, "learning_rate": 8.010556020445231e-06, "loss": 0.1033, "step": 1281 }, { "epoch": 0.6077269495141029, "grad_norm": 4.771989822387695, "learning_rate": 7.99433749567358e-06, "loss": 0.1558, "step": 1282 }, { "epoch": 0.6082009954965631, "grad_norm": 5.141221046447754, "learning_rate": 7.97812446586889e-06, "loss": 0.1381, "step": 1283 }, { "epoch": 0.6086750414790235, "grad_norm": 3.860772132873535, "learning_rate": 7.96191697545043e-06, "loss": 0.1682, "step": 1284 }, { "epoch": 0.6091490874614838, "grad_norm": 3.4374563694000244, "learning_rate": 7.945715068822291e-06, "loss": 0.132, "step": 1285 }, { "epoch": 0.609623133443944, "grad_norm": 3.378507137298584, "learning_rate": 7.929518790373274e-06, "loss": 0.1912, "step": 1286 }, { "epoch": 0.6100971794264044, "grad_norm": 3.9019570350646973, "learning_rate": 7.913328184476748e-06, "loss": 0.1236, "step": 1287 }, { "epoch": 0.6105712254088647, "grad_norm": 3.8174707889556885, "learning_rate": 7.897143295490551e-06, "loss": 0.1624, "step": 1288 }, { "epoch": 0.6110452713913249, "grad_norm": 3.3840582370758057, "learning_rate": 7.880964167756855e-06, "loss": 0.0858, "step": 1289 }, { "epoch": 0.6115193173737853, "grad_norm": 5.801539897918701, "learning_rate": 7.86479084560204e-06, "loss": 0.1997, "step": 1290 }, { "epoch": 0.6119933633562455, "grad_norm": 4.482763290405273, "learning_rate": 7.848623373336594e-06, "loss": 0.1977, "step": 1291 }, { "epoch": 0.6124674093387058, "grad_norm": 3.4488887786865234, "learning_rate": 7.83246179525497e-06, "loss": 0.1393, "step": 1292 }, { "epoch": 0.6129414553211662, "grad_norm": 5.420791149139404, "learning_rate": 7.816306155635475e-06, "loss": 0.1744, "step": 1293 }, { "epoch": 0.6134155013036264, 
"grad_norm": 8.423394203186035, "learning_rate": 7.800156498740143e-06, "loss": 0.1435, "step": 1294 }, { "epoch": 0.6138895472860868, "grad_norm": 3.0613040924072266, "learning_rate": 7.784012868814618e-06, "loss": 0.1134, "step": 1295 }, { "epoch": 0.6143635932685471, "grad_norm": 10.920926094055176, "learning_rate": 7.767875310088034e-06, "loss": 0.2181, "step": 1296 }, { "epoch": 0.6148376392510073, "grad_norm": 4.460597991943359, "learning_rate": 7.751743866772889e-06, "loss": 0.1475, "step": 1297 }, { "epoch": 0.6153116852334677, "grad_norm": 6.000149250030518, "learning_rate": 7.735618583064931e-06, "loss": 0.1448, "step": 1298 }, { "epoch": 0.615785731215928, "grad_norm": 4.731883525848389, "learning_rate": 7.719499503143027e-06, "loss": 0.2625, "step": 1299 }, { "epoch": 0.6162597771983882, "grad_norm": 4.700830459594727, "learning_rate": 7.70338667116905e-06, "loss": 0.1559, "step": 1300 }, { "epoch": 0.6162597771983882, "eval_accuracy": 0.9935587761674718, "eval_f1": 0.9259259259259259, "eval_loss": 0.014198314398527145, "eval_precision": 0.8928571428571429, "eval_recall": 0.9615384615384616, "eval_runtime": 49.8628, "eval_samples_per_second": 5.435, "eval_steps_per_second": 0.18, "step": 1300 }, { "epoch": 0.6167338231808486, "grad_norm": 6.278774261474609, "learning_rate": 7.68728013128775e-06, "loss": 0.1788, "step": 1301 }, { "epoch": 0.6172078691633088, "grad_norm": 3.930607557296753, "learning_rate": 7.671179927626651e-06, "loss": 0.2176, "step": 1302 }, { "epoch": 0.6176819151457691, "grad_norm": 3.799132823944092, "learning_rate": 7.655086104295904e-06, "loss": 0.1051, "step": 1303 }, { "epoch": 0.6181559611282295, "grad_norm": 4.671762466430664, "learning_rate": 7.638998705388188e-06, "loss": 0.1648, "step": 1304 }, { "epoch": 0.6186300071106897, "grad_norm": 6.8837504386901855, "learning_rate": 7.622917774978579e-06, "loss": 0.1712, "step": 1305 }, { "epoch": 0.6191040530931501, "grad_norm": 3.0409445762634277, "learning_rate": 7.606843357124426e-06, "loss": 0.1149, "step": 1306 }, { "epoch": 0.6195780990756103, "grad_norm": 3.255094289779663, "learning_rate": 7.5907754958652365e-06, "loss": 0.1327, "step": 1307 }, { "epoch": 0.6200521450580706, "grad_norm": 5.722508430480957, "learning_rate": 7.574714235222571e-06, "loss": 0.1593, "step": 1308 }, { "epoch": 0.620526191040531, "grad_norm": 17.10514259338379, "learning_rate": 7.558659619199884e-06, "loss": 0.3056, "step": 1309 }, { "epoch": 0.6210002370229912, "grad_norm": 7.020135402679443, "learning_rate": 7.542611691782433e-06, "loss": 0.1793, "step": 1310 }, { "epoch": 0.6214742830054515, "grad_norm": 4.9687066078186035, "learning_rate": 7.526570496937157e-06, "loss": 0.1326, "step": 1311 }, { "epoch": 0.6219483289879119, "grad_norm": 5.2314133644104, "learning_rate": 7.5105360786125405e-06, "loss": 0.1843, "step": 1312 }, { "epoch": 0.6224223749703721, "grad_norm": 2.8971641063690186, "learning_rate": 7.494508480738508e-06, "loss": 0.1332, "step": 1313 }, { "epoch": 0.6228964209528324, "grad_norm": 3.5975356101989746, "learning_rate": 7.4784877472262994e-06, "loss": 0.1212, "step": 1314 }, { "epoch": 0.6233704669352927, "grad_norm": 4.035305976867676, "learning_rate": 7.462473921968338e-06, "loss": 0.1722, "step": 1315 }, { "epoch": 0.623844512917753, "grad_norm": 4.344238758087158, "learning_rate": 7.446467048838131e-06, "loss": 0.1702, "step": 1316 }, { "epoch": 0.6243185589002134, "grad_norm": 4.068875789642334, "learning_rate": 7.430467171690134e-06, "loss": 0.1253, "step": 1317 }, { "epoch": 
0.6247926048826736, "grad_norm": 4.388603210449219, "learning_rate": 7.4144743343596385e-06, "loss": 0.1894, "step": 1318 }, { "epoch": 0.6252666508651339, "grad_norm": 2.7790372371673584, "learning_rate": 7.398488580662644e-06, "loss": 0.0985, "step": 1319 }, { "epoch": 0.6257406968475943, "grad_norm": 6.451686382293701, "learning_rate": 7.382509954395749e-06, "loss": 0.1588, "step": 1320 }, { "epoch": 0.6257406968475943, "eval_accuracy": 0.9911433172302737, "eval_f1": 0.9026548672566371, "eval_loss": 0.01939067617058754, "eval_precision": 0.8360655737704918, "eval_recall": 0.9807692307692307, "eval_runtime": 49.8529, "eval_samples_per_second": 5.436, "eval_steps_per_second": 0.181, "step": 1320 }, { "epoch": 0.6262147428300545, "grad_norm": 5.8468918800354, "learning_rate": 7.366538499336018e-06, "loss": 0.1876, "step": 1321 }, { "epoch": 0.6266887888125148, "grad_norm": 5.291234970092773, "learning_rate": 7.35057425924087e-06, "loss": 0.2141, "step": 1322 }, { "epoch": 0.6271628347949751, "grad_norm": 4.23124885559082, "learning_rate": 7.334617277847963e-06, "loss": 0.1612, "step": 1323 }, { "epoch": 0.6276368807774354, "grad_norm": 3.941624879837036, "learning_rate": 7.31866759887506e-06, "loss": 0.1906, "step": 1324 }, { "epoch": 0.6281109267598957, "grad_norm": 7.514248371124268, "learning_rate": 7.302725266019924e-06, "loss": 0.1179, "step": 1325 }, { "epoch": 0.628584972742356, "grad_norm": 3.3683080673217773, "learning_rate": 7.286790322960189e-06, "loss": 0.1339, "step": 1326 }, { "epoch": 0.6290590187248163, "grad_norm": 6.781601905822754, "learning_rate": 7.270862813353241e-06, "loss": 0.237, "step": 1327 }, { "epoch": 0.6295330647072767, "grad_norm": 2.775303602218628, "learning_rate": 7.254942780836103e-06, "loss": 0.109, "step": 1328 }, { "epoch": 0.6300071106897369, "grad_norm": 6.326327323913574, "learning_rate": 7.239030269025311e-06, "loss": 0.1799, "step": 1329 }, { "epoch": 0.6304811566721972, "grad_norm": 3.2964067459106445, "learning_rate": 7.2231253215168e-06, "loss": 0.1008, "step": 1330 }, { "epoch": 0.6309552026546575, "grad_norm": 4.055788040161133, "learning_rate": 7.2072279818857745e-06, "loss": 0.1524, "step": 1331 }, { "epoch": 0.6314292486371178, "grad_norm": 8.913673400878906, "learning_rate": 7.191338293686601e-06, "loss": 0.1624, "step": 1332 }, { "epoch": 0.6319032946195781, "grad_norm": 3.2821767330169678, "learning_rate": 7.175456300452681e-06, "loss": 0.1139, "step": 1333 }, { "epoch": 0.6323773406020384, "grad_norm": 4.261881351470947, "learning_rate": 7.159582045696334e-06, "loss": 0.117, "step": 1334 }, { "epoch": 0.6328513865844987, "grad_norm": 3.6744134426116943, "learning_rate": 7.143715572908679e-06, "loss": 0.1175, "step": 1335 }, { "epoch": 0.633325432566959, "grad_norm": 6.114347457885742, "learning_rate": 7.127856925559513e-06, "loss": 0.2092, "step": 1336 }, { "epoch": 0.6337994785494193, "grad_norm": 6.3057451248168945, "learning_rate": 7.112006147097195e-06, "loss": 0.1928, "step": 1337 }, { "epoch": 0.6342735245318796, "grad_norm": 3.8282015323638916, "learning_rate": 7.096163280948523e-06, "loss": 0.1564, "step": 1338 }, { "epoch": 0.6347475705143399, "grad_norm": 3.46825909614563, "learning_rate": 7.080328370518623e-06, "loss": 0.1227, "step": 1339 }, { "epoch": 0.6352216164968002, "grad_norm": 2.634822368621826, "learning_rate": 7.064501459190816e-06, "loss": 0.1101, "step": 1340 }, { "epoch": 0.6352216164968002, "eval_accuracy": 0.9935587761674718, "eval_f1": 0.9259259259259259, "eval_loss": 0.014886138029396534, "eval_precision": 
0.8928571428571429, "eval_recall": 0.9615384615384616, "eval_runtime": 50.0008, "eval_samples_per_second": 5.42, "eval_steps_per_second": 0.18, "step": 1340 }, { "epoch": 0.6356956624792605, "grad_norm": 4.82327938079834, "learning_rate": 7.048682590326519e-06, "loss": 0.1271, "step": 1341 }, { "epoch": 0.6361697084617208, "grad_norm": 7.885534763336182, "learning_rate": 7.032871807265097e-06, "loss": 0.2163, "step": 1342 }, { "epoch": 0.6366437544441811, "grad_norm": 13.83066177368164, "learning_rate": 7.017069153323785e-06, "loss": 0.2698, "step": 1343 }, { "epoch": 0.6371178004266413, "grad_norm": 3.2929160594940186, "learning_rate": 7.0012746717975284e-06, "loss": 0.1059, "step": 1344 }, { "epoch": 0.6375918464091017, "grad_norm": 3.376429319381714, "learning_rate": 6.9854884059588934e-06, "loss": 0.0846, "step": 1345 }, { "epoch": 0.638065892391562, "grad_norm": 4.017861843109131, "learning_rate": 6.969710399057927e-06, "loss": 0.1109, "step": 1346 }, { "epoch": 0.6385399383740222, "grad_norm": 3.740814447402954, "learning_rate": 6.953940694322059e-06, "loss": 0.1207, "step": 1347 }, { "epoch": 0.6390139843564826, "grad_norm": 8.038143157958984, "learning_rate": 6.938179334955967e-06, "loss": 0.119, "step": 1348 }, { "epoch": 0.6394880303389429, "grad_norm": 3.9361705780029297, "learning_rate": 6.9224263641414675e-06, "loss": 0.1581, "step": 1349 }, { "epoch": 0.6399620763214032, "grad_norm": 2.7756080627441406, "learning_rate": 6.906681825037395e-06, "loss": 0.0784, "step": 1350 }, { "epoch": 0.6404361223038635, "grad_norm": 5.108663082122803, "learning_rate": 6.8909457607794824e-06, "loss": 0.1245, "step": 1351 }, { "epoch": 0.6409101682863237, "grad_norm": 7.079373836517334, "learning_rate": 6.8752182144802415e-06, "loss": 0.1754, "step": 1352 }, { "epoch": 0.6413842142687841, "grad_norm": 4.77249813079834, "learning_rate": 6.859499229228852e-06, "loss": 0.1782, "step": 1353 }, { "epoch": 0.6418582602512444, "grad_norm": 4.985091209411621, "learning_rate": 6.8437888480910355e-06, "loss": 0.129, "step": 1354 }, { "epoch": 0.6423323062337046, "grad_norm": 6.746637344360352, "learning_rate": 6.8280871141089415e-06, "loss": 0.2279, "step": 1355 }, { "epoch": 0.642806352216165, "grad_norm": 4.10652494430542, "learning_rate": 6.81239407030103e-06, "loss": 0.143, "step": 1356 }, { "epoch": 0.6432803981986253, "grad_norm": 4.236819267272949, "learning_rate": 6.7967097596619495e-06, "loss": 0.1236, "step": 1357 }, { "epoch": 0.6437544441810855, "grad_norm": 8.469802856445312, "learning_rate": 6.781034225162422e-06, "loss": 0.1694, "step": 1358 }, { "epoch": 0.6442284901635459, "grad_norm": 4.33465051651001, "learning_rate": 6.765367509749123e-06, "loss": 0.1511, "step": 1359 }, { "epoch": 0.6447025361460061, "grad_norm": 5.169299602508545, "learning_rate": 6.749709656344584e-06, "loss": 0.1533, "step": 1360 }, { "epoch": 0.6447025361460061, "eval_accuracy": 0.9935587761674718, "eval_f1": 0.9285714285714286, "eval_loss": 0.015230515040457249, "eval_precision": 0.8666666666666667, "eval_recall": 1.0, "eval_runtime": 50.0425, "eval_samples_per_second": 5.415, "eval_steps_per_second": 0.18, "step": 1360 }, { "epoch": 0.6451765821284665, "grad_norm": 5.502664089202881, "learning_rate": 6.73406070784703e-06, "loss": 0.1049, "step": 1361 }, { "epoch": 0.6456506281109268, "grad_norm": 3.8858397006988525, "learning_rate": 6.7184207071303065e-06, "loss": 0.1058, "step": 1362 }, { "epoch": 0.646124674093387, "grad_norm": 3.3023645877838135, "learning_rate": 6.702789697043739e-06, "loss": 0.1119, 
"step": 1363 }, { "epoch": 0.6465987200758474, "grad_norm": 6.430544376373291, "learning_rate": 6.687167720412022e-06, "loss": 0.2107, "step": 1364 }, { "epoch": 0.6470727660583077, "grad_norm": 3.9412176609039307, "learning_rate": 6.6715548200351025e-06, "loss": 0.1272, "step": 1365 }, { "epoch": 0.6475468120407679, "grad_norm": 6.815352439880371, "learning_rate": 6.6559510386880575e-06, "loss": 0.172, "step": 1366 }, { "epoch": 0.6480208580232283, "grad_norm": 6.693808555603027, "learning_rate": 6.640356419120981e-06, "loss": 0.1169, "step": 1367 }, { "epoch": 0.6484949040056885, "grad_norm": 7.749126434326172, "learning_rate": 6.624771004058869e-06, "loss": 0.2337, "step": 1368 }, { "epoch": 0.6489689499881488, "grad_norm": 5.6740336418151855, "learning_rate": 6.609194836201498e-06, "loss": 0.1464, "step": 1369 }, { "epoch": 0.6494429959706092, "grad_norm": 5.359683990478516, "learning_rate": 6.59362795822331e-06, "loss": 0.171, "step": 1370 }, { "epoch": 0.6499170419530694, "grad_norm": 7.062665939331055, "learning_rate": 6.578070412773294e-06, "loss": 0.1379, "step": 1371 }, { "epoch": 0.6503910879355298, "grad_norm": 3.281470775604248, "learning_rate": 6.562522242474873e-06, "loss": 0.124, "step": 1372 }, { "epoch": 0.6508651339179901, "grad_norm": 3.4849565029144287, "learning_rate": 6.546983489925783e-06, "loss": 0.0776, "step": 1373 }, { "epoch": 0.6513391799004503, "grad_norm": 9.621129989624023, "learning_rate": 6.531454197697956e-06, "loss": 0.2229, "step": 1374 }, { "epoch": 0.6518132258829107, "grad_norm": 5.371448993682861, "learning_rate": 6.51593440833741e-06, "loss": 0.1627, "step": 1375 }, { "epoch": 0.6522872718653709, "grad_norm": 3.7554807662963867, "learning_rate": 6.5004241643641204e-06, "loss": 0.1219, "step": 1376 }, { "epoch": 0.6527613178478312, "grad_norm": 10.365527153015137, "learning_rate": 6.484923508271921e-06, "loss": 0.1246, "step": 1377 }, { "epoch": 0.6532353638302916, "grad_norm": 3.904869556427002, "learning_rate": 6.469432482528371e-06, "loss": 0.1454, "step": 1378 }, { "epoch": 0.6537094098127518, "grad_norm": 8.565876007080078, "learning_rate": 6.453951129574644e-06, "loss": 0.191, "step": 1379 }, { "epoch": 0.6541834557952121, "grad_norm": 5.346278667449951, "learning_rate": 6.4384794918254155e-06, "loss": 0.1567, "step": 1380 }, { "epoch": 0.6541834557952121, "eval_accuracy": 0.9927536231884058, "eval_f1": 0.9174311926605505, "eval_loss": 0.013926459476351738, "eval_precision": 0.8771929824561403, "eval_recall": 0.9615384615384616, "eval_runtime": 50.4117, "eval_samples_per_second": 5.376, "eval_steps_per_second": 0.179, "step": 1380 }, { "epoch": 0.6546575017776725, "grad_norm": 7.4207539558410645, "learning_rate": 6.423017611668745e-06, "loss": 0.2611, "step": 1381 }, { "epoch": 0.6551315477601327, "grad_norm": 4.336532115936279, "learning_rate": 6.407565531465958e-06, "loss": 0.1129, "step": 1382 }, { "epoch": 0.6556055937425931, "grad_norm": 8.779532432556152, "learning_rate": 6.392123293551524e-06, "loss": 0.2831, "step": 1383 }, { "epoch": 0.6560796397250533, "grad_norm": 3.4393012523651123, "learning_rate": 6.3766909402329595e-06, "loss": 0.1244, "step": 1384 }, { "epoch": 0.6565536857075136, "grad_norm": 8.261950492858887, "learning_rate": 6.36126851379069e-06, "loss": 0.2614, "step": 1385 }, { "epoch": 0.657027731689974, "grad_norm": 6.831699848175049, "learning_rate": 6.3458560564779506e-06, "loss": 0.1364, "step": 1386 }, { "epoch": 0.6575017776724342, "grad_norm": 3.5841522216796875, "learning_rate": 6.330453610520659e-06, "loss": 
0.1327, "step": 1387 }, { "epoch": 0.6579758236548945, "grad_norm": 9.064952850341797, "learning_rate": 6.315061218117311e-06, "loss": 0.164, "step": 1388 }, { "epoch": 0.6584498696373549, "grad_norm": 3.6154866218566895, "learning_rate": 6.299678921438845e-06, "loss": 0.0897, "step": 1389 }, { "epoch": 0.6589239156198151, "grad_norm": 4.849295616149902, "learning_rate": 6.284306762628556e-06, "loss": 0.1721, "step": 1390 }, { "epoch": 0.6593979616022754, "grad_norm": 9.754378318786621, "learning_rate": 6.268944783801954e-06, "loss": 0.1912, "step": 1391 }, { "epoch": 0.6598720075847357, "grad_norm": 5.799689292907715, "learning_rate": 6.253593027046663e-06, "loss": 0.1779, "step": 1392 }, { "epoch": 0.660346053567196, "grad_norm": 3.9141523838043213, "learning_rate": 6.238251534422295e-06, "loss": 0.2328, "step": 1393 }, { "epoch": 0.6608200995496564, "grad_norm": 5.237335205078125, "learning_rate": 6.22292034796035e-06, "loss": 0.179, "step": 1394 }, { "epoch": 0.6612941455321166, "grad_norm": 3.3698062896728516, "learning_rate": 6.20759950966409e-06, "loss": 0.1331, "step": 1395 }, { "epoch": 0.6617681915145769, "grad_norm": 5.780014991760254, "learning_rate": 6.19228906150842e-06, "loss": 0.1727, "step": 1396 }, { "epoch": 0.6622422374970373, "grad_norm": 5.892086029052734, "learning_rate": 6.176989045439785e-06, "loss": 0.1575, "step": 1397 }, { "epoch": 0.6627162834794975, "grad_norm": 5.020101070404053, "learning_rate": 6.161699503376042e-06, "loss": 0.1429, "step": 1398 }, { "epoch": 0.6631903294619578, "grad_norm": 5.539973735809326, "learning_rate": 6.14642047720636e-06, "loss": 0.1591, "step": 1399 }, { "epoch": 0.6636643754444181, "grad_norm": 8.077629089355469, "learning_rate": 6.131152008791092e-06, "loss": 0.1771, "step": 1400 }, { "epoch": 0.6636643754444181, "eval_accuracy": 0.9935587761674718, "eval_f1": 0.9272727272727272, "eval_loss": 0.01761007122695446, "eval_precision": 0.8793103448275862, "eval_recall": 0.9807692307692307, "eval_runtime": 49.9728, "eval_samples_per_second": 5.423, "eval_steps_per_second": 0.18, "step": 1400 }, { "epoch": 0.6641384214268784, "grad_norm": 4.013208389282227, "learning_rate": 6.115894139961668e-06, "loss": 0.1888, "step": 1401 }, { "epoch": 0.6646124674093387, "grad_norm": 3.7554798126220703, "learning_rate": 6.1006469125204785e-06, "loss": 0.1271, "step": 1402 }, { "epoch": 0.665086513391799, "grad_norm": 3.1166861057281494, "learning_rate": 6.085410368240759e-06, "loss": 0.1656, "step": 1403 }, { "epoch": 0.6655605593742593, "grad_norm": 5.062197208404541, "learning_rate": 6.070184548866471e-06, "loss": 0.1014, "step": 1404 }, { "epoch": 0.6660346053567197, "grad_norm": 3.389256238937378, "learning_rate": 6.054969496112202e-06, "loss": 0.1676, "step": 1405 }, { "epoch": 0.6665086513391799, "grad_norm": 4.408236026763916, "learning_rate": 6.039765251663034e-06, "loss": 0.1012, "step": 1406 }, { "epoch": 0.6669826973216402, "grad_norm": 3.6748061180114746, "learning_rate": 6.024571857174443e-06, "loss": 0.1329, "step": 1407 }, { "epoch": 0.6674567433041005, "grad_norm": 6.179364204406738, "learning_rate": 6.009389354272175e-06, "loss": 0.2444, "step": 1408 }, { "epoch": 0.6679307892865608, "grad_norm": 6.529764652252197, "learning_rate": 5.994217784552139e-06, "loss": 0.1713, "step": 1409 }, { "epoch": 0.6684048352690211, "grad_norm": 5.036710262298584, "learning_rate": 5.979057189580284e-06, "loss": 0.1116, "step": 1410 }, { "epoch": 0.6688788812514814, "grad_norm": 5.518298149108887, "learning_rate": 5.963907610892497e-06, "loss": 
0.1619, "step": 1411 }, { "epoch": 0.6693529272339417, "grad_norm": 3.653196096420288, "learning_rate": 5.948769089994486e-06, "loss": 0.1237, "step": 1412 }, { "epoch": 0.6698269732164019, "grad_norm": 5.620137691497803, "learning_rate": 5.93364166836166e-06, "loss": 0.1464, "step": 1413 }, { "epoch": 0.6703010191988623, "grad_norm": 3.4907541275024414, "learning_rate": 5.918525387439014e-06, "loss": 0.1936, "step": 1414 }, { "epoch": 0.6707750651813226, "grad_norm": 4.219289779663086, "learning_rate": 5.903420288641029e-06, "loss": 0.1451, "step": 1415 }, { "epoch": 0.6712491111637829, "grad_norm": 6.039963245391846, "learning_rate": 5.888326413351542e-06, "loss": 0.2012, "step": 1416 }, { "epoch": 0.6717231571462432, "grad_norm": 4.030850887298584, "learning_rate": 5.873243802923651e-06, "loss": 0.128, "step": 1417 }, { "epoch": 0.6721972031287035, "grad_norm": 5.661362648010254, "learning_rate": 5.85817249867958e-06, "loss": 0.0769, "step": 1418 }, { "epoch": 0.6726712491111638, "grad_norm": 5.221426963806152, "learning_rate": 5.843112541910587e-06, "loss": 0.1391, "step": 1419 }, { "epoch": 0.6731452950936241, "grad_norm": 5.903331756591797, "learning_rate": 5.828063973876834e-06, "loss": 0.1531, "step": 1420 }, { "epoch": 0.6731452950936241, "eval_accuracy": 0.9927536231884058, "eval_f1": 0.9174311926605505, "eval_loss": 0.014160308986902237, "eval_precision": 0.8771929824561403, "eval_recall": 0.9615384615384616, "eval_runtime": 49.5874, "eval_samples_per_second": 5.465, "eval_steps_per_second": 0.181, "step": 1420 }, { "epoch": 0.6736193410760843, "grad_norm": 4.724630832672119, "learning_rate": 5.813026835807282e-06, "loss": 0.1842, "step": 1421 }, { "epoch": 0.6740933870585447, "grad_norm": 5.695087432861328, "learning_rate": 5.7980011688995806e-06, "loss": 0.2552, "step": 1422 }, { "epoch": 0.674567433041005, "grad_norm": 5.807624816894531, "learning_rate": 5.78298701431995e-06, "loss": 0.1253, "step": 1423 }, { "epoch": 0.6750414790234652, "grad_norm": 4.752889156341553, "learning_rate": 5.7679844132030674e-06, "loss": 0.1999, "step": 1424 }, { "epoch": 0.6755155250059256, "grad_norm": 3.557983160018921, "learning_rate": 5.7529934066519585e-06, "loss": 0.1648, "step": 1425 }, { "epoch": 0.6759895709883859, "grad_norm": 5.358379364013672, "learning_rate": 5.738014035737885e-06, "loss": 0.1123, "step": 1426 }, { "epoch": 0.6764636169708462, "grad_norm": 5.0176005363464355, "learning_rate": 5.723046341500231e-06, "loss": 0.2249, "step": 1427 }, { "epoch": 0.6769376629533065, "grad_norm": 3.9538960456848145, "learning_rate": 5.708090364946376e-06, "loss": 0.1335, "step": 1428 }, { "epoch": 0.6774117089357667, "grad_norm": 3.8372397422790527, "learning_rate": 5.693146147051618e-06, "loss": 0.128, "step": 1429 }, { "epoch": 0.6778857549182271, "grad_norm": 4.459795951843262, "learning_rate": 5.678213728759024e-06, "loss": 0.179, "step": 1430 }, { "epoch": 0.6783598009006874, "grad_norm": 4.049709796905518, "learning_rate": 5.663293150979339e-06, "loss": 0.085, "step": 1431 }, { "epoch": 0.6788338468831476, "grad_norm": 4.896775722503662, "learning_rate": 5.648384454590867e-06, "loss": 0.1561, "step": 1432 }, { "epoch": 0.679307892865608, "grad_norm": 6.124163627624512, "learning_rate": 5.633487680439362e-06, "loss": 0.1417, "step": 1433 }, { "epoch": 0.6797819388480683, "grad_norm": 4.774771690368652, "learning_rate": 5.618602869337909e-06, "loss": 0.1875, "step": 1434 }, { "epoch": 0.6802559848305285, "grad_norm": 6.165420055389404, "learning_rate": 5.6037300620668235e-06, "loss": 
0.2191, "step": 1435 }, { "epoch": 0.6807300308129889, "grad_norm": 3.7102625370025635, "learning_rate": 5.588869299373533e-06, "loss": 0.1479, "step": 1436 }, { "epoch": 0.6812040767954491, "grad_norm": 5.609609127044678, "learning_rate": 5.574020621972467e-06, "loss": 0.2101, "step": 1437 }, { "epoch": 0.6816781227779095, "grad_norm": 4.578980445861816, "learning_rate": 5.559184070544933e-06, "loss": 0.1952, "step": 1438 }, { "epoch": 0.6821521687603698, "grad_norm": 7.65181827545166, "learning_rate": 5.544359685739033e-06, "loss": 0.2265, "step": 1439 }, { "epoch": 0.68262621474283, "grad_norm": 6.7683186531066895, "learning_rate": 5.529547508169526e-06, "loss": 0.1366, "step": 1440 }, { "epoch": 0.68262621474283, "eval_accuracy": 0.9927536231884058, "eval_f1": 0.9203539823008849, "eval_loss": 0.015710242092609406, "eval_precision": 0.8524590163934426, "eval_recall": 1.0, "eval_runtime": 48.8366, "eval_samples_per_second": 5.549, "eval_steps_per_second": 0.184, "step": 1440 }, { "epoch": 0.6831002607252904, "grad_norm": 5.058575630187988, "learning_rate": 5.514747578417731e-06, "loss": 0.1425, "step": 1441 }, { "epoch": 0.6835743067077507, "grad_norm": 7.0907511711120605, "learning_rate": 5.4999599370314085e-06, "loss": 0.1608, "step": 1442 }, { "epoch": 0.6840483526902109, "grad_norm": 4.7569499015808105, "learning_rate": 5.4851846245246545e-06, "loss": 0.1637, "step": 1443 }, { "epoch": 0.6845223986726713, "grad_norm": 5.035114765167236, "learning_rate": 5.470421681377785e-06, "loss": 0.2071, "step": 1444 }, { "epoch": 0.6849964446551315, "grad_norm": 4.729055881500244, "learning_rate": 5.455671148037225e-06, "loss": 0.1052, "step": 1445 }, { "epoch": 0.6854704906375918, "grad_norm": 3.187558889389038, "learning_rate": 5.440933064915414e-06, "loss": 0.0986, "step": 1446 }, { "epoch": 0.6859445366200522, "grad_norm": 5.462747573852539, "learning_rate": 5.426207472390661e-06, "loss": 0.1472, "step": 1447 }, { "epoch": 0.6864185826025124, "grad_norm": 6.169122695922852, "learning_rate": 5.411494410807065e-06, "loss": 0.2281, "step": 1448 }, { "epoch": 0.6868926285849728, "grad_norm": 3.3611762523651123, "learning_rate": 5.396793920474397e-06, "loss": 0.1381, "step": 1449 }, { "epoch": 0.6873666745674331, "grad_norm": 5.044204235076904, "learning_rate": 5.382106041667976e-06, "loss": 0.1799, "step": 1450 }, { "epoch": 0.6878407205498933, "grad_norm": 6.95055627822876, "learning_rate": 5.3674308146285804e-06, "loss": 0.1582, "step": 1451 }, { "epoch": 0.6883147665323537, "grad_norm": 4.531217575073242, "learning_rate": 5.352768279562315e-06, "loss": 0.1486, "step": 1452 }, { "epoch": 0.6887888125148139, "grad_norm": 8.44272518157959, "learning_rate": 5.3381184766405215e-06, "loss": 0.2031, "step": 1453 }, { "epoch": 0.6892628584972742, "grad_norm": 4.025146007537842, "learning_rate": 5.323481445999654e-06, "loss": 0.144, "step": 1454 }, { "epoch": 0.6897369044797346, "grad_norm": 5.411812782287598, "learning_rate": 5.308857227741173e-06, "loss": 0.1882, "step": 1455 }, { "epoch": 0.6902109504621948, "grad_norm": 4.180476665496826, "learning_rate": 5.294245861931442e-06, "loss": 0.1546, "step": 1456 }, { "epoch": 0.6906849964446551, "grad_norm": 5.586846828460693, "learning_rate": 5.2796473886016075e-06, "loss": 0.1934, "step": 1457 }, { "epoch": 0.6911590424271155, "grad_norm": 5.631136894226074, "learning_rate": 5.265061847747497e-06, "loss": 0.1787, "step": 1458 }, { "epoch": 0.6916330884095757, "grad_norm": 3.7365481853485107, "learning_rate": 5.250489279329501e-06, "loss": 0.1179, 
"step": 1459 }, { "epoch": 0.6921071343920361, "grad_norm": 4.168713092803955, "learning_rate": 5.235929723272475e-06, "loss": 0.1597, "step": 1460 }, { "epoch": 0.6921071343920361, "eval_accuracy": 0.9943639291465378, "eval_f1": 0.9369369369369369, "eval_loss": 0.014942323789000511, "eval_precision": 0.8813559322033898, "eval_recall": 1.0, "eval_runtime": 48.875, "eval_samples_per_second": 5.545, "eval_steps_per_second": 0.184, "step": 1460 }, { "epoch": 0.6925811803744963, "grad_norm": 6.216986179351807, "learning_rate": 5.221383219465623e-06, "loss": 0.1951, "step": 1461 }, { "epoch": 0.6930552263569566, "grad_norm": 5.227940082550049, "learning_rate": 5.2068498077623845e-06, "loss": 0.1243, "step": 1462 }, { "epoch": 0.693529272339417, "grad_norm": 3.4037044048309326, "learning_rate": 5.192329527980342e-06, "loss": 0.077, "step": 1463 }, { "epoch": 0.6940033183218772, "grad_norm": 5.699437141418457, "learning_rate": 5.1778224199010905e-06, "loss": 0.172, "step": 1464 }, { "epoch": 0.6944773643043375, "grad_norm": 4.675843238830566, "learning_rate": 5.163328523270138e-06, "loss": 0.1933, "step": 1465 }, { "epoch": 0.6949514102867979, "grad_norm": 3.4957215785980225, "learning_rate": 5.1488478777968e-06, "loss": 0.0953, "step": 1466 }, { "epoch": 0.6954254562692581, "grad_norm": 5.185683727264404, "learning_rate": 5.134380523154086e-06, "loss": 0.1663, "step": 1467 }, { "epoch": 0.6958995022517184, "grad_norm": 11.080448150634766, "learning_rate": 5.1199264989785976e-06, "loss": 0.2107, "step": 1468 }, { "epoch": 0.6963735482341787, "grad_norm": 3.1490566730499268, "learning_rate": 5.105485844870401e-06, "loss": 0.0822, "step": 1469 }, { "epoch": 0.696847594216639, "grad_norm": 7.173159599304199, "learning_rate": 5.091058600392944e-06, "loss": 0.1476, "step": 1470 }, { "epoch": 0.6973216401990993, "grad_norm": 3.537616491317749, "learning_rate": 5.076644805072934e-06, "loss": 0.1315, "step": 1471 }, { "epoch": 0.6977956861815596, "grad_norm": 4.076348304748535, "learning_rate": 5.062244498400228e-06, "loss": 0.1245, "step": 1472 }, { "epoch": 0.6982697321640199, "grad_norm": 8.091590881347656, "learning_rate": 5.0478577198277325e-06, "loss": 0.1385, "step": 1473 }, { "epoch": 0.6987437781464803, "grad_norm": 3.7141075134277344, "learning_rate": 5.033484508771285e-06, "loss": 0.0763, "step": 1474 }, { "epoch": 0.6992178241289405, "grad_norm": 5.427310466766357, "learning_rate": 5.019124904609555e-06, "loss": 0.1781, "step": 1475 }, { "epoch": 0.6996918701114008, "grad_norm": 4.147554397583008, "learning_rate": 5.004778946683932e-06, "loss": 0.1233, "step": 1476 }, { "epoch": 0.7001659160938611, "grad_norm": 3.4888908863067627, "learning_rate": 4.99044667429842e-06, "loss": 0.0655, "step": 1477 }, { "epoch": 0.7006399620763214, "grad_norm": 8.225471496582031, "learning_rate": 4.976128126719527e-06, "loss": 0.2731, "step": 1478 }, { "epoch": 0.7011140080587817, "grad_norm": 2.3801865577697754, "learning_rate": 4.9618233431761584e-06, "loss": 0.0832, "step": 1479 }, { "epoch": 0.701588054041242, "grad_norm": 4.968540668487549, "learning_rate": 4.947532362859511e-06, "loss": 0.1527, "step": 1480 }, { "epoch": 0.701588054041242, "eval_accuracy": 0.9919484702093397, "eval_f1": 0.9122807017543859, "eval_loss": 0.015569827519357204, "eval_precision": 0.8387096774193549, "eval_recall": 1.0, "eval_runtime": 49.1307, "eval_samples_per_second": 5.516, "eval_steps_per_second": 0.183, "step": 1480 }, { "epoch": 0.7020621000237023, "grad_norm": 4.390884876251221, "learning_rate": 4.933255224922964e-06, 
"loss": 0.1427, "step": 1481 }, { "epoch": 0.7025361460061625, "grad_norm": 2.681668758392334, "learning_rate": 4.918991968481973e-06, "loss": 0.0957, "step": 1482 }, { "epoch": 0.7030101919886229, "grad_norm": 4.006803512573242, "learning_rate": 4.90474263261396e-06, "loss": 0.1637, "step": 1483 }, { "epoch": 0.7034842379710832, "grad_norm": 9.42789363861084, "learning_rate": 4.890507256358211e-06, "loss": 0.2156, "step": 1484 }, { "epoch": 0.7039582839535435, "grad_norm": 3.9107887744903564, "learning_rate": 4.876285878715764e-06, "loss": 0.0922, "step": 1485 }, { "epoch": 0.7044323299360038, "grad_norm": 6.439977645874023, "learning_rate": 4.862078538649308e-06, "loss": 0.15, "step": 1486 }, { "epoch": 0.7049063759184641, "grad_norm": 4.352283000946045, "learning_rate": 4.847885275083068e-06, "loss": 0.1205, "step": 1487 }, { "epoch": 0.7053804219009244, "grad_norm": 4.814163684844971, "learning_rate": 4.833706126902709e-06, "loss": 0.1941, "step": 1488 }, { "epoch": 0.7058544678833847, "grad_norm": 5.048938751220703, "learning_rate": 4.819541132955222e-06, "loss": 0.1567, "step": 1489 }, { "epoch": 0.7063285138658449, "grad_norm": 3.386812448501587, "learning_rate": 4.805390332048813e-06, "loss": 0.1236, "step": 1490 }, { "epoch": 0.7068025598483053, "grad_norm": 3.832181930541992, "learning_rate": 4.79125376295281e-06, "loss": 0.1296, "step": 1491 }, { "epoch": 0.7072766058307656, "grad_norm": 3.9667928218841553, "learning_rate": 4.7771314643975475e-06, "loss": 0.1306, "step": 1492 }, { "epoch": 0.7077506518132258, "grad_norm": 6.3302459716796875, "learning_rate": 4.763023475074264e-06, "loss": 0.1424, "step": 1493 }, { "epoch": 0.7082246977956862, "grad_norm": 6.816648483276367, "learning_rate": 4.748929833634993e-06, "loss": 0.1773, "step": 1494 }, { "epoch": 0.7086987437781465, "grad_norm": 3.8036956787109375, "learning_rate": 4.734850578692461e-06, "loss": 0.1543, "step": 1495 }, { "epoch": 0.7091727897606068, "grad_norm": 7.832408905029297, "learning_rate": 4.720785748819973e-06, "loss": 0.2031, "step": 1496 }, { "epoch": 0.7096468357430671, "grad_norm": 8.086996078491211, "learning_rate": 4.706735382551318e-06, "loss": 0.1469, "step": 1497 }, { "epoch": 0.7101208817255273, "grad_norm": 3.4060211181640625, "learning_rate": 4.692699518380664e-06, "loss": 0.0912, "step": 1498 }, { "epoch": 0.7105949277079877, "grad_norm": 5.158050060272217, "learning_rate": 4.678678194762444e-06, "loss": 0.1996, "step": 1499 }, { "epoch": 0.711068973690448, "grad_norm": 4.354590892791748, "learning_rate": 4.6646714501112425e-06, "loss": 0.2199, "step": 1500 }, { "epoch": 0.711068973690448, "eval_accuracy": 0.9943639291465378, "eval_f1": 0.9369369369369369, "eval_loss": 0.013688492588698864, "eval_precision": 0.8813559322033898, "eval_recall": 1.0, "eval_runtime": 49.14, "eval_samples_per_second": 5.515, "eval_steps_per_second": 0.183, "step": 1500 }, { "epoch": 0.7115430196729082, "grad_norm": 4.872258186340332, "learning_rate": 4.6506793228017145e-06, "loss": 0.1726, "step": 1501 }, { "epoch": 0.7120170656553686, "grad_norm": 2.535837411880493, "learning_rate": 4.636701851168465e-06, "loss": 0.107, "step": 1502 }, { "epoch": 0.7124911116378289, "grad_norm": 3.766982078552246, "learning_rate": 4.622739073505943e-06, "loss": 0.1215, "step": 1503 }, { "epoch": 0.7129651576202891, "grad_norm": 3.6720077991485596, "learning_rate": 4.608791028068344e-06, "loss": 0.1504, "step": 1504 }, { "epoch": 0.7134392036027495, "grad_norm": 10.374195098876953, "learning_rate": 4.594857753069497e-06, "loss": 0.2005, 
"step": 1505 }, { "epoch": 0.7139132495852097, "grad_norm": 5.587888717651367, "learning_rate": 4.5809392866827704e-06, "loss": 0.1502, "step": 1506 }, { "epoch": 0.7143872955676701, "grad_norm": 5.169453144073486, "learning_rate": 4.5670356670409544e-06, "loss": 0.164, "step": 1507 }, { "epoch": 0.7148613415501304, "grad_norm": 4.9008378982543945, "learning_rate": 4.5531469322361656e-06, "loss": 0.1771, "step": 1508 }, { "epoch": 0.7153353875325906, "grad_norm": 5.234181880950928, "learning_rate": 4.539273120319743e-06, "loss": 0.1507, "step": 1509 }, { "epoch": 0.715809433515051, "grad_norm": 8.27335262298584, "learning_rate": 4.525414269302138e-06, "loss": 0.2135, "step": 1510 }, { "epoch": 0.7162834794975113, "grad_norm": 3.990186929702759, "learning_rate": 4.5115704171528105e-06, "loss": 0.1408, "step": 1511 }, { "epoch": 0.7167575254799715, "grad_norm": 3.081641435623169, "learning_rate": 4.497741601800134e-06, "loss": 0.0909, "step": 1512 }, { "epoch": 0.7172315714624319, "grad_norm": 5.241434097290039, "learning_rate": 4.48392786113128e-06, "loss": 0.1314, "step": 1513 }, { "epoch": 0.7177056174448921, "grad_norm": 4.4905266761779785, "learning_rate": 4.470129232992118e-06, "loss": 0.1542, "step": 1514 }, { "epoch": 0.7181796634273524, "grad_norm": 10.312759399414062, "learning_rate": 4.456345755187126e-06, "loss": 0.1859, "step": 1515 }, { "epoch": 0.7186537094098128, "grad_norm": 6.353287696838379, "learning_rate": 4.44257746547926e-06, "loss": 0.2085, "step": 1516 }, { "epoch": 0.719127755392273, "grad_norm": 3.957456588745117, "learning_rate": 4.428824401589871e-06, "loss": 0.1338, "step": 1517 }, { "epoch": 0.7196018013747334, "grad_norm": 6.032851219177246, "learning_rate": 4.415086601198592e-06, "loss": 0.1798, "step": 1518 }, { "epoch": 0.7200758473571937, "grad_norm": 2.888061285018921, "learning_rate": 4.401364101943244e-06, "loss": 0.1094, "step": 1519 }, { "epoch": 0.7205498933396539, "grad_norm": 7.244020462036133, "learning_rate": 4.387656941419726e-06, "loss": 0.2215, "step": 1520 }, { "epoch": 0.7205498933396539, "eval_accuracy": 0.9943639291465378, "eval_f1": 0.9369369369369369, "eval_loss": 0.014029080979526043, "eval_precision": 0.8813559322033898, "eval_recall": 1.0, "eval_runtime": 49.9916, "eval_samples_per_second": 5.421, "eval_steps_per_second": 0.18, "step": 1520 }, { "epoch": 0.7210239393221143, "grad_norm": 3.4449071884155273, "learning_rate": 4.373965157181901e-06, "loss": 0.1298, "step": 1521 }, { "epoch": 0.7214979853045745, "grad_norm": 3.9357986450195312, "learning_rate": 4.360288786741521e-06, "loss": 0.1853, "step": 1522 }, { "epoch": 0.7219720312870348, "grad_norm": 4.31260871887207, "learning_rate": 4.3466278675681e-06, "loss": 0.1271, "step": 1523 }, { "epoch": 0.7224460772694952, "grad_norm": 4.201765537261963, "learning_rate": 4.332982437088825e-06, "loss": 0.1158, "step": 1524 }, { "epoch": 0.7229201232519554, "grad_norm": 4.641693592071533, "learning_rate": 4.319352532688444e-06, "loss": 0.1888, "step": 1525 }, { "epoch": 0.7233941692344157, "grad_norm": 9.28537368774414, "learning_rate": 4.305738191709167e-06, "loss": 0.176, "step": 1526 }, { "epoch": 0.723868215216876, "grad_norm": 3.4520504474639893, "learning_rate": 4.292139451450569e-06, "loss": 0.1157, "step": 1527 }, { "epoch": 0.7243422611993363, "grad_norm": 4.482147216796875, "learning_rate": 4.2785563491694785e-06, "loss": 0.1584, "step": 1528 }, { "epoch": 0.7248163071817967, "grad_norm": 3.378488779067993, "learning_rate": 4.264988922079885e-06, "loss": 0.1235, "step": 1529 }, { 
"epoch": 0.7252903531642569, "grad_norm": 5.183737754821777, "learning_rate": 4.251437207352826e-06, "loss": 0.1409, "step": 1530 }, { "epoch": 0.7257643991467172, "grad_norm": 12.602823257446289, "learning_rate": 4.237901242116299e-06, "loss": 0.2079, "step": 1531 }, { "epoch": 0.7262384451291776, "grad_norm": 6.954885959625244, "learning_rate": 4.224381063455147e-06, "loss": 0.1736, "step": 1532 }, { "epoch": 0.7267124911116378, "grad_norm": 3.512031316757202, "learning_rate": 4.210876708410962e-06, "loss": 0.0673, "step": 1533 }, { "epoch": 0.7271865370940981, "grad_norm": 4.1737847328186035, "learning_rate": 4.1973882139819875e-06, "loss": 0.187, "step": 1534 }, { "epoch": 0.7276605830765585, "grad_norm": 6.307000637054443, "learning_rate": 4.18391561712301e-06, "loss": 0.2948, "step": 1535 }, { "epoch": 0.7281346290590187, "grad_norm": 3.5658984184265137, "learning_rate": 4.17045895474526e-06, "loss": 0.1893, "step": 1536 }, { "epoch": 0.728608675041479, "grad_norm": 4.611821174621582, "learning_rate": 4.1570182637163155e-06, "loss": 0.1359, "step": 1537 }, { "epoch": 0.7290827210239393, "grad_norm": 6.059764862060547, "learning_rate": 4.143593580859995e-06, "loss": 0.2027, "step": 1538 }, { "epoch": 0.7295567670063996, "grad_norm": 4.427293300628662, "learning_rate": 4.13018494295626e-06, "loss": 0.1617, "step": 1539 }, { "epoch": 0.73003081298886, "grad_norm": 4.276612281799316, "learning_rate": 4.116792386741112e-06, "loss": 0.199, "step": 1540 }, { "epoch": 0.73003081298886, "eval_accuracy": 0.9935587761674718, "eval_f1": 0.9285714285714286, "eval_loss": 0.016725175082683563, "eval_precision": 0.8666666666666667, "eval_recall": 1.0, "eval_runtime": 49.5623, "eval_samples_per_second": 5.468, "eval_steps_per_second": 0.182, "step": 1540 }, { "epoch": 0.7305048589713202, "grad_norm": 9.366026878356934, "learning_rate": 4.103415948906498e-06, "loss": 0.1595, "step": 1541 }, { "epoch": 0.7309789049537805, "grad_norm": 3.4646170139312744, "learning_rate": 4.090055666100193e-06, "loss": 0.1132, "step": 1542 }, { "epoch": 0.7314529509362409, "grad_norm": 2.879565477371216, "learning_rate": 4.076711574925725e-06, "loss": 0.0768, "step": 1543 }, { "epoch": 0.7319269969187011, "grad_norm": 3.3566582202911377, "learning_rate": 4.063383711942253e-06, "loss": 0.1202, "step": 1544 }, { "epoch": 0.7324010429011614, "grad_norm": 5.941265106201172, "learning_rate": 4.050072113664479e-06, "loss": 0.1689, "step": 1545 }, { "epoch": 0.7328750888836217, "grad_norm": 3.312645673751831, "learning_rate": 4.036776816562546e-06, "loss": 0.1175, "step": 1546 }, { "epoch": 0.733349134866082, "grad_norm": 5.1289896965026855, "learning_rate": 4.023497857061929e-06, "loss": 0.168, "step": 1547 }, { "epoch": 0.7338231808485423, "grad_norm": 5.3061041831970215, "learning_rate": 4.010235271543349e-06, "loss": 0.1091, "step": 1548 }, { "epoch": 0.7342972268310026, "grad_norm": 2.737217426300049, "learning_rate": 3.9969890963426615e-06, "loss": 0.0868, "step": 1549 }, { "epoch": 0.7347712728134629, "grad_norm": 3.673825740814209, "learning_rate": 3.983759367750772e-06, "loss": 0.1859, "step": 1550 }, { "epoch": 0.7352453187959233, "grad_norm": 2.3337619304656982, "learning_rate": 3.970546122013521e-06, "loss": 0.0998, "step": 1551 }, { "epoch": 0.7357193647783835, "grad_norm": 4.11493444442749, "learning_rate": 3.957349395331581e-06, "loss": 0.1624, "step": 1552 }, { "epoch": 0.7361934107608438, "grad_norm": 6.830287933349609, "learning_rate": 3.94416922386038e-06, "loss": 0.2052, "step": 1553 }, { "epoch": 
0.7366674567433041, "grad_norm": 3.5485124588012695, "learning_rate": 3.931005643709985e-06, "loss": 0.1101, "step": 1554 }, { "epoch": 0.7371415027257644, "grad_norm": 3.0349855422973633, "learning_rate": 3.917858690945006e-06, "loss": 0.1351, "step": 1555 }, { "epoch": 0.7376155487082247, "grad_norm": 4.467025279998779, "learning_rate": 3.9047284015844985e-06, "loss": 0.2263, "step": 1556 }, { "epoch": 0.738089594690685, "grad_norm": 3.118184804916382, "learning_rate": 3.891614811601869e-06, "loss": 0.1084, "step": 1557 }, { "epoch": 0.7385636406731453, "grad_norm": 3.0133302211761475, "learning_rate": 3.8785179569247666e-06, "loss": 0.124, "step": 1558 }, { "epoch": 0.7390376866556055, "grad_norm": 5.373924732208252, "learning_rate": 3.865437873434994e-06, "loss": 0.1461, "step": 1559 }, { "epoch": 0.7395117326380659, "grad_norm": 4.622145175933838, "learning_rate": 3.852374596968402e-06, "loss": 0.1505, "step": 1560 }, { "epoch": 0.7395117326380659, "eval_accuracy": 0.9959742351046699, "eval_f1": 0.9532710280373832, "eval_loss": 0.010591572150588036, "eval_precision": 0.9272727272727272, "eval_recall": 0.9807692307692307, "eval_runtime": 50.5817, "eval_samples_per_second": 5.358, "eval_steps_per_second": 0.178, "step": 1560 }, { "epoch": 0.7399857786205262, "grad_norm": 4.967133045196533, "learning_rate": 3.8393281633148e-06, "loss": 0.2355, "step": 1561 }, { "epoch": 0.7404598246029865, "grad_norm": 6.222865104675293, "learning_rate": 3.8262986082178485e-06, "loss": 0.1244, "step": 1562 }, { "epoch": 0.7409338705854468, "grad_norm": 3.9064159393310547, "learning_rate": 3.8132859673749688e-06, "loss": 0.1583, "step": 1563 }, { "epoch": 0.741407916567907, "grad_norm": 4.444194316864014, "learning_rate": 3.800290276437234e-06, "loss": 0.1243, "step": 1564 }, { "epoch": 0.7418819625503674, "grad_norm": 9.474536895751953, "learning_rate": 3.787311571009288e-06, "loss": 0.1425, "step": 1565 }, { "epoch": 0.7423560085328277, "grad_norm": 6.501336097717285, "learning_rate": 3.77434988664923e-06, "loss": 0.2399, "step": 1566 }, { "epoch": 0.7428300545152879, "grad_norm": 6.09113883972168, "learning_rate": 3.761405258868541e-06, "loss": 0.2184, "step": 1567 }, { "epoch": 0.7433041004977483, "grad_norm": 3.368168830871582, "learning_rate": 3.748477723131958e-06, "loss": 0.1311, "step": 1568 }, { "epoch": 0.7437781464802086, "grad_norm": 2.424818277359009, "learning_rate": 3.7355673148573956e-06, "loss": 0.0999, "step": 1569 }, { "epoch": 0.7442521924626688, "grad_norm": 4.34131383895874, "learning_rate": 3.7226740694158413e-06, "loss": 0.0945, "step": 1570 }, { "epoch": 0.7447262384451292, "grad_norm": 7.627197742462158, "learning_rate": 3.7097980221312645e-06, "loss": 0.1769, "step": 1571 }, { "epoch": 0.7452002844275895, "grad_norm": 3.147916316986084, "learning_rate": 3.696939208280518e-06, "loss": 0.1703, "step": 1572 }, { "epoch": 0.7456743304100498, "grad_norm": 4.2883734703063965, "learning_rate": 3.6840976630932292e-06, "loss": 0.1882, "step": 1573 }, { "epoch": 0.7461483763925101, "grad_norm": 4.216695785522461, "learning_rate": 3.6712734217517256e-06, "loss": 0.1268, "step": 1574 }, { "epoch": 0.7466224223749703, "grad_norm": 4.144759654998779, "learning_rate": 3.658466519390921e-06, "loss": 0.1698, "step": 1575 }, { "epoch": 0.7470964683574307, "grad_norm": 4.534786701202393, "learning_rate": 3.645676991098227e-06, "loss": 0.2115, "step": 1576 }, { "epoch": 0.747570514339891, "grad_norm": 6.341872215270996, "learning_rate": 3.6329048719134564e-06, "loss": 0.1636, "step": 1577 }, { 
"epoch": 0.7480445603223512, "grad_norm": 2.7161991596221924, "learning_rate": 3.6201501968287225e-06, "loss": 0.0904, "step": 1578 }, { "epoch": 0.7485186063048116, "grad_norm": 11.25274658203125, "learning_rate": 3.60741300078835e-06, "loss": 0.1936, "step": 1579 }, { "epoch": 0.7489926522872719, "grad_norm": 7.705114841461182, "learning_rate": 3.5946933186887722e-06, "loss": 0.2082, "step": 1580 }, { "epoch": 0.7489926522872719, "eval_accuracy": 0.9927536231884058, "eval_f1": 0.9203539823008849, "eval_loss": 0.01460732240229845, "eval_precision": 0.8524590163934426, "eval_recall": 1.0, "eval_runtime": 51.5983, "eval_samples_per_second": 5.252, "eval_steps_per_second": 0.174, "step": 1580 }, { "epoch": 0.7494666982697321, "grad_norm": 4.415287494659424, "learning_rate": 3.581991185378442e-06, "loss": 0.1656, "step": 1581 }, { "epoch": 0.7499407442521925, "grad_norm": 6.075917720794678, "learning_rate": 3.56930663565773e-06, "loss": 0.0892, "step": 1582 }, { "epoch": 0.7504147902346527, "grad_norm": 3.186579704284668, "learning_rate": 3.556639704278838e-06, "loss": 0.1422, "step": 1583 }, { "epoch": 0.7508888362171131, "grad_norm": 6.558785915374756, "learning_rate": 3.543990425945694e-06, "loss": 0.1339, "step": 1584 }, { "epoch": 0.7513628821995734, "grad_norm": 6.33551549911499, "learning_rate": 3.5313588353138605e-06, "loss": 0.2222, "step": 1585 }, { "epoch": 0.7518369281820336, "grad_norm": 3.3917043209075928, "learning_rate": 3.518744966990446e-06, "loss": 0.136, "step": 1586 }, { "epoch": 0.752310974164494, "grad_norm": 3.0663864612579346, "learning_rate": 3.5061488555339997e-06, "loss": 0.1326, "step": 1587 }, { "epoch": 0.7527850201469543, "grad_norm": 3.1277832984924316, "learning_rate": 3.4935705354544224e-06, "loss": 0.1434, "step": 1588 }, { "epoch": 0.7532590661294145, "grad_norm": 7.34363317489624, "learning_rate": 3.4810100412128743e-06, "loss": 0.204, "step": 1589 }, { "epoch": 0.7537331121118749, "grad_norm": 4.843137741088867, "learning_rate": 3.468467407221676e-06, "loss": 0.128, "step": 1590 }, { "epoch": 0.7542071580943351, "grad_norm": 6.338703632354736, "learning_rate": 3.455942667844214e-06, "loss": 0.1744, "step": 1591 }, { "epoch": 0.7546812040767954, "grad_norm": 3.496189832687378, "learning_rate": 3.443435857394851e-06, "loss": 0.15, "step": 1592 }, { "epoch": 0.7551552500592558, "grad_norm": 5.926684379577637, "learning_rate": 3.430947010138833e-06, "loss": 0.0859, "step": 1593 }, { "epoch": 0.755629296041716, "grad_norm": 2.3819658756256104, "learning_rate": 3.41847616029218e-06, "loss": 0.0843, "step": 1594 }, { "epoch": 0.7561033420241764, "grad_norm": 3.258310317993164, "learning_rate": 3.4060233420216136e-06, "loss": 0.1761, "step": 1595 }, { "epoch": 0.7565773880066367, "grad_norm": 5.501744270324707, "learning_rate": 3.393588589444453e-06, "loss": 0.1055, "step": 1596 }, { "epoch": 0.7570514339890969, "grad_norm": 8.512458801269531, "learning_rate": 3.381171936628521e-06, "loss": 0.1718, "step": 1597 }, { "epoch": 0.7575254799715573, "grad_norm": 5.183568954467773, "learning_rate": 3.3687734175920505e-06, "loss": 0.1423, "step": 1598 }, { "epoch": 0.7579995259540175, "grad_norm": 3.6577601432800293, "learning_rate": 3.356393066303595e-06, "loss": 0.2042, "step": 1599 }, { "epoch": 0.7584735719364778, "grad_norm": 4.060479164123535, "learning_rate": 3.3440309166819284e-06, "loss": 0.1431, "step": 1600 }, { "epoch": 0.7584735719364778, "eval_accuracy": 0.9943639291465378, "eval_f1": 0.9357798165137615, "eval_loss": 0.010845650918781757, 
"eval_precision": 0.8947368421052632, "eval_recall": 0.9807692307692307, "eval_runtime": 50.3102, "eval_samples_per_second": 5.387, "eval_steps_per_second": 0.179, "step": 1600 }, { "epoch": 0.7589476179189382, "grad_norm": 2.9690921306610107, "learning_rate": 3.3316870025959693e-06, "loss": 0.1082, "step": 1601 }, { "epoch": 0.7594216639013984, "grad_norm": 5.340085506439209, "learning_rate": 3.3193613578646633e-06, "loss": 0.1429, "step": 1602 }, { "epoch": 0.7598957098838587, "grad_norm": 3.9515483379364014, "learning_rate": 3.307054016256912e-06, "loss": 0.1083, "step": 1603 }, { "epoch": 0.760369755866319, "grad_norm": 6.481595993041992, "learning_rate": 3.2947650114914587e-06, "loss": 0.189, "step": 1604 }, { "epoch": 0.7608438018487793, "grad_norm": 5.499702453613281, "learning_rate": 3.2824943772368213e-06, "loss": 0.1637, "step": 1605 }, { "epoch": 0.7613178478312397, "grad_norm": 6.298553943634033, "learning_rate": 3.270242147111182e-06, "loss": 0.1212, "step": 1606 }, { "epoch": 0.7617918938136999, "grad_norm": 3.0482068061828613, "learning_rate": 3.258008354682303e-06, "loss": 0.1139, "step": 1607 }, { "epoch": 0.7622659397961602, "grad_norm": 8.920551300048828, "learning_rate": 3.2457930334674304e-06, "loss": 0.1548, "step": 1608 }, { "epoch": 0.7627399857786206, "grad_norm": 7.286499500274658, "learning_rate": 3.233596216933206e-06, "loss": 0.1776, "step": 1609 }, { "epoch": 0.7632140317610808, "grad_norm": 12.77665901184082, "learning_rate": 3.2214179384955713e-06, "loss": 0.1825, "step": 1610 }, { "epoch": 0.7636880777435411, "grad_norm": 5.8278374671936035, "learning_rate": 3.209258231519682e-06, "loss": 0.1913, "step": 1611 }, { "epoch": 0.7641621237260015, "grad_norm": 3.0561583042144775, "learning_rate": 3.197117129319808e-06, "loss": 0.1343, "step": 1612 }, { "epoch": 0.7646361697084617, "grad_norm": 6.679983139038086, "learning_rate": 3.1849946651592532e-06, "loss": 0.1593, "step": 1613 }, { "epoch": 0.765110215690922, "grad_norm": 4.746762275695801, "learning_rate": 3.172890872250254e-06, "loss": 0.2468, "step": 1614 }, { "epoch": 0.7655842616733823, "grad_norm": 3.5384531021118164, "learning_rate": 3.1608057837538976e-06, "loss": 0.0998, "step": 1615 }, { "epoch": 0.7660583076558426, "grad_norm": 3.744356870651245, "learning_rate": 3.1487394327800156e-06, "loss": 0.1393, "step": 1616 }, { "epoch": 0.766532353638303, "grad_norm": 3.5314719676971436, "learning_rate": 3.136691852387116e-06, "loss": 0.0888, "step": 1617 }, { "epoch": 0.7670063996207632, "grad_norm": 5.413354396820068, "learning_rate": 3.1246630755822703e-06, "loss": 0.1746, "step": 1618 }, { "epoch": 0.7674804456032235, "grad_norm": 5.721497535705566, "learning_rate": 3.1126531353210456e-06, "loss": 0.1132, "step": 1619 }, { "epoch": 0.7679544915856839, "grad_norm": 6.063429355621338, "learning_rate": 3.1006620645073925e-06, "loss": 0.1388, "step": 1620 }, { "epoch": 0.7679544915856839, "eval_accuracy": 0.9951690821256038, "eval_f1": 0.9454545454545454, "eval_loss": 0.012482204474508762, "eval_precision": 0.896551724137931, "eval_recall": 1.0, "eval_runtime": 49.5878, "eval_samples_per_second": 5.465, "eval_steps_per_second": 0.181, "step": 1620 }, { "epoch": 0.7684285375681441, "grad_norm": 5.663280487060547, "learning_rate": 3.0886898959935663e-06, "loss": 0.1339, "step": 1621 }, { "epoch": 0.7689025835506044, "grad_norm": 3.009401321411133, "learning_rate": 3.0767366625800366e-06, "loss": 0.1137, "step": 1622 }, { "epoch": 0.7693766295330647, "grad_norm": 4.703526973724365, "learning_rate": 
3.064802397015394e-06, "loss": 0.2366, "step": 1623 }, { "epoch": 0.769850675515525, "grad_norm": 3.2940542697906494, "learning_rate": 3.052887131996267e-06, "loss": 0.1395, "step": 1624 }, { "epoch": 0.7703247214979853, "grad_norm": 3.261302947998047, "learning_rate": 3.040990900167219e-06, "loss": 0.1505, "step": 1625 }, { "epoch": 0.7707987674804456, "grad_norm": 3.4305295944213867, "learning_rate": 3.0291137341206755e-06, "loss": 0.1372, "step": 1626 }, { "epoch": 0.7712728134629059, "grad_norm": 8.65300178527832, "learning_rate": 3.0172556663968254e-06, "loss": 0.1821, "step": 1627 }, { "epoch": 0.7717468594453663, "grad_norm": 5.62878942489624, "learning_rate": 3.0054167294835314e-06, "loss": 0.1512, "step": 1628 }, { "epoch": 0.7722209054278265, "grad_norm": 5.76574182510376, "learning_rate": 2.993596955816244e-06, "loss": 0.1573, "step": 1629 }, { "epoch": 0.7726949514102868, "grad_norm": 7.997915267944336, "learning_rate": 2.9817963777779124e-06, "loss": 0.2725, "step": 1630 }, { "epoch": 0.7731689973927471, "grad_norm": 3.254222869873047, "learning_rate": 2.970015027698895e-06, "loss": 0.1247, "step": 1631 }, { "epoch": 0.7736430433752074, "grad_norm": 8.073678016662598, "learning_rate": 2.958252937856869e-06, "loss": 0.1538, "step": 1632 }, { "epoch": 0.7741170893576677, "grad_norm": 2.6469109058380127, "learning_rate": 2.946510140476747e-06, "loss": 0.0928, "step": 1633 }, { "epoch": 0.774591135340128, "grad_norm": 6.9095869064331055, "learning_rate": 2.9347866677305814e-06, "loss": 0.1415, "step": 1634 }, { "epoch": 0.7750651813225883, "grad_norm": 3.802766799926758, "learning_rate": 2.923082551737484e-06, "loss": 0.1323, "step": 1635 }, { "epoch": 0.7755392273050485, "grad_norm": 4.053550720214844, "learning_rate": 2.911397824563533e-06, "loss": 0.1498, "step": 1636 }, { "epoch": 0.7760132732875089, "grad_norm": 5.973599910736084, "learning_rate": 2.899732518221685e-06, "loss": 0.149, "step": 1637 }, { "epoch": 0.7764873192699692, "grad_norm": 3.402735710144043, "learning_rate": 2.888086664671693e-06, "loss": 0.1312, "step": 1638 }, { "epoch": 0.7769613652524295, "grad_norm": 6.684436798095703, "learning_rate": 2.8764602958200096e-06, "loss": 0.1108, "step": 1639 }, { "epoch": 0.7774354112348898, "grad_norm": 3.762352466583252, "learning_rate": 2.8648534435197086e-06, "loss": 0.1221, "step": 1640 }, { "epoch": 0.7774354112348898, "eval_accuracy": 0.9959742351046699, "eval_f1": 0.9532710280373832, "eval_loss": 0.009969827719032764, "eval_precision": 0.9272727272727272, "eval_recall": 0.9807692307692307, "eval_runtime": 50.0594, "eval_samples_per_second": 5.414, "eval_steps_per_second": 0.18, "step": 1640 }, { "epoch": 0.77790945721735, "grad_norm": 5.541801452636719, "learning_rate": 2.853266139570391e-06, "loss": 0.1781, "step": 1641 }, { "epoch": 0.7783835031998104, "grad_norm": 5.2935638427734375, "learning_rate": 2.841698415718103e-06, "loss": 0.1746, "step": 1642 }, { "epoch": 0.7788575491822707, "grad_norm": 3.5511698722839355, "learning_rate": 2.8301503036552446e-06, "loss": 0.1303, "step": 1643 }, { "epoch": 0.7793315951647309, "grad_norm": 2.210439682006836, "learning_rate": 2.8186218350204865e-06, "loss": 0.1052, "step": 1644 }, { "epoch": 0.7798056411471913, "grad_norm": 3.1148386001586914, "learning_rate": 2.8071130413986814e-06, "loss": 0.0829, "step": 1645 }, { "epoch": 0.7802796871296516, "grad_norm": 7.042520999908447, "learning_rate": 2.795623954320781e-06, "loss": 0.2299, "step": 1646 }, { "epoch": 0.7807537331121118, "grad_norm": 4.106062889099121, 
"learning_rate": 2.7841546052637346e-06, "loss": 0.119, "step": 1647 }, { "epoch": 0.7812277790945722, "grad_norm": 2.969593048095703, "learning_rate": 2.7727050256504295e-06, "loss": 0.0684, "step": 1648 }, { "epoch": 0.7817018250770325, "grad_norm": 6.737387180328369, "learning_rate": 2.761275246849582e-06, "loss": 0.1164, "step": 1649 }, { "epoch": 0.7821758710594928, "grad_norm": 6.33607292175293, "learning_rate": 2.7498653001756615e-06, "loss": 0.1104, "step": 1650 }, { "epoch": 0.7826499170419531, "grad_norm": 3.347256898880005, "learning_rate": 2.738475216888802e-06, "loss": 0.1036, "step": 1651 }, { "epoch": 0.7831239630244133, "grad_norm": 3.709547281265259, "learning_rate": 2.7271050281947165e-06, "loss": 0.1436, "step": 1652 }, { "epoch": 0.7835980090068737, "grad_norm": 3.4499459266662598, "learning_rate": 2.7157547652446193e-06, "loss": 0.1515, "step": 1653 }, { "epoch": 0.784072054989334, "grad_norm": 2.6657423973083496, "learning_rate": 2.704424459135123e-06, "loss": 0.1087, "step": 1654 }, { "epoch": 0.7845461009717942, "grad_norm": 6.451166152954102, "learning_rate": 2.6931141409081753e-06, "loss": 0.2029, "step": 1655 }, { "epoch": 0.7850201469542546, "grad_norm": 4.049078464508057, "learning_rate": 2.681823841550947e-06, "loss": 0.1342, "step": 1656 }, { "epoch": 0.7854941929367149, "grad_norm": 5.632473468780518, "learning_rate": 2.6705535919957772e-06, "loss": 0.1467, "step": 1657 }, { "epoch": 0.7859682389191751, "grad_norm": 3.3033530712127686, "learning_rate": 2.6593034231200664e-06, "loss": 0.1404, "step": 1658 }, { "epoch": 0.7864422849016355, "grad_norm": 3.3128445148468018, "learning_rate": 2.648073365746204e-06, "loss": 0.1129, "step": 1659 }, { "epoch": 0.7869163308840957, "grad_norm": 5.318967342376709, "learning_rate": 2.6368634506414757e-06, "loss": 0.1571, "step": 1660 }, { "epoch": 0.7869163308840957, "eval_accuracy": 0.9959742351046699, "eval_f1": 0.9532710280373832, "eval_loss": 0.010810844600200653, "eval_precision": 0.9272727272727272, "eval_recall": 0.9807692307692307, "eval_runtime": 49.9177, "eval_samples_per_second": 5.429, "eval_steps_per_second": 0.18, "step": 1660 }, { "epoch": 0.7873903768665561, "grad_norm": 6.077727317810059, "learning_rate": 2.6256737085179852e-06, "loss": 0.1892, "step": 1661 }, { "epoch": 0.7878644228490164, "grad_norm": 5.929904460906982, "learning_rate": 2.614504170032567e-06, "loss": 0.1609, "step": 1662 }, { "epoch": 0.7883384688314766, "grad_norm": 12.54429817199707, "learning_rate": 2.6033548657867013e-06, "loss": 0.149, "step": 1663 }, { "epoch": 0.788812514813937, "grad_norm": 3.4696834087371826, "learning_rate": 2.5922258263264366e-06, "loss": 0.1037, "step": 1664 }, { "epoch": 0.7892865607963973, "grad_norm": 3.9441494941711426, "learning_rate": 2.581117082142296e-06, "loss": 0.1487, "step": 1665 }, { "epoch": 0.7897606067788575, "grad_norm": 3.3771462440490723, "learning_rate": 2.570028663669204e-06, "loss": 0.0966, "step": 1666 }, { "epoch": 0.7902346527613179, "grad_norm": 5.6400604248046875, "learning_rate": 2.5589606012863968e-06, "loss": 0.1358, "step": 1667 }, { "epoch": 0.7907086987437781, "grad_norm": 3.4519641399383545, "learning_rate": 2.547912925317334e-06, "loss": 0.0834, "step": 1668 }, { "epoch": 0.7911827447262384, "grad_norm": 7.2654242515563965, "learning_rate": 2.5368856660296327e-06, "loss": 0.1244, "step": 1669 }, { "epoch": 0.7916567907086988, "grad_norm": 6.323776721954346, "learning_rate": 2.5258788536349622e-06, "loss": 0.1153, "step": 1670 }, { "epoch": 0.792130836691159, "grad_norm": 
8.622234344482422, "learning_rate": 2.514892518288988e-06, "loss": 0.2104, "step": 1671 }, { "epoch": 0.7926048826736194, "grad_norm": 3.370286703109741, "learning_rate": 2.503926690091263e-06, "loss": 0.0609, "step": 1672 }, { "epoch": 0.7930789286560797, "grad_norm": 5.871740818023682, "learning_rate": 2.492981399085157e-06, "loss": 0.1789, "step": 1673 }, { "epoch": 0.7935529746385399, "grad_norm": 5.285881519317627, "learning_rate": 2.482056675257776e-06, "loss": 0.1565, "step": 1674 }, { "epoch": 0.7940270206210003, "grad_norm": 6.630995273590088, "learning_rate": 2.471152548539876e-06, "loss": 0.176, "step": 1675 }, { "epoch": 0.7945010666034605, "grad_norm": 2.7057905197143555, "learning_rate": 2.4602690488057836e-06, "loss": 0.0897, "step": 1676 }, { "epoch": 0.7949751125859208, "grad_norm": 3.194324493408203, "learning_rate": 2.4494062058733157e-06, "loss": 0.1121, "step": 1677 }, { "epoch": 0.7954491585683812, "grad_norm": 7.977220058441162, "learning_rate": 2.438564049503688e-06, "loss": 0.1833, "step": 1678 }, { "epoch": 0.7959232045508414, "grad_norm": 4.833785057067871, "learning_rate": 2.4277426094014457e-06, "loss": 0.1875, "step": 1679 }, { "epoch": 0.7963972505333017, "grad_norm": 5.799574375152588, "learning_rate": 2.416941915214377e-06, "loss": 0.1472, "step": 1680 }, { "epoch": 0.7963972505333017, "eval_accuracy": 0.9943639291465378, "eval_f1": 0.9357798165137615, "eval_loss": 0.011454065330326557, "eval_precision": 0.8947368421052632, "eval_recall": 0.9807692307692307, "eval_runtime": 49.6644, "eval_samples_per_second": 5.457, "eval_steps_per_second": 0.181, "step": 1680 }, { "epoch": 0.796871296515762, "grad_norm": 5.203197956085205, "learning_rate": 2.4061619965334314e-06, "loss": 0.1582, "step": 1681 }, { "epoch": 0.7973453424982223, "grad_norm": 8.293927192687988, "learning_rate": 2.395402882892639e-06, "loss": 0.2625, "step": 1682 }, { "epoch": 0.7978193884806827, "grad_norm": 4.733770847320557, "learning_rate": 2.3846646037690304e-06, "loss": 0.1162, "step": 1683 }, { "epoch": 0.7982934344631429, "grad_norm": 3.6557698249816895, "learning_rate": 2.3739471885825536e-06, "loss": 0.142, "step": 1684 }, { "epoch": 0.7987674804456032, "grad_norm": 5.944900989532471, "learning_rate": 2.363250666695999e-06, "loss": 0.1202, "step": 1685 }, { "epoch": 0.7992415264280636, "grad_norm": 3.3309900760650635, "learning_rate": 2.3525750674149094e-06, "loss": 0.1227, "step": 1686 }, { "epoch": 0.7997155724105238, "grad_norm": 5.317230224609375, "learning_rate": 2.34192041998751e-06, "loss": 0.1406, "step": 1687 }, { "epoch": 0.8001896183929841, "grad_norm": 4.319701671600342, "learning_rate": 2.331286753604621e-06, "loss": 0.1916, "step": 1688 }, { "epoch": 0.8006636643754445, "grad_norm": 4.4361982345581055, "learning_rate": 2.3206740973995823e-06, "loss": 0.1844, "step": 1689 }, { "epoch": 0.8011377103579047, "grad_norm": 3.2999582290649414, "learning_rate": 2.3100824804481703e-06, "loss": 0.0952, "step": 1690 }, { "epoch": 0.801611756340365, "grad_norm": 7.211174964904785, "learning_rate": 2.29951193176852e-06, "loss": 0.1072, "step": 1691 }, { "epoch": 0.8020858023228253, "grad_norm": 5.33006477355957, "learning_rate": 2.2889624803210453e-06, "loss": 0.1978, "step": 1692 }, { "epoch": 0.8025598483052856, "grad_norm": 5.028670787811279, "learning_rate": 2.2784341550083577e-06, "loss": 0.0922, "step": 1693 }, { "epoch": 0.803033894287746, "grad_norm": 5.079577445983887, "learning_rate": 2.2679269846751915e-06, "loss": 0.1134, "step": 1694 }, { "epoch": 0.8035079402702062, 
"grad_norm": 3.310760974884033, "learning_rate": 2.2574409981083224e-06, "loss": 0.0928, "step": 1695 }, { "epoch": 0.8039819862526665, "grad_norm": 5.977758884429932, "learning_rate": 2.2469762240364847e-06, "loss": 0.1011, "step": 1696 }, { "epoch": 0.8044560322351268, "grad_norm": 6.029415607452393, "learning_rate": 2.236532691130299e-06, "loss": 0.1699, "step": 1697 }, { "epoch": 0.8049300782175871, "grad_norm": 9.231821060180664, "learning_rate": 2.2261104280021937e-06, "loss": 0.2549, "step": 1698 }, { "epoch": 0.8054041242000474, "grad_norm": 2.8385801315307617, "learning_rate": 2.215709463206316e-06, "loss": 0.0953, "step": 1699 }, { "epoch": 0.8058781701825077, "grad_norm": 6.947047233581543, "learning_rate": 2.205329825238467e-06, "loss": 0.1236, "step": 1700 }, { "epoch": 0.8058781701825077, "eval_accuracy": 0.9935587761674718, "eval_f1": 0.9272727272727272, "eval_loss": 0.012126692570745945, "eval_precision": 0.8793103448275862, "eval_recall": 0.9807692307692307, "eval_runtime": 49.2509, "eval_samples_per_second": 5.502, "eval_steps_per_second": 0.183, "step": 1700 }, { "epoch": 0.806352216164968, "grad_norm": 5.284399509429932, "learning_rate": 2.1949715425360173e-06, "loss": 0.2302, "step": 1701 }, { "epoch": 0.8068262621474283, "grad_norm": 3.2276976108551025, "learning_rate": 2.184634643477831e-06, "loss": 0.1153, "step": 1702 }, { "epoch": 0.8073003081298886, "grad_norm": 3.992670774459839, "learning_rate": 2.174319156384186e-06, "loss": 0.1346, "step": 1703 }, { "epoch": 0.8077743541123489, "grad_norm": 3.4810454845428467, "learning_rate": 2.164025109516692e-06, "loss": 0.095, "step": 1704 }, { "epoch": 0.8082484000948092, "grad_norm": 6.212849140167236, "learning_rate": 2.15375253107823e-06, "loss": 0.2299, "step": 1705 }, { "epoch": 0.8087224460772695, "grad_norm": 6.363603591918945, "learning_rate": 2.1435014492128547e-06, "loss": 0.2338, "step": 1706 }, { "epoch": 0.8091964920597298, "grad_norm": 2.451110363006592, "learning_rate": 2.1332718920057307e-06, "loss": 0.084, "step": 1707 }, { "epoch": 0.8096705380421901, "grad_norm": 6.087503910064697, "learning_rate": 2.1230638874830413e-06, "loss": 0.1361, "step": 1708 }, { "epoch": 0.8101445840246504, "grad_norm": 6.973792552947998, "learning_rate": 2.1128774636119307e-06, "loss": 0.2558, "step": 1709 }, { "epoch": 0.8106186300071107, "grad_norm": 6.504344463348389, "learning_rate": 2.102712648300418e-06, "loss": 0.1156, "step": 1710 }, { "epoch": 0.811092675989571, "grad_norm": 5.404758930206299, "learning_rate": 2.0925694693973162e-06, "loss": 0.1702, "step": 1711 }, { "epoch": 0.8115667219720313, "grad_norm": 5.191570281982422, "learning_rate": 2.082447954692164e-06, "loss": 0.1003, "step": 1712 }, { "epoch": 0.8120407679544915, "grad_norm": 3.821763515472412, "learning_rate": 2.0723481319151427e-06, "loss": 0.1208, "step": 1713 }, { "epoch": 0.8125148139369519, "grad_norm": 6.716168403625488, "learning_rate": 2.062270028737008e-06, "loss": 0.2031, "step": 1714 }, { "epoch": 0.8129888599194122, "grad_norm": 2.98030948638916, "learning_rate": 2.052213672769007e-06, "loss": 0.101, "step": 1715 }, { "epoch": 0.8134629059018725, "grad_norm": 5.622329235076904, "learning_rate": 2.042179091562805e-06, "loss": 0.1626, "step": 1716 }, { "epoch": 0.8139369518843328, "grad_norm": 3.3454723358154297, "learning_rate": 2.032166312610411e-06, "loss": 0.0978, "step": 1717 }, { "epoch": 0.814410997866793, "grad_norm": 5.193914413452148, "learning_rate": 2.0221753633441033e-06, "loss": 0.1742, "step": 1718 }, { "epoch": 
0.8148850438492534, "grad_norm": 6.165769577026367, "learning_rate": 2.012206271136353e-06, "loss": 0.1491, "step": 1719 }, { "epoch": 0.8153590898317137, "grad_norm": 6.2735795974731445, "learning_rate": 2.002259063299744e-06, "loss": 0.1477, "step": 1720 }, { "epoch": 0.8153590898317137, "eval_accuracy": 0.9951690821256038, "eval_f1": 0.9444444444444444, "eval_loss": 0.0112903518602252, "eval_precision": 0.9107142857142857, "eval_recall": 0.9807692307692307, "eval_runtime": 49.2837, "eval_samples_per_second": 5.499, "eval_steps_per_second": 0.183, "step": 1720 }, { "epoch": 0.8158331358141739, "grad_norm": 8.393010139465332, "learning_rate": 1.992333767086905e-06, "loss": 0.2223, "step": 1721 }, { "epoch": 0.8163071817966343, "grad_norm": 8.414145469665527, "learning_rate": 1.982430409690439e-06, "loss": 0.1873, "step": 1722 }, { "epoch": 0.8167812277790946, "grad_norm": 3.9387011528015137, "learning_rate": 1.972549018242836e-06, "loss": 0.1164, "step": 1723 }, { "epoch": 0.8172552737615548, "grad_norm": 6.245006084442139, "learning_rate": 1.9626896198164093e-06, "loss": 0.1791, "step": 1724 }, { "epoch": 0.8177293197440152, "grad_norm": 3.7231218814849854, "learning_rate": 1.9528522414232122e-06, "loss": 0.1483, "step": 1725 }, { "epoch": 0.8182033657264755, "grad_norm": 7.006000995635986, "learning_rate": 1.9430369100149727e-06, "loss": 0.2054, "step": 1726 }, { "epoch": 0.8186774117089358, "grad_norm": 4.269167900085449, "learning_rate": 1.9332436524830167e-06, "loss": 0.1074, "step": 1727 }, { "epoch": 0.8191514576913961, "grad_norm": 4.43737268447876, "learning_rate": 1.9234724956581918e-06, "loss": 0.1194, "step": 1728 }, { "epoch": 0.8196255036738563, "grad_norm": 5.734049320220947, "learning_rate": 1.9137234663107995e-06, "loss": 0.2281, "step": 1729 }, { "epoch": 0.8200995496563167, "grad_norm": 4.329126358032227, "learning_rate": 1.9039965911505098e-06, "loss": 0.1464, "step": 1730 }, { "epoch": 0.820573595638777, "grad_norm": 6.123368263244629, "learning_rate": 1.8942918968263036e-06, "loss": 0.1876, "step": 1731 }, { "epoch": 0.8210476416212372, "grad_norm": 5.27827262878418, "learning_rate": 1.8846094099263911e-06, "loss": 0.1182, "step": 1732 }, { "epoch": 0.8215216876036976, "grad_norm": 3.905064344406128, "learning_rate": 1.8749491569781397e-06, "loss": 0.1297, "step": 1733 }, { "epoch": 0.8219957335861579, "grad_norm": 4.914556980133057, "learning_rate": 1.8653111644480004e-06, "loss": 0.1504, "step": 1734 }, { "epoch": 0.8224697795686181, "grad_norm": 2.135080337524414, "learning_rate": 1.8556954587414377e-06, "loss": 0.0756, "step": 1735 }, { "epoch": 0.8229438255510785, "grad_norm": 5.558071136474609, "learning_rate": 1.8461020662028583e-06, "loss": 0.1586, "step": 1736 }, { "epoch": 0.8234178715335387, "grad_norm": 4.518209934234619, "learning_rate": 1.8365310131155345e-06, "loss": 0.2052, "step": 1737 }, { "epoch": 0.8238919175159991, "grad_norm": 5.095973014831543, "learning_rate": 1.8269823257015351e-06, "loss": 0.1441, "step": 1738 }, { "epoch": 0.8243659634984594, "grad_norm": 5.229091644287109, "learning_rate": 1.8174560301216527e-06, "loss": 0.1543, "step": 1739 }, { "epoch": 0.8248400094809196, "grad_norm": 5.269925594329834, "learning_rate": 1.807952152475333e-06, "loss": 0.1781, "step": 1740 }, { "epoch": 0.8248400094809196, "eval_accuracy": 0.9935587761674718, "eval_f1": 0.9272727272727272, "eval_loss": 0.01386988628655672, "eval_precision": 0.8793103448275862, "eval_recall": 0.9807692307692307, "eval_runtime": 50.0148, "eval_samples_per_second": 5.418, 
"eval_steps_per_second": 0.18, "step": 1740 }, { "epoch": 0.82531405546338, "grad_norm": 3.926471710205078, "learning_rate": 1.7984707188006034e-06, "loss": 0.1284, "step": 1741 }, { "epoch": 0.8257881014458403, "grad_norm": 3.689272403717041, "learning_rate": 1.7890117550739995e-06, "loss": 0.1592, "step": 1742 }, { "epoch": 0.8262621474283005, "grad_norm": 8.198687553405762, "learning_rate": 1.7795752872104965e-06, "loss": 0.2166, "step": 1743 }, { "epoch": 0.8267361934107609, "grad_norm": 5.243244171142578, "learning_rate": 1.7701613410634367e-06, "loss": 0.238, "step": 1744 }, { "epoch": 0.8272102393932211, "grad_norm": 7.627920150756836, "learning_rate": 1.7607699424244583e-06, "loss": 0.1358, "step": 1745 }, { "epoch": 0.8276842853756814, "grad_norm": 4.7214884757995605, "learning_rate": 1.7514011170234258e-06, "loss": 0.1165, "step": 1746 }, { "epoch": 0.8281583313581418, "grad_norm": 4.949544906616211, "learning_rate": 1.7420548905283619e-06, "loss": 0.1227, "step": 1747 }, { "epoch": 0.828632377340602, "grad_norm": 3.5602853298187256, "learning_rate": 1.7327312885453695e-06, "loss": 0.1247, "step": 1748 }, { "epoch": 0.8291064233230624, "grad_norm": 4.533194065093994, "learning_rate": 1.7234303366185712e-06, "loss": 0.183, "step": 1749 }, { "epoch": 0.8295804693055226, "grad_norm": 4.326444625854492, "learning_rate": 1.7141520602300332e-06, "loss": 0.1427, "step": 1750 }, { "epoch": 0.8300545152879829, "grad_norm": 2.9050464630126953, "learning_rate": 1.7048964847996928e-06, "loss": 0.1105, "step": 1751 }, { "epoch": 0.8305285612704433, "grad_norm": 4.191965579986572, "learning_rate": 1.6956636356852984e-06, "loss": 0.1444, "step": 1752 }, { "epoch": 0.8310026072529035, "grad_norm": 4.245302677154541, "learning_rate": 1.6864535381823333e-06, "loss": 0.1695, "step": 1753 }, { "epoch": 0.8314766532353638, "grad_norm": 4.924167156219482, "learning_rate": 1.6772662175239451e-06, "loss": 0.1323, "step": 1754 }, { "epoch": 0.8319506992178242, "grad_norm": 3.5585765838623047, "learning_rate": 1.668101698880883e-06, "loss": 0.1285, "step": 1755 }, { "epoch": 0.8324247452002844, "grad_norm": 5.416965484619141, "learning_rate": 1.6589600073614175e-06, "loss": 0.1603, "step": 1756 }, { "epoch": 0.8328987911827447, "grad_norm": 4.387139320373535, "learning_rate": 1.6498411680112925e-06, "loss": 0.1554, "step": 1757 }, { "epoch": 0.833372837165205, "grad_norm": 3.8568196296691895, "learning_rate": 1.6407452058136298e-06, "loss": 0.1279, "step": 1758 }, { "epoch": 0.8338468831476653, "grad_norm": 5.32737922668457, "learning_rate": 1.6316721456888807e-06, "loss": 0.209, "step": 1759 }, { "epoch": 0.8343209291301257, "grad_norm": 6.815487861633301, "learning_rate": 1.6226220124947513e-06, "loss": 0.1517, "step": 1760 }, { "epoch": 0.8343209291301257, "eval_accuracy": 0.9935587761674718, "eval_f1": 0.9272727272727272, "eval_loss": 0.012907618656754494, "eval_precision": 0.8793103448275862, "eval_recall": 0.9807692307692307, "eval_runtime": 50.0121, "eval_samples_per_second": 5.419, "eval_steps_per_second": 0.18, "step": 1760 }, { "epoch": 0.8347949751125859, "grad_norm": 3.5248119831085205, "learning_rate": 1.6135948310261272e-06, "loss": 0.1413, "step": 1761 }, { "epoch": 0.8352690210950462, "grad_norm": 3.0675511360168457, "learning_rate": 1.6045906260150212e-06, "loss": 0.1353, "step": 1762 }, { "epoch": 0.8357430670775066, "grad_norm": 7.004110336303711, "learning_rate": 1.595609422130494e-06, "loss": 0.1473, "step": 1763 }, { "epoch": 0.8362171130599668, "grad_norm": 2.630929470062256, 
"learning_rate": 1.5866512439785876e-06, "loss": 0.0991, "step": 1764 }, { "epoch": 0.8366911590424271, "grad_norm": 3.722667694091797, "learning_rate": 1.5777161161022614e-06, "loss": 0.1403, "step": 1765 }, { "epoch": 0.8371652050248874, "grad_norm": 3.6338841915130615, "learning_rate": 1.5688040629813229e-06, "loss": 0.0963, "step": 1766 }, { "epoch": 0.8376392510073477, "grad_norm": 2.220780611038208, "learning_rate": 1.5599151090323627e-06, "loss": 0.094, "step": 1767 }, { "epoch": 0.838113296989808, "grad_norm": 5.549960136413574, "learning_rate": 1.5510492786086828e-06, "loss": 0.2037, "step": 1768 }, { "epoch": 0.8385873429722683, "grad_norm": 3.976283073425293, "learning_rate": 1.5422065960002364e-06, "loss": 0.1453, "step": 1769 }, { "epoch": 0.8390613889547286, "grad_norm": 5.508865833282471, "learning_rate": 1.5333870854335554e-06, "loss": 0.2064, "step": 1770 }, { "epoch": 0.839535434937189, "grad_norm": 3.408942222595215, "learning_rate": 1.5245907710716912e-06, "loss": 0.1245, "step": 1771 }, { "epoch": 0.8400094809196492, "grad_norm": 7.579738616943359, "learning_rate": 1.5158176770141342e-06, "loss": 0.1888, "step": 1772 }, { "epoch": 0.8404835269021095, "grad_norm": 6.760648727416992, "learning_rate": 1.5070678272967654e-06, "loss": 0.2148, "step": 1773 }, { "epoch": 0.8409575728845698, "grad_norm": 3.159531593322754, "learning_rate": 1.4983412458917846e-06, "loss": 0.1209, "step": 1774 }, { "epoch": 0.8414316188670301, "grad_norm": 4.622367858886719, "learning_rate": 1.4896379567076369e-06, "loss": 0.1917, "step": 1775 }, { "epoch": 0.8419056648494904, "grad_norm": 3.152876377105713, "learning_rate": 1.4809579835889564e-06, "loss": 0.0609, "step": 1776 }, { "epoch": 0.8423797108319507, "grad_norm": 3.5214357376098633, "learning_rate": 1.472301350316495e-06, "loss": 0.1228, "step": 1777 }, { "epoch": 0.842853756814411, "grad_norm": 2.849489450454712, "learning_rate": 1.4636680806070625e-06, "loss": 0.0938, "step": 1778 }, { "epoch": 0.8433278027968713, "grad_norm": 5.918670654296875, "learning_rate": 1.4550581981134571e-06, "loss": 0.185, "step": 1779 }, { "epoch": 0.8438018487793316, "grad_norm": 4.758391857147217, "learning_rate": 1.4464717264244043e-06, "loss": 0.1811, "step": 1780 }, { "epoch": 0.8438018487793316, "eval_accuracy": 0.9943639291465378, "eval_f1": 0.9357798165137615, "eval_loss": 0.012266670353710651, "eval_precision": 0.8947368421052632, "eval_recall": 0.9807692307692307, "eval_runtime": 50.3857, "eval_samples_per_second": 5.379, "eval_steps_per_second": 0.179, "step": 1780 }, { "epoch": 0.8442758947617919, "grad_norm": 3.0533530712127686, "learning_rate": 1.43790868906449e-06, "loss": 0.0867, "step": 1781 }, { "epoch": 0.8447499407442522, "grad_norm": 3.962001085281372, "learning_rate": 1.429369109494091e-06, "loss": 0.182, "step": 1782 }, { "epoch": 0.8452239867267125, "grad_norm": 4.5312180519104, "learning_rate": 1.4208530111093244e-06, "loss": 0.1194, "step": 1783 }, { "epoch": 0.8456980327091728, "grad_norm": 6.248788356781006, "learning_rate": 1.4123604172419714e-06, "loss": 0.1418, "step": 1784 }, { "epoch": 0.8461720786916331, "grad_norm": 5.026639938354492, "learning_rate": 1.4038913511594166e-06, "loss": 0.1182, "step": 1785 }, { "epoch": 0.8466461246740934, "grad_norm": 3.611996650695801, "learning_rate": 1.395445836064586e-06, "loss": 0.1078, "step": 1786 }, { "epoch": 0.8471201706565537, "grad_norm": 6.5539984703063965, "learning_rate": 1.3870238950958837e-06, "loss": 0.15, "step": 1787 }, { "epoch": 0.847594216639014, "grad_norm": 
5.68766450881958, "learning_rate": 1.378625551327124e-06, "loss": 0.1344, "step": 1788 }, { "epoch": 0.8480682626214743, "grad_norm": 5.3630828857421875, "learning_rate": 1.3702508277674731e-06, "loss": 0.2046, "step": 1789 }, { "epoch": 0.8485423086039345, "grad_norm": 3.6046223640441895, "learning_rate": 1.3618997473613837e-06, "loss": 0.1093, "step": 1790 }, { "epoch": 0.8490163545863949, "grad_norm": 5.983584403991699, "learning_rate": 1.353572332988534e-06, "loss": 0.1991, "step": 1791 }, { "epoch": 0.8494904005688552, "grad_norm": 10.93433952331543, "learning_rate": 1.3452686074637632e-06, "loss": 0.1925, "step": 1792 }, { "epoch": 0.8499644465513154, "grad_norm": 5.214844226837158, "learning_rate": 1.3369885935370086e-06, "loss": 0.1227, "step": 1793 }, { "epoch": 0.8504384925337758, "grad_norm": 4.261415481567383, "learning_rate": 1.328732313893245e-06, "loss": 0.1287, "step": 1794 }, { "epoch": 0.850912538516236, "grad_norm": 4.207308292388916, "learning_rate": 1.320499791152421e-06, "loss": 0.2133, "step": 1795 }, { "epoch": 0.8513865844986964, "grad_norm": 4.4212141036987305, "learning_rate": 1.3122910478693984e-06, "loss": 0.1429, "step": 1796 }, { "epoch": 0.8518606304811567, "grad_norm": 4.221442222595215, "learning_rate": 1.30410610653389e-06, "loss": 0.1585, "step": 1797 }, { "epoch": 0.8523346764636169, "grad_norm": 3.037301778793335, "learning_rate": 1.295944989570398e-06, "loss": 0.1518, "step": 1798 }, { "epoch": 0.8528087224460773, "grad_norm": 4.325135231018066, "learning_rate": 1.2878077193381511e-06, "loss": 0.1136, "step": 1799 }, { "epoch": 0.8532827684285376, "grad_norm": 4.634499549865723, "learning_rate": 1.279694318131046e-06, "loss": 0.1592, "step": 1800 }, { "epoch": 0.8532827684285376, "eval_accuracy": 0.9943639291465378, "eval_f1": 0.9369369369369369, "eval_loss": 0.01357492059469223, "eval_precision": 0.8813559322033898, "eval_recall": 1.0, "eval_runtime": 51.1253, "eval_samples_per_second": 5.301, "eval_steps_per_second": 0.176, "step": 1800 }, { "epoch": 0.8537568144109978, "grad_norm": 3.1635870933532715, "learning_rate": 1.2716048081775823e-06, "loss": 0.1749, "step": 1801 }, { "epoch": 0.8542308603934582, "grad_norm": 8.236771583557129, "learning_rate": 1.2635392116408095e-06, "loss": 0.1951, "step": 1802 }, { "epoch": 0.8547049063759184, "grad_norm": 7.184986114501953, "learning_rate": 1.2554975506182533e-06, "loss": 0.157, "step": 1803 }, { "epoch": 0.8551789523583787, "grad_norm": 2.5440175533294678, "learning_rate": 1.247479847141867e-06, "loss": 0.0708, "step": 1804 }, { "epoch": 0.8556529983408391, "grad_norm": 3.4107348918914795, "learning_rate": 1.2394861231779677e-06, "loss": 0.0968, "step": 1805 }, { "epoch": 0.8561270443232993, "grad_norm": 8.80566692352295, "learning_rate": 1.2315164006271718e-06, "loss": 0.1692, "step": 1806 }, { "epoch": 0.8566010903057597, "grad_norm": 5.312666416168213, "learning_rate": 1.2235707013243426e-06, "loss": 0.119, "step": 1807 }, { "epoch": 0.85707513628822, "grad_norm": 7.94031286239624, "learning_rate": 1.2156490470385207e-06, "loss": 0.1993, "step": 1808 }, { "epoch": 0.8575491822706802, "grad_norm": 3.85893177986145, "learning_rate": 1.2077514594728778e-06, "loss": 0.1085, "step": 1809 }, { "epoch": 0.8580232282531406, "grad_norm": 2.8813283443450928, "learning_rate": 1.1998779602646438e-06, "loss": 0.1099, "step": 1810 }, { "epoch": 0.8584972742356008, "grad_norm": 4.950772762298584, "learning_rate": 1.1920285709850509e-06, "loss": 0.1064, "step": 1811 }, { "epoch": 0.8589713202180611, "grad_norm": 
3.9935288429260254, "learning_rate": 1.184203313139286e-06, "loss": 0.1145, "step": 1812 }, { "epoch": 0.8594453662005215, "grad_norm": 5.1902360916137695, "learning_rate": 1.1764022081664094e-06, "loss": 0.164, "step": 1813 }, { "epoch": 0.8599194121829817, "grad_norm": 4.6810150146484375, "learning_rate": 1.1686252774393181e-06, "loss": 0.1272, "step": 1814 }, { "epoch": 0.860393458165442, "grad_norm": 3.890429735183716, "learning_rate": 1.1608725422646782e-06, "loss": 0.1128, "step": 1815 }, { "epoch": 0.8608675041479024, "grad_norm": 9.929910659790039, "learning_rate": 1.1531440238828639e-06, "loss": 0.169, "step": 1816 }, { "epoch": 0.8613415501303626, "grad_norm": 3.39127516746521, "learning_rate": 1.1454397434679022e-06, "loss": 0.0916, "step": 1817 }, { "epoch": 0.861815596112823, "grad_norm": 3.8935232162475586, "learning_rate": 1.137759722127415e-06, "loss": 0.1236, "step": 1818 }, { "epoch": 0.8622896420952832, "grad_norm": 4.592057704925537, "learning_rate": 1.1301039809025628e-06, "loss": 0.1573, "step": 1819 }, { "epoch": 0.8627636880777435, "grad_norm": 3.4906246662139893, "learning_rate": 1.1224725407679814e-06, "loss": 0.0799, "step": 1820 }, { "epoch": 0.8627636880777435, "eval_accuracy": 0.9943639291465378, "eval_f1": 0.9369369369369369, "eval_loss": 0.014933480881154537, "eval_precision": 0.8813559322033898, "eval_recall": 1.0, "eval_runtime": 49.8899, "eval_samples_per_second": 5.432, "eval_steps_per_second": 0.18, "step": 1820 }, { "epoch": 0.8632377340602039, "grad_norm": 7.27462911605835, "learning_rate": 1.1148654226317325e-06, "loss": 0.1538, "step": 1821 }, { "epoch": 0.8637117800426641, "grad_norm": 3.6112170219421387, "learning_rate": 1.1072826473352394e-06, "loss": 0.1337, "step": 1822 }, { "epoch": 0.8641858260251244, "grad_norm": 4.936607360839844, "learning_rate": 1.0997242356532335e-06, "loss": 0.152, "step": 1823 }, { "epoch": 0.8646598720075848, "grad_norm": 7.105523109436035, "learning_rate": 1.0921902082936987e-06, "loss": 0.1772, "step": 1824 }, { "epoch": 0.865133917990045, "grad_norm": 7.956032752990723, "learning_rate": 1.0846805858978038e-06, "loss": 0.1794, "step": 1825 }, { "epoch": 0.8656079639725053, "grad_norm": 2.654674530029297, "learning_rate": 1.0771953890398679e-06, "loss": 0.1223, "step": 1826 }, { "epoch": 0.8660820099549656, "grad_norm": 5.592787265777588, "learning_rate": 1.0697346382272822e-06, "loss": 0.1459, "step": 1827 }, { "epoch": 0.8665560559374259, "grad_norm": 9.735966682434082, "learning_rate": 1.0622983539004628e-06, "loss": 0.176, "step": 1828 }, { "epoch": 0.8670301019198863, "grad_norm": 7.254702568054199, "learning_rate": 1.054886556432798e-06, "loss": 0.1962, "step": 1829 }, { "epoch": 0.8675041479023465, "grad_norm": 3.388284206390381, "learning_rate": 1.047499266130585e-06, "loss": 0.1744, "step": 1830 }, { "epoch": 0.8679781938848068, "grad_norm": 5.419455051422119, "learning_rate": 1.0401365032329812e-06, "loss": 0.2004, "step": 1831 }, { "epoch": 0.8684522398672672, "grad_norm": 5.385417938232422, "learning_rate": 1.0327982879119425e-06, "loss": 0.2027, "step": 1832 }, { "epoch": 0.8689262858497274, "grad_norm": 4.17827033996582, "learning_rate": 1.0254846402721764e-06, "loss": 0.1501, "step": 1833 }, { "epoch": 0.8694003318321877, "grad_norm": 4.2940354347229, "learning_rate": 1.0181955803510724e-06, "loss": 0.1162, "step": 1834 }, { "epoch": 0.869874377814648, "grad_norm": 8.596222877502441, "learning_rate": 1.010931128118665e-06, "loss": 0.1216, "step": 1835 }, { "epoch": 0.8703484237971083, "grad_norm": 
4.9963884353637695, "learning_rate": 1.0036913034775675e-06, "loss": 0.1779, "step": 1836 }, { "epoch": 0.8708224697795686, "grad_norm": 4.238993167877197, "learning_rate": 9.964761262629196e-07, "loss": 0.1237, "step": 1837 }, { "epoch": 0.8712965157620289, "grad_norm": 8.45755672454834, "learning_rate": 9.892856162423348e-07, "loss": 0.1578, "step": 1838 }, { "epoch": 0.8717705617444892, "grad_norm": 7.35408353805542, "learning_rate": 9.821197931158455e-07, "loss": 0.2077, "step": 1839 }, { "epoch": 0.8722446077269496, "grad_norm": 4.194153785705566, "learning_rate": 9.749786765158464e-07, "loss": 0.1294, "step": 1840 }, { "epoch": 0.8722446077269496, "eval_accuracy": 0.9943639291465378, "eval_f1": 0.9369369369369369, "eval_loss": 0.012980014085769653, "eval_precision": 0.8813559322033898, "eval_recall": 1.0, "eval_runtime": 49.6245, "eval_samples_per_second": 5.461, "eval_steps_per_second": 0.181, "step": 1840 }, { "epoch": 0.8727186537094098, "grad_norm": 8.562105178833008, "learning_rate": 9.678622860070474e-07, "loss": 0.2731, "step": 1841 }, { "epoch": 0.8731926996918701, "grad_norm": 7.327461242675781, "learning_rate": 9.607706410864083e-07, "loss": 0.1846, "step": 1842 }, { "epoch": 0.8736667456743304, "grad_norm": 5.737156867980957, "learning_rate": 9.537037611831047e-07, "loss": 0.2219, "step": 1843 }, { "epoch": 0.8741407916567907, "grad_norm": 3.665459156036377, "learning_rate": 9.466616656584493e-07, "loss": 0.1163, "step": 1844 }, { "epoch": 0.874614837639251, "grad_norm": 5.575207710266113, "learning_rate": 9.396443738058614e-07, "loss": 0.1411, "step": 1845 }, { "epoch": 0.8750888836217113, "grad_norm": 2.1095454692840576, "learning_rate": 9.32651904850801e-07, "loss": 0.0826, "step": 1846 }, { "epoch": 0.8755629296041716, "grad_norm": 8.68192195892334, "learning_rate": 9.256842779507236e-07, "loss": 0.1324, "step": 1847 }, { "epoch": 0.8760369755866318, "grad_norm": 7.812302112579346, "learning_rate": 9.187415121950194e-07, "loss": 0.2442, "step": 1848 }, { "epoch": 0.8765110215690922, "grad_norm": 3.16363787651062, "learning_rate": 9.118236266049707e-07, "loss": 0.1255, "step": 1849 }, { "epoch": 0.8769850675515525, "grad_norm": 5.470139503479004, "learning_rate": 9.049306401336922e-07, "loss": 0.1974, "step": 1850 }, { "epoch": 0.8774591135340128, "grad_norm": 3.268472194671631, "learning_rate": 8.980625716660829e-07, "loss": 0.0863, "step": 1851 }, { "epoch": 0.8779331595164731, "grad_norm": 2.927609920501709, "learning_rate": 8.912194400187712e-07, "loss": 0.0827, "step": 1852 }, { "epoch": 0.8784072054989334, "grad_norm": 6.857902526855469, "learning_rate": 8.84401263940069e-07, "loss": 0.1555, "step": 1853 }, { "epoch": 0.8788812514813937, "grad_norm": 4.798774719238281, "learning_rate": 8.776080621099159e-07, "loss": 0.1973, "step": 1854 }, { "epoch": 0.879355297463854, "grad_norm": 4.6252946853637695, "learning_rate": 8.708398531398233e-07, "loss": 0.1612, "step": 1855 }, { "epoch": 0.8798293434463142, "grad_norm": 4.394217491149902, "learning_rate": 8.640966555728369e-07, "loss": 0.1261, "step": 1856 }, { "epoch": 0.8803033894287746, "grad_norm": 6.826826095581055, "learning_rate": 8.573784878834734e-07, "loss": 0.1587, "step": 1857 }, { "epoch": 0.8807774354112349, "grad_norm": 10.54698657989502, "learning_rate": 8.506853684776773e-07, "loss": 0.154, "step": 1858 }, { "epoch": 0.8812514813936951, "grad_norm": 4.272285461425781, "learning_rate": 8.440173156927612e-07, "loss": 0.1157, "step": 1859 }, { "epoch": 0.8817255273761555, "grad_norm": 5.016007900238037, 
"learning_rate": 8.373743477973739e-07, "loss": 0.2076, "step": 1860 }, { "epoch": 0.8817255273761555, "eval_accuracy": 0.9935587761674718, "eval_f1": 0.9272727272727272, "eval_loss": 0.012083540670573711, "eval_precision": 0.8793103448275862, "eval_recall": 0.9807692307692307, "eval_runtime": 49.9345, "eval_samples_per_second": 5.427, "eval_steps_per_second": 0.18, "step": 1860 }, { "epoch": 0.8821995733586158, "grad_norm": 5.868921756744385, "learning_rate": 8.307564829914272e-07, "loss": 0.168, "step": 1861 }, { "epoch": 0.8826736193410761, "grad_norm": 8.008037567138672, "learning_rate": 8.241637394060619e-07, "loss": 0.0711, "step": 1862 }, { "epoch": 0.8831476653235364, "grad_norm": 4.42324686050415, "learning_rate": 8.175961351035943e-07, "loss": 0.0957, "step": 1863 }, { "epoch": 0.8836217113059966, "grad_norm": 5.00337553024292, "learning_rate": 8.110536880774655e-07, "loss": 0.1814, "step": 1864 }, { "epoch": 0.884095757288457, "grad_norm": 4.169017791748047, "learning_rate": 8.045364162521884e-07, "loss": 0.112, "step": 1865 }, { "epoch": 0.8845698032709173, "grad_norm": 8.79692554473877, "learning_rate": 7.98044337483308e-07, "loss": 0.2539, "step": 1866 }, { "epoch": 0.8850438492533775, "grad_norm": 6.905977725982666, "learning_rate": 7.915774695573452e-07, "loss": 0.1628, "step": 1867 }, { "epoch": 0.8855178952358379, "grad_norm": 3.759481430053711, "learning_rate": 7.851358301917511e-07, "loss": 0.17, "step": 1868 }, { "epoch": 0.8859919412182982, "grad_norm": 4.754873275756836, "learning_rate": 7.787194370348549e-07, "loss": 0.2469, "step": 1869 }, { "epoch": 0.8864659872007584, "grad_norm": 5.5656280517578125, "learning_rate": 7.723283076658217e-07, "loss": 0.1551, "step": 1870 }, { "epoch": 0.8869400331832188, "grad_norm": 2.3625526428222656, "learning_rate": 7.659624595945969e-07, "loss": 0.0846, "step": 1871 }, { "epoch": 0.887414079165679, "grad_norm": 10.592917442321777, "learning_rate": 7.596219102618652e-07, "loss": 0.2762, "step": 1872 }, { "epoch": 0.8878881251481394, "grad_norm": 5.2067952156066895, "learning_rate": 7.533066770389985e-07, "loss": 0.1768, "step": 1873 }, { "epoch": 0.8883621711305997, "grad_norm": 4.715292930603027, "learning_rate": 7.470167772280091e-07, "loss": 0.1107, "step": 1874 }, { "epoch": 0.8888362171130599, "grad_norm": 3.512718439102173, "learning_rate": 7.40752228061502e-07, "loss": 0.1145, "step": 1875 }, { "epoch": 0.8893102630955203, "grad_norm": 3.8536527156829834, "learning_rate": 7.345130467026318e-07, "loss": 0.1473, "step": 1876 }, { "epoch": 0.8897843090779806, "grad_norm": 3.4637436866760254, "learning_rate": 7.282992502450447e-07, "loss": 0.1661, "step": 1877 }, { "epoch": 0.8902583550604408, "grad_norm": 2.695815324783325, "learning_rate": 7.221108557128509e-07, "loss": 0.139, "step": 1878 }, { "epoch": 0.8907324010429012, "grad_norm": 4.534758567810059, "learning_rate": 7.159478800605546e-07, "loss": 0.1425, "step": 1879 }, { "epoch": 0.8912064470253614, "grad_norm": 7.158409595489502, "learning_rate": 7.098103401730272e-07, "loss": 0.1628, "step": 1880 }, { "epoch": 0.8912064470253614, "eval_accuracy": 0.9935587761674718, "eval_f1": 0.9272727272727272, "eval_loss": 0.012087295763194561, "eval_precision": 0.8793103448275862, "eval_recall": 0.9807692307692307, "eval_runtime": 49.8216, "eval_samples_per_second": 5.439, "eval_steps_per_second": 0.181, "step": 1880 }, { "epoch": 0.8916804930078217, "grad_norm": 4.419368267059326, "learning_rate": 7.03698252865449e-07, "loss": 0.0986, "step": 1881 }, { "epoch": 
0.8921545389902821, "grad_norm": 5.9724931716918945, "learning_rate": 6.976116348832684e-07, "loss": 0.2064, "step": 1882 }, { "epoch": 0.8926285849727423, "grad_norm": 4.130607604980469, "learning_rate": 6.915505029021552e-07, "loss": 0.1445, "step": 1883 }, { "epoch": 0.8931026309552027, "grad_norm": 4.273713111877441, "learning_rate": 6.855148735279527e-07, "loss": 0.1389, "step": 1884 }, { "epoch": 0.893576676937663, "grad_norm": 5.399996280670166, "learning_rate": 6.795047632966379e-07, "loss": 0.1461, "step": 1885 }, { "epoch": 0.8940507229201232, "grad_norm": 6.056548118591309, "learning_rate": 6.735201886742671e-07, "loss": 0.1935, "step": 1886 }, { "epoch": 0.8945247689025836, "grad_norm": 5.537142276763916, "learning_rate": 6.675611660569403e-07, "loss": 0.1816, "step": 1887 }, { "epoch": 0.8949988148850438, "grad_norm": 6.469786167144775, "learning_rate": 6.616277117707493e-07, "loss": 0.1772, "step": 1888 }, { "epoch": 0.8954728608675041, "grad_norm": 4.300382137298584, "learning_rate": 6.55719842071737e-07, "loss": 0.0932, "step": 1889 }, { "epoch": 0.8959469068499645, "grad_norm": 6.920015335083008, "learning_rate": 6.498375731458529e-07, "loss": 0.208, "step": 1890 }, { "epoch": 0.8964209528324247, "grad_norm": 5.358169078826904, "learning_rate": 6.439809211089043e-07, "loss": 0.1518, "step": 1891 }, { "epoch": 0.896894998814885, "grad_norm": 9.420503616333008, "learning_rate": 6.381499020065163e-07, "loss": 0.1817, "step": 1892 }, { "epoch": 0.8973690447973454, "grad_norm": 5.0321855545043945, "learning_rate": 6.323445318140886e-07, "loss": 0.1786, "step": 1893 }, { "epoch": 0.8978430907798056, "grad_norm": 4.13561487197876, "learning_rate": 6.265648264367452e-07, "loss": 0.1003, "step": 1894 }, { "epoch": 0.898317136762266, "grad_norm": 7.733060359954834, "learning_rate": 6.20810801709305e-07, "loss": 0.216, "step": 1895 }, { "epoch": 0.8987911827447262, "grad_norm": 2.7273457050323486, "learning_rate": 6.15082473396218e-07, "loss": 0.1149, "step": 1896 }, { "epoch": 0.8992652287271865, "grad_norm": 2.0938057899475098, "learning_rate": 6.093798571915389e-07, "loss": 0.0787, "step": 1897 }, { "epoch": 0.8997392747096469, "grad_norm": 6.044375896453857, "learning_rate": 6.037029687188767e-07, "loss": 0.1878, "step": 1898 }, { "epoch": 0.9002133206921071, "grad_norm": 2.365513563156128, "learning_rate": 5.980518235313549e-07, "loss": 0.1065, "step": 1899 }, { "epoch": 0.9006873666745674, "grad_norm": 4.049135684967041, "learning_rate": 5.924264371115652e-07, "loss": 0.156, "step": 1900 }, { "epoch": 0.9006873666745674, "eval_accuracy": 0.9935587761674718, "eval_f1": 0.9272727272727272, "eval_loss": 0.012837257236242294, "eval_precision": 0.8793103448275862, "eval_recall": 0.9807692307692307, "eval_runtime": 49.4558, "eval_samples_per_second": 5.48, "eval_steps_per_second": 0.182, "step": 1900 }, { "epoch": 0.9011614126570278, "grad_norm": 4.734807014465332, "learning_rate": 5.868268248715292e-07, "loss": 0.2087, "step": 1901 }, { "epoch": 0.901635458639488, "grad_norm": 3.426779270172119, "learning_rate": 5.812530021526541e-07, "loss": 0.1254, "step": 1902 }, { "epoch": 0.9021095046219483, "grad_norm": 6.730406761169434, "learning_rate": 5.7570498422569e-07, "loss": 0.1027, "step": 1903 }, { "epoch": 0.9025835506044086, "grad_norm": 7.400092601776123, "learning_rate": 5.701827862906894e-07, "loss": 0.2117, "step": 1904 }, { "epoch": 0.9030575965868689, "grad_norm": 5.896395206451416, "learning_rate": 5.646864234769644e-07, "loss": 0.2147, "step": 1905 }, { "epoch": 
0.9035316425693293, "grad_norm": 4.465495586395264, "learning_rate": 5.592159108430472e-07, "loss": 0.0945, "step": 1906 }, { "epoch": 0.9040056885517895, "grad_norm": 3.8033761978149414, "learning_rate": 5.537712633766479e-07, "loss": 0.0876, "step": 1907 }, { "epoch": 0.9044797345342498, "grad_norm": 4.422494888305664, "learning_rate": 5.483524959946097e-07, "loss": 0.0863, "step": 1908 }, { "epoch": 0.9049537805167102, "grad_norm": 5.122066974639893, "learning_rate": 5.429596235428746e-07, "loss": 0.1666, "step": 1909 }, { "epoch": 0.9054278264991704, "grad_norm": 4.508274078369141, "learning_rate": 5.375926607964399e-07, "loss": 0.1302, "step": 1910 }, { "epoch": 0.9059018724816307, "grad_norm": 5.495452880859375, "learning_rate": 5.322516224593143e-07, "loss": 0.1386, "step": 1911 }, { "epoch": 0.906375918464091, "grad_norm": 4.296015739440918, "learning_rate": 5.269365231644851e-07, "loss": 0.1947, "step": 1912 }, { "epoch": 0.9068499644465513, "grad_norm": 5.446202278137207, "learning_rate": 5.216473774738706e-07, "loss": 0.2568, "step": 1913 }, { "epoch": 0.9073240104290116, "grad_norm": 3.5428926944732666, "learning_rate": 5.163841998782837e-07, "loss": 0.0723, "step": 1914 }, { "epoch": 0.9077980564114719, "grad_norm": 3.931621789932251, "learning_rate": 5.111470047973932e-07, "loss": 0.1773, "step": 1915 }, { "epoch": 0.9082721023939322, "grad_norm": 2.9896233081817627, "learning_rate": 5.059358065796816e-07, "loss": 0.1289, "step": 1916 }, { "epoch": 0.9087461483763926, "grad_norm": 4.83162784576416, "learning_rate": 5.007506195024059e-07, "loss": 0.1292, "step": 1917 }, { "epoch": 0.9092201943588528, "grad_norm": 3.8033645153045654, "learning_rate": 4.955914577715615e-07, "loss": 0.1018, "step": 1918 }, { "epoch": 0.9096942403413131, "grad_norm": 3.928222417831421, "learning_rate": 4.904583355218429e-07, "loss": 0.1198, "step": 1919 }, { "epoch": 0.9101682863237734, "grad_norm": 5.3848557472229, "learning_rate": 4.853512668166005e-07, "loss": 0.0762, "step": 1920 }, { "epoch": 0.9101682863237734, "eval_accuracy": 0.9935587761674718, "eval_f1": 0.9272727272727272, "eval_loss": 0.012816701084375381, "eval_precision": 0.8793103448275862, "eval_recall": 0.9807692307692307, "eval_runtime": 50.4456, "eval_samples_per_second": 5.372, "eval_steps_per_second": 0.178, "step": 1920 }, { "epoch": 0.9106423323062337, "grad_norm": 5.235629558563232, "learning_rate": 4.802702656478053e-07, "loss": 0.1394, "step": 1921 }, { "epoch": 0.911116378288694, "grad_norm": 6.737102031707764, "learning_rate": 4.752153459360143e-07, "loss": 0.1753, "step": 1922 }, { "epoch": 0.9115904242711543, "grad_norm": 6.279690265655518, "learning_rate": 4.701865215303236e-07, "loss": 0.1381, "step": 1923 }, { "epoch": 0.9120644702536146, "grad_norm": 5.9221086502075195, "learning_rate": 4.6518380620833694e-07, "loss": 0.1984, "step": 1924 }, { "epoch": 0.9125385162360748, "grad_norm": 3.1503348350524902, "learning_rate": 4.602072136761282e-07, "loss": 0.1374, "step": 1925 }, { "epoch": 0.9130125622185352, "grad_norm": 5.046225070953369, "learning_rate": 4.5525675756819987e-07, "loss": 0.1439, "step": 1926 }, { "epoch": 0.9134866082009955, "grad_norm": 4.287316799163818, "learning_rate": 4.503324514474483e-07, "loss": 0.1769, "step": 1927 }, { "epoch": 0.9139606541834558, "grad_norm": 3.769968032836914, "learning_rate": 4.4543430880512604e-07, "loss": 0.1381, "step": 1928 }, { "epoch": 0.9144347001659161, "grad_norm": 3.910022020339966, "learning_rate": 4.4056234306080415e-07, "loss": 0.142, "step": 1929 }, { 
"epoch": 0.9149087461483764, "grad_norm": 2.3405351638793945, "learning_rate": 4.357165675623376e-07, "loss": 0.1014, "step": 1930 }, { "epoch": 0.9153827921308367, "grad_norm": 5.19395637512207, "learning_rate": 4.3089699558582776e-07, "loss": 0.1192, "step": 1931 }, { "epoch": 0.915856838113297, "grad_norm": 4.177900314331055, "learning_rate": 4.261036403355823e-07, "loss": 0.1327, "step": 1932 }, { "epoch": 0.9163308840957572, "grad_norm": 8.132122039794922, "learning_rate": 4.2133651494408513e-07, "loss": 0.1815, "step": 1933 }, { "epoch": 0.9168049300782176, "grad_norm": 4.363158702850342, "learning_rate": 4.165956324719556e-07, "loss": 0.1009, "step": 1934 }, { "epoch": 0.9172789760606779, "grad_norm": 2.370462417602539, "learning_rate": 4.1188100590791704e-07, "loss": 0.0722, "step": 1935 }, { "epoch": 0.9177530220431381, "grad_norm": 3.8629467487335205, "learning_rate": 4.0719264816875713e-07, "loss": 0.1657, "step": 1936 }, { "epoch": 0.9182270680255985, "grad_norm": 5.270659923553467, "learning_rate": 4.0253057209929556e-07, "loss": 0.1906, "step": 1937 }, { "epoch": 0.9187011140080588, "grad_norm": 3.739020824432373, "learning_rate": 3.9789479047234293e-07, "loss": 0.1354, "step": 1938 }, { "epoch": 0.9191751599905191, "grad_norm": 3.877326011657715, "learning_rate": 3.9328531598867517e-07, "loss": 0.1159, "step": 1939 }, { "epoch": 0.9196492059729794, "grad_norm": 3.6506576538085938, "learning_rate": 3.887021612769937e-07, "loss": 0.1372, "step": 1940 }, { "epoch": 0.9196492059729794, "eval_accuracy": 0.9935587761674718, "eval_f1": 0.9272727272727272, "eval_loss": 0.012422804720699787, "eval_precision": 0.8793103448275862, "eval_recall": 0.9807692307692307, "eval_runtime": 49.6153, "eval_samples_per_second": 5.462, "eval_steps_per_second": 0.181, "step": 1940 }, { "epoch": 0.9201232519554396, "grad_norm": 4.246330261230469, "learning_rate": 3.841453388938876e-07, "loss": 0.1756, "step": 1941 }, { "epoch": 0.9205972979379, "grad_norm": 4.453822135925293, "learning_rate": 3.7961486132380487e-07, "loss": 0.0994, "step": 1942 }, { "epoch": 0.9210713439203603, "grad_norm": 3.8408455848693848, "learning_rate": 3.7511074097901557e-07, "loss": 0.1043, "step": 1943 }, { "epoch": 0.9215453899028205, "grad_norm": 4.067150592803955, "learning_rate": 3.7063299019957867e-07, "loss": 0.1134, "step": 1944 }, { "epoch": 0.9220194358852809, "grad_norm": 6.159415245056152, "learning_rate": 3.661816212533076e-07, "loss": 0.1361, "step": 1945 }, { "epoch": 0.9224934818677412, "grad_norm": 2.458495616912842, "learning_rate": 3.617566463357336e-07, "loss": 0.0948, "step": 1946 }, { "epoch": 0.9229675278502014, "grad_norm": 6.143227577209473, "learning_rate": 3.5735807757008354e-07, "loss": 0.2176, "step": 1947 }, { "epoch": 0.9234415738326618, "grad_norm": 3.9085354804992676, "learning_rate": 3.529859270072289e-07, "loss": 0.1214, "step": 1948 }, { "epoch": 0.923915619815122, "grad_norm": 4.7195963859558105, "learning_rate": 3.4864020662566775e-07, "loss": 0.1125, "step": 1949 }, { "epoch": 0.9243896657975824, "grad_norm": 4.297183990478516, "learning_rate": 3.443209283314863e-07, "loss": 0.1678, "step": 1950 }, { "epoch": 0.9248637117800427, "grad_norm": 2.1935582160949707, "learning_rate": 3.4002810395832753e-07, "loss": 0.0998, "step": 1951 }, { "epoch": 0.9253377577625029, "grad_norm": 4.309812068939209, "learning_rate": 3.357617452673545e-07, "loss": 0.0861, "step": 1952 }, { "epoch": 0.9258118037449633, "grad_norm": 5.1253743171691895, "learning_rate": 3.3152186394722506e-07, "loss": 0.1119, 
"step": 1953 }, { "epoch": 0.9262858497274236, "grad_norm": 4.127727031707764, "learning_rate": 3.27308471614054e-07, "loss": 0.1562, "step": 1954 }, { "epoch": 0.9267598957098838, "grad_norm": 3.679004430770874, "learning_rate": 3.2312157981138626e-07, "loss": 0.1136, "step": 1955 }, { "epoch": 0.9272339416923442, "grad_norm": 6.002187728881836, "learning_rate": 3.189612000101594e-07, "loss": 0.1292, "step": 1956 }, { "epoch": 0.9277079876748044, "grad_norm": 4.653674125671387, "learning_rate": 3.148273436086757e-07, "loss": 0.1301, "step": 1957 }, { "epoch": 0.9281820336572647, "grad_norm": 3.5616414546966553, "learning_rate": 3.107200219325746e-07, "loss": 0.1325, "step": 1958 }, { "epoch": 0.9286560796397251, "grad_norm": 3.704566240310669, "learning_rate": 3.0663924623479337e-07, "loss": 0.1194, "step": 1959 }, { "epoch": 0.9291301256221853, "grad_norm": 3.3392817974090576, "learning_rate": 3.0258502769553996e-07, "loss": 0.0837, "step": 1960 }, { "epoch": 0.9291301256221853, "eval_accuracy": 0.9935587761674718, "eval_f1": 0.9272727272727272, "eval_loss": 0.012340452522039413, "eval_precision": 0.8793103448275862, "eval_recall": 0.9807692307692307, "eval_runtime": 50.1351, "eval_samples_per_second": 5.405, "eval_steps_per_second": 0.18, "step": 1960 }, { "epoch": 0.9296041716046457, "grad_norm": 8.908299446105957, "learning_rate": 2.985573774222661e-07, "loss": 0.1625, "step": 1961 }, { "epoch": 0.930078217587106, "grad_norm": 3.4975991249084473, "learning_rate": 2.945563064496326e-07, "loss": 0.1712, "step": 1962 }, { "epoch": 0.9305522635695662, "grad_norm": 4.6506147384643555, "learning_rate": 2.905818257394799e-07, "loss": 0.1451, "step": 1963 }, { "epoch": 0.9310263095520266, "grad_norm": 1.9439915418624878, "learning_rate": 2.8663394618079875e-07, "loss": 0.0572, "step": 1964 }, { "epoch": 0.9315003555344868, "grad_norm": 4.1189374923706055, "learning_rate": 2.827126785897005e-07, "loss": 0.1361, "step": 1965 }, { "epoch": 0.9319744015169471, "grad_norm": 6.66880989074707, "learning_rate": 2.78818033709386e-07, "loss": 0.1701, "step": 1966 }, { "epoch": 0.9324484474994075, "grad_norm": 4.2832794189453125, "learning_rate": 2.7495002221011757e-07, "loss": 0.1376, "step": 1967 }, { "epoch": 0.9329224934818677, "grad_norm": 3.8820581436157227, "learning_rate": 2.7110865468919057e-07, "loss": 0.1829, "step": 1968 }, { "epoch": 0.933396539464328, "grad_norm": 4.808830261230469, "learning_rate": 2.672939416708986e-07, "loss": 0.1535, "step": 1969 }, { "epoch": 0.9338705854467884, "grad_norm": 3.9023189544677734, "learning_rate": 2.635058936065138e-07, "loss": 0.1386, "step": 1970 }, { "epoch": 0.9343446314292486, "grad_norm": 8.328058242797852, "learning_rate": 2.5974452087425437e-07, "loss": 0.2852, "step": 1971 }, { "epoch": 0.934818677411709, "grad_norm": 7.914390563964844, "learning_rate": 2.5600983377925046e-07, "loss": 0.1979, "step": 1972 }, { "epoch": 0.9352927233941692, "grad_norm": 3.548283815383911, "learning_rate": 2.523018425535251e-07, "loss": 0.1297, "step": 1973 }, { "epoch": 0.9357667693766295, "grad_norm": 6.728952884674072, "learning_rate": 2.486205573559608e-07, "loss": 0.1692, "step": 1974 }, { "epoch": 0.9362408153590899, "grad_norm": 3.3491721153259277, "learning_rate": 2.4496598827227213e-07, "loss": 0.0886, "step": 1975 }, { "epoch": 0.9367148613415501, "grad_norm": 5.318295001983643, "learning_rate": 2.413381453149799e-07, "loss": 0.1496, "step": 1976 }, { "epoch": 0.9371889073240104, "grad_norm": 8.961012840270996, "learning_rate": 2.3773703842338125e-07, 
"loss": 0.1294, "step": 1977 }, { "epoch": 0.9376629533064708, "grad_norm": 8.02442741394043, "learning_rate": 2.3416267746352528e-07, "loss": 0.1405, "step": 1978 }, { "epoch": 0.938136999288931, "grad_norm": 5.480352401733398, "learning_rate": 2.3061507222818303e-07, "loss": 0.1055, "step": 1979 }, { "epoch": 0.9386110452713913, "grad_norm": 4.235230445861816, "learning_rate": 2.2709423243682416e-07, "loss": 0.1353, "step": 1980 }, { "epoch": 0.9386110452713913, "eval_accuracy": 0.9935587761674718, "eval_f1": 0.9272727272727272, "eval_loss": 0.01255668792873621, "eval_precision": 0.8793103448275862, "eval_recall": 0.9807692307692307, "eval_runtime": 49.3085, "eval_samples_per_second": 5.496, "eval_steps_per_second": 0.183, "step": 1980 }, { "epoch": 0.9390850912538516, "grad_norm": 3.984555959701538, "learning_rate": 2.23600167735587e-07, "loss": 0.1236, "step": 1981 }, { "epoch": 0.9395591372363119, "grad_norm": 5.206995487213135, "learning_rate": 2.2013288769725194e-07, "loss": 0.2124, "step": 1982 }, { "epoch": 0.9400331832187723, "grad_norm": 4.533375263214111, "learning_rate": 2.166924018212202e-07, "loss": 0.1632, "step": 1983 }, { "epoch": 0.9405072292012325, "grad_norm": 5.9977641105651855, "learning_rate": 2.132787195334829e-07, "loss": 0.2039, "step": 1984 }, { "epoch": 0.9409812751836928, "grad_norm": 6.321089744567871, "learning_rate": 2.0989185018659431e-07, "loss": 0.1539, "step": 1985 }, { "epoch": 0.9414553211661532, "grad_norm": 4.989069938659668, "learning_rate": 2.0653180305965194e-07, "loss": 0.1501, "step": 1986 }, { "epoch": 0.9419293671486134, "grad_norm": 4.138362407684326, "learning_rate": 2.0319858735826648e-07, "loss": 0.1388, "step": 1987 }, { "epoch": 0.9424034131310737, "grad_norm": 11.884577751159668, "learning_rate": 1.9989221221453746e-07, "loss": 0.2071, "step": 1988 }, { "epoch": 0.942877459113534, "grad_norm": 4.623379707336426, "learning_rate": 1.966126866870277e-07, "loss": 0.1529, "step": 1989 }, { "epoch": 0.9433515050959943, "grad_norm": 5.523632526397705, "learning_rate": 1.9336001976074326e-07, "loss": 0.2024, "step": 1990 }, { "epoch": 0.9438255510784546, "grad_norm": 4.839412689208984, "learning_rate": 1.9013422034710016e-07, "loss": 0.1426, "step": 1991 }, { "epoch": 0.9442995970609149, "grad_norm": 9.59015941619873, "learning_rate": 1.869352972839067e-07, "loss": 0.2005, "step": 1992 }, { "epoch": 0.9447736430433752, "grad_norm": 4.234097957611084, "learning_rate": 1.837632593353389e-07, "loss": 0.1123, "step": 1993 }, { "epoch": 0.9452476890258356, "grad_norm": 4.442883491516113, "learning_rate": 1.8061811519191287e-07, "loss": 0.1053, "step": 1994 }, { "epoch": 0.9457217350082958, "grad_norm": 4.088728904724121, "learning_rate": 1.7749987347046471e-07, "loss": 0.0867, "step": 1995 }, { "epoch": 0.9461957809907561, "grad_norm": 4.195045471191406, "learning_rate": 1.7440854271412288e-07, "loss": 0.159, "step": 1996 }, { "epoch": 0.9466698269732164, "grad_norm": 4.0102739334106445, "learning_rate": 1.7134413139228812e-07, "loss": 0.1162, "step": 1997 }, { "epoch": 0.9471438729556767, "grad_norm": 5.108349800109863, "learning_rate": 1.6830664790061124e-07, "loss": 0.1445, "step": 1998 }, { "epoch": 0.947617918938137, "grad_norm": 6.93289852142334, "learning_rate": 1.6529610056096768e-07, "loss": 0.1204, "step": 1999 }, { "epoch": 0.9480919649205973, "grad_norm": 8.224555015563965, "learning_rate": 1.6231249762143187e-07, "loss": 0.1914, "step": 2000 }, { "epoch": 0.9480919649205973, "eval_accuracy": 0.9935587761674718, "eval_f1": 
0.9272727272727272, "eval_loss": 0.012721872888505459, "eval_precision": 0.8793103448275862, "eval_recall": 0.9807692307692307, "eval_runtime": 50.4202, "eval_samples_per_second": 5.375, "eval_steps_per_second": 0.179, "step": 2000 }, { "epoch": 0.9485660109030576, "grad_norm": 2.924222707748413, "learning_rate": 1.5935584725626062e-07, "loss": 0.0924, "step": 2001 }, { "epoch": 0.9490400568855178, "grad_norm": 5.106085300445557, "learning_rate": 1.5642615756586765e-07, "loss": 0.1919, "step": 2002 }, { "epoch": 0.9495141028679782, "grad_norm": 5.821203708648682, "learning_rate": 1.5352343657680234e-07, "loss": 0.1551, "step": 2003 }, { "epoch": 0.9499881488504385, "grad_norm": 4.752243518829346, "learning_rate": 1.506476922417266e-07, "loss": 0.16, "step": 2004 }, { "epoch": 0.9504621948328988, "grad_norm": 4.044118404388428, "learning_rate": 1.4779893243939358e-07, "loss": 0.1228, "step": 2005 }, { "epoch": 0.9509362408153591, "grad_norm": 5.809322834014893, "learning_rate": 1.4497716497462676e-07, "loss": 0.1309, "step": 2006 }, { "epoch": 0.9514102867978194, "grad_norm": 5.9313459396362305, "learning_rate": 1.4218239757829656e-07, "loss": 0.1126, "step": 2007 }, { "epoch": 0.9518843327802797, "grad_norm": 5.524699687957764, "learning_rate": 1.3941463790730248e-07, "loss": 0.0932, "step": 2008 }, { "epoch": 0.95235837876274, "grad_norm": 3.8316197395324707, "learning_rate": 1.3667389354454997e-07, "loss": 0.1288, "step": 2009 }, { "epoch": 0.9528324247452002, "grad_norm": 4.261562347412109, "learning_rate": 1.3396017199892808e-07, "loss": 0.0725, "step": 2010 }, { "epoch": 0.9533064707276606, "grad_norm": 3.045381546020508, "learning_rate": 1.312734807052929e-07, "loss": 0.1336, "step": 2011 }, { "epoch": 0.9537805167101209, "grad_norm": 4.582825183868408, "learning_rate": 1.2861382702444304e-07, "loss": 0.1122, "step": 2012 }, { "epoch": 0.9542545626925811, "grad_norm": 5.358804702758789, "learning_rate": 1.2598121824310305e-07, "loss": 0.2103, "step": 2013 }, { "epoch": 0.9547286086750415, "grad_norm": 8.944177627563477, "learning_rate": 1.2337566157390124e-07, "loss": 0.2294, "step": 2014 }, { "epoch": 0.9552026546575018, "grad_norm": 3.860495090484619, "learning_rate": 1.2079716415534958e-07, "loss": 0.1725, "step": 2015 }, { "epoch": 0.9556767006399621, "grad_norm": 5.890530586242676, "learning_rate": 1.1824573305182829e-07, "loss": 0.1347, "step": 2016 }, { "epoch": 0.9561507466224224, "grad_norm": 4.890679359436035, "learning_rate": 1.1572137525356019e-07, "loss": 0.1632, "step": 2017 }, { "epoch": 0.9566247926048826, "grad_norm": 3.409152030944824, "learning_rate": 1.1322409767659526e-07, "loss": 0.1673, "step": 2018 }, { "epoch": 0.957098838587343, "grad_norm": 2.9978771209716797, "learning_rate": 1.1075390716279167e-07, "loss": 0.0933, "step": 2019 }, { "epoch": 0.9575728845698033, "grad_norm": 4.279489994049072, "learning_rate": 1.0831081047979585e-07, "loss": 0.1072, "step": 2020 }, { "epoch": 0.9575728845698033, "eval_accuracy": 0.9935587761674718, "eval_f1": 0.9272727272727272, "eval_loss": 0.012736320495605469, "eval_precision": 0.8793103448275862, "eval_recall": 0.9807692307692307, "eval_runtime": 50.4208, "eval_samples_per_second": 5.375, "eval_steps_per_second": 0.178, "step": 2020 }, { "epoch": 0.9580469305522635, "grad_norm": 3.377288579940796, "learning_rate": 1.0589481432102588e-07, "loss": 0.1007, "step": 2021 }, { "epoch": 0.9585209765347239, "grad_norm": 4.943248271942139, "learning_rate": 1.0350592530564919e-07, "loss": 0.1345, "step": 2022 }, { "epoch": 
0.9589950225171842, "grad_norm": 3.178915500640869, "learning_rate": 1.0114414997856814e-07, "loss": 0.1501, "step": 2023 }, { "epoch": 0.9594690684996444, "grad_norm": 2.851790428161621, "learning_rate": 9.880949481040347e-08, "loss": 0.1128, "step": 2024 }, { "epoch": 0.9599431144821048, "grad_norm": 7.474143981933594, "learning_rate": 9.650196619747088e-08, "loss": 0.2338, "step": 2025 }, { "epoch": 0.960417160464565, "grad_norm": 4.426879405975342, "learning_rate": 9.422157046176772e-08, "loss": 0.1695, "step": 2026 }, { "epoch": 0.9608912064470254, "grad_norm": 4.276393890380859, "learning_rate": 9.19683138509564e-08, "loss": 0.1695, "step": 2027 }, { "epoch": 0.9613652524294857, "grad_norm": 4.4484357833862305, "learning_rate": 8.974220253834209e-08, "loss": 0.1489, "step": 2028 }, { "epoch": 0.9618392984119459, "grad_norm": 6.785750389099121, "learning_rate": 8.754324262286284e-08, "loss": 0.1951, "step": 2029 }, { "epoch": 0.9623133443944063, "grad_norm": 5.472995281219482, "learning_rate": 8.537144012906728e-08, "loss": 0.1067, "step": 2030 }, { "epoch": 0.9627873903768666, "grad_norm": 7.181637287139893, "learning_rate": 8.322680100710023e-08, "loss": 0.161, "step": 2031 }, { "epoch": 0.9632614363593268, "grad_norm": 2.844578504562378, "learning_rate": 8.110933113268604e-08, "loss": 0.1443, "step": 2032 }, { "epoch": 0.9637354823417872, "grad_norm": 4.869422435760498, "learning_rate": 7.901903630711416e-08, "loss": 0.0947, "step": 2033 }, { "epoch": 0.9642095283242474, "grad_norm": 8.479473114013672, "learning_rate": 7.695592225722137e-08, "loss": 0.1779, "step": 2034 }, { "epoch": 0.9646835743067077, "grad_norm": 6.677021503448486, "learning_rate": 7.491999463537403e-08, "loss": 0.2122, "step": 2035 }, { "epoch": 0.9651576202891681, "grad_norm": 6.709725379943848, "learning_rate": 7.291125901946027e-08, "loss": 0.2262, "step": 2036 }, { "epoch": 0.9656316662716283, "grad_norm": 9.639774322509766, "learning_rate": 7.092972091286454e-08, "loss": 0.1651, "step": 2037 }, { "epoch": 0.9661057122540887, "grad_norm": 4.166116714477539, "learning_rate": 6.897538574445972e-08, "loss": 0.0947, "step": 2038 }, { "epoch": 0.966579758236549, "grad_norm": 5.3391876220703125, "learning_rate": 6.704825886858946e-08, "loss": 0.1904, "step": 2039 }, { "epoch": 0.9670538042190092, "grad_norm": 4.7893595695495605, "learning_rate": 6.5148345565057e-08, "loss": 0.2165, "step": 2040 }, { "epoch": 0.9670538042190092, "eval_accuracy": 0.9935587761674718, "eval_f1": 0.9272727272727272, "eval_loss": 0.012882479466497898, "eval_precision": 0.8793103448275862, "eval_recall": 0.9807692307692307, "eval_runtime": 50.0769, "eval_samples_per_second": 5.412, "eval_steps_per_second": 0.18, "step": 2040 }, { "epoch": 0.9675278502014696, "grad_norm": 4.419187545776367, "learning_rate": 6.327565103910193e-08, "loss": 0.168, "step": 2041 }, { "epoch": 0.9680018961839298, "grad_norm": 10.345693588256836, "learning_rate": 6.143018042139903e-08, "loss": 0.1948, "step": 2042 }, { "epoch": 0.9684759421663901, "grad_norm": 8.49881649017334, "learning_rate": 5.96119387680294e-08, "loss": 0.1262, "step": 2043 }, { "epoch": 0.9689499881488505, "grad_norm": 3.0430965423583984, "learning_rate": 5.782093106048159e-08, "loss": 0.1379, "step": 2044 }, { "epoch": 0.9694240341313107, "grad_norm": 3.5971128940582275, "learning_rate": 5.605716220562385e-08, "loss": 0.1537, "step": 2045 }, { "epoch": 0.969898080113771, "grad_norm": 2.2294721603393555, "learning_rate": 5.4320637035704114e-08, "loss": 0.0722, "step": 2046 }, { "epoch": 
0.9703721260962314, "grad_norm": 6.108776569366455, "learning_rate": 5.2611360308323364e-08, "loss": 0.1432, "step": 2047 }, { "epoch": 0.9708461720786916, "grad_norm": 4.834316730499268, "learning_rate": 5.092933670643452e-08, "loss": 0.1153, "step": 2048 }, { "epoch": 0.971320218061152, "grad_norm": 3.5349068641662598, "learning_rate": 4.9274570838322436e-08, "loss": 0.09, "step": 2049 }, { "epoch": 0.9717942640436122, "grad_norm": 5.70138692855835, "learning_rate": 4.764706723759172e-08, "loss": 0.1562, "step": 2050 }, { "epoch": 0.9722683100260725, "grad_norm": 7.175850868225098, "learning_rate": 4.604683036316004e-08, "loss": 0.3258, "step": 2051 }, { "epoch": 0.9727423560085329, "grad_norm": 4.018371105194092, "learning_rate": 4.4473864599235975e-08, "loss": 0.2188, "step": 2052 }, { "epoch": 0.9732164019909931, "grad_norm": 5.978484153747559, "learning_rate": 4.29281742553167e-08, "loss": 0.1379, "step": 2053 }, { "epoch": 0.9736904479734534, "grad_norm": 3.8246636390686035, "learning_rate": 4.1409763566172544e-08, "loss": 0.1241, "step": 2054 }, { "epoch": 0.9741644939559138, "grad_norm": 3.782214641571045, "learning_rate": 3.991863669183138e-08, "loss": 0.1725, "step": 2055 }, { "epoch": 0.974638539938374, "grad_norm": 5.974038124084473, "learning_rate": 3.845479771757532e-08, "loss": 0.0927, "step": 2056 }, { "epoch": 0.9751125859208343, "grad_norm": 4.958864688873291, "learning_rate": 3.701825065392184e-08, "loss": 0.1666, "step": 2057 }, { "epoch": 0.9755866319032946, "grad_norm": 7.515510559082031, "learning_rate": 3.560899943661822e-08, "loss": 0.1924, "step": 2058 }, { "epoch": 0.9760606778857549, "grad_norm": 6.320629596710205, "learning_rate": 3.422704792662601e-08, "loss": 0.1618, "step": 2059 }, { "epoch": 0.9765347238682153, "grad_norm": 3.3863184452056885, "learning_rate": 3.2872399910115484e-08, "loss": 0.1417, "step": 2060 }, { "epoch": 0.9765347238682153, "eval_accuracy": 0.9935587761674718, "eval_f1": 0.9272727272727272, "eval_loss": 0.012739640660583973, "eval_precision": 0.8793103448275862, "eval_recall": 0.9807692307692307, "eval_runtime": 49.7841, "eval_samples_per_second": 5.444, "eval_steps_per_second": 0.181, "step": 2060 }, { "epoch": 0.9770087698506755, "grad_norm": 4.818965911865234, "learning_rate": 3.154505909845229e-08, "loss": 0.1236, "step": 2061 }, { "epoch": 0.9774828158331358, "grad_norm": 6.437606334686279, "learning_rate": 3.024502912818528e-08, "loss": 0.2105, "step": 2062 }, { "epoch": 0.9779568618155962, "grad_norm": 4.611502647399902, "learning_rate": 2.897231356104424e-08, "loss": 0.1779, "step": 2063 }, { "epoch": 0.9784309077980564, "grad_norm": 5.7043843269348145, "learning_rate": 2.7726915883919958e-08, "loss": 0.1738, "step": 2064 }, { "epoch": 0.9789049537805167, "grad_norm": 3.3945627212524414, "learning_rate": 2.6508839508861963e-08, "loss": 0.1066, "step": 2065 }, { "epoch": 0.979378999762977, "grad_norm": 3.7300400733947754, "learning_rate": 2.5318087773066325e-08, "loss": 0.1186, "step": 2066 }, { "epoch": 0.9798530457454373, "grad_norm": 5.509089469909668, "learning_rate": 2.4154663938867894e-08, "loss": 0.1847, "step": 2067 }, { "epoch": 0.9803270917278976, "grad_norm": 3.7570600509643555, "learning_rate": 2.3018571193729188e-08, "loss": 0.1604, "step": 2068 }, { "epoch": 0.9808011377103579, "grad_norm": 3.3540408611297607, "learning_rate": 2.190981265023373e-08, "loss": 0.0865, "step": 2069 }, { "epoch": 0.9812751836928182, "grad_norm": 3.976696252822876, "learning_rate": 2.082839134607828e-08, "loss": 0.1681, "step": 2070 }, 
{ "epoch": 0.9817492296752786, "grad_norm": 5.1151838302612305, "learning_rate": 1.9774310244059512e-08, "loss": 0.1638, "step": 2071 }, { "epoch": 0.9822232756577388, "grad_norm": 6.2366414070129395, "learning_rate": 1.874757223207291e-08, "loss": 0.1142, "step": 2072 }, { "epoch": 0.9826973216401991, "grad_norm": 3.962942361831665, "learning_rate": 1.7748180123100535e-08, "loss": 0.136, "step": 2073 }, { "epoch": 0.9831713676226594, "grad_norm": 9.428374290466309, "learning_rate": 1.677613665520106e-08, "loss": 0.1083, "step": 2074 }, { "epoch": 0.9836454136051197, "grad_norm": 6.933211326599121, "learning_rate": 1.583144449150975e-08, "loss": 0.1716, "step": 2075 }, { "epoch": 0.98411945958758, "grad_norm": 5.4883575439453125, "learning_rate": 1.4914106220225156e-08, "loss": 0.1931, "step": 2076 }, { "epoch": 0.9845935055700403, "grad_norm": 3.674689531326294, "learning_rate": 1.402412435460132e-08, "loss": 0.167, "step": 2077 }, { "epoch": 0.9850675515525006, "grad_norm": 3.3293275833129883, "learning_rate": 1.3161501332947802e-08, "loss": 0.1224, "step": 2078 }, { "epoch": 0.9855415975349608, "grad_norm": 5.863772392272949, "learning_rate": 1.2326239518614114e-08, "loss": 0.1418, "step": 2079 }, { "epoch": 0.9860156435174212, "grad_norm": 4.600866317749023, "learning_rate": 1.1518341199989735e-08, "loss": 0.101, "step": 2080 }, { "epoch": 0.9860156435174212, "eval_accuracy": 0.9935587761674718, "eval_f1": 0.9272727272727272, "eval_loss": 0.012615163810551167, "eval_precision": 0.8793103448275862, "eval_recall": 0.9807692307692307, "eval_runtime": 50.0081, "eval_samples_per_second": 5.419, "eval_steps_per_second": 0.18, "step": 2080 }, { "epoch": 0.9864896894998815, "grad_norm": 6.645082950592041, "learning_rate": 1.0737808590495225e-08, "loss": 0.1798, "step": 2081 }, { "epoch": 0.9869637354823418, "grad_norm": 3.026750087738037, "learning_rate": 9.984643828576669e-09, "loss": 0.1094, "step": 2082 }, { "epoch": 0.9874377814648021, "grad_norm": 4.144604206085205, "learning_rate": 9.25884897770013e-09, "loss": 0.1389, "step": 2083 }, { "epoch": 0.9879118274472624, "grad_norm": 5.202576637268066, "learning_rate": 8.560426026343881e-09, "loss": 0.2081, "step": 2084 }, { "epoch": 0.9883858734297227, "grad_norm": 3.02374005317688, "learning_rate": 7.889376887997291e-09, "loss": 0.1092, "step": 2085 }, { "epoch": 0.988859919412183, "grad_norm": 2.9011049270629883, "learning_rate": 7.245703401149717e-09, "loss": 0.1357, "step": 2086 }, { "epoch": 0.9893339653946432, "grad_norm": 6.7899250984191895, "learning_rate": 6.629407329292736e-09, "loss": 0.1352, "step": 2087 }, { "epoch": 0.9898080113771036, "grad_norm": 3.2638795375823975, "learning_rate": 6.0404903609068146e-09, "loss": 0.1101, "step": 2088 }, { "epoch": 0.9902820573595639, "grad_norm": 5.045032501220703, "learning_rate": 5.47895410946575e-09, "loss": 0.1703, "step": 2089 }, { "epoch": 0.9907561033420241, "grad_norm": 3.866666078567505, "learning_rate": 4.9448001134233536e-09, "loss": 0.1551, "step": 2090 }, { "epoch": 0.9912301493244845, "grad_norm": 3.6094112396240234, "learning_rate": 4.438029836216773e-09, "loss": 0.1368, "step": 2091 }, { "epoch": 0.9917041953069448, "grad_norm": 4.1954779624938965, "learning_rate": 3.958644666257616e-09, "loss": 0.1503, "step": 2092 }, { "epoch": 0.9921782412894051, "grad_norm": 3.4606990814208984, "learning_rate": 3.5066459169297294e-09, "loss": 0.1193, "step": 2093 }, { "epoch": 0.9926522872718654, "grad_norm": 3.768021821975708, "learning_rate": 3.082034826586977e-09, "loss": 0.171, "step": 
2094 }, { "epoch": 0.9931263332543256, "grad_norm": 3.0747859477996826, "learning_rate": 2.684812558547689e-09, "loss": 0.0801, "step": 2095 }, { "epoch": 0.993600379236786, "grad_norm": 6.338437080383301, "learning_rate": 2.3149802010913323e-09, "loss": 0.1644, "step": 2096 }, { "epoch": 0.9940744252192463, "grad_norm": 3.9809048175811768, "learning_rate": 1.9725387674585095e-09, "loss": 0.1284, "step": 2097 }, { "epoch": 0.9945484712017065, "grad_norm": 4.982059955596924, "learning_rate": 1.6574891958442973e-09, "loss": 0.1434, "step": 2098 }, { "epoch": 0.9950225171841669, "grad_norm": 2.924060583114624, "learning_rate": 1.3698323493993582e-09, "loss": 0.0911, "step": 2099 }, { "epoch": 0.9954965631666272, "grad_norm": 7.759647846221924, "learning_rate": 1.1095690162243878e-09, "loss": 0.2136, "step": 2100 }, { "epoch": 0.9954965631666272, "eval_accuracy": 0.9935587761674718, "eval_f1": 0.9272727272727272, "eval_loss": 0.012555374763906002, "eval_precision": 0.8793103448275862, "eval_recall": 0.9807692307692307, "eval_runtime": 50.017, "eval_samples_per_second": 5.418, "eval_steps_per_second": 0.18, "step": 2100 }, { "epoch": 0.9959706091490874, "grad_norm": 4.999462127685547, "learning_rate": 8.766999093690054e-10, "loss": 0.139, "step": 2101 }, { "epoch": 0.9964446551315478, "grad_norm": 7.40416145324707, "learning_rate": 6.71225666831754e-10, "loss": 0.1862, "step": 2102 }, { "epoch": 0.996918701114008, "grad_norm": 3.0805890560150146, "learning_rate": 4.931468515556593e-10, "loss": 0.1179, "step": 2103 }, { "epoch": 0.9973927470964684, "grad_norm": 3.0441319942474365, "learning_rate": 3.4246395142822906e-10, "loss": 0.1261, "step": 2104 }, { "epoch": 0.9978667930789287, "grad_norm": 3.2093586921691895, "learning_rate": 2.1917737927812377e-10, "loss": 0.0813, "step": 2105 }, { "epoch": 0.9983408390613889, "grad_norm": 5.661715507507324, "learning_rate": 1.2328747287848609e-10, "loss": 0.1345, "step": 2106 }, { "epoch": 0.9988148850438493, "grad_norm": 3.4387176036834717, "learning_rate": 5.4794494941390333e-11, "loss": 0.1216, "step": 2107 }, { "epoch": 0.9992889310263096, "grad_norm": 3.1428146362304688, "learning_rate": 1.3698633117842365e-11, "loss": 0.1275, "step": 2108 }, { "epoch": 0.9997629770087698, "grad_norm": 8.101336479187012, "learning_rate": 0.0, "loss": 0.2897, "step": 2109 } ], "logging_steps": 1, "max_steps": 2109, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 100, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 5.63300984822956e+17, "train_batch_size": 8, "trial_name": null, "trial_params": null }
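
Note: the object above follows the trainer_state.json layout written by the Hugging Face Trainer, where "log_history" interleaves per-step training records (epoch, grad_norm, learning_rate, loss, step) with periodic evaluation records (eval_accuracy, eval_f1, eval_loss, eval_precision, eval_recall, eval_runtime, ...). Below is a minimal parsing sketch, assuming the object is saved to a file named trainer_state.json (the path is an assumption); it only separates the two record types and reads fields that appear in the data above.

import json

# A minimal sketch, assuming this JSON object is stored as "trainer_state.json".
with open("trainer_state.json") as f:
    state = json.load(f)

train_records = []  # per-step entries carrying a training "loss"
eval_records = []   # periodic entries carrying "eval_loss", "eval_f1", etc.

for record in state["log_history"]:
    if "loss" in record:
        train_records.append(record)
    if "eval_loss" in record:
        eval_records.append(record)

# Example reads: final training step/loss and the best eval F1 seen in the run.
print("last step:", train_records[-1]["step"], "loss:", train_records[-1]["loss"])
print("best eval_f1:", max(r["eval_f1"] for r in eval_records))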