{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 500, "global_step": 15492, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "grad_norm": 9.199494361877441, "learning_rate": 2.1505376344086022e-08, "loss": 1.1437, "step": 1 }, { "epoch": 0.0, "grad_norm": 6.0545454025268555, "learning_rate": 4.3010752688172045e-08, "loss": 1.1482, "step": 2 }, { "epoch": 0.0, "grad_norm": 7.064575672149658, "learning_rate": 6.451612903225807e-08, "loss": 1.1427, "step": 3 }, { "epoch": 0.0, "grad_norm": 7.665438175201416, "learning_rate": 8.602150537634409e-08, "loss": 1.2363, "step": 4 }, { "epoch": 0.0, "grad_norm": 7.169973850250244, "learning_rate": 1.0752688172043012e-07, "loss": 1.13, "step": 5 }, { "epoch": 0.0, "grad_norm": 7.985989570617676, "learning_rate": 1.2903225806451614e-07, "loss": 1.1699, "step": 6 }, { "epoch": 0.0, "grad_norm": 6.5622968673706055, "learning_rate": 1.5053763440860217e-07, "loss": 1.1077, "step": 7 }, { "epoch": 0.0, "grad_norm": 7.340312480926514, "learning_rate": 1.7204301075268818e-07, "loss": 1.1568, "step": 8 }, { "epoch": 0.0, "grad_norm": 5.591065883636475, "learning_rate": 1.935483870967742e-07, "loss": 1.0991, "step": 9 }, { "epoch": 0.0, "grad_norm": 6.89580774307251, "learning_rate": 2.1505376344086024e-07, "loss": 1.1472, "step": 10 }, { "epoch": 0.0, "grad_norm": 6.943924903869629, "learning_rate": 2.3655913978494625e-07, "loss": 1.168, "step": 11 }, { "epoch": 0.0, "grad_norm": 6.1693830490112305, "learning_rate": 2.580645161290323e-07, "loss": 1.136, "step": 12 }, { "epoch": 0.0, "grad_norm": 6.702217102050781, "learning_rate": 2.795698924731183e-07, "loss": 1.1373, "step": 13 }, { "epoch": 0.0, "grad_norm": 8.71485424041748, "learning_rate": 3.0107526881720434e-07, "loss": 1.13, "step": 14 }, { "epoch": 0.0, "grad_norm": 6.789973735809326, "learning_rate": 3.2258064516129035e-07, "loss": 1.1722, "step": 15 }, { "epoch": 0.0, "grad_norm": 6.896481513977051, "learning_rate": 3.4408602150537636e-07, "loss": 1.1282, "step": 16 }, { "epoch": 0.0, "grad_norm": 9.546806335449219, "learning_rate": 3.655913978494624e-07, "loss": 1.1901, "step": 17 }, { "epoch": 0.0, "grad_norm": 6.853231430053711, "learning_rate": 3.870967741935484e-07, "loss": 1.1032, "step": 18 }, { "epoch": 0.0, "grad_norm": 7.103644371032715, "learning_rate": 4.086021505376345e-07, "loss": 1.1358, "step": 19 }, { "epoch": 0.0, "grad_norm": 8.076460838317871, "learning_rate": 4.301075268817205e-07, "loss": 1.111, "step": 20 }, { "epoch": 0.0, "grad_norm": 5.919093132019043, "learning_rate": 4.5161290322580644e-07, "loss": 1.0959, "step": 21 }, { "epoch": 0.0, "grad_norm": 6.185170650482178, "learning_rate": 4.731182795698925e-07, "loss": 1.0636, "step": 22 }, { "epoch": 0.0, "grad_norm": 5.398933410644531, "learning_rate": 4.946236559139786e-07, "loss": 1.0246, "step": 23 }, { "epoch": 0.0, "grad_norm": 5.460718631744385, "learning_rate": 5.161290322580646e-07, "loss": 1.0656, "step": 24 }, { "epoch": 0.0, "grad_norm": 4.4205241203308105, "learning_rate": 5.376344086021506e-07, "loss": 1.0801, "step": 25 }, { "epoch": 0.0, "grad_norm": 4.60645866394043, "learning_rate": 5.591397849462366e-07, "loss": 1.0519, "step": 26 }, { "epoch": 0.0, "grad_norm": 4.965516090393066, "learning_rate": 5.806451612903227e-07, "loss": 1.0758, "step": 27 }, { "epoch": 0.0, "grad_norm": 4.6869635581970215, "learning_rate": 6.021505376344087e-07, "loss": 1.0803, "step": 28 }, { "epoch": 0.0, "grad_norm": 4.233418941497803, "learning_rate": 6.236559139784947e-07, "loss": 0.9739, "step": 29 }, { "epoch": 0.0, "grad_norm": 3.845629930496216, "learning_rate": 6.451612903225807e-07, "loss": 0.9805, "step": 30 }, { "epoch": 0.0, "grad_norm": 3.9328348636627197, "learning_rate": 6.666666666666667e-07, "loss": 0.9818, "step": 31 }, { "epoch": 0.0, "grad_norm": 3.7508742809295654, "learning_rate": 6.881720430107527e-07, "loss": 0.9613, "step": 32 }, { "epoch": 0.0, "grad_norm": 3.8866469860076904, "learning_rate": 7.096774193548388e-07, "loss": 0.9899, "step": 33 }, { "epoch": 0.0, "grad_norm": 3.6516504287719727, "learning_rate": 7.311827956989248e-07, "loss": 0.9788, "step": 34 }, { "epoch": 0.0, "grad_norm": 3.6458332538604736, "learning_rate": 7.526881720430108e-07, "loss": 0.9496, "step": 35 }, { "epoch": 0.0, "grad_norm": 3.688161849975586, "learning_rate": 7.741935483870968e-07, "loss": 0.9631, "step": 36 }, { "epoch": 0.0, "grad_norm": 3.6499686241149902, "learning_rate": 7.95698924731183e-07, "loss": 0.9827, "step": 37 }, { "epoch": 0.0, "grad_norm": 4.352916240692139, "learning_rate": 8.17204301075269e-07, "loss": 0.9729, "step": 38 }, { "epoch": 0.0, "grad_norm": 3.2920162677764893, "learning_rate": 8.38709677419355e-07, "loss": 0.94, "step": 39 }, { "epoch": 0.0, "grad_norm": 2.8973357677459717, "learning_rate": 8.60215053763441e-07, "loss": 0.9002, "step": 40 }, { "epoch": 0.0, "grad_norm": 2.6978607177734375, "learning_rate": 8.817204301075269e-07, "loss": 0.8871, "step": 41 }, { "epoch": 0.0, "grad_norm": 2.9298810958862305, "learning_rate": 9.032258064516129e-07, "loss": 0.8874, "step": 42 }, { "epoch": 0.0, "grad_norm": 2.602858781814575, "learning_rate": 9.24731182795699e-07, "loss": 0.8815, "step": 43 }, { "epoch": 0.0, "grad_norm": 2.4206223487854004, "learning_rate": 9.46236559139785e-07, "loss": 0.8874, "step": 44 }, { "epoch": 0.0, "grad_norm": 2.330326557159424, "learning_rate": 9.67741935483871e-07, "loss": 0.8577, "step": 45 }, { "epoch": 0.0, "grad_norm": 2.3294131755828857, "learning_rate": 9.892473118279571e-07, "loss": 0.9535, "step": 46 }, { "epoch": 0.0, "grad_norm": 2.343327283859253, "learning_rate": 1.010752688172043e-06, "loss": 0.8169, "step": 47 }, { "epoch": 0.0, "grad_norm": 2.401000499725342, "learning_rate": 1.0322580645161291e-06, "loss": 0.8267, "step": 48 }, { "epoch": 0.0, "grad_norm": 2.074589967727661, "learning_rate": 1.0537634408602152e-06, "loss": 0.8182, "step": 49 }, { "epoch": 0.0, "grad_norm": 2.105560779571533, "learning_rate": 1.0752688172043011e-06, "loss": 0.8219, "step": 50 }, { "epoch": 0.0, "grad_norm": 2.3930771350860596, "learning_rate": 1.0967741935483872e-06, "loss": 0.8373, "step": 51 }, { "epoch": 0.0, "grad_norm": 1.9240361452102661, "learning_rate": 1.1182795698924731e-06, "loss": 0.8418, "step": 52 }, { "epoch": 0.0, "grad_norm": 2.412742853164673, "learning_rate": 1.1397849462365593e-06, "loss": 0.8717, "step": 53 }, { "epoch": 0.0, "grad_norm": 1.973429799079895, "learning_rate": 1.1612903225806454e-06, "loss": 0.853, "step": 54 }, { "epoch": 0.0, "grad_norm": 1.8849254846572876, "learning_rate": 1.1827956989247313e-06, "loss": 0.7893, "step": 55 }, { "epoch": 0.0, "grad_norm": 1.9804376363754272, "learning_rate": 1.2043010752688174e-06, "loss": 0.839, "step": 56 }, { "epoch": 0.0, "grad_norm": 2.039210081100464, "learning_rate": 1.2258064516129033e-06, "loss": 0.8568, "step": 57 }, { "epoch": 0.0, "grad_norm": 2.208841562271118, "learning_rate": 1.2473118279569894e-06, "loss": 0.7834, "step": 58 }, { "epoch": 0.0, "grad_norm": 1.9432287216186523, "learning_rate": 1.2688172043010755e-06, "loss": 0.6998, "step": 59 }, { "epoch": 0.0, "grad_norm": 1.8315590620040894, "learning_rate": 1.2903225806451614e-06, "loss": 0.7802, "step": 60 }, { "epoch": 0.0, "grad_norm": 1.9207030534744263, "learning_rate": 1.3118279569892475e-06, "loss": 0.7934, "step": 61 }, { "epoch": 0.0, "grad_norm": 1.9561203718185425, "learning_rate": 1.3333333333333334e-06, "loss": 0.756, "step": 62 }, { "epoch": 0.0, "grad_norm": 1.992568016052246, "learning_rate": 1.3548387096774195e-06, "loss": 0.8479, "step": 63 }, { "epoch": 0.0, "grad_norm": 1.6897177696228027, "learning_rate": 1.3763440860215054e-06, "loss": 0.7737, "step": 64 }, { "epoch": 0.0, "grad_norm": 1.8338932991027832, "learning_rate": 1.3978494623655913e-06, "loss": 0.7786, "step": 65 }, { "epoch": 0.0, "grad_norm": 1.7465412616729736, "learning_rate": 1.4193548387096776e-06, "loss": 0.8564, "step": 66 }, { "epoch": 0.0, "grad_norm": 1.5714951753616333, "learning_rate": 1.4408602150537635e-06, "loss": 0.7401, "step": 67 }, { "epoch": 0.0, "grad_norm": 1.658527135848999, "learning_rate": 1.4623655913978497e-06, "loss": 0.8224, "step": 68 }, { "epoch": 0.0, "grad_norm": 2.278364896774292, "learning_rate": 1.4838709677419356e-06, "loss": 0.7847, "step": 69 }, { "epoch": 0.0, "grad_norm": 1.7019484043121338, "learning_rate": 1.5053763440860217e-06, "loss": 0.7668, "step": 70 }, { "epoch": 0.0, "grad_norm": 1.5094424486160278, "learning_rate": 1.5268817204301076e-06, "loss": 0.677, "step": 71 }, { "epoch": 0.0, "grad_norm": 1.6096973419189453, "learning_rate": 1.5483870967741937e-06, "loss": 0.7383, "step": 72 }, { "epoch": 0.0, "grad_norm": 1.5990463495254517, "learning_rate": 1.5698924731182796e-06, "loss": 0.7269, "step": 73 }, { "epoch": 0.0, "grad_norm": 1.5064425468444824, "learning_rate": 1.591397849462366e-06, "loss": 0.735, "step": 74 }, { "epoch": 0.0, "grad_norm": 1.626851201057434, "learning_rate": 1.6129032258064516e-06, "loss": 0.7365, "step": 75 }, { "epoch": 0.0, "grad_norm": 1.5428876876831055, "learning_rate": 1.634408602150538e-06, "loss": 0.7441, "step": 76 }, { "epoch": 0.0, "grad_norm": 1.520472526550293, "learning_rate": 1.6559139784946238e-06, "loss": 0.7176, "step": 77 }, { "epoch": 0.01, "grad_norm": 1.6144437789916992, "learning_rate": 1.67741935483871e-06, "loss": 0.7346, "step": 78 }, { "epoch": 0.01, "grad_norm": 1.5003567934036255, "learning_rate": 1.6989247311827958e-06, "loss": 0.6916, "step": 79 }, { "epoch": 0.01, "grad_norm": 1.4560390710830688, "learning_rate": 1.720430107526882e-06, "loss": 0.6949, "step": 80 }, { "epoch": 0.01, "grad_norm": 1.5620219707489014, "learning_rate": 1.7419354838709678e-06, "loss": 0.7344, "step": 81 }, { "epoch": 0.01, "grad_norm": 1.8219013214111328, "learning_rate": 1.7634408602150537e-06, "loss": 0.7725, "step": 82 }, { "epoch": 0.01, "grad_norm": 1.5620462894439697, "learning_rate": 1.7849462365591399e-06, "loss": 0.7008, "step": 83 }, { "epoch": 0.01, "grad_norm": 1.6449252367019653, "learning_rate": 1.8064516129032258e-06, "loss": 0.738, "step": 84 }, { "epoch": 0.01, "grad_norm": 1.5631426572799683, "learning_rate": 1.827956989247312e-06, "loss": 0.7803, "step": 85 }, { "epoch": 0.01, "grad_norm": 1.6407616138458252, "learning_rate": 1.849462365591398e-06, "loss": 0.7451, "step": 86 }, { "epoch": 0.01, "grad_norm": 1.4969933032989502, "learning_rate": 1.870967741935484e-06, "loss": 0.7748, "step": 87 }, { "epoch": 0.01, "grad_norm": 1.587488055229187, "learning_rate": 1.89247311827957e-06, "loss": 0.7774, "step": 88 }, { "epoch": 0.01, "grad_norm": 1.8017183542251587, "learning_rate": 1.9139784946236563e-06, "loss": 0.7497, "step": 89 }, { "epoch": 0.01, "grad_norm": 1.4971867799758911, "learning_rate": 1.935483870967742e-06, "loss": 0.6959, "step": 90 }, { "epoch": 0.01, "grad_norm": 1.4527785778045654, "learning_rate": 1.956989247311828e-06, "loss": 0.7569, "step": 91 }, { "epoch": 0.01, "grad_norm": 1.4927266836166382, "learning_rate": 1.9784946236559142e-06, "loss": 0.7317, "step": 92 }, { "epoch": 0.01, "grad_norm": 1.533997893333435, "learning_rate": 2.0000000000000003e-06, "loss": 0.726, "step": 93 }, { "epoch": 0.01, "grad_norm": 1.4823975563049316, "learning_rate": 2.021505376344086e-06, "loss": 0.735, "step": 94 }, { "epoch": 0.01, "grad_norm": 1.50624418258667, "learning_rate": 2.043010752688172e-06, "loss": 0.7305, "step": 95 }, { "epoch": 0.01, "grad_norm": 1.4327868223190308, "learning_rate": 2.0645161290322582e-06, "loss": 0.7355, "step": 96 }, { "epoch": 0.01, "grad_norm": 1.8432235717773438, "learning_rate": 2.086021505376344e-06, "loss": 0.7635, "step": 97 }, { "epoch": 0.01, "grad_norm": 1.4271914958953857, "learning_rate": 2.1075268817204305e-06, "loss": 0.6642, "step": 98 }, { "epoch": 0.01, "grad_norm": 1.4882450103759766, "learning_rate": 2.129032258064516e-06, "loss": 0.7776, "step": 99 }, { "epoch": 0.01, "grad_norm": 1.6545829772949219, "learning_rate": 2.1505376344086023e-06, "loss": 0.749, "step": 100 }, { "epoch": 0.01, "grad_norm": 1.6064702272415161, "learning_rate": 2.1720430107526884e-06, "loss": 0.6742, "step": 101 }, { "epoch": 0.01, "grad_norm": 1.5505890846252441, "learning_rate": 2.1935483870967745e-06, "loss": 0.6955, "step": 102 }, { "epoch": 0.01, "grad_norm": 1.5697438716888428, "learning_rate": 2.21505376344086e-06, "loss": 0.7352, "step": 103 }, { "epoch": 0.01, "grad_norm": 1.5753288269042969, "learning_rate": 2.2365591397849463e-06, "loss": 0.7069, "step": 104 }, { "epoch": 0.01, "grad_norm": 1.5402828454971313, "learning_rate": 2.2580645161290324e-06, "loss": 0.6918, "step": 105 }, { "epoch": 0.01, "grad_norm": 1.4019324779510498, "learning_rate": 2.2795698924731185e-06, "loss": 0.6673, "step": 106 }, { "epoch": 0.01, "grad_norm": 1.5553447008132935, "learning_rate": 2.3010752688172046e-06, "loss": 0.75, "step": 107 }, { "epoch": 0.01, "grad_norm": 1.4179664850234985, "learning_rate": 2.3225806451612907e-06, "loss": 0.6868, "step": 108 }, { "epoch": 0.01, "grad_norm": 1.5014973878860474, "learning_rate": 2.3440860215053764e-06, "loss": 0.662, "step": 109 }, { "epoch": 0.01, "grad_norm": 1.327265977859497, "learning_rate": 2.3655913978494625e-06, "loss": 0.7385, "step": 110 }, { "epoch": 0.01, "grad_norm": 1.4811031818389893, "learning_rate": 2.3870967741935486e-06, "loss": 0.6582, "step": 111 }, { "epoch": 0.01, "grad_norm": 1.6561529636383057, "learning_rate": 2.4086021505376348e-06, "loss": 0.6604, "step": 112 }, { "epoch": 0.01, "grad_norm": 1.700042486190796, "learning_rate": 2.4301075268817204e-06, "loss": 0.6496, "step": 113 }, { "epoch": 0.01, "grad_norm": 1.4990754127502441, "learning_rate": 2.4516129032258066e-06, "loss": 0.7421, "step": 114 }, { "epoch": 0.01, "grad_norm": 1.590571403503418, "learning_rate": 2.4731182795698927e-06, "loss": 0.6999, "step": 115 }, { "epoch": 0.01, "grad_norm": 1.6044598817825317, "learning_rate": 2.4946236559139788e-06, "loss": 0.7294, "step": 116 }, { "epoch": 0.01, "grad_norm": 1.4657864570617676, "learning_rate": 2.5161290322580645e-06, "loss": 0.6599, "step": 117 }, { "epoch": 0.01, "grad_norm": 1.4239683151245117, "learning_rate": 2.537634408602151e-06, "loss": 0.6536, "step": 118 }, { "epoch": 0.01, "grad_norm": 1.371087908744812, "learning_rate": 2.5591397849462367e-06, "loss": 0.6272, "step": 119 }, { "epoch": 0.01, "grad_norm": 1.4175894260406494, "learning_rate": 2.580645161290323e-06, "loss": 0.6928, "step": 120 }, { "epoch": 0.01, "grad_norm": 1.5577291250228882, "learning_rate": 2.6021505376344085e-06, "loss": 0.6251, "step": 121 }, { "epoch": 0.01, "grad_norm": 1.502320647239685, "learning_rate": 2.623655913978495e-06, "loss": 0.7476, "step": 122 }, { "epoch": 0.01, "grad_norm": 1.608627200126648, "learning_rate": 2.645161290322581e-06, "loss": 0.7487, "step": 123 }, { "epoch": 0.01, "grad_norm": 1.4116089344024658, "learning_rate": 2.666666666666667e-06, "loss": 0.6685, "step": 124 }, { "epoch": 0.01, "grad_norm": 1.4255626201629639, "learning_rate": 2.688172043010753e-06, "loss": 0.7016, "step": 125 }, { "epoch": 0.01, "grad_norm": 1.4606821537017822, "learning_rate": 2.709677419354839e-06, "loss": 0.7342, "step": 126 }, { "epoch": 0.01, "grad_norm": 1.5134891271591187, "learning_rate": 2.731182795698925e-06, "loss": 0.6933, "step": 127 }, { "epoch": 0.01, "grad_norm": 1.6419521570205688, "learning_rate": 2.752688172043011e-06, "loss": 0.7219, "step": 128 }, { "epoch": 0.01, "grad_norm": 1.5144115686416626, "learning_rate": 2.774193548387097e-06, "loss": 0.6619, "step": 129 }, { "epoch": 0.01, "grad_norm": 1.5899344682693481, "learning_rate": 2.7956989247311827e-06, "loss": 0.7218, "step": 130 }, { "epoch": 0.01, "grad_norm": 1.5220930576324463, "learning_rate": 2.817204301075269e-06, "loss": 0.7338, "step": 131 }, { "epoch": 0.01, "grad_norm": 1.4487104415893555, "learning_rate": 2.8387096774193553e-06, "loss": 0.7107, "step": 132 }, { "epoch": 0.01, "grad_norm": 1.436444640159607, "learning_rate": 2.860215053763441e-06, "loss": 0.6749, "step": 133 }, { "epoch": 0.01, "grad_norm": 1.464216947555542, "learning_rate": 2.881720430107527e-06, "loss": 0.6303, "step": 134 }, { "epoch": 0.01, "grad_norm": 1.5824499130249023, "learning_rate": 2.903225806451613e-06, "loss": 0.6414, "step": 135 }, { "epoch": 0.01, "grad_norm": 1.656369686126709, "learning_rate": 2.9247311827956993e-06, "loss": 0.6985, "step": 136 }, { "epoch": 0.01, "grad_norm": 1.5063303709030151, "learning_rate": 2.946236559139785e-06, "loss": 0.6392, "step": 137 }, { "epoch": 0.01, "grad_norm": 1.444278359413147, "learning_rate": 2.967741935483871e-06, "loss": 0.6953, "step": 138 }, { "epoch": 0.01, "grad_norm": 1.3497573137283325, "learning_rate": 2.9892473118279577e-06, "loss": 0.6987, "step": 139 }, { "epoch": 0.01, "grad_norm": 1.4798181056976318, "learning_rate": 3.0107526881720433e-06, "loss": 0.728, "step": 140 }, { "epoch": 0.01, "grad_norm": 1.8889732360839844, "learning_rate": 3.0322580645161295e-06, "loss": 0.6552, "step": 141 }, { "epoch": 0.01, "grad_norm": 1.8460392951965332, "learning_rate": 3.053763440860215e-06, "loss": 0.7035, "step": 142 }, { "epoch": 0.01, "grad_norm": 1.5472880601882935, "learning_rate": 3.0752688172043017e-06, "loss": 0.6939, "step": 143 }, { "epoch": 0.01, "grad_norm": 1.4514435529708862, "learning_rate": 3.0967741935483874e-06, "loss": 0.6832, "step": 144 }, { "epoch": 0.01, "grad_norm": 1.5619627237319946, "learning_rate": 3.1182795698924735e-06, "loss": 0.687, "step": 145 }, { "epoch": 0.01, "grad_norm": 1.5921282768249512, "learning_rate": 3.139784946236559e-06, "loss": 0.6646, "step": 146 }, { "epoch": 0.01, "grad_norm": 1.3784055709838867, "learning_rate": 3.1612903225806453e-06, "loss": 0.6463, "step": 147 }, { "epoch": 0.01, "grad_norm": 1.512998342514038, "learning_rate": 3.182795698924732e-06, "loss": 0.6525, "step": 148 }, { "epoch": 0.01, "grad_norm": 1.3162131309509277, "learning_rate": 3.2043010752688175e-06, "loss": 0.6469, "step": 149 }, { "epoch": 0.01, "grad_norm": 1.572028636932373, "learning_rate": 3.225806451612903e-06, "loss": 0.7131, "step": 150 }, { "epoch": 0.01, "grad_norm": 1.5169793367385864, "learning_rate": 3.2473118279569893e-06, "loss": 0.7038, "step": 151 }, { "epoch": 0.01, "grad_norm": 1.466475009918213, "learning_rate": 3.268817204301076e-06, "loss": 0.7005, "step": 152 }, { "epoch": 0.01, "grad_norm": 1.4486643075942993, "learning_rate": 3.2903225806451615e-06, "loss": 0.6751, "step": 153 }, { "epoch": 0.01, "grad_norm": 1.422550082206726, "learning_rate": 3.3118279569892476e-06, "loss": 0.6497, "step": 154 }, { "epoch": 0.01, "grad_norm": 1.566698431968689, "learning_rate": 3.3333333333333333e-06, "loss": 0.7081, "step": 155 }, { "epoch": 0.01, "grad_norm": 1.569364309310913, "learning_rate": 3.35483870967742e-06, "loss": 0.6227, "step": 156 }, { "epoch": 0.01, "grad_norm": 1.7164595127105713, "learning_rate": 3.376344086021506e-06, "loss": 0.6862, "step": 157 }, { "epoch": 0.01, "grad_norm": 1.5494818687438965, "learning_rate": 3.3978494623655917e-06, "loss": 0.6533, "step": 158 }, { "epoch": 0.01, "grad_norm": 1.4848299026489258, "learning_rate": 3.4193548387096773e-06, "loss": 0.7329, "step": 159 }, { "epoch": 0.01, "grad_norm": 1.5307267904281616, "learning_rate": 3.440860215053764e-06, "loss": 0.6468, "step": 160 }, { "epoch": 0.01, "grad_norm": 1.4356653690338135, "learning_rate": 3.46236559139785e-06, "loss": 0.6623, "step": 161 }, { "epoch": 0.01, "grad_norm": 1.3290424346923828, "learning_rate": 3.4838709677419357e-06, "loss": 0.648, "step": 162 }, { "epoch": 0.01, "grad_norm": 1.514076828956604, "learning_rate": 3.505376344086022e-06, "loss": 0.5723, "step": 163 }, { "epoch": 0.01, "grad_norm": 1.4453346729278564, "learning_rate": 3.5268817204301075e-06, "loss": 0.5902, "step": 164 }, { "epoch": 0.01, "grad_norm": 1.5088139772415161, "learning_rate": 3.548387096774194e-06, "loss": 0.6244, "step": 165 }, { "epoch": 0.01, "grad_norm": 1.8480931520462036, "learning_rate": 3.5698924731182797e-06, "loss": 0.6643, "step": 166 }, { "epoch": 0.01, "grad_norm": 1.589393973350525, "learning_rate": 3.591397849462366e-06, "loss": 0.6439, "step": 167 }, { "epoch": 0.01, "grad_norm": 1.5685641765594482, "learning_rate": 3.6129032258064515e-06, "loss": 0.7052, "step": 168 }, { "epoch": 0.01, "grad_norm": 1.38327157497406, "learning_rate": 3.634408602150538e-06, "loss": 0.6462, "step": 169 }, { "epoch": 0.01, "grad_norm": 1.597148060798645, "learning_rate": 3.655913978494624e-06, "loss": 0.7158, "step": 170 }, { "epoch": 0.01, "grad_norm": 1.4642106294631958, "learning_rate": 3.67741935483871e-06, "loss": 0.6272, "step": 171 }, { "epoch": 0.01, "grad_norm": 1.4251148700714111, "learning_rate": 3.698924731182796e-06, "loss": 0.6799, "step": 172 }, { "epoch": 0.01, "grad_norm": 1.487595558166504, "learning_rate": 3.720430107526882e-06, "loss": 0.6073, "step": 173 }, { "epoch": 0.01, "grad_norm": 1.4418853521347046, "learning_rate": 3.741935483870968e-06, "loss": 0.6399, "step": 174 }, { "epoch": 0.01, "grad_norm": 1.3663420677185059, "learning_rate": 3.763440860215054e-06, "loss": 0.6021, "step": 175 }, { "epoch": 0.01, "grad_norm": 1.6035783290863037, "learning_rate": 3.78494623655914e-06, "loss": 0.6398, "step": 176 }, { "epoch": 0.01, "grad_norm": 1.3505829572677612, "learning_rate": 3.8064516129032257e-06, "loss": 0.6305, "step": 177 }, { "epoch": 0.01, "grad_norm": 1.4782058000564575, "learning_rate": 3.827956989247313e-06, "loss": 0.6473, "step": 178 }, { "epoch": 0.01, "grad_norm": 1.5228917598724365, "learning_rate": 3.849462365591398e-06, "loss": 0.6405, "step": 179 }, { "epoch": 0.01, "grad_norm": 1.5363714694976807, "learning_rate": 3.870967741935484e-06, "loss": 0.6281, "step": 180 }, { "epoch": 0.01, "grad_norm": 1.6528879404067993, "learning_rate": 3.89247311827957e-06, "loss": 0.6162, "step": 181 }, { "epoch": 0.01, "grad_norm": 1.5740172863006592, "learning_rate": 3.913978494623656e-06, "loss": 0.649, "step": 182 }, { "epoch": 0.01, "grad_norm": 1.503543734550476, "learning_rate": 3.935483870967742e-06, "loss": 0.646, "step": 183 }, { "epoch": 0.01, "grad_norm": 1.5960241556167603, "learning_rate": 3.9569892473118284e-06, "loss": 0.6721, "step": 184 }, { "epoch": 0.01, "grad_norm": 1.5582656860351562, "learning_rate": 3.978494623655914e-06, "loss": 0.6972, "step": 185 }, { "epoch": 0.01, "grad_norm": 1.5604192018508911, "learning_rate": 4.000000000000001e-06, "loss": 0.7031, "step": 186 }, { "epoch": 0.01, "grad_norm": 1.498957872390747, "learning_rate": 4.021505376344086e-06, "loss": 0.6644, "step": 187 }, { "epoch": 0.01, "grad_norm": 1.5610569715499878, "learning_rate": 4.043010752688172e-06, "loss": 0.6804, "step": 188 }, { "epoch": 0.01, "grad_norm": 1.474185585975647, "learning_rate": 4.064516129032259e-06, "loss": 0.5963, "step": 189 }, { "epoch": 0.01, "grad_norm": 1.3687865734100342, "learning_rate": 4.086021505376344e-06, "loss": 0.668, "step": 190 }, { "epoch": 0.01, "grad_norm": 1.316788911819458, "learning_rate": 4.107526881720431e-06, "loss": 0.6523, "step": 191 }, { "epoch": 0.01, "grad_norm": 1.4012666940689087, "learning_rate": 4.1290322580645165e-06, "loss": 0.6288, "step": 192 }, { "epoch": 0.01, "grad_norm": 1.6649855375289917, "learning_rate": 4.150537634408602e-06, "loss": 0.6863, "step": 193 }, { "epoch": 0.01, "grad_norm": 1.4687491655349731, "learning_rate": 4.172043010752688e-06, "loss": 0.6585, "step": 194 }, { "epoch": 0.01, "grad_norm": 1.525539755821228, "learning_rate": 4.193548387096774e-06, "loss": 0.635, "step": 195 }, { "epoch": 0.01, "grad_norm": 1.466711401939392, "learning_rate": 4.215053763440861e-06, "loss": 0.6545, "step": 196 }, { "epoch": 0.01, "grad_norm": 1.5222094058990479, "learning_rate": 4.236559139784947e-06, "loss": 0.6553, "step": 197 }, { "epoch": 0.01, "grad_norm": 1.4313774108886719, "learning_rate": 4.258064516129032e-06, "loss": 0.629, "step": 198 }, { "epoch": 0.01, "grad_norm": 1.327798843383789, "learning_rate": 4.279569892473119e-06, "loss": 0.5891, "step": 199 }, { "epoch": 0.01, "grad_norm": 1.3087619543075562, "learning_rate": 4.3010752688172045e-06, "loss": 0.6483, "step": 200 }, { "epoch": 0.01, "grad_norm": 1.7443838119506836, "learning_rate": 4.32258064516129e-06, "loss": 0.6468, "step": 201 }, { "epoch": 0.01, "grad_norm": 1.6616973876953125, "learning_rate": 4.344086021505377e-06, "loss": 0.7197, "step": 202 }, { "epoch": 0.01, "grad_norm": 1.3355097770690918, "learning_rate": 4.365591397849463e-06, "loss": 0.662, "step": 203 }, { "epoch": 0.01, "grad_norm": 1.5761741399765015, "learning_rate": 4.387096774193549e-06, "loss": 0.6174, "step": 204 }, { "epoch": 0.01, "grad_norm": 1.441694974899292, "learning_rate": 4.408602150537635e-06, "loss": 0.633, "step": 205 }, { "epoch": 0.01, "grad_norm": 1.495047688484192, "learning_rate": 4.43010752688172e-06, "loss": 0.6942, "step": 206 }, { "epoch": 0.01, "grad_norm": 1.5562357902526855, "learning_rate": 4.451612903225807e-06, "loss": 0.6575, "step": 207 }, { "epoch": 0.01, "grad_norm": 1.677410364151001, "learning_rate": 4.473118279569893e-06, "loss": 0.6924, "step": 208 }, { "epoch": 0.01, "grad_norm": 1.680884599685669, "learning_rate": 4.494623655913979e-06, "loss": 0.6334, "step": 209 }, { "epoch": 0.01, "grad_norm": 1.4539467096328735, "learning_rate": 4.516129032258065e-06, "loss": 0.6658, "step": 210 }, { "epoch": 0.01, "grad_norm": 1.4707945585250854, "learning_rate": 4.5376344086021505e-06, "loss": 0.6821, "step": 211 }, { "epoch": 0.01, "grad_norm": 1.5402538776397705, "learning_rate": 4.559139784946237e-06, "loss": 0.6772, "step": 212 }, { "epoch": 0.01, "grad_norm": 1.487562656402588, "learning_rate": 4.580645161290323e-06, "loss": 0.6509, "step": 213 }, { "epoch": 0.01, "grad_norm": 1.506378173828125, "learning_rate": 4.602150537634409e-06, "loss": 0.7276, "step": 214 }, { "epoch": 0.01, "grad_norm": 1.818841814994812, "learning_rate": 4.623655913978495e-06, "loss": 0.6813, "step": 215 }, { "epoch": 0.01, "grad_norm": 1.4011495113372803, "learning_rate": 4.6451612903225815e-06, "loss": 0.6469, "step": 216 }, { "epoch": 0.01, "grad_norm": 1.478278636932373, "learning_rate": 4.666666666666667e-06, "loss": 0.6733, "step": 217 }, { "epoch": 0.01, "grad_norm": 1.4562674760818481, "learning_rate": 4.688172043010753e-06, "loss": 0.6638, "step": 218 }, { "epoch": 0.01, "grad_norm": 1.4329262971878052, "learning_rate": 4.7096774193548385e-06, "loss": 0.6041, "step": 219 }, { "epoch": 0.01, "grad_norm": 1.363663673400879, "learning_rate": 4.731182795698925e-06, "loss": 0.6322, "step": 220 }, { "epoch": 0.01, "grad_norm": 1.3990613222122192, "learning_rate": 4.752688172043012e-06, "loss": 0.6525, "step": 221 }, { "epoch": 0.01, "grad_norm": 1.5733911991119385, "learning_rate": 4.774193548387097e-06, "loss": 0.6858, "step": 222 }, { "epoch": 0.01, "grad_norm": 1.4682279825210571, "learning_rate": 4.795698924731183e-06, "loss": 0.7604, "step": 223 }, { "epoch": 0.01, "grad_norm": 1.4119493961334229, "learning_rate": 4.8172043010752695e-06, "loss": 0.6233, "step": 224 }, { "epoch": 0.01, "grad_norm": 1.3660738468170166, "learning_rate": 4.838709677419355e-06, "loss": 0.6225, "step": 225 }, { "epoch": 0.01, "grad_norm": 1.5216950178146362, "learning_rate": 4.860215053763441e-06, "loss": 0.6288, "step": 226 }, { "epoch": 0.01, "grad_norm": 1.4755860567092896, "learning_rate": 4.8817204301075274e-06, "loss": 0.6223, "step": 227 }, { "epoch": 0.01, "grad_norm": 1.5894490480422974, "learning_rate": 4.903225806451613e-06, "loss": 0.6459, "step": 228 }, { "epoch": 0.01, "grad_norm": 1.3762043714523315, "learning_rate": 4.9247311827957e-06, "loss": 0.6469, "step": 229 }, { "epoch": 0.01, "grad_norm": 1.6072083711624146, "learning_rate": 4.946236559139785e-06, "loss": 0.6513, "step": 230 }, { "epoch": 0.01, "grad_norm": 1.4103091955184937, "learning_rate": 4.967741935483871e-06, "loss": 0.6968, "step": 231 }, { "epoch": 0.01, "grad_norm": 1.4812161922454834, "learning_rate": 4.9892473118279576e-06, "loss": 0.6007, "step": 232 }, { "epoch": 0.02, "grad_norm": 1.5096906423568726, "learning_rate": 5.010752688172043e-06, "loss": 0.6626, "step": 233 }, { "epoch": 0.02, "grad_norm": 1.3654091358184814, "learning_rate": 5.032258064516129e-06, "loss": 0.6599, "step": 234 }, { "epoch": 0.02, "grad_norm": 1.6609419584274292, "learning_rate": 5.0537634408602155e-06, "loss": 0.6315, "step": 235 }, { "epoch": 0.02, "grad_norm": 1.5166031122207642, "learning_rate": 5.075268817204302e-06, "loss": 0.6862, "step": 236 }, { "epoch": 0.02, "grad_norm": 1.4699960947036743, "learning_rate": 5.096774193548387e-06, "loss": 0.671, "step": 237 }, { "epoch": 0.02, "grad_norm": 1.4428695440292358, "learning_rate": 5.118279569892473e-06, "loss": 0.6429, "step": 238 }, { "epoch": 0.02, "grad_norm": 1.4387420415878296, "learning_rate": 5.13978494623656e-06, "loss": 0.6608, "step": 239 }, { "epoch": 0.02, "grad_norm": 1.6668487787246704, "learning_rate": 5.161290322580646e-06, "loss": 0.6231, "step": 240 }, { "epoch": 0.02, "grad_norm": 1.3924683332443237, "learning_rate": 5.182795698924732e-06, "loss": 0.6081, "step": 241 }, { "epoch": 0.02, "grad_norm": 1.602899432182312, "learning_rate": 5.204301075268817e-06, "loss": 0.656, "step": 242 }, { "epoch": 0.02, "grad_norm": 1.534250020980835, "learning_rate": 5.2258064516129035e-06, "loss": 0.6371, "step": 243 }, { "epoch": 0.02, "grad_norm": 1.4591553211212158, "learning_rate": 5.24731182795699e-06, "loss": 0.6428, "step": 244 }, { "epoch": 0.02, "grad_norm": 1.4373527765274048, "learning_rate": 5.268817204301076e-06, "loss": 0.6894, "step": 245 }, { "epoch": 0.02, "grad_norm": 1.5036216974258423, "learning_rate": 5.290322580645162e-06, "loss": 0.6552, "step": 246 }, { "epoch": 0.02, "grad_norm": 1.5347460508346558, "learning_rate": 5.311827956989247e-06, "loss": 0.6344, "step": 247 }, { "epoch": 0.02, "grad_norm": 1.5351076126098633, "learning_rate": 5.333333333333334e-06, "loss": 0.6928, "step": 248 }, { "epoch": 0.02, "grad_norm": 1.586463212966919, "learning_rate": 5.35483870967742e-06, "loss": 0.6623, "step": 249 }, { "epoch": 0.02, "grad_norm": 1.2320237159729004, "learning_rate": 5.376344086021506e-06, "loss": 0.5831, "step": 250 }, { "epoch": 0.02, "grad_norm": 1.3360234498977661, "learning_rate": 5.3978494623655916e-06, "loss": 0.6124, "step": 251 }, { "epoch": 0.02, "grad_norm": 1.3803778886795044, "learning_rate": 5.419354838709678e-06, "loss": 0.6539, "step": 252 }, { "epoch": 0.02, "grad_norm": 1.659001111984253, "learning_rate": 5.440860215053764e-06, "loss": 0.655, "step": 253 }, { "epoch": 0.02, "grad_norm": 1.5412524938583374, "learning_rate": 5.46236559139785e-06, "loss": 0.7002, "step": 254 }, { "epoch": 0.02, "grad_norm": 1.5161240100860596, "learning_rate": 5.483870967741935e-06, "loss": 0.6279, "step": 255 }, { "epoch": 0.02, "grad_norm": 1.668499231338501, "learning_rate": 5.505376344086022e-06, "loss": 0.6952, "step": 256 }, { "epoch": 0.02, "grad_norm": 1.6237787008285522, "learning_rate": 5.526881720430108e-06, "loss": 0.6084, "step": 257 }, { "epoch": 0.02, "grad_norm": 1.4061869382858276, "learning_rate": 5.548387096774194e-06, "loss": 0.6424, "step": 258 }, { "epoch": 0.02, "grad_norm": 1.373908281326294, "learning_rate": 5.5698924731182805e-06, "loss": 0.6339, "step": 259 }, { "epoch": 0.02, "grad_norm": 1.5261977910995483, "learning_rate": 5.591397849462365e-06, "loss": 0.612, "step": 260 }, { "epoch": 0.02, "grad_norm": 1.5259449481964111, "learning_rate": 5.612903225806452e-06, "loss": 0.646, "step": 261 }, { "epoch": 0.02, "grad_norm": 1.4658719301223755, "learning_rate": 5.634408602150538e-06, "loss": 0.5905, "step": 262 }, { "epoch": 0.02, "grad_norm": 1.6112391948699951, "learning_rate": 5.655913978494624e-06, "loss": 0.6166, "step": 263 }, { "epoch": 0.02, "grad_norm": 1.5063508749008179, "learning_rate": 5.677419354838711e-06, "loss": 0.6646, "step": 264 }, { "epoch": 0.02, "grad_norm": 1.4465597867965698, "learning_rate": 5.698924731182796e-06, "loss": 0.6425, "step": 265 }, { "epoch": 0.02, "grad_norm": 1.477258563041687, "learning_rate": 5.720430107526882e-06, "loss": 0.6973, "step": 266 }, { "epoch": 0.02, "grad_norm": 1.450239896774292, "learning_rate": 5.7419354838709685e-06, "loss": 0.6351, "step": 267 }, { "epoch": 0.02, "grad_norm": 1.6620668172836304, "learning_rate": 5.763440860215054e-06, "loss": 0.7269, "step": 268 }, { "epoch": 0.02, "grad_norm": 1.3760650157928467, "learning_rate": 5.78494623655914e-06, "loss": 0.5689, "step": 269 }, { "epoch": 0.02, "grad_norm": 1.6403603553771973, "learning_rate": 5.806451612903226e-06, "loss": 0.643, "step": 270 }, { "epoch": 0.02, "grad_norm": 1.5486371517181396, "learning_rate": 5.827956989247312e-06, "loss": 0.701, "step": 271 }, { "epoch": 0.02, "grad_norm": 1.481448769569397, "learning_rate": 5.849462365591399e-06, "loss": 0.6348, "step": 272 }, { "epoch": 0.02, "grad_norm": 1.5361462831497192, "learning_rate": 5.8709677419354835e-06, "loss": 0.5953, "step": 273 }, { "epoch": 0.02, "grad_norm": 1.3090801239013672, "learning_rate": 5.89247311827957e-06, "loss": 0.5737, "step": 274 }, { "epoch": 0.02, "grad_norm": 1.4672064781188965, "learning_rate": 5.9139784946236566e-06, "loss": 0.6612, "step": 275 }, { "epoch": 0.02, "grad_norm": 1.4781343936920166, "learning_rate": 5.935483870967742e-06, "loss": 0.645, "step": 276 }, { "epoch": 0.02, "grad_norm": 1.4411101341247559, "learning_rate": 5.956989247311829e-06, "loss": 0.6702, "step": 277 }, { "epoch": 0.02, "grad_norm": 1.4046645164489746, "learning_rate": 5.978494623655915e-06, "loss": 0.5777, "step": 278 }, { "epoch": 0.02, "grad_norm": 1.403406023979187, "learning_rate": 6e-06, "loss": 0.6674, "step": 279 }, { "epoch": 0.02, "grad_norm": 1.4280766248703003, "learning_rate": 6.021505376344087e-06, "loss": 0.584, "step": 280 }, { "epoch": 0.02, "grad_norm": 1.4250211715698242, "learning_rate": 6.043010752688172e-06, "loss": 0.5981, "step": 281 }, { "epoch": 0.02, "grad_norm": 1.523804783821106, "learning_rate": 6.064516129032259e-06, "loss": 0.6502, "step": 282 }, { "epoch": 0.02, "grad_norm": 1.575208067893982, "learning_rate": 6.086021505376345e-06, "loss": 0.6565, "step": 283 }, { "epoch": 0.02, "grad_norm": 1.3995612859725952, "learning_rate": 6.10752688172043e-06, "loss": 0.6035, "step": 284 }, { "epoch": 0.02, "grad_norm": 1.4009511470794678, "learning_rate": 6.129032258064517e-06, "loss": 0.6319, "step": 285 }, { "epoch": 0.02, "grad_norm": 1.4129953384399414, "learning_rate": 6.150537634408603e-06, "loss": 0.5978, "step": 286 }, { "epoch": 0.02, "grad_norm": 1.4452019929885864, "learning_rate": 6.172043010752688e-06, "loss": 0.6451, "step": 287 }, { "epoch": 0.02, "grad_norm": 1.4830524921417236, "learning_rate": 6.193548387096775e-06, "loss": 0.6973, "step": 288 }, { "epoch": 0.02, "grad_norm": 1.6883000135421753, "learning_rate": 6.21505376344086e-06, "loss": 0.6525, "step": 289 }, { "epoch": 0.02, "grad_norm": 1.333804726600647, "learning_rate": 6.236559139784947e-06, "loss": 0.5934, "step": 290 }, { "epoch": 0.02, "grad_norm": 1.538365364074707, "learning_rate": 6.2580645161290335e-06, "loss": 0.5898, "step": 291 }, { "epoch": 0.02, "grad_norm": 1.4548001289367676, "learning_rate": 6.279569892473118e-06, "loss": 0.6224, "step": 292 }, { "epoch": 0.02, "grad_norm": 1.567641019821167, "learning_rate": 6.301075268817205e-06, "loss": 0.6155, "step": 293 }, { "epoch": 0.02, "grad_norm": 1.4597487449645996, "learning_rate": 6.3225806451612906e-06, "loss": 0.612, "step": 294 }, { "epoch": 0.02, "grad_norm": 1.3867119550704956, "learning_rate": 6.344086021505377e-06, "loss": 0.6706, "step": 295 }, { "epoch": 0.02, "grad_norm": 1.5243204832077026, "learning_rate": 6.365591397849464e-06, "loss": 0.7147, "step": 296 }, { "epoch": 0.02, "grad_norm": 1.4104105234146118, "learning_rate": 6.3870967741935485e-06, "loss": 0.6459, "step": 297 }, { "epoch": 0.02, "grad_norm": 1.5132335424423218, "learning_rate": 6.408602150537635e-06, "loss": 0.6004, "step": 298 }, { "epoch": 0.02, "grad_norm": 1.5100023746490479, "learning_rate": 6.4301075268817215e-06, "loss": 0.5986, "step": 299 }, { "epoch": 0.02, "grad_norm": 1.453492283821106, "learning_rate": 6.451612903225806e-06, "loss": 0.58, "step": 300 }, { "epoch": 0.02, "grad_norm": 1.3622909784317017, "learning_rate": 6.473118279569893e-06, "loss": 0.647, "step": 301 }, { "epoch": 0.02, "grad_norm": 1.4116355180740356, "learning_rate": 6.494623655913979e-06, "loss": 0.664, "step": 302 }, { "epoch": 0.02, "grad_norm": 1.5946540832519531, "learning_rate": 6.516129032258065e-06, "loss": 0.6602, "step": 303 }, { "epoch": 0.02, "grad_norm": 1.6387183666229248, "learning_rate": 6.537634408602152e-06, "loss": 0.7118, "step": 304 }, { "epoch": 0.02, "grad_norm": 1.4372448921203613, "learning_rate": 6.5591397849462365e-06, "loss": 0.6503, "step": 305 }, { "epoch": 0.02, "grad_norm": 1.4555515050888062, "learning_rate": 6.580645161290323e-06, "loss": 0.6234, "step": 306 }, { "epoch": 0.02, "grad_norm": 1.2929739952087402, "learning_rate": 6.602150537634409e-06, "loss": 0.6473, "step": 307 }, { "epoch": 0.02, "grad_norm": 1.3931500911712646, "learning_rate": 6.623655913978495e-06, "loss": 0.5118, "step": 308 }, { "epoch": 0.02, "grad_norm": 1.5991520881652832, "learning_rate": 6.645161290322582e-06, "loss": 0.6386, "step": 309 }, { "epoch": 0.02, "grad_norm": 1.4256038665771484, "learning_rate": 6.666666666666667e-06, "loss": 0.6106, "step": 310 }, { "epoch": 0.02, "grad_norm": 1.4415760040283203, "learning_rate": 6.688172043010753e-06, "loss": 0.6222, "step": 311 }, { "epoch": 0.02, "grad_norm": 1.52562415599823, "learning_rate": 6.70967741935484e-06, "loss": 0.6756, "step": 312 }, { "epoch": 0.02, "grad_norm": 3.325425148010254, "learning_rate": 6.731182795698925e-06, "loss": 0.6109, "step": 313 }, { "epoch": 0.02, "grad_norm": 1.4063736200332642, "learning_rate": 6.752688172043012e-06, "loss": 0.6666, "step": 314 }, { "epoch": 0.02, "grad_norm": 1.5280187129974365, "learning_rate": 6.774193548387097e-06, "loss": 0.5891, "step": 315 }, { "epoch": 0.02, "grad_norm": 1.385825514793396, "learning_rate": 6.795698924731183e-06, "loss": 0.6237, "step": 316 }, { "epoch": 0.02, "grad_norm": 1.7721728086471558, "learning_rate": 6.81720430107527e-06, "loss": 0.6016, "step": 317 }, { "epoch": 0.02, "grad_norm": 1.8388954401016235, "learning_rate": 6.838709677419355e-06, "loss": 0.6452, "step": 318 }, { "epoch": 0.02, "grad_norm": 1.351448893547058, "learning_rate": 6.860215053763441e-06, "loss": 0.6005, "step": 319 }, { "epoch": 0.02, "grad_norm": 1.3983830213546753, "learning_rate": 6.881720430107528e-06, "loss": 0.5883, "step": 320 }, { "epoch": 0.02, "grad_norm": 1.4314123392105103, "learning_rate": 6.9032258064516135e-06, "loss": 0.657, "step": 321 }, { "epoch": 0.02, "grad_norm": 1.3746355772018433, "learning_rate": 6.9247311827957e-06, "loss": 0.5975, "step": 322 }, { "epoch": 0.02, "grad_norm": 1.5231519937515259, "learning_rate": 6.946236559139785e-06, "loss": 0.631, "step": 323 }, { "epoch": 0.02, "grad_norm": 1.5126864910125732, "learning_rate": 6.967741935483871e-06, "loss": 0.6983, "step": 324 }, { "epoch": 0.02, "grad_norm": 1.445299506187439, "learning_rate": 6.989247311827958e-06, "loss": 0.6163, "step": 325 }, { "epoch": 0.02, "grad_norm": 1.571048617362976, "learning_rate": 7.010752688172044e-06, "loss": 0.6489, "step": 326 }, { "epoch": 0.02, "grad_norm": 1.424094557762146, "learning_rate": 7.03225806451613e-06, "loss": 0.667, "step": 327 }, { "epoch": 0.02, "grad_norm": 1.4827238321304321, "learning_rate": 7.053763440860215e-06, "loss": 0.6557, "step": 328 }, { "epoch": 0.02, "grad_norm": 1.4650352001190186, "learning_rate": 7.0752688172043015e-06, "loss": 0.6291, "step": 329 }, { "epoch": 0.02, "grad_norm": 1.3651626110076904, "learning_rate": 7.096774193548388e-06, "loss": 0.5859, "step": 330 }, { "epoch": 0.02, "grad_norm": 1.3819096088409424, "learning_rate": 7.118279569892474e-06, "loss": 0.5993, "step": 331 }, { "epoch": 0.02, "grad_norm": 1.5935832262039185, "learning_rate": 7.139784946236559e-06, "loss": 0.6541, "step": 332 }, { "epoch": 0.02, "grad_norm": 1.3847055435180664, "learning_rate": 7.161290322580646e-06, "loss": 0.5837, "step": 333 }, { "epoch": 0.02, "grad_norm": 1.43685781955719, "learning_rate": 7.182795698924732e-06, "loss": 0.6365, "step": 334 }, { "epoch": 0.02, "grad_norm": 1.4804103374481201, "learning_rate": 7.204301075268818e-06, "loss": 0.67, "step": 335 }, { "epoch": 0.02, "grad_norm": 1.4346507787704468, "learning_rate": 7.225806451612903e-06, "loss": 0.6368, "step": 336 }, { "epoch": 0.02, "grad_norm": 1.534269094467163, "learning_rate": 7.2473118279569895e-06, "loss": 0.6494, "step": 337 }, { "epoch": 0.02, "grad_norm": 1.3490772247314453, "learning_rate": 7.268817204301076e-06, "loss": 0.6325, "step": 338 }, { "epoch": 0.02, "grad_norm": 1.391771912574768, "learning_rate": 7.290322580645162e-06, "loss": 0.5989, "step": 339 }, { "epoch": 0.02, "grad_norm": 1.3887240886688232, "learning_rate": 7.311827956989248e-06, "loss": 0.6177, "step": 340 }, { "epoch": 0.02, "grad_norm": 1.4578704833984375, "learning_rate": 7.333333333333333e-06, "loss": 0.5951, "step": 341 }, { "epoch": 0.02, "grad_norm": 1.4149882793426514, "learning_rate": 7.35483870967742e-06, "loss": 0.6425, "step": 342 }, { "epoch": 0.02, "grad_norm": 1.539797067642212, "learning_rate": 7.376344086021506e-06, "loss": 0.6644, "step": 343 }, { "epoch": 0.02, "grad_norm": 1.428674578666687, "learning_rate": 7.397849462365592e-06, "loss": 0.573, "step": 344 }, { "epoch": 0.02, "grad_norm": 1.3357818126678467, "learning_rate": 7.4193548387096784e-06, "loss": 0.6359, "step": 345 }, { "epoch": 0.02, "grad_norm": 1.3804444074630737, "learning_rate": 7.440860215053764e-06, "loss": 0.6023, "step": 346 }, { "epoch": 0.02, "grad_norm": 1.3689799308776855, "learning_rate": 7.46236559139785e-06, "loss": 0.6149, "step": 347 }, { "epoch": 0.02, "grad_norm": 1.27238130569458, "learning_rate": 7.483870967741936e-06, "loss": 0.5647, "step": 348 }, { "epoch": 0.02, "grad_norm": 1.3942755460739136, "learning_rate": 7.505376344086022e-06, "loss": 0.6322, "step": 349 }, { "epoch": 0.02, "grad_norm": 1.5406869649887085, "learning_rate": 7.526881720430108e-06, "loss": 0.6939, "step": 350 }, { "epoch": 0.02, "grad_norm": 1.4340327978134155, "learning_rate": 7.548387096774194e-06, "loss": 0.6417, "step": 351 }, { "epoch": 0.02, "grad_norm": 1.648555874824524, "learning_rate": 7.56989247311828e-06, "loss": 0.6999, "step": 352 }, { "epoch": 0.02, "grad_norm": 1.4747724533081055, "learning_rate": 7.5913978494623665e-06, "loss": 0.6219, "step": 353 }, { "epoch": 0.02, "grad_norm": 1.513301968574524, "learning_rate": 7.612903225806451e-06, "loss": 0.6661, "step": 354 }, { "epoch": 0.02, "grad_norm": 1.3514729738235474, "learning_rate": 7.634408602150538e-06, "loss": 0.5698, "step": 355 }, { "epoch": 0.02, "grad_norm": 1.9875829219818115, "learning_rate": 7.655913978494625e-06, "loss": 0.6027, "step": 356 }, { "epoch": 0.02, "grad_norm": 1.4943101406097412, "learning_rate": 7.67741935483871e-06, "loss": 0.6099, "step": 357 }, { "epoch": 0.02, "grad_norm": 1.5617083311080933, "learning_rate": 7.698924731182797e-06, "loss": 0.6442, "step": 358 }, { "epoch": 0.02, "grad_norm": 1.8945634365081787, "learning_rate": 7.720430107526882e-06, "loss": 0.6636, "step": 359 }, { "epoch": 0.02, "grad_norm": 1.4128419160842896, "learning_rate": 7.741935483870968e-06, "loss": 0.6618, "step": 360 }, { "epoch": 0.02, "grad_norm": 1.3764652013778687, "learning_rate": 7.763440860215055e-06, "loss": 0.5967, "step": 361 }, { "epoch": 0.02, "grad_norm": 1.4208801984786987, "learning_rate": 7.78494623655914e-06, "loss": 0.5721, "step": 362 }, { "epoch": 0.02, "grad_norm": 1.4541181325912476, "learning_rate": 7.806451612903227e-06, "loss": 0.644, "step": 363 }, { "epoch": 0.02, "grad_norm": 1.4554874897003174, "learning_rate": 7.827956989247312e-06, "loss": 0.6945, "step": 364 }, { "epoch": 0.02, "grad_norm": 1.4966490268707275, "learning_rate": 7.849462365591398e-06, "loss": 0.6241, "step": 365 }, { "epoch": 0.02, "grad_norm": 1.4449836015701294, "learning_rate": 7.870967741935484e-06, "loss": 0.6533, "step": 366 }, { "epoch": 0.02, "grad_norm": 1.424904704093933, "learning_rate": 7.892473118279571e-06, "loss": 0.6773, "step": 367 }, { "epoch": 0.02, "grad_norm": 1.6740007400512695, "learning_rate": 7.913978494623657e-06, "loss": 0.6502, "step": 368 }, { "epoch": 0.02, "grad_norm": 1.51981782913208, "learning_rate": 7.935483870967743e-06, "loss": 0.626, "step": 369 }, { "epoch": 0.02, "grad_norm": 1.4225807189941406, "learning_rate": 7.956989247311828e-06, "loss": 0.5911, "step": 370 }, { "epoch": 0.02, "grad_norm": 1.3910701274871826, "learning_rate": 7.978494623655914e-06, "loss": 0.594, "step": 371 }, { "epoch": 0.02, "grad_norm": 1.4724160432815552, "learning_rate": 8.000000000000001e-06, "loss": 0.6451, "step": 372 }, { "epoch": 0.02, "grad_norm": 1.6140693426132202, "learning_rate": 8.021505376344087e-06, "loss": 0.6575, "step": 373 }, { "epoch": 0.02, "grad_norm": 1.4188967943191528, "learning_rate": 8.043010752688173e-06, "loss": 0.6828, "step": 374 }, { "epoch": 0.02, "grad_norm": 1.5171339511871338, "learning_rate": 8.064516129032258e-06, "loss": 0.6483, "step": 375 }, { "epoch": 0.02, "grad_norm": 1.44259774684906, "learning_rate": 8.086021505376344e-06, "loss": 0.6148, "step": 376 }, { "epoch": 0.02, "grad_norm": 1.4303991794586182, "learning_rate": 8.107526881720431e-06, "loss": 0.6117, "step": 377 }, { "epoch": 0.02, "grad_norm": 1.5881036520004272, "learning_rate": 8.129032258064517e-06, "loss": 0.6405, "step": 378 }, { "epoch": 0.02, "grad_norm": 1.3653823137283325, "learning_rate": 8.150537634408603e-06, "loss": 0.6293, "step": 379 }, { "epoch": 0.02, "grad_norm": 1.4228652715682983, "learning_rate": 8.172043010752689e-06, "loss": 0.5914, "step": 380 }, { "epoch": 0.02, "grad_norm": 1.59874427318573, "learning_rate": 8.193548387096774e-06, "loss": 0.6303, "step": 381 }, { "epoch": 0.02, "grad_norm": 1.4759762287139893, "learning_rate": 8.215053763440862e-06, "loss": 0.6155, "step": 382 }, { "epoch": 0.02, "grad_norm": 1.629198431968689, "learning_rate": 8.236559139784947e-06, "loss": 0.672, "step": 383 }, { "epoch": 0.02, "grad_norm": 1.5371421575546265, "learning_rate": 8.258064516129033e-06, "loss": 0.6386, "step": 384 }, { "epoch": 0.02, "grad_norm": 1.4829614162445068, "learning_rate": 8.279569892473119e-06, "loss": 0.6457, "step": 385 }, { "epoch": 0.02, "grad_norm": 1.405723214149475, "learning_rate": 8.301075268817204e-06, "loss": 0.6468, "step": 386 }, { "epoch": 0.02, "grad_norm": 1.3293776512145996, "learning_rate": 8.322580645161292e-06, "loss": 0.5981, "step": 387 }, { "epoch": 0.03, "grad_norm": 1.3628876209259033, "learning_rate": 8.344086021505376e-06, "loss": 0.6114, "step": 388 }, { "epoch": 0.03, "grad_norm": 1.4189192056655884, "learning_rate": 8.365591397849463e-06, "loss": 0.615, "step": 389 }, { "epoch": 0.03, "grad_norm": 1.535267949104309, "learning_rate": 8.387096774193549e-06, "loss": 0.5922, "step": 390 }, { "epoch": 0.03, "grad_norm": 1.5361080169677734, "learning_rate": 8.408602150537634e-06, "loss": 0.6835, "step": 391 }, { "epoch": 0.03, "grad_norm": 1.510011911392212, "learning_rate": 8.430107526881722e-06, "loss": 0.6477, "step": 392 }, { "epoch": 0.03, "grad_norm": 1.3545199632644653, "learning_rate": 8.451612903225808e-06, "loss": 0.6438, "step": 393 }, { "epoch": 0.03, "grad_norm": 1.480558156967163, "learning_rate": 8.473118279569893e-06, "loss": 0.6526, "step": 394 }, { "epoch": 0.03, "grad_norm": 1.4564388990402222, "learning_rate": 8.494623655913979e-06, "loss": 0.6417, "step": 395 }, { "epoch": 0.03, "grad_norm": 1.479118824005127, "learning_rate": 8.516129032258065e-06, "loss": 0.645, "step": 396 }, { "epoch": 0.03, "grad_norm": 1.4414150714874268, "learning_rate": 8.537634408602152e-06, "loss": 0.6507, "step": 397 }, { "epoch": 0.03, "grad_norm": 1.5302605628967285, "learning_rate": 8.559139784946238e-06, "loss": 0.5955, "step": 398 }, { "epoch": 0.03, "grad_norm": 1.360235333442688, "learning_rate": 8.580645161290323e-06, "loss": 0.5908, "step": 399 }, { "epoch": 0.03, "grad_norm": 1.5893677473068237, "learning_rate": 8.602150537634409e-06, "loss": 0.5966, "step": 400 }, { "epoch": 0.03, "grad_norm": 1.5427097082138062, "learning_rate": 8.623655913978495e-06, "loss": 0.6975, "step": 401 }, { "epoch": 0.03, "grad_norm": 1.5618129968643188, "learning_rate": 8.64516129032258e-06, "loss": 0.667, "step": 402 }, { "epoch": 0.03, "grad_norm": 1.3175947666168213, "learning_rate": 8.666666666666668e-06, "loss": 0.6534, "step": 403 }, { "epoch": 0.03, "grad_norm": 1.5197117328643799, "learning_rate": 8.688172043010754e-06, "loss": 0.6364, "step": 404 }, { "epoch": 0.03, "grad_norm": 1.6528739929199219, "learning_rate": 8.70967741935484e-06, "loss": 0.6234, "step": 405 }, { "epoch": 0.03, "grad_norm": 1.5401033163070679, "learning_rate": 8.731182795698927e-06, "loss": 0.668, "step": 406 }, { "epoch": 0.03, "grad_norm": 1.31411612033844, "learning_rate": 8.75268817204301e-06, "loss": 0.5671, "step": 407 }, { "epoch": 0.03, "grad_norm": 1.4268168210983276, "learning_rate": 8.774193548387098e-06, "loss": 0.5685, "step": 408 }, { "epoch": 0.03, "grad_norm": 1.422167420387268, "learning_rate": 8.795698924731184e-06, "loss": 0.583, "step": 409 }, { "epoch": 0.03, "grad_norm": 1.5414882898330688, "learning_rate": 8.81720430107527e-06, "loss": 0.6869, "step": 410 }, { "epoch": 0.03, "grad_norm": 1.3916350603103638, "learning_rate": 8.838709677419357e-06, "loss": 0.6051, "step": 411 }, { "epoch": 0.03, "grad_norm": 1.2925245761871338, "learning_rate": 8.86021505376344e-06, "loss": 0.6119, "step": 412 }, { "epoch": 0.03, "grad_norm": 1.2883868217468262, "learning_rate": 8.881720430107528e-06, "loss": 0.6466, "step": 413 }, { "epoch": 0.03, "grad_norm": 1.4036626815795898, "learning_rate": 8.903225806451614e-06, "loss": 0.617, "step": 414 }, { "epoch": 0.03, "grad_norm": 1.4841437339782715, "learning_rate": 8.9247311827957e-06, "loss": 0.6489, "step": 415 }, { "epoch": 0.03, "grad_norm": 1.5962668657302856, "learning_rate": 8.946236559139785e-06, "loss": 0.6638, "step": 416 }, { "epoch": 0.03, "grad_norm": 1.7028241157531738, "learning_rate": 8.967741935483871e-06, "loss": 0.6194, "step": 417 }, { "epoch": 0.03, "grad_norm": 1.3263282775878906, "learning_rate": 8.989247311827958e-06, "loss": 0.5959, "step": 418 }, { "epoch": 0.03, "grad_norm": 1.2740802764892578, "learning_rate": 9.010752688172044e-06, "loss": 0.6326, "step": 419 }, { "epoch": 0.03, "grad_norm": 1.385422945022583, "learning_rate": 9.03225806451613e-06, "loss": 0.6279, "step": 420 }, { "epoch": 0.03, "grad_norm": 1.3665378093719482, "learning_rate": 9.053763440860215e-06, "loss": 0.6472, "step": 421 }, { "epoch": 0.03, "grad_norm": 1.4755498170852661, "learning_rate": 9.075268817204301e-06, "loss": 0.62, "step": 422 }, { "epoch": 0.03, "grad_norm": 1.5142929553985596, "learning_rate": 9.096774193548388e-06, "loss": 0.6847, "step": 423 }, { "epoch": 0.03, "grad_norm": 1.5628260374069214, "learning_rate": 9.118279569892474e-06, "loss": 0.7124, "step": 424 }, { "epoch": 0.03, "grad_norm": 1.4346363544464111, "learning_rate": 9.13978494623656e-06, "loss": 0.5884, "step": 425 }, { "epoch": 0.03, "grad_norm": 1.3753560781478882, "learning_rate": 9.161290322580645e-06, "loss": 0.6308, "step": 426 }, { "epoch": 0.03, "grad_norm": 1.3414980173110962, "learning_rate": 9.182795698924733e-06, "loss": 0.6535, "step": 427 }, { "epoch": 0.03, "grad_norm": 1.5046628713607788, "learning_rate": 9.204301075268819e-06, "loss": 0.561, "step": 428 }, { "epoch": 0.03, "grad_norm": 1.4521865844726562, "learning_rate": 9.225806451612904e-06, "loss": 0.6309, "step": 429 }, { "epoch": 0.03, "grad_norm": 1.4248424768447876, "learning_rate": 9.24731182795699e-06, "loss": 0.5882, "step": 430 }, { "epoch": 0.03, "grad_norm": 1.8477834463119507, "learning_rate": 9.268817204301076e-06, "loss": 0.6688, "step": 431 }, { "epoch": 0.03, "grad_norm": 1.568738341331482, "learning_rate": 9.290322580645163e-06, "loss": 0.6429, "step": 432 }, { "epoch": 0.03, "grad_norm": 1.383816123008728, "learning_rate": 9.311827956989249e-06, "loss": 0.6467, "step": 433 }, { "epoch": 0.03, "grad_norm": 1.3504666090011597, "learning_rate": 9.333333333333334e-06, "loss": 0.6403, "step": 434 }, { "epoch": 0.03, "grad_norm": 1.5479354858398438, "learning_rate": 9.35483870967742e-06, "loss": 0.6702, "step": 435 }, { "epoch": 0.03, "grad_norm": 1.6439950466156006, "learning_rate": 9.376344086021506e-06, "loss": 0.5807, "step": 436 }, { "epoch": 0.03, "grad_norm": 1.3120920658111572, "learning_rate": 9.397849462365593e-06, "loss": 0.653, "step": 437 }, { "epoch": 0.03, "grad_norm": 1.4077050685882568, "learning_rate": 9.419354838709677e-06, "loss": 0.6111, "step": 438 }, { "epoch": 0.03, "grad_norm": 1.539696455001831, "learning_rate": 9.440860215053764e-06, "loss": 0.6873, "step": 439 }, { "epoch": 0.03, "grad_norm": 1.4033373594284058, "learning_rate": 9.46236559139785e-06, "loss": 0.6521, "step": 440 }, { "epoch": 0.03, "grad_norm": 1.5029712915420532, "learning_rate": 9.483870967741936e-06, "loss": 0.6041, "step": 441 }, { "epoch": 0.03, "grad_norm": 1.3496730327606201, "learning_rate": 9.505376344086023e-06, "loss": 0.5731, "step": 442 }, { "epoch": 0.03, "grad_norm": 1.3059781789779663, "learning_rate": 9.526881720430107e-06, "loss": 0.5786, "step": 443 }, { "epoch": 0.03, "grad_norm": 1.3550257682800293, "learning_rate": 9.548387096774195e-06, "loss": 0.638, "step": 444 }, { "epoch": 0.03, "grad_norm": 1.3789417743682861, "learning_rate": 9.56989247311828e-06, "loss": 0.6218, "step": 445 }, { "epoch": 0.03, "grad_norm": 1.393643856048584, "learning_rate": 9.591397849462366e-06, "loss": 0.6285, "step": 446 }, { "epoch": 0.03, "grad_norm": 1.3636863231658936, "learning_rate": 9.612903225806453e-06, "loss": 0.629, "step": 447 }, { "epoch": 0.03, "grad_norm": 1.3965668678283691, "learning_rate": 9.634408602150539e-06, "loss": 0.6305, "step": 448 }, { "epoch": 0.03, "grad_norm": 1.2116957902908325, "learning_rate": 9.655913978494625e-06, "loss": 0.5482, "step": 449 }, { "epoch": 0.03, "grad_norm": 1.4412785768508911, "learning_rate": 9.67741935483871e-06, "loss": 0.5748, "step": 450 }, { "epoch": 0.03, "grad_norm": 1.5753326416015625, "learning_rate": 9.698924731182796e-06, "loss": 0.6359, "step": 451 }, { "epoch": 0.03, "grad_norm": 1.2655643224716187, "learning_rate": 9.720430107526882e-06, "loss": 0.5286, "step": 452 }, { "epoch": 0.03, "grad_norm": 1.855661392211914, "learning_rate": 9.74193548387097e-06, "loss": 0.6477, "step": 453 }, { "epoch": 0.03, "grad_norm": 1.4865162372589111, "learning_rate": 9.763440860215055e-06, "loss": 0.6546, "step": 454 }, { "epoch": 0.03, "grad_norm": 2.033989906311035, "learning_rate": 9.78494623655914e-06, "loss": 0.5778, "step": 455 }, { "epoch": 0.03, "grad_norm": 1.5287559032440186, "learning_rate": 9.806451612903226e-06, "loss": 0.5877, "step": 456 }, { "epoch": 0.03, "grad_norm": 1.5013145208358765, "learning_rate": 9.827956989247312e-06, "loss": 0.5934, "step": 457 }, { "epoch": 0.03, "grad_norm": 1.185533046722412, "learning_rate": 9.8494623655914e-06, "loss": 0.5525, "step": 458 }, { "epoch": 0.03, "grad_norm": 1.378401756286621, "learning_rate": 9.870967741935485e-06, "loss": 0.5672, "step": 459 }, { "epoch": 0.03, "grad_norm": 1.553414225578308, "learning_rate": 9.89247311827957e-06, "loss": 0.6238, "step": 460 }, { "epoch": 0.03, "grad_norm": 1.4193683862686157, "learning_rate": 9.913978494623658e-06, "loss": 0.6365, "step": 461 }, { "epoch": 0.03, "grad_norm": 1.342771053314209, "learning_rate": 9.935483870967742e-06, "loss": 0.597, "step": 462 }, { "epoch": 0.03, "grad_norm": 1.381080985069275, "learning_rate": 9.95698924731183e-06, "loss": 0.5718, "step": 463 }, { "epoch": 0.03, "grad_norm": 1.290152907371521, "learning_rate": 9.978494623655915e-06, "loss": 0.6031, "step": 464 }, { "epoch": 0.03, "grad_norm": 1.4510380029678345, "learning_rate": 1e-05, "loss": 0.6455, "step": 465 }, { "epoch": 0.03, "grad_norm": 1.433104395866394, "learning_rate": 9.999999890731451e-06, "loss": 0.6704, "step": 466 }, { "epoch": 0.03, "grad_norm": 1.379442572593689, "learning_rate": 9.999999562925806e-06, "loss": 0.6233, "step": 467 }, { "epoch": 0.03, "grad_norm": 1.3878716230392456, "learning_rate": 9.99999901658308e-06, "loss": 0.5847, "step": 468 }, { "epoch": 0.03, "grad_norm": 1.3508026599884033, "learning_rate": 9.999998251703298e-06, "loss": 0.6485, "step": 469 }, { "epoch": 0.03, "grad_norm": 1.4624731540679932, "learning_rate": 9.999997268286493e-06, "loss": 0.6421, "step": 470 }, { "epoch": 0.03, "grad_norm": 1.4106872081756592, "learning_rate": 9.999996066332705e-06, "loss": 0.615, "step": 471 }, { "epoch": 0.03, "grad_norm": 1.492403268814087, "learning_rate": 9.999994645841991e-06, "loss": 0.6585, "step": 472 }, { "epoch": 0.03, "grad_norm": 1.5375527143478394, "learning_rate": 9.99999300681441e-06, "loss": 0.6172, "step": 473 }, { "epoch": 0.03, "grad_norm": 1.4236764907836914, "learning_rate": 9.999991149250037e-06, "loss": 0.6658, "step": 474 }, { "epoch": 0.03, "grad_norm": 1.3868240118026733, "learning_rate": 9.99998907314895e-06, "loss": 0.64, "step": 475 }, { "epoch": 0.03, "grad_norm": 1.4079457521438599, "learning_rate": 9.999986778511239e-06, "loss": 0.6367, "step": 476 }, { "epoch": 0.03, "grad_norm": 1.4108283519744873, "learning_rate": 9.999984265337008e-06, "loss": 0.5832, "step": 477 }, { "epoch": 0.03, "grad_norm": 1.5880156755447388, "learning_rate": 9.999981533626365e-06, "loss": 0.661, "step": 478 }, { "epoch": 0.03, "grad_norm": 1.3079686164855957, "learning_rate": 9.999978583379428e-06, "loss": 0.5729, "step": 479 }, { "epoch": 0.03, "grad_norm": 1.3214693069458008, "learning_rate": 9.999975414596328e-06, "loss": 0.646, "step": 480 }, { "epoch": 0.03, "grad_norm": 1.7438883781433105, "learning_rate": 9.999972027277203e-06, "loss": 0.5407, "step": 481 }, { "epoch": 0.03, "grad_norm": 1.6696085929870605, "learning_rate": 9.999968421422202e-06, "loss": 0.6715, "step": 482 }, { "epoch": 0.03, "grad_norm": 1.4836169481277466, "learning_rate": 9.99996459703148e-06, "loss": 0.6246, "step": 483 }, { "epoch": 0.03, "grad_norm": 1.3282831907272339, "learning_rate": 9.999960554105203e-06, "loss": 0.6287, "step": 484 }, { "epoch": 0.03, "grad_norm": 1.4618878364562988, "learning_rate": 9.999956292643553e-06, "loss": 0.663, "step": 485 }, { "epoch": 0.03, "grad_norm": 1.434822916984558, "learning_rate": 9.999951812646715e-06, "loss": 0.6616, "step": 486 }, { "epoch": 0.03, "grad_norm": 1.3179460763931274, "learning_rate": 9.99994711411488e-06, "loss": 0.607, "step": 487 }, { "epoch": 0.03, "grad_norm": 1.4073739051818848, "learning_rate": 9.999942197048258e-06, "loss": 0.5672, "step": 488 }, { "epoch": 0.03, "grad_norm": 1.4919414520263672, "learning_rate": 9.999937061447063e-06, "loss": 0.6296, "step": 489 }, { "epoch": 0.03, "grad_norm": 1.357347846031189, "learning_rate": 9.999931707311519e-06, "loss": 0.6028, "step": 490 }, { "epoch": 0.03, "grad_norm": 1.468239665031433, "learning_rate": 9.99992613464186e-06, "loss": 0.6483, "step": 491 }, { "epoch": 0.03, "grad_norm": 1.452073574066162, "learning_rate": 9.99992034343833e-06, "loss": 0.6422, "step": 492 }, { "epoch": 0.03, "grad_norm": 1.3294585943222046, "learning_rate": 9.999914333701181e-06, "loss": 0.6026, "step": 493 }, { "epoch": 0.03, "grad_norm": 1.3818359375, "learning_rate": 9.999908105430676e-06, "loss": 0.6782, "step": 494 }, { "epoch": 0.03, "grad_norm": 1.5039681196212769, "learning_rate": 9.999901658627089e-06, "loss": 0.66, "step": 495 }, { "epoch": 0.03, "grad_norm": 1.7650656700134277, "learning_rate": 9.9998949932907e-06, "loss": 0.6886, "step": 496 }, { "epoch": 0.03, "grad_norm": 1.3962520360946655, "learning_rate": 9.999888109421801e-06, "loss": 0.6432, "step": 497 }, { "epoch": 0.03, "grad_norm": 1.2979069948196411, "learning_rate": 9.99988100702069e-06, "loss": 0.5862, "step": 498 }, { "epoch": 0.03, "grad_norm": 1.3877103328704834, "learning_rate": 9.999873686087685e-06, "loss": 0.6497, "step": 499 }, { "epoch": 0.03, "grad_norm": 1.3693751096725464, "learning_rate": 9.999866146623096e-06, "loss": 0.5931, "step": 500 }, { "epoch": 0.03, "grad_norm": 1.3379958868026733, "learning_rate": 9.999858388627262e-06, "loss": 0.5726, "step": 501 }, { "epoch": 0.03, "grad_norm": 1.3233726024627686, "learning_rate": 9.999850412100513e-06, "loss": 0.6871, "step": 502 }, { "epoch": 0.03, "grad_norm": 1.3882817029953003, "learning_rate": 9.999842217043206e-06, "loss": 0.5801, "step": 503 }, { "epoch": 0.03, "grad_norm": 1.359798789024353, "learning_rate": 9.999833803455695e-06, "loss": 0.5598, "step": 504 }, { "epoch": 0.03, "grad_norm": 1.3011025190353394, "learning_rate": 9.999825171338347e-06, "loss": 0.6216, "step": 505 }, { "epoch": 0.03, "grad_norm": 1.301864504814148, "learning_rate": 9.99981632069154e-06, "loss": 0.6176, "step": 506 }, { "epoch": 0.03, "grad_norm": 1.224688172340393, "learning_rate": 9.999807251515662e-06, "loss": 0.5898, "step": 507 }, { "epoch": 0.03, "grad_norm": 1.2934397459030151, "learning_rate": 9.999797963811108e-06, "loss": 0.6033, "step": 508 }, { "epoch": 0.03, "grad_norm": 1.5456393957138062, "learning_rate": 9.999788457578287e-06, "loss": 0.6587, "step": 509 }, { "epoch": 0.03, "grad_norm": 1.2995927333831787, "learning_rate": 9.99977873281761e-06, "loss": 0.5906, "step": 510 }, { "epoch": 0.03, "grad_norm": 1.4009116888046265, "learning_rate": 9.999768789529505e-06, "loss": 0.6451, "step": 511 }, { "epoch": 0.03, "grad_norm": 1.4583503007888794, "learning_rate": 9.999758627714407e-06, "loss": 0.5967, "step": 512 }, { "epoch": 0.03, "grad_norm": 1.3689504861831665, "learning_rate": 9.999748247372755e-06, "loss": 0.5852, "step": 513 }, { "epoch": 0.03, "grad_norm": 1.3831216096878052, "learning_rate": 9.99973764850501e-06, "loss": 0.6234, "step": 514 }, { "epoch": 0.03, "grad_norm": 1.3693561553955078, "learning_rate": 9.999726831111631e-06, "loss": 0.6173, "step": 515 }, { "epoch": 0.03, "grad_norm": 1.618941068649292, "learning_rate": 9.999715795193092e-06, "loss": 0.5923, "step": 516 }, { "epoch": 0.03, "grad_norm": 1.3793164491653442, "learning_rate": 9.999704540749875e-06, "loss": 0.5498, "step": 517 }, { "epoch": 0.03, "grad_norm": 1.4890034198760986, "learning_rate": 9.999693067782473e-06, "loss": 0.623, "step": 518 }, { "epoch": 0.03, "grad_norm": 1.3933348655700684, "learning_rate": 9.999681376291383e-06, "loss": 0.5993, "step": 519 }, { "epoch": 0.03, "grad_norm": 1.3510125875473022, "learning_rate": 9.999669466277121e-06, "loss": 0.5696, "step": 520 }, { "epoch": 0.03, "grad_norm": 1.4070014953613281, "learning_rate": 9.999657337740208e-06, "loss": 0.6515, "step": 521 }, { "epoch": 0.03, "grad_norm": 1.4968876838684082, "learning_rate": 9.99964499068117e-06, "loss": 0.6866, "step": 522 }, { "epoch": 0.03, "grad_norm": 1.4585641622543335, "learning_rate": 9.99963242510055e-06, "loss": 0.6027, "step": 523 }, { "epoch": 0.03, "grad_norm": 1.3629721403121948, "learning_rate": 9.999619640998895e-06, "loss": 0.6835, "step": 524 }, { "epoch": 0.03, "grad_norm": 1.409943699836731, "learning_rate": 9.999606638376765e-06, "loss": 0.6212, "step": 525 }, { "epoch": 0.03, "grad_norm": 1.3533695936203003, "learning_rate": 9.999593417234727e-06, "loss": 0.6604, "step": 526 }, { "epoch": 0.03, "grad_norm": 1.564877986907959, "learning_rate": 9.999579977573361e-06, "loss": 0.6461, "step": 527 }, { "epoch": 0.03, "grad_norm": 1.388977289199829, "learning_rate": 9.999566319393253e-06, "loss": 0.6038, "step": 528 }, { "epoch": 0.03, "grad_norm": 1.4242626428604126, "learning_rate": 9.999552442694999e-06, "loss": 0.5827, "step": 529 }, { "epoch": 0.03, "grad_norm": 1.5893322229385376, "learning_rate": 9.999538347479209e-06, "loss": 0.6464, "step": 530 }, { "epoch": 0.03, "grad_norm": 1.460611343383789, "learning_rate": 9.999524033746496e-06, "loss": 0.6333, "step": 531 }, { "epoch": 0.03, "grad_norm": 1.4043097496032715, "learning_rate": 9.999509501497486e-06, "loss": 0.5738, "step": 532 }, { "epoch": 0.03, "grad_norm": 1.3280092477798462, "learning_rate": 9.999494750732814e-06, "loss": 0.6278, "step": 533 }, { "epoch": 0.03, "grad_norm": 1.9918292760849, "learning_rate": 9.999479781453128e-06, "loss": 0.6604, "step": 534 }, { "epoch": 0.03, "grad_norm": 1.3885629177093506, "learning_rate": 9.999464593659077e-06, "loss": 0.6558, "step": 535 }, { "epoch": 0.03, "grad_norm": 1.383156418800354, "learning_rate": 9.999449187351328e-06, "loss": 0.6288, "step": 536 }, { "epoch": 0.03, "grad_norm": 1.432924747467041, "learning_rate": 9.999433562530553e-06, "loss": 0.5839, "step": 537 }, { "epoch": 0.03, "grad_norm": 1.4221415519714355, "learning_rate": 9.999417719197437e-06, "loss": 0.6288, "step": 538 }, { "epoch": 0.03, "grad_norm": 1.3455404043197632, "learning_rate": 9.99940165735267e-06, "loss": 0.6169, "step": 539 }, { "epoch": 0.03, "grad_norm": 1.4397646188735962, "learning_rate": 9.999385376996957e-06, "loss": 0.5864, "step": 540 }, { "epoch": 0.03, "grad_norm": 1.6049394607543945, "learning_rate": 9.999368878131007e-06, "loss": 0.6326, "step": 541 }, { "epoch": 0.03, "grad_norm": 1.453466773033142, "learning_rate": 9.99935216075554e-06, "loss": 0.6733, "step": 542 }, { "epoch": 0.04, "grad_norm": 1.4329071044921875, "learning_rate": 9.999335224871291e-06, "loss": 0.5959, "step": 543 }, { "epoch": 0.04, "grad_norm": 1.3323159217834473, "learning_rate": 9.999318070478996e-06, "loss": 0.6009, "step": 544 }, { "epoch": 0.04, "grad_norm": 1.4180116653442383, "learning_rate": 9.999300697579409e-06, "loss": 0.6396, "step": 545 }, { "epoch": 0.04, "grad_norm": 1.5099537372589111, "learning_rate": 9.999283106173284e-06, "loss": 0.6338, "step": 546 }, { "epoch": 0.04, "grad_norm": 1.2980222702026367, "learning_rate": 9.999265296261393e-06, "loss": 0.5805, "step": 547 }, { "epoch": 0.04, "grad_norm": 1.3887156248092651, "learning_rate": 9.999247267844516e-06, "loss": 0.6454, "step": 548 }, { "epoch": 0.04, "grad_norm": 1.5193780660629272, "learning_rate": 9.999229020923438e-06, "loss": 0.678, "step": 549 }, { "epoch": 0.04, "grad_norm": 1.4456068277359009, "learning_rate": 9.99921055549896e-06, "loss": 0.6187, "step": 550 }, { "epoch": 0.04, "grad_norm": 1.2680203914642334, "learning_rate": 9.999191871571883e-06, "loss": 0.6437, "step": 551 }, { "epoch": 0.04, "grad_norm": 1.6735711097717285, "learning_rate": 9.999172969143029e-06, "loss": 0.6277, "step": 552 }, { "epoch": 0.04, "grad_norm": 1.4722726345062256, "learning_rate": 9.999153848213224e-06, "loss": 0.6461, "step": 553 }, { "epoch": 0.04, "grad_norm": 4.736605167388916, "learning_rate": 9.9991345087833e-06, "loss": 0.6631, "step": 554 }, { "epoch": 0.04, "grad_norm": 1.2990950345993042, "learning_rate": 9.999114950854105e-06, "loss": 0.5939, "step": 555 }, { "epoch": 0.04, "grad_norm": 1.350610375404358, "learning_rate": 9.999095174426495e-06, "loss": 0.6086, "step": 556 }, { "epoch": 0.04, "grad_norm": 1.4150111675262451, "learning_rate": 9.999075179501332e-06, "loss": 0.6124, "step": 557 }, { "epoch": 0.04, "grad_norm": 1.3097964525222778, "learning_rate": 9.99905496607949e-06, "loss": 0.6472, "step": 558 }, { "epoch": 0.04, "grad_norm": 1.6413530111312866, "learning_rate": 9.999034534161853e-06, "loss": 0.5801, "step": 559 }, { "epoch": 0.04, "grad_norm": 1.4512473344802856, "learning_rate": 9.999013883749316e-06, "loss": 0.631, "step": 560 }, { "epoch": 0.04, "grad_norm": 1.5451297760009766, "learning_rate": 9.998993014842776e-06, "loss": 0.6233, "step": 561 }, { "epoch": 0.04, "grad_norm": 1.3480613231658936, "learning_rate": 9.998971927443153e-06, "loss": 0.583, "step": 562 }, { "epoch": 0.04, "grad_norm": 1.428259015083313, "learning_rate": 9.998950621551362e-06, "loss": 0.5885, "step": 563 }, { "epoch": 0.04, "grad_norm": 1.423348307609558, "learning_rate": 9.99892909716834e-06, "loss": 0.63, "step": 564 }, { "epoch": 0.04, "grad_norm": 1.2922760248184204, "learning_rate": 9.998907354295023e-06, "loss": 0.6022, "step": 565 }, { "epoch": 0.04, "grad_norm": 1.4418048858642578, "learning_rate": 9.998885392932361e-06, "loss": 0.626, "step": 566 }, { "epoch": 0.04, "grad_norm": 1.3452619314193726, "learning_rate": 9.998863213081316e-06, "loss": 0.6197, "step": 567 }, { "epoch": 0.04, "grad_norm": 1.3493647575378418, "learning_rate": 9.998840814742858e-06, "loss": 0.611, "step": 568 }, { "epoch": 0.04, "grad_norm": 1.418599009513855, "learning_rate": 9.998818197917965e-06, "loss": 0.6032, "step": 569 }, { "epoch": 0.04, "grad_norm": 1.3333308696746826, "learning_rate": 9.998795362607626e-06, "loss": 0.6211, "step": 570 }, { "epoch": 0.04, "grad_norm": 1.3154845237731934, "learning_rate": 9.99877230881284e-06, "loss": 0.6268, "step": 571 }, { "epoch": 0.04, "grad_norm": 1.3190644979476929, "learning_rate": 9.998749036534612e-06, "loss": 0.5948, "step": 572 }, { "epoch": 0.04, "grad_norm": 1.2858837842941284, "learning_rate": 9.998725545773961e-06, "loss": 0.6411, "step": 573 }, { "epoch": 0.04, "grad_norm": 1.8751180171966553, "learning_rate": 9.998701836531913e-06, "loss": 0.6134, "step": 574 }, { "epoch": 0.04, "grad_norm": 1.3727989196777344, "learning_rate": 9.998677908809505e-06, "loss": 0.6233, "step": 575 }, { "epoch": 0.04, "grad_norm": 1.483923316001892, "learning_rate": 9.998653762607782e-06, "loss": 0.6584, "step": 576 }, { "epoch": 0.04, "grad_norm": 1.3740060329437256, "learning_rate": 9.998629397927798e-06, "loss": 0.6098, "step": 577 }, { "epoch": 0.04, "grad_norm": 1.2990485429763794, "learning_rate": 9.998604814770623e-06, "loss": 0.5876, "step": 578 }, { "epoch": 0.04, "grad_norm": 1.546345591545105, "learning_rate": 9.998580013137327e-06, "loss": 0.6225, "step": 579 }, { "epoch": 0.04, "grad_norm": 1.3674705028533936, "learning_rate": 9.998554993028995e-06, "loss": 0.5788, "step": 580 }, { "epoch": 0.04, "grad_norm": 1.3366996049880981, "learning_rate": 9.99852975444672e-06, "loss": 0.6341, "step": 581 }, { "epoch": 0.04, "grad_norm": 1.3841782808303833, "learning_rate": 9.998504297391606e-06, "loss": 0.6211, "step": 582 }, { "epoch": 0.04, "grad_norm": 1.4395897388458252, "learning_rate": 9.998478621864765e-06, "loss": 0.6356, "step": 583 }, { "epoch": 0.04, "grad_norm": 1.3469678163528442, "learning_rate": 9.99845272786732e-06, "loss": 0.5942, "step": 584 }, { "epoch": 0.04, "grad_norm": 1.296804666519165, "learning_rate": 9.998426615400402e-06, "loss": 0.5655, "step": 585 }, { "epoch": 0.04, "grad_norm": 2.049987316131592, "learning_rate": 9.998400284465155e-06, "loss": 0.6376, "step": 586 }, { "epoch": 0.04, "grad_norm": 1.4488509893417358, "learning_rate": 9.998373735062725e-06, "loss": 0.6405, "step": 587 }, { "epoch": 0.04, "grad_norm": 1.6230138540267944, "learning_rate": 9.998346967194277e-06, "loss": 0.6308, "step": 588 }, { "epoch": 0.04, "grad_norm": 1.4305623769760132, "learning_rate": 9.998319980860977e-06, "loss": 0.642, "step": 589 }, { "epoch": 0.04, "grad_norm": 1.458619236946106, "learning_rate": 9.998292776064009e-06, "loss": 0.7027, "step": 590 }, { "epoch": 0.04, "grad_norm": 1.4542146921157837, "learning_rate": 9.998265352804557e-06, "loss": 0.6332, "step": 591 }, { "epoch": 0.04, "grad_norm": 1.3537094593048096, "learning_rate": 9.998237711083825e-06, "loss": 0.6488, "step": 592 }, { "epoch": 0.04, "grad_norm": 1.2176471948623657, "learning_rate": 9.998209850903015e-06, "loss": 0.6339, "step": 593 }, { "epoch": 0.04, "grad_norm": 1.4143359661102295, "learning_rate": 9.99818177226335e-06, "loss": 0.6603, "step": 594 }, { "epoch": 0.04, "grad_norm": 1.2444353103637695, "learning_rate": 9.998153475166054e-06, "loss": 0.5806, "step": 595 }, { "epoch": 0.04, "grad_norm": 1.3231613636016846, "learning_rate": 9.998124959612366e-06, "loss": 0.6305, "step": 596 }, { "epoch": 0.04, "grad_norm": 1.3348722457885742, "learning_rate": 9.998096225603532e-06, "loss": 0.5753, "step": 597 }, { "epoch": 0.04, "grad_norm": 1.3873306512832642, "learning_rate": 9.998067273140805e-06, "loss": 0.6036, "step": 598 }, { "epoch": 0.04, "grad_norm": 1.223405361175537, "learning_rate": 9.998038102225454e-06, "loss": 0.6101, "step": 599 }, { "epoch": 0.04, "grad_norm": 1.378750205039978, "learning_rate": 9.998008712858753e-06, "loss": 0.594, "step": 600 }, { "epoch": 0.04, "grad_norm": 1.3959145545959473, "learning_rate": 9.997979105041986e-06, "loss": 0.5738, "step": 601 }, { "epoch": 0.04, "grad_norm": 1.3436325788497925, "learning_rate": 9.997949278776446e-06, "loss": 0.5957, "step": 602 }, { "epoch": 0.04, "grad_norm": 1.5797841548919678, "learning_rate": 9.99791923406344e-06, "loss": 0.6471, "step": 603 }, { "epoch": 0.04, "grad_norm": 1.5878989696502686, "learning_rate": 9.997888970904279e-06, "loss": 0.6698, "step": 604 }, { "epoch": 0.04, "grad_norm": 1.3618369102478027, "learning_rate": 9.997858489300284e-06, "loss": 0.6254, "step": 605 }, { "epoch": 0.04, "grad_norm": 1.2332717180252075, "learning_rate": 9.99782778925279e-06, "loss": 0.6083, "step": 606 }, { "epoch": 0.04, "grad_norm": 1.3141379356384277, "learning_rate": 9.997796870763138e-06, "loss": 0.6438, "step": 607 }, { "epoch": 0.04, "grad_norm": 1.4123797416687012, "learning_rate": 9.997765733832678e-06, "loss": 0.6111, "step": 608 }, { "epoch": 0.04, "grad_norm": 1.2908601760864258, "learning_rate": 9.997734378462773e-06, "loss": 0.6034, "step": 609 }, { "epoch": 0.04, "grad_norm": 1.3210546970367432, "learning_rate": 9.997702804654794e-06, "loss": 0.6191, "step": 610 }, { "epoch": 0.04, "grad_norm": 1.4057574272155762, "learning_rate": 9.99767101241012e-06, "loss": 0.5925, "step": 611 }, { "epoch": 0.04, "grad_norm": 1.3981122970581055, "learning_rate": 9.997639001730137e-06, "loss": 0.639, "step": 612 }, { "epoch": 0.04, "grad_norm": 1.5295166969299316, "learning_rate": 9.99760677261625e-06, "loss": 0.6155, "step": 613 }, { "epoch": 0.04, "grad_norm": 1.372321367263794, "learning_rate": 9.997574325069864e-06, "loss": 0.6393, "step": 614 }, { "epoch": 0.04, "grad_norm": 1.5501377582550049, "learning_rate": 9.997541659092399e-06, "loss": 0.6175, "step": 615 }, { "epoch": 0.04, "grad_norm": 1.3083739280700684, "learning_rate": 9.99750877468528e-06, "loss": 0.5678, "step": 616 }, { "epoch": 0.04, "grad_norm": 1.4250694513320923, "learning_rate": 9.997475671849948e-06, "loss": 0.6169, "step": 617 }, { "epoch": 0.04, "grad_norm": 1.3350144624710083, "learning_rate": 9.997442350587847e-06, "loss": 0.6191, "step": 618 }, { "epoch": 0.04, "grad_norm": 1.3227037191390991, "learning_rate": 9.997408810900435e-06, "loss": 0.5645, "step": 619 }, { "epoch": 0.04, "grad_norm": 1.3600165843963623, "learning_rate": 9.997375052789177e-06, "loss": 0.6019, "step": 620 }, { "epoch": 0.04, "grad_norm": 1.417001724243164, "learning_rate": 9.99734107625555e-06, "loss": 0.6108, "step": 621 }, { "epoch": 0.04, "grad_norm": 1.4260709285736084, "learning_rate": 9.997306881301037e-06, "loss": 0.6372, "step": 622 }, { "epoch": 0.04, "grad_norm": 1.6552056074142456, "learning_rate": 9.997272467927133e-06, "loss": 0.6888, "step": 623 }, { "epoch": 0.04, "grad_norm": 1.3717509508132935, "learning_rate": 9.997237836135343e-06, "loss": 0.6547, "step": 624 }, { "epoch": 0.04, "grad_norm": 1.3084243535995483, "learning_rate": 9.997202985927179e-06, "loss": 0.6279, "step": 625 }, { "epoch": 0.04, "grad_norm": 1.4391580820083618, "learning_rate": 9.997167917304167e-06, "loss": 0.5672, "step": 626 }, { "epoch": 0.04, "grad_norm": 1.2989758253097534, "learning_rate": 9.997132630267838e-06, "loss": 0.5978, "step": 627 }, { "epoch": 0.04, "grad_norm": 1.4140212535858154, "learning_rate": 9.997097124819735e-06, "loss": 0.6185, "step": 628 }, { "epoch": 0.04, "grad_norm": 1.3084220886230469, "learning_rate": 9.997061400961407e-06, "loss": 0.6481, "step": 629 }, { "epoch": 0.04, "grad_norm": 1.3674579858779907, "learning_rate": 9.99702545869442e-06, "loss": 0.5874, "step": 630 }, { "epoch": 0.04, "grad_norm": 1.3004659414291382, "learning_rate": 9.996989298020342e-06, "loss": 0.6056, "step": 631 }, { "epoch": 0.04, "grad_norm": 1.3821625709533691, "learning_rate": 9.996952918940754e-06, "loss": 0.6339, "step": 632 }, { "epoch": 0.04, "grad_norm": 1.2644506692886353, "learning_rate": 9.996916321457248e-06, "loss": 0.6139, "step": 633 }, { "epoch": 0.04, "grad_norm": 1.3372913599014282, "learning_rate": 9.99687950557142e-06, "loss": 0.663, "step": 634 }, { "epoch": 0.04, "grad_norm": 1.314698338508606, "learning_rate": 9.99684247128488e-06, "loss": 0.6513, "step": 635 }, { "epoch": 0.04, "grad_norm": 1.299383521080017, "learning_rate": 9.996805218599249e-06, "loss": 0.5934, "step": 636 }, { "epoch": 0.04, "grad_norm": 1.3698617219924927, "learning_rate": 9.996767747516155e-06, "loss": 0.5964, "step": 637 }, { "epoch": 0.04, "grad_norm": 1.2554208040237427, "learning_rate": 9.996730058037231e-06, "loss": 0.6006, "step": 638 }, { "epoch": 0.04, "grad_norm": 1.4697881937026978, "learning_rate": 9.99669215016413e-06, "loss": 0.6822, "step": 639 }, { "epoch": 0.04, "grad_norm": 1.342803716659546, "learning_rate": 9.996654023898509e-06, "loss": 0.5958, "step": 640 }, { "epoch": 0.04, "grad_norm": 1.2601337432861328, "learning_rate": 9.99661567924203e-06, "loss": 0.6294, "step": 641 }, { "epoch": 0.04, "grad_norm": 1.2969474792480469, "learning_rate": 9.996577116196372e-06, "loss": 0.6153, "step": 642 }, { "epoch": 0.04, "grad_norm": 1.4883581399917603, "learning_rate": 9.996538334763217e-06, "loss": 0.6528, "step": 643 }, { "epoch": 0.04, "grad_norm": 1.312225580215454, "learning_rate": 9.996499334944265e-06, "loss": 0.6435, "step": 644 }, { "epoch": 0.04, "grad_norm": 1.3484292030334473, "learning_rate": 9.996460116741217e-06, "loss": 0.6573, "step": 645 }, { "epoch": 0.04, "grad_norm": 1.5737913846969604, "learning_rate": 9.996420680155789e-06, "loss": 0.5925, "step": 646 }, { "epoch": 0.04, "grad_norm": 1.3459078073501587, "learning_rate": 9.996381025189704e-06, "loss": 0.5946, "step": 647 }, { "epoch": 0.04, "grad_norm": 1.2947304248809814, "learning_rate": 9.996341151844694e-06, "loss": 0.5839, "step": 648 }, { "epoch": 0.04, "grad_norm": 1.2440167665481567, "learning_rate": 9.996301060122506e-06, "loss": 0.5634, "step": 649 }, { "epoch": 0.04, "grad_norm": 1.3495244979858398, "learning_rate": 9.996260750024886e-06, "loss": 0.6176, "step": 650 }, { "epoch": 0.04, "grad_norm": 1.218250036239624, "learning_rate": 9.996220221553603e-06, "loss": 0.5896, "step": 651 }, { "epoch": 0.04, "grad_norm": 1.3101712465286255, "learning_rate": 9.996179474710422e-06, "loss": 0.6192, "step": 652 }, { "epoch": 0.04, "grad_norm": 1.291196584701538, "learning_rate": 9.996138509497126e-06, "loss": 0.6236, "step": 653 }, { "epoch": 0.04, "grad_norm": 1.4282971620559692, "learning_rate": 9.996097325915506e-06, "loss": 0.6451, "step": 654 }, { "epoch": 0.04, "grad_norm": 1.2615121603012085, "learning_rate": 9.996055923967363e-06, "loss": 0.6223, "step": 655 }, { "epoch": 0.04, "grad_norm": 1.4690920114517212, "learning_rate": 9.996014303654504e-06, "loss": 0.6001, "step": 656 }, { "epoch": 0.04, "grad_norm": 1.2898240089416504, "learning_rate": 9.995972464978752e-06, "loss": 0.654, "step": 657 }, { "epoch": 0.04, "grad_norm": 1.369773030281067, "learning_rate": 9.995930407941932e-06, "loss": 0.5332, "step": 658 }, { "epoch": 0.04, "grad_norm": 1.5672647953033447, "learning_rate": 9.995888132545883e-06, "loss": 0.6006, "step": 659 }, { "epoch": 0.04, "grad_norm": 1.3229718208312988, "learning_rate": 9.995845638792453e-06, "loss": 0.6178, "step": 660 }, { "epoch": 0.04, "grad_norm": 1.2734156847000122, "learning_rate": 9.995802926683503e-06, "loss": 0.5923, "step": 661 }, { "epoch": 0.04, "grad_norm": 1.4581255912780762, "learning_rate": 9.995759996220894e-06, "loss": 0.6905, "step": 662 }, { "epoch": 0.04, "grad_norm": 1.9515048265457153, "learning_rate": 9.995716847406504e-06, "loss": 0.6316, "step": 663 }, { "epoch": 0.04, "grad_norm": 1.537920594215393, "learning_rate": 9.99567348024222e-06, "loss": 0.6547, "step": 664 }, { "epoch": 0.04, "grad_norm": 1.3634839057922363, "learning_rate": 9.995629894729937e-06, "loss": 0.5749, "step": 665 }, { "epoch": 0.04, "grad_norm": 1.435774803161621, "learning_rate": 9.995586090871561e-06, "loss": 0.6309, "step": 666 }, { "epoch": 0.04, "grad_norm": 1.307647466659546, "learning_rate": 9.995542068669006e-06, "loss": 0.7145, "step": 667 }, { "epoch": 0.04, "grad_norm": 1.300934076309204, "learning_rate": 9.995497828124195e-06, "loss": 0.6253, "step": 668 }, { "epoch": 0.04, "grad_norm": 1.4089375734329224, "learning_rate": 9.995453369239062e-06, "loss": 0.6376, "step": 669 }, { "epoch": 0.04, "grad_norm": 1.2301372289657593, "learning_rate": 9.995408692015553e-06, "loss": 0.6209, "step": 670 }, { "epoch": 0.04, "grad_norm": 1.3325904607772827, "learning_rate": 9.995363796455617e-06, "loss": 0.5835, "step": 671 }, { "epoch": 0.04, "grad_norm": 1.3954236507415771, "learning_rate": 9.995318682561217e-06, "loss": 0.6005, "step": 672 }, { "epoch": 0.04, "grad_norm": 1.1465363502502441, "learning_rate": 9.995273350334326e-06, "loss": 0.5941, "step": 673 }, { "epoch": 0.04, "grad_norm": 1.1670862436294556, "learning_rate": 9.995227799776926e-06, "loss": 0.5944, "step": 674 }, { "epoch": 0.04, "grad_norm": 1.396679162979126, "learning_rate": 9.995182030891007e-06, "loss": 0.5803, "step": 675 }, { "epoch": 0.04, "grad_norm": 1.3208783864974976, "learning_rate": 9.995136043678566e-06, "loss": 0.5966, "step": 676 }, { "epoch": 0.04, "grad_norm": 1.2235616445541382, "learning_rate": 9.995089838141619e-06, "loss": 0.6336, "step": 677 }, { "epoch": 0.04, "grad_norm": 1.3960037231445312, "learning_rate": 9.995043414282182e-06, "loss": 0.635, "step": 678 }, { "epoch": 0.04, "grad_norm": 1.2838006019592285, "learning_rate": 9.994996772102284e-06, "loss": 0.6349, "step": 679 }, { "epoch": 0.04, "grad_norm": 1.427472472190857, "learning_rate": 9.994949911603965e-06, "loss": 0.6394, "step": 680 }, { "epoch": 0.04, "grad_norm": 1.5836288928985596, "learning_rate": 9.994902832789272e-06, "loss": 0.6174, "step": 681 }, { "epoch": 0.04, "grad_norm": 1.4125407934188843, "learning_rate": 9.994855535660265e-06, "loss": 0.6426, "step": 682 }, { "epoch": 0.04, "grad_norm": 1.33255934715271, "learning_rate": 9.994808020219007e-06, "loss": 0.6137, "step": 683 }, { "epoch": 0.04, "grad_norm": 1.3130912780761719, "learning_rate": 9.994760286467578e-06, "loss": 0.6239, "step": 684 }, { "epoch": 0.04, "grad_norm": 1.4424058198928833, "learning_rate": 9.994712334408063e-06, "loss": 0.6389, "step": 685 }, { "epoch": 0.04, "grad_norm": 1.1882442235946655, "learning_rate": 9.99466416404256e-06, "loss": 0.579, "step": 686 }, { "epoch": 0.04, "grad_norm": 1.2046270370483398, "learning_rate": 9.99461577537317e-06, "loss": 0.6062, "step": 687 }, { "epoch": 0.04, "grad_norm": 1.4108847379684448, "learning_rate": 9.994567168402014e-06, "loss": 0.5639, "step": 688 }, { "epoch": 0.04, "grad_norm": 1.4525573253631592, "learning_rate": 9.994518343131212e-06, "loss": 0.6557, "step": 689 }, { "epoch": 0.04, "grad_norm": 1.2102625370025635, "learning_rate": 9.9944692995629e-06, "loss": 0.5849, "step": 690 }, { "epoch": 0.04, "grad_norm": 1.3305519819259644, "learning_rate": 9.994420037699219e-06, "loss": 0.5712, "step": 691 }, { "epoch": 0.04, "grad_norm": 1.3691257238388062, "learning_rate": 9.994370557542326e-06, "loss": 0.7174, "step": 692 }, { "epoch": 0.04, "grad_norm": 1.2992675304412842, "learning_rate": 9.99432085909438e-06, "loss": 0.6121, "step": 693 }, { "epoch": 0.04, "grad_norm": 1.2847000360488892, "learning_rate": 9.994270942357554e-06, "loss": 0.5751, "step": 694 }, { "epoch": 0.04, "grad_norm": 1.2804197072982788, "learning_rate": 9.994220807334032e-06, "loss": 0.6305, "step": 695 }, { "epoch": 0.04, "grad_norm": 1.5275192260742188, "learning_rate": 9.994170454026004e-06, "loss": 0.6521, "step": 696 }, { "epoch": 0.04, "grad_norm": 1.2836635112762451, "learning_rate": 9.99411988243567e-06, "loss": 0.5964, "step": 697 }, { "epoch": 0.05, "grad_norm": 1.3158926963806152, "learning_rate": 9.994069092565241e-06, "loss": 0.597, "step": 698 }, { "epoch": 0.05, "grad_norm": 1.3170349597930908, "learning_rate": 9.994018084416937e-06, "loss": 0.61, "step": 699 }, { "epoch": 0.05, "grad_norm": 1.2311310768127441, "learning_rate": 9.993966857992988e-06, "loss": 0.5209, "step": 700 }, { "epoch": 0.05, "grad_norm": 1.3446661233901978, "learning_rate": 9.99391541329563e-06, "loss": 0.6017, "step": 701 }, { "epoch": 0.05, "grad_norm": 1.2523772716522217, "learning_rate": 9.993863750327116e-06, "loss": 0.6243, "step": 702 }, { "epoch": 0.05, "grad_norm": 1.3298230171203613, "learning_rate": 9.9938118690897e-06, "loss": 0.5913, "step": 703 }, { "epoch": 0.05, "grad_norm": 1.3482544422149658, "learning_rate": 9.993759769585654e-06, "loss": 0.6378, "step": 704 }, { "epoch": 0.05, "grad_norm": 1.3581843376159668, "learning_rate": 9.99370745181725e-06, "loss": 0.5947, "step": 705 }, { "epoch": 0.05, "grad_norm": 1.2292523384094238, "learning_rate": 9.993654915786777e-06, "loss": 0.5825, "step": 706 }, { "epoch": 0.05, "grad_norm": 1.3815670013427734, "learning_rate": 9.993602161496534e-06, "loss": 0.5709, "step": 707 }, { "epoch": 0.05, "grad_norm": 1.314260482788086, "learning_rate": 9.993549188948823e-06, "loss": 0.6171, "step": 708 }, { "epoch": 0.05, "grad_norm": 1.5122920274734497, "learning_rate": 9.99349599814596e-06, "loss": 0.6809, "step": 709 }, { "epoch": 0.05, "grad_norm": 1.3366621732711792, "learning_rate": 9.99344258909027e-06, "loss": 0.6472, "step": 710 }, { "epoch": 0.05, "grad_norm": 1.31582510471344, "learning_rate": 9.993388961784087e-06, "loss": 0.623, "step": 711 }, { "epoch": 0.05, "grad_norm": 1.2566989660263062, "learning_rate": 9.993335116229758e-06, "loss": 0.5544, "step": 712 }, { "epoch": 0.05, "grad_norm": 1.3460108041763306, "learning_rate": 9.993281052429633e-06, "loss": 0.6409, "step": 713 }, { "epoch": 0.05, "grad_norm": 1.4746778011322021, "learning_rate": 9.993226770386075e-06, "loss": 0.6036, "step": 714 }, { "epoch": 0.05, "grad_norm": 1.5149903297424316, "learning_rate": 9.99317227010146e-06, "loss": 0.63, "step": 715 }, { "epoch": 0.05, "grad_norm": 1.2749017477035522, "learning_rate": 9.993117551578164e-06, "loss": 0.6055, "step": 716 }, { "epoch": 0.05, "grad_norm": 1.3636715412139893, "learning_rate": 9.993062614818586e-06, "loss": 0.6023, "step": 717 }, { "epoch": 0.05, "grad_norm": 1.6237174272537231, "learning_rate": 9.993007459825122e-06, "loss": 0.6226, "step": 718 }, { "epoch": 0.05, "grad_norm": 1.3643035888671875, "learning_rate": 9.992952086600182e-06, "loss": 0.5845, "step": 719 }, { "epoch": 0.05, "grad_norm": 1.6051820516586304, "learning_rate": 9.992896495146192e-06, "loss": 0.6053, "step": 720 }, { "epoch": 0.05, "grad_norm": 1.3144152164459229, "learning_rate": 9.992840685465575e-06, "loss": 0.6336, "step": 721 }, { "epoch": 0.05, "grad_norm": 1.369965672492981, "learning_rate": 9.992784657560774e-06, "loss": 0.6183, "step": 722 }, { "epoch": 0.05, "grad_norm": 1.3667213916778564, "learning_rate": 9.992728411434238e-06, "loss": 0.6135, "step": 723 }, { "epoch": 0.05, "grad_norm": 1.346530556678772, "learning_rate": 9.992671947088424e-06, "loss": 0.6205, "step": 724 }, { "epoch": 0.05, "grad_norm": 1.374173879623413, "learning_rate": 9.9926152645258e-06, "loss": 0.6142, "step": 725 }, { "epoch": 0.05, "grad_norm": 1.429508924484253, "learning_rate": 9.992558363748846e-06, "loss": 0.6362, "step": 726 }, { "epoch": 0.05, "grad_norm": 1.4174789190292358, "learning_rate": 9.992501244760044e-06, "loss": 0.6427, "step": 727 }, { "epoch": 0.05, "grad_norm": 1.55190110206604, "learning_rate": 9.992443907561895e-06, "loss": 0.6199, "step": 728 }, { "epoch": 0.05, "grad_norm": 1.3450894355773926, "learning_rate": 9.992386352156903e-06, "loss": 0.667, "step": 729 }, { "epoch": 0.05, "grad_norm": 1.3739930391311646, "learning_rate": 9.992328578547585e-06, "loss": 0.6097, "step": 730 }, { "epoch": 0.05, "grad_norm": 1.3084079027175903, "learning_rate": 9.992270586736464e-06, "loss": 0.6314, "step": 731 }, { "epoch": 0.05, "grad_norm": 1.190564513206482, "learning_rate": 9.992212376726077e-06, "loss": 0.6235, "step": 732 }, { "epoch": 0.05, "grad_norm": 1.2464158535003662, "learning_rate": 9.992153948518967e-06, "loss": 0.5915, "step": 733 }, { "epoch": 0.05, "grad_norm": 1.3274720907211304, "learning_rate": 9.992095302117687e-06, "loss": 0.5719, "step": 734 }, { "epoch": 0.05, "grad_norm": 1.577211856842041, "learning_rate": 9.992036437524801e-06, "loss": 0.6192, "step": 735 }, { "epoch": 0.05, "grad_norm": 1.3138668537139893, "learning_rate": 9.991977354742883e-06, "loss": 0.5653, "step": 736 }, { "epoch": 0.05, "grad_norm": 1.2117230892181396, "learning_rate": 9.991918053774514e-06, "loss": 0.608, "step": 737 }, { "epoch": 0.05, "grad_norm": 1.4404189586639404, "learning_rate": 9.991858534622285e-06, "loss": 0.5744, "step": 738 }, { "epoch": 0.05, "grad_norm": 1.2764012813568115, "learning_rate": 9.9917987972888e-06, "loss": 0.5625, "step": 739 }, { "epoch": 0.05, "grad_norm": 1.339328646659851, "learning_rate": 9.991738841776668e-06, "loss": 0.6263, "step": 740 }, { "epoch": 0.05, "grad_norm": 1.217502474784851, "learning_rate": 9.99167866808851e-06, "loss": 0.635, "step": 741 }, { "epoch": 0.05, "grad_norm": 1.2880744934082031, "learning_rate": 9.991618276226958e-06, "loss": 0.6055, "step": 742 }, { "epoch": 0.05, "grad_norm": 1.3998174667358398, "learning_rate": 9.991557666194647e-06, "loss": 0.6542, "step": 743 }, { "epoch": 0.05, "grad_norm": 1.2290549278259277, "learning_rate": 9.99149683799423e-06, "loss": 0.575, "step": 744 }, { "epoch": 0.05, "grad_norm": 1.3445876836776733, "learning_rate": 9.991435791628363e-06, "loss": 0.6566, "step": 745 }, { "epoch": 0.05, "grad_norm": 1.2868216037750244, "learning_rate": 9.991374527099717e-06, "loss": 0.5725, "step": 746 }, { "epoch": 0.05, "grad_norm": 1.1972132921218872, "learning_rate": 9.991313044410968e-06, "loss": 0.5946, "step": 747 }, { "epoch": 0.05, "grad_norm": 1.4294627904891968, "learning_rate": 9.991251343564805e-06, "loss": 0.6543, "step": 748 }, { "epoch": 0.05, "grad_norm": 1.1957937479019165, "learning_rate": 9.991189424563923e-06, "loss": 0.5655, "step": 749 }, { "epoch": 0.05, "grad_norm": 1.1231310367584229, "learning_rate": 9.991127287411027e-06, "loss": 0.5653, "step": 750 }, { "epoch": 0.05, "grad_norm": 1.2265135049819946, "learning_rate": 9.991064932108836e-06, "loss": 0.5691, "step": 751 }, { "epoch": 0.05, "grad_norm": 1.4323461055755615, "learning_rate": 9.991002358660074e-06, "loss": 0.6226, "step": 752 }, { "epoch": 0.05, "grad_norm": 1.302708625793457, "learning_rate": 9.990939567067475e-06, "loss": 0.5811, "step": 753 }, { "epoch": 0.05, "grad_norm": 1.4296199083328247, "learning_rate": 9.990876557333783e-06, "loss": 0.5572, "step": 754 }, { "epoch": 0.05, "grad_norm": 1.2518582344055176, "learning_rate": 9.990813329461755e-06, "loss": 0.5917, "step": 755 }, { "epoch": 0.05, "grad_norm": 1.272987961769104, "learning_rate": 9.990749883454154e-06, "loss": 0.5847, "step": 756 }, { "epoch": 0.05, "grad_norm": 1.3373357057571411, "learning_rate": 9.99068621931375e-06, "loss": 0.5189, "step": 757 }, { "epoch": 0.05, "grad_norm": 1.222906470298767, "learning_rate": 9.990622337043328e-06, "loss": 0.5539, "step": 758 }, { "epoch": 0.05, "grad_norm": 1.2243908643722534, "learning_rate": 9.99055823664568e-06, "loss": 0.573, "step": 759 }, { "epoch": 0.05, "grad_norm": 1.1998130083084106, "learning_rate": 9.990493918123607e-06, "loss": 0.5761, "step": 760 }, { "epoch": 0.05, "grad_norm": 1.3498951196670532, "learning_rate": 9.99042938147992e-06, "loss": 0.6052, "step": 761 }, { "epoch": 0.05, "grad_norm": 1.3378664255142212, "learning_rate": 9.990364626717441e-06, "loss": 0.6322, "step": 762 }, { "epoch": 0.05, "grad_norm": 1.117990255355835, "learning_rate": 9.990299653839e-06, "loss": 0.5943, "step": 763 }, { "epoch": 0.05, "grad_norm": 1.3539758920669556, "learning_rate": 9.990234462847435e-06, "loss": 0.6105, "step": 764 }, { "epoch": 0.05, "grad_norm": 1.4259867668151855, "learning_rate": 9.990169053745597e-06, "loss": 0.6047, "step": 765 }, { "epoch": 0.05, "grad_norm": 1.344708800315857, "learning_rate": 9.990103426536344e-06, "loss": 0.5683, "step": 766 }, { "epoch": 0.05, "grad_norm": 1.442091941833496, "learning_rate": 9.990037581222545e-06, "loss": 0.6043, "step": 767 }, { "epoch": 0.05, "grad_norm": 1.282313346862793, "learning_rate": 9.989971517807078e-06, "loss": 0.616, "step": 768 }, { "epoch": 0.05, "grad_norm": 1.4440878629684448, "learning_rate": 9.989905236292832e-06, "loss": 0.614, "step": 769 }, { "epoch": 0.05, "grad_norm": 1.5427175760269165, "learning_rate": 9.9898387366827e-06, "loss": 0.6308, "step": 770 }, { "epoch": 0.05, "grad_norm": 1.2785429954528809, "learning_rate": 9.989772018979591e-06, "loss": 0.628, "step": 771 }, { "epoch": 0.05, "grad_norm": 1.3523285388946533, "learning_rate": 9.989705083186422e-06, "loss": 0.5952, "step": 772 }, { "epoch": 0.05, "grad_norm": 1.2685215473175049, "learning_rate": 9.989637929306118e-06, "loss": 0.6244, "step": 773 }, { "epoch": 0.05, "grad_norm": 1.330335259437561, "learning_rate": 9.98957055734161e-06, "loss": 0.605, "step": 774 }, { "epoch": 0.05, "grad_norm": 1.614316701889038, "learning_rate": 9.98950296729585e-06, "loss": 0.6482, "step": 775 }, { "epoch": 0.05, "grad_norm": 1.4099806547164917, "learning_rate": 9.989435159171786e-06, "loss": 0.5682, "step": 776 }, { "epoch": 0.05, "grad_norm": 1.195177674293518, "learning_rate": 9.989367132972385e-06, "loss": 0.6075, "step": 777 }, { "epoch": 0.05, "grad_norm": 1.283618688583374, "learning_rate": 9.989298888700621e-06, "loss": 0.5733, "step": 778 }, { "epoch": 0.05, "grad_norm": 1.6082128286361694, "learning_rate": 9.989230426359472e-06, "loss": 0.6283, "step": 779 }, { "epoch": 0.05, "grad_norm": 1.3546257019042969, "learning_rate": 9.989161745951936e-06, "loss": 0.5932, "step": 780 }, { "epoch": 0.05, "grad_norm": 1.2849421501159668, "learning_rate": 9.98909284748101e-06, "loss": 0.6192, "step": 781 }, { "epoch": 0.05, "grad_norm": 1.3871405124664307, "learning_rate": 9.98902373094971e-06, "loss": 0.6527, "step": 782 }, { "epoch": 0.05, "grad_norm": 1.3192064762115479, "learning_rate": 9.988954396361053e-06, "loss": 0.5382, "step": 783 }, { "epoch": 0.05, "grad_norm": 1.2869455814361572, "learning_rate": 9.988884843718072e-06, "loss": 0.5947, "step": 784 }, { "epoch": 0.05, "grad_norm": 1.2898058891296387, "learning_rate": 9.988815073023806e-06, "loss": 0.5952, "step": 785 }, { "epoch": 0.05, "grad_norm": 1.539278268814087, "learning_rate": 9.988745084281302e-06, "loss": 0.6321, "step": 786 }, { "epoch": 0.05, "grad_norm": 1.3589502573013306, "learning_rate": 9.988674877493625e-06, "loss": 0.585, "step": 787 }, { "epoch": 0.05, "grad_norm": 1.479506015777588, "learning_rate": 9.988604452663837e-06, "loss": 0.6428, "step": 788 }, { "epoch": 0.05, "grad_norm": 1.4313050508499146, "learning_rate": 9.988533809795022e-06, "loss": 0.5669, "step": 789 }, { "epoch": 0.05, "grad_norm": 1.3543869256973267, "learning_rate": 9.988462948890262e-06, "loss": 0.5955, "step": 790 }, { "epoch": 0.05, "grad_norm": 1.1839933395385742, "learning_rate": 9.988391869952659e-06, "loss": 0.5635, "step": 791 }, { "epoch": 0.05, "grad_norm": 1.4674644470214844, "learning_rate": 9.988320572985317e-06, "loss": 0.6293, "step": 792 }, { "epoch": 0.05, "grad_norm": 1.2924787998199463, "learning_rate": 9.988249057991353e-06, "loss": 0.6008, "step": 793 }, { "epoch": 0.05, "grad_norm": 1.5440112352371216, "learning_rate": 9.988177324973891e-06, "loss": 0.6356, "step": 794 }, { "epoch": 0.05, "grad_norm": 1.2951475381851196, "learning_rate": 9.98810537393607e-06, "loss": 0.6279, "step": 795 }, { "epoch": 0.05, "grad_norm": 1.2826663255691528, "learning_rate": 9.98803320488103e-06, "loss": 0.5347, "step": 796 }, { "epoch": 0.05, "grad_norm": 1.3169615268707275, "learning_rate": 9.98796081781193e-06, "loss": 0.6153, "step": 797 }, { "epoch": 0.05, "grad_norm": 1.2405827045440674, "learning_rate": 9.987888212731932e-06, "loss": 0.6258, "step": 798 }, { "epoch": 0.05, "grad_norm": 1.3177257776260376, "learning_rate": 9.98781538964421e-06, "loss": 0.543, "step": 799 }, { "epoch": 0.05, "grad_norm": 1.338072419166565, "learning_rate": 9.987742348551943e-06, "loss": 0.6024, "step": 800 }, { "epoch": 0.05, "grad_norm": 1.4956375360488892, "learning_rate": 9.987669089458327e-06, "loss": 0.6265, "step": 801 }, { "epoch": 0.05, "grad_norm": 1.5416162014007568, "learning_rate": 9.987595612366566e-06, "loss": 0.6123, "step": 802 }, { "epoch": 0.05, "grad_norm": 1.1853357553482056, "learning_rate": 9.987521917279866e-06, "loss": 0.6091, "step": 803 }, { "epoch": 0.05, "grad_norm": 1.27427339553833, "learning_rate": 9.987448004201453e-06, "loss": 0.644, "step": 804 }, { "epoch": 0.05, "grad_norm": 1.3743523359298706, "learning_rate": 9.987373873134555e-06, "loss": 0.6331, "step": 805 }, { "epoch": 0.05, "grad_norm": 1.3529893159866333, "learning_rate": 9.987299524082413e-06, "loss": 0.6286, "step": 806 }, { "epoch": 0.05, "grad_norm": 1.2685294151306152, "learning_rate": 9.987224957048275e-06, "loss": 0.6185, "step": 807 }, { "epoch": 0.05, "grad_norm": 1.3373247385025024, "learning_rate": 9.987150172035402e-06, "loss": 0.5822, "step": 808 }, { "epoch": 0.05, "grad_norm": 1.3268299102783203, "learning_rate": 9.987075169047063e-06, "loss": 0.6224, "step": 809 }, { "epoch": 0.05, "grad_norm": 1.3368687629699707, "learning_rate": 9.986999948086533e-06, "loss": 0.6012, "step": 810 }, { "epoch": 0.05, "grad_norm": 1.1944218873977661, "learning_rate": 9.986924509157103e-06, "loss": 0.615, "step": 811 }, { "epoch": 0.05, "grad_norm": 1.4409010410308838, "learning_rate": 9.98684885226207e-06, "loss": 0.631, "step": 812 }, { "epoch": 0.05, "grad_norm": 1.3638094663619995, "learning_rate": 9.986772977404739e-06, "loss": 0.6249, "step": 813 }, { "epoch": 0.05, "grad_norm": 1.3288711309432983, "learning_rate": 9.986696884588428e-06, "loss": 0.5838, "step": 814 }, { "epoch": 0.05, "grad_norm": 1.3208523988723755, "learning_rate": 9.986620573816461e-06, "loss": 0.5927, "step": 815 }, { "epoch": 0.05, "grad_norm": 1.3350193500518799, "learning_rate": 9.986544045092175e-06, "loss": 0.6119, "step": 816 }, { "epoch": 0.05, "grad_norm": 1.3693687915802002, "learning_rate": 9.986467298418914e-06, "loss": 0.6091, "step": 817 }, { "epoch": 0.05, "grad_norm": 1.335152506828308, "learning_rate": 9.98639033380003e-06, "loss": 0.5991, "step": 818 }, { "epoch": 0.05, "grad_norm": 1.4040459394454956, "learning_rate": 9.986313151238893e-06, "loss": 0.6118, "step": 819 }, { "epoch": 0.05, "grad_norm": 1.255664348602295, "learning_rate": 9.986235750738872e-06, "loss": 0.616, "step": 820 }, { "epoch": 0.05, "grad_norm": 1.4109734296798706, "learning_rate": 9.986158132303353e-06, "loss": 0.6267, "step": 821 }, { "epoch": 0.05, "grad_norm": 1.2166942358016968, "learning_rate": 9.986080295935723e-06, "loss": 0.6162, "step": 822 }, { "epoch": 0.05, "grad_norm": 1.2423460483551025, "learning_rate": 9.98600224163939e-06, "loss": 0.6429, "step": 823 }, { "epoch": 0.05, "grad_norm": 1.3565016984939575, "learning_rate": 9.985923969417763e-06, "loss": 0.5927, "step": 824 }, { "epoch": 0.05, "grad_norm": 1.2119100093841553, "learning_rate": 9.985845479274262e-06, "loss": 0.6016, "step": 825 }, { "epoch": 0.05, "grad_norm": 1.3419301509857178, "learning_rate": 9.98576677121232e-06, "loss": 0.6246, "step": 826 }, { "epoch": 0.05, "grad_norm": 1.186578392982483, "learning_rate": 9.985687845235375e-06, "loss": 0.5757, "step": 827 }, { "epoch": 0.05, "grad_norm": 1.5659005641937256, "learning_rate": 9.985608701346877e-06, "loss": 0.6256, "step": 828 }, { "epoch": 0.05, "grad_norm": 1.3075511455535889, "learning_rate": 9.985529339550286e-06, "loss": 0.5781, "step": 829 }, { "epoch": 0.05, "grad_norm": 1.3651241064071655, "learning_rate": 9.98544975984907e-06, "loss": 0.5554, "step": 830 }, { "epoch": 0.05, "grad_norm": 1.3739334344863892, "learning_rate": 9.985369962246709e-06, "loss": 0.6083, "step": 831 }, { "epoch": 0.05, "grad_norm": 1.26080322265625, "learning_rate": 9.98528994674669e-06, "loss": 0.6557, "step": 832 }, { "epoch": 0.05, "grad_norm": 1.3462553024291992, "learning_rate": 9.985209713352509e-06, "loss": 0.6334, "step": 833 }, { "epoch": 0.05, "grad_norm": 1.352421522140503, "learning_rate": 9.985129262067672e-06, "loss": 0.6165, "step": 834 }, { "epoch": 0.05, "grad_norm": 1.3593071699142456, "learning_rate": 9.985048592895697e-06, "loss": 0.593, "step": 835 }, { "epoch": 0.05, "grad_norm": 1.4635106325149536, "learning_rate": 9.98496770584011e-06, "loss": 0.5406, "step": 836 }, { "epoch": 0.05, "grad_norm": 1.2639870643615723, "learning_rate": 9.984886600904446e-06, "loss": 0.6109, "step": 837 }, { "epoch": 0.05, "grad_norm": 1.3601460456848145, "learning_rate": 9.984805278092252e-06, "loss": 0.6193, "step": 838 }, { "epoch": 0.05, "grad_norm": 1.3746196031570435, "learning_rate": 9.984723737407078e-06, "loss": 0.6797, "step": 839 }, { "epoch": 0.05, "grad_norm": 1.419798493385315, "learning_rate": 9.98464197885249e-06, "loss": 0.5956, "step": 840 }, { "epoch": 0.05, "grad_norm": 1.4878201484680176, "learning_rate": 9.984560002432062e-06, "loss": 0.6075, "step": 841 }, { "epoch": 0.05, "grad_norm": 1.3479951620101929, "learning_rate": 9.984477808149376e-06, "loss": 0.594, "step": 842 }, { "epoch": 0.05, "grad_norm": 1.177579641342163, "learning_rate": 9.984395396008027e-06, "loss": 0.5684, "step": 843 }, { "epoch": 0.05, "grad_norm": 1.4513664245605469, "learning_rate": 9.984312766011613e-06, "loss": 0.6203, "step": 844 }, { "epoch": 0.05, "grad_norm": 1.2280117273330688, "learning_rate": 9.98422991816375e-06, "loss": 0.6021, "step": 845 }, { "epoch": 0.05, "grad_norm": 1.592174768447876, "learning_rate": 9.984146852468055e-06, "loss": 0.6462, "step": 846 }, { "epoch": 0.05, "grad_norm": 1.429560661315918, "learning_rate": 9.98406356892816e-06, "loss": 0.5981, "step": 847 }, { "epoch": 0.05, "grad_norm": 1.2229324579238892, "learning_rate": 9.983980067547708e-06, "loss": 0.6141, "step": 848 }, { "epoch": 0.05, "grad_norm": 1.190948247909546, "learning_rate": 9.983896348330343e-06, "loss": 0.5466, "step": 849 }, { "epoch": 0.05, "grad_norm": 1.2031153440475464, "learning_rate": 9.983812411279728e-06, "loss": 0.5956, "step": 850 }, { "epoch": 0.05, "grad_norm": 1.354122281074524, "learning_rate": 9.983728256399532e-06, "loss": 0.6184, "step": 851 }, { "epoch": 0.05, "grad_norm": 1.4730654954910278, "learning_rate": 9.983643883693432e-06, "loss": 0.6702, "step": 852 }, { "epoch": 0.06, "grad_norm": 1.2778820991516113, "learning_rate": 9.983559293165115e-06, "loss": 0.6324, "step": 853 }, { "epoch": 0.06, "grad_norm": 3.869008779525757, "learning_rate": 9.98347448481828e-06, "loss": 0.5988, "step": 854 }, { "epoch": 0.06, "grad_norm": 1.2728172540664673, "learning_rate": 9.983389458656631e-06, "loss": 0.6188, "step": 855 }, { "epoch": 0.06, "grad_norm": 1.2694522142410278, "learning_rate": 9.98330421468389e-06, "loss": 0.5513, "step": 856 }, { "epoch": 0.06, "grad_norm": 1.3104051351547241, "learning_rate": 9.983218752903776e-06, "loss": 0.6396, "step": 857 }, { "epoch": 0.06, "grad_norm": 1.2463247776031494, "learning_rate": 9.983133073320025e-06, "loss": 0.633, "step": 858 }, { "epoch": 0.06, "grad_norm": 1.249991774559021, "learning_rate": 9.983047175936388e-06, "loss": 0.5838, "step": 859 }, { "epoch": 0.06, "grad_norm": 1.3543109893798828, "learning_rate": 9.982961060756614e-06, "loss": 0.6907, "step": 860 }, { "epoch": 0.06, "grad_norm": 1.253469705581665, "learning_rate": 9.982874727784469e-06, "loss": 0.5868, "step": 861 }, { "epoch": 0.06, "grad_norm": 1.3407464027404785, "learning_rate": 9.982788177023724e-06, "loss": 0.6495, "step": 862 }, { "epoch": 0.06, "grad_norm": 1.2800185680389404, "learning_rate": 9.982701408478164e-06, "loss": 0.6469, "step": 863 }, { "epoch": 0.06, "grad_norm": 1.2546530961990356, "learning_rate": 9.982614422151582e-06, "loss": 0.6175, "step": 864 }, { "epoch": 0.06, "grad_norm": 1.4135569334030151, "learning_rate": 9.982527218047777e-06, "loss": 0.6241, "step": 865 }, { "epoch": 0.06, "grad_norm": 1.2569926977157593, "learning_rate": 9.982439796170565e-06, "loss": 0.5861, "step": 866 }, { "epoch": 0.06, "grad_norm": 1.3089103698730469, "learning_rate": 9.982352156523762e-06, "loss": 0.6393, "step": 867 }, { "epoch": 0.06, "grad_norm": 1.466545820236206, "learning_rate": 9.982264299111202e-06, "loss": 0.5573, "step": 868 }, { "epoch": 0.06, "grad_norm": 1.1536349058151245, "learning_rate": 9.982176223936724e-06, "loss": 0.6095, "step": 869 }, { "epoch": 0.06, "grad_norm": 1.270713448524475, "learning_rate": 9.982087931004177e-06, "loss": 0.6756, "step": 870 }, { "epoch": 0.06, "grad_norm": 1.1618008613586426, "learning_rate": 9.981999420317419e-06, "loss": 0.5468, "step": 871 }, { "epoch": 0.06, "grad_norm": 1.2762984037399292, "learning_rate": 9.981910691880324e-06, "loss": 0.666, "step": 872 }, { "epoch": 0.06, "grad_norm": 1.2015697956085205, "learning_rate": 9.981821745696762e-06, "loss": 0.5909, "step": 873 }, { "epoch": 0.06, "grad_norm": 1.2805262804031372, "learning_rate": 9.981732581770626e-06, "loss": 0.5906, "step": 874 }, { "epoch": 0.06, "grad_norm": 1.4353238344192505, "learning_rate": 9.981643200105813e-06, "loss": 0.6453, "step": 875 }, { "epoch": 0.06, "grad_norm": 1.2009645700454712, "learning_rate": 9.981553600706228e-06, "loss": 0.5749, "step": 876 }, { "epoch": 0.06, "grad_norm": 1.2774187326431274, "learning_rate": 9.981463783575788e-06, "loss": 0.6138, "step": 877 }, { "epoch": 0.06, "grad_norm": 1.2321290969848633, "learning_rate": 9.98137374871842e-06, "loss": 0.5939, "step": 878 }, { "epoch": 0.06, "grad_norm": 1.434096097946167, "learning_rate": 9.981283496138055e-06, "loss": 0.5899, "step": 879 }, { "epoch": 0.06, "grad_norm": 1.4767001867294312, "learning_rate": 9.98119302583864e-06, "loss": 0.5881, "step": 880 }, { "epoch": 0.06, "grad_norm": 1.3460227251052856, "learning_rate": 9.981102337824131e-06, "loss": 0.6136, "step": 881 }, { "epoch": 0.06, "grad_norm": 1.2644078731536865, "learning_rate": 9.98101143209849e-06, "loss": 0.5828, "step": 882 }, { "epoch": 0.06, "grad_norm": 1.4043848514556885, "learning_rate": 9.98092030866569e-06, "loss": 0.6075, "step": 883 }, { "epoch": 0.06, "grad_norm": 1.2891085147857666, "learning_rate": 9.980828967529714e-06, "loss": 0.6453, "step": 884 }, { "epoch": 0.06, "grad_norm": 1.4434421062469482, "learning_rate": 9.980737408694554e-06, "loss": 0.6651, "step": 885 }, { "epoch": 0.06, "grad_norm": 1.6333956718444824, "learning_rate": 9.980645632164214e-06, "loss": 0.5912, "step": 886 }, { "epoch": 0.06, "grad_norm": 1.4379559755325317, "learning_rate": 9.980553637942702e-06, "loss": 0.6271, "step": 887 }, { "epoch": 0.06, "grad_norm": 1.245364785194397, "learning_rate": 9.980461426034042e-06, "loss": 0.6118, "step": 888 }, { "epoch": 0.06, "grad_norm": 1.3191221952438354, "learning_rate": 9.980368996442262e-06, "loss": 0.6362, "step": 889 }, { "epoch": 0.06, "grad_norm": 1.3540645837783813, "learning_rate": 9.980276349171404e-06, "loss": 0.6331, "step": 890 }, { "epoch": 0.06, "grad_norm": 1.3820072412490845, "learning_rate": 9.980183484225515e-06, "loss": 0.6189, "step": 891 }, { "epoch": 0.06, "grad_norm": 1.4162912368774414, "learning_rate": 9.980090401608655e-06, "loss": 0.5757, "step": 892 }, { "epoch": 0.06, "grad_norm": 1.2870231866836548, "learning_rate": 9.979997101324893e-06, "loss": 0.6136, "step": 893 }, { "epoch": 0.06, "grad_norm": 1.349748969078064, "learning_rate": 9.979903583378305e-06, "loss": 0.593, "step": 894 }, { "epoch": 0.06, "grad_norm": 1.1928237676620483, "learning_rate": 9.979809847772981e-06, "loss": 0.5558, "step": 895 }, { "epoch": 0.06, "grad_norm": 1.4405207633972168, "learning_rate": 9.979715894513016e-06, "loss": 0.614, "step": 896 }, { "epoch": 0.06, "grad_norm": 1.3275114297866821, "learning_rate": 9.979621723602516e-06, "loss": 0.6473, "step": 897 }, { "epoch": 0.06, "grad_norm": 1.2854678630828857, "learning_rate": 9.979527335045599e-06, "loss": 0.5986, "step": 898 }, { "epoch": 0.06, "grad_norm": 1.3321114778518677, "learning_rate": 9.97943272884639e-06, "loss": 0.5994, "step": 899 }, { "epoch": 0.06, "grad_norm": 1.2891168594360352, "learning_rate": 9.979337905009023e-06, "loss": 0.6368, "step": 900 }, { "epoch": 0.06, "grad_norm": 1.1582252979278564, "learning_rate": 9.979242863537644e-06, "loss": 0.5976, "step": 901 }, { "epoch": 0.06, "grad_norm": 1.256514549255371, "learning_rate": 9.979147604436405e-06, "loss": 0.5862, "step": 902 }, { "epoch": 0.06, "grad_norm": 1.3321086168289185, "learning_rate": 9.97905212770947e-06, "loss": 0.5533, "step": 903 }, { "epoch": 0.06, "grad_norm": 1.2550773620605469, "learning_rate": 9.978956433361013e-06, "loss": 0.5864, "step": 904 }, { "epoch": 0.06, "grad_norm": 1.2976868152618408, "learning_rate": 9.978860521395218e-06, "loss": 0.6082, "step": 905 }, { "epoch": 0.06, "grad_norm": 1.3422613143920898, "learning_rate": 9.978764391816274e-06, "loss": 0.6053, "step": 906 }, { "epoch": 0.06, "grad_norm": 1.461303949356079, "learning_rate": 9.978668044628383e-06, "loss": 0.625, "step": 907 }, { "epoch": 0.06, "grad_norm": 1.4941318035125732, "learning_rate": 9.978571479835757e-06, "loss": 0.6399, "step": 908 }, { "epoch": 0.06, "grad_norm": 1.2639497518539429, "learning_rate": 9.978474697442617e-06, "loss": 0.6078, "step": 909 }, { "epoch": 0.06, "grad_norm": 1.2828413248062134, "learning_rate": 9.978377697453191e-06, "loss": 0.611, "step": 910 }, { "epoch": 0.06, "grad_norm": 1.262369990348816, "learning_rate": 9.978280479871723e-06, "loss": 0.6144, "step": 911 }, { "epoch": 0.06, "grad_norm": 1.3285514116287231, "learning_rate": 9.97818304470246e-06, "loss": 0.646, "step": 912 }, { "epoch": 0.06, "grad_norm": 1.4956212043762207, "learning_rate": 9.978085391949657e-06, "loss": 0.609, "step": 913 }, { "epoch": 0.06, "grad_norm": 1.4528990983963013, "learning_rate": 9.977987521617588e-06, "loss": 0.605, "step": 914 }, { "epoch": 0.06, "grad_norm": 1.3485757112503052, "learning_rate": 9.977889433710525e-06, "loss": 0.639, "step": 915 }, { "epoch": 0.06, "grad_norm": 1.3146936893463135, "learning_rate": 9.977791128232758e-06, "loss": 0.6197, "step": 916 }, { "epoch": 0.06, "grad_norm": 1.3970012664794922, "learning_rate": 9.977692605188585e-06, "loss": 0.6449, "step": 917 }, { "epoch": 0.06, "grad_norm": 1.369360089302063, "learning_rate": 9.977593864582311e-06, "loss": 0.5986, "step": 918 }, { "epoch": 0.06, "grad_norm": 1.3102500438690186, "learning_rate": 9.977494906418252e-06, "loss": 0.6207, "step": 919 }, { "epoch": 0.06, "grad_norm": 1.2660953998565674, "learning_rate": 9.977395730700733e-06, "loss": 0.6724, "step": 920 }, { "epoch": 0.06, "grad_norm": 1.2717785835266113, "learning_rate": 9.977296337434087e-06, "loss": 0.5262, "step": 921 }, { "epoch": 0.06, "grad_norm": 1.3404022455215454, "learning_rate": 9.977196726622659e-06, "loss": 0.6199, "step": 922 }, { "epoch": 0.06, "grad_norm": 1.270916223526001, "learning_rate": 9.977096898270804e-06, "loss": 0.6627, "step": 923 }, { "epoch": 0.06, "grad_norm": 1.3402420282363892, "learning_rate": 9.976996852382887e-06, "loss": 0.6459, "step": 924 }, { "epoch": 0.06, "grad_norm": 1.3334695100784302, "learning_rate": 9.976896588963276e-06, "loss": 0.6279, "step": 925 }, { "epoch": 0.06, "grad_norm": 1.293718695640564, "learning_rate": 9.976796108016355e-06, "loss": 0.5694, "step": 926 }, { "epoch": 0.06, "grad_norm": 1.3331369161605835, "learning_rate": 9.97669540954652e-06, "loss": 0.6489, "step": 927 }, { "epoch": 0.06, "grad_norm": 1.3454598188400269, "learning_rate": 9.976594493558166e-06, "loss": 0.5723, "step": 928 }, { "epoch": 0.06, "grad_norm": 1.2706190347671509, "learning_rate": 9.976493360055706e-06, "loss": 0.6079, "step": 929 }, { "epoch": 0.06, "grad_norm": 1.2573448419570923, "learning_rate": 9.976392009043562e-06, "loss": 0.593, "step": 930 }, { "epoch": 0.06, "grad_norm": 1.3673250675201416, "learning_rate": 9.976290440526161e-06, "loss": 0.6513, "step": 931 }, { "epoch": 0.06, "grad_norm": 1.3039312362670898, "learning_rate": 9.976188654507945e-06, "loss": 0.5774, "step": 932 }, { "epoch": 0.06, "grad_norm": 1.2283755540847778, "learning_rate": 9.976086650993361e-06, "loss": 0.5581, "step": 933 }, { "epoch": 0.06, "grad_norm": 1.344382882118225, "learning_rate": 9.97598442998687e-06, "loss": 0.5466, "step": 934 }, { "epoch": 0.06, "grad_norm": 1.3536109924316406, "learning_rate": 9.975881991492938e-06, "loss": 0.612, "step": 935 }, { "epoch": 0.06, "grad_norm": 1.3180475234985352, "learning_rate": 9.97577933551604e-06, "loss": 0.5874, "step": 936 }, { "epoch": 0.06, "grad_norm": 1.5470716953277588, "learning_rate": 9.975676462060663e-06, "loss": 0.6171, "step": 937 }, { "epoch": 0.06, "grad_norm": 1.3709609508514404, "learning_rate": 9.975573371131309e-06, "loss": 0.599, "step": 938 }, { "epoch": 0.06, "grad_norm": 1.2852208614349365, "learning_rate": 9.975470062732479e-06, "loss": 0.5687, "step": 939 }, { "epoch": 0.06, "grad_norm": 1.3819485902786255, "learning_rate": 9.975366536868689e-06, "loss": 0.674, "step": 940 }, { "epoch": 0.06, "grad_norm": 1.4717525243759155, "learning_rate": 9.975262793544465e-06, "loss": 0.6399, "step": 941 }, { "epoch": 0.06, "grad_norm": 1.382629632949829, "learning_rate": 9.97515883276434e-06, "loss": 0.574, "step": 942 }, { "epoch": 0.06, "grad_norm": 1.3435509204864502, "learning_rate": 9.975054654532858e-06, "loss": 0.6066, "step": 943 }, { "epoch": 0.06, "grad_norm": 1.329080581665039, "learning_rate": 9.974950258854575e-06, "loss": 0.5698, "step": 944 }, { "epoch": 0.06, "grad_norm": 1.317596435546875, "learning_rate": 9.974845645734049e-06, "loss": 0.6295, "step": 945 }, { "epoch": 0.06, "grad_norm": 1.286259412765503, "learning_rate": 9.974740815175856e-06, "loss": 0.6303, "step": 946 }, { "epoch": 0.06, "grad_norm": 1.407423973083496, "learning_rate": 9.974635767184578e-06, "loss": 0.5796, "step": 947 }, { "epoch": 0.06, "grad_norm": 1.4076265096664429, "learning_rate": 9.974530501764806e-06, "loss": 0.6105, "step": 948 }, { "epoch": 0.06, "grad_norm": 1.3441686630249023, "learning_rate": 9.974425018921138e-06, "loss": 0.6095, "step": 949 }, { "epoch": 0.06, "grad_norm": 1.3674129247665405, "learning_rate": 9.974319318658188e-06, "loss": 0.6057, "step": 950 }, { "epoch": 0.06, "grad_norm": 1.24234938621521, "learning_rate": 9.974213400980573e-06, "loss": 0.5876, "step": 951 }, { "epoch": 0.06, "grad_norm": 1.3447238206863403, "learning_rate": 9.974107265892925e-06, "loss": 0.6245, "step": 952 }, { "epoch": 0.06, "grad_norm": 1.3682032823562622, "learning_rate": 9.97400091339988e-06, "loss": 0.6161, "step": 953 }, { "epoch": 0.06, "grad_norm": 1.2128146886825562, "learning_rate": 9.973894343506092e-06, "loss": 0.5874, "step": 954 }, { "epoch": 0.06, "grad_norm": 1.2942261695861816, "learning_rate": 9.973787556216213e-06, "loss": 0.6203, "step": 955 }, { "epoch": 0.06, "grad_norm": 1.1971477270126343, "learning_rate": 9.973680551534912e-06, "loss": 0.5554, "step": 956 }, { "epoch": 0.06, "grad_norm": 1.2731057405471802, "learning_rate": 9.973573329466867e-06, "loss": 0.6145, "step": 957 }, { "epoch": 0.06, "grad_norm": 1.2733136415481567, "learning_rate": 9.973465890016762e-06, "loss": 0.5808, "step": 958 }, { "epoch": 0.06, "grad_norm": 1.1474612951278687, "learning_rate": 9.973358233189297e-06, "loss": 0.5565, "step": 959 }, { "epoch": 0.06, "grad_norm": 1.259900689125061, "learning_rate": 9.973250358989175e-06, "loss": 0.6021, "step": 960 }, { "epoch": 0.06, "grad_norm": 1.2001707553863525, "learning_rate": 9.97314226742111e-06, "loss": 0.6051, "step": 961 }, { "epoch": 0.06, "grad_norm": 1.318737506866455, "learning_rate": 9.973033958489828e-06, "loss": 0.6201, "step": 962 }, { "epoch": 0.06, "grad_norm": 1.2592869997024536, "learning_rate": 9.97292543220006e-06, "loss": 0.6053, "step": 963 }, { "epoch": 0.06, "grad_norm": 1.2526111602783203, "learning_rate": 9.972816688556555e-06, "loss": 0.5365, "step": 964 }, { "epoch": 0.06, "grad_norm": 1.3680225610733032, "learning_rate": 9.97270772756406e-06, "loss": 0.6108, "step": 965 }, { "epoch": 0.06, "grad_norm": 1.2968369722366333, "learning_rate": 9.972598549227342e-06, "loss": 0.6109, "step": 966 }, { "epoch": 0.06, "grad_norm": 1.3300329446792603, "learning_rate": 9.97248915355117e-06, "loss": 0.6093, "step": 967 }, { "epoch": 0.06, "grad_norm": 1.2673001289367676, "learning_rate": 9.972379540540325e-06, "loss": 0.6028, "step": 968 }, { "epoch": 0.06, "grad_norm": 1.4004416465759277, "learning_rate": 9.9722697101996e-06, "loss": 0.634, "step": 969 }, { "epoch": 0.06, "grad_norm": 1.2808008193969727, "learning_rate": 9.972159662533796e-06, "loss": 0.6596, "step": 970 }, { "epoch": 0.06, "grad_norm": 1.139958143234253, "learning_rate": 9.97204939754772e-06, "loss": 0.603, "step": 971 }, { "epoch": 0.06, "grad_norm": 1.2374509572982788, "learning_rate": 9.971938915246194e-06, "loss": 0.6072, "step": 972 }, { "epoch": 0.06, "grad_norm": 1.244316577911377, "learning_rate": 9.971828215634044e-06, "loss": 0.5547, "step": 973 }, { "epoch": 0.06, "grad_norm": 1.4000357389450073, "learning_rate": 9.971717298716113e-06, "loss": 0.5498, "step": 974 }, { "epoch": 0.06, "grad_norm": 1.2839558124542236, "learning_rate": 9.971606164497243e-06, "loss": 0.6037, "step": 975 }, { "epoch": 0.06, "grad_norm": 1.3868799209594727, "learning_rate": 9.971494812982297e-06, "loss": 0.5874, "step": 976 }, { "epoch": 0.06, "grad_norm": 1.3136693239212036, "learning_rate": 9.971383244176139e-06, "loss": 0.5873, "step": 977 }, { "epoch": 0.06, "grad_norm": 1.3569260835647583, "learning_rate": 9.971271458083644e-06, "loss": 0.6221, "step": 978 }, { "epoch": 0.06, "grad_norm": 1.2958790063858032, "learning_rate": 9.9711594547097e-06, "loss": 0.5682, "step": 979 }, { "epoch": 0.06, "grad_norm": 1.2136497497558594, "learning_rate": 9.971047234059203e-06, "loss": 0.5487, "step": 980 }, { "epoch": 0.06, "grad_norm": 1.3725528717041016, "learning_rate": 9.970934796137058e-06, "loss": 0.6223, "step": 981 }, { "epoch": 0.06, "grad_norm": 1.436851978302002, "learning_rate": 9.970822140948176e-06, "loss": 0.6107, "step": 982 }, { "epoch": 0.06, "grad_norm": 1.2445377111434937, "learning_rate": 9.970709268497483e-06, "loss": 0.5903, "step": 983 }, { "epoch": 0.06, "grad_norm": 1.2583562135696411, "learning_rate": 9.970596178789913e-06, "loss": 0.6224, "step": 984 }, { "epoch": 0.06, "grad_norm": 1.3294811248779297, "learning_rate": 9.970482871830409e-06, "loss": 0.6529, "step": 985 }, { "epoch": 0.06, "grad_norm": 1.291263461112976, "learning_rate": 9.970369347623923e-06, "loss": 0.641, "step": 986 }, { "epoch": 0.06, "grad_norm": 1.2129530906677246, "learning_rate": 9.970255606175416e-06, "loss": 0.6239, "step": 987 }, { "epoch": 0.06, "grad_norm": 1.301856517791748, "learning_rate": 9.970141647489859e-06, "loss": 0.631, "step": 988 }, { "epoch": 0.06, "grad_norm": 1.230025053024292, "learning_rate": 9.970027471572234e-06, "loss": 0.539, "step": 989 }, { "epoch": 0.06, "grad_norm": 1.4992319345474243, "learning_rate": 9.969913078427533e-06, "loss": 0.638, "step": 990 }, { "epoch": 0.06, "grad_norm": 1.215445876121521, "learning_rate": 9.969798468060752e-06, "loss": 0.6256, "step": 991 }, { "epoch": 0.06, "grad_norm": 1.3984203338623047, "learning_rate": 9.969683640476903e-06, "loss": 0.5673, "step": 992 }, { "epoch": 0.06, "grad_norm": 1.16165030002594, "learning_rate": 9.969568595681003e-06, "loss": 0.5569, "step": 993 }, { "epoch": 0.06, "grad_norm": 1.426371455192566, "learning_rate": 9.969453333678084e-06, "loss": 0.5891, "step": 994 }, { "epoch": 0.06, "grad_norm": 1.581337809562683, "learning_rate": 9.969337854473177e-06, "loss": 0.638, "step": 995 }, { "epoch": 0.06, "grad_norm": 1.2620909214019775, "learning_rate": 9.969222158071337e-06, "loss": 0.6233, "step": 996 }, { "epoch": 0.06, "grad_norm": 1.39928138256073, "learning_rate": 9.969106244477616e-06, "loss": 0.6136, "step": 997 }, { "epoch": 0.06, "grad_norm": 1.4003492593765259, "learning_rate": 9.96899011369708e-06, "loss": 0.5893, "step": 998 }, { "epoch": 0.06, "grad_norm": 1.2223340272903442, "learning_rate": 9.968873765734808e-06, "loss": 0.5862, "step": 999 }, { "epoch": 0.06, "grad_norm": 1.2832059860229492, "learning_rate": 9.968757200595883e-06, "loss": 0.584, "step": 1000 }, { "epoch": 0.06, "grad_norm": 1.4375183582305908, "learning_rate": 9.9686404182854e-06, "loss": 0.6275, "step": 1001 }, { "epoch": 0.06, "grad_norm": 1.3440426588058472, "learning_rate": 9.968523418808464e-06, "loss": 0.5822, "step": 1002 }, { "epoch": 0.06, "grad_norm": 1.4043985605239868, "learning_rate": 9.968406202170189e-06, "loss": 0.6221, "step": 1003 }, { "epoch": 0.06, "grad_norm": 1.3684320449829102, "learning_rate": 9.968288768375699e-06, "loss": 0.6218, "step": 1004 }, { "epoch": 0.06, "grad_norm": 1.4080878496170044, "learning_rate": 9.968171117430121e-06, "loss": 0.6473, "step": 1005 }, { "epoch": 0.06, "grad_norm": 1.3120465278625488, "learning_rate": 9.968053249338605e-06, "loss": 0.5966, "step": 1006 }, { "epoch": 0.07, "grad_norm": 1.2415828704833984, "learning_rate": 9.967935164106297e-06, "loss": 0.5794, "step": 1007 }, { "epoch": 0.07, "grad_norm": 1.1830908060073853, "learning_rate": 9.967816861738363e-06, "loss": 0.5441, "step": 1008 }, { "epoch": 0.07, "grad_norm": 1.3025238513946533, "learning_rate": 9.967698342239968e-06, "loss": 0.6227, "step": 1009 }, { "epoch": 0.07, "grad_norm": 1.5075790882110596, "learning_rate": 9.967579605616297e-06, "loss": 0.6032, "step": 1010 }, { "epoch": 0.07, "grad_norm": 1.3804603815078735, "learning_rate": 9.967460651872537e-06, "loss": 0.5505, "step": 1011 }, { "epoch": 0.07, "grad_norm": 1.275758147239685, "learning_rate": 9.96734148101389e-06, "loss": 0.6012, "step": 1012 }, { "epoch": 0.07, "grad_norm": 1.3463727235794067, "learning_rate": 9.967222093045562e-06, "loss": 0.5801, "step": 1013 }, { "epoch": 0.07, "grad_norm": 1.356225609779358, "learning_rate": 9.967102487972773e-06, "loss": 0.5647, "step": 1014 }, { "epoch": 0.07, "grad_norm": 1.3667569160461426, "learning_rate": 9.966982665800747e-06, "loss": 0.5962, "step": 1015 }, { "epoch": 0.07, "grad_norm": 1.3173860311508179, "learning_rate": 9.966862626534725e-06, "loss": 0.5696, "step": 1016 }, { "epoch": 0.07, "grad_norm": 1.458933711051941, "learning_rate": 9.966742370179951e-06, "loss": 0.6827, "step": 1017 }, { "epoch": 0.07, "grad_norm": 1.2404475212097168, "learning_rate": 9.966621896741683e-06, "loss": 0.5573, "step": 1018 }, { "epoch": 0.07, "grad_norm": 1.3857566118240356, "learning_rate": 9.966501206225187e-06, "loss": 0.6034, "step": 1019 }, { "epoch": 0.07, "grad_norm": 1.199560284614563, "learning_rate": 9.966380298635737e-06, "loss": 0.5771, "step": 1020 }, { "epoch": 0.07, "grad_norm": 1.309192419052124, "learning_rate": 9.966259173978617e-06, "loss": 0.6489, "step": 1021 }, { "epoch": 0.07, "grad_norm": 1.3990284204483032, "learning_rate": 9.966137832259121e-06, "loss": 0.6486, "step": 1022 }, { "epoch": 0.07, "grad_norm": 1.3897686004638672, "learning_rate": 9.966016273482556e-06, "loss": 0.606, "step": 1023 }, { "epoch": 0.07, "grad_norm": 1.3673896789550781, "learning_rate": 9.96589449765423e-06, "loss": 0.5516, "step": 1024 }, { "epoch": 0.07, "grad_norm": 1.4048101902008057, "learning_rate": 9.965772504779467e-06, "loss": 0.6368, "step": 1025 }, { "epoch": 0.07, "grad_norm": 1.2691049575805664, "learning_rate": 9.9656502948636e-06, "loss": 0.5693, "step": 1026 }, { "epoch": 0.07, "grad_norm": 1.4324671030044556, "learning_rate": 9.96552786791197e-06, "loss": 0.6307, "step": 1027 }, { "epoch": 0.07, "grad_norm": 1.358174204826355, "learning_rate": 9.965405223929929e-06, "loss": 0.5908, "step": 1028 }, { "epoch": 0.07, "grad_norm": 1.4566618204116821, "learning_rate": 9.965282362922837e-06, "loss": 0.6102, "step": 1029 }, { "epoch": 0.07, "grad_norm": 1.284130334854126, "learning_rate": 9.965159284896063e-06, "loss": 0.555, "step": 1030 }, { "epoch": 0.07, "grad_norm": 1.3085185289382935, "learning_rate": 9.965035989854985e-06, "loss": 0.582, "step": 1031 }, { "epoch": 0.07, "grad_norm": 1.373648762702942, "learning_rate": 9.964912477804995e-06, "loss": 0.6896, "step": 1032 }, { "epoch": 0.07, "grad_norm": 1.3032994270324707, "learning_rate": 9.964788748751493e-06, "loss": 0.5869, "step": 1033 }, { "epoch": 0.07, "grad_norm": 1.3384987115859985, "learning_rate": 9.964664802699881e-06, "loss": 0.6086, "step": 1034 }, { "epoch": 0.07, "grad_norm": 1.4441288709640503, "learning_rate": 9.96454063965558e-06, "loss": 0.6191, "step": 1035 }, { "epoch": 0.07, "grad_norm": 1.406726598739624, "learning_rate": 9.964416259624017e-06, "loss": 0.6561, "step": 1036 }, { "epoch": 0.07, "grad_norm": 1.3016581535339355, "learning_rate": 9.964291662610628e-06, "loss": 0.6138, "step": 1037 }, { "epoch": 0.07, "grad_norm": 1.616181492805481, "learning_rate": 9.964166848620857e-06, "loss": 0.6315, "step": 1038 }, { "epoch": 0.07, "grad_norm": 1.2850375175476074, "learning_rate": 9.964041817660162e-06, "loss": 0.6277, "step": 1039 }, { "epoch": 0.07, "grad_norm": 1.2874256372451782, "learning_rate": 9.963916569734006e-06, "loss": 0.5888, "step": 1040 }, { "epoch": 0.07, "grad_norm": 1.3140335083007812, "learning_rate": 9.963791104847864e-06, "loss": 0.6212, "step": 1041 }, { "epoch": 0.07, "grad_norm": 1.8008495569229126, "learning_rate": 9.963665423007219e-06, "loss": 0.5915, "step": 1042 }, { "epoch": 0.07, "grad_norm": 1.3082939386367798, "learning_rate": 9.963539524217567e-06, "loss": 0.5794, "step": 1043 }, { "epoch": 0.07, "grad_norm": 1.337387204170227, "learning_rate": 9.963413408484406e-06, "loss": 0.6687, "step": 1044 }, { "epoch": 0.07, "grad_norm": 1.2350521087646484, "learning_rate": 9.96328707581325e-06, "loss": 0.6149, "step": 1045 }, { "epoch": 0.07, "grad_norm": 1.2069257497787476, "learning_rate": 9.963160526209624e-06, "loss": 0.6077, "step": 1046 }, { "epoch": 0.07, "grad_norm": 1.4046622514724731, "learning_rate": 9.963033759679056e-06, "loss": 0.6136, "step": 1047 }, { "epoch": 0.07, "grad_norm": 1.3758286237716675, "learning_rate": 9.962906776227085e-06, "loss": 0.6304, "step": 1048 }, { "epoch": 0.07, "grad_norm": 1.2532709836959839, "learning_rate": 9.962779575859266e-06, "loss": 0.6151, "step": 1049 }, { "epoch": 0.07, "grad_norm": 1.6897059679031372, "learning_rate": 9.962652158581155e-06, "loss": 0.6339, "step": 1050 }, { "epoch": 0.07, "grad_norm": 1.3903589248657227, "learning_rate": 9.96252452439832e-06, "loss": 0.6309, "step": 1051 }, { "epoch": 0.07, "grad_norm": 1.2395782470703125, "learning_rate": 9.962396673316343e-06, "loss": 0.6587, "step": 1052 }, { "epoch": 0.07, "grad_norm": 1.3518444299697876, "learning_rate": 9.962268605340812e-06, "loss": 0.5781, "step": 1053 }, { "epoch": 0.07, "grad_norm": 1.4211405515670776, "learning_rate": 9.962140320477323e-06, "loss": 0.6185, "step": 1054 }, { "epoch": 0.07, "grad_norm": 1.3327643871307373, "learning_rate": 9.962011818731482e-06, "loss": 0.5481, "step": 1055 }, { "epoch": 0.07, "grad_norm": 1.408034324645996, "learning_rate": 9.961883100108905e-06, "loss": 0.6072, "step": 1056 }, { "epoch": 0.07, "grad_norm": 1.362733006477356, "learning_rate": 9.96175416461522e-06, "loss": 0.5928, "step": 1057 }, { "epoch": 0.07, "grad_norm": 1.2583670616149902, "learning_rate": 9.961625012256065e-06, "loss": 0.6368, "step": 1058 }, { "epoch": 0.07, "grad_norm": 1.3010210990905762, "learning_rate": 9.961495643037079e-06, "loss": 0.6295, "step": 1059 }, { "epoch": 0.07, "grad_norm": 1.3886746168136597, "learning_rate": 9.96136605696392e-06, "loss": 0.6051, "step": 1060 }, { "epoch": 0.07, "grad_norm": 1.3298137187957764, "learning_rate": 9.961236254042251e-06, "loss": 0.611, "step": 1061 }, { "epoch": 0.07, "grad_norm": 1.4767413139343262, "learning_rate": 9.961106234277746e-06, "loss": 0.6053, "step": 1062 }, { "epoch": 0.07, "grad_norm": 1.2274994850158691, "learning_rate": 9.960975997676088e-06, "loss": 0.5831, "step": 1063 }, { "epoch": 0.07, "grad_norm": 1.3158564567565918, "learning_rate": 9.960845544242968e-06, "loss": 0.6357, "step": 1064 }, { "epoch": 0.07, "grad_norm": 1.434951663017273, "learning_rate": 9.960714873984088e-06, "loss": 0.6533, "step": 1065 }, { "epoch": 0.07, "grad_norm": 1.271307110786438, "learning_rate": 9.960583986905159e-06, "loss": 0.5805, "step": 1066 }, { "epoch": 0.07, "grad_norm": 1.2925405502319336, "learning_rate": 9.960452883011904e-06, "loss": 0.624, "step": 1067 }, { "epoch": 0.07, "grad_norm": 1.8101040124893188, "learning_rate": 9.96032156231005e-06, "loss": 0.6212, "step": 1068 }, { "epoch": 0.07, "grad_norm": 1.187077283859253, "learning_rate": 9.960190024805339e-06, "loss": 0.5439, "step": 1069 }, { "epoch": 0.07, "grad_norm": 1.2662111520767212, "learning_rate": 9.96005827050352e-06, "loss": 0.6223, "step": 1070 }, { "epoch": 0.07, "grad_norm": 1.3041688203811646, "learning_rate": 9.95992629941035e-06, "loss": 0.6312, "step": 1071 }, { "epoch": 0.07, "grad_norm": 1.1654102802276611, "learning_rate": 9.9597941115316e-06, "loss": 0.5958, "step": 1072 }, { "epoch": 0.07, "grad_norm": 1.1660929918289185, "learning_rate": 9.959661706873046e-06, "loss": 0.5985, "step": 1073 }, { "epoch": 0.07, "grad_norm": 1.289788842201233, "learning_rate": 9.959529085440472e-06, "loss": 0.6001, "step": 1074 }, { "epoch": 0.07, "grad_norm": 1.2321172952651978, "learning_rate": 9.959396247239678e-06, "loss": 0.5998, "step": 1075 }, { "epoch": 0.07, "grad_norm": 1.223419427871704, "learning_rate": 9.959263192276472e-06, "loss": 0.6037, "step": 1076 }, { "epoch": 0.07, "grad_norm": 1.3218345642089844, "learning_rate": 9.959129920556664e-06, "loss": 0.5869, "step": 1077 }, { "epoch": 0.07, "grad_norm": 1.311950445175171, "learning_rate": 9.958996432086085e-06, "loss": 0.6161, "step": 1078 }, { "epoch": 0.07, "grad_norm": 1.1266402006149292, "learning_rate": 9.958862726870565e-06, "loss": 0.512, "step": 1079 }, { "epoch": 0.07, "grad_norm": 1.3157715797424316, "learning_rate": 9.95872880491595e-06, "loss": 0.5958, "step": 1080 }, { "epoch": 0.07, "grad_norm": 1.3117763996124268, "learning_rate": 9.95859466622809e-06, "loss": 0.5856, "step": 1081 }, { "epoch": 0.07, "grad_norm": 1.2814903259277344, "learning_rate": 9.958460310812852e-06, "loss": 0.6398, "step": 1082 }, { "epoch": 0.07, "grad_norm": 1.270330548286438, "learning_rate": 9.958325738676106e-06, "loss": 0.5871, "step": 1083 }, { "epoch": 0.07, "grad_norm": 1.24514639377594, "learning_rate": 9.958190949823736e-06, "loss": 0.5725, "step": 1084 }, { "epoch": 0.07, "grad_norm": 1.1953569650650024, "learning_rate": 9.958055944261632e-06, "loss": 0.6423, "step": 1085 }, { "epoch": 0.07, "grad_norm": 1.2719711065292358, "learning_rate": 9.957920721995695e-06, "loss": 0.599, "step": 1086 }, { "epoch": 0.07, "grad_norm": 1.2289589643478394, "learning_rate": 9.957785283031833e-06, "loss": 0.5868, "step": 1087 }, { "epoch": 0.07, "grad_norm": 1.3035284280776978, "learning_rate": 9.957649627375969e-06, "loss": 0.5926, "step": 1088 }, { "epoch": 0.07, "grad_norm": 1.2735625505447388, "learning_rate": 9.957513755034029e-06, "loss": 0.5923, "step": 1089 }, { "epoch": 0.07, "grad_norm": 1.3438674211502075, "learning_rate": 9.957377666011955e-06, "loss": 0.5886, "step": 1090 }, { "epoch": 0.07, "grad_norm": 1.3080859184265137, "learning_rate": 9.957241360315692e-06, "loss": 0.6402, "step": 1091 }, { "epoch": 0.07, "grad_norm": 1.1098475456237793, "learning_rate": 9.957104837951201e-06, "loss": 0.5454, "step": 1092 }, { "epoch": 0.07, "grad_norm": 1.2530090808868408, "learning_rate": 9.956968098924445e-06, "loss": 0.6306, "step": 1093 }, { "epoch": 0.07, "grad_norm": 1.3584972620010376, "learning_rate": 9.956831143241404e-06, "loss": 0.6052, "step": 1094 }, { "epoch": 0.07, "grad_norm": 1.276675820350647, "learning_rate": 9.956693970908062e-06, "loss": 0.6559, "step": 1095 }, { "epoch": 0.07, "grad_norm": 1.2089980840682983, "learning_rate": 9.956556581930414e-06, "loss": 0.6294, "step": 1096 }, { "epoch": 0.07, "grad_norm": 1.3100813627243042, "learning_rate": 9.956418976314469e-06, "loss": 0.573, "step": 1097 }, { "epoch": 0.07, "grad_norm": 1.3837839365005493, "learning_rate": 9.956281154066234e-06, "loss": 0.5746, "step": 1098 }, { "epoch": 0.07, "grad_norm": 1.2231976985931396, "learning_rate": 9.956143115191741e-06, "loss": 0.5759, "step": 1099 }, { "epoch": 0.07, "grad_norm": 1.305685043334961, "learning_rate": 9.956004859697017e-06, "loss": 0.6185, "step": 1100 }, { "epoch": 0.07, "grad_norm": 1.4398977756500244, "learning_rate": 9.955866387588109e-06, "loss": 0.6223, "step": 1101 }, { "epoch": 0.07, "grad_norm": 1.320150375366211, "learning_rate": 9.955727698871065e-06, "loss": 0.5957, "step": 1102 }, { "epoch": 0.07, "grad_norm": 1.362086296081543, "learning_rate": 9.955588793551952e-06, "loss": 0.621, "step": 1103 }, { "epoch": 0.07, "grad_norm": 1.236119031906128, "learning_rate": 9.955449671636836e-06, "loss": 0.5566, "step": 1104 }, { "epoch": 0.07, "grad_norm": 1.2329609394073486, "learning_rate": 9.955310333131802e-06, "loss": 0.589, "step": 1105 }, { "epoch": 0.07, "grad_norm": 1.2791681289672852, "learning_rate": 9.955170778042936e-06, "loss": 0.6415, "step": 1106 }, { "epoch": 0.07, "grad_norm": 1.263288140296936, "learning_rate": 9.95503100637634e-06, "loss": 0.6245, "step": 1107 }, { "epoch": 0.07, "grad_norm": 1.4741309881210327, "learning_rate": 9.954891018138127e-06, "loss": 0.622, "step": 1108 }, { "epoch": 0.07, "grad_norm": 1.294095754623413, "learning_rate": 9.954750813334407e-06, "loss": 0.6093, "step": 1109 }, { "epoch": 0.07, "grad_norm": 1.2389483451843262, "learning_rate": 9.954610391971313e-06, "loss": 0.6104, "step": 1110 }, { "epoch": 0.07, "grad_norm": 1.1760540008544922, "learning_rate": 9.954469754054982e-06, "loss": 0.5583, "step": 1111 }, { "epoch": 0.07, "grad_norm": 1.1860904693603516, "learning_rate": 9.954328899591564e-06, "loss": 0.6158, "step": 1112 }, { "epoch": 0.07, "grad_norm": 1.2478057146072388, "learning_rate": 9.954187828587209e-06, "loss": 0.6258, "step": 1113 }, { "epoch": 0.07, "grad_norm": 1.207252860069275, "learning_rate": 9.954046541048086e-06, "loss": 0.5837, "step": 1114 }, { "epoch": 0.07, "grad_norm": 1.2313182353973389, "learning_rate": 9.953905036980371e-06, "loss": 0.5492, "step": 1115 }, { "epoch": 0.07, "grad_norm": 1.3306478261947632, "learning_rate": 9.953763316390248e-06, "loss": 0.5442, "step": 1116 }, { "epoch": 0.07, "grad_norm": 2.1319756507873535, "learning_rate": 9.953621379283911e-06, "loss": 0.6315, "step": 1117 }, { "epoch": 0.07, "grad_norm": 1.2099778652191162, "learning_rate": 9.953479225667564e-06, "loss": 0.5982, "step": 1118 }, { "epoch": 0.07, "grad_norm": 1.3906947374343872, "learning_rate": 9.953336855547421e-06, "loss": 0.5899, "step": 1119 }, { "epoch": 0.07, "grad_norm": 1.2389016151428223, "learning_rate": 9.953194268929701e-06, "loss": 0.6042, "step": 1120 }, { "epoch": 0.07, "grad_norm": 1.241973638534546, "learning_rate": 9.953051465820644e-06, "loss": 0.6539, "step": 1121 }, { "epoch": 0.07, "grad_norm": 1.3350203037261963, "learning_rate": 9.952908446226483e-06, "loss": 0.6147, "step": 1122 }, { "epoch": 0.07, "grad_norm": 1.4618288278579712, "learning_rate": 9.952765210153472e-06, "loss": 0.6829, "step": 1123 }, { "epoch": 0.07, "grad_norm": 1.2208502292633057, "learning_rate": 9.952621757607873e-06, "loss": 0.5577, "step": 1124 }, { "epoch": 0.07, "grad_norm": 1.3249115943908691, "learning_rate": 9.952478088595956e-06, "loss": 0.6084, "step": 1125 }, { "epoch": 0.07, "grad_norm": 1.2118337154388428, "learning_rate": 9.952334203123999e-06, "loss": 0.6171, "step": 1126 }, { "epoch": 0.07, "grad_norm": 1.2748695611953735, "learning_rate": 9.952190101198291e-06, "loss": 0.5622, "step": 1127 }, { "epoch": 0.07, "grad_norm": 1.2810522317886353, "learning_rate": 9.952045782825128e-06, "loss": 0.6277, "step": 1128 }, { "epoch": 0.07, "grad_norm": 1.2661120891571045, "learning_rate": 9.951901248010823e-06, "loss": 0.5545, "step": 1129 }, { "epoch": 0.07, "grad_norm": 1.4524827003479004, "learning_rate": 9.951756496761691e-06, "loss": 0.6135, "step": 1130 }, { "epoch": 0.07, "grad_norm": 1.286765217781067, "learning_rate": 9.951611529084055e-06, "loss": 0.5682, "step": 1131 }, { "epoch": 0.07, "grad_norm": 1.342798113822937, "learning_rate": 9.951466344984258e-06, "loss": 0.6147, "step": 1132 }, { "epoch": 0.07, "grad_norm": 1.3319382667541504, "learning_rate": 9.95132094446864e-06, "loss": 0.6095, "step": 1133 }, { "epoch": 0.07, "grad_norm": 1.350808024406433, "learning_rate": 9.951175327543558e-06, "loss": 0.5838, "step": 1134 }, { "epoch": 0.07, "grad_norm": 1.3468047380447388, "learning_rate": 9.951029494215379e-06, "loss": 0.6495, "step": 1135 }, { "epoch": 0.07, "grad_norm": 1.2685215473175049, "learning_rate": 9.950883444490471e-06, "loss": 0.5554, "step": 1136 }, { "epoch": 0.07, "grad_norm": 1.2255370616912842, "learning_rate": 9.950737178375222e-06, "loss": 0.6301, "step": 1137 }, { "epoch": 0.07, "grad_norm": 1.3248214721679688, "learning_rate": 9.950590695876025e-06, "loss": 0.5892, "step": 1138 }, { "epoch": 0.07, "grad_norm": 1.315330982208252, "learning_rate": 9.950443996999283e-06, "loss": 0.571, "step": 1139 }, { "epoch": 0.07, "grad_norm": 1.2812222242355347, "learning_rate": 9.950297081751404e-06, "loss": 0.6364, "step": 1140 }, { "epoch": 0.07, "grad_norm": 1.5235697031021118, "learning_rate": 9.950149950138813e-06, "loss": 0.5768, "step": 1141 }, { "epoch": 0.07, "grad_norm": 1.1456753015518188, "learning_rate": 9.950002602167938e-06, "loss": 0.5838, "step": 1142 }, { "epoch": 0.07, "grad_norm": 1.2834526300430298, "learning_rate": 9.94985503784522e-06, "loss": 0.5752, "step": 1143 }, { "epoch": 0.07, "grad_norm": 1.305107593536377, "learning_rate": 9.94970725717711e-06, "loss": 0.6082, "step": 1144 }, { "epoch": 0.07, "grad_norm": 1.3745965957641602, "learning_rate": 9.949559260170066e-06, "loss": 0.5532, "step": 1145 }, { "epoch": 0.07, "grad_norm": 1.4288769960403442, "learning_rate": 9.949411046830558e-06, "loss": 0.641, "step": 1146 }, { "epoch": 0.07, "grad_norm": 1.409918189048767, "learning_rate": 9.94926261716506e-06, "loss": 0.6136, "step": 1147 }, { "epoch": 0.07, "grad_norm": 1.2989587783813477, "learning_rate": 9.949113971180064e-06, "loss": 0.6253, "step": 1148 }, { "epoch": 0.07, "grad_norm": 1.441202998161316, "learning_rate": 9.948965108882065e-06, "loss": 0.6205, "step": 1149 }, { "epoch": 0.07, "grad_norm": 1.2443034648895264, "learning_rate": 9.94881603027757e-06, "loss": 0.6352, "step": 1150 }, { "epoch": 0.07, "grad_norm": 1.3089383840560913, "learning_rate": 9.948666735373094e-06, "loss": 0.6026, "step": 1151 }, { "epoch": 0.07, "grad_norm": 1.4130501747131348, "learning_rate": 9.948517224175163e-06, "loss": 0.6341, "step": 1152 }, { "epoch": 0.07, "grad_norm": 1.388331651687622, "learning_rate": 9.948367496690309e-06, "loss": 0.6277, "step": 1153 }, { "epoch": 0.07, "grad_norm": 1.4828189611434937, "learning_rate": 9.948217552925082e-06, "loss": 0.6188, "step": 1154 }, { "epoch": 0.07, "grad_norm": 1.5229480266571045, "learning_rate": 9.948067392886031e-06, "loss": 0.61, "step": 1155 }, { "epoch": 0.07, "grad_norm": 1.2792394161224365, "learning_rate": 9.947917016579721e-06, "loss": 0.6422, "step": 1156 }, { "epoch": 0.07, "grad_norm": 1.2348227500915527, "learning_rate": 9.947766424012723e-06, "loss": 0.6133, "step": 1157 }, { "epoch": 0.07, "grad_norm": 1.1531131267547607, "learning_rate": 9.94761561519162e-06, "loss": 0.6275, "step": 1158 }, { "epoch": 0.07, "grad_norm": 1.2202931642532349, "learning_rate": 9.947464590123007e-06, "loss": 0.5872, "step": 1159 }, { "epoch": 0.07, "grad_norm": 1.2237354516983032, "learning_rate": 9.947313348813476e-06, "loss": 0.5763, "step": 1160 }, { "epoch": 0.07, "grad_norm": 1.2891526222229004, "learning_rate": 9.947161891269646e-06, "loss": 0.6401, "step": 1161 }, { "epoch": 0.08, "grad_norm": 1.2300339937210083, "learning_rate": 9.947010217498134e-06, "loss": 0.5545, "step": 1162 }, { "epoch": 0.08, "grad_norm": 1.341225266456604, "learning_rate": 9.946858327505568e-06, "loss": 0.5754, "step": 1163 }, { "epoch": 0.08, "grad_norm": 1.353569746017456, "learning_rate": 9.946706221298586e-06, "loss": 0.6369, "step": 1164 }, { "epoch": 0.08, "grad_norm": 1.3104820251464844, "learning_rate": 9.946553898883839e-06, "loss": 0.5557, "step": 1165 }, { "epoch": 0.08, "grad_norm": 1.3433036804199219, "learning_rate": 9.946401360267983e-06, "loss": 0.5959, "step": 1166 }, { "epoch": 0.08, "grad_norm": 1.268896222114563, "learning_rate": 9.946248605457686e-06, "loss": 0.5825, "step": 1167 }, { "epoch": 0.08, "grad_norm": 1.2628235816955566, "learning_rate": 9.946095634459625e-06, "loss": 0.5592, "step": 1168 }, { "epoch": 0.08, "grad_norm": 1.4537910223007202, "learning_rate": 9.945942447280483e-06, "loss": 0.6194, "step": 1169 }, { "epoch": 0.08, "grad_norm": 1.267624020576477, "learning_rate": 9.94578904392696e-06, "loss": 0.5725, "step": 1170 }, { "epoch": 0.08, "grad_norm": 1.3066462278366089, "learning_rate": 9.945635424405756e-06, "loss": 0.6112, "step": 1171 }, { "epoch": 0.08, "grad_norm": 1.1298201084136963, "learning_rate": 9.945481588723587e-06, "loss": 0.5649, "step": 1172 }, { "epoch": 0.08, "grad_norm": 1.3651297092437744, "learning_rate": 9.945327536887177e-06, "loss": 0.6057, "step": 1173 }, { "epoch": 0.08, "grad_norm": 1.255031704902649, "learning_rate": 9.94517326890326e-06, "loss": 0.5788, "step": 1174 }, { "epoch": 0.08, "grad_norm": 1.2854971885681152, "learning_rate": 9.94501878477858e-06, "loss": 0.5581, "step": 1175 }, { "epoch": 0.08, "grad_norm": 1.349037766456604, "learning_rate": 9.944864084519885e-06, "loss": 0.5882, "step": 1176 }, { "epoch": 0.08, "grad_norm": 1.26833975315094, "learning_rate": 9.94470916813394e-06, "loss": 0.6189, "step": 1177 }, { "epoch": 0.08, "grad_norm": 1.2514084577560425, "learning_rate": 9.944554035627514e-06, "loss": 0.6002, "step": 1178 }, { "epoch": 0.08, "grad_norm": 1.5157418251037598, "learning_rate": 9.944398687007389e-06, "loss": 0.6114, "step": 1179 }, { "epoch": 0.08, "grad_norm": 1.2290116548538208, "learning_rate": 9.944243122280354e-06, "loss": 0.5692, "step": 1180 }, { "epoch": 0.08, "grad_norm": 1.1612168550491333, "learning_rate": 9.944087341453207e-06, "loss": 0.6026, "step": 1181 }, { "epoch": 0.08, "grad_norm": 1.404802918434143, "learning_rate": 9.94393134453276e-06, "loss": 0.6086, "step": 1182 }, { "epoch": 0.08, "grad_norm": 1.368089199066162, "learning_rate": 9.94377513152583e-06, "loss": 0.599, "step": 1183 }, { "epoch": 0.08, "grad_norm": 1.2610008716583252, "learning_rate": 9.943618702439241e-06, "loss": 0.5975, "step": 1184 }, { "epoch": 0.08, "grad_norm": 1.1840574741363525, "learning_rate": 9.943462057279837e-06, "loss": 0.5936, "step": 1185 }, { "epoch": 0.08, "grad_norm": 1.2008758783340454, "learning_rate": 9.943305196054458e-06, "loss": 0.5769, "step": 1186 }, { "epoch": 0.08, "grad_norm": 1.3066725730895996, "learning_rate": 9.943148118769965e-06, "loss": 0.5828, "step": 1187 }, { "epoch": 0.08, "grad_norm": 1.2584288120269775, "learning_rate": 9.942990825433218e-06, "loss": 0.5985, "step": 1188 }, { "epoch": 0.08, "grad_norm": 1.195235013961792, "learning_rate": 9.942833316051097e-06, "loss": 0.5835, "step": 1189 }, { "epoch": 0.08, "grad_norm": 1.2277288436889648, "learning_rate": 9.942675590630486e-06, "loss": 0.6317, "step": 1190 }, { "epoch": 0.08, "grad_norm": 1.371163010597229, "learning_rate": 9.942517649178277e-06, "loss": 0.6033, "step": 1191 }, { "epoch": 0.08, "grad_norm": 1.2444205284118652, "learning_rate": 9.942359491701372e-06, "loss": 0.5706, "step": 1192 }, { "epoch": 0.08, "grad_norm": 1.3714152574539185, "learning_rate": 9.942201118206684e-06, "loss": 0.6251, "step": 1193 }, { "epoch": 0.08, "grad_norm": 1.3398494720458984, "learning_rate": 9.942042528701137e-06, "loss": 0.5984, "step": 1194 }, { "epoch": 0.08, "grad_norm": 1.3505089282989502, "learning_rate": 9.941883723191662e-06, "loss": 0.6251, "step": 1195 }, { "epoch": 0.08, "grad_norm": 1.4652668237686157, "learning_rate": 9.941724701685201e-06, "loss": 0.5767, "step": 1196 }, { "epoch": 0.08, "grad_norm": 1.2123565673828125, "learning_rate": 9.941565464188701e-06, "loss": 0.5656, "step": 1197 }, { "epoch": 0.08, "grad_norm": 1.2855535745620728, "learning_rate": 9.941406010709125e-06, "loss": 0.6179, "step": 1198 }, { "epoch": 0.08, "grad_norm": 1.4351853132247925, "learning_rate": 9.941246341253442e-06, "loss": 0.5835, "step": 1199 }, { "epoch": 0.08, "grad_norm": 1.395695686340332, "learning_rate": 9.941086455828628e-06, "loss": 0.5806, "step": 1200 }, { "epoch": 0.08, "grad_norm": 1.2149658203125, "learning_rate": 9.940926354441675e-06, "loss": 0.6553, "step": 1201 }, { "epoch": 0.08, "grad_norm": 1.3444076776504517, "learning_rate": 9.940766037099579e-06, "loss": 0.5621, "step": 1202 }, { "epoch": 0.08, "grad_norm": 1.379502296447754, "learning_rate": 9.940605503809344e-06, "loss": 0.6063, "step": 1203 }, { "epoch": 0.08, "grad_norm": 1.2536295652389526, "learning_rate": 9.94044475457799e-06, "loss": 0.5979, "step": 1204 }, { "epoch": 0.08, "grad_norm": 1.2390791177749634, "learning_rate": 9.940283789412546e-06, "loss": 0.5554, "step": 1205 }, { "epoch": 0.08, "grad_norm": 1.362121343612671, "learning_rate": 9.940122608320042e-06, "loss": 0.5847, "step": 1206 }, { "epoch": 0.08, "grad_norm": 1.201560139656067, "learning_rate": 9.939961211307523e-06, "loss": 0.5797, "step": 1207 }, { "epoch": 0.08, "grad_norm": 1.3780219554901123, "learning_rate": 9.939799598382045e-06, "loss": 0.6267, "step": 1208 }, { "epoch": 0.08, "grad_norm": 1.1694191694259644, "learning_rate": 9.939637769550671e-06, "loss": 0.6083, "step": 1209 }, { "epoch": 0.08, "grad_norm": 1.2494474649429321, "learning_rate": 9.939475724820478e-06, "loss": 0.5743, "step": 1210 }, { "epoch": 0.08, "grad_norm": 1.1869081258773804, "learning_rate": 9.939313464198542e-06, "loss": 0.5675, "step": 1211 }, { "epoch": 0.08, "grad_norm": 1.6061290502548218, "learning_rate": 9.93915098769196e-06, "loss": 0.6023, "step": 1212 }, { "epoch": 0.08, "grad_norm": 1.3815226554870605, "learning_rate": 9.938988295307828e-06, "loss": 0.5999, "step": 1213 }, { "epoch": 0.08, "grad_norm": 1.415724515914917, "learning_rate": 9.938825387053264e-06, "loss": 0.5557, "step": 1214 }, { "epoch": 0.08, "grad_norm": 1.3971490859985352, "learning_rate": 9.938662262935381e-06, "loss": 0.6085, "step": 1215 }, { "epoch": 0.08, "grad_norm": 1.3557735681533813, "learning_rate": 9.938498922961315e-06, "loss": 0.5778, "step": 1216 }, { "epoch": 0.08, "grad_norm": 1.2411137819290161, "learning_rate": 9.938335367138203e-06, "loss": 0.581, "step": 1217 }, { "epoch": 0.08, "grad_norm": 1.3359922170639038, "learning_rate": 9.938171595473191e-06, "loss": 0.6226, "step": 1218 }, { "epoch": 0.08, "grad_norm": 1.3057085275650024, "learning_rate": 9.93800760797344e-06, "loss": 0.5916, "step": 1219 }, { "epoch": 0.08, "grad_norm": 1.2983072996139526, "learning_rate": 9.937843404646119e-06, "loss": 0.5874, "step": 1220 }, { "epoch": 0.08, "grad_norm": 1.225028157234192, "learning_rate": 9.9376789854984e-06, "loss": 0.6044, "step": 1221 }, { "epoch": 0.08, "grad_norm": 1.2695586681365967, "learning_rate": 9.937514350537474e-06, "loss": 0.5997, "step": 1222 }, { "epoch": 0.08, "grad_norm": 1.224547028541565, "learning_rate": 9.937349499770533e-06, "loss": 0.5395, "step": 1223 }, { "epoch": 0.08, "grad_norm": 1.2115516662597656, "learning_rate": 9.937184433204786e-06, "loss": 0.6253, "step": 1224 }, { "epoch": 0.08, "grad_norm": 1.3034570217132568, "learning_rate": 9.937019150847442e-06, "loss": 0.6124, "step": 1225 }, { "epoch": 0.08, "grad_norm": 1.2024656534194946, "learning_rate": 9.936853652705731e-06, "loss": 0.5391, "step": 1226 }, { "epoch": 0.08, "grad_norm": 1.1785327196121216, "learning_rate": 9.936687938786884e-06, "loss": 0.5199, "step": 1227 }, { "epoch": 0.08, "grad_norm": 1.2759716510772705, "learning_rate": 9.936522009098143e-06, "loss": 0.6115, "step": 1228 }, { "epoch": 0.08, "grad_norm": 1.2254960536956787, "learning_rate": 9.936355863646762e-06, "loss": 0.54, "step": 1229 }, { "epoch": 0.08, "grad_norm": 1.2841613292694092, "learning_rate": 9.936189502440002e-06, "loss": 0.5766, "step": 1230 }, { "epoch": 0.08, "grad_norm": 1.2415355443954468, "learning_rate": 9.936022925485133e-06, "loss": 0.5671, "step": 1231 }, { "epoch": 0.08, "grad_norm": 1.3360373973846436, "learning_rate": 9.93585613278944e-06, "loss": 0.5953, "step": 1232 }, { "epoch": 0.08, "grad_norm": 1.3150969743728638, "learning_rate": 9.935689124360209e-06, "loss": 0.6165, "step": 1233 }, { "epoch": 0.08, "grad_norm": 1.221797227859497, "learning_rate": 9.93552190020474e-06, "loss": 0.6014, "step": 1234 }, { "epoch": 0.08, "grad_norm": 1.230061411857605, "learning_rate": 9.935354460330342e-06, "loss": 0.6288, "step": 1235 }, { "epoch": 0.08, "grad_norm": 1.275642991065979, "learning_rate": 9.935186804744335e-06, "loss": 0.5583, "step": 1236 }, { "epoch": 0.08, "grad_norm": 1.5190725326538086, "learning_rate": 9.935018933454047e-06, "loss": 0.5778, "step": 1237 }, { "epoch": 0.08, "grad_norm": 1.3018697500228882, "learning_rate": 9.934850846466811e-06, "loss": 0.6415, "step": 1238 }, { "epoch": 0.08, "grad_norm": 1.3302507400512695, "learning_rate": 9.93468254378998e-06, "loss": 0.5746, "step": 1239 }, { "epoch": 0.08, "grad_norm": 1.3504936695098877, "learning_rate": 9.934514025430903e-06, "loss": 0.6017, "step": 1240 }, { "epoch": 0.08, "grad_norm": 1.3269124031066895, "learning_rate": 9.934345291396953e-06, "loss": 0.6051, "step": 1241 }, { "epoch": 0.08, "grad_norm": 1.630271553993225, "learning_rate": 9.934176341695496e-06, "loss": 0.5866, "step": 1242 }, { "epoch": 0.08, "grad_norm": 1.4728937149047852, "learning_rate": 9.934007176333926e-06, "loss": 0.5849, "step": 1243 }, { "epoch": 0.08, "grad_norm": 1.4169102907180786, "learning_rate": 9.93383779531963e-06, "loss": 0.6386, "step": 1244 }, { "epoch": 0.08, "grad_norm": 1.3826144933700562, "learning_rate": 9.933668198660014e-06, "loss": 0.6327, "step": 1245 }, { "epoch": 0.08, "grad_norm": 1.4242713451385498, "learning_rate": 9.93349838636249e-06, "loss": 0.6103, "step": 1246 }, { "epoch": 0.08, "grad_norm": 1.1577061414718628, "learning_rate": 9.93332835843448e-06, "loss": 0.6076, "step": 1247 }, { "epoch": 0.08, "grad_norm": 1.378804326057434, "learning_rate": 9.933158114883416e-06, "loss": 0.5974, "step": 1248 }, { "epoch": 0.08, "grad_norm": 1.5556468963623047, "learning_rate": 9.93298765571674e-06, "loss": 0.6063, "step": 1249 }, { "epoch": 0.08, "grad_norm": 1.344406008720398, "learning_rate": 9.932816980941897e-06, "loss": 0.5569, "step": 1250 }, { "epoch": 0.08, "grad_norm": 1.254784345626831, "learning_rate": 9.932646090566353e-06, "loss": 0.5797, "step": 1251 }, { "epoch": 0.08, "grad_norm": 1.2046830654144287, "learning_rate": 9.932474984597575e-06, "loss": 0.6229, "step": 1252 }, { "epoch": 0.08, "grad_norm": 1.2241145372390747, "learning_rate": 9.932303663043042e-06, "loss": 0.5987, "step": 1253 }, { "epoch": 0.08, "grad_norm": 1.229219675064087, "learning_rate": 9.93213212591024e-06, "loss": 0.5965, "step": 1254 }, { "epoch": 0.08, "grad_norm": 1.3344566822052002, "learning_rate": 9.931960373206669e-06, "loss": 0.6005, "step": 1255 }, { "epoch": 0.08, "grad_norm": 1.2970008850097656, "learning_rate": 9.931788404939832e-06, "loss": 0.5726, "step": 1256 }, { "epoch": 0.08, "grad_norm": 1.4999446868896484, "learning_rate": 9.93161622111725e-06, "loss": 0.6752, "step": 1257 }, { "epoch": 0.08, "grad_norm": 1.251788854598999, "learning_rate": 9.931443821746448e-06, "loss": 0.5879, "step": 1258 }, { "epoch": 0.08, "grad_norm": 1.4527554512023926, "learning_rate": 9.931271206834958e-06, "loss": 0.6215, "step": 1259 }, { "epoch": 0.08, "grad_norm": 1.407408356666565, "learning_rate": 9.931098376390327e-06, "loss": 0.6516, "step": 1260 }, { "epoch": 0.08, "grad_norm": 1.162400484085083, "learning_rate": 9.930925330420107e-06, "loss": 0.5187, "step": 1261 }, { "epoch": 0.08, "grad_norm": 1.2554042339324951, "learning_rate": 9.930752068931867e-06, "loss": 0.6198, "step": 1262 }, { "epoch": 0.08, "grad_norm": 1.2641693353652954, "learning_rate": 9.93057859193317e-06, "loss": 0.5563, "step": 1263 }, { "epoch": 0.08, "grad_norm": 1.3925328254699707, "learning_rate": 9.930404899431606e-06, "loss": 0.6058, "step": 1264 }, { "epoch": 0.08, "grad_norm": 1.6808228492736816, "learning_rate": 9.930230991434766e-06, "loss": 0.6107, "step": 1265 }, { "epoch": 0.08, "grad_norm": 1.3723689317703247, "learning_rate": 9.930056867950248e-06, "loss": 0.5978, "step": 1266 }, { "epoch": 0.08, "grad_norm": 1.3185101747512817, "learning_rate": 9.929882528985664e-06, "loss": 0.603, "step": 1267 }, { "epoch": 0.08, "grad_norm": 1.1952364444732666, "learning_rate": 9.929707974548635e-06, "loss": 0.5339, "step": 1268 }, { "epoch": 0.08, "grad_norm": 1.7272064685821533, "learning_rate": 9.929533204646788e-06, "loss": 0.5578, "step": 1269 }, { "epoch": 0.08, "grad_norm": 1.2135045528411865, "learning_rate": 9.929358219287765e-06, "loss": 0.5548, "step": 1270 }, { "epoch": 0.08, "grad_norm": 1.2737771272659302, "learning_rate": 9.929183018479211e-06, "loss": 0.5735, "step": 1271 }, { "epoch": 0.08, "grad_norm": 1.2401165962219238, "learning_rate": 9.929007602228784e-06, "loss": 0.6079, "step": 1272 }, { "epoch": 0.08, "grad_norm": 1.501481533050537, "learning_rate": 9.928831970544153e-06, "loss": 0.6129, "step": 1273 }, { "epoch": 0.08, "grad_norm": 1.193386197090149, "learning_rate": 9.928656123432992e-06, "loss": 0.5901, "step": 1274 }, { "epoch": 0.08, "grad_norm": 1.2630032300949097, "learning_rate": 9.928480060902987e-06, "loss": 0.5814, "step": 1275 }, { "epoch": 0.08, "grad_norm": 1.4170989990234375, "learning_rate": 9.928303782961836e-06, "loss": 0.6129, "step": 1276 }, { "epoch": 0.08, "grad_norm": 1.2308217287063599, "learning_rate": 9.92812728961724e-06, "loss": 0.5913, "step": 1277 }, { "epoch": 0.08, "grad_norm": 1.2886221408843994, "learning_rate": 9.927950580876917e-06, "loss": 0.6184, "step": 1278 }, { "epoch": 0.08, "grad_norm": 1.1436219215393066, "learning_rate": 9.927773656748588e-06, "loss": 0.5805, "step": 1279 }, { "epoch": 0.08, "grad_norm": 1.1790612936019897, "learning_rate": 9.927596517239985e-06, "loss": 0.5423, "step": 1280 }, { "epoch": 0.08, "grad_norm": 1.2200431823730469, "learning_rate": 9.927419162358853e-06, "loss": 0.6009, "step": 1281 }, { "epoch": 0.08, "grad_norm": 1.3125168085098267, "learning_rate": 9.927241592112941e-06, "loss": 0.6708, "step": 1282 }, { "epoch": 0.08, "grad_norm": 1.2177456617355347, "learning_rate": 9.927063806510013e-06, "loss": 0.6252, "step": 1283 }, { "epoch": 0.08, "grad_norm": 1.3441758155822754, "learning_rate": 9.926885805557837e-06, "loss": 0.6075, "step": 1284 }, { "epoch": 0.08, "grad_norm": 1.473400354385376, "learning_rate": 9.926707589264194e-06, "loss": 0.6412, "step": 1285 }, { "epoch": 0.08, "grad_norm": 1.4310206174850464, "learning_rate": 9.926529157636875e-06, "loss": 0.6764, "step": 1286 }, { "epoch": 0.08, "grad_norm": 1.2505228519439697, "learning_rate": 9.926350510683676e-06, "loss": 0.5775, "step": 1287 }, { "epoch": 0.08, "grad_norm": 1.280970573425293, "learning_rate": 9.926171648412406e-06, "loss": 0.6263, "step": 1288 }, { "epoch": 0.08, "grad_norm": 1.4296976327896118, "learning_rate": 9.925992570830883e-06, "loss": 0.6198, "step": 1289 }, { "epoch": 0.08, "grad_norm": 1.2799838781356812, "learning_rate": 9.925813277946933e-06, "loss": 0.5866, "step": 1290 }, { "epoch": 0.08, "grad_norm": 1.2557793855667114, "learning_rate": 9.925633769768395e-06, "loss": 0.5715, "step": 1291 }, { "epoch": 0.08, "grad_norm": 1.2418335676193237, "learning_rate": 9.925454046303113e-06, "loss": 0.6257, "step": 1292 }, { "epoch": 0.08, "grad_norm": 1.1853708028793335, "learning_rate": 9.92527410755894e-06, "loss": 0.5782, "step": 1293 }, { "epoch": 0.08, "grad_norm": 1.1564172506332397, "learning_rate": 9.925093953543746e-06, "loss": 0.5807, "step": 1294 }, { "epoch": 0.08, "grad_norm": 1.3165241479873657, "learning_rate": 9.924913584265401e-06, "loss": 0.6191, "step": 1295 }, { "epoch": 0.08, "grad_norm": 1.315133810043335, "learning_rate": 9.92473299973179e-06, "loss": 0.6001, "step": 1296 }, { "epoch": 0.08, "grad_norm": 1.2981213331222534, "learning_rate": 9.924552199950806e-06, "loss": 0.6196, "step": 1297 }, { "epoch": 0.08, "grad_norm": 1.145391821861267, "learning_rate": 9.92437118493035e-06, "loss": 0.6167, "step": 1298 }, { "epoch": 0.08, "grad_norm": 1.3689723014831543, "learning_rate": 9.924189954678337e-06, "loss": 0.6361, "step": 1299 }, { "epoch": 0.08, "grad_norm": 1.1951885223388672, "learning_rate": 9.924008509202683e-06, "loss": 0.5775, "step": 1300 }, { "epoch": 0.08, "grad_norm": 1.2333961725234985, "learning_rate": 9.923826848511322e-06, "loss": 0.5492, "step": 1301 }, { "epoch": 0.08, "grad_norm": 1.385771632194519, "learning_rate": 9.923644972612192e-06, "loss": 0.6394, "step": 1302 }, { "epoch": 0.08, "grad_norm": 1.2548162937164307, "learning_rate": 9.923462881513245e-06, "loss": 0.597, "step": 1303 }, { "epoch": 0.08, "grad_norm": 1.2731972932815552, "learning_rate": 9.923280575222438e-06, "loss": 0.597, "step": 1304 }, { "epoch": 0.08, "grad_norm": 1.161837100982666, "learning_rate": 9.923098053747738e-06, "loss": 0.5985, "step": 1305 }, { "epoch": 0.08, "grad_norm": 1.1641130447387695, "learning_rate": 9.922915317097125e-06, "loss": 0.5793, "step": 1306 }, { "epoch": 0.08, "grad_norm": 1.6009544134140015, "learning_rate": 9.922732365278584e-06, "loss": 0.6066, "step": 1307 }, { "epoch": 0.08, "grad_norm": 1.4266157150268555, "learning_rate": 9.922549198300112e-06, "loss": 0.637, "step": 1308 }, { "epoch": 0.08, "grad_norm": 1.9337023496627808, "learning_rate": 9.922365816169717e-06, "loss": 0.6581, "step": 1309 }, { "epoch": 0.08, "grad_norm": 1.2842305898666382, "learning_rate": 9.92218221889541e-06, "loss": 0.5886, "step": 1310 }, { "epoch": 0.08, "grad_norm": 1.4331939220428467, "learning_rate": 9.921998406485218e-06, "loss": 0.6083, "step": 1311 }, { "epoch": 0.08, "grad_norm": 1.2208331823349, "learning_rate": 9.921814378947174e-06, "loss": 0.6093, "step": 1312 }, { "epoch": 0.08, "grad_norm": 1.282131552696228, "learning_rate": 9.921630136289324e-06, "loss": 0.5437, "step": 1313 }, { "epoch": 0.08, "grad_norm": 1.2807738780975342, "learning_rate": 9.921445678519716e-06, "loss": 0.601, "step": 1314 }, { "epoch": 0.08, "grad_norm": 1.4977648258209229, "learning_rate": 9.921261005646416e-06, "loss": 0.65, "step": 1315 }, { "epoch": 0.08, "grad_norm": 1.3393118381500244, "learning_rate": 9.921076117677494e-06, "loss": 0.5735, "step": 1316 }, { "epoch": 0.09, "grad_norm": 1.324051022529602, "learning_rate": 9.920891014621034e-06, "loss": 0.5738, "step": 1317 }, { "epoch": 0.09, "grad_norm": 1.2914668321609497, "learning_rate": 9.920705696485123e-06, "loss": 0.6362, "step": 1318 }, { "epoch": 0.09, "grad_norm": 1.3501033782958984, "learning_rate": 9.92052016327786e-06, "loss": 0.6213, "step": 1319 }, { "epoch": 0.09, "grad_norm": 1.4504932165145874, "learning_rate": 9.920334415007355e-06, "loss": 0.6393, "step": 1320 }, { "epoch": 0.09, "grad_norm": 1.4630228281021118, "learning_rate": 9.92014845168173e-06, "loss": 0.5865, "step": 1321 }, { "epoch": 0.09, "grad_norm": 1.133312702178955, "learning_rate": 9.919962273309108e-06, "loss": 0.5695, "step": 1322 }, { "epoch": 0.09, "grad_norm": 1.1970833539962769, "learning_rate": 9.91977587989763e-06, "loss": 0.6021, "step": 1323 }, { "epoch": 0.09, "grad_norm": 1.2777197360992432, "learning_rate": 9.91958927145544e-06, "loss": 0.573, "step": 1324 }, { "epoch": 0.09, "grad_norm": 1.5097123384475708, "learning_rate": 9.919402447990699e-06, "loss": 0.6027, "step": 1325 }, { "epoch": 0.09, "grad_norm": 1.2790755033493042, "learning_rate": 9.919215409511567e-06, "loss": 0.6586, "step": 1326 }, { "epoch": 0.09, "grad_norm": 1.3690685033798218, "learning_rate": 9.919028156026221e-06, "loss": 0.5925, "step": 1327 }, { "epoch": 0.09, "grad_norm": 1.2986596822738647, "learning_rate": 9.918840687542845e-06, "loss": 0.5649, "step": 1328 }, { "epoch": 0.09, "grad_norm": 1.4165630340576172, "learning_rate": 9.918653004069634e-06, "loss": 0.6178, "step": 1329 }, { "epoch": 0.09, "grad_norm": 1.1524397134780884, "learning_rate": 9.918465105614793e-06, "loss": 0.5652, "step": 1330 }, { "epoch": 0.09, "grad_norm": 1.2434324026107788, "learning_rate": 9.91827699218653e-06, "loss": 0.5889, "step": 1331 }, { "epoch": 0.09, "grad_norm": 1.2715033292770386, "learning_rate": 9.918088663793066e-06, "loss": 0.617, "step": 1332 }, { "epoch": 0.09, "grad_norm": 1.179105520248413, "learning_rate": 9.917900120442639e-06, "loss": 0.5723, "step": 1333 }, { "epoch": 0.09, "grad_norm": 1.3063476085662842, "learning_rate": 9.917711362143485e-06, "loss": 0.5909, "step": 1334 }, { "epoch": 0.09, "grad_norm": 1.2284542322158813, "learning_rate": 9.917522388903856e-06, "loss": 0.575, "step": 1335 }, { "epoch": 0.09, "grad_norm": 1.2862467765808105, "learning_rate": 9.917333200732008e-06, "loss": 0.6048, "step": 1336 }, { "epoch": 0.09, "grad_norm": 1.3297063112258911, "learning_rate": 9.917143797636216e-06, "loss": 0.6046, "step": 1337 }, { "epoch": 0.09, "grad_norm": 1.264683485031128, "learning_rate": 9.916954179624754e-06, "loss": 0.5926, "step": 1338 }, { "epoch": 0.09, "grad_norm": 1.2626245021820068, "learning_rate": 9.916764346705911e-06, "loss": 0.5653, "step": 1339 }, { "epoch": 0.09, "grad_norm": 1.5150067806243896, "learning_rate": 9.916574298887983e-06, "loss": 0.6075, "step": 1340 }, { "epoch": 0.09, "grad_norm": 1.2385473251342773, "learning_rate": 9.916384036179278e-06, "loss": 0.5718, "step": 1341 }, { "epoch": 0.09, "grad_norm": 1.281664490699768, "learning_rate": 9.91619355858811e-06, "loss": 0.5891, "step": 1342 }, { "epoch": 0.09, "grad_norm": 1.2655258178710938, "learning_rate": 9.916002866122807e-06, "loss": 0.5459, "step": 1343 }, { "epoch": 0.09, "grad_norm": 1.3847860097885132, "learning_rate": 9.915811958791702e-06, "loss": 0.5849, "step": 1344 }, { "epoch": 0.09, "grad_norm": 1.3486056327819824, "learning_rate": 9.91562083660314e-06, "loss": 0.6149, "step": 1345 }, { "epoch": 0.09, "grad_norm": 1.347636342048645, "learning_rate": 9.915429499565471e-06, "loss": 0.5724, "step": 1346 }, { "epoch": 0.09, "grad_norm": 1.3243262767791748, "learning_rate": 9.915237947687062e-06, "loss": 0.555, "step": 1347 }, { "epoch": 0.09, "grad_norm": 1.3203363418579102, "learning_rate": 9.915046180976284e-06, "loss": 0.6707, "step": 1348 }, { "epoch": 0.09, "grad_norm": 1.238389253616333, "learning_rate": 9.914854199441519e-06, "loss": 0.6361, "step": 1349 }, { "epoch": 0.09, "grad_norm": 1.319898009300232, "learning_rate": 9.914662003091155e-06, "loss": 0.5778, "step": 1350 }, { "epoch": 0.09, "grad_norm": 1.359611988067627, "learning_rate": 9.914469591933598e-06, "loss": 0.6001, "step": 1351 }, { "epoch": 0.09, "grad_norm": 1.2542012929916382, "learning_rate": 9.914276965977253e-06, "loss": 0.6321, "step": 1352 }, { "epoch": 0.09, "grad_norm": 1.1598618030548096, "learning_rate": 9.914084125230542e-06, "loss": 0.5807, "step": 1353 }, { "epoch": 0.09, "grad_norm": 1.2599636316299438, "learning_rate": 9.913891069701891e-06, "loss": 0.6243, "step": 1354 }, { "epoch": 0.09, "grad_norm": 1.3096433877944946, "learning_rate": 9.91369779939974e-06, "loss": 0.6114, "step": 1355 }, { "epoch": 0.09, "grad_norm": 1.2491356134414673, "learning_rate": 9.913504314332534e-06, "loss": 0.6043, "step": 1356 }, { "epoch": 0.09, "grad_norm": 1.375565767288208, "learning_rate": 9.913310614508733e-06, "loss": 0.5841, "step": 1357 }, { "epoch": 0.09, "grad_norm": 1.3851325511932373, "learning_rate": 9.913116699936803e-06, "loss": 0.5854, "step": 1358 }, { "epoch": 0.09, "grad_norm": 1.3662573099136353, "learning_rate": 9.912922570625215e-06, "loss": 0.623, "step": 1359 }, { "epoch": 0.09, "grad_norm": 1.271349549293518, "learning_rate": 9.91272822658246e-06, "loss": 0.5894, "step": 1360 }, { "epoch": 0.09, "grad_norm": 1.145318865776062, "learning_rate": 9.912533667817026e-06, "loss": 0.5555, "step": 1361 }, { "epoch": 0.09, "grad_norm": 1.2639018297195435, "learning_rate": 9.912338894337422e-06, "loss": 0.6273, "step": 1362 }, { "epoch": 0.09, "grad_norm": 1.312528133392334, "learning_rate": 9.912143906152158e-06, "loss": 0.6001, "step": 1363 }, { "epoch": 0.09, "grad_norm": 1.1710220575332642, "learning_rate": 9.911948703269759e-06, "loss": 0.5726, "step": 1364 }, { "epoch": 0.09, "grad_norm": 1.3129388093948364, "learning_rate": 9.911753285698753e-06, "loss": 0.5938, "step": 1365 }, { "epoch": 0.09, "grad_norm": 1.3247534036636353, "learning_rate": 9.911557653447685e-06, "loss": 0.5924, "step": 1366 }, { "epoch": 0.09, "grad_norm": 1.2073116302490234, "learning_rate": 9.911361806525104e-06, "loss": 0.569, "step": 1367 }, { "epoch": 0.09, "grad_norm": 1.233691692352295, "learning_rate": 9.911165744939568e-06, "loss": 0.567, "step": 1368 }, { "epoch": 0.09, "grad_norm": 1.3031489849090576, "learning_rate": 9.91096946869965e-06, "loss": 0.5572, "step": 1369 }, { "epoch": 0.09, "grad_norm": 1.3505975008010864, "learning_rate": 9.910772977813926e-06, "loss": 0.6314, "step": 1370 }, { "epoch": 0.09, "grad_norm": 1.214632511138916, "learning_rate": 9.910576272290986e-06, "loss": 0.5847, "step": 1371 }, { "epoch": 0.09, "grad_norm": 1.2935000658035278, "learning_rate": 9.910379352139426e-06, "loss": 0.6028, "step": 1372 }, { "epoch": 0.09, "grad_norm": 1.3299611806869507, "learning_rate": 9.910182217367854e-06, "loss": 0.6016, "step": 1373 }, { "epoch": 0.09, "grad_norm": 1.3040165901184082, "learning_rate": 9.909984867984886e-06, "loss": 0.6278, "step": 1374 }, { "epoch": 0.09, "grad_norm": 1.190635323524475, "learning_rate": 9.909787303999146e-06, "loss": 0.5947, "step": 1375 }, { "epoch": 0.09, "grad_norm": 1.3291469812393188, "learning_rate": 9.909589525419273e-06, "loss": 0.5764, "step": 1376 }, { "epoch": 0.09, "grad_norm": 1.2260907888412476, "learning_rate": 9.909391532253905e-06, "loss": 0.558, "step": 1377 }, { "epoch": 0.09, "grad_norm": 1.2049672603607178, "learning_rate": 9.909193324511701e-06, "loss": 0.6359, "step": 1378 }, { "epoch": 0.09, "grad_norm": 1.1795775890350342, "learning_rate": 9.908994902201323e-06, "loss": 0.5273, "step": 1379 }, { "epoch": 0.09, "grad_norm": 1.228368878364563, "learning_rate": 9.908796265331444e-06, "loss": 0.609, "step": 1380 }, { "epoch": 0.09, "grad_norm": 1.1994585990905762, "learning_rate": 9.908597413910744e-06, "loss": 0.5793, "step": 1381 }, { "epoch": 0.09, "grad_norm": 2.4981064796447754, "learning_rate": 9.908398347947917e-06, "loss": 0.5883, "step": 1382 }, { "epoch": 0.09, "grad_norm": 1.2944895029067993, "learning_rate": 9.908199067451661e-06, "loss": 0.5821, "step": 1383 }, { "epoch": 0.09, "grad_norm": 1.3875446319580078, "learning_rate": 9.907999572430686e-06, "loss": 0.6381, "step": 1384 }, { "epoch": 0.09, "grad_norm": 1.199365258216858, "learning_rate": 9.907799862893713e-06, "loss": 0.5783, "step": 1385 }, { "epoch": 0.09, "grad_norm": 1.505048155784607, "learning_rate": 9.907599938849471e-06, "loss": 0.5873, "step": 1386 }, { "epoch": 0.09, "grad_norm": 1.2483574151992798, "learning_rate": 9.907399800306697e-06, "loss": 0.593, "step": 1387 }, { "epoch": 0.09, "grad_norm": 1.342934250831604, "learning_rate": 9.90719944727414e-06, "loss": 0.5682, "step": 1388 }, { "epoch": 0.09, "grad_norm": 1.1770374774932861, "learning_rate": 9.906998879760553e-06, "loss": 0.6042, "step": 1389 }, { "epoch": 0.09, "grad_norm": 1.3846824169158936, "learning_rate": 9.906798097774708e-06, "loss": 0.6504, "step": 1390 }, { "epoch": 0.09, "grad_norm": 1.261752724647522, "learning_rate": 9.906597101325379e-06, "loss": 0.578, "step": 1391 }, { "epoch": 0.09, "grad_norm": 1.2559810876846313, "learning_rate": 9.906395890421348e-06, "loss": 0.5761, "step": 1392 }, { "epoch": 0.09, "grad_norm": 1.2905775308609009, "learning_rate": 9.90619446507141e-06, "loss": 0.5998, "step": 1393 }, { "epoch": 0.09, "grad_norm": 1.2931207418441772, "learning_rate": 9.905992825284371e-06, "loss": 0.6359, "step": 1394 }, { "epoch": 0.09, "grad_norm": 1.275443196296692, "learning_rate": 9.905790971069046e-06, "loss": 0.5569, "step": 1395 }, { "epoch": 0.09, "grad_norm": 1.174149751663208, "learning_rate": 9.905588902434251e-06, "loss": 0.5781, "step": 1396 }, { "epoch": 0.09, "grad_norm": 1.2426849603652954, "learning_rate": 9.905386619388823e-06, "loss": 0.5729, "step": 1397 }, { "epoch": 0.09, "grad_norm": 1.2002323865890503, "learning_rate": 9.905184121941602e-06, "loss": 0.6028, "step": 1398 }, { "epoch": 0.09, "grad_norm": 1.299588918685913, "learning_rate": 9.904981410101437e-06, "loss": 0.5772, "step": 1399 }, { "epoch": 0.09, "grad_norm": 1.2773582935333252, "learning_rate": 9.904778483877192e-06, "loss": 0.6286, "step": 1400 }, { "epoch": 0.09, "grad_norm": 1.295594334602356, "learning_rate": 9.904575343277734e-06, "loss": 0.5944, "step": 1401 }, { "epoch": 0.09, "grad_norm": 1.3374176025390625, "learning_rate": 9.904371988311941e-06, "loss": 0.5688, "step": 1402 }, { "epoch": 0.09, "grad_norm": 1.496567964553833, "learning_rate": 9.9041684189887e-06, "loss": 0.6426, "step": 1403 }, { "epoch": 0.09, "grad_norm": 1.337647557258606, "learning_rate": 9.903964635316912e-06, "loss": 0.6076, "step": 1404 }, { "epoch": 0.09, "grad_norm": 1.3153475522994995, "learning_rate": 9.903760637305483e-06, "loss": 0.607, "step": 1405 }, { "epoch": 0.09, "grad_norm": 1.436015009880066, "learning_rate": 9.903556424963327e-06, "loss": 0.6078, "step": 1406 }, { "epoch": 0.09, "grad_norm": 1.270382046699524, "learning_rate": 9.903351998299372e-06, "loss": 0.605, "step": 1407 }, { "epoch": 0.09, "grad_norm": 1.3743953704833984, "learning_rate": 9.903147357322552e-06, "loss": 0.5795, "step": 1408 }, { "epoch": 0.09, "grad_norm": 1.5257220268249512, "learning_rate": 9.90294250204181e-06, "loss": 0.6273, "step": 1409 }, { "epoch": 0.09, "grad_norm": 1.3945852518081665, "learning_rate": 9.902737432466102e-06, "loss": 0.586, "step": 1410 }, { "epoch": 0.09, "grad_norm": 1.2091225385665894, "learning_rate": 9.90253214860439e-06, "loss": 0.5982, "step": 1411 }, { "epoch": 0.09, "grad_norm": 1.2748793363571167, "learning_rate": 9.902326650465646e-06, "loss": 0.5669, "step": 1412 }, { "epoch": 0.09, "grad_norm": 1.2076648473739624, "learning_rate": 9.902120938058853e-06, "loss": 0.5571, "step": 1413 }, { "epoch": 0.09, "grad_norm": 1.3888256549835205, "learning_rate": 9.901915011393002e-06, "loss": 0.6225, "step": 1414 }, { "epoch": 0.09, "grad_norm": 1.2378675937652588, "learning_rate": 9.901708870477092e-06, "loss": 0.5237, "step": 1415 }, { "epoch": 0.09, "grad_norm": 1.358253836631775, "learning_rate": 9.901502515320134e-06, "loss": 0.559, "step": 1416 }, { "epoch": 0.09, "grad_norm": 1.2403948307037354, "learning_rate": 9.901295945931146e-06, "loss": 0.6404, "step": 1417 }, { "epoch": 0.09, "grad_norm": 1.5272672176361084, "learning_rate": 9.901089162319159e-06, "loss": 0.5927, "step": 1418 }, { "epoch": 0.09, "grad_norm": 1.3943582773208618, "learning_rate": 9.900882164493208e-06, "loss": 0.6135, "step": 1419 }, { "epoch": 0.09, "grad_norm": 1.3089895248413086, "learning_rate": 9.900674952462344e-06, "loss": 0.616, "step": 1420 }, { "epoch": 0.09, "grad_norm": 1.2431566715240479, "learning_rate": 9.900467526235621e-06, "loss": 0.5358, "step": 1421 }, { "epoch": 0.09, "grad_norm": 1.3940389156341553, "learning_rate": 9.900259885822106e-06, "loss": 0.6158, "step": 1422 }, { "epoch": 0.09, "grad_norm": 1.2664674520492554, "learning_rate": 9.900052031230876e-06, "loss": 0.6079, "step": 1423 }, { "epoch": 0.09, "grad_norm": 1.215100646018982, "learning_rate": 9.899843962471013e-06, "loss": 0.5718, "step": 1424 }, { "epoch": 0.09, "grad_norm": 1.2012476921081543, "learning_rate": 9.89963567955161e-06, "loss": 0.6257, "step": 1425 }, { "epoch": 0.09, "grad_norm": 1.2993314266204834, "learning_rate": 9.899427182481774e-06, "loss": 0.5792, "step": 1426 }, { "epoch": 0.09, "grad_norm": 1.4109528064727783, "learning_rate": 9.899218471270617e-06, "loss": 0.5968, "step": 1427 }, { "epoch": 0.09, "grad_norm": 1.3444833755493164, "learning_rate": 9.89900954592726e-06, "loss": 0.6334, "step": 1428 }, { "epoch": 0.09, "grad_norm": 1.166711449623108, "learning_rate": 9.898800406460836e-06, "loss": 0.5631, "step": 1429 }, { "epoch": 0.09, "grad_norm": 1.29062020778656, "learning_rate": 9.898591052880485e-06, "loss": 0.5517, "step": 1430 }, { "epoch": 0.09, "grad_norm": 1.2148298025131226, "learning_rate": 9.898381485195358e-06, "loss": 0.5736, "step": 1431 }, { "epoch": 0.09, "grad_norm": 1.3337525129318237, "learning_rate": 9.898171703414615e-06, "loss": 0.6411, "step": 1432 }, { "epoch": 0.09, "grad_norm": 1.2690143585205078, "learning_rate": 9.897961707547421e-06, "loss": 0.5404, "step": 1433 }, { "epoch": 0.09, "grad_norm": 1.2717101573944092, "learning_rate": 9.89775149760296e-06, "loss": 0.6457, "step": 1434 }, { "epoch": 0.09, "grad_norm": 1.3467198610305786, "learning_rate": 9.897541073590417e-06, "loss": 0.5818, "step": 1435 }, { "epoch": 0.09, "grad_norm": 1.280362844467163, "learning_rate": 9.897330435518991e-06, "loss": 0.6081, "step": 1436 }, { "epoch": 0.09, "grad_norm": 1.3492273092269897, "learning_rate": 9.897119583397885e-06, "loss": 0.5828, "step": 1437 }, { "epoch": 0.09, "grad_norm": 1.329773187637329, "learning_rate": 9.896908517236317e-06, "loss": 0.5912, "step": 1438 }, { "epoch": 0.09, "grad_norm": 1.2062897682189941, "learning_rate": 9.896697237043513e-06, "loss": 0.548, "step": 1439 }, { "epoch": 0.09, "grad_norm": 1.350963830947876, "learning_rate": 9.896485742828706e-06, "loss": 0.5587, "step": 1440 }, { "epoch": 0.09, "grad_norm": 1.839398741722107, "learning_rate": 9.896274034601141e-06, "loss": 0.6178, "step": 1441 }, { "epoch": 0.09, "grad_norm": 1.2086894512176514, "learning_rate": 9.896062112370068e-06, "loss": 0.6174, "step": 1442 }, { "epoch": 0.09, "grad_norm": 1.2806559801101685, "learning_rate": 9.895849976144755e-06, "loss": 0.5955, "step": 1443 }, { "epoch": 0.09, "grad_norm": 1.3734794855117798, "learning_rate": 9.895637625934468e-06, "loss": 0.6439, "step": 1444 }, { "epoch": 0.09, "grad_norm": 1.4741970300674438, "learning_rate": 9.895425061748494e-06, "loss": 0.56, "step": 1445 }, { "epoch": 0.09, "grad_norm": 1.3951997756958008, "learning_rate": 9.89521228359612e-06, "loss": 0.5848, "step": 1446 }, { "epoch": 0.09, "grad_norm": 1.3967658281326294, "learning_rate": 9.894999291486646e-06, "loss": 0.5795, "step": 1447 }, { "epoch": 0.09, "grad_norm": 1.2344610691070557, "learning_rate": 9.894786085429384e-06, "loss": 0.5397, "step": 1448 }, { "epoch": 0.09, "grad_norm": 1.231454610824585, "learning_rate": 9.89457266543365e-06, "loss": 0.5907, "step": 1449 }, { "epoch": 0.09, "grad_norm": 1.583797574043274, "learning_rate": 9.894359031508773e-06, "loss": 0.6502, "step": 1450 }, { "epoch": 0.09, "grad_norm": 1.278608798980713, "learning_rate": 9.89414518366409e-06, "loss": 0.586, "step": 1451 }, { "epoch": 0.09, "grad_norm": 2.4726057052612305, "learning_rate": 9.893931121908948e-06, "loss": 0.5695, "step": 1452 }, { "epoch": 0.09, "grad_norm": 1.3375308513641357, "learning_rate": 9.893716846252705e-06, "loss": 0.6252, "step": 1453 }, { "epoch": 0.09, "grad_norm": 1.1585888862609863, "learning_rate": 9.893502356704722e-06, "loss": 0.5685, "step": 1454 }, { "epoch": 0.09, "grad_norm": 1.3182766437530518, "learning_rate": 9.893287653274377e-06, "loss": 0.5825, "step": 1455 }, { "epoch": 0.09, "grad_norm": 1.271012544631958, "learning_rate": 9.893072735971055e-06, "loss": 0.606, "step": 1456 }, { "epoch": 0.09, "grad_norm": 1.3366020917892456, "learning_rate": 9.892857604804147e-06, "loss": 0.586, "step": 1457 }, { "epoch": 0.09, "grad_norm": 1.320239543914795, "learning_rate": 9.892642259783056e-06, "loss": 0.5924, "step": 1458 }, { "epoch": 0.09, "grad_norm": 1.3217356204986572, "learning_rate": 9.892426700917197e-06, "loss": 0.6002, "step": 1459 }, { "epoch": 0.09, "grad_norm": 1.3917009830474854, "learning_rate": 9.892210928215989e-06, "loss": 0.5458, "step": 1460 }, { "epoch": 0.09, "grad_norm": 1.4800511598587036, "learning_rate": 9.891994941688864e-06, "loss": 0.6182, "step": 1461 }, { "epoch": 0.09, "grad_norm": 1.4278688430786133, "learning_rate": 9.891778741345259e-06, "loss": 0.5792, "step": 1462 }, { "epoch": 0.09, "grad_norm": 1.2970895767211914, "learning_rate": 9.891562327194628e-06, "loss": 0.603, "step": 1463 }, { "epoch": 0.09, "grad_norm": 1.1945924758911133, "learning_rate": 9.891345699246426e-06, "loss": 0.5985, "step": 1464 }, { "epoch": 0.09, "grad_norm": 1.2194398641586304, "learning_rate": 9.891128857510126e-06, "loss": 0.5682, "step": 1465 }, { "epoch": 0.09, "grad_norm": 1.352347731590271, "learning_rate": 9.890911801995201e-06, "loss": 0.5905, "step": 1466 }, { "epoch": 0.09, "grad_norm": 1.4632457494735718, "learning_rate": 9.890694532711142e-06, "loss": 0.5874, "step": 1467 }, { "epoch": 0.09, "grad_norm": 1.29800283908844, "learning_rate": 9.890477049667442e-06, "loss": 0.6373, "step": 1468 }, { "epoch": 0.09, "grad_norm": 1.2692029476165771, "learning_rate": 9.890259352873608e-06, "loss": 0.5522, "step": 1469 }, { "epoch": 0.09, "grad_norm": 1.2419679164886475, "learning_rate": 9.890041442339154e-06, "loss": 0.5781, "step": 1470 }, { "epoch": 0.09, "grad_norm": 1.3129690885543823, "learning_rate": 9.889823318073605e-06, "loss": 0.5567, "step": 1471 }, { "epoch": 0.1, "grad_norm": 1.480728030204773, "learning_rate": 9.889604980086496e-06, "loss": 0.5682, "step": 1472 }, { "epoch": 0.1, "grad_norm": 1.3417298793792725, "learning_rate": 9.889386428387366e-06, "loss": 0.5742, "step": 1473 }, { "epoch": 0.1, "grad_norm": 1.2787445783615112, "learning_rate": 9.889167662985773e-06, "loss": 0.5655, "step": 1474 }, { "epoch": 0.1, "grad_norm": 1.4531000852584839, "learning_rate": 9.888948683891275e-06, "loss": 0.6182, "step": 1475 }, { "epoch": 0.1, "grad_norm": 1.2879085540771484, "learning_rate": 9.888729491113442e-06, "loss": 0.5952, "step": 1476 }, { "epoch": 0.1, "grad_norm": 1.2671804428100586, "learning_rate": 9.888510084661857e-06, "loss": 0.5681, "step": 1477 }, { "epoch": 0.1, "grad_norm": 1.200857400894165, "learning_rate": 9.88829046454611e-06, "loss": 0.6366, "step": 1478 }, { "epoch": 0.1, "grad_norm": 1.4526344537734985, "learning_rate": 9.888070630775797e-06, "loss": 0.6409, "step": 1479 }, { "epoch": 0.1, "grad_norm": 1.3747111558914185, "learning_rate": 9.88785058336053e-06, "loss": 0.569, "step": 1480 }, { "epoch": 0.1, "grad_norm": 1.2678924798965454, "learning_rate": 9.887630322309923e-06, "loss": 0.5861, "step": 1481 }, { "epoch": 0.1, "grad_norm": 1.2347700595855713, "learning_rate": 9.887409847633606e-06, "loss": 0.6038, "step": 1482 }, { "epoch": 0.1, "grad_norm": 1.2142212390899658, "learning_rate": 9.887189159341213e-06, "loss": 0.5686, "step": 1483 }, { "epoch": 0.1, "grad_norm": 1.2618999481201172, "learning_rate": 9.886968257442392e-06, "loss": 0.602, "step": 1484 }, { "epoch": 0.1, "grad_norm": 1.298300862312317, "learning_rate": 9.886747141946796e-06, "loss": 0.6181, "step": 1485 }, { "epoch": 0.1, "grad_norm": 1.259010672569275, "learning_rate": 9.886525812864092e-06, "loss": 0.6166, "step": 1486 }, { "epoch": 0.1, "grad_norm": 1.3874272108078003, "learning_rate": 9.88630427020395e-06, "loss": 0.5958, "step": 1487 }, { "epoch": 0.1, "grad_norm": 1.210863471031189, "learning_rate": 9.886082513976058e-06, "loss": 0.5867, "step": 1488 }, { "epoch": 0.1, "grad_norm": 1.2334825992584229, "learning_rate": 9.885860544190102e-06, "loss": 0.5808, "step": 1489 }, { "epoch": 0.1, "grad_norm": 1.253554344177246, "learning_rate": 9.88563836085579e-06, "loss": 0.5968, "step": 1490 }, { "epoch": 0.1, "grad_norm": 1.106610655784607, "learning_rate": 9.88541596398283e-06, "loss": 0.5129, "step": 1491 }, { "epoch": 0.1, "grad_norm": 1.2265058755874634, "learning_rate": 9.885193353580942e-06, "loss": 0.5912, "step": 1492 }, { "epoch": 0.1, "grad_norm": 1.3855417966842651, "learning_rate": 9.884970529659857e-06, "loss": 0.6776, "step": 1493 }, { "epoch": 0.1, "grad_norm": 1.2653698921203613, "learning_rate": 9.884747492229313e-06, "loss": 0.6091, "step": 1494 }, { "epoch": 0.1, "grad_norm": 1.2579516172409058, "learning_rate": 9.88452424129906e-06, "loss": 0.5893, "step": 1495 }, { "epoch": 0.1, "grad_norm": 1.220447063446045, "learning_rate": 9.884300776878854e-06, "loss": 0.6217, "step": 1496 }, { "epoch": 0.1, "grad_norm": 1.1797995567321777, "learning_rate": 9.884077098978462e-06, "loss": 0.5997, "step": 1497 }, { "epoch": 0.1, "grad_norm": 1.2733688354492188, "learning_rate": 9.883853207607664e-06, "loss": 0.5832, "step": 1498 }, { "epoch": 0.1, "grad_norm": 1.3639229536056519, "learning_rate": 9.88362910277624e-06, "loss": 0.6085, "step": 1499 }, { "epoch": 0.1, "grad_norm": 1.2600667476654053, "learning_rate": 9.883404784493988e-06, "loss": 0.6101, "step": 1500 }, { "epoch": 0.1, "grad_norm": 1.2578577995300293, "learning_rate": 9.883180252770712e-06, "loss": 0.5841, "step": 1501 }, { "epoch": 0.1, "grad_norm": 1.234103798866272, "learning_rate": 9.882955507616227e-06, "loss": 0.5539, "step": 1502 }, { "epoch": 0.1, "grad_norm": 1.284691572189331, "learning_rate": 9.882730549040354e-06, "loss": 0.5392, "step": 1503 }, { "epoch": 0.1, "grad_norm": 1.3622608184814453, "learning_rate": 9.882505377052926e-06, "loss": 0.5816, "step": 1504 }, { "epoch": 0.1, "grad_norm": 1.1929398775100708, "learning_rate": 9.882279991663789e-06, "loss": 0.6077, "step": 1505 }, { "epoch": 0.1, "grad_norm": 1.205164909362793, "learning_rate": 9.882054392882786e-06, "loss": 0.5665, "step": 1506 }, { "epoch": 0.1, "grad_norm": 1.1304913759231567, "learning_rate": 9.88182858071978e-06, "loss": 0.5721, "step": 1507 }, { "epoch": 0.1, "grad_norm": 1.269097089767456, "learning_rate": 9.881602555184646e-06, "loss": 0.5971, "step": 1508 }, { "epoch": 0.1, "grad_norm": 1.2872003316879272, "learning_rate": 9.881376316287257e-06, "loss": 0.5896, "step": 1509 }, { "epoch": 0.1, "grad_norm": 1.2820757627487183, "learning_rate": 9.881149864037503e-06, "loss": 0.6099, "step": 1510 }, { "epoch": 0.1, "grad_norm": 1.3377745151519775, "learning_rate": 9.880923198445283e-06, "loss": 0.5975, "step": 1511 }, { "epoch": 0.1, "grad_norm": 1.2665973901748657, "learning_rate": 9.880696319520503e-06, "loss": 0.6231, "step": 1512 }, { "epoch": 0.1, "grad_norm": 1.278267502784729, "learning_rate": 9.880469227273078e-06, "loss": 0.5792, "step": 1513 }, { "epoch": 0.1, "grad_norm": 1.3802825212478638, "learning_rate": 9.880241921712936e-06, "loss": 0.6169, "step": 1514 }, { "epoch": 0.1, "grad_norm": 1.363783597946167, "learning_rate": 9.880014402850012e-06, "loss": 0.6358, "step": 1515 }, { "epoch": 0.1, "grad_norm": 1.3479007482528687, "learning_rate": 9.879786670694247e-06, "loss": 0.6398, "step": 1516 }, { "epoch": 0.1, "grad_norm": 1.2922918796539307, "learning_rate": 9.879558725255596e-06, "loss": 0.6244, "step": 1517 }, { "epoch": 0.1, "grad_norm": 1.1596226692199707, "learning_rate": 9.879330566544024e-06, "loss": 0.5864, "step": 1518 }, { "epoch": 0.1, "grad_norm": 1.3130141496658325, "learning_rate": 9.879102194569501e-06, "loss": 0.5679, "step": 1519 }, { "epoch": 0.1, "grad_norm": 1.3320728540420532, "learning_rate": 9.87887360934201e-06, "loss": 0.6204, "step": 1520 }, { "epoch": 0.1, "grad_norm": 1.3217343091964722, "learning_rate": 9.878644810871541e-06, "loss": 0.6256, "step": 1521 }, { "epoch": 0.1, "grad_norm": 1.207932710647583, "learning_rate": 9.878415799168096e-06, "loss": 0.5808, "step": 1522 }, { "epoch": 0.1, "grad_norm": 1.2634326219558716, "learning_rate": 9.87818657424168e-06, "loss": 0.6242, "step": 1523 }, { "epoch": 0.1, "grad_norm": 1.2800370454788208, "learning_rate": 9.877957136102316e-06, "loss": 0.6196, "step": 1524 }, { "epoch": 0.1, "grad_norm": 1.2332037687301636, "learning_rate": 9.877727484760031e-06, "loss": 0.5848, "step": 1525 }, { "epoch": 0.1, "grad_norm": 1.311872959136963, "learning_rate": 9.877497620224861e-06, "loss": 0.6311, "step": 1526 }, { "epoch": 0.1, "grad_norm": 1.2995712757110596, "learning_rate": 9.877267542506857e-06, "loss": 0.5925, "step": 1527 }, { "epoch": 0.1, "grad_norm": 1.202862024307251, "learning_rate": 9.87703725161607e-06, "loss": 0.5863, "step": 1528 }, { "epoch": 0.1, "grad_norm": 1.2397493124008179, "learning_rate": 9.876806747562568e-06, "loss": 0.5722, "step": 1529 }, { "epoch": 0.1, "grad_norm": 1.2349504232406616, "learning_rate": 9.876576030356426e-06, "loss": 0.6011, "step": 1530 }, { "epoch": 0.1, "grad_norm": 1.4362415075302124, "learning_rate": 9.876345100007727e-06, "loss": 0.5899, "step": 1531 }, { "epoch": 0.1, "grad_norm": 1.2247912883758545, "learning_rate": 9.876113956526564e-06, "loss": 0.599, "step": 1532 }, { "epoch": 0.1, "grad_norm": 1.201733946800232, "learning_rate": 9.87588259992304e-06, "loss": 0.5454, "step": 1533 }, { "epoch": 0.1, "grad_norm": 1.269129991531372, "learning_rate": 9.875651030207272e-06, "loss": 0.5564, "step": 1534 }, { "epoch": 0.1, "grad_norm": 1.4201830625534058, "learning_rate": 9.875419247389373e-06, "loss": 0.5758, "step": 1535 }, { "epoch": 0.1, "grad_norm": 1.3509441614151, "learning_rate": 9.875187251479479e-06, "loss": 0.5816, "step": 1536 }, { "epoch": 0.1, "grad_norm": 1.219122052192688, "learning_rate": 9.874955042487726e-06, "loss": 0.6077, "step": 1537 }, { "epoch": 0.1, "grad_norm": 1.3230963945388794, "learning_rate": 9.874722620424267e-06, "loss": 0.5351, "step": 1538 }, { "epoch": 0.1, "grad_norm": 1.1748281717300415, "learning_rate": 9.87448998529926e-06, "loss": 0.5641, "step": 1539 }, { "epoch": 0.1, "grad_norm": 1.3033249378204346, "learning_rate": 9.874257137122871e-06, "loss": 0.6203, "step": 1540 }, { "epoch": 0.1, "grad_norm": 1.3954434394836426, "learning_rate": 9.87402407590528e-06, "loss": 0.5962, "step": 1541 }, { "epoch": 0.1, "grad_norm": 1.1827821731567383, "learning_rate": 9.87379080165667e-06, "loss": 0.5954, "step": 1542 }, { "epoch": 0.1, "grad_norm": 1.3859353065490723, "learning_rate": 9.87355731438724e-06, "loss": 0.5982, "step": 1543 }, { "epoch": 0.1, "grad_norm": 1.2597606182098389, "learning_rate": 9.873323614107191e-06, "loss": 0.6065, "step": 1544 }, { "epoch": 0.1, "grad_norm": 1.317116618156433, "learning_rate": 9.873089700826743e-06, "loss": 0.5877, "step": 1545 }, { "epoch": 0.1, "grad_norm": 1.2654443979263306, "learning_rate": 9.872855574556116e-06, "loss": 0.58, "step": 1546 }, { "epoch": 0.1, "grad_norm": 1.3405874967575073, "learning_rate": 9.872621235305544e-06, "loss": 0.6303, "step": 1547 }, { "epoch": 0.1, "grad_norm": 1.236033320426941, "learning_rate": 9.872386683085269e-06, "loss": 0.5594, "step": 1548 }, { "epoch": 0.1, "grad_norm": 1.2517242431640625, "learning_rate": 9.872151917905544e-06, "loss": 0.5692, "step": 1549 }, { "epoch": 0.1, "grad_norm": 1.28999662399292, "learning_rate": 9.871916939776628e-06, "loss": 0.5811, "step": 1550 }, { "epoch": 0.1, "grad_norm": 1.1078916788101196, "learning_rate": 9.871681748708792e-06, "loss": 0.6069, "step": 1551 }, { "epoch": 0.1, "grad_norm": 1.1921906471252441, "learning_rate": 9.871446344712317e-06, "loss": 0.5953, "step": 1552 }, { "epoch": 0.1, "grad_norm": 1.2647005319595337, "learning_rate": 9.87121072779749e-06, "loss": 0.5909, "step": 1553 }, { "epoch": 0.1, "grad_norm": 1.220839500427246, "learning_rate": 9.87097489797461e-06, "loss": 0.5799, "step": 1554 }, { "epoch": 0.1, "grad_norm": 1.3289848566055298, "learning_rate": 9.870738855253986e-06, "loss": 0.6122, "step": 1555 }, { "epoch": 0.1, "grad_norm": 1.3101272583007812, "learning_rate": 9.870502599645932e-06, "loss": 0.5911, "step": 1556 }, { "epoch": 0.1, "grad_norm": 1.2007287740707397, "learning_rate": 9.870266131160775e-06, "loss": 0.5992, "step": 1557 }, { "epoch": 0.1, "grad_norm": 1.3300931453704834, "learning_rate": 9.87002944980885e-06, "loss": 0.6522, "step": 1558 }, { "epoch": 0.1, "grad_norm": 1.3218977451324463, "learning_rate": 9.869792555600504e-06, "loss": 0.6127, "step": 1559 }, { "epoch": 0.1, "grad_norm": 1.316537857055664, "learning_rate": 9.86955544854609e-06, "loss": 0.5977, "step": 1560 }, { "epoch": 0.1, "grad_norm": 1.2036150693893433, "learning_rate": 9.869318128655971e-06, "loss": 0.609, "step": 1561 }, { "epoch": 0.1, "grad_norm": 1.2222894430160522, "learning_rate": 9.869080595940518e-06, "loss": 0.6078, "step": 1562 }, { "epoch": 0.1, "grad_norm": 1.2495450973510742, "learning_rate": 9.868842850410116e-06, "loss": 0.6097, "step": 1563 }, { "epoch": 0.1, "grad_norm": 1.623584270477295, "learning_rate": 9.868604892075156e-06, "loss": 0.6204, "step": 1564 }, { "epoch": 0.1, "grad_norm": 1.282177209854126, "learning_rate": 9.868366720946034e-06, "loss": 0.5698, "step": 1565 }, { "epoch": 0.1, "grad_norm": 1.306418776512146, "learning_rate": 9.868128337033164e-06, "loss": 0.631, "step": 1566 }, { "epoch": 0.1, "grad_norm": 1.189990520477295, "learning_rate": 9.867889740346968e-06, "loss": 0.5669, "step": 1567 }, { "epoch": 0.1, "grad_norm": 1.1343134641647339, "learning_rate": 9.867650930897868e-06, "loss": 0.5457, "step": 1568 }, { "epoch": 0.1, "grad_norm": 1.3890068531036377, "learning_rate": 9.867411908696304e-06, "loss": 0.5671, "step": 1569 }, { "epoch": 0.1, "grad_norm": 1.290588140487671, "learning_rate": 9.867172673752723e-06, "loss": 0.5883, "step": 1570 }, { "epoch": 0.1, "grad_norm": 1.1608911752700806, "learning_rate": 9.866933226077584e-06, "loss": 0.5936, "step": 1571 }, { "epoch": 0.1, "grad_norm": 1.205942988395691, "learning_rate": 9.86669356568135e-06, "loss": 0.6286, "step": 1572 }, { "epoch": 0.1, "grad_norm": 1.1458535194396973, "learning_rate": 9.866453692574496e-06, "loss": 0.5468, "step": 1573 }, { "epoch": 0.1, "grad_norm": 1.188177227973938, "learning_rate": 9.866213606767506e-06, "loss": 0.6642, "step": 1574 }, { "epoch": 0.1, "grad_norm": 1.2866183519363403, "learning_rate": 9.865973308270875e-06, "loss": 0.6282, "step": 1575 }, { "epoch": 0.1, "grad_norm": 1.2354004383087158, "learning_rate": 9.865732797095104e-06, "loss": 0.6005, "step": 1576 }, { "epoch": 0.1, "grad_norm": 1.4001927375793457, "learning_rate": 9.865492073250708e-06, "loss": 0.5566, "step": 1577 }, { "epoch": 0.1, "grad_norm": 1.1932621002197266, "learning_rate": 9.865251136748205e-06, "loss": 0.5517, "step": 1578 }, { "epoch": 0.1, "grad_norm": 1.2106069326400757, "learning_rate": 9.86500998759813e-06, "loss": 0.5433, "step": 1579 }, { "epoch": 0.1, "grad_norm": 1.1797235012054443, "learning_rate": 9.864768625811018e-06, "loss": 0.586, "step": 1580 }, { "epoch": 0.1, "grad_norm": 1.424242377281189, "learning_rate": 9.864527051397421e-06, "loss": 0.629, "step": 1581 }, { "epoch": 0.1, "grad_norm": 1.2060205936431885, "learning_rate": 9.864285264367897e-06, "loss": 0.6071, "step": 1582 }, { "epoch": 0.1, "grad_norm": 1.5271639823913574, "learning_rate": 9.864043264733016e-06, "loss": 0.6153, "step": 1583 }, { "epoch": 0.1, "grad_norm": 1.5220725536346436, "learning_rate": 9.86380105250335e-06, "loss": 0.5649, "step": 1584 }, { "epoch": 0.1, "grad_norm": 1.4828299283981323, "learning_rate": 9.863558627689491e-06, "loss": 0.5752, "step": 1585 }, { "epoch": 0.1, "grad_norm": 1.428370475769043, "learning_rate": 9.863315990302033e-06, "loss": 0.5613, "step": 1586 }, { "epoch": 0.1, "grad_norm": 1.1861270666122437, "learning_rate": 9.86307314035158e-06, "loss": 0.5216, "step": 1587 }, { "epoch": 0.1, "grad_norm": 1.3476382493972778, "learning_rate": 9.862830077848747e-06, "loss": 0.5929, "step": 1588 }, { "epoch": 0.1, "grad_norm": 1.195995569229126, "learning_rate": 9.862586802804157e-06, "loss": 0.5748, "step": 1589 }, { "epoch": 0.1, "grad_norm": 1.0838885307312012, "learning_rate": 9.862343315228442e-06, "loss": 0.5254, "step": 1590 }, { "epoch": 0.1, "grad_norm": 1.3153475522994995, "learning_rate": 9.862099615132249e-06, "loss": 0.5812, "step": 1591 }, { "epoch": 0.1, "grad_norm": 1.3726898431777954, "learning_rate": 9.861855702526224e-06, "loss": 0.6226, "step": 1592 }, { "epoch": 0.1, "grad_norm": 1.1126790046691895, "learning_rate": 9.861611577421031e-06, "loss": 0.576, "step": 1593 }, { "epoch": 0.1, "grad_norm": 1.2393194437026978, "learning_rate": 9.861367239827338e-06, "loss": 0.6017, "step": 1594 }, { "epoch": 0.1, "grad_norm": 1.2829982042312622, "learning_rate": 9.861122689755827e-06, "loss": 0.5718, "step": 1595 }, { "epoch": 0.1, "grad_norm": 1.2593618631362915, "learning_rate": 9.860877927217183e-06, "loss": 0.5906, "step": 1596 }, { "epoch": 0.1, "grad_norm": 1.2654752731323242, "learning_rate": 9.86063295222211e-06, "loss": 0.5962, "step": 1597 }, { "epoch": 0.1, "grad_norm": 1.206935167312622, "learning_rate": 9.860387764781307e-06, "loss": 0.5638, "step": 1598 }, { "epoch": 0.1, "grad_norm": 1.3539648056030273, "learning_rate": 9.860142364905496e-06, "loss": 0.6307, "step": 1599 }, { "epoch": 0.1, "grad_norm": 1.3025622367858887, "learning_rate": 9.859896752605402e-06, "loss": 0.6075, "step": 1600 }, { "epoch": 0.1, "grad_norm": 1.1173646450042725, "learning_rate": 9.859650927891759e-06, "loss": 0.5385, "step": 1601 }, { "epoch": 0.1, "grad_norm": 1.3359417915344238, "learning_rate": 9.859404890775313e-06, "loss": 0.648, "step": 1602 }, { "epoch": 0.1, "grad_norm": 1.1753231287002563, "learning_rate": 9.859158641266817e-06, "loss": 0.5699, "step": 1603 }, { "epoch": 0.1, "grad_norm": 1.1759647130966187, "learning_rate": 9.858912179377032e-06, "loss": 0.5755, "step": 1604 }, { "epoch": 0.1, "grad_norm": 1.2100486755371094, "learning_rate": 9.858665505116733e-06, "loss": 0.6139, "step": 1605 }, { "epoch": 0.1, "grad_norm": 1.3660125732421875, "learning_rate": 9.858418618496698e-06, "loss": 0.6353, "step": 1606 }, { "epoch": 0.1, "grad_norm": 1.3629943132400513, "learning_rate": 9.858171519527723e-06, "loss": 0.5952, "step": 1607 }, { "epoch": 0.1, "grad_norm": 1.0517582893371582, "learning_rate": 9.857924208220604e-06, "loss": 0.5447, "step": 1608 }, { "epoch": 0.1, "grad_norm": 1.2868379354476929, "learning_rate": 9.857676684586153e-06, "loss": 0.6038, "step": 1609 }, { "epoch": 0.1, "grad_norm": 1.306420922279358, "learning_rate": 9.857428948635184e-06, "loss": 0.5917, "step": 1610 }, { "epoch": 0.1, "grad_norm": 1.253893494606018, "learning_rate": 9.857181000378531e-06, "loss": 0.5922, "step": 1611 }, { "epoch": 0.1, "grad_norm": 1.22707998752594, "learning_rate": 9.856932839827026e-06, "loss": 0.5723, "step": 1612 }, { "epoch": 0.1, "grad_norm": 1.2688989639282227, "learning_rate": 9.856684466991518e-06, "loss": 0.585, "step": 1613 }, { "epoch": 0.1, "grad_norm": 1.2153335809707642, "learning_rate": 9.856435881882863e-06, "loss": 0.6365, "step": 1614 }, { "epoch": 0.1, "grad_norm": 1.483553409576416, "learning_rate": 9.856187084511926e-06, "loss": 0.6335, "step": 1615 }, { "epoch": 0.1, "grad_norm": 1.292234182357788, "learning_rate": 9.85593807488958e-06, "loss": 0.573, "step": 1616 }, { "epoch": 0.1, "grad_norm": 1.3948979377746582, "learning_rate": 9.855688853026708e-06, "loss": 0.5917, "step": 1617 }, { "epoch": 0.1, "grad_norm": 1.292607069015503, "learning_rate": 9.855439418934206e-06, "loss": 0.583, "step": 1618 }, { "epoch": 0.1, "grad_norm": 1.3603483438491821, "learning_rate": 9.855189772622973e-06, "loss": 0.6085, "step": 1619 }, { "epoch": 0.1, "grad_norm": 1.1809104681015015, "learning_rate": 9.85493991410392e-06, "loss": 0.5757, "step": 1620 }, { "epoch": 0.1, "grad_norm": 1.2688435316085815, "learning_rate": 9.854689843387972e-06, "loss": 0.5974, "step": 1621 }, { "epoch": 0.1, "grad_norm": 1.2843914031982422, "learning_rate": 9.854439560486055e-06, "loss": 0.5859, "step": 1622 }, { "epoch": 0.1, "grad_norm": 1.2443370819091797, "learning_rate": 9.85418906540911e-06, "loss": 0.5861, "step": 1623 }, { "epoch": 0.1, "grad_norm": 1.1343896389007568, "learning_rate": 9.853938358168086e-06, "loss": 0.5629, "step": 1624 }, { "epoch": 0.1, "grad_norm": 1.4136300086975098, "learning_rate": 9.853687438773937e-06, "loss": 0.6244, "step": 1625 }, { "epoch": 0.1, "grad_norm": 1.2921760082244873, "learning_rate": 9.853436307237635e-06, "loss": 0.5676, "step": 1626 }, { "epoch": 0.11, "grad_norm": 1.262235403060913, "learning_rate": 9.853184963570155e-06, "loss": 0.5281, "step": 1627 }, { "epoch": 0.11, "grad_norm": 1.2056875228881836, "learning_rate": 9.85293340778248e-06, "loss": 0.5959, "step": 1628 }, { "epoch": 0.11, "grad_norm": 1.280164361000061, "learning_rate": 9.852681639885605e-06, "loss": 0.5685, "step": 1629 }, { "epoch": 0.11, "grad_norm": 1.330934762954712, "learning_rate": 9.852429659890537e-06, "loss": 0.6068, "step": 1630 }, { "epoch": 0.11, "grad_norm": 1.3074699640274048, "learning_rate": 9.852177467808288e-06, "loss": 0.5636, "step": 1631 }, { "epoch": 0.11, "grad_norm": 1.1158162355422974, "learning_rate": 9.85192506364988e-06, "loss": 0.6092, "step": 1632 }, { "epoch": 0.11, "grad_norm": 1.341554880142212, "learning_rate": 9.851672447426346e-06, "loss": 0.5745, "step": 1633 }, { "epoch": 0.11, "grad_norm": 1.2525924444198608, "learning_rate": 9.851419619148728e-06, "loss": 0.5703, "step": 1634 }, { "epoch": 0.11, "grad_norm": 1.3581814765930176, "learning_rate": 9.851166578828072e-06, "loss": 0.6275, "step": 1635 }, { "epoch": 0.11, "grad_norm": 1.2262235879898071, "learning_rate": 9.850913326475446e-06, "loss": 0.5538, "step": 1636 }, { "epoch": 0.11, "grad_norm": 1.322755217552185, "learning_rate": 9.85065986210191e-06, "loss": 0.5968, "step": 1637 }, { "epoch": 0.11, "grad_norm": 1.2966547012329102, "learning_rate": 9.850406185718548e-06, "loss": 0.6231, "step": 1638 }, { "epoch": 0.11, "grad_norm": 1.185438871383667, "learning_rate": 9.850152297336447e-06, "loss": 0.5614, "step": 1639 }, { "epoch": 0.11, "grad_norm": 1.376561164855957, "learning_rate": 9.8498981969667e-06, "loss": 0.6177, "step": 1640 }, { "epoch": 0.11, "grad_norm": 1.3491688966751099, "learning_rate": 9.849643884620416e-06, "loss": 0.5568, "step": 1641 }, { "epoch": 0.11, "grad_norm": 1.2133309841156006, "learning_rate": 9.849389360308713e-06, "loss": 0.5857, "step": 1642 }, { "epoch": 0.11, "grad_norm": 1.3471426963806152, "learning_rate": 9.84913462404271e-06, "loss": 0.6236, "step": 1643 }, { "epoch": 0.11, "grad_norm": 1.3137670755386353, "learning_rate": 9.848879675833542e-06, "loss": 0.6212, "step": 1644 }, { "epoch": 0.11, "grad_norm": 1.2108771800994873, "learning_rate": 9.848624515692357e-06, "loss": 0.559, "step": 1645 }, { "epoch": 0.11, "grad_norm": 1.367174744606018, "learning_rate": 9.848369143630302e-06, "loss": 0.6493, "step": 1646 }, { "epoch": 0.11, "grad_norm": 1.2323886156082153, "learning_rate": 9.848113559658541e-06, "loss": 0.5628, "step": 1647 }, { "epoch": 0.11, "grad_norm": 1.1359220743179321, "learning_rate": 9.847857763788245e-06, "loss": 0.5904, "step": 1648 }, { "epoch": 0.11, "grad_norm": 1.2820178270339966, "learning_rate": 9.847601756030594e-06, "loss": 0.618, "step": 1649 }, { "epoch": 0.11, "grad_norm": 1.374669075012207, "learning_rate": 9.847345536396778e-06, "loss": 0.5279, "step": 1650 }, { "epoch": 0.11, "grad_norm": 1.254718542098999, "learning_rate": 9.847089104897993e-06, "loss": 0.6127, "step": 1651 }, { "epoch": 0.11, "grad_norm": 1.4707276821136475, "learning_rate": 9.84683246154545e-06, "loss": 0.5676, "step": 1652 }, { "epoch": 0.11, "grad_norm": 1.2875808477401733, "learning_rate": 9.846575606350366e-06, "loss": 0.5458, "step": 1653 }, { "epoch": 0.11, "grad_norm": 1.186255931854248, "learning_rate": 9.846318539323965e-06, "loss": 0.6079, "step": 1654 }, { "epoch": 0.11, "grad_norm": 1.3172937631607056, "learning_rate": 9.846061260477484e-06, "loss": 0.5971, "step": 1655 }, { "epoch": 0.11, "grad_norm": 1.4032652378082275, "learning_rate": 9.84580376982217e-06, "loss": 0.5655, "step": 1656 }, { "epoch": 0.11, "grad_norm": 1.2269459962844849, "learning_rate": 9.845546067369274e-06, "loss": 0.543, "step": 1657 }, { "epoch": 0.11, "grad_norm": 1.4534449577331543, "learning_rate": 9.845288153130061e-06, "loss": 0.6452, "step": 1658 }, { "epoch": 0.11, "grad_norm": 1.3264373540878296, "learning_rate": 9.845030027115805e-06, "loss": 0.5886, "step": 1659 }, { "epoch": 0.11, "grad_norm": 1.229418158531189, "learning_rate": 9.844771689337786e-06, "loss": 0.6021, "step": 1660 }, { "epoch": 0.11, "grad_norm": 1.2463024854660034, "learning_rate": 9.844513139807298e-06, "loss": 0.5935, "step": 1661 }, { "epoch": 0.11, "grad_norm": 1.4079941511154175, "learning_rate": 9.844254378535638e-06, "loss": 0.6021, "step": 1662 }, { "epoch": 0.11, "grad_norm": 1.2816811800003052, "learning_rate": 9.843995405534117e-06, "loss": 0.5991, "step": 1663 }, { "epoch": 0.11, "grad_norm": 1.247162938117981, "learning_rate": 9.843736220814055e-06, "loss": 0.5404, "step": 1664 }, { "epoch": 0.11, "grad_norm": 1.302781343460083, "learning_rate": 9.84347682438678e-06, "loss": 0.5814, "step": 1665 }, { "epoch": 0.11, "grad_norm": 1.2521874904632568, "learning_rate": 9.84321721626363e-06, "loss": 0.5874, "step": 1666 }, { "epoch": 0.11, "grad_norm": 1.1374802589416504, "learning_rate": 9.84295739645595e-06, "loss": 0.5536, "step": 1667 }, { "epoch": 0.11, "grad_norm": 1.3259036540985107, "learning_rate": 9.842697364975098e-06, "loss": 0.6029, "step": 1668 }, { "epoch": 0.11, "grad_norm": 1.1114073991775513, "learning_rate": 9.842437121832439e-06, "loss": 0.57, "step": 1669 }, { "epoch": 0.11, "grad_norm": 1.2196245193481445, "learning_rate": 9.842176667039345e-06, "loss": 0.6005, "step": 1670 }, { "epoch": 0.11, "grad_norm": 1.2498644590377808, "learning_rate": 9.841916000607204e-06, "loss": 0.585, "step": 1671 }, { "epoch": 0.11, "grad_norm": 1.2595659494400024, "learning_rate": 9.841655122547407e-06, "loss": 0.5602, "step": 1672 }, { "epoch": 0.11, "grad_norm": 1.3095893859863281, "learning_rate": 9.841394032871354e-06, "loss": 0.6169, "step": 1673 }, { "epoch": 0.11, "grad_norm": 1.3331403732299805, "learning_rate": 9.84113273159046e-06, "loss": 0.5806, "step": 1674 }, { "epoch": 0.11, "grad_norm": 1.3317261934280396, "learning_rate": 9.840871218716147e-06, "loss": 0.5796, "step": 1675 }, { "epoch": 0.11, "grad_norm": 1.1692553758621216, "learning_rate": 9.840609494259839e-06, "loss": 0.5414, "step": 1676 }, { "epoch": 0.11, "grad_norm": 1.3171956539154053, "learning_rate": 9.84034755823298e-06, "loss": 0.5483, "step": 1677 }, { "epoch": 0.11, "grad_norm": 1.4201020002365112, "learning_rate": 9.840085410647017e-06, "loss": 0.6066, "step": 1678 }, { "epoch": 0.11, "grad_norm": 1.3633091449737549, "learning_rate": 9.83982305151341e-06, "loss": 0.5484, "step": 1679 }, { "epoch": 0.11, "grad_norm": 1.1625350713729858, "learning_rate": 9.839560480843623e-06, "loss": 0.5627, "step": 1680 }, { "epoch": 0.11, "grad_norm": 1.2924920320510864, "learning_rate": 9.839297698649134e-06, "loss": 0.5826, "step": 1681 }, { "epoch": 0.11, "grad_norm": 1.262059211730957, "learning_rate": 9.839034704941429e-06, "loss": 0.6413, "step": 1682 }, { "epoch": 0.11, "grad_norm": 1.348254680633545, "learning_rate": 9.838771499732e-06, "loss": 0.5536, "step": 1683 }, { "epoch": 0.11, "grad_norm": 1.2687008380889893, "learning_rate": 9.838508083032354e-06, "loss": 0.6153, "step": 1684 }, { "epoch": 0.11, "grad_norm": 1.2640042304992676, "learning_rate": 9.838244454854003e-06, "loss": 0.5995, "step": 1685 }, { "epoch": 0.11, "grad_norm": 1.2058042287826538, "learning_rate": 9.83798061520847e-06, "loss": 0.6096, "step": 1686 }, { "epoch": 0.11, "grad_norm": 1.181051254272461, "learning_rate": 9.837716564107286e-06, "loss": 0.5982, "step": 1687 }, { "epoch": 0.11, "grad_norm": 1.2039128541946411, "learning_rate": 9.837452301561994e-06, "loss": 0.6057, "step": 1688 }, { "epoch": 0.11, "grad_norm": 1.2835317850112915, "learning_rate": 9.83718782758414e-06, "loss": 0.5771, "step": 1689 }, { "epoch": 0.11, "grad_norm": 1.2463041543960571, "learning_rate": 9.836923142185289e-06, "loss": 0.6521, "step": 1690 }, { "epoch": 0.11, "grad_norm": 1.173244833946228, "learning_rate": 9.836658245377004e-06, "loss": 0.576, "step": 1691 }, { "epoch": 0.11, "grad_norm": 1.4143650531768799, "learning_rate": 9.836393137170867e-06, "loss": 0.5892, "step": 1692 }, { "epoch": 0.11, "grad_norm": 1.2270565032958984, "learning_rate": 9.836127817578465e-06, "loss": 0.6064, "step": 1693 }, { "epoch": 0.11, "grad_norm": 1.2943652868270874, "learning_rate": 9.835862286611393e-06, "loss": 0.6636, "step": 1694 }, { "epoch": 0.11, "grad_norm": 1.208587646484375, "learning_rate": 9.835596544281258e-06, "loss": 0.6319, "step": 1695 }, { "epoch": 0.11, "grad_norm": 1.2099225521087646, "learning_rate": 9.835330590599673e-06, "loss": 0.6001, "step": 1696 }, { "epoch": 0.11, "grad_norm": 1.2244880199432373, "learning_rate": 9.835064425578264e-06, "loss": 0.5866, "step": 1697 }, { "epoch": 0.11, "grad_norm": 1.3683770895004272, "learning_rate": 9.834798049228663e-06, "loss": 0.6608, "step": 1698 }, { "epoch": 0.11, "grad_norm": 1.1503448486328125, "learning_rate": 9.834531461562512e-06, "loss": 0.6006, "step": 1699 }, { "epoch": 0.11, "grad_norm": 1.2198982238769531, "learning_rate": 9.834264662591467e-06, "loss": 0.5668, "step": 1700 }, { "epoch": 0.11, "grad_norm": 1.1400381326675415, "learning_rate": 9.833997652327182e-06, "loss": 0.6057, "step": 1701 }, { "epoch": 0.11, "grad_norm": 1.3786346912384033, "learning_rate": 9.833730430781334e-06, "loss": 0.6287, "step": 1702 }, { "epoch": 0.11, "grad_norm": 2.5563337802886963, "learning_rate": 9.8334629979656e-06, "loss": 0.6032, "step": 1703 }, { "epoch": 0.11, "grad_norm": 1.2759833335876465, "learning_rate": 9.833195353891669e-06, "loss": 0.6084, "step": 1704 }, { "epoch": 0.11, "grad_norm": 1.5524300336837769, "learning_rate": 9.832927498571239e-06, "loss": 0.5684, "step": 1705 }, { "epoch": 0.11, "grad_norm": 1.266768455505371, "learning_rate": 9.832659432016017e-06, "loss": 0.645, "step": 1706 }, { "epoch": 0.11, "grad_norm": 1.3067835569381714, "learning_rate": 9.83239115423772e-06, "loss": 0.6371, "step": 1707 }, { "epoch": 0.11, "grad_norm": 1.2978724241256714, "learning_rate": 9.832122665248071e-06, "loss": 0.5849, "step": 1708 }, { "epoch": 0.11, "grad_norm": 1.1884313821792603, "learning_rate": 9.83185396505881e-06, "loss": 0.6494, "step": 1709 }, { "epoch": 0.11, "grad_norm": 1.1904563903808594, "learning_rate": 9.831585053681678e-06, "loss": 0.6259, "step": 1710 }, { "epoch": 0.11, "grad_norm": 1.276056170463562, "learning_rate": 9.83131593112843e-06, "loss": 0.5737, "step": 1711 }, { "epoch": 0.11, "grad_norm": 1.318115472793579, "learning_rate": 9.831046597410825e-06, "loss": 0.5265, "step": 1712 }, { "epoch": 0.11, "grad_norm": 1.202966570854187, "learning_rate": 9.830777052540638e-06, "loss": 0.5926, "step": 1713 }, { "epoch": 0.11, "grad_norm": 1.2723348140716553, "learning_rate": 9.830507296529653e-06, "loss": 0.5919, "step": 1714 }, { "epoch": 0.11, "grad_norm": 1.1810786724090576, "learning_rate": 9.830237329389653e-06, "loss": 0.5847, "step": 1715 }, { "epoch": 0.11, "grad_norm": 1.2049835920333862, "learning_rate": 9.829967151132442e-06, "loss": 0.5838, "step": 1716 }, { "epoch": 0.11, "grad_norm": 1.4668763875961304, "learning_rate": 9.829696761769829e-06, "loss": 0.5678, "step": 1717 }, { "epoch": 0.11, "grad_norm": 1.2371288537979126, "learning_rate": 9.829426161313634e-06, "loss": 0.6087, "step": 1718 }, { "epoch": 0.11, "grad_norm": 1.3208084106445312, "learning_rate": 9.829155349775677e-06, "loss": 0.5503, "step": 1719 }, { "epoch": 0.11, "grad_norm": 1.1921125650405884, "learning_rate": 9.828884327167802e-06, "loss": 0.5881, "step": 1720 }, { "epoch": 0.11, "grad_norm": 1.2115521430969238, "learning_rate": 9.828613093501851e-06, "loss": 0.554, "step": 1721 }, { "epoch": 0.11, "grad_norm": 1.1917823553085327, "learning_rate": 9.82834164878968e-06, "loss": 0.5835, "step": 1722 }, { "epoch": 0.11, "grad_norm": 1.1923388242721558, "learning_rate": 9.828069993043152e-06, "loss": 0.547, "step": 1723 }, { "epoch": 0.11, "grad_norm": 1.1772022247314453, "learning_rate": 9.827798126274143e-06, "loss": 0.586, "step": 1724 }, { "epoch": 0.11, "grad_norm": 1.2242655754089355, "learning_rate": 9.82752604849453e-06, "loss": 0.595, "step": 1725 }, { "epoch": 0.11, "grad_norm": 1.2700272798538208, "learning_rate": 9.827253759716212e-06, "loss": 0.5601, "step": 1726 }, { "epoch": 0.11, "grad_norm": 1.1445930004119873, "learning_rate": 9.826981259951085e-06, "loss": 0.5702, "step": 1727 }, { "epoch": 0.11, "grad_norm": 1.3127777576446533, "learning_rate": 9.826708549211061e-06, "loss": 0.5882, "step": 1728 }, { "epoch": 0.11, "grad_norm": 1.2900506258010864, "learning_rate": 9.826435627508059e-06, "loss": 0.5732, "step": 1729 }, { "epoch": 0.11, "grad_norm": 1.1932706832885742, "learning_rate": 9.82616249485401e-06, "loss": 0.5208, "step": 1730 }, { "epoch": 0.11, "grad_norm": 1.3856558799743652, "learning_rate": 9.825889151260848e-06, "loss": 0.6286, "step": 1731 }, { "epoch": 0.11, "grad_norm": 1.16860830783844, "learning_rate": 9.825615596740524e-06, "loss": 0.5832, "step": 1732 }, { "epoch": 0.11, "grad_norm": 1.3680094480514526, "learning_rate": 9.825341831304992e-06, "loss": 0.6121, "step": 1733 }, { "epoch": 0.11, "grad_norm": 1.3236126899719238, "learning_rate": 9.825067854966217e-06, "loss": 0.5817, "step": 1734 }, { "epoch": 0.11, "grad_norm": 1.3064522743225098, "learning_rate": 9.824793667736176e-06, "loss": 0.6454, "step": 1735 }, { "epoch": 0.11, "grad_norm": 1.1722313165664673, "learning_rate": 9.824519269626853e-06, "loss": 0.5582, "step": 1736 }, { "epoch": 0.11, "grad_norm": 1.2169842720031738, "learning_rate": 9.824244660650237e-06, "loss": 0.5847, "step": 1737 }, { "epoch": 0.11, "grad_norm": 1.2674376964569092, "learning_rate": 9.823969840818336e-06, "loss": 0.6182, "step": 1738 }, { "epoch": 0.11, "grad_norm": 1.479569435119629, "learning_rate": 9.823694810143159e-06, "loss": 0.6076, "step": 1739 }, { "epoch": 0.11, "grad_norm": 1.228732943534851, "learning_rate": 9.823419568636726e-06, "loss": 0.583, "step": 1740 }, { "epoch": 0.11, "grad_norm": 1.4542303085327148, "learning_rate": 9.823144116311069e-06, "loss": 0.6001, "step": 1741 }, { "epoch": 0.11, "grad_norm": 1.2961039543151855, "learning_rate": 9.822868453178227e-06, "loss": 0.6401, "step": 1742 }, { "epoch": 0.11, "grad_norm": 1.2850587368011475, "learning_rate": 9.822592579250247e-06, "loss": 0.5859, "step": 1743 }, { "epoch": 0.11, "grad_norm": 1.201331615447998, "learning_rate": 9.82231649453919e-06, "loss": 0.5856, "step": 1744 }, { "epoch": 0.11, "grad_norm": 1.2255533933639526, "learning_rate": 9.82204019905712e-06, "loss": 0.5838, "step": 1745 }, { "epoch": 0.11, "grad_norm": 1.2916171550750732, "learning_rate": 9.82176369281611e-06, "loss": 0.6069, "step": 1746 }, { "epoch": 0.11, "grad_norm": 1.513266682624817, "learning_rate": 9.821486975828253e-06, "loss": 0.6322, "step": 1747 }, { "epoch": 0.11, "grad_norm": 1.5735187530517578, "learning_rate": 9.821210048105641e-06, "loss": 0.6189, "step": 1748 }, { "epoch": 0.11, "grad_norm": 1.2360678911209106, "learning_rate": 9.820932909660375e-06, "loss": 0.5561, "step": 1749 }, { "epoch": 0.11, "grad_norm": 1.2092454433441162, "learning_rate": 9.820655560504569e-06, "loss": 0.5566, "step": 1750 }, { "epoch": 0.11, "grad_norm": 1.4228312969207764, "learning_rate": 9.820378000650345e-06, "loss": 0.5956, "step": 1751 }, { "epoch": 0.11, "grad_norm": 1.252224326133728, "learning_rate": 9.820100230109839e-06, "loss": 0.6677, "step": 1752 }, { "epoch": 0.11, "grad_norm": 1.418012261390686, "learning_rate": 9.819822248895186e-06, "loss": 0.5968, "step": 1753 }, { "epoch": 0.11, "grad_norm": 1.2132819890975952, "learning_rate": 9.819544057018538e-06, "loss": 0.588, "step": 1754 }, { "epoch": 0.11, "grad_norm": 1.4355652332305908, "learning_rate": 9.819265654492054e-06, "loss": 0.6174, "step": 1755 }, { "epoch": 0.11, "grad_norm": 1.1643140316009521, "learning_rate": 9.818987041327901e-06, "loss": 0.5366, "step": 1756 }, { "epoch": 0.11, "grad_norm": 1.1881314516067505, "learning_rate": 9.818708217538257e-06, "loss": 0.597, "step": 1757 }, { "epoch": 0.11, "grad_norm": 1.1382529735565186, "learning_rate": 9.818429183135311e-06, "loss": 0.5824, "step": 1758 }, { "epoch": 0.11, "grad_norm": 1.2946689128875732, "learning_rate": 9.818149938131257e-06, "loss": 0.6176, "step": 1759 }, { "epoch": 0.11, "grad_norm": 1.3456512689590454, "learning_rate": 9.8178704825383e-06, "loss": 0.5944, "step": 1760 }, { "epoch": 0.11, "grad_norm": 1.370301365852356, "learning_rate": 9.817590816368654e-06, "loss": 0.6482, "step": 1761 }, { "epoch": 0.11, "grad_norm": 1.2308542728424072, "learning_rate": 9.817310939634544e-06, "loss": 0.6211, "step": 1762 }, { "epoch": 0.11, "grad_norm": 1.245786190032959, "learning_rate": 9.817030852348199e-06, "loss": 0.587, "step": 1763 }, { "epoch": 0.11, "grad_norm": 1.1468744277954102, "learning_rate": 9.816750554521866e-06, "loss": 0.5378, "step": 1764 }, { "epoch": 0.11, "grad_norm": 1.4206559658050537, "learning_rate": 9.816470046167795e-06, "loss": 0.6358, "step": 1765 }, { "epoch": 0.11, "grad_norm": 1.4533734321594238, "learning_rate": 9.816189327298243e-06, "loss": 0.6193, "step": 1766 }, { "epoch": 0.11, "grad_norm": 1.1588735580444336, "learning_rate": 9.81590839792548e-06, "loss": 0.6089, "step": 1767 }, { "epoch": 0.11, "grad_norm": 1.347670078277588, "learning_rate": 9.815627258061788e-06, "loss": 0.6105, "step": 1768 }, { "epoch": 0.11, "grad_norm": 1.180303931236267, "learning_rate": 9.815345907719452e-06, "loss": 0.5639, "step": 1769 }, { "epoch": 0.11, "grad_norm": 1.2600352764129639, "learning_rate": 9.815064346910772e-06, "loss": 0.6149, "step": 1770 }, { "epoch": 0.11, "grad_norm": 1.1753357648849487, "learning_rate": 9.81478257564805e-06, "loss": 0.5566, "step": 1771 }, { "epoch": 0.11, "grad_norm": 1.159230351448059, "learning_rate": 9.814500593943606e-06, "loss": 0.5553, "step": 1772 }, { "epoch": 0.11, "grad_norm": 1.1406266689300537, "learning_rate": 9.814218401809761e-06, "loss": 0.5479, "step": 1773 }, { "epoch": 0.11, "grad_norm": 1.3669794797897339, "learning_rate": 9.813935999258852e-06, "loss": 0.5945, "step": 1774 }, { "epoch": 0.11, "grad_norm": 1.2479232549667358, "learning_rate": 9.81365338630322e-06, "loss": 0.5692, "step": 1775 }, { "epoch": 0.11, "grad_norm": 1.240748643875122, "learning_rate": 9.813370562955218e-06, "loss": 0.5386, "step": 1776 }, { "epoch": 0.11, "grad_norm": 1.4104503393173218, "learning_rate": 9.813087529227207e-06, "loss": 0.6265, "step": 1777 }, { "epoch": 0.11, "grad_norm": 1.281108021736145, "learning_rate": 9.812804285131557e-06, "loss": 0.6189, "step": 1778 }, { "epoch": 0.11, "grad_norm": 1.190992832183838, "learning_rate": 9.81252083068065e-06, "loss": 0.5857, "step": 1779 }, { "epoch": 0.11, "grad_norm": 1.266798734664917, "learning_rate": 9.812237165886875e-06, "loss": 0.6061, "step": 1780 }, { "epoch": 0.11, "grad_norm": 1.2472916841506958, "learning_rate": 9.811953290762628e-06, "loss": 0.6114, "step": 1781 }, { "epoch": 0.12, "grad_norm": 1.1618328094482422, "learning_rate": 9.811669205320317e-06, "loss": 0.612, "step": 1782 }, { "epoch": 0.12, "grad_norm": 1.1857287883758545, "learning_rate": 9.811384909572361e-06, "loss": 0.5817, "step": 1783 }, { "epoch": 0.12, "grad_norm": 1.3487706184387207, "learning_rate": 9.811100403531184e-06, "loss": 0.606, "step": 1784 }, { "epoch": 0.12, "grad_norm": 1.2474074363708496, "learning_rate": 9.81081568720922e-06, "loss": 0.6032, "step": 1785 }, { "epoch": 0.12, "grad_norm": 1.2070280313491821, "learning_rate": 9.810530760618914e-06, "loss": 0.5694, "step": 1786 }, { "epoch": 0.12, "grad_norm": 1.2904704809188843, "learning_rate": 9.81024562377272e-06, "loss": 0.5633, "step": 1787 }, { "epoch": 0.12, "grad_norm": 1.2435886859893799, "learning_rate": 9.809960276683102e-06, "loss": 0.6019, "step": 1788 }, { "epoch": 0.12, "grad_norm": 1.2322582006454468, "learning_rate": 9.80967471936253e-06, "loss": 0.5607, "step": 1789 }, { "epoch": 0.12, "grad_norm": 1.2954716682434082, "learning_rate": 9.809388951823484e-06, "loss": 0.5786, "step": 1790 }, { "epoch": 0.12, "grad_norm": 1.218957781791687, "learning_rate": 9.809102974078455e-06, "loss": 0.6331, "step": 1791 }, { "epoch": 0.12, "grad_norm": 1.1560513973236084, "learning_rate": 9.808816786139942e-06, "loss": 0.5863, "step": 1792 }, { "epoch": 0.12, "grad_norm": 1.2298085689544678, "learning_rate": 9.808530388020457e-06, "loss": 0.5952, "step": 1793 }, { "epoch": 0.12, "grad_norm": 1.4004945755004883, "learning_rate": 9.808243779732513e-06, "loss": 0.6053, "step": 1794 }, { "epoch": 0.12, "grad_norm": 1.2556527853012085, "learning_rate": 9.80795696128864e-06, "loss": 0.6296, "step": 1795 }, { "epoch": 0.12, "grad_norm": 1.3139132261276245, "learning_rate": 9.807669932701371e-06, "loss": 0.6091, "step": 1796 }, { "epoch": 0.12, "grad_norm": 1.2531620264053345, "learning_rate": 9.807382693983255e-06, "loss": 0.5796, "step": 1797 }, { "epoch": 0.12, "grad_norm": 1.245897650718689, "learning_rate": 9.807095245146845e-06, "loss": 0.6267, "step": 1798 }, { "epoch": 0.12, "grad_norm": 1.2131638526916504, "learning_rate": 9.806807586204703e-06, "loss": 0.5765, "step": 1799 }, { "epoch": 0.12, "grad_norm": 1.1925468444824219, "learning_rate": 9.806519717169402e-06, "loss": 0.5589, "step": 1800 }, { "epoch": 0.12, "grad_norm": 1.1822766065597534, "learning_rate": 9.806231638053527e-06, "loss": 0.5979, "step": 1801 }, { "epoch": 0.12, "grad_norm": 1.4730792045593262, "learning_rate": 9.805943348869667e-06, "loss": 0.5856, "step": 1802 }, { "epoch": 0.12, "grad_norm": 1.1833045482635498, "learning_rate": 9.805654849630424e-06, "loss": 0.5273, "step": 1803 }, { "epoch": 0.12, "grad_norm": 1.2579963207244873, "learning_rate": 9.805366140348404e-06, "loss": 0.6167, "step": 1804 }, { "epoch": 0.12, "grad_norm": 1.3429460525512695, "learning_rate": 9.80507722103623e-06, "loss": 0.6434, "step": 1805 }, { "epoch": 0.12, "grad_norm": 1.1620274782180786, "learning_rate": 9.804788091706524e-06, "loss": 0.633, "step": 1806 }, { "epoch": 0.12, "grad_norm": 1.238054633140564, "learning_rate": 9.804498752371932e-06, "loss": 0.5945, "step": 1807 }, { "epoch": 0.12, "grad_norm": 1.218548059463501, "learning_rate": 9.804209203045093e-06, "loss": 0.5755, "step": 1808 }, { "epoch": 0.12, "grad_norm": 1.1984636783599854, "learning_rate": 9.803919443738665e-06, "loss": 0.5578, "step": 1809 }, { "epoch": 0.12, "grad_norm": 1.2966810464859009, "learning_rate": 9.803629474465312e-06, "loss": 0.6057, "step": 1810 }, { "epoch": 0.12, "grad_norm": 1.2187998294830322, "learning_rate": 9.80333929523771e-06, "loss": 0.6095, "step": 1811 }, { "epoch": 0.12, "grad_norm": 1.443372130393982, "learning_rate": 9.803048906068537e-06, "loss": 0.5886, "step": 1812 }, { "epoch": 0.12, "grad_norm": 1.3527185916900635, "learning_rate": 9.80275830697049e-06, "loss": 0.6011, "step": 1813 }, { "epoch": 0.12, "grad_norm": 1.295005440711975, "learning_rate": 9.80246749795627e-06, "loss": 0.6172, "step": 1814 }, { "epoch": 0.12, "grad_norm": 1.204003930091858, "learning_rate": 9.802176479038586e-06, "loss": 0.5946, "step": 1815 }, { "epoch": 0.12, "grad_norm": 1.26210355758667, "learning_rate": 9.801885250230156e-06, "loss": 0.6315, "step": 1816 }, { "epoch": 0.12, "grad_norm": 1.2137072086334229, "learning_rate": 9.801593811543712e-06, "loss": 0.583, "step": 1817 }, { "epoch": 0.12, "grad_norm": 1.2537519931793213, "learning_rate": 9.80130216299199e-06, "loss": 0.5944, "step": 1818 }, { "epoch": 0.12, "grad_norm": 3.398599624633789, "learning_rate": 9.801010304587737e-06, "loss": 0.613, "step": 1819 }, { "epoch": 0.12, "grad_norm": 1.2663251161575317, "learning_rate": 9.800718236343712e-06, "loss": 0.5653, "step": 1820 }, { "epoch": 0.12, "grad_norm": 1.3322856426239014, "learning_rate": 9.800425958272678e-06, "loss": 0.5934, "step": 1821 }, { "epoch": 0.12, "grad_norm": 1.5552184581756592, "learning_rate": 9.800133470387413e-06, "loss": 0.6126, "step": 1822 }, { "epoch": 0.12, "grad_norm": 1.1743786334991455, "learning_rate": 9.799840772700697e-06, "loss": 0.5604, "step": 1823 }, { "epoch": 0.12, "grad_norm": 1.341381549835205, "learning_rate": 9.799547865225323e-06, "loss": 0.6063, "step": 1824 }, { "epoch": 0.12, "grad_norm": 1.2456583976745605, "learning_rate": 9.799254747974096e-06, "loss": 0.6184, "step": 1825 }, { "epoch": 0.12, "grad_norm": 1.1361373662948608, "learning_rate": 9.798961420959827e-06, "loss": 0.5645, "step": 1826 }, { "epoch": 0.12, "grad_norm": 1.349123239517212, "learning_rate": 9.798667884195335e-06, "loss": 0.5674, "step": 1827 }, { "epoch": 0.12, "grad_norm": 1.3202979564666748, "learning_rate": 9.798374137693452e-06, "loss": 0.5837, "step": 1828 }, { "epoch": 0.12, "grad_norm": 1.203865647315979, "learning_rate": 9.798080181467013e-06, "loss": 0.533, "step": 1829 }, { "epoch": 0.12, "grad_norm": 1.3414682149887085, "learning_rate": 9.79778601552887e-06, "loss": 0.5732, "step": 1830 }, { "epoch": 0.12, "grad_norm": 1.3274623155593872, "learning_rate": 9.797491639891878e-06, "loss": 0.5706, "step": 1831 }, { "epoch": 0.12, "grad_norm": 1.3223038911819458, "learning_rate": 9.797197054568905e-06, "loss": 0.6008, "step": 1832 }, { "epoch": 0.12, "grad_norm": 1.1874713897705078, "learning_rate": 9.796902259572825e-06, "loss": 0.5555, "step": 1833 }, { "epoch": 0.12, "grad_norm": 1.3030940294265747, "learning_rate": 9.796607254916523e-06, "loss": 0.5816, "step": 1834 }, { "epoch": 0.12, "grad_norm": 1.2211354970932007, "learning_rate": 9.796312040612895e-06, "loss": 0.6259, "step": 1835 }, { "epoch": 0.12, "grad_norm": 1.2645573616027832, "learning_rate": 9.796016616674839e-06, "loss": 0.5922, "step": 1836 }, { "epoch": 0.12, "grad_norm": 1.1987510919570923, "learning_rate": 9.795720983115273e-06, "loss": 0.5604, "step": 1837 }, { "epoch": 0.12, "grad_norm": 1.2133305072784424, "learning_rate": 9.795425139947117e-06, "loss": 0.6061, "step": 1838 }, { "epoch": 0.12, "grad_norm": 1.4986340999603271, "learning_rate": 9.795129087183299e-06, "loss": 0.5629, "step": 1839 }, { "epoch": 0.12, "grad_norm": 1.1749008893966675, "learning_rate": 9.79483282483676e-06, "loss": 0.6295, "step": 1840 }, { "epoch": 0.12, "grad_norm": 1.2877881526947021, "learning_rate": 9.79453635292045e-06, "loss": 0.5723, "step": 1841 }, { "epoch": 0.12, "grad_norm": 1.492825984954834, "learning_rate": 9.794239671447324e-06, "loss": 0.5898, "step": 1842 }, { "epoch": 0.12, "grad_norm": 1.2134982347488403, "learning_rate": 9.793942780430353e-06, "loss": 0.5356, "step": 1843 }, { "epoch": 0.12, "grad_norm": 1.2902053594589233, "learning_rate": 9.79364567988251e-06, "loss": 0.5774, "step": 1844 }, { "epoch": 0.12, "grad_norm": 1.3258963823318481, "learning_rate": 9.793348369816785e-06, "loss": 0.5719, "step": 1845 }, { "epoch": 0.12, "grad_norm": 1.203574538230896, "learning_rate": 9.793050850246168e-06, "loss": 0.5693, "step": 1846 }, { "epoch": 0.12, "grad_norm": 1.232825756072998, "learning_rate": 9.792753121183664e-06, "loss": 0.5549, "step": 1847 }, { "epoch": 0.12, "grad_norm": 1.1787687540054321, "learning_rate": 9.792455182642289e-06, "loss": 0.5677, "step": 1848 }, { "epoch": 0.12, "grad_norm": 1.3621352910995483, "learning_rate": 9.79215703463506e-06, "loss": 0.6401, "step": 1849 }, { "epoch": 0.12, "grad_norm": 1.366504192352295, "learning_rate": 9.791858677175012e-06, "loss": 0.6068, "step": 1850 }, { "epoch": 0.12, "grad_norm": 1.3006799221038818, "learning_rate": 9.791560110275184e-06, "loss": 0.6723, "step": 1851 }, { "epoch": 0.12, "grad_norm": 1.1390470266342163, "learning_rate": 9.791261333948627e-06, "loss": 0.5951, "step": 1852 }, { "epoch": 0.12, "grad_norm": 1.3004087209701538, "learning_rate": 9.790962348208397e-06, "loss": 0.5495, "step": 1853 }, { "epoch": 0.12, "grad_norm": 1.4582141637802124, "learning_rate": 9.790663153067566e-06, "loss": 0.5798, "step": 1854 }, { "epoch": 0.12, "grad_norm": 1.3086373805999756, "learning_rate": 9.790363748539206e-06, "loss": 0.5631, "step": 1855 }, { "epoch": 0.12, "grad_norm": 1.1191332340240479, "learning_rate": 9.790064134636408e-06, "loss": 0.5635, "step": 1856 }, { "epoch": 0.12, "grad_norm": 1.1931365728378296, "learning_rate": 9.789764311372265e-06, "loss": 0.6003, "step": 1857 }, { "epoch": 0.12, "grad_norm": 1.1665815114974976, "learning_rate": 9.78946427875988e-06, "loss": 0.5467, "step": 1858 }, { "epoch": 0.12, "grad_norm": 1.2634705305099487, "learning_rate": 9.789164036812368e-06, "loss": 0.5777, "step": 1859 }, { "epoch": 0.12, "grad_norm": 1.2326377630233765, "learning_rate": 9.788863585542854e-06, "loss": 0.5951, "step": 1860 }, { "epoch": 0.12, "grad_norm": 1.2811157703399658, "learning_rate": 9.788562924964469e-06, "loss": 0.5751, "step": 1861 }, { "epoch": 0.12, "grad_norm": 1.2913998365402222, "learning_rate": 9.788262055090352e-06, "loss": 0.5376, "step": 1862 }, { "epoch": 0.12, "grad_norm": 1.2250982522964478, "learning_rate": 9.787960975933653e-06, "loss": 0.5894, "step": 1863 }, { "epoch": 0.12, "grad_norm": 1.1298640966415405, "learning_rate": 9.787659687507535e-06, "loss": 0.5683, "step": 1864 }, { "epoch": 0.12, "grad_norm": 1.2974265813827515, "learning_rate": 9.787358189825163e-06, "loss": 0.5902, "step": 1865 }, { "epoch": 0.12, "grad_norm": 1.2052950859069824, "learning_rate": 9.787056482899717e-06, "loss": 0.6104, "step": 1866 }, { "epoch": 0.12, "grad_norm": 1.171879529953003, "learning_rate": 9.786754566744383e-06, "loss": 0.5869, "step": 1867 }, { "epoch": 0.12, "grad_norm": 1.2649821043014526, "learning_rate": 9.786452441372356e-06, "loss": 0.5389, "step": 1868 }, { "epoch": 0.12, "grad_norm": 1.4527508020401, "learning_rate": 9.786150106796842e-06, "loss": 0.6551, "step": 1869 }, { "epoch": 0.12, "grad_norm": 1.181561827659607, "learning_rate": 9.785847563031058e-06, "loss": 0.5956, "step": 1870 }, { "epoch": 0.12, "grad_norm": 1.180111289024353, "learning_rate": 9.785544810088221e-06, "loss": 0.5959, "step": 1871 }, { "epoch": 0.12, "grad_norm": 1.326596736907959, "learning_rate": 9.785241847981568e-06, "loss": 0.595, "step": 1872 }, { "epoch": 0.12, "grad_norm": 1.4772688150405884, "learning_rate": 9.78493867672434e-06, "loss": 0.5733, "step": 1873 }, { "epoch": 0.12, "grad_norm": 1.1543529033660889, "learning_rate": 9.78463529632979e-06, "loss": 0.5993, "step": 1874 }, { "epoch": 0.12, "grad_norm": 1.2125288248062134, "learning_rate": 9.784331706811171e-06, "loss": 0.5741, "step": 1875 }, { "epoch": 0.12, "grad_norm": 1.1739332675933838, "learning_rate": 9.784027908181762e-06, "loss": 0.5754, "step": 1876 }, { "epoch": 0.12, "grad_norm": 1.2900809049606323, "learning_rate": 9.783723900454833e-06, "loss": 0.5778, "step": 1877 }, { "epoch": 0.12, "grad_norm": 1.3567091226577759, "learning_rate": 9.783419683643675e-06, "loss": 0.6101, "step": 1878 }, { "epoch": 0.12, "grad_norm": 1.151408076286316, "learning_rate": 9.783115257761584e-06, "loss": 0.4953, "step": 1879 }, { "epoch": 0.12, "grad_norm": 1.2453583478927612, "learning_rate": 9.782810622821867e-06, "loss": 0.623, "step": 1880 }, { "epoch": 0.12, "grad_norm": 1.1339199542999268, "learning_rate": 9.782505778837837e-06, "loss": 0.5687, "step": 1881 }, { "epoch": 0.12, "grad_norm": 1.2841237783432007, "learning_rate": 9.782200725822816e-06, "loss": 0.6436, "step": 1882 }, { "epoch": 0.12, "grad_norm": 1.1688145399093628, "learning_rate": 9.781895463790142e-06, "loss": 0.6131, "step": 1883 }, { "epoch": 0.12, "grad_norm": 1.194228172302246, "learning_rate": 9.781589992753156e-06, "loss": 0.5318, "step": 1884 }, { "epoch": 0.12, "grad_norm": 1.2055388689041138, "learning_rate": 9.781284312725205e-06, "loss": 0.5811, "step": 1885 }, { "epoch": 0.12, "grad_norm": 1.1826879978179932, "learning_rate": 9.780978423719653e-06, "loss": 0.5868, "step": 1886 }, { "epoch": 0.12, "grad_norm": 1.2431731224060059, "learning_rate": 9.780672325749872e-06, "loss": 0.6241, "step": 1887 }, { "epoch": 0.12, "grad_norm": 1.223219394683838, "learning_rate": 9.780366018829235e-06, "loss": 0.6194, "step": 1888 }, { "epoch": 0.12, "grad_norm": 1.178626537322998, "learning_rate": 9.780059502971135e-06, "loss": 0.5778, "step": 1889 }, { "epoch": 0.12, "grad_norm": 1.1393959522247314, "learning_rate": 9.779752778188965e-06, "loss": 0.5253, "step": 1890 }, { "epoch": 0.12, "grad_norm": 1.1579242944717407, "learning_rate": 9.779445844496134e-06, "loss": 0.5672, "step": 1891 }, { "epoch": 0.12, "grad_norm": 1.1388498544692993, "learning_rate": 9.779138701906054e-06, "loss": 0.5774, "step": 1892 }, { "epoch": 0.12, "grad_norm": 1.214303970336914, "learning_rate": 9.778831350432155e-06, "loss": 0.6384, "step": 1893 }, { "epoch": 0.12, "grad_norm": 1.3074495792388916, "learning_rate": 9.778523790087867e-06, "loss": 0.6354, "step": 1894 }, { "epoch": 0.12, "grad_norm": 1.317692518234253, "learning_rate": 9.77821602088663e-06, "loss": 0.5964, "step": 1895 }, { "epoch": 0.12, "grad_norm": 1.2983649969100952, "learning_rate": 9.777908042841902e-06, "loss": 0.579, "step": 1896 }, { "epoch": 0.12, "grad_norm": 1.1635687351226807, "learning_rate": 9.777599855967137e-06, "loss": 0.5959, "step": 1897 }, { "epoch": 0.12, "grad_norm": 1.1642608642578125, "learning_rate": 9.777291460275812e-06, "loss": 0.5629, "step": 1898 }, { "epoch": 0.12, "grad_norm": 1.2557182312011719, "learning_rate": 9.7769828557814e-06, "loss": 0.5963, "step": 1899 }, { "epoch": 0.12, "grad_norm": 1.2655770778656006, "learning_rate": 9.776674042497394e-06, "loss": 0.5755, "step": 1900 }, { "epoch": 0.12, "grad_norm": 1.3094877004623413, "learning_rate": 9.77636502043729e-06, "loss": 0.6263, "step": 1901 }, { "epoch": 0.12, "grad_norm": 1.3165786266326904, "learning_rate": 9.776055789614594e-06, "loss": 0.577, "step": 1902 }, { "epoch": 0.12, "grad_norm": 1.184720754623413, "learning_rate": 9.775746350042821e-06, "loss": 0.5896, "step": 1903 }, { "epoch": 0.12, "grad_norm": 1.3175839185714722, "learning_rate": 9.775436701735497e-06, "loss": 0.5736, "step": 1904 }, { "epoch": 0.12, "grad_norm": 1.1587451696395874, "learning_rate": 9.775126844706155e-06, "loss": 0.5576, "step": 1905 }, { "epoch": 0.12, "grad_norm": 1.2817188501358032, "learning_rate": 9.77481677896834e-06, "loss": 0.6251, "step": 1906 }, { "epoch": 0.12, "grad_norm": 1.0893983840942383, "learning_rate": 9.774506504535601e-06, "loss": 0.5699, "step": 1907 }, { "epoch": 0.12, "grad_norm": 1.2100359201431274, "learning_rate": 9.774196021421503e-06, "loss": 0.5674, "step": 1908 }, { "epoch": 0.12, "grad_norm": 1.45118248462677, "learning_rate": 9.773885329639613e-06, "loss": 0.5746, "step": 1909 }, { "epoch": 0.12, "grad_norm": 1.2218302488327026, "learning_rate": 9.773574429203512e-06, "loss": 0.6117, "step": 1910 }, { "epoch": 0.12, "grad_norm": 1.3542184829711914, "learning_rate": 9.773263320126789e-06, "loss": 0.6049, "step": 1911 }, { "epoch": 0.12, "grad_norm": 1.2625106573104858, "learning_rate": 9.772952002423043e-06, "loss": 0.5493, "step": 1912 }, { "epoch": 0.12, "grad_norm": 1.2287752628326416, "learning_rate": 9.772640476105878e-06, "loss": 0.5615, "step": 1913 }, { "epoch": 0.12, "grad_norm": 1.3227187395095825, "learning_rate": 9.77232874118891e-06, "loss": 0.5786, "step": 1914 }, { "epoch": 0.12, "grad_norm": 1.1966227293014526, "learning_rate": 9.772016797685766e-06, "loss": 0.6311, "step": 1915 }, { "epoch": 0.12, "grad_norm": 1.1410915851593018, "learning_rate": 9.77170464561008e-06, "loss": 0.5514, "step": 1916 }, { "epoch": 0.12, "grad_norm": 1.2970585823059082, "learning_rate": 9.771392284975496e-06, "loss": 0.5996, "step": 1917 }, { "epoch": 0.12, "grad_norm": 1.562508225440979, "learning_rate": 9.771079715795666e-06, "loss": 0.623, "step": 1918 }, { "epoch": 0.12, "grad_norm": 1.3904250860214233, "learning_rate": 9.77076693808425e-06, "loss": 0.5821, "step": 1919 }, { "epoch": 0.12, "grad_norm": 1.3800384998321533, "learning_rate": 9.770453951854922e-06, "loss": 0.5795, "step": 1920 }, { "epoch": 0.12, "grad_norm": 1.2129323482513428, "learning_rate": 9.770140757121356e-06, "loss": 0.5767, "step": 1921 }, { "epoch": 0.12, "grad_norm": 1.0874282121658325, "learning_rate": 9.769827353897248e-06, "loss": 0.5393, "step": 1922 }, { "epoch": 0.12, "grad_norm": 1.318498134613037, "learning_rate": 9.76951374219629e-06, "loss": 0.6145, "step": 1923 }, { "epoch": 0.12, "grad_norm": 1.4175028800964355, "learning_rate": 9.769199922032194e-06, "loss": 0.5635, "step": 1924 }, { "epoch": 0.12, "grad_norm": 1.2827069759368896, "learning_rate": 9.768885893418673e-06, "loss": 0.5748, "step": 1925 }, { "epoch": 0.12, "grad_norm": 1.3337368965148926, "learning_rate": 9.768571656369455e-06, "loss": 0.619, "step": 1926 }, { "epoch": 0.12, "grad_norm": 1.2323764562606812, "learning_rate": 9.768257210898271e-06, "loss": 0.568, "step": 1927 }, { "epoch": 0.12, "grad_norm": 1.2282559871673584, "learning_rate": 9.767942557018866e-06, "loss": 0.5487, "step": 1928 }, { "epoch": 0.12, "grad_norm": 1.2202118635177612, "learning_rate": 9.767627694744994e-06, "loss": 0.5782, "step": 1929 }, { "epoch": 0.12, "grad_norm": 1.3141003847122192, "learning_rate": 9.767312624090416e-06, "loss": 0.5845, "step": 1930 }, { "epoch": 0.12, "grad_norm": 1.4363946914672852, "learning_rate": 9.766997345068905e-06, "loss": 0.61, "step": 1931 }, { "epoch": 0.12, "grad_norm": 1.3100533485412598, "learning_rate": 9.766681857694238e-06, "loss": 0.5451, "step": 1932 }, { "epoch": 0.12, "grad_norm": 1.307794213294983, "learning_rate": 9.766366161980205e-06, "loss": 0.5791, "step": 1933 }, { "epoch": 0.12, "grad_norm": 1.3880120515823364, "learning_rate": 9.766050257940605e-06, "loss": 0.5748, "step": 1934 }, { "epoch": 0.12, "grad_norm": 1.2077996730804443, "learning_rate": 9.765734145589244e-06, "loss": 0.6248, "step": 1935 }, { "epoch": 0.12, "grad_norm": 1.2555347681045532, "learning_rate": 9.76541782493994e-06, "loss": 0.5834, "step": 1936 }, { "epoch": 0.13, "grad_norm": 1.5886486768722534, "learning_rate": 9.765101296006516e-06, "loss": 0.6702, "step": 1937 }, { "epoch": 0.13, "grad_norm": 1.4633721113204956, "learning_rate": 9.76478455880281e-06, "loss": 0.5233, "step": 1938 }, { "epoch": 0.13, "grad_norm": 1.7812118530273438, "learning_rate": 9.764467613342665e-06, "loss": 0.6096, "step": 1939 }, { "epoch": 0.13, "grad_norm": 1.2537202835083008, "learning_rate": 9.764150459639932e-06, "loss": 0.5796, "step": 1940 }, { "epoch": 0.13, "grad_norm": 1.3194425106048584, "learning_rate": 9.763833097708474e-06, "loss": 0.6325, "step": 1941 }, { "epoch": 0.13, "grad_norm": 1.2226275205612183, "learning_rate": 9.763515527562162e-06, "loss": 0.6023, "step": 1942 }, { "epoch": 0.13, "grad_norm": 1.571604609489441, "learning_rate": 9.763197749214877e-06, "loss": 0.6141, "step": 1943 }, { "epoch": 0.13, "grad_norm": 1.3461682796478271, "learning_rate": 9.762879762680507e-06, "loss": 0.6411, "step": 1944 }, { "epoch": 0.13, "grad_norm": 1.5399253368377686, "learning_rate": 9.76256156797295e-06, "loss": 0.5516, "step": 1945 }, { "epoch": 0.13, "grad_norm": 1.2808163166046143, "learning_rate": 9.762243165106117e-06, "loss": 0.5964, "step": 1946 }, { "epoch": 0.13, "grad_norm": 1.2728078365325928, "learning_rate": 9.76192455409392e-06, "loss": 0.5506, "step": 1947 }, { "epoch": 0.13, "grad_norm": 1.1622576713562012, "learning_rate": 9.761605734950288e-06, "loss": 0.5517, "step": 1948 }, { "epoch": 0.13, "grad_norm": 1.28066885471344, "learning_rate": 9.761286707689154e-06, "loss": 0.6247, "step": 1949 }, { "epoch": 0.13, "grad_norm": 1.2471402883529663, "learning_rate": 9.760967472324462e-06, "loss": 0.5952, "step": 1950 }, { "epoch": 0.13, "grad_norm": 2.3533689975738525, "learning_rate": 9.760648028870165e-06, "loss": 0.6299, "step": 1951 }, { "epoch": 0.13, "grad_norm": 1.3218258619308472, "learning_rate": 9.760328377340225e-06, "loss": 0.6112, "step": 1952 }, { "epoch": 0.13, "grad_norm": 1.2774945497512817, "learning_rate": 9.760008517748615e-06, "loss": 0.5554, "step": 1953 }, { "epoch": 0.13, "grad_norm": 1.2092194557189941, "learning_rate": 9.759688450109313e-06, "loss": 0.5401, "step": 1954 }, { "epoch": 0.13, "grad_norm": 1.128546118736267, "learning_rate": 9.759368174436308e-06, "loss": 0.5728, "step": 1955 }, { "epoch": 0.13, "grad_norm": 2.5017595291137695, "learning_rate": 9.759047690743601e-06, "loss": 0.5708, "step": 1956 }, { "epoch": 0.13, "grad_norm": 1.2429723739624023, "learning_rate": 9.758726999045196e-06, "loss": 0.5951, "step": 1957 }, { "epoch": 0.13, "grad_norm": 1.1507195234298706, "learning_rate": 9.758406099355112e-06, "loss": 0.5377, "step": 1958 }, { "epoch": 0.13, "grad_norm": 1.2275227308273315, "learning_rate": 9.758084991687376e-06, "loss": 0.5914, "step": 1959 }, { "epoch": 0.13, "grad_norm": 1.217139720916748, "learning_rate": 9.75776367605602e-06, "loss": 0.5753, "step": 1960 }, { "epoch": 0.13, "grad_norm": 1.1265455484390259, "learning_rate": 9.75744215247509e-06, "loss": 0.583, "step": 1961 }, { "epoch": 0.13, "grad_norm": 1.4710177183151245, "learning_rate": 9.757120420958636e-06, "loss": 0.5729, "step": 1962 }, { "epoch": 0.13, "grad_norm": 1.1868430376052856, "learning_rate": 9.756798481520721e-06, "loss": 0.5706, "step": 1963 }, { "epoch": 0.13, "grad_norm": 1.145317554473877, "learning_rate": 9.75647633417542e-06, "loss": 0.6063, "step": 1964 }, { "epoch": 0.13, "grad_norm": 1.3045299053192139, "learning_rate": 9.756153978936809e-06, "loss": 0.6407, "step": 1965 }, { "epoch": 0.13, "grad_norm": 1.1495177745819092, "learning_rate": 9.75583141581898e-06, "loss": 0.5997, "step": 1966 }, { "epoch": 0.13, "grad_norm": 1.2236440181732178, "learning_rate": 9.755508644836027e-06, "loss": 0.5683, "step": 1967 }, { "epoch": 0.13, "grad_norm": 1.4318909645080566, "learning_rate": 9.755185666002062e-06, "loss": 0.5971, "step": 1968 }, { "epoch": 0.13, "grad_norm": 1.1840178966522217, "learning_rate": 9.754862479331201e-06, "loss": 0.5626, "step": 1969 }, { "epoch": 0.13, "grad_norm": 1.2187402248382568, "learning_rate": 9.75453908483757e-06, "loss": 0.5632, "step": 1970 }, { "epoch": 0.13, "grad_norm": 1.1612658500671387, "learning_rate": 9.754215482535298e-06, "loss": 0.5311, "step": 1971 }, { "epoch": 0.13, "grad_norm": 1.216050624847412, "learning_rate": 9.753891672438535e-06, "loss": 0.5659, "step": 1972 }, { "epoch": 0.13, "grad_norm": 1.2071647644042969, "learning_rate": 9.753567654561434e-06, "loss": 0.5625, "step": 1973 }, { "epoch": 0.13, "grad_norm": 1.3337637186050415, "learning_rate": 9.753243428918155e-06, "loss": 0.5543, "step": 1974 }, { "epoch": 0.13, "grad_norm": 1.2965619564056396, "learning_rate": 9.752918995522868e-06, "loss": 0.5777, "step": 1975 }, { "epoch": 0.13, "grad_norm": 1.3158308267593384, "learning_rate": 9.752594354389755e-06, "loss": 0.5768, "step": 1976 }, { "epoch": 0.13, "grad_norm": 1.3649696111679077, "learning_rate": 9.752269505533004e-06, "loss": 0.5603, "step": 1977 }, { "epoch": 0.13, "grad_norm": 1.162462592124939, "learning_rate": 9.751944448966814e-06, "loss": 0.552, "step": 1978 }, { "epoch": 0.13, "grad_norm": 1.2665541172027588, "learning_rate": 9.751619184705394e-06, "loss": 0.5907, "step": 1979 }, { "epoch": 0.13, "grad_norm": 1.3077783584594727, "learning_rate": 9.751293712762957e-06, "loss": 0.5786, "step": 1980 }, { "epoch": 0.13, "grad_norm": 1.2881865501403809, "learning_rate": 9.75096803315373e-06, "loss": 0.5791, "step": 1981 }, { "epoch": 0.13, "grad_norm": 1.1438863277435303, "learning_rate": 9.750642145891948e-06, "loss": 0.6326, "step": 1982 }, { "epoch": 0.13, "grad_norm": 1.2526662349700928, "learning_rate": 9.750316050991857e-06, "loss": 0.5789, "step": 1983 }, { "epoch": 0.13, "grad_norm": 1.3795610666275024, "learning_rate": 9.749989748467706e-06, "loss": 0.5952, "step": 1984 }, { "epoch": 0.13, "grad_norm": 1.239835262298584, "learning_rate": 9.749663238333758e-06, "loss": 0.5708, "step": 1985 }, { "epoch": 0.13, "grad_norm": 1.1743017435073853, "learning_rate": 9.749336520604283e-06, "loss": 0.5997, "step": 1986 }, { "epoch": 0.13, "grad_norm": 1.2717092037200928, "learning_rate": 9.749009595293563e-06, "loss": 0.5937, "step": 1987 }, { "epoch": 0.13, "grad_norm": 1.382873773574829, "learning_rate": 9.748682462415887e-06, "loss": 0.6058, "step": 1988 }, { "epoch": 0.13, "grad_norm": 1.3494387865066528, "learning_rate": 9.748355121985551e-06, "loss": 0.5883, "step": 1989 }, { "epoch": 0.13, "grad_norm": 1.1468411684036255, "learning_rate": 9.748027574016865e-06, "loss": 0.6121, "step": 1990 }, { "epoch": 0.13, "grad_norm": 1.3885080814361572, "learning_rate": 9.747699818524143e-06, "loss": 0.6468, "step": 1991 }, { "epoch": 0.13, "grad_norm": 1.2937772274017334, "learning_rate": 9.747371855521711e-06, "loss": 0.5499, "step": 1992 }, { "epoch": 0.13, "grad_norm": 1.3491286039352417, "learning_rate": 9.747043685023904e-06, "loss": 0.651, "step": 1993 }, { "epoch": 0.13, "grad_norm": 1.2226295471191406, "learning_rate": 9.746715307045065e-06, "loss": 0.5667, "step": 1994 }, { "epoch": 0.13, "grad_norm": 1.2409982681274414, "learning_rate": 9.746386721599549e-06, "loss": 0.5878, "step": 1995 }, { "epoch": 0.13, "grad_norm": 1.2291051149368286, "learning_rate": 9.746057928701711e-06, "loss": 0.5601, "step": 1996 }, { "epoch": 0.13, "grad_norm": 1.1077470779418945, "learning_rate": 9.745728928365927e-06, "loss": 0.6205, "step": 1997 }, { "epoch": 0.13, "grad_norm": 1.1672203540802002, "learning_rate": 9.745399720606577e-06, "loss": 0.5061, "step": 1998 }, { "epoch": 0.13, "grad_norm": 1.1957669258117676, "learning_rate": 9.745070305438048e-06, "loss": 0.566, "step": 1999 }, { "epoch": 0.13, "grad_norm": 1.2592484951019287, "learning_rate": 9.744740682874738e-06, "loss": 0.5416, "step": 2000 }, { "epoch": 0.13, "grad_norm": 1.3276654481887817, "learning_rate": 9.744410852931055e-06, "loss": 0.6022, "step": 2001 }, { "epoch": 0.13, "grad_norm": 1.3123151063919067, "learning_rate": 9.744080815621414e-06, "loss": 0.6177, "step": 2002 }, { "epoch": 0.13, "grad_norm": 1.2417765855789185, "learning_rate": 9.74375057096024e-06, "loss": 0.5884, "step": 2003 }, { "epoch": 0.13, "grad_norm": 1.2707048654556274, "learning_rate": 9.743420118961968e-06, "loss": 0.5832, "step": 2004 }, { "epoch": 0.13, "grad_norm": 1.1984328031539917, "learning_rate": 9.743089459641041e-06, "loss": 0.554, "step": 2005 }, { "epoch": 0.13, "grad_norm": 1.2912027835845947, "learning_rate": 9.742758593011911e-06, "loss": 0.6422, "step": 2006 }, { "epoch": 0.13, "grad_norm": 1.1801010370254517, "learning_rate": 9.74242751908904e-06, "loss": 0.6064, "step": 2007 }, { "epoch": 0.13, "grad_norm": 1.2652287483215332, "learning_rate": 9.742096237886896e-06, "loss": 0.5663, "step": 2008 }, { "epoch": 0.13, "grad_norm": 1.445594310760498, "learning_rate": 9.741764749419961e-06, "loss": 0.6131, "step": 2009 }, { "epoch": 0.13, "grad_norm": 1.2283141613006592, "learning_rate": 9.741433053702724e-06, "loss": 0.6157, "step": 2010 }, { "epoch": 0.13, "grad_norm": 1.2226669788360596, "learning_rate": 9.74110115074968e-06, "loss": 0.589, "step": 2011 }, { "epoch": 0.13, "grad_norm": 1.1754106283187866, "learning_rate": 9.740769040575338e-06, "loss": 0.5572, "step": 2012 }, { "epoch": 0.13, "grad_norm": 1.1932497024536133, "learning_rate": 9.74043672319421e-06, "loss": 0.5846, "step": 2013 }, { "epoch": 0.13, "grad_norm": 1.423450231552124, "learning_rate": 9.740104198620826e-06, "loss": 0.5887, "step": 2014 }, { "epoch": 0.13, "grad_norm": 1.1414827108383179, "learning_rate": 9.739771466869716e-06, "loss": 0.5698, "step": 2015 }, { "epoch": 0.13, "grad_norm": 1.1893290281295776, "learning_rate": 9.739438527955425e-06, "loss": 0.6491, "step": 2016 }, { "epoch": 0.13, "grad_norm": 1.168470025062561, "learning_rate": 9.739105381892502e-06, "loss": 0.5313, "step": 2017 }, { "epoch": 0.13, "grad_norm": 1.2419437170028687, "learning_rate": 9.738772028695512e-06, "loss": 0.62, "step": 2018 }, { "epoch": 0.13, "grad_norm": 1.236753225326538, "learning_rate": 9.738438468379022e-06, "loss": 0.6132, "step": 2019 }, { "epoch": 0.13, "grad_norm": 1.2995671033859253, "learning_rate": 9.73810470095761e-06, "loss": 0.5863, "step": 2020 }, { "epoch": 0.13, "grad_norm": 1.3253430128097534, "learning_rate": 9.737770726445867e-06, "loss": 0.6228, "step": 2021 }, { "epoch": 0.13, "grad_norm": 1.2938458919525146, "learning_rate": 9.73743654485839e-06, "loss": 0.5807, "step": 2022 }, { "epoch": 0.13, "grad_norm": 1.3980093002319336, "learning_rate": 9.737102156209785e-06, "loss": 0.5602, "step": 2023 }, { "epoch": 0.13, "grad_norm": 1.289013147354126, "learning_rate": 9.736767560514665e-06, "loss": 0.5553, "step": 2024 }, { "epoch": 0.13, "grad_norm": 1.3107153177261353, "learning_rate": 9.736432757787657e-06, "loss": 0.5236, "step": 2025 }, { "epoch": 0.13, "grad_norm": 1.2600624561309814, "learning_rate": 9.736097748043393e-06, "loss": 0.6185, "step": 2026 }, { "epoch": 0.13, "grad_norm": 1.3361259698867798, "learning_rate": 9.735762531296515e-06, "loss": 0.576, "step": 2027 }, { "epoch": 0.13, "grad_norm": 1.3110271692276, "learning_rate": 9.735427107561677e-06, "loss": 0.6396, "step": 2028 }, { "epoch": 0.13, "grad_norm": 1.2397972345352173, "learning_rate": 9.735091476853535e-06, "loss": 0.5916, "step": 2029 }, { "epoch": 0.13, "grad_norm": 1.1140775680541992, "learning_rate": 9.734755639186763e-06, "loss": 0.5683, "step": 2030 }, { "epoch": 0.13, "grad_norm": 1.396707534790039, "learning_rate": 9.73441959457604e-06, "loss": 0.602, "step": 2031 }, { "epoch": 0.13, "grad_norm": 1.3990041017532349, "learning_rate": 9.734083343036047e-06, "loss": 0.5766, "step": 2032 }, { "epoch": 0.13, "grad_norm": 1.2906670570373535, "learning_rate": 9.733746884581488e-06, "loss": 0.5884, "step": 2033 }, { "epoch": 0.13, "grad_norm": 1.476204752922058, "learning_rate": 9.733410219227065e-06, "loss": 0.5959, "step": 2034 }, { "epoch": 0.13, "grad_norm": 1.2632670402526855, "learning_rate": 9.733073346987494e-06, "loss": 0.6178, "step": 2035 }, { "epoch": 0.13, "grad_norm": 1.2319204807281494, "learning_rate": 9.732736267877498e-06, "loss": 0.5985, "step": 2036 }, { "epoch": 0.13, "grad_norm": 1.3031948804855347, "learning_rate": 9.73239898191181e-06, "loss": 0.5401, "step": 2037 }, { "epoch": 0.13, "grad_norm": 1.3771570920944214, "learning_rate": 9.732061489105173e-06, "loss": 0.5753, "step": 2038 }, { "epoch": 0.13, "grad_norm": 1.4327354431152344, "learning_rate": 9.731723789472339e-06, "loss": 0.6035, "step": 2039 }, { "epoch": 0.13, "grad_norm": 1.2551189661026, "learning_rate": 9.731385883028063e-06, "loss": 0.6079, "step": 2040 }, { "epoch": 0.13, "grad_norm": 1.292368769645691, "learning_rate": 9.73104776978712e-06, "loss": 0.5635, "step": 2041 }, { "epoch": 0.13, "grad_norm": 1.2917970418930054, "learning_rate": 9.730709449764281e-06, "loss": 0.585, "step": 2042 }, { "epoch": 0.13, "grad_norm": 1.2073440551757812, "learning_rate": 9.730370922974342e-06, "loss": 0.5785, "step": 2043 }, { "epoch": 0.13, "grad_norm": 1.2632750272750854, "learning_rate": 9.730032189432092e-06, "loss": 0.6175, "step": 2044 }, { "epoch": 0.13, "grad_norm": 1.646060585975647, "learning_rate": 9.72969324915234e-06, "loss": 0.5851, "step": 2045 }, { "epoch": 0.13, "grad_norm": 1.2970446348190308, "learning_rate": 9.729354102149898e-06, "loss": 0.5664, "step": 2046 }, { "epoch": 0.13, "grad_norm": 1.091869831085205, "learning_rate": 9.729014748439589e-06, "loss": 0.5687, "step": 2047 }, { "epoch": 0.13, "grad_norm": 1.1641631126403809, "learning_rate": 9.728675188036248e-06, "loss": 0.5826, "step": 2048 }, { "epoch": 0.13, "grad_norm": 1.1959518194198608, "learning_rate": 9.728335420954714e-06, "loss": 0.5679, "step": 2049 }, { "epoch": 0.13, "grad_norm": 1.1184587478637695, "learning_rate": 9.727995447209839e-06, "loss": 0.5952, "step": 2050 }, { "epoch": 0.13, "grad_norm": 1.2648861408233643, "learning_rate": 9.72765526681648e-06, "loss": 0.6002, "step": 2051 }, { "epoch": 0.13, "grad_norm": 1.2513256072998047, "learning_rate": 9.727314879789508e-06, "loss": 0.6073, "step": 2052 }, { "epoch": 0.13, "grad_norm": 1.4060897827148438, "learning_rate": 9.726974286143799e-06, "loss": 0.5726, "step": 2053 }, { "epoch": 0.13, "grad_norm": 1.3431997299194336, "learning_rate": 9.72663348589424e-06, "loss": 0.5271, "step": 2054 }, { "epoch": 0.13, "grad_norm": 1.3862227201461792, "learning_rate": 9.726292479055724e-06, "loss": 0.6429, "step": 2055 }, { "epoch": 0.13, "grad_norm": 1.3223991394042969, "learning_rate": 9.72595126564316e-06, "loss": 0.6012, "step": 2056 }, { "epoch": 0.13, "grad_norm": 1.1435027122497559, "learning_rate": 9.725609845671459e-06, "loss": 0.5582, "step": 2057 }, { "epoch": 0.13, "grad_norm": 1.325864315032959, "learning_rate": 9.725268219155544e-06, "loss": 0.5736, "step": 2058 }, { "epoch": 0.13, "grad_norm": 1.2785840034484863, "learning_rate": 9.724926386110345e-06, "loss": 0.5604, "step": 2059 }, { "epoch": 0.13, "grad_norm": 1.0811306238174438, "learning_rate": 9.724584346550804e-06, "loss": 0.5646, "step": 2060 }, { "epoch": 0.13, "grad_norm": 1.2087538242340088, "learning_rate": 9.724242100491873e-06, "loss": 0.6249, "step": 2061 }, { "epoch": 0.13, "grad_norm": 1.3127323389053345, "learning_rate": 9.723899647948508e-06, "loss": 0.5835, "step": 2062 }, { "epoch": 0.13, "grad_norm": 1.4850170612335205, "learning_rate": 9.723556988935676e-06, "loss": 0.5522, "step": 2063 }, { "epoch": 0.13, "grad_norm": 1.2918941974639893, "learning_rate": 9.723214123468356e-06, "loss": 0.5847, "step": 2064 }, { "epoch": 0.13, "grad_norm": 1.1872873306274414, "learning_rate": 9.722871051561532e-06, "loss": 0.5555, "step": 2065 }, { "epoch": 0.13, "grad_norm": 1.246658205986023, "learning_rate": 9.722527773230198e-06, "loss": 0.555, "step": 2066 }, { "epoch": 0.13, "grad_norm": 1.2813713550567627, "learning_rate": 9.722184288489362e-06, "loss": 0.5466, "step": 2067 }, { "epoch": 0.13, "grad_norm": 1.3019388914108276, "learning_rate": 9.721840597354035e-06, "loss": 0.6361, "step": 2068 }, { "epoch": 0.13, "grad_norm": 1.2277932167053223, "learning_rate": 9.721496699839235e-06, "loss": 0.6063, "step": 2069 }, { "epoch": 0.13, "grad_norm": 1.1573846340179443, "learning_rate": 9.721152595959998e-06, "loss": 0.5516, "step": 2070 }, { "epoch": 0.13, "grad_norm": 1.2369402647018433, "learning_rate": 9.720808285731361e-06, "loss": 0.6281, "step": 2071 }, { "epoch": 0.13, "grad_norm": 1.3754202127456665, "learning_rate": 9.720463769168373e-06, "loss": 0.6047, "step": 2072 }, { "epoch": 0.13, "grad_norm": 1.1867460012435913, "learning_rate": 9.720119046286094e-06, "loss": 0.6306, "step": 2073 }, { "epoch": 0.13, "grad_norm": 1.2692164182662964, "learning_rate": 9.719774117099588e-06, "loss": 0.6154, "step": 2074 }, { "epoch": 0.13, "grad_norm": 1.3375507593154907, "learning_rate": 9.719428981623933e-06, "loss": 0.6074, "step": 2075 }, { "epoch": 0.13, "grad_norm": 1.2670204639434814, "learning_rate": 9.719083639874215e-06, "loss": 0.5916, "step": 2076 }, { "epoch": 0.13, "grad_norm": 1.219361662864685, "learning_rate": 9.718738091865525e-06, "loss": 0.5723, "step": 2077 }, { "epoch": 0.13, "grad_norm": 1.2806639671325684, "learning_rate": 9.718392337612967e-06, "loss": 0.5871, "step": 2078 }, { "epoch": 0.13, "grad_norm": 1.1507517099380493, "learning_rate": 9.718046377131655e-06, "loss": 0.5707, "step": 2079 }, { "epoch": 0.13, "grad_norm": 1.3061188459396362, "learning_rate": 9.717700210436708e-06, "loss": 0.5583, "step": 2080 }, { "epoch": 0.13, "grad_norm": 1.1419332027435303, "learning_rate": 9.717353837543257e-06, "loss": 0.539, "step": 2081 }, { "epoch": 0.13, "grad_norm": 1.2740293741226196, "learning_rate": 9.71700725846644e-06, "loss": 0.5878, "step": 2082 }, { "epoch": 0.13, "grad_norm": 1.1957989931106567, "learning_rate": 9.716660473221406e-06, "loss": 0.5351, "step": 2083 }, { "epoch": 0.13, "grad_norm": 1.1525481939315796, "learning_rate": 9.716313481823312e-06, "loss": 0.5783, "step": 2084 }, { "epoch": 0.13, "grad_norm": 1.2132134437561035, "learning_rate": 9.715966284287324e-06, "loss": 0.5335, "step": 2085 }, { "epoch": 0.13, "grad_norm": 1.1922476291656494, "learning_rate": 9.715618880628617e-06, "loss": 0.5738, "step": 2086 }, { "epoch": 0.13, "grad_norm": 1.4314244985580444, "learning_rate": 9.715271270862374e-06, "loss": 0.5856, "step": 2087 }, { "epoch": 0.13, "grad_norm": 1.2908192873001099, "learning_rate": 9.71492345500379e-06, "loss": 0.6065, "step": 2088 }, { "epoch": 0.13, "grad_norm": 1.1606417894363403, "learning_rate": 9.714575433068068e-06, "loss": 0.5657, "step": 2089 }, { "epoch": 0.13, "grad_norm": 1.2673685550689697, "learning_rate": 9.714227205070417e-06, "loss": 0.5412, "step": 2090 }, { "epoch": 0.13, "grad_norm": 1.256041407585144, "learning_rate": 9.713878771026057e-06, "loss": 0.5708, "step": 2091 }, { "epoch": 0.14, "grad_norm": 1.1610071659088135, "learning_rate": 9.713530130950218e-06, "loss": 0.567, "step": 2092 }, { "epoch": 0.14, "grad_norm": 1.431620717048645, "learning_rate": 9.713181284858137e-06, "loss": 0.6133, "step": 2093 }, { "epoch": 0.14, "grad_norm": 1.3365845680236816, "learning_rate": 9.712832232765064e-06, "loss": 0.5601, "step": 2094 }, { "epoch": 0.14, "grad_norm": 1.2455729246139526, "learning_rate": 9.712482974686252e-06, "loss": 0.545, "step": 2095 }, { "epoch": 0.14, "grad_norm": 1.2998064756393433, "learning_rate": 9.71213351063697e-06, "loss": 0.6073, "step": 2096 }, { "epoch": 0.14, "grad_norm": 1.257233738899231, "learning_rate": 9.711783840632488e-06, "loss": 0.6085, "step": 2097 }, { "epoch": 0.14, "grad_norm": 1.2491514682769775, "learning_rate": 9.71143396468809e-06, "loss": 0.6228, "step": 2098 }, { "epoch": 0.14, "grad_norm": 1.36898672580719, "learning_rate": 9.71108388281907e-06, "loss": 0.6043, "step": 2099 }, { "epoch": 0.14, "grad_norm": 1.4574302434921265, "learning_rate": 9.710733595040727e-06, "loss": 0.6486, "step": 2100 }, { "epoch": 0.14, "grad_norm": 1.4441183805465698, "learning_rate": 9.710383101368374e-06, "loss": 0.5925, "step": 2101 }, { "epoch": 0.14, "grad_norm": 1.305396556854248, "learning_rate": 9.710032401817329e-06, "loss": 0.5802, "step": 2102 }, { "epoch": 0.14, "grad_norm": 1.1942869424819946, "learning_rate": 9.709681496402918e-06, "loss": 0.5584, "step": 2103 }, { "epoch": 0.14, "grad_norm": 1.3128820657730103, "learning_rate": 9.709330385140481e-06, "loss": 0.6326, "step": 2104 }, { "epoch": 0.14, "grad_norm": 1.2259386777877808, "learning_rate": 9.708979068045362e-06, "loss": 0.5748, "step": 2105 }, { "epoch": 0.14, "grad_norm": 1.2197731733322144, "learning_rate": 9.70862754513292e-06, "loss": 0.6094, "step": 2106 }, { "epoch": 0.14, "grad_norm": 1.2056622505187988, "learning_rate": 9.708275816418515e-06, "loss": 0.5973, "step": 2107 }, { "epoch": 0.14, "grad_norm": 1.381063461303711, "learning_rate": 9.70792388191752e-06, "loss": 0.599, "step": 2108 }, { "epoch": 0.14, "grad_norm": 1.3106271028518677, "learning_rate": 9.70757174164532e-06, "loss": 0.5963, "step": 2109 }, { "epoch": 0.14, "grad_norm": 1.248724102973938, "learning_rate": 9.707219395617305e-06, "loss": 0.5576, "step": 2110 }, { "epoch": 0.14, "grad_norm": 1.9143571853637695, "learning_rate": 9.706866843848875e-06, "loss": 0.6541, "step": 2111 }, { "epoch": 0.14, "grad_norm": 1.2718385457992554, "learning_rate": 9.706514086355439e-06, "loss": 0.5833, "step": 2112 }, { "epoch": 0.14, "grad_norm": 1.2912503480911255, "learning_rate": 9.706161123152416e-06, "loss": 0.5637, "step": 2113 }, { "epoch": 0.14, "grad_norm": 1.4122620820999146, "learning_rate": 9.705807954255232e-06, "loss": 0.6512, "step": 2114 }, { "epoch": 0.14, "grad_norm": 1.3010172843933105, "learning_rate": 9.705454579679321e-06, "loss": 0.5371, "step": 2115 }, { "epoch": 0.14, "grad_norm": 1.338728904724121, "learning_rate": 9.705100999440134e-06, "loss": 0.6236, "step": 2116 }, { "epoch": 0.14, "grad_norm": 1.2335692644119263, "learning_rate": 9.70474721355312e-06, "loss": 0.5653, "step": 2117 }, { "epoch": 0.14, "grad_norm": 1.7495681047439575, "learning_rate": 9.704393222033743e-06, "loss": 0.5862, "step": 2118 }, { "epoch": 0.14, "grad_norm": 1.4692577123641968, "learning_rate": 9.704039024897479e-06, "loss": 0.5701, "step": 2119 }, { "epoch": 0.14, "grad_norm": 1.2880183458328247, "learning_rate": 9.7036846221598e-06, "loss": 0.5679, "step": 2120 }, { "epoch": 0.14, "grad_norm": 1.3371719121932983, "learning_rate": 9.703330013836208e-06, "loss": 0.6062, "step": 2121 }, { "epoch": 0.14, "grad_norm": 1.7973837852478027, "learning_rate": 9.702975199942193e-06, "loss": 0.6082, "step": 2122 }, { "epoch": 0.14, "grad_norm": 1.513105869293213, "learning_rate": 9.702620180493265e-06, "loss": 0.6385, "step": 2123 }, { "epoch": 0.14, "grad_norm": 1.192142367362976, "learning_rate": 9.702264955504944e-06, "loss": 0.5962, "step": 2124 }, { "epoch": 0.14, "grad_norm": 1.190598964691162, "learning_rate": 9.701909524992753e-06, "loss": 0.5767, "step": 2125 }, { "epoch": 0.14, "grad_norm": 1.0754212141036987, "learning_rate": 9.701553888972225e-06, "loss": 0.5495, "step": 2126 }, { "epoch": 0.14, "grad_norm": 1.3006523847579956, "learning_rate": 9.701198047458911e-06, "loss": 0.6197, "step": 2127 }, { "epoch": 0.14, "grad_norm": 1.1690797805786133, "learning_rate": 9.700842000468359e-06, "loss": 0.5861, "step": 2128 }, { "epoch": 0.14, "grad_norm": 1.1525627374649048, "learning_rate": 9.70048574801613e-06, "loss": 0.5582, "step": 2129 }, { "epoch": 0.14, "grad_norm": 1.1343673467636108, "learning_rate": 9.700129290117795e-06, "loss": 0.5745, "step": 2130 }, { "epoch": 0.14, "grad_norm": 1.225948691368103, "learning_rate": 9.699772626788936e-06, "loss": 0.5106, "step": 2131 }, { "epoch": 0.14, "grad_norm": 1.1849002838134766, "learning_rate": 9.699415758045143e-06, "loss": 0.5453, "step": 2132 }, { "epoch": 0.14, "grad_norm": 1.20042884349823, "learning_rate": 9.699058683902011e-06, "loss": 0.581, "step": 2133 }, { "epoch": 0.14, "grad_norm": 1.2996376752853394, "learning_rate": 9.698701404375147e-06, "loss": 0.6098, "step": 2134 }, { "epoch": 0.14, "grad_norm": 1.5528444051742554, "learning_rate": 9.698343919480167e-06, "loss": 0.5545, "step": 2135 }, { "epoch": 0.14, "grad_norm": 1.3405975103378296, "learning_rate": 9.697986229232697e-06, "loss": 0.5966, "step": 2136 }, { "epoch": 0.14, "grad_norm": 1.2266021966934204, "learning_rate": 9.69762833364837e-06, "loss": 0.5372, "step": 2137 }, { "epoch": 0.14, "grad_norm": 1.2999351024627686, "learning_rate": 9.69727023274283e-06, "loss": 0.5985, "step": 2138 }, { "epoch": 0.14, "grad_norm": 1.313330888748169, "learning_rate": 9.696911926531725e-06, "loss": 0.5639, "step": 2139 }, { "epoch": 0.14, "grad_norm": 1.3677462339401245, "learning_rate": 9.696553415030719e-06, "loss": 0.5982, "step": 2140 }, { "epoch": 0.14, "grad_norm": 1.1854991912841797, "learning_rate": 9.696194698255478e-06, "loss": 0.5704, "step": 2141 }, { "epoch": 0.14, "grad_norm": 1.2946138381958008, "learning_rate": 9.695835776221688e-06, "loss": 0.5586, "step": 2142 }, { "epoch": 0.14, "grad_norm": 1.1617276668548584, "learning_rate": 9.695476648945028e-06, "loss": 0.5599, "step": 2143 }, { "epoch": 0.14, "grad_norm": 1.300403118133545, "learning_rate": 9.695117316441199e-06, "loss": 0.6262, "step": 2144 }, { "epoch": 0.14, "grad_norm": 1.2681413888931274, "learning_rate": 9.694757778725905e-06, "loss": 0.5878, "step": 2145 }, { "epoch": 0.14, "grad_norm": 1.6967183351516724, "learning_rate": 9.694398035814861e-06, "loss": 0.5397, "step": 2146 }, { "epoch": 0.14, "grad_norm": 1.2608509063720703, "learning_rate": 9.694038087723792e-06, "loss": 0.5867, "step": 2147 }, { "epoch": 0.14, "grad_norm": 1.2311463356018066, "learning_rate": 9.693677934468429e-06, "loss": 0.6058, "step": 2148 }, { "epoch": 0.14, "grad_norm": 1.3590210676193237, "learning_rate": 9.693317576064511e-06, "loss": 0.6013, "step": 2149 }, { "epoch": 0.14, "grad_norm": 1.4326390027999878, "learning_rate": 9.692957012527793e-06, "loss": 0.651, "step": 2150 }, { "epoch": 0.14, "grad_norm": 1.2370721101760864, "learning_rate": 9.692596243874031e-06, "loss": 0.535, "step": 2151 }, { "epoch": 0.14, "grad_norm": 1.1213598251342773, "learning_rate": 9.692235270118994e-06, "loss": 0.5621, "step": 2152 }, { "epoch": 0.14, "grad_norm": 1.213881492614746, "learning_rate": 9.691874091278461e-06, "loss": 0.6317, "step": 2153 }, { "epoch": 0.14, "grad_norm": 1.2220853567123413, "learning_rate": 9.691512707368215e-06, "loss": 0.5593, "step": 2154 }, { "epoch": 0.14, "grad_norm": 1.2432780265808105, "learning_rate": 9.691151118404055e-06, "loss": 0.5442, "step": 2155 }, { "epoch": 0.14, "grad_norm": 1.2540764808654785, "learning_rate": 9.690789324401781e-06, "loss": 0.6593, "step": 2156 }, { "epoch": 0.14, "grad_norm": 1.3018988370895386, "learning_rate": 9.690427325377207e-06, "loss": 0.6056, "step": 2157 }, { "epoch": 0.14, "grad_norm": 1.2293400764465332, "learning_rate": 9.690065121346158e-06, "loss": 0.5777, "step": 2158 }, { "epoch": 0.14, "grad_norm": 1.5501121282577515, "learning_rate": 9.689702712324463e-06, "loss": 0.564, "step": 2159 }, { "epoch": 0.14, "grad_norm": 1.217454195022583, "learning_rate": 9.689340098327962e-06, "loss": 0.6011, "step": 2160 }, { "epoch": 0.14, "grad_norm": 1.3172190189361572, "learning_rate": 9.688977279372503e-06, "loss": 0.5495, "step": 2161 }, { "epoch": 0.14, "grad_norm": 1.2891411781311035, "learning_rate": 9.688614255473945e-06, "loss": 0.5973, "step": 2162 }, { "epoch": 0.14, "grad_norm": 1.1839559078216553, "learning_rate": 9.688251026648156e-06, "loss": 0.6062, "step": 2163 }, { "epoch": 0.14, "grad_norm": 1.3623547554016113, "learning_rate": 9.68788759291101e-06, "loss": 0.5401, "step": 2164 }, { "epoch": 0.14, "grad_norm": 1.1271586418151855, "learning_rate": 9.68752395427839e-06, "loss": 0.5306, "step": 2165 }, { "epoch": 0.14, "grad_norm": 1.2955677509307861, "learning_rate": 9.687160110766195e-06, "loss": 0.5516, "step": 2166 }, { "epoch": 0.14, "grad_norm": 1.3122471570968628, "learning_rate": 9.686796062390323e-06, "loss": 0.5884, "step": 2167 }, { "epoch": 0.14, "grad_norm": 1.2571717500686646, "learning_rate": 9.686431809166687e-06, "loss": 0.5729, "step": 2168 }, { "epoch": 0.14, "grad_norm": 1.65104341506958, "learning_rate": 9.68606735111121e-06, "loss": 0.5883, "step": 2169 }, { "epoch": 0.14, "grad_norm": 1.3196066617965698, "learning_rate": 9.685702688239816e-06, "loss": 0.5746, "step": 2170 }, { "epoch": 0.14, "grad_norm": 1.3150825500488281, "learning_rate": 9.68533782056845e-06, "loss": 0.5568, "step": 2171 }, { "epoch": 0.14, "grad_norm": 1.2616404294967651, "learning_rate": 9.684972748113053e-06, "loss": 0.5669, "step": 2172 }, { "epoch": 0.14, "grad_norm": 1.310471534729004, "learning_rate": 9.684607470889586e-06, "loss": 0.583, "step": 2173 }, { "epoch": 0.14, "grad_norm": 1.30027437210083, "learning_rate": 9.684241988914014e-06, "loss": 0.5573, "step": 2174 }, { "epoch": 0.14, "grad_norm": 1.258154034614563, "learning_rate": 9.683876302202309e-06, "loss": 0.536, "step": 2175 }, { "epoch": 0.14, "grad_norm": 1.1068302392959595, "learning_rate": 9.683510410770456e-06, "loss": 0.5425, "step": 2176 }, { "epoch": 0.14, "grad_norm": 1.6278289556503296, "learning_rate": 9.683144314634445e-06, "loss": 0.6128, "step": 2177 }, { "epoch": 0.14, "grad_norm": 1.3248227834701538, "learning_rate": 9.68277801381028e-06, "loss": 0.5869, "step": 2178 }, { "epoch": 0.14, "grad_norm": 1.3348076343536377, "learning_rate": 9.682411508313971e-06, "loss": 0.5988, "step": 2179 }, { "epoch": 0.14, "grad_norm": 1.350197434425354, "learning_rate": 9.682044798161533e-06, "loss": 0.6313, "step": 2180 }, { "epoch": 0.14, "grad_norm": 1.1752551794052124, "learning_rate": 9.681677883368998e-06, "loss": 0.5483, "step": 2181 }, { "epoch": 0.14, "grad_norm": 1.2578835487365723, "learning_rate": 9.681310763952402e-06, "loss": 0.6201, "step": 2182 }, { "epoch": 0.14, "grad_norm": 1.4060643911361694, "learning_rate": 9.68094343992779e-06, "loss": 0.6527, "step": 2183 }, { "epoch": 0.14, "grad_norm": 1.1880756616592407, "learning_rate": 9.680575911311218e-06, "loss": 0.5789, "step": 2184 }, { "epoch": 0.14, "grad_norm": 1.229962706565857, "learning_rate": 9.680208178118748e-06, "loss": 0.5841, "step": 2185 }, { "epoch": 0.14, "grad_norm": 1.2445229291915894, "learning_rate": 9.679840240366454e-06, "loss": 0.6149, "step": 2186 }, { "epoch": 0.14, "grad_norm": 1.290664792060852, "learning_rate": 9.679472098070418e-06, "loss": 0.5629, "step": 2187 }, { "epoch": 0.14, "grad_norm": 1.2848691940307617, "learning_rate": 9.679103751246728e-06, "loss": 0.5963, "step": 2188 }, { "epoch": 0.14, "grad_norm": 1.2594220638275146, "learning_rate": 9.678735199911487e-06, "loss": 0.5975, "step": 2189 }, { "epoch": 0.14, "grad_norm": 1.3012579679489136, "learning_rate": 9.6783664440808e-06, "loss": 0.5751, "step": 2190 }, { "epoch": 0.14, "grad_norm": 1.1895650625228882, "learning_rate": 9.677997483770787e-06, "loss": 0.5324, "step": 2191 }, { "epoch": 0.14, "grad_norm": 1.3189595937728882, "learning_rate": 9.677628318997573e-06, "loss": 0.5724, "step": 2192 }, { "epoch": 0.14, "grad_norm": 1.1829861402511597, "learning_rate": 9.677258949777295e-06, "loss": 0.5711, "step": 2193 }, { "epoch": 0.14, "grad_norm": 1.2466140985488892, "learning_rate": 9.676889376126095e-06, "loss": 0.5812, "step": 2194 }, { "epoch": 0.14, "grad_norm": 1.2476017475128174, "learning_rate": 9.676519598060126e-06, "loss": 0.572, "step": 2195 }, { "epoch": 0.14, "grad_norm": 1.3510913848876953, "learning_rate": 9.676149615595553e-06, "loss": 0.5853, "step": 2196 }, { "epoch": 0.14, "grad_norm": 1.3763891458511353, "learning_rate": 9.675779428748543e-06, "loss": 0.5765, "step": 2197 }, { "epoch": 0.14, "grad_norm": 1.3485040664672852, "learning_rate": 9.675409037535278e-06, "loss": 0.5215, "step": 2198 }, { "epoch": 0.14, "grad_norm": 1.244563102722168, "learning_rate": 9.675038441971948e-06, "loss": 0.5687, "step": 2199 }, { "epoch": 0.14, "grad_norm": 1.5159119367599487, "learning_rate": 9.674667642074749e-06, "loss": 0.5851, "step": 2200 }, { "epoch": 0.14, "grad_norm": 1.3544142246246338, "learning_rate": 9.674296637859886e-06, "loss": 0.5415, "step": 2201 }, { "epoch": 0.14, "grad_norm": 1.2369039058685303, "learning_rate": 9.67392542934358e-06, "loss": 0.5937, "step": 2202 }, { "epoch": 0.14, "grad_norm": 1.3047127723693848, "learning_rate": 9.673554016542051e-06, "loss": 0.6141, "step": 2203 }, { "epoch": 0.14, "grad_norm": 1.3007752895355225, "learning_rate": 9.673182399471534e-06, "loss": 0.5588, "step": 2204 }, { "epoch": 0.14, "grad_norm": 1.3061177730560303, "learning_rate": 9.672810578148271e-06, "loss": 0.5702, "step": 2205 }, { "epoch": 0.14, "grad_norm": 1.2769436836242676, "learning_rate": 9.672438552588513e-06, "loss": 0.5827, "step": 2206 }, { "epoch": 0.14, "grad_norm": 1.3026691675186157, "learning_rate": 9.672066322808522e-06, "loss": 0.6145, "step": 2207 }, { "epoch": 0.14, "grad_norm": 2.1690943241119385, "learning_rate": 9.671693888824565e-06, "loss": 0.5521, "step": 2208 }, { "epoch": 0.14, "grad_norm": 1.2645831108093262, "learning_rate": 9.671321250652923e-06, "loss": 0.5409, "step": 2209 }, { "epoch": 0.14, "grad_norm": 1.238082766532898, "learning_rate": 9.67094840830988e-06, "loss": 0.5838, "step": 2210 }, { "epoch": 0.14, "grad_norm": 1.354540228843689, "learning_rate": 9.670575361811733e-06, "loss": 0.5434, "step": 2211 }, { "epoch": 0.14, "grad_norm": 1.2309508323669434, "learning_rate": 9.670202111174789e-06, "loss": 0.5865, "step": 2212 }, { "epoch": 0.14, "grad_norm": 1.3160568475723267, "learning_rate": 9.669828656415359e-06, "loss": 0.6326, "step": 2213 }, { "epoch": 0.14, "grad_norm": 1.1702847480773926, "learning_rate": 9.669454997549766e-06, "loss": 0.5434, "step": 2214 }, { "epoch": 0.14, "grad_norm": 1.301920771598816, "learning_rate": 9.669081134594342e-06, "loss": 0.6273, "step": 2215 }, { "epoch": 0.14, "grad_norm": 1.1758192777633667, "learning_rate": 9.66870706756543e-06, "loss": 0.5286, "step": 2216 }, { "epoch": 0.14, "grad_norm": 2.0923752784729004, "learning_rate": 9.668332796479376e-06, "loss": 0.5629, "step": 2217 }, { "epoch": 0.14, "grad_norm": 1.3720697164535522, "learning_rate": 9.667958321352541e-06, "loss": 0.6283, "step": 2218 }, { "epoch": 0.14, "grad_norm": 1.253583312034607, "learning_rate": 9.66758364220129e-06, "loss": 0.5783, "step": 2219 }, { "epoch": 0.14, "grad_norm": 1.256147861480713, "learning_rate": 9.667208759042002e-06, "loss": 0.5667, "step": 2220 }, { "epoch": 0.14, "grad_norm": 1.1834278106689453, "learning_rate": 9.66683367189106e-06, "loss": 0.5489, "step": 2221 }, { "epoch": 0.14, "grad_norm": 1.1297380924224854, "learning_rate": 9.666458380764859e-06, "loss": 0.5628, "step": 2222 }, { "epoch": 0.14, "grad_norm": 1.1614896059036255, "learning_rate": 9.6660828856798e-06, "loss": 0.572, "step": 2223 }, { "epoch": 0.14, "grad_norm": 1.211773157119751, "learning_rate": 9.6657071866523e-06, "loss": 0.4984, "step": 2224 }, { "epoch": 0.14, "grad_norm": 1.270801067352295, "learning_rate": 9.665331283698773e-06, "loss": 0.6347, "step": 2225 }, { "epoch": 0.14, "grad_norm": 1.5719215869903564, "learning_rate": 9.664955176835655e-06, "loss": 0.5502, "step": 2226 }, { "epoch": 0.14, "grad_norm": 1.2567658424377441, "learning_rate": 9.664578866079381e-06, "loss": 0.5938, "step": 2227 }, { "epoch": 0.14, "grad_norm": 1.3853346109390259, "learning_rate": 9.6642023514464e-06, "loss": 0.6372, "step": 2228 }, { "epoch": 0.14, "grad_norm": 1.2081413269042969, "learning_rate": 9.663825632953168e-06, "loss": 0.5946, "step": 2229 }, { "epoch": 0.14, "grad_norm": 1.2824665307998657, "learning_rate": 9.663448710616149e-06, "loss": 0.5789, "step": 2230 }, { "epoch": 0.14, "grad_norm": 1.116421103477478, "learning_rate": 9.66307158445182e-06, "loss": 0.5468, "step": 2231 }, { "epoch": 0.14, "grad_norm": 1.2633568048477173, "learning_rate": 9.662694254476661e-06, "loss": 0.6064, "step": 2232 }, { "epoch": 0.14, "grad_norm": 1.6939537525177002, "learning_rate": 9.66231672070717e-06, "loss": 0.5741, "step": 2233 }, { "epoch": 0.14, "grad_norm": 1.27329683303833, "learning_rate": 9.661938983159841e-06, "loss": 0.5688, "step": 2234 }, { "epoch": 0.14, "grad_norm": 1.3265035152435303, "learning_rate": 9.661561041851187e-06, "loss": 0.5887, "step": 2235 }, { "epoch": 0.14, "grad_norm": 1.1629674434661865, "learning_rate": 9.661182896797728e-06, "loss": 0.5487, "step": 2236 }, { "epoch": 0.14, "grad_norm": 1.3467267751693726, "learning_rate": 9.66080454801599e-06, "loss": 0.5795, "step": 2237 }, { "epoch": 0.14, "grad_norm": 1.260575294494629, "learning_rate": 9.66042599552251e-06, "loss": 0.5906, "step": 2238 }, { "epoch": 0.14, "grad_norm": 1.309519648551941, "learning_rate": 9.660047239333836e-06, "loss": 0.5822, "step": 2239 }, { "epoch": 0.14, "grad_norm": 1.2494131326675415, "learning_rate": 9.659668279466518e-06, "loss": 0.5542, "step": 2240 }, { "epoch": 0.14, "grad_norm": 1.3517229557037354, "learning_rate": 9.659289115937122e-06, "loss": 0.5832, "step": 2241 }, { "epoch": 0.14, "grad_norm": 1.3520346879959106, "learning_rate": 9.658909748762219e-06, "loss": 0.554, "step": 2242 }, { "epoch": 0.14, "grad_norm": 1.2834569215774536, "learning_rate": 9.658530177958393e-06, "loss": 0.616, "step": 2243 }, { "epoch": 0.14, "grad_norm": 1.4440158605575562, "learning_rate": 9.65815040354223e-06, "loss": 0.576, "step": 2244 }, { "epoch": 0.14, "grad_norm": 1.4062433242797852, "learning_rate": 9.657770425530332e-06, "loss": 0.5783, "step": 2245 }, { "epoch": 0.14, "grad_norm": 4.185822010040283, "learning_rate": 9.657390243939307e-06, "loss": 0.5904, "step": 2246 }, { "epoch": 0.15, "grad_norm": 1.170562982559204, "learning_rate": 9.65700985878577e-06, "loss": 0.5182, "step": 2247 }, { "epoch": 0.15, "grad_norm": 1.3173878192901611, "learning_rate": 9.656629270086346e-06, "loss": 0.6023, "step": 2248 }, { "epoch": 0.15, "grad_norm": 1.4052923917770386, "learning_rate": 9.656248477857673e-06, "loss": 0.6066, "step": 2249 }, { "epoch": 0.15, "grad_norm": 1.2516765594482422, "learning_rate": 9.655867482116391e-06, "loss": 0.5411, "step": 2250 }, { "epoch": 0.15, "grad_norm": 1.3880740404129028, "learning_rate": 9.655486282879154e-06, "loss": 0.605, "step": 2251 }, { "epoch": 0.15, "grad_norm": 1.3398581743240356, "learning_rate": 9.655104880162623e-06, "loss": 0.6023, "step": 2252 }, { "epoch": 0.15, "grad_norm": 1.1478582620620728, "learning_rate": 9.654723273983468e-06, "loss": 0.5664, "step": 2253 }, { "epoch": 0.15, "grad_norm": 1.2810457944869995, "learning_rate": 9.654341464358368e-06, "loss": 0.5317, "step": 2254 }, { "epoch": 0.15, "grad_norm": 1.3892611265182495, "learning_rate": 9.65395945130401e-06, "loss": 0.5647, "step": 2255 }, { "epoch": 0.15, "grad_norm": 1.3428596258163452, "learning_rate": 9.653577234837093e-06, "loss": 0.597, "step": 2256 }, { "epoch": 0.15, "grad_norm": 1.7454954385757446, "learning_rate": 9.65319481497432e-06, "loss": 0.6034, "step": 2257 }, { "epoch": 0.15, "grad_norm": 1.2904211282730103, "learning_rate": 9.652812191732409e-06, "loss": 0.5787, "step": 2258 }, { "epoch": 0.15, "grad_norm": 1.1765429973602295, "learning_rate": 9.65242936512808e-06, "loss": 0.5429, "step": 2259 }, { "epoch": 0.15, "grad_norm": 1.305192470550537, "learning_rate": 9.652046335178068e-06, "loss": 0.5995, "step": 2260 }, { "epoch": 0.15, "grad_norm": 1.1377174854278564, "learning_rate": 9.651663101899112e-06, "loss": 0.5645, "step": 2261 }, { "epoch": 0.15, "grad_norm": 1.4206033945083618, "learning_rate": 9.651279665307964e-06, "loss": 0.5536, "step": 2262 }, { "epoch": 0.15, "grad_norm": 1.3804030418395996, "learning_rate": 9.650896025421382e-06, "loss": 0.6559, "step": 2263 }, { "epoch": 0.15, "grad_norm": 1.582767367362976, "learning_rate": 9.650512182256135e-06, "loss": 0.5613, "step": 2264 }, { "epoch": 0.15, "grad_norm": 2.1341795921325684, "learning_rate": 9.650128135828998e-06, "loss": 0.5698, "step": 2265 }, { "epoch": 0.15, "grad_norm": 1.5176870822906494, "learning_rate": 9.649743886156756e-06, "loss": 0.5131, "step": 2266 }, { "epoch": 0.15, "grad_norm": 1.510414958000183, "learning_rate": 9.64935943325621e-06, "loss": 0.6034, "step": 2267 }, { "epoch": 0.15, "grad_norm": 1.2696515321731567, "learning_rate": 9.648974777144156e-06, "loss": 0.5719, "step": 2268 }, { "epoch": 0.15, "grad_norm": 1.1923511028289795, "learning_rate": 9.648589917837408e-06, "loss": 0.5844, "step": 2269 }, { "epoch": 0.15, "grad_norm": 1.2117177248001099, "learning_rate": 9.648204855352789e-06, "loss": 0.6189, "step": 2270 }, { "epoch": 0.15, "grad_norm": 1.7458269596099854, "learning_rate": 9.647819589707128e-06, "loss": 0.5926, "step": 2271 }, { "epoch": 0.15, "grad_norm": 1.1756089925765991, "learning_rate": 9.647434120917265e-06, "loss": 0.5617, "step": 2272 }, { "epoch": 0.15, "grad_norm": 1.4002065658569336, "learning_rate": 9.647048449000047e-06, "loss": 0.5971, "step": 2273 }, { "epoch": 0.15, "grad_norm": 1.331507921218872, "learning_rate": 9.64666257397233e-06, "loss": 0.6006, "step": 2274 }, { "epoch": 0.15, "grad_norm": 1.2057973146438599, "learning_rate": 9.646276495850982e-06, "loss": 0.5683, "step": 2275 }, { "epoch": 0.15, "grad_norm": 1.1592984199523926, "learning_rate": 9.645890214652875e-06, "loss": 0.595, "step": 2276 }, { "epoch": 0.15, "grad_norm": 1.2743752002716064, "learning_rate": 9.645503730394894e-06, "loss": 0.6366, "step": 2277 }, { "epoch": 0.15, "grad_norm": 1.401125192642212, "learning_rate": 9.64511704309393e-06, "loss": 0.5489, "step": 2278 }, { "epoch": 0.15, "grad_norm": 1.2972487211227417, "learning_rate": 9.644730152766883e-06, "loss": 0.5643, "step": 2279 }, { "epoch": 0.15, "grad_norm": 1.3308342695236206, "learning_rate": 9.644343059430667e-06, "loss": 0.6268, "step": 2280 }, { "epoch": 0.15, "grad_norm": 1.1566389799118042, "learning_rate": 9.643955763102197e-06, "loss": 0.5297, "step": 2281 }, { "epoch": 0.15, "grad_norm": 1.2108441591262817, "learning_rate": 9.643568263798402e-06, "loss": 0.5885, "step": 2282 }, { "epoch": 0.15, "grad_norm": 1.1659173965454102, "learning_rate": 9.64318056153622e-06, "loss": 0.5555, "step": 2283 }, { "epoch": 0.15, "grad_norm": 1.4342211484909058, "learning_rate": 9.642792656332593e-06, "loss": 0.5937, "step": 2284 }, { "epoch": 0.15, "grad_norm": 1.2208445072174072, "learning_rate": 9.64240454820448e-06, "loss": 0.5601, "step": 2285 }, { "epoch": 0.15, "grad_norm": 1.1705998182296753, "learning_rate": 9.642016237168841e-06, "loss": 0.5438, "step": 2286 }, { "epoch": 0.15, "grad_norm": 1.4870351552963257, "learning_rate": 9.641627723242648e-06, "loss": 0.6125, "step": 2287 }, { "epoch": 0.15, "grad_norm": 1.2587978839874268, "learning_rate": 9.641239006442883e-06, "loss": 0.5677, "step": 2288 }, { "epoch": 0.15, "grad_norm": 1.2312264442443848, "learning_rate": 9.640850086786538e-06, "loss": 0.6391, "step": 2289 }, { "epoch": 0.15, "grad_norm": 1.3610481023788452, "learning_rate": 9.640460964290607e-06, "loss": 0.5835, "step": 2290 }, { "epoch": 0.15, "grad_norm": 1.3551030158996582, "learning_rate": 9.640071638972099e-06, "loss": 0.5276, "step": 2291 }, { "epoch": 0.15, "grad_norm": 1.335957646369934, "learning_rate": 9.639682110848033e-06, "loss": 0.6148, "step": 2292 }, { "epoch": 0.15, "grad_norm": 1.295728087425232, "learning_rate": 9.639292379935432e-06, "loss": 0.5753, "step": 2293 }, { "epoch": 0.15, "grad_norm": 1.2942290306091309, "learning_rate": 9.63890244625133e-06, "loss": 0.5892, "step": 2294 }, { "epoch": 0.15, "grad_norm": 1.1726998090744019, "learning_rate": 9.63851230981277e-06, "loss": 0.5895, "step": 2295 }, { "epoch": 0.15, "grad_norm": 1.2293906211853027, "learning_rate": 9.638121970636807e-06, "loss": 0.5864, "step": 2296 }, { "epoch": 0.15, "grad_norm": 1.3276563882827759, "learning_rate": 9.637731428740498e-06, "loss": 0.5482, "step": 2297 }, { "epoch": 0.15, "grad_norm": 1.3116278648376465, "learning_rate": 9.637340684140913e-06, "loss": 0.5737, "step": 2298 }, { "epoch": 0.15, "grad_norm": 1.1556411981582642, "learning_rate": 9.636949736855133e-06, "loss": 0.5605, "step": 2299 }, { "epoch": 0.15, "grad_norm": 1.2626363039016724, "learning_rate": 9.63655858690024e-06, "loss": 0.5871, "step": 2300 }, { "epoch": 0.15, "grad_norm": 1.2934354543685913, "learning_rate": 9.636167234293337e-06, "loss": 0.6254, "step": 2301 }, { "epoch": 0.15, "grad_norm": 1.1421860456466675, "learning_rate": 9.635775679051525e-06, "loss": 0.5666, "step": 2302 }, { "epoch": 0.15, "grad_norm": 1.2716065645217896, "learning_rate": 9.635383921191916e-06, "loss": 0.5842, "step": 2303 }, { "epoch": 0.15, "grad_norm": 1.280240535736084, "learning_rate": 9.634991960731637e-06, "loss": 0.5589, "step": 2304 }, { "epoch": 0.15, "grad_norm": 1.2456196546554565, "learning_rate": 9.63459979768782e-06, "loss": 0.5419, "step": 2305 }, { "epoch": 0.15, "grad_norm": 1.1934056282043457, "learning_rate": 9.6342074320776e-06, "loss": 0.5846, "step": 2306 }, { "epoch": 0.15, "grad_norm": 1.188418984413147, "learning_rate": 9.633814863918131e-06, "loss": 0.5494, "step": 2307 }, { "epoch": 0.15, "grad_norm": 1.3069669008255005, "learning_rate": 9.63342209322657e-06, "loss": 0.5905, "step": 2308 }, { "epoch": 0.15, "grad_norm": 1.2642948627471924, "learning_rate": 9.633029120020083e-06, "loss": 0.5984, "step": 2309 }, { "epoch": 0.15, "grad_norm": 1.1840351819992065, "learning_rate": 9.632635944315847e-06, "loss": 0.577, "step": 2310 }, { "epoch": 0.15, "grad_norm": 1.3262995481491089, "learning_rate": 9.632242566131046e-06, "loss": 0.5761, "step": 2311 }, { "epoch": 0.15, "grad_norm": 1.4620412588119507, "learning_rate": 9.631848985482874e-06, "loss": 0.5819, "step": 2312 }, { "epoch": 0.15, "grad_norm": 1.2174646854400635, "learning_rate": 9.631455202388534e-06, "loss": 0.6027, "step": 2313 }, { "epoch": 0.15, "grad_norm": 1.2771097421646118, "learning_rate": 9.631061216865234e-06, "loss": 0.5944, "step": 2314 }, { "epoch": 0.15, "grad_norm": 1.3530374765396118, "learning_rate": 9.630667028930199e-06, "loss": 0.6121, "step": 2315 }, { "epoch": 0.15, "grad_norm": 1.3077861070632935, "learning_rate": 9.630272638600653e-06, "loss": 0.5759, "step": 2316 }, { "epoch": 0.15, "grad_norm": 1.1621959209442139, "learning_rate": 9.629878045893838e-06, "loss": 0.5254, "step": 2317 }, { "epoch": 0.15, "grad_norm": 1.4047924280166626, "learning_rate": 9.629483250826998e-06, "loss": 0.5875, "step": 2318 }, { "epoch": 0.15, "grad_norm": 1.1966543197631836, "learning_rate": 9.62908825341739e-06, "loss": 0.605, "step": 2319 }, { "epoch": 0.15, "grad_norm": 1.19468355178833, "learning_rate": 9.628693053682277e-06, "loss": 0.5665, "step": 2320 }, { "epoch": 0.15, "grad_norm": 1.2534891366958618, "learning_rate": 9.628297651638934e-06, "loss": 0.5983, "step": 2321 }, { "epoch": 0.15, "grad_norm": 1.1702730655670166, "learning_rate": 9.62790204730464e-06, "loss": 0.5431, "step": 2322 }, { "epoch": 0.15, "grad_norm": 1.1945624351501465, "learning_rate": 9.62750624069669e-06, "loss": 0.5799, "step": 2323 }, { "epoch": 0.15, "grad_norm": 1.2750418186187744, "learning_rate": 9.62711023183238e-06, "loss": 0.5533, "step": 2324 }, { "epoch": 0.15, "grad_norm": 1.1245088577270508, "learning_rate": 9.62671402072902e-06, "loss": 0.5331, "step": 2325 }, { "epoch": 0.15, "grad_norm": 1.2517772912979126, "learning_rate": 9.626317607403926e-06, "loss": 0.5877, "step": 2326 }, { "epoch": 0.15, "grad_norm": 1.2432036399841309, "learning_rate": 9.625920991874426e-06, "loss": 0.5518, "step": 2327 }, { "epoch": 0.15, "grad_norm": 1.090569019317627, "learning_rate": 9.625524174157857e-06, "loss": 0.5663, "step": 2328 }, { "epoch": 0.15, "grad_norm": 1.3824708461761475, "learning_rate": 9.625127154271557e-06, "loss": 0.5876, "step": 2329 }, { "epoch": 0.15, "grad_norm": 1.1558890342712402, "learning_rate": 9.624729932232883e-06, "loss": 0.5603, "step": 2330 }, { "epoch": 0.15, "grad_norm": 1.122809886932373, "learning_rate": 9.624332508059198e-06, "loss": 0.5867, "step": 2331 }, { "epoch": 0.15, "grad_norm": 1.760324239730835, "learning_rate": 9.623934881767867e-06, "loss": 0.5686, "step": 2332 }, { "epoch": 0.15, "grad_norm": 1.195502519607544, "learning_rate": 9.623537053376272e-06, "loss": 0.5803, "step": 2333 }, { "epoch": 0.15, "grad_norm": 1.2242933511734009, "learning_rate": 9.623139022901803e-06, "loss": 0.5722, "step": 2334 }, { "epoch": 0.15, "grad_norm": 1.319095253944397, "learning_rate": 9.622740790361855e-06, "loss": 0.6124, "step": 2335 }, { "epoch": 0.15, "grad_norm": 1.3603407144546509, "learning_rate": 9.622342355773836e-06, "loss": 0.6253, "step": 2336 }, { "epoch": 0.15, "grad_norm": 1.242473840713501, "learning_rate": 9.621943719155156e-06, "loss": 0.6175, "step": 2337 }, { "epoch": 0.15, "grad_norm": 1.2014567852020264, "learning_rate": 9.621544880523239e-06, "loss": 0.597, "step": 2338 }, { "epoch": 0.15, "grad_norm": 1.2141786813735962, "learning_rate": 9.621145839895522e-06, "loss": 0.5873, "step": 2339 }, { "epoch": 0.15, "grad_norm": 1.470713496208191, "learning_rate": 9.620746597289442e-06, "loss": 0.6435, "step": 2340 }, { "epoch": 0.15, "grad_norm": 1.2061851024627686, "learning_rate": 9.62034715272245e-06, "loss": 0.6505, "step": 2341 }, { "epoch": 0.15, "grad_norm": 1.2508490085601807, "learning_rate": 9.619947506212003e-06, "loss": 0.6301, "step": 2342 }, { "epoch": 0.15, "grad_norm": 1.1183902025222778, "learning_rate": 9.619547657775573e-06, "loss": 0.5643, "step": 2343 }, { "epoch": 0.15, "grad_norm": 1.25606107711792, "learning_rate": 9.619147607430633e-06, "loss": 0.5873, "step": 2344 }, { "epoch": 0.15, "grad_norm": 1.1922812461853027, "learning_rate": 9.618747355194666e-06, "loss": 0.6163, "step": 2345 }, { "epoch": 0.15, "grad_norm": 1.1700571775436401, "learning_rate": 9.618346901085172e-06, "loss": 0.5856, "step": 2346 }, { "epoch": 0.15, "grad_norm": 1.315779685974121, "learning_rate": 9.617946245119648e-06, "loss": 0.6786, "step": 2347 }, { "epoch": 0.15, "grad_norm": 1.2427393198013306, "learning_rate": 9.617545387315609e-06, "loss": 0.6498, "step": 2348 }, { "epoch": 0.15, "grad_norm": 1.2601161003112793, "learning_rate": 9.617144327690576e-06, "loss": 0.6082, "step": 2349 }, { "epoch": 0.15, "grad_norm": 1.1108661890029907, "learning_rate": 9.616743066262073e-06, "loss": 0.5555, "step": 2350 }, { "epoch": 0.15, "grad_norm": 1.173190951347351, "learning_rate": 9.616341603047645e-06, "loss": 0.6223, "step": 2351 }, { "epoch": 0.15, "grad_norm": 1.2171519994735718, "learning_rate": 9.615939938064836e-06, "loss": 0.5968, "step": 2352 }, { "epoch": 0.15, "grad_norm": 1.0905098915100098, "learning_rate": 9.6155380713312e-06, "loss": 0.5442, "step": 2353 }, { "epoch": 0.15, "grad_norm": 1.1807969808578491, "learning_rate": 9.615136002864304e-06, "loss": 0.5898, "step": 2354 }, { "epoch": 0.15, "grad_norm": 1.2014340162277222, "learning_rate": 9.61473373268172e-06, "loss": 0.5702, "step": 2355 }, { "epoch": 0.15, "grad_norm": 1.1515522003173828, "learning_rate": 9.614331260801031e-06, "loss": 0.5558, "step": 2356 }, { "epoch": 0.15, "grad_norm": 1.2614624500274658, "learning_rate": 9.613928587239827e-06, "loss": 0.5692, "step": 2357 }, { "epoch": 0.15, "grad_norm": 1.2551140785217285, "learning_rate": 9.61352571201571e-06, "loss": 0.5978, "step": 2358 }, { "epoch": 0.15, "grad_norm": 1.1464985609054565, "learning_rate": 9.613122635146286e-06, "loss": 0.5227, "step": 2359 }, { "epoch": 0.15, "grad_norm": 1.1588118076324463, "learning_rate": 9.612719356649174e-06, "loss": 0.5707, "step": 2360 }, { "epoch": 0.15, "grad_norm": 1.349876046180725, "learning_rate": 9.612315876542002e-06, "loss": 0.6023, "step": 2361 }, { "epoch": 0.15, "grad_norm": 1.2100404500961304, "learning_rate": 9.6119121948424e-06, "loss": 0.5827, "step": 2362 }, { "epoch": 0.15, "grad_norm": 1.4080336093902588, "learning_rate": 9.611508311568016e-06, "loss": 0.5559, "step": 2363 }, { "epoch": 0.15, "grad_norm": 1.2372465133666992, "learning_rate": 9.611104226736501e-06, "loss": 0.6233, "step": 2364 }, { "epoch": 0.15, "grad_norm": 1.2606184482574463, "learning_rate": 9.610699940365517e-06, "loss": 0.5587, "step": 2365 }, { "epoch": 0.15, "grad_norm": 1.22464919090271, "learning_rate": 9.610295452472735e-06, "loss": 0.6122, "step": 2366 }, { "epoch": 0.15, "grad_norm": 1.2917839288711548, "learning_rate": 9.609890763075835e-06, "loss": 0.628, "step": 2367 }, { "epoch": 0.15, "grad_norm": 1.3566125631332397, "learning_rate": 9.609485872192501e-06, "loss": 0.6071, "step": 2368 }, { "epoch": 0.15, "grad_norm": 1.4034277200698853, "learning_rate": 9.609080779840434e-06, "loss": 0.5793, "step": 2369 }, { "epoch": 0.15, "grad_norm": 1.0869051218032837, "learning_rate": 9.608675486037336e-06, "loss": 0.5751, "step": 2370 }, { "epoch": 0.15, "grad_norm": 1.2937740087509155, "learning_rate": 9.608269990800923e-06, "loss": 0.5904, "step": 2371 }, { "epoch": 0.15, "grad_norm": 1.1988797187805176, "learning_rate": 9.607864294148918e-06, "loss": 0.591, "step": 2372 }, { "epoch": 0.15, "grad_norm": 1.2434523105621338, "learning_rate": 9.607458396099055e-06, "loss": 0.5954, "step": 2373 }, { "epoch": 0.15, "grad_norm": 1.2183754444122314, "learning_rate": 9.607052296669072e-06, "loss": 0.5664, "step": 2374 }, { "epoch": 0.15, "grad_norm": 1.1635944843292236, "learning_rate": 9.60664599587672e-06, "loss": 0.575, "step": 2375 }, { "epoch": 0.15, "grad_norm": 1.21683669090271, "learning_rate": 9.606239493739755e-06, "loss": 0.5952, "step": 2376 }, { "epoch": 0.15, "grad_norm": 1.1796786785125732, "learning_rate": 9.605832790275947e-06, "loss": 0.6098, "step": 2377 }, { "epoch": 0.15, "grad_norm": 1.3026518821716309, "learning_rate": 9.605425885503073e-06, "loss": 0.5366, "step": 2378 }, { "epoch": 0.15, "grad_norm": 1.2523835897445679, "learning_rate": 9.605018779438913e-06, "loss": 0.5924, "step": 2379 }, { "epoch": 0.15, "grad_norm": 1.2526859045028687, "learning_rate": 9.604611472101263e-06, "loss": 0.5788, "step": 2380 }, { "epoch": 0.15, "grad_norm": 1.2881114482879639, "learning_rate": 9.604203963507928e-06, "loss": 0.4954, "step": 2381 }, { "epoch": 0.15, "grad_norm": 1.1942857503890991, "learning_rate": 9.603796253676715e-06, "loss": 0.5711, "step": 2382 }, { "epoch": 0.15, "grad_norm": 1.4902167320251465, "learning_rate": 9.603388342625446e-06, "loss": 0.5852, "step": 2383 }, { "epoch": 0.15, "grad_norm": 1.204564094543457, "learning_rate": 9.60298023037195e-06, "loss": 0.6177, "step": 2384 }, { "epoch": 0.15, "grad_norm": 1.1612284183502197, "learning_rate": 9.602571916934064e-06, "loss": 0.5682, "step": 2385 }, { "epoch": 0.15, "grad_norm": 1.281968116760254, "learning_rate": 9.602163402329633e-06, "loss": 0.5862, "step": 2386 }, { "epoch": 0.15, "grad_norm": 1.2823102474212646, "learning_rate": 9.601754686576514e-06, "loss": 0.5605, "step": 2387 }, { "epoch": 0.15, "grad_norm": 1.2411175966262817, "learning_rate": 9.601345769692572e-06, "loss": 0.6001, "step": 2388 }, { "epoch": 0.15, "grad_norm": 1.2741707563400269, "learning_rate": 9.600936651695676e-06, "loss": 0.5811, "step": 2389 }, { "epoch": 0.15, "grad_norm": 1.4653784036636353, "learning_rate": 9.60052733260371e-06, "loss": 0.5682, "step": 2390 }, { "epoch": 0.15, "grad_norm": 1.361946940422058, "learning_rate": 9.600117812434565e-06, "loss": 0.6063, "step": 2391 }, { "epoch": 0.15, "grad_norm": 1.2543718814849854, "learning_rate": 9.599708091206137e-06, "loss": 0.5217, "step": 2392 }, { "epoch": 0.15, "grad_norm": 1.3503762483596802, "learning_rate": 9.599298168936338e-06, "loss": 0.6448, "step": 2393 }, { "epoch": 0.15, "grad_norm": 1.3778592348098755, "learning_rate": 9.59888804564308e-06, "loss": 0.5617, "step": 2394 }, { "epoch": 0.15, "grad_norm": 1.2375470399856567, "learning_rate": 9.598477721344293e-06, "loss": 0.6026, "step": 2395 }, { "epoch": 0.15, "grad_norm": 1.2651448249816895, "learning_rate": 9.598067196057907e-06, "loss": 0.5521, "step": 2396 }, { "epoch": 0.15, "grad_norm": 1.3592066764831543, "learning_rate": 9.597656469801868e-06, "loss": 0.6129, "step": 2397 }, { "epoch": 0.15, "grad_norm": 1.3236581087112427, "learning_rate": 9.597245542594127e-06, "loss": 0.5601, "step": 2398 }, { "epoch": 0.15, "grad_norm": 1.239410400390625, "learning_rate": 9.596834414452642e-06, "loss": 0.5595, "step": 2399 }, { "epoch": 0.15, "grad_norm": 1.1781115531921387, "learning_rate": 9.596423085395388e-06, "loss": 0.5642, "step": 2400 }, { "epoch": 0.15, "grad_norm": 1.247229814529419, "learning_rate": 9.596011555440338e-06, "loss": 0.561, "step": 2401 }, { "epoch": 0.16, "grad_norm": 1.1713725328445435, "learning_rate": 9.595599824605482e-06, "loss": 0.5638, "step": 2402 }, { "epoch": 0.16, "grad_norm": 1.1964600086212158, "learning_rate": 9.595187892908814e-06, "loss": 0.5866, "step": 2403 }, { "epoch": 0.16, "grad_norm": 1.2125821113586426, "learning_rate": 9.59477576036834e-06, "loss": 0.4873, "step": 2404 }, { "epoch": 0.16, "grad_norm": 1.2931959629058838, "learning_rate": 9.594363427002071e-06, "loss": 0.5652, "step": 2405 }, { "epoch": 0.16, "grad_norm": 1.1264708042144775, "learning_rate": 9.59395089282803e-06, "loss": 0.5664, "step": 2406 }, { "epoch": 0.16, "grad_norm": 1.1939256191253662, "learning_rate": 9.593538157864249e-06, "loss": 0.5415, "step": 2407 }, { "epoch": 0.16, "grad_norm": 1.1859723329544067, "learning_rate": 9.593125222128766e-06, "loss": 0.537, "step": 2408 }, { "epoch": 0.16, "grad_norm": 1.694649338722229, "learning_rate": 9.592712085639629e-06, "loss": 0.6107, "step": 2409 }, { "epoch": 0.16, "grad_norm": 1.5309089422225952, "learning_rate": 9.592298748414896e-06, "loss": 0.5902, "step": 2410 }, { "epoch": 0.16, "grad_norm": 1.316836953163147, "learning_rate": 9.591885210472635e-06, "loss": 0.5875, "step": 2411 }, { "epoch": 0.16, "grad_norm": 1.3255391120910645, "learning_rate": 9.591471471830918e-06, "loss": 0.5794, "step": 2412 }, { "epoch": 0.16, "grad_norm": 1.191695213317871, "learning_rate": 9.591057532507828e-06, "loss": 0.5597, "step": 2413 }, { "epoch": 0.16, "grad_norm": 1.2373523712158203, "learning_rate": 9.59064339252146e-06, "loss": 0.55, "step": 2414 }, { "epoch": 0.16, "grad_norm": 1.2075889110565186, "learning_rate": 9.590229051889912e-06, "loss": 0.5829, "step": 2415 }, { "epoch": 0.16, "grad_norm": 1.380658745765686, "learning_rate": 9.589814510631297e-06, "loss": 0.5701, "step": 2416 }, { "epoch": 0.16, "grad_norm": 1.2278848886489868, "learning_rate": 9.589399768763729e-06, "loss": 0.6299, "step": 2417 }, { "epoch": 0.16, "grad_norm": 1.3189893960952759, "learning_rate": 9.588984826305338e-06, "loss": 0.5535, "step": 2418 }, { "epoch": 0.16, "grad_norm": 1.2117804288864136, "learning_rate": 9.588569683274262e-06, "loss": 0.6118, "step": 2419 }, { "epoch": 0.16, "grad_norm": 1.3241063356399536, "learning_rate": 9.588154339688643e-06, "loss": 0.648, "step": 2420 }, { "epoch": 0.16, "grad_norm": 1.1977243423461914, "learning_rate": 9.587738795566636e-06, "loss": 0.5574, "step": 2421 }, { "epoch": 0.16, "grad_norm": 1.1634418964385986, "learning_rate": 9.587323050926403e-06, "loss": 0.5668, "step": 2422 }, { "epoch": 0.16, "grad_norm": 1.1035184860229492, "learning_rate": 9.586907105786113e-06, "loss": 0.5737, "step": 2423 }, { "epoch": 0.16, "grad_norm": 1.1982890367507935, "learning_rate": 9.586490960163948e-06, "loss": 0.571, "step": 2424 }, { "epoch": 0.16, "grad_norm": 1.3244956731796265, "learning_rate": 9.586074614078097e-06, "loss": 0.5743, "step": 2425 }, { "epoch": 0.16, "grad_norm": 1.16761314868927, "learning_rate": 9.585658067546758e-06, "loss": 0.6005, "step": 2426 }, { "epoch": 0.16, "grad_norm": 1.621085286140442, "learning_rate": 9.585241320588135e-06, "loss": 0.5897, "step": 2427 }, { "epoch": 0.16, "grad_norm": 1.2167242765426636, "learning_rate": 9.584824373220444e-06, "loss": 0.5729, "step": 2428 }, { "epoch": 0.16, "grad_norm": 1.1912950277328491, "learning_rate": 9.584407225461909e-06, "loss": 0.5694, "step": 2429 }, { "epoch": 0.16, "grad_norm": 1.1641710996627808, "learning_rate": 9.583989877330762e-06, "loss": 0.5943, "step": 2430 }, { "epoch": 0.16, "grad_norm": 1.2249525785446167, "learning_rate": 9.583572328845244e-06, "loss": 0.5543, "step": 2431 }, { "epoch": 0.16, "grad_norm": 1.213224172592163, "learning_rate": 9.583154580023604e-06, "loss": 0.6152, "step": 2432 }, { "epoch": 0.16, "grad_norm": 1.1871885061264038, "learning_rate": 9.582736630884104e-06, "loss": 0.5823, "step": 2433 }, { "epoch": 0.16, "grad_norm": 1.1391040086746216, "learning_rate": 9.58231848144501e-06, "loss": 0.5905, "step": 2434 }, { "epoch": 0.16, "grad_norm": 1.2008001804351807, "learning_rate": 9.581900131724595e-06, "loss": 0.6095, "step": 2435 }, { "epoch": 0.16, "grad_norm": 1.2064872980117798, "learning_rate": 9.581481581741148e-06, "loss": 0.592, "step": 2436 }, { "epoch": 0.16, "grad_norm": 1.2820247411727905, "learning_rate": 9.581062831512962e-06, "loss": 0.5353, "step": 2437 }, { "epoch": 0.16, "grad_norm": 1.1917152404785156, "learning_rate": 9.58064388105834e-06, "loss": 0.591, "step": 2438 }, { "epoch": 0.16, "grad_norm": 1.3513128757476807, "learning_rate": 9.580224730395591e-06, "loss": 0.5704, "step": 2439 }, { "epoch": 0.16, "grad_norm": 1.185243010520935, "learning_rate": 9.579805379543034e-06, "loss": 0.5948, "step": 2440 }, { "epoch": 0.16, "grad_norm": 1.202951192855835, "learning_rate": 9.579385828519003e-06, "loss": 0.6006, "step": 2441 }, { "epoch": 0.16, "grad_norm": 1.2421716451644897, "learning_rate": 9.578966077341831e-06, "loss": 0.6009, "step": 2442 }, { "epoch": 0.16, "grad_norm": 1.2276034355163574, "learning_rate": 9.578546126029867e-06, "loss": 0.5527, "step": 2443 }, { "epoch": 0.16, "grad_norm": 1.3740376234054565, "learning_rate": 9.578125974601463e-06, "loss": 0.6067, "step": 2444 }, { "epoch": 0.16, "grad_norm": 1.4830857515335083, "learning_rate": 9.577705623074985e-06, "loss": 0.5865, "step": 2445 }, { "epoch": 0.16, "grad_norm": 1.308512568473816, "learning_rate": 9.577285071468804e-06, "loss": 0.5838, "step": 2446 }, { "epoch": 0.16, "grad_norm": 1.3435239791870117, "learning_rate": 9.576864319801304e-06, "loss": 0.5994, "step": 2447 }, { "epoch": 0.16, "grad_norm": 1.1191478967666626, "learning_rate": 9.576443368090872e-06, "loss": 0.5563, "step": 2448 }, { "epoch": 0.16, "grad_norm": 1.0840299129486084, "learning_rate": 9.576022216355908e-06, "loss": 0.5719, "step": 2449 }, { "epoch": 0.16, "grad_norm": 1.2278226613998413, "learning_rate": 9.575600864614817e-06, "loss": 0.5923, "step": 2450 }, { "epoch": 0.16, "grad_norm": 1.3050472736358643, "learning_rate": 9.57517931288602e-06, "loss": 0.6236, "step": 2451 }, { "epoch": 0.16, "grad_norm": 1.3628865480422974, "learning_rate": 9.57475756118794e-06, "loss": 0.5264, "step": 2452 }, { "epoch": 0.16, "grad_norm": 1.287627100944519, "learning_rate": 9.574335609539008e-06, "loss": 0.6338, "step": 2453 }, { "epoch": 0.16, "grad_norm": 1.1064939498901367, "learning_rate": 9.57391345795767e-06, "loss": 0.5827, "step": 2454 }, { "epoch": 0.16, "grad_norm": 1.1522388458251953, "learning_rate": 9.573491106462373e-06, "loss": 0.553, "step": 2455 }, { "epoch": 0.16, "grad_norm": 1.188976526260376, "learning_rate": 9.573068555071582e-06, "loss": 0.5412, "step": 2456 }, { "epoch": 0.16, "grad_norm": 1.1021692752838135, "learning_rate": 9.572645803803764e-06, "loss": 0.5676, "step": 2457 }, { "epoch": 0.16, "grad_norm": 1.1708279848098755, "learning_rate": 9.572222852677393e-06, "loss": 0.5805, "step": 2458 }, { "epoch": 0.16, "grad_norm": 1.5551587343215942, "learning_rate": 9.571799701710958e-06, "loss": 0.5826, "step": 2459 }, { "epoch": 0.16, "grad_norm": 1.185505747795105, "learning_rate": 9.571376350922954e-06, "loss": 0.5979, "step": 2460 }, { "epoch": 0.16, "grad_norm": 1.2637628316879272, "learning_rate": 9.570952800331885e-06, "loss": 0.5957, "step": 2461 }, { "epoch": 0.16, "grad_norm": 1.1700838804244995, "learning_rate": 9.57052904995626e-06, "loss": 0.5629, "step": 2462 }, { "epoch": 0.16, "grad_norm": 1.5035780668258667, "learning_rate": 9.570105099814603e-06, "loss": 0.5335, "step": 2463 }, { "epoch": 0.16, "grad_norm": 1.2153022289276123, "learning_rate": 9.569680949925446e-06, "loss": 0.5547, "step": 2464 }, { "epoch": 0.16, "grad_norm": 1.326195240020752, "learning_rate": 9.569256600307321e-06, "loss": 0.5956, "step": 2465 }, { "epoch": 0.16, "grad_norm": 1.4071964025497437, "learning_rate": 9.56883205097878e-06, "loss": 0.5157, "step": 2466 }, { "epoch": 0.16, "grad_norm": 1.2796958684921265, "learning_rate": 9.56840730195838e-06, "loss": 0.6463, "step": 2467 }, { "epoch": 0.16, "grad_norm": 1.2527236938476562, "learning_rate": 9.56798235326468e-06, "loss": 0.5743, "step": 2468 }, { "epoch": 0.16, "grad_norm": 1.2283459901809692, "learning_rate": 9.567557204916259e-06, "loss": 0.5466, "step": 2469 }, { "epoch": 0.16, "grad_norm": 1.2601817846298218, "learning_rate": 9.567131856931696e-06, "loss": 0.5972, "step": 2470 }, { "epoch": 0.16, "grad_norm": 1.3324599266052246, "learning_rate": 9.566706309329582e-06, "loss": 0.588, "step": 2471 }, { "epoch": 0.16, "grad_norm": 1.2477879524230957, "learning_rate": 9.56628056212852e-06, "loss": 0.5486, "step": 2472 }, { "epoch": 0.16, "grad_norm": 1.324092984199524, "learning_rate": 9.565854615347114e-06, "loss": 0.5604, "step": 2473 }, { "epoch": 0.16, "grad_norm": 1.2545945644378662, "learning_rate": 9.565428469003984e-06, "loss": 0.6125, "step": 2474 }, { "epoch": 0.16, "grad_norm": 1.2465825080871582, "learning_rate": 9.565002123117755e-06, "loss": 0.5953, "step": 2475 }, { "epoch": 0.16, "grad_norm": 1.373238444328308, "learning_rate": 9.56457557770706e-06, "loss": 0.6192, "step": 2476 }, { "epoch": 0.16, "grad_norm": 1.2470842599868774, "learning_rate": 9.564148832790542e-06, "loss": 0.6107, "step": 2477 }, { "epoch": 0.16, "grad_norm": 1.1954013109207153, "learning_rate": 9.563721888386856e-06, "loss": 0.5899, "step": 2478 }, { "epoch": 0.16, "grad_norm": 1.1691445112228394, "learning_rate": 9.563294744514661e-06, "loss": 0.598, "step": 2479 }, { "epoch": 0.16, "grad_norm": 1.5453540086746216, "learning_rate": 9.562867401192626e-06, "loss": 0.5746, "step": 2480 }, { "epoch": 0.16, "grad_norm": 1.445894479751587, "learning_rate": 9.562439858439427e-06, "loss": 0.5981, "step": 2481 }, { "epoch": 0.16, "grad_norm": 1.3509982824325562, "learning_rate": 9.562012116273756e-06, "loss": 0.5892, "step": 2482 }, { "epoch": 0.16, "grad_norm": 1.1153353452682495, "learning_rate": 9.561584174714304e-06, "loss": 0.5734, "step": 2483 }, { "epoch": 0.16, "grad_norm": 1.0469130277633667, "learning_rate": 9.561156033779778e-06, "loss": 0.562, "step": 2484 }, { "epoch": 0.16, "grad_norm": 1.0347833633422852, "learning_rate": 9.560727693488886e-06, "loss": 0.5362, "step": 2485 }, { "epoch": 0.16, "grad_norm": 1.165458083152771, "learning_rate": 9.560299153860358e-06, "loss": 0.5458, "step": 2486 }, { "epoch": 0.16, "grad_norm": 1.3159316778182983, "learning_rate": 9.559870414912917e-06, "loss": 0.6134, "step": 2487 }, { "epoch": 0.16, "grad_norm": 1.2301976680755615, "learning_rate": 9.559441476665307e-06, "loss": 0.5475, "step": 2488 }, { "epoch": 0.16, "grad_norm": 1.1921168565750122, "learning_rate": 9.559012339136272e-06, "loss": 0.5866, "step": 2489 }, { "epoch": 0.16, "grad_norm": 1.1595447063446045, "learning_rate": 9.55858300234457e-06, "loss": 0.5586, "step": 2490 }, { "epoch": 0.16, "grad_norm": 1.1792163848876953, "learning_rate": 9.558153466308965e-06, "loss": 0.5573, "step": 2491 }, { "epoch": 0.16, "grad_norm": 1.183424472808838, "learning_rate": 9.557723731048236e-06, "loss": 0.5467, "step": 2492 }, { "epoch": 0.16, "grad_norm": 1.1261142492294312, "learning_rate": 9.55729379658116e-06, "loss": 0.5572, "step": 2493 }, { "epoch": 0.16, "grad_norm": 1.1467530727386475, "learning_rate": 9.556863662926528e-06, "loss": 0.5267, "step": 2494 }, { "epoch": 0.16, "grad_norm": 1.1247540712356567, "learning_rate": 9.556433330103145e-06, "loss": 0.558, "step": 2495 }, { "epoch": 0.16, "grad_norm": 1.3648779392242432, "learning_rate": 9.556002798129815e-06, "loss": 0.5822, "step": 2496 }, { "epoch": 0.16, "grad_norm": 1.2184350490570068, "learning_rate": 9.555572067025359e-06, "loss": 0.6055, "step": 2497 }, { "epoch": 0.16, "grad_norm": 1.1832987070083618, "learning_rate": 9.5551411368086e-06, "loss": 0.5566, "step": 2498 }, { "epoch": 0.16, "grad_norm": 1.2198398113250732, "learning_rate": 9.554710007498374e-06, "loss": 0.6037, "step": 2499 }, { "epoch": 0.16, "grad_norm": 1.2121220827102661, "learning_rate": 9.554278679113527e-06, "loss": 0.5292, "step": 2500 }, { "epoch": 0.16, "grad_norm": 1.251957893371582, "learning_rate": 9.553847151672906e-06, "loss": 0.6323, "step": 2501 }, { "epoch": 0.16, "grad_norm": 1.163297414779663, "learning_rate": 9.553415425195378e-06, "loss": 0.6149, "step": 2502 }, { "epoch": 0.16, "grad_norm": 1.342529535293579, "learning_rate": 9.552983499699808e-06, "loss": 0.6081, "step": 2503 }, { "epoch": 0.16, "grad_norm": 1.4691011905670166, "learning_rate": 9.552551375205077e-06, "loss": 0.6882, "step": 2504 }, { "epoch": 0.16, "grad_norm": 1.278033971786499, "learning_rate": 9.55211905173007e-06, "loss": 0.5649, "step": 2505 }, { "epoch": 0.16, "grad_norm": 1.273820161819458, "learning_rate": 9.551686529293686e-06, "loss": 0.6267, "step": 2506 }, { "epoch": 0.16, "grad_norm": 1.1661673784255981, "learning_rate": 9.551253807914827e-06, "loss": 0.6287, "step": 2507 }, { "epoch": 0.16, "grad_norm": 1.138310194015503, "learning_rate": 9.550820887612405e-06, "loss": 0.5648, "step": 2508 }, { "epoch": 0.16, "grad_norm": 1.3597909212112427, "learning_rate": 9.550387768405342e-06, "loss": 0.5755, "step": 2509 }, { "epoch": 0.16, "grad_norm": 1.1849324703216553, "learning_rate": 9.549954450312574e-06, "loss": 0.5632, "step": 2510 }, { "epoch": 0.16, "grad_norm": 1.2920811176300049, "learning_rate": 9.549520933353032e-06, "loss": 0.5416, "step": 2511 }, { "epoch": 0.16, "grad_norm": 1.1722428798675537, "learning_rate": 9.54908721754567e-06, "loss": 0.5967, "step": 2512 }, { "epoch": 0.16, "grad_norm": 1.1306278705596924, "learning_rate": 9.548653302909441e-06, "loss": 0.5246, "step": 2513 }, { "epoch": 0.16, "grad_norm": 1.285272479057312, "learning_rate": 9.548219189463315e-06, "loss": 0.5549, "step": 2514 }, { "epoch": 0.16, "grad_norm": 1.2518268823623657, "learning_rate": 9.54778487722626e-06, "loss": 0.6076, "step": 2515 }, { "epoch": 0.16, "grad_norm": 1.363383173942566, "learning_rate": 9.547350366217262e-06, "loss": 0.5457, "step": 2516 }, { "epoch": 0.16, "grad_norm": 1.254520297050476, "learning_rate": 9.54691565645531e-06, "loss": 0.5884, "step": 2517 }, { "epoch": 0.16, "grad_norm": 1.4913491010665894, "learning_rate": 9.54648074795941e-06, "loss": 0.6036, "step": 2518 }, { "epoch": 0.16, "grad_norm": 1.2105106115341187, "learning_rate": 9.546045640748564e-06, "loss": 0.5614, "step": 2519 }, { "epoch": 0.16, "grad_norm": 1.146660327911377, "learning_rate": 9.545610334841793e-06, "loss": 0.5308, "step": 2520 }, { "epoch": 0.16, "grad_norm": 1.3622645139694214, "learning_rate": 9.54517483025812e-06, "loss": 0.6001, "step": 2521 }, { "epoch": 0.16, "grad_norm": 1.1390268802642822, "learning_rate": 9.544739127016585e-06, "loss": 0.616, "step": 2522 }, { "epoch": 0.16, "grad_norm": 1.1952205896377563, "learning_rate": 9.544303225136225e-06, "loss": 0.5227, "step": 2523 }, { "epoch": 0.16, "grad_norm": 1.2356319427490234, "learning_rate": 9.543867124636097e-06, "loss": 0.5812, "step": 2524 }, { "epoch": 0.16, "grad_norm": 1.2104296684265137, "learning_rate": 9.54343082553526e-06, "loss": 0.5732, "step": 2525 }, { "epoch": 0.16, "grad_norm": 1.6525534391403198, "learning_rate": 9.542994327852786e-06, "loss": 0.568, "step": 2526 }, { "epoch": 0.16, "grad_norm": 2.119110107421875, "learning_rate": 9.542557631607749e-06, "loss": 0.6181, "step": 2527 }, { "epoch": 0.16, "grad_norm": 1.2049134969711304, "learning_rate": 9.542120736819239e-06, "loss": 0.5473, "step": 2528 }, { "epoch": 0.16, "grad_norm": 1.3038384914398193, "learning_rate": 9.541683643506348e-06, "loss": 0.614, "step": 2529 }, { "epoch": 0.16, "grad_norm": 1.2822033166885376, "learning_rate": 9.541246351688185e-06, "loss": 0.5535, "step": 2530 }, { "epoch": 0.16, "grad_norm": 1.1599940061569214, "learning_rate": 9.54080886138386e-06, "loss": 0.5779, "step": 2531 }, { "epoch": 0.16, "grad_norm": 1.0870636701583862, "learning_rate": 9.540371172612494e-06, "loss": 0.5414, "step": 2532 }, { "epoch": 0.16, "grad_norm": 1.325946569442749, "learning_rate": 9.53993328539322e-06, "loss": 0.5555, "step": 2533 }, { "epoch": 0.16, "grad_norm": 1.473523497581482, "learning_rate": 9.539495199745174e-06, "loss": 0.6058, "step": 2534 }, { "epoch": 0.16, "grad_norm": 1.1915123462677002, "learning_rate": 9.539056915687507e-06, "loss": 0.5969, "step": 2535 }, { "epoch": 0.16, "grad_norm": 1.2120487689971924, "learning_rate": 9.53861843323937e-06, "loss": 0.5861, "step": 2536 }, { "epoch": 0.16, "grad_norm": 1.101071834564209, "learning_rate": 9.538179752419933e-06, "loss": 0.5689, "step": 2537 }, { "epoch": 0.16, "grad_norm": 1.3268840312957764, "learning_rate": 9.537740873248368e-06, "loss": 0.6206, "step": 2538 }, { "epoch": 0.16, "grad_norm": 1.2339918613433838, "learning_rate": 9.537301795743856e-06, "loss": 0.6012, "step": 2539 }, { "epoch": 0.16, "grad_norm": 1.177768349647522, "learning_rate": 9.536862519925589e-06, "loss": 0.6384, "step": 2540 }, { "epoch": 0.16, "grad_norm": 1.1872676610946655, "learning_rate": 9.536423045812767e-06, "loss": 0.5525, "step": 2541 }, { "epoch": 0.16, "grad_norm": 1.220292568206787, "learning_rate": 9.535983373424597e-06, "loss": 0.5869, "step": 2542 }, { "epoch": 0.16, "grad_norm": 1.203374981880188, "learning_rate": 9.535543502780299e-06, "loss": 0.5723, "step": 2543 }, { "epoch": 0.16, "grad_norm": 1.150447964668274, "learning_rate": 9.535103433899093e-06, "loss": 0.6175, "step": 2544 }, { "epoch": 0.16, "grad_norm": 1.19645357131958, "learning_rate": 9.534663166800217e-06, "loss": 0.6076, "step": 2545 }, { "epoch": 0.16, "grad_norm": 1.2364282608032227, "learning_rate": 9.534222701502915e-06, "loss": 0.579, "step": 2546 }, { "epoch": 0.16, "grad_norm": 1.2371670007705688, "learning_rate": 9.533782038026437e-06, "loss": 0.5505, "step": 2547 }, { "epoch": 0.16, "grad_norm": 1.0799633264541626, "learning_rate": 9.533341176390044e-06, "loss": 0.5524, "step": 2548 }, { "epoch": 0.16, "grad_norm": 1.3121696710586548, "learning_rate": 9.532900116613004e-06, "loss": 0.5485, "step": 2549 }, { "epoch": 0.16, "grad_norm": 1.2208893299102783, "learning_rate": 9.532458858714595e-06, "loss": 0.5845, "step": 2550 }, { "epoch": 0.16, "grad_norm": 1.2546663284301758, "learning_rate": 9.532017402714103e-06, "loss": 0.5615, "step": 2551 }, { "epoch": 0.16, "grad_norm": 1.137694001197815, "learning_rate": 9.531575748630823e-06, "loss": 0.6039, "step": 2552 }, { "epoch": 0.16, "grad_norm": 1.2807554006576538, "learning_rate": 9.531133896484058e-06, "loss": 0.5636, "step": 2553 }, { "epoch": 0.16, "grad_norm": 1.129840612411499, "learning_rate": 9.53069184629312e-06, "loss": 0.5379, "step": 2554 }, { "epoch": 0.16, "grad_norm": 1.4111180305480957, "learning_rate": 9.530249598077334e-06, "loss": 0.513, "step": 2555 }, { "epoch": 0.16, "grad_norm": 1.2460901737213135, "learning_rate": 9.529807151856025e-06, "loss": 0.6259, "step": 2556 }, { "epoch": 0.17, "grad_norm": 1.2566149234771729, "learning_rate": 9.529364507648531e-06, "loss": 0.5822, "step": 2557 }, { "epoch": 0.17, "grad_norm": 1.3677438497543335, "learning_rate": 9.528921665474202e-06, "loss": 0.5413, "step": 2558 }, { "epoch": 0.17, "grad_norm": 1.2217352390289307, "learning_rate": 9.52847862535239e-06, "loss": 0.6092, "step": 2559 }, { "epoch": 0.17, "grad_norm": 1.438706398010254, "learning_rate": 9.528035387302463e-06, "loss": 0.5745, "step": 2560 }, { "epoch": 0.17, "grad_norm": 1.3806403875350952, "learning_rate": 9.52759195134379e-06, "loss": 0.5847, "step": 2561 }, { "epoch": 0.17, "grad_norm": 1.1834934949874878, "learning_rate": 9.527148317495755e-06, "loss": 0.5555, "step": 2562 }, { "epoch": 0.17, "grad_norm": 1.1682989597320557, "learning_rate": 9.526704485777746e-06, "loss": 0.5981, "step": 2563 }, { "epoch": 0.17, "grad_norm": 1.2085938453674316, "learning_rate": 9.526260456209165e-06, "loss": 0.5716, "step": 2564 }, { "epoch": 0.17, "grad_norm": 1.3598735332489014, "learning_rate": 9.525816228809417e-06, "loss": 0.5702, "step": 2565 }, { "epoch": 0.17, "grad_norm": 1.20726478099823, "learning_rate": 9.525371803597915e-06, "loss": 0.5648, "step": 2566 }, { "epoch": 0.17, "grad_norm": 1.2910000085830688, "learning_rate": 9.524927180594092e-06, "loss": 0.5941, "step": 2567 }, { "epoch": 0.17, "grad_norm": 1.1383451223373413, "learning_rate": 9.524482359817372e-06, "loss": 0.6114, "step": 2568 }, { "epoch": 0.17, "grad_norm": 1.1212916374206543, "learning_rate": 9.524037341287205e-06, "loss": 0.5308, "step": 2569 }, { "epoch": 0.17, "grad_norm": 1.1218570470809937, "learning_rate": 9.523592125023037e-06, "loss": 0.5058, "step": 2570 }, { "epoch": 0.17, "grad_norm": 1.4970612525939941, "learning_rate": 9.523146711044328e-06, "loss": 0.6065, "step": 2571 }, { "epoch": 0.17, "grad_norm": 1.420291781425476, "learning_rate": 9.522701099370547e-06, "loss": 0.5658, "step": 2572 }, { "epoch": 0.17, "grad_norm": 1.173219084739685, "learning_rate": 9.522255290021168e-06, "loss": 0.5794, "step": 2573 }, { "epoch": 0.17, "grad_norm": 1.3971176147460938, "learning_rate": 9.521809283015677e-06, "loss": 0.5798, "step": 2574 }, { "epoch": 0.17, "grad_norm": 1.1988996267318726, "learning_rate": 9.52136307837357e-06, "loss": 0.5968, "step": 2575 }, { "epoch": 0.17, "grad_norm": 1.2439770698547363, "learning_rate": 9.520916676114349e-06, "loss": 0.5509, "step": 2576 }, { "epoch": 0.17, "grad_norm": 1.2166671752929688, "learning_rate": 9.520470076257523e-06, "loss": 0.6137, "step": 2577 }, { "epoch": 0.17, "grad_norm": 1.2102619409561157, "learning_rate": 9.520023278822614e-06, "loss": 0.6124, "step": 2578 }, { "epoch": 0.17, "grad_norm": 1.213700294494629, "learning_rate": 9.519576283829149e-06, "loss": 0.5828, "step": 2579 }, { "epoch": 0.17, "grad_norm": 1.260302186012268, "learning_rate": 9.519129091296665e-06, "loss": 0.5659, "step": 2580 }, { "epoch": 0.17, "grad_norm": 1.2155969142913818, "learning_rate": 9.518681701244708e-06, "loss": 0.56, "step": 2581 }, { "epoch": 0.17, "grad_norm": 1.3326911926269531, "learning_rate": 9.518234113692832e-06, "loss": 0.5732, "step": 2582 }, { "epoch": 0.17, "grad_norm": 1.3743246793746948, "learning_rate": 9.5177863286606e-06, "loss": 0.6037, "step": 2583 }, { "epoch": 0.17, "grad_norm": 1.181209683418274, "learning_rate": 9.517338346167583e-06, "loss": 0.5887, "step": 2584 }, { "epoch": 0.17, "grad_norm": 1.2447185516357422, "learning_rate": 9.516890166233363e-06, "loss": 0.5299, "step": 2585 }, { "epoch": 0.17, "grad_norm": 1.277764081954956, "learning_rate": 9.516441788877528e-06, "loss": 0.5886, "step": 2586 }, { "epoch": 0.17, "grad_norm": 1.1897168159484863, "learning_rate": 9.515993214119674e-06, "loss": 0.5625, "step": 2587 }, { "epoch": 0.17, "grad_norm": 1.2810721397399902, "learning_rate": 9.515544441979408e-06, "loss": 0.6006, "step": 2588 }, { "epoch": 0.17, "grad_norm": 1.2649016380310059, "learning_rate": 9.515095472476346e-06, "loss": 0.5783, "step": 2589 }, { "epoch": 0.17, "grad_norm": 1.2824745178222656, "learning_rate": 9.514646305630109e-06, "loss": 0.5901, "step": 2590 }, { "epoch": 0.17, "grad_norm": 1.305395245552063, "learning_rate": 9.514196941460328e-06, "loss": 0.6612, "step": 2591 }, { "epoch": 0.17, "grad_norm": 1.2532658576965332, "learning_rate": 9.513747379986648e-06, "loss": 0.6176, "step": 2592 }, { "epoch": 0.17, "grad_norm": 1.1623055934906006, "learning_rate": 9.513297621228715e-06, "loss": 0.593, "step": 2593 }, { "epoch": 0.17, "grad_norm": 1.3800380229949951, "learning_rate": 9.512847665206187e-06, "loss": 0.5932, "step": 2594 }, { "epoch": 0.17, "grad_norm": 1.112827181816101, "learning_rate": 9.512397511938732e-06, "loss": 0.613, "step": 2595 }, { "epoch": 0.17, "grad_norm": 1.0557461977005005, "learning_rate": 9.511947161446023e-06, "loss": 0.5382, "step": 2596 }, { "epoch": 0.17, "grad_norm": 1.1948403120040894, "learning_rate": 9.511496613747744e-06, "loss": 0.5584, "step": 2597 }, { "epoch": 0.17, "grad_norm": 1.2135486602783203, "learning_rate": 9.511045868863589e-06, "loss": 0.6211, "step": 2598 }, { "epoch": 0.17, "grad_norm": 1.205934762954712, "learning_rate": 9.510594926813256e-06, "loss": 0.6015, "step": 2599 }, { "epoch": 0.17, "grad_norm": 1.1474671363830566, "learning_rate": 9.510143787616457e-06, "loss": 0.5434, "step": 2600 }, { "epoch": 0.17, "grad_norm": 1.208664059638977, "learning_rate": 9.509692451292911e-06, "loss": 0.5459, "step": 2601 }, { "epoch": 0.17, "grad_norm": 1.2028684616088867, "learning_rate": 9.509240917862342e-06, "loss": 0.5603, "step": 2602 }, { "epoch": 0.17, "grad_norm": 1.1617093086242676, "learning_rate": 9.508789187344487e-06, "loss": 0.5646, "step": 2603 }, { "epoch": 0.17, "grad_norm": 1.1629319190979004, "learning_rate": 9.508337259759089e-06, "loss": 0.5828, "step": 2604 }, { "epoch": 0.17, "grad_norm": 1.4412208795547485, "learning_rate": 9.5078851351259e-06, "loss": 0.6291, "step": 2605 }, { "epoch": 0.17, "grad_norm": 1.3128838539123535, "learning_rate": 9.507432813464683e-06, "loss": 0.6197, "step": 2606 }, { "epoch": 0.17, "grad_norm": 1.0851377248764038, "learning_rate": 9.506980294795207e-06, "loss": 0.5443, "step": 2607 }, { "epoch": 0.17, "grad_norm": 1.1576595306396484, "learning_rate": 9.506527579137251e-06, "loss": 0.5817, "step": 2608 }, { "epoch": 0.17, "grad_norm": 1.1148006916046143, "learning_rate": 9.506074666510601e-06, "loss": 0.5761, "step": 2609 }, { "epoch": 0.17, "grad_norm": 1.2916840314865112, "learning_rate": 9.505621556935054e-06, "loss": 0.6172, "step": 2610 }, { "epoch": 0.17, "grad_norm": 1.1992136240005493, "learning_rate": 9.505168250430413e-06, "loss": 0.5855, "step": 2611 }, { "epoch": 0.17, "grad_norm": 1.1074063777923584, "learning_rate": 9.50471474701649e-06, "loss": 0.5682, "step": 2612 }, { "epoch": 0.17, "grad_norm": 1.18030846118927, "learning_rate": 9.50426104671311e-06, "loss": 0.5838, "step": 2613 }, { "epoch": 0.17, "grad_norm": 1.1431571245193481, "learning_rate": 9.503807149540099e-06, "loss": 0.5384, "step": 2614 }, { "epoch": 0.17, "grad_norm": 1.227578043937683, "learning_rate": 9.503353055517298e-06, "loss": 0.5575, "step": 2615 }, { "epoch": 0.17, "grad_norm": 1.2921686172485352, "learning_rate": 9.502898764664554e-06, "loss": 0.5881, "step": 2616 }, { "epoch": 0.17, "grad_norm": 1.209244728088379, "learning_rate": 9.502444277001721e-06, "loss": 0.5912, "step": 2617 }, { "epoch": 0.17, "grad_norm": 1.1964019536972046, "learning_rate": 9.501989592548667e-06, "loss": 0.5523, "step": 2618 }, { "epoch": 0.17, "grad_norm": 1.1191556453704834, "learning_rate": 9.501534711325264e-06, "loss": 0.6109, "step": 2619 }, { "epoch": 0.17, "grad_norm": 1.139445185661316, "learning_rate": 9.50107963335139e-06, "loss": 0.5839, "step": 2620 }, { "epoch": 0.17, "grad_norm": 1.1636196374893188, "learning_rate": 9.50062435864694e-06, "loss": 0.5849, "step": 2621 }, { "epoch": 0.17, "grad_norm": 1.1472831964492798, "learning_rate": 9.500168887231811e-06, "loss": 0.5844, "step": 2622 }, { "epoch": 0.17, "grad_norm": 1.1853594779968262, "learning_rate": 9.49971321912591e-06, "loss": 0.5685, "step": 2623 }, { "epoch": 0.17, "grad_norm": 1.1808714866638184, "learning_rate": 9.499257354349154e-06, "loss": 0.5865, "step": 2624 }, { "epoch": 0.17, "grad_norm": 1.2493209838867188, "learning_rate": 9.498801292921468e-06, "loss": 0.5793, "step": 2625 }, { "epoch": 0.17, "grad_norm": 1.196696400642395, "learning_rate": 9.498345034862783e-06, "loss": 0.5854, "step": 2626 }, { "epoch": 0.17, "grad_norm": 1.1912639141082764, "learning_rate": 9.497888580193044e-06, "loss": 0.5244, "step": 2627 }, { "epoch": 0.17, "grad_norm": 1.2439824342727661, "learning_rate": 9.497431928932198e-06, "loss": 0.585, "step": 2628 }, { "epoch": 0.17, "grad_norm": 1.3827365636825562, "learning_rate": 9.496975081100209e-06, "loss": 0.5405, "step": 2629 }, { "epoch": 0.17, "grad_norm": 1.1100351810455322, "learning_rate": 9.496518036717039e-06, "loss": 0.5221, "step": 2630 }, { "epoch": 0.17, "grad_norm": 1.1295136213302612, "learning_rate": 9.496060795802666e-06, "loss": 0.5837, "step": 2631 }, { "epoch": 0.17, "grad_norm": 1.1938449144363403, "learning_rate": 9.495603358377076e-06, "loss": 0.5798, "step": 2632 }, { "epoch": 0.17, "grad_norm": 1.3371795415878296, "learning_rate": 9.495145724460264e-06, "loss": 0.6129, "step": 2633 }, { "epoch": 0.17, "grad_norm": 1.108052134513855, "learning_rate": 9.49468789407223e-06, "loss": 0.5717, "step": 2634 }, { "epoch": 0.17, "grad_norm": 1.1377352476119995, "learning_rate": 9.494229867232983e-06, "loss": 0.5771, "step": 2635 }, { "epoch": 0.17, "grad_norm": 1.2490344047546387, "learning_rate": 9.493771643962544e-06, "loss": 0.5791, "step": 2636 }, { "epoch": 0.17, "grad_norm": 1.3394063711166382, "learning_rate": 9.493313224280941e-06, "loss": 0.5799, "step": 2637 }, { "epoch": 0.17, "grad_norm": 1.28109610080719, "learning_rate": 9.49285460820821e-06, "loss": 0.5392, "step": 2638 }, { "epoch": 0.17, "grad_norm": 1.1369372606277466, "learning_rate": 9.492395795764395e-06, "loss": 0.5636, "step": 2639 }, { "epoch": 0.17, "grad_norm": 1.1989933252334595, "learning_rate": 9.491936786969551e-06, "loss": 0.5711, "step": 2640 }, { "epoch": 0.17, "grad_norm": 1.1379995346069336, "learning_rate": 9.49147758184374e-06, "loss": 0.5403, "step": 2641 }, { "epoch": 0.17, "grad_norm": 1.2496048212051392, "learning_rate": 9.49101818040703e-06, "loss": 0.557, "step": 2642 }, { "epoch": 0.17, "grad_norm": 1.205374836921692, "learning_rate": 9.490558582679504e-06, "loss": 0.5858, "step": 2643 }, { "epoch": 0.17, "grad_norm": 1.2686034440994263, "learning_rate": 9.490098788681248e-06, "loss": 0.6465, "step": 2644 }, { "epoch": 0.17, "grad_norm": 1.1388007402420044, "learning_rate": 9.489638798432359e-06, "loss": 0.5422, "step": 2645 }, { "epoch": 0.17, "grad_norm": 1.2106060981750488, "learning_rate": 9.489178611952941e-06, "loss": 0.5533, "step": 2646 }, { "epoch": 0.17, "grad_norm": 1.137117862701416, "learning_rate": 9.488718229263108e-06, "loss": 0.5532, "step": 2647 }, { "epoch": 0.17, "grad_norm": 1.228391408920288, "learning_rate": 9.488257650382982e-06, "loss": 0.5715, "step": 2648 }, { "epoch": 0.17, "grad_norm": 1.2563996315002441, "learning_rate": 9.487796875332694e-06, "loss": 0.5981, "step": 2649 }, { "epoch": 0.17, "grad_norm": 1.2232089042663574, "learning_rate": 9.487335904132382e-06, "loss": 0.6118, "step": 2650 }, { "epoch": 0.17, "grad_norm": 1.1633679866790771, "learning_rate": 9.486874736802198e-06, "loss": 0.5243, "step": 2651 }, { "epoch": 0.17, "grad_norm": 1.249067783355713, "learning_rate": 9.486413373362293e-06, "loss": 0.5378, "step": 2652 }, { "epoch": 0.17, "grad_norm": 1.1216133832931519, "learning_rate": 9.485951813832837e-06, "loss": 0.5807, "step": 2653 }, { "epoch": 0.17, "grad_norm": 1.1226880550384521, "learning_rate": 9.485490058234e-06, "loss": 0.5773, "step": 2654 }, { "epoch": 0.17, "grad_norm": 1.247696876525879, "learning_rate": 9.485028106585962e-06, "loss": 0.5639, "step": 2655 }, { "epoch": 0.17, "grad_norm": 1.167783260345459, "learning_rate": 9.484565958908922e-06, "loss": 0.5492, "step": 2656 }, { "epoch": 0.17, "grad_norm": 1.2315465211868286, "learning_rate": 9.484103615223073e-06, "loss": 0.6202, "step": 2657 }, { "epoch": 0.17, "grad_norm": 1.0998605489730835, "learning_rate": 9.483641075548623e-06, "loss": 0.5534, "step": 2658 }, { "epoch": 0.17, "grad_norm": 1.2543059587478638, "learning_rate": 9.48317833990579e-06, "loss": 0.5678, "step": 2659 }, { "epoch": 0.17, "grad_norm": 1.336724877357483, "learning_rate": 9.482715408314798e-06, "loss": 0.5485, "step": 2660 }, { "epoch": 0.17, "grad_norm": 1.150166392326355, "learning_rate": 9.48225228079588e-06, "loss": 0.5656, "step": 2661 }, { "epoch": 0.17, "grad_norm": 1.1862998008728027, "learning_rate": 9.481788957369282e-06, "loss": 0.5694, "step": 2662 }, { "epoch": 0.17, "grad_norm": 1.3097760677337646, "learning_rate": 9.48132543805525e-06, "loss": 0.5626, "step": 2663 }, { "epoch": 0.17, "grad_norm": 1.2523106336593628, "learning_rate": 9.480861722874046e-06, "loss": 0.615, "step": 2664 }, { "epoch": 0.17, "grad_norm": 1.3190727233886719, "learning_rate": 9.480397811845937e-06, "loss": 0.64, "step": 2665 }, { "epoch": 0.17, "grad_norm": 1.1509026288986206, "learning_rate": 9.479933704991198e-06, "loss": 0.5999, "step": 2666 }, { "epoch": 0.17, "grad_norm": 1.2013988494873047, "learning_rate": 9.479469402330116e-06, "loss": 0.5313, "step": 2667 }, { "epoch": 0.17, "grad_norm": 1.2729318141937256, "learning_rate": 9.479004903882983e-06, "loss": 0.5637, "step": 2668 }, { "epoch": 0.17, "grad_norm": 1.216601848602295, "learning_rate": 9.478540209670103e-06, "loss": 0.5862, "step": 2669 }, { "epoch": 0.17, "grad_norm": 1.2241106033325195, "learning_rate": 9.478075319711783e-06, "loss": 0.5971, "step": 2670 }, { "epoch": 0.17, "grad_norm": 1.2441306114196777, "learning_rate": 9.477610234028345e-06, "loss": 0.6443, "step": 2671 }, { "epoch": 0.17, "grad_norm": 1.2198270559310913, "learning_rate": 9.477144952640119e-06, "loss": 0.5822, "step": 2672 }, { "epoch": 0.17, "grad_norm": 1.274335265159607, "learning_rate": 9.476679475567435e-06, "loss": 0.6288, "step": 2673 }, { "epoch": 0.17, "grad_norm": 1.3862143754959106, "learning_rate": 9.476213802830643e-06, "loss": 0.5432, "step": 2674 }, { "epoch": 0.17, "grad_norm": 1.2569196224212646, "learning_rate": 9.475747934450093e-06, "loss": 0.5443, "step": 2675 }, { "epoch": 0.17, "grad_norm": 1.2422752380371094, "learning_rate": 9.47528187044615e-06, "loss": 0.5671, "step": 2676 }, { "epoch": 0.17, "grad_norm": 1.2276687622070312, "learning_rate": 9.474815610839181e-06, "loss": 0.5389, "step": 2677 }, { "epoch": 0.17, "grad_norm": 1.1782163381576538, "learning_rate": 9.474349155649567e-06, "loss": 0.5833, "step": 2678 }, { "epoch": 0.17, "grad_norm": 1.449939250946045, "learning_rate": 9.473882504897695e-06, "loss": 0.5492, "step": 2679 }, { "epoch": 0.17, "grad_norm": 1.3355916738510132, "learning_rate": 9.473415658603964e-06, "loss": 0.5254, "step": 2680 }, { "epoch": 0.17, "grad_norm": 1.156309723854065, "learning_rate": 9.472948616788774e-06, "loss": 0.5354, "step": 2681 }, { "epoch": 0.17, "grad_norm": 1.303714394569397, "learning_rate": 9.472481379472542e-06, "loss": 0.5819, "step": 2682 }, { "epoch": 0.17, "grad_norm": 1.1271915435791016, "learning_rate": 9.472013946675686e-06, "loss": 0.5597, "step": 2683 }, { "epoch": 0.17, "grad_norm": 1.0945603847503662, "learning_rate": 9.471546318418638e-06, "loss": 0.5594, "step": 2684 }, { "epoch": 0.17, "grad_norm": 1.2813810110092163, "learning_rate": 9.471078494721839e-06, "loss": 0.5518, "step": 2685 }, { "epoch": 0.17, "grad_norm": 1.3424354791641235, "learning_rate": 9.470610475605735e-06, "loss": 0.5759, "step": 2686 }, { "epoch": 0.17, "grad_norm": 1.2070119380950928, "learning_rate": 9.47014226109078e-06, "loss": 0.5108, "step": 2687 }, { "epoch": 0.17, "grad_norm": 1.2905491590499878, "learning_rate": 9.46967385119744e-06, "loss": 0.5885, "step": 2688 }, { "epoch": 0.17, "grad_norm": 1.282543420791626, "learning_rate": 9.469205245946189e-06, "loss": 0.5938, "step": 2689 }, { "epoch": 0.17, "grad_norm": 1.277572751045227, "learning_rate": 9.468736445357508e-06, "loss": 0.5571, "step": 2690 }, { "epoch": 0.17, "grad_norm": 1.2028579711914062, "learning_rate": 9.468267449451884e-06, "loss": 0.5615, "step": 2691 }, { "epoch": 0.17, "grad_norm": 1.2134696245193481, "learning_rate": 9.46779825824982e-06, "loss": 0.5873, "step": 2692 }, { "epoch": 0.17, "grad_norm": 1.1812561750411987, "learning_rate": 9.46732887177182e-06, "loss": 0.5835, "step": 2693 }, { "epoch": 0.17, "grad_norm": 1.2059465646743774, "learning_rate": 9.4668592900384e-06, "loss": 0.5441, "step": 2694 }, { "epoch": 0.17, "grad_norm": 1.1555109024047852, "learning_rate": 9.466389513070087e-06, "loss": 0.5773, "step": 2695 }, { "epoch": 0.17, "grad_norm": 1.5102622509002686, "learning_rate": 9.465919540887412e-06, "loss": 0.5573, "step": 2696 }, { "epoch": 0.17, "grad_norm": 1.1853837966918945, "learning_rate": 9.465449373510914e-06, "loss": 0.535, "step": 2697 }, { "epoch": 0.17, "grad_norm": 1.2305511236190796, "learning_rate": 9.464979010961146e-06, "loss": 0.5929, "step": 2698 }, { "epoch": 0.17, "grad_norm": 1.3274978399276733, "learning_rate": 9.464508453258666e-06, "loss": 0.5816, "step": 2699 }, { "epoch": 0.17, "grad_norm": 1.1548744440078735, "learning_rate": 9.464037700424039e-06, "loss": 0.5713, "step": 2700 }, { "epoch": 0.17, "grad_norm": 1.2231045961380005, "learning_rate": 9.463566752477842e-06, "loss": 0.6208, "step": 2701 }, { "epoch": 0.17, "grad_norm": 1.4071745872497559, "learning_rate": 9.463095609440658e-06, "loss": 0.5864, "step": 2702 }, { "epoch": 0.17, "grad_norm": 1.086135745048523, "learning_rate": 9.46262427133308e-06, "loss": 0.5546, "step": 2703 }, { "epoch": 0.17, "grad_norm": 1.2159970998764038, "learning_rate": 9.46215273817571e-06, "loss": 0.5766, "step": 2704 }, { "epoch": 0.17, "grad_norm": 1.3437169790267944, "learning_rate": 9.461681009989154e-06, "loss": 0.5713, "step": 2705 }, { "epoch": 0.17, "grad_norm": 1.1049915552139282, "learning_rate": 9.461209086794034e-06, "loss": 0.5643, "step": 2706 }, { "epoch": 0.17, "grad_norm": 1.3201334476470947, "learning_rate": 9.460736968610975e-06, "loss": 0.6361, "step": 2707 }, { "epoch": 0.17, "grad_norm": 1.187973141670227, "learning_rate": 9.460264655460612e-06, "loss": 0.6, "step": 2708 }, { "epoch": 0.17, "grad_norm": 1.2185839414596558, "learning_rate": 9.459792147363587e-06, "loss": 0.58, "step": 2709 }, { "epoch": 0.17, "grad_norm": 1.203035593032837, "learning_rate": 9.459319444340556e-06, "loss": 0.5505, "step": 2710 }, { "epoch": 0.17, "grad_norm": 1.1410996913909912, "learning_rate": 9.458846546412176e-06, "loss": 0.5347, "step": 2711 }, { "epoch": 0.18, "grad_norm": 1.5229580402374268, "learning_rate": 9.458373453599117e-06, "loss": 0.6146, "step": 2712 }, { "epoch": 0.18, "grad_norm": 1.277181625366211, "learning_rate": 9.457900165922058e-06, "loss": 0.5652, "step": 2713 }, { "epoch": 0.18, "grad_norm": 1.1814225912094116, "learning_rate": 9.457426683401684e-06, "loss": 0.5672, "step": 2714 }, { "epoch": 0.18, "grad_norm": 1.1557003259658813, "learning_rate": 9.456953006058689e-06, "loss": 0.6051, "step": 2715 }, { "epoch": 0.18, "grad_norm": 1.2031573057174683, "learning_rate": 9.456479133913779e-06, "loss": 0.6275, "step": 2716 }, { "epoch": 0.18, "grad_norm": 1.2008702754974365, "learning_rate": 9.456005066987663e-06, "loss": 0.5125, "step": 2717 }, { "epoch": 0.18, "grad_norm": 1.2628456354141235, "learning_rate": 9.45553080530106e-06, "loss": 0.5868, "step": 2718 }, { "epoch": 0.18, "grad_norm": 1.211694598197937, "learning_rate": 9.455056348874705e-06, "loss": 0.5509, "step": 2719 }, { "epoch": 0.18, "grad_norm": 1.23084557056427, "learning_rate": 9.454581697729328e-06, "loss": 0.56, "step": 2720 }, { "epoch": 0.18, "grad_norm": 1.1766339540481567, "learning_rate": 9.45410685188568e-06, "loss": 0.5565, "step": 2721 }, { "epoch": 0.18, "grad_norm": 1.19166898727417, "learning_rate": 9.453631811364512e-06, "loss": 0.5021, "step": 2722 }, { "epoch": 0.18, "grad_norm": 1.1693459749221802, "learning_rate": 9.453156576186589e-06, "loss": 0.5892, "step": 2723 }, { "epoch": 0.18, "grad_norm": 1.2863600254058838, "learning_rate": 9.45268114637268e-06, "loss": 0.6483, "step": 2724 }, { "epoch": 0.18, "grad_norm": 1.230186104774475, "learning_rate": 9.452205521943568e-06, "loss": 0.5677, "step": 2725 }, { "epoch": 0.18, "grad_norm": 1.090714693069458, "learning_rate": 9.451729702920038e-06, "loss": 0.6239, "step": 2726 }, { "epoch": 0.18, "grad_norm": 1.1837078332901, "learning_rate": 9.451253689322889e-06, "loss": 0.4921, "step": 2727 }, { "epoch": 0.18, "grad_norm": 1.3603761196136475, "learning_rate": 9.450777481172925e-06, "loss": 0.5694, "step": 2728 }, { "epoch": 0.18, "grad_norm": 1.3587783575057983, "learning_rate": 9.450301078490961e-06, "loss": 0.5936, "step": 2729 }, { "epoch": 0.18, "grad_norm": 1.209930181503296, "learning_rate": 9.449824481297818e-06, "loss": 0.5875, "step": 2730 }, { "epoch": 0.18, "grad_norm": 1.2842377424240112, "learning_rate": 9.44934768961433e-06, "loss": 0.5712, "step": 2731 }, { "epoch": 0.18, "grad_norm": 1.2894915342330933, "learning_rate": 9.448870703461332e-06, "loss": 0.6001, "step": 2732 }, { "epoch": 0.18, "grad_norm": 1.008888602256775, "learning_rate": 9.448393522859674e-06, "loss": 0.5598, "step": 2733 }, { "epoch": 0.18, "grad_norm": 1.274376392364502, "learning_rate": 9.447916147830211e-06, "loss": 0.5553, "step": 2734 }, { "epoch": 0.18, "grad_norm": 1.201782464981079, "learning_rate": 9.44743857839381e-06, "loss": 0.6133, "step": 2735 }, { "epoch": 0.18, "grad_norm": 1.1803269386291504, "learning_rate": 9.446960814571345e-06, "loss": 0.5455, "step": 2736 }, { "epoch": 0.18, "grad_norm": 1.1448427438735962, "learning_rate": 9.446482856383694e-06, "loss": 0.4862, "step": 2737 }, { "epoch": 0.18, "grad_norm": 1.4729905128479004, "learning_rate": 9.44600470385175e-06, "loss": 0.6151, "step": 2738 }, { "epoch": 0.18, "grad_norm": 1.5023168325424194, "learning_rate": 9.44552635699641e-06, "loss": 0.56, "step": 2739 }, { "epoch": 0.18, "grad_norm": 1.2716331481933594, "learning_rate": 9.445047815838584e-06, "loss": 0.6356, "step": 2740 }, { "epoch": 0.18, "grad_norm": 1.2370781898498535, "learning_rate": 9.444569080399186e-06, "loss": 0.5516, "step": 2741 }, { "epoch": 0.18, "grad_norm": 1.2361699342727661, "learning_rate": 9.444090150699141e-06, "loss": 0.5565, "step": 2742 }, { "epoch": 0.18, "grad_norm": 1.2608875036239624, "learning_rate": 9.443611026759382e-06, "loss": 0.627, "step": 2743 }, { "epoch": 0.18, "grad_norm": 1.2316913604736328, "learning_rate": 9.443131708600848e-06, "loss": 0.5493, "step": 2744 }, { "epoch": 0.18, "grad_norm": 1.3860583305358887, "learning_rate": 9.442652196244492e-06, "loss": 0.6227, "step": 2745 }, { "epoch": 0.18, "grad_norm": 1.1769028902053833, "learning_rate": 9.44217248971127e-06, "loss": 0.586, "step": 2746 }, { "epoch": 0.18, "grad_norm": 1.1711853742599487, "learning_rate": 9.441692589022149e-06, "loss": 0.5924, "step": 2747 }, { "epoch": 0.18, "grad_norm": 1.1489996910095215, "learning_rate": 9.441212494198105e-06, "loss": 0.5825, "step": 2748 }, { "epoch": 0.18, "grad_norm": 1.145720362663269, "learning_rate": 9.440732205260122e-06, "loss": 0.5831, "step": 2749 }, { "epoch": 0.18, "grad_norm": 1.2584439516067505, "learning_rate": 9.44025172222919e-06, "loss": 0.6376, "step": 2750 }, { "epoch": 0.18, "grad_norm": 1.1369194984436035, "learning_rate": 9.439771045126313e-06, "loss": 0.5428, "step": 2751 }, { "epoch": 0.18, "grad_norm": 1.1223664283752441, "learning_rate": 9.439290173972497e-06, "loss": 0.5714, "step": 2752 }, { "epoch": 0.18, "grad_norm": 1.221194863319397, "learning_rate": 9.438809108788763e-06, "loss": 0.5843, "step": 2753 }, { "epoch": 0.18, "grad_norm": 1.1007137298583984, "learning_rate": 9.438327849596133e-06, "loss": 0.623, "step": 2754 }, { "epoch": 0.18, "grad_norm": 1.1797475814819336, "learning_rate": 9.437846396415645e-06, "loss": 0.5741, "step": 2755 }, { "epoch": 0.18, "grad_norm": 1.3386212587356567, "learning_rate": 9.437364749268339e-06, "loss": 0.6125, "step": 2756 }, { "epoch": 0.18, "grad_norm": 1.2065805196762085, "learning_rate": 9.43688290817527e-06, "loss": 0.5799, "step": 2757 }, { "epoch": 0.18, "grad_norm": 1.2449828386306763, "learning_rate": 9.436400873157497e-06, "loss": 0.57, "step": 2758 }, { "epoch": 0.18, "grad_norm": 1.2308356761932373, "learning_rate": 9.435918644236087e-06, "loss": 0.6254, "step": 2759 }, { "epoch": 0.18, "grad_norm": 1.2217352390289307, "learning_rate": 9.435436221432117e-06, "loss": 0.5762, "step": 2760 }, { "epoch": 0.18, "grad_norm": 1.182031512260437, "learning_rate": 9.434953604766674e-06, "loss": 0.5448, "step": 2761 }, { "epoch": 0.18, "grad_norm": 1.2911531925201416, "learning_rate": 9.43447079426085e-06, "loss": 0.5499, "step": 2762 }, { "epoch": 0.18, "grad_norm": 1.3767788410186768, "learning_rate": 9.433987789935751e-06, "loss": 0.5697, "step": 2763 }, { "epoch": 0.18, "grad_norm": 1.1789405345916748, "learning_rate": 9.433504591812484e-06, "loss": 0.6255, "step": 2764 }, { "epoch": 0.18, "grad_norm": 1.2137447595596313, "learning_rate": 9.433021199912171e-06, "loss": 0.5962, "step": 2765 }, { "epoch": 0.18, "grad_norm": 1.3893331289291382, "learning_rate": 9.43253761425594e-06, "loss": 0.6051, "step": 2766 }, { "epoch": 0.18, "grad_norm": 1.277564525604248, "learning_rate": 9.432053834864923e-06, "loss": 0.5896, "step": 2767 }, { "epoch": 0.18, "grad_norm": 1.289645791053772, "learning_rate": 9.431569861760268e-06, "loss": 0.6504, "step": 2768 }, { "epoch": 0.18, "grad_norm": 1.3092973232269287, "learning_rate": 9.43108569496313e-06, "loss": 0.5599, "step": 2769 }, { "epoch": 0.18, "grad_norm": 1.1597448587417603, "learning_rate": 9.430601334494666e-06, "loss": 0.5776, "step": 2770 }, { "epoch": 0.18, "grad_norm": 1.283332347869873, "learning_rate": 9.43011678037605e-06, "loss": 0.5606, "step": 2771 }, { "epoch": 0.18, "grad_norm": 1.155444860458374, "learning_rate": 9.429632032628462e-06, "loss": 0.5462, "step": 2772 }, { "epoch": 0.18, "grad_norm": 1.23137629032135, "learning_rate": 9.429147091273083e-06, "loss": 0.577, "step": 2773 }, { "epoch": 0.18, "grad_norm": 1.1372097730636597, "learning_rate": 9.428661956331112e-06, "loss": 0.522, "step": 2774 }, { "epoch": 0.18, "grad_norm": 1.0727673768997192, "learning_rate": 9.428176627823755e-06, "loss": 0.5249, "step": 2775 }, { "epoch": 0.18, "grad_norm": 1.2708286046981812, "learning_rate": 9.427691105772223e-06, "loss": 0.6143, "step": 2776 }, { "epoch": 0.18, "grad_norm": 1.226637601852417, "learning_rate": 9.427205390197733e-06, "loss": 0.5724, "step": 2777 }, { "epoch": 0.18, "grad_norm": 1.1255509853363037, "learning_rate": 9.42671948112152e-06, "loss": 0.524, "step": 2778 }, { "epoch": 0.18, "grad_norm": 1.2513936758041382, "learning_rate": 9.42623337856482e-06, "loss": 0.5949, "step": 2779 }, { "epoch": 0.18, "grad_norm": 1.1815718412399292, "learning_rate": 9.425747082548877e-06, "loss": 0.5552, "step": 2780 }, { "epoch": 0.18, "grad_norm": 1.1517143249511719, "learning_rate": 9.42526059309495e-06, "loss": 0.5123, "step": 2781 }, { "epoch": 0.18, "grad_norm": 1.0912017822265625, "learning_rate": 9.424773910224298e-06, "loss": 0.5735, "step": 2782 }, { "epoch": 0.18, "grad_norm": 1.1106799840927124, "learning_rate": 9.424287033958195e-06, "loss": 0.5131, "step": 2783 }, { "epoch": 0.18, "grad_norm": 1.0675545930862427, "learning_rate": 9.42379996431792e-06, "loss": 0.5335, "step": 2784 }, { "epoch": 0.18, "grad_norm": 1.1717967987060547, "learning_rate": 9.423312701324762e-06, "loss": 0.6013, "step": 2785 }, { "epoch": 0.18, "grad_norm": 1.1720796823501587, "learning_rate": 9.422825245000018e-06, "loss": 0.5331, "step": 2786 }, { "epoch": 0.18, "grad_norm": 1.1147704124450684, "learning_rate": 9.422337595364995e-06, "loss": 0.5388, "step": 2787 }, { "epoch": 0.18, "grad_norm": 1.2041555643081665, "learning_rate": 9.421849752441006e-06, "loss": 0.5964, "step": 2788 }, { "epoch": 0.18, "grad_norm": 1.2012966871261597, "learning_rate": 9.42136171624937e-06, "loss": 0.5497, "step": 2789 }, { "epoch": 0.18, "grad_norm": 1.1942931413650513, "learning_rate": 9.420873486811423e-06, "loss": 0.6137, "step": 2790 }, { "epoch": 0.18, "grad_norm": 1.15489661693573, "learning_rate": 9.420385064148501e-06, "loss": 0.5896, "step": 2791 }, { "epoch": 0.18, "grad_norm": 1.2382676601409912, "learning_rate": 9.419896448281954e-06, "loss": 0.531, "step": 2792 }, { "epoch": 0.18, "grad_norm": 1.0968576669692993, "learning_rate": 9.419407639233135e-06, "loss": 0.5517, "step": 2793 }, { "epoch": 0.18, "grad_norm": 1.2006522417068481, "learning_rate": 9.418918637023411e-06, "loss": 0.5732, "step": 2794 }, { "epoch": 0.18, "grad_norm": 1.1380589008331299, "learning_rate": 9.418429441674153e-06, "loss": 0.5585, "step": 2795 }, { "epoch": 0.18, "grad_norm": 1.129982352256775, "learning_rate": 9.417940053206745e-06, "loss": 0.5379, "step": 2796 }, { "epoch": 0.18, "grad_norm": 1.1912018060684204, "learning_rate": 9.417450471642576e-06, "loss": 0.5521, "step": 2797 }, { "epoch": 0.18, "grad_norm": 1.1600464582443237, "learning_rate": 9.416960697003043e-06, "loss": 0.57, "step": 2798 }, { "epoch": 0.18, "grad_norm": 1.1501250267028809, "learning_rate": 9.416470729309555e-06, "loss": 0.584, "step": 2799 }, { "epoch": 0.18, "grad_norm": 1.1641696691513062, "learning_rate": 9.415980568583525e-06, "loss": 0.628, "step": 2800 }, { "epoch": 0.18, "grad_norm": 1.1834733486175537, "learning_rate": 9.41549021484638e-06, "loss": 0.5752, "step": 2801 }, { "epoch": 0.18, "grad_norm": 1.138297200202942, "learning_rate": 9.414999668119547e-06, "loss": 0.5834, "step": 2802 }, { "epoch": 0.18, "grad_norm": 1.2032941579818726, "learning_rate": 9.41450892842447e-06, "loss": 0.5962, "step": 2803 }, { "epoch": 0.18, "grad_norm": 1.1836161613464355, "learning_rate": 9.4140179957826e-06, "loss": 0.464, "step": 2804 }, { "epoch": 0.18, "grad_norm": 1.4300559759140015, "learning_rate": 9.413526870215389e-06, "loss": 0.5085, "step": 2805 }, { "epoch": 0.18, "grad_norm": 1.5404317378997803, "learning_rate": 9.413035551744306e-06, "loss": 0.5687, "step": 2806 }, { "epoch": 0.18, "grad_norm": 1.1355453729629517, "learning_rate": 9.412544040390826e-06, "loss": 0.5378, "step": 2807 }, { "epoch": 0.18, "grad_norm": 1.1849149465560913, "learning_rate": 9.412052336176428e-06, "loss": 0.5687, "step": 2808 }, { "epoch": 0.18, "grad_norm": 1.1207712888717651, "learning_rate": 9.411560439122607e-06, "loss": 0.5352, "step": 2809 }, { "epoch": 0.18, "grad_norm": 1.1633033752441406, "learning_rate": 9.411068349250862e-06, "loss": 0.5553, "step": 2810 }, { "epoch": 0.18, "grad_norm": 1.081028938293457, "learning_rate": 9.410576066582699e-06, "loss": 0.5334, "step": 2811 }, { "epoch": 0.18, "grad_norm": 1.2328284978866577, "learning_rate": 9.410083591139635e-06, "loss": 0.6096, "step": 2812 }, { "epoch": 0.18, "grad_norm": 1.2062984704971313, "learning_rate": 9.409590922943196e-06, "loss": 0.5515, "step": 2813 }, { "epoch": 0.18, "grad_norm": 1.1705025434494019, "learning_rate": 9.409098062014916e-06, "loss": 0.575, "step": 2814 }, { "epoch": 0.18, "grad_norm": 1.3307453393936157, "learning_rate": 9.408605008376335e-06, "loss": 0.5653, "step": 2815 }, { "epoch": 0.18, "grad_norm": 1.2680844068527222, "learning_rate": 9.408111762049004e-06, "loss": 0.5904, "step": 2816 }, { "epoch": 0.18, "grad_norm": 1.1830233335494995, "learning_rate": 9.407618323054479e-06, "loss": 0.5907, "step": 2817 }, { "epoch": 0.18, "grad_norm": 1.1526038646697998, "learning_rate": 9.407124691414329e-06, "loss": 0.6103, "step": 2818 }, { "epoch": 0.18, "grad_norm": 1.2823009490966797, "learning_rate": 9.40663086715013e-06, "loss": 0.591, "step": 2819 }, { "epoch": 0.18, "grad_norm": 1.2484725713729858, "learning_rate": 9.406136850283465e-06, "loss": 0.5812, "step": 2820 }, { "epoch": 0.18, "grad_norm": 1.2744557857513428, "learning_rate": 9.405642640835926e-06, "loss": 0.5755, "step": 2821 }, { "epoch": 0.18, "grad_norm": 1.2910406589508057, "learning_rate": 9.405148238829115e-06, "loss": 0.6456, "step": 2822 }, { "epoch": 0.18, "grad_norm": 1.1236181259155273, "learning_rate": 9.404653644284639e-06, "loss": 0.5459, "step": 2823 }, { "epoch": 0.18, "grad_norm": 1.3196485042572021, "learning_rate": 9.404158857224116e-06, "loss": 0.5113, "step": 2824 }, { "epoch": 0.18, "grad_norm": 1.247275710105896, "learning_rate": 9.403663877669173e-06, "loss": 0.544, "step": 2825 }, { "epoch": 0.18, "grad_norm": 1.243589997291565, "learning_rate": 9.403168705641443e-06, "loss": 0.6037, "step": 2826 }, { "epoch": 0.18, "grad_norm": 1.2891871929168701, "learning_rate": 9.402673341162569e-06, "loss": 0.6046, "step": 2827 }, { "epoch": 0.18, "grad_norm": 1.1992034912109375, "learning_rate": 9.402177784254202e-06, "loss": 0.54, "step": 2828 }, { "epoch": 0.18, "grad_norm": 1.1884682178497314, "learning_rate": 9.401682034938004e-06, "loss": 0.6066, "step": 2829 }, { "epoch": 0.18, "grad_norm": 1.2265369892120361, "learning_rate": 9.40118609323564e-06, "loss": 0.5837, "step": 2830 }, { "epoch": 0.18, "grad_norm": 1.264366865158081, "learning_rate": 9.400689959168784e-06, "loss": 0.6, "step": 2831 }, { "epoch": 0.18, "grad_norm": 1.270106554031372, "learning_rate": 9.400193632759127e-06, "loss": 0.5609, "step": 2832 }, { "epoch": 0.18, "grad_norm": 1.2690627574920654, "learning_rate": 9.399697114028359e-06, "loss": 0.5823, "step": 2833 }, { "epoch": 0.18, "grad_norm": 1.1926723718643188, "learning_rate": 9.399200402998181e-06, "loss": 0.5676, "step": 2834 }, { "epoch": 0.18, "grad_norm": 1.2667149305343628, "learning_rate": 9.398703499690303e-06, "loss": 0.5668, "step": 2835 }, { "epoch": 0.18, "grad_norm": 1.1778860092163086, "learning_rate": 9.398206404126445e-06, "loss": 0.6002, "step": 2836 }, { "epoch": 0.18, "grad_norm": 1.3265962600708008, "learning_rate": 9.397709116328331e-06, "loss": 0.6119, "step": 2837 }, { "epoch": 0.18, "grad_norm": 1.1129626035690308, "learning_rate": 9.397211636317698e-06, "loss": 0.5195, "step": 2838 }, { "epoch": 0.18, "grad_norm": 1.1273404359817505, "learning_rate": 9.396713964116289e-06, "loss": 0.5951, "step": 2839 }, { "epoch": 0.18, "grad_norm": 1.3270646333694458, "learning_rate": 9.396216099745857e-06, "loss": 0.562, "step": 2840 }, { "epoch": 0.18, "grad_norm": 1.1624705791473389, "learning_rate": 9.395718043228162e-06, "loss": 0.5628, "step": 2841 }, { "epoch": 0.18, "grad_norm": 1.169798493385315, "learning_rate": 9.395219794584972e-06, "loss": 0.5748, "step": 2842 }, { "epoch": 0.18, "grad_norm": 1.0912595987319946, "learning_rate": 9.394721353838064e-06, "loss": 0.5462, "step": 2843 }, { "epoch": 0.18, "grad_norm": 1.2102645635604858, "learning_rate": 9.394222721009225e-06, "loss": 0.5973, "step": 2844 }, { "epoch": 0.18, "grad_norm": 1.1993839740753174, "learning_rate": 9.393723896120249e-06, "loss": 0.5854, "step": 2845 }, { "epoch": 0.18, "grad_norm": 1.2422130107879639, "learning_rate": 9.393224879192934e-06, "loss": 0.5633, "step": 2846 }, { "epoch": 0.18, "grad_norm": 1.131446123123169, "learning_rate": 9.392725670249098e-06, "loss": 0.5755, "step": 2847 }, { "epoch": 0.18, "grad_norm": 1.174874186515808, "learning_rate": 9.392226269310555e-06, "loss": 0.5296, "step": 2848 }, { "epoch": 0.18, "grad_norm": 1.2191447019577026, "learning_rate": 9.391726676399134e-06, "loss": 0.5776, "step": 2849 }, { "epoch": 0.18, "grad_norm": 1.1904046535491943, "learning_rate": 9.39122689153667e-06, "loss": 0.5209, "step": 2850 }, { "epoch": 0.18, "grad_norm": 1.186779260635376, "learning_rate": 9.39072691474501e-06, "loss": 0.5967, "step": 2851 }, { "epoch": 0.18, "grad_norm": 1.2039259672164917, "learning_rate": 9.390226746046003e-06, "loss": 0.6162, "step": 2852 }, { "epoch": 0.18, "grad_norm": 1.178320288658142, "learning_rate": 9.389726385461511e-06, "loss": 0.5466, "step": 2853 }, { "epoch": 0.18, "grad_norm": 1.2646173238754272, "learning_rate": 9.389225833013407e-06, "loss": 0.5785, "step": 2854 }, { "epoch": 0.18, "grad_norm": 1.0940253734588623, "learning_rate": 9.388725088723564e-06, "loss": 0.5728, "step": 2855 }, { "epoch": 0.18, "grad_norm": 1.227213978767395, "learning_rate": 9.388224152613872e-06, "loss": 0.5458, "step": 2856 }, { "epoch": 0.18, "grad_norm": 1.0410388708114624, "learning_rate": 9.387723024706223e-06, "loss": 0.5709, "step": 2857 }, { "epoch": 0.18, "grad_norm": 1.2903791666030884, "learning_rate": 9.38722170502252e-06, "loss": 0.6112, "step": 2858 }, { "epoch": 0.18, "grad_norm": 1.173178791999817, "learning_rate": 9.386720193584678e-06, "loss": 0.5778, "step": 2859 }, { "epoch": 0.18, "grad_norm": 1.06918203830719, "learning_rate": 9.386218490414613e-06, "loss": 0.5402, "step": 2860 }, { "epoch": 0.18, "grad_norm": 1.2885617017745972, "learning_rate": 9.385716595534255e-06, "loss": 0.5811, "step": 2861 }, { "epoch": 0.18, "grad_norm": 1.1211549043655396, "learning_rate": 9.38521450896554e-06, "loss": 0.6134, "step": 2862 }, { "epoch": 0.18, "grad_norm": 1.1817574501037598, "learning_rate": 9.38471223073041e-06, "loss": 0.5952, "step": 2863 }, { "epoch": 0.18, "grad_norm": 1.183013916015625, "learning_rate": 9.384209760850825e-06, "loss": 0.5547, "step": 2864 }, { "epoch": 0.18, "grad_norm": 1.205237627029419, "learning_rate": 9.38370709934874e-06, "loss": 0.5766, "step": 2865 }, { "epoch": 0.18, "grad_norm": 1.1885569095611572, "learning_rate": 9.38320424624613e-06, "loss": 0.5857, "step": 2866 }, { "epoch": 0.19, "grad_norm": 1.1400482654571533, "learning_rate": 9.382701201564968e-06, "loss": 0.5833, "step": 2867 }, { "epoch": 0.19, "grad_norm": 1.2362560033798218, "learning_rate": 9.382197965327246e-06, "loss": 0.5886, "step": 2868 }, { "epoch": 0.19, "grad_norm": 1.282267689704895, "learning_rate": 9.381694537554958e-06, "loss": 0.5956, "step": 2869 }, { "epoch": 0.19, "grad_norm": 1.5343232154846191, "learning_rate": 9.381190918270107e-06, "loss": 0.5447, "step": 2870 }, { "epoch": 0.19, "grad_norm": 1.3110018968582153, "learning_rate": 9.380687107494703e-06, "loss": 0.5897, "step": 2871 }, { "epoch": 0.19, "grad_norm": 1.1775888204574585, "learning_rate": 9.380183105250768e-06, "loss": 0.5693, "step": 2872 }, { "epoch": 0.19, "grad_norm": 1.1687729358673096, "learning_rate": 9.37967891156033e-06, "loss": 0.6432, "step": 2873 }, { "epoch": 0.19, "grad_norm": 1.2713888883590698, "learning_rate": 9.379174526445428e-06, "loss": 0.6168, "step": 2874 }, { "epoch": 0.19, "grad_norm": 1.179858922958374, "learning_rate": 9.378669949928105e-06, "loss": 0.591, "step": 2875 }, { "epoch": 0.19, "grad_norm": 1.1706594228744507, "learning_rate": 9.378165182030416e-06, "loss": 0.5698, "step": 2876 }, { "epoch": 0.19, "grad_norm": 1.2692897319793701, "learning_rate": 9.377660222774423e-06, "loss": 0.5478, "step": 2877 }, { "epoch": 0.19, "grad_norm": 1.1573748588562012, "learning_rate": 9.377155072182195e-06, "loss": 0.5951, "step": 2878 }, { "epoch": 0.19, "grad_norm": 1.2071622610092163, "learning_rate": 9.376649730275813e-06, "loss": 0.6155, "step": 2879 }, { "epoch": 0.19, "grad_norm": 1.1227587461471558, "learning_rate": 9.376144197077364e-06, "loss": 0.5363, "step": 2880 }, { "epoch": 0.19, "grad_norm": 1.1794252395629883, "learning_rate": 9.37563847260894e-06, "loss": 0.6178, "step": 2881 }, { "epoch": 0.19, "grad_norm": 1.2575901746749878, "learning_rate": 9.37513255689265e-06, "loss": 0.5482, "step": 2882 }, { "epoch": 0.19, "grad_norm": 1.417494535446167, "learning_rate": 9.374626449950603e-06, "loss": 0.6062, "step": 2883 }, { "epoch": 0.19, "grad_norm": 1.2105929851531982, "learning_rate": 9.374120151804922e-06, "loss": 0.5951, "step": 2884 }, { "epoch": 0.19, "grad_norm": 1.5485612154006958, "learning_rate": 9.373613662477733e-06, "loss": 0.6206, "step": 2885 }, { "epoch": 0.19, "grad_norm": 1.1332621574401855, "learning_rate": 9.373106981991174e-06, "loss": 0.5426, "step": 2886 }, { "epoch": 0.19, "grad_norm": 1.104100227355957, "learning_rate": 9.372600110367394e-06, "loss": 0.5653, "step": 2887 }, { "epoch": 0.19, "grad_norm": 1.2156257629394531, "learning_rate": 9.372093047628543e-06, "loss": 0.5546, "step": 2888 }, { "epoch": 0.19, "grad_norm": 1.0692386627197266, "learning_rate": 9.371585793796785e-06, "loss": 0.5639, "step": 2889 }, { "epoch": 0.19, "grad_norm": 1.0691006183624268, "learning_rate": 9.371078348894291e-06, "loss": 0.5746, "step": 2890 }, { "epoch": 0.19, "grad_norm": 1.1931782960891724, "learning_rate": 9.370570712943242e-06, "loss": 0.5487, "step": 2891 }, { "epoch": 0.19, "grad_norm": 1.1959723234176636, "learning_rate": 9.37006288596582e-06, "loss": 0.5584, "step": 2892 }, { "epoch": 0.19, "grad_norm": 1.316166877746582, "learning_rate": 9.369554867984226e-06, "loss": 0.6385, "step": 2893 }, { "epoch": 0.19, "grad_norm": 1.0417795181274414, "learning_rate": 9.369046659020661e-06, "loss": 0.5178, "step": 2894 }, { "epoch": 0.19, "grad_norm": 1.1608120203018188, "learning_rate": 9.36853825909734e-06, "loss": 0.5524, "step": 2895 }, { "epoch": 0.19, "grad_norm": 1.3393449783325195, "learning_rate": 9.368029668236483e-06, "loss": 0.5932, "step": 2896 }, { "epoch": 0.19, "grad_norm": 1.1463454961776733, "learning_rate": 9.367520886460318e-06, "loss": 0.5779, "step": 2897 }, { "epoch": 0.19, "grad_norm": 1.2025853395462036, "learning_rate": 9.367011913791084e-06, "loss": 0.5823, "step": 2898 }, { "epoch": 0.19, "grad_norm": 1.259071707725525, "learning_rate": 9.366502750251027e-06, "loss": 0.5308, "step": 2899 }, { "epoch": 0.19, "grad_norm": 1.15497624874115, "learning_rate": 9.365993395862399e-06, "loss": 0.5685, "step": 2900 }, { "epoch": 0.19, "grad_norm": 1.181823968887329, "learning_rate": 9.365483850647466e-06, "loss": 0.552, "step": 2901 }, { "epoch": 0.19, "grad_norm": 1.286189317703247, "learning_rate": 9.364974114628494e-06, "loss": 0.5693, "step": 2902 }, { "epoch": 0.19, "grad_norm": 1.2807742357254028, "learning_rate": 9.364464187827767e-06, "loss": 0.5993, "step": 2903 }, { "epoch": 0.19, "grad_norm": 1.1729151010513306, "learning_rate": 9.363954070267571e-06, "loss": 0.5344, "step": 2904 }, { "epoch": 0.19, "grad_norm": 1.2433383464813232, "learning_rate": 9.3634437619702e-06, "loss": 0.6226, "step": 2905 }, { "epoch": 0.19, "grad_norm": 1.2436273097991943, "learning_rate": 9.362933262957963e-06, "loss": 0.563, "step": 2906 }, { "epoch": 0.19, "grad_norm": 1.255078911781311, "learning_rate": 9.362422573253168e-06, "loss": 0.5691, "step": 2907 }, { "epoch": 0.19, "grad_norm": 1.1799046993255615, "learning_rate": 9.361911692878139e-06, "loss": 0.5542, "step": 2908 }, { "epoch": 0.19, "grad_norm": 1.2937884330749512, "learning_rate": 9.361400621855201e-06, "loss": 0.5709, "step": 2909 }, { "epoch": 0.19, "grad_norm": 1.3306301832199097, "learning_rate": 9.360889360206695e-06, "loss": 0.5845, "step": 2910 }, { "epoch": 0.19, "grad_norm": 1.1823747158050537, "learning_rate": 9.360377907954967e-06, "loss": 0.5504, "step": 2911 }, { "epoch": 0.19, "grad_norm": 1.1087062358856201, "learning_rate": 9.359866265122372e-06, "loss": 0.5104, "step": 2912 }, { "epoch": 0.19, "grad_norm": 1.1423441171646118, "learning_rate": 9.359354431731268e-06, "loss": 0.5658, "step": 2913 }, { "epoch": 0.19, "grad_norm": 1.1000564098358154, "learning_rate": 9.358842407804032e-06, "loss": 0.5536, "step": 2914 }, { "epoch": 0.19, "grad_norm": 1.2518064975738525, "learning_rate": 9.35833019336304e-06, "loss": 0.6079, "step": 2915 }, { "epoch": 0.19, "grad_norm": 1.179193377494812, "learning_rate": 9.357817788430678e-06, "loss": 0.5812, "step": 2916 }, { "epoch": 0.19, "grad_norm": 1.136351466178894, "learning_rate": 9.357305193029345e-06, "loss": 0.5872, "step": 2917 }, { "epoch": 0.19, "grad_norm": 1.6022664308547974, "learning_rate": 9.356792407181445e-06, "loss": 0.5478, "step": 2918 }, { "epoch": 0.19, "grad_norm": 1.187699317932129, "learning_rate": 9.356279430909387e-06, "loss": 0.6144, "step": 2919 }, { "epoch": 0.19, "grad_norm": 1.3177673816680908, "learning_rate": 9.355766264235597e-06, "loss": 0.5836, "step": 2920 }, { "epoch": 0.19, "grad_norm": 1.2527815103530884, "learning_rate": 9.355252907182499e-06, "loss": 0.6062, "step": 2921 }, { "epoch": 0.19, "grad_norm": 1.0907096862792969, "learning_rate": 9.354739359772535e-06, "loss": 0.5461, "step": 2922 }, { "epoch": 0.19, "grad_norm": 1.1122839450836182, "learning_rate": 9.35422562202815e-06, "loss": 0.513, "step": 2923 }, { "epoch": 0.19, "grad_norm": 1.145888090133667, "learning_rate": 9.353711693971795e-06, "loss": 0.5322, "step": 2924 }, { "epoch": 0.19, "grad_norm": 1.129329800605774, "learning_rate": 9.353197575625934e-06, "loss": 0.5484, "step": 2925 }, { "epoch": 0.19, "grad_norm": 1.2029716968536377, "learning_rate": 9.352683267013042e-06, "loss": 0.6362, "step": 2926 }, { "epoch": 0.19, "grad_norm": 1.0698649883270264, "learning_rate": 9.35216876815559e-06, "loss": 0.5587, "step": 2927 }, { "epoch": 0.19, "grad_norm": 1.1497304439544678, "learning_rate": 9.351654079076072e-06, "loss": 0.5777, "step": 2928 }, { "epoch": 0.19, "grad_norm": 1.0718717575073242, "learning_rate": 9.35113919979698e-06, "loss": 0.6084, "step": 2929 }, { "epoch": 0.19, "grad_norm": 1.1856486797332764, "learning_rate": 9.350624130340824e-06, "loss": 0.5331, "step": 2930 }, { "epoch": 0.19, "grad_norm": 1.1352362632751465, "learning_rate": 9.350108870730108e-06, "loss": 0.5984, "step": 2931 }, { "epoch": 0.19, "grad_norm": 1.0719783306121826, "learning_rate": 9.349593420987359e-06, "loss": 0.5679, "step": 2932 }, { "epoch": 0.19, "grad_norm": 1.26412832736969, "learning_rate": 9.349077781135102e-06, "loss": 0.5877, "step": 2933 }, { "epoch": 0.19, "grad_norm": 1.2011152505874634, "learning_rate": 9.348561951195878e-06, "loss": 0.569, "step": 2934 }, { "epoch": 0.19, "grad_norm": 1.125433325767517, "learning_rate": 9.348045931192227e-06, "loss": 0.5425, "step": 2935 }, { "epoch": 0.19, "grad_norm": 1.3288426399230957, "learning_rate": 9.34752972114671e-06, "loss": 0.5942, "step": 2936 }, { "epoch": 0.19, "grad_norm": 1.1615608930587769, "learning_rate": 9.347013321081883e-06, "loss": 0.6519, "step": 2937 }, { "epoch": 0.19, "grad_norm": 2.056138515472412, "learning_rate": 9.346496731020321e-06, "loss": 0.6025, "step": 2938 }, { "epoch": 0.19, "grad_norm": 1.138484001159668, "learning_rate": 9.3459799509846e-06, "loss": 0.6005, "step": 2939 }, { "epoch": 0.19, "grad_norm": 1.0522960424423218, "learning_rate": 9.34546298099731e-06, "loss": 0.5614, "step": 2940 }, { "epoch": 0.19, "grad_norm": 1.1532444953918457, "learning_rate": 9.344945821081043e-06, "loss": 0.5953, "step": 2941 }, { "epoch": 0.19, "grad_norm": 1.2115492820739746, "learning_rate": 9.344428471258405e-06, "loss": 0.5923, "step": 2942 }, { "epoch": 0.19, "grad_norm": 1.1558897495269775, "learning_rate": 9.343910931552005e-06, "loss": 0.5391, "step": 2943 }, { "epoch": 0.19, "grad_norm": 1.1717842817306519, "learning_rate": 9.343393201984468e-06, "loss": 0.5442, "step": 2944 }, { "epoch": 0.19, "grad_norm": 1.2257479429244995, "learning_rate": 9.34287528257842e-06, "loss": 0.602, "step": 2945 }, { "epoch": 0.19, "grad_norm": 1.2489970922470093, "learning_rate": 9.342357173356498e-06, "loss": 0.5781, "step": 2946 }, { "epoch": 0.19, "grad_norm": 1.1945534944534302, "learning_rate": 9.341838874341347e-06, "loss": 0.6121, "step": 2947 }, { "epoch": 0.19, "grad_norm": 1.18329918384552, "learning_rate": 9.34132038555562e-06, "loss": 0.5654, "step": 2948 }, { "epoch": 0.19, "grad_norm": 1.1799614429473877, "learning_rate": 9.34080170702198e-06, "loss": 0.5888, "step": 2949 }, { "epoch": 0.19, "grad_norm": 1.19789457321167, "learning_rate": 9.340282838763099e-06, "loss": 0.6383, "step": 2950 }, { "epoch": 0.19, "grad_norm": 1.3394668102264404, "learning_rate": 9.339763780801651e-06, "loss": 0.6225, "step": 2951 }, { "epoch": 0.19, "grad_norm": 1.248391032218933, "learning_rate": 9.339244533160326e-06, "loss": 0.6688, "step": 2952 }, { "epoch": 0.19, "grad_norm": 1.1701760292053223, "learning_rate": 9.338725095861816e-06, "loss": 0.5683, "step": 2953 }, { "epoch": 0.19, "grad_norm": 1.215515375137329, "learning_rate": 9.338205468928829e-06, "loss": 0.5518, "step": 2954 }, { "epoch": 0.19, "grad_norm": 1.198807954788208, "learning_rate": 9.337685652384072e-06, "loss": 0.5702, "step": 2955 }, { "epoch": 0.19, "grad_norm": 1.2344108819961548, "learning_rate": 9.337165646250267e-06, "loss": 0.6326, "step": 2956 }, { "epoch": 0.19, "grad_norm": 1.089939832687378, "learning_rate": 9.336645450550141e-06, "loss": 0.4948, "step": 2957 }, { "epoch": 0.19, "grad_norm": 1.1651105880737305, "learning_rate": 9.336125065306432e-06, "loss": 0.6094, "step": 2958 }, { "epoch": 0.19, "grad_norm": 1.170209288597107, "learning_rate": 9.335604490541883e-06, "loss": 0.6208, "step": 2959 }, { "epoch": 0.19, "grad_norm": 1.1290645599365234, "learning_rate": 9.335083726279248e-06, "loss": 0.5607, "step": 2960 }, { "epoch": 0.19, "grad_norm": 1.3427270650863647, "learning_rate": 9.334562772541287e-06, "loss": 0.5659, "step": 2961 }, { "epoch": 0.19, "grad_norm": 1.3329110145568848, "learning_rate": 9.334041629350772e-06, "loss": 0.574, "step": 2962 }, { "epoch": 0.19, "grad_norm": 1.2645525932312012, "learning_rate": 9.333520296730479e-06, "loss": 0.5995, "step": 2963 }, { "epoch": 0.19, "grad_norm": 1.0561652183532715, "learning_rate": 9.332998774703194e-06, "loss": 0.57, "step": 2964 }, { "epoch": 0.19, "grad_norm": 1.1189767122268677, "learning_rate": 9.332477063291711e-06, "loss": 0.5411, "step": 2965 }, { "epoch": 0.19, "grad_norm": 1.2157710790634155, "learning_rate": 9.331955162518836e-06, "loss": 0.6162, "step": 2966 }, { "epoch": 0.19, "grad_norm": 1.226801872253418, "learning_rate": 9.331433072407375e-06, "loss": 0.5652, "step": 2967 }, { "epoch": 0.19, "grad_norm": 1.3100322484970093, "learning_rate": 9.330910792980151e-06, "loss": 0.5564, "step": 2968 }, { "epoch": 0.19, "grad_norm": 1.0878851413726807, "learning_rate": 9.330388324259991e-06, "loss": 0.5185, "step": 2969 }, { "epoch": 0.19, "grad_norm": 1.1371158361434937, "learning_rate": 9.32986566626973e-06, "loss": 0.5729, "step": 2970 }, { "epoch": 0.19, "grad_norm": 1.1481420993804932, "learning_rate": 9.329342819032207e-06, "loss": 0.5371, "step": 2971 }, { "epoch": 0.19, "grad_norm": 1.2638026475906372, "learning_rate": 9.328819782570284e-06, "loss": 0.6401, "step": 2972 }, { "epoch": 0.19, "grad_norm": 1.2203782796859741, "learning_rate": 9.328296556906816e-06, "loss": 0.5686, "step": 2973 }, { "epoch": 0.19, "grad_norm": 1.2569352388381958, "learning_rate": 9.327773142064673e-06, "loss": 0.5728, "step": 2974 }, { "epoch": 0.19, "grad_norm": 1.2261801958084106, "learning_rate": 9.327249538066731e-06, "loss": 0.554, "step": 2975 }, { "epoch": 0.19, "grad_norm": 1.295271873474121, "learning_rate": 9.326725744935877e-06, "loss": 0.5668, "step": 2976 }, { "epoch": 0.19, "grad_norm": 1.1027625799179077, "learning_rate": 9.326201762695002e-06, "loss": 0.5392, "step": 2977 }, { "epoch": 0.19, "grad_norm": 1.2012465000152588, "learning_rate": 9.325677591367011e-06, "loss": 0.5329, "step": 2978 }, { "epoch": 0.19, "grad_norm": 1.479375958442688, "learning_rate": 9.325153230974812e-06, "loss": 0.6107, "step": 2979 }, { "epoch": 0.19, "grad_norm": 1.3752119541168213, "learning_rate": 9.324628681541326e-06, "loss": 0.5338, "step": 2980 }, { "epoch": 0.19, "grad_norm": 1.1353546380996704, "learning_rate": 9.324103943089476e-06, "loss": 0.5631, "step": 2981 }, { "epoch": 0.19, "grad_norm": 1.2195281982421875, "learning_rate": 9.3235790156422e-06, "loss": 0.5352, "step": 2982 }, { "epoch": 0.19, "grad_norm": 1.3040847778320312, "learning_rate": 9.32305389922244e-06, "loss": 0.5763, "step": 2983 }, { "epoch": 0.19, "grad_norm": 1.2530272006988525, "learning_rate": 9.322528593853148e-06, "loss": 0.5954, "step": 2984 }, { "epoch": 0.19, "grad_norm": 1.2810713052749634, "learning_rate": 9.322003099557284e-06, "loss": 0.5461, "step": 2985 }, { "epoch": 0.19, "grad_norm": 1.2396403551101685, "learning_rate": 9.321477416357815e-06, "loss": 0.6446, "step": 2986 }, { "epoch": 0.19, "grad_norm": 1.3727738857269287, "learning_rate": 9.320951544277717e-06, "loss": 0.6057, "step": 2987 }, { "epoch": 0.19, "grad_norm": 1.3131061792373657, "learning_rate": 9.320425483339974e-06, "loss": 0.5658, "step": 2988 }, { "epoch": 0.19, "grad_norm": 1.1775065660476685, "learning_rate": 9.319899233567583e-06, "loss": 0.5169, "step": 2989 }, { "epoch": 0.19, "grad_norm": 1.162467122077942, "learning_rate": 9.319372794983541e-06, "loss": 0.5926, "step": 2990 }, { "epoch": 0.19, "grad_norm": 1.1808030605316162, "learning_rate": 9.318846167610858e-06, "loss": 0.5965, "step": 2991 }, { "epoch": 0.19, "grad_norm": 1.1030610799789429, "learning_rate": 9.318319351472551e-06, "loss": 0.5213, "step": 2992 }, { "epoch": 0.19, "grad_norm": 1.1440644264221191, "learning_rate": 9.317792346591647e-06, "loss": 0.5881, "step": 2993 }, { "epoch": 0.19, "grad_norm": 1.129438042640686, "learning_rate": 9.31726515299118e-06, "loss": 0.5612, "step": 2994 }, { "epoch": 0.19, "grad_norm": 1.1915926933288574, "learning_rate": 9.31673777069419e-06, "loss": 0.6069, "step": 2995 }, { "epoch": 0.19, "grad_norm": 1.2369052171707153, "learning_rate": 9.316210199723731e-06, "loss": 0.5794, "step": 2996 }, { "epoch": 0.19, "grad_norm": 1.185879111289978, "learning_rate": 9.315682440102861e-06, "loss": 0.5384, "step": 2997 }, { "epoch": 0.19, "grad_norm": 1.2312569618225098, "learning_rate": 9.315154491854646e-06, "loss": 0.5406, "step": 2998 }, { "epoch": 0.19, "grad_norm": 1.297387957572937, "learning_rate": 9.31462635500216e-06, "loss": 0.5588, "step": 2999 }, { "epoch": 0.19, "grad_norm": 1.1654167175292969, "learning_rate": 9.314098029568488e-06, "loss": 0.5497, "step": 3000 }, { "epoch": 0.19, "grad_norm": 1.2026379108428955, "learning_rate": 9.313569515576724e-06, "loss": 0.574, "step": 3001 }, { "epoch": 0.19, "grad_norm": 1.1790704727172852, "learning_rate": 9.313040813049963e-06, "loss": 0.571, "step": 3002 }, { "epoch": 0.19, "grad_norm": 1.1747041940689087, "learning_rate": 9.312511922011317e-06, "loss": 0.6134, "step": 3003 }, { "epoch": 0.19, "grad_norm": 1.1289405822753906, "learning_rate": 9.311982842483902e-06, "loss": 0.545, "step": 3004 }, { "epoch": 0.19, "grad_norm": 1.1866462230682373, "learning_rate": 9.31145357449084e-06, "loss": 0.5195, "step": 3005 }, { "epoch": 0.19, "grad_norm": 1.3184033632278442, "learning_rate": 9.310924118055266e-06, "loss": 0.6042, "step": 3006 }, { "epoch": 0.19, "grad_norm": 1.1499971151351929, "learning_rate": 9.310394473200325e-06, "loss": 0.5747, "step": 3007 }, { "epoch": 0.19, "grad_norm": 1.1757899522781372, "learning_rate": 9.309864639949157e-06, "loss": 0.589, "step": 3008 }, { "epoch": 0.19, "grad_norm": 1.2816922664642334, "learning_rate": 9.30933461832493e-06, "loss": 0.5881, "step": 3009 }, { "epoch": 0.19, "grad_norm": 1.2438931465148926, "learning_rate": 9.308804408350802e-06, "loss": 0.5569, "step": 3010 }, { "epoch": 0.19, "grad_norm": 1.2539668083190918, "learning_rate": 9.308274010049952e-06, "loss": 0.5831, "step": 3011 }, { "epoch": 0.19, "grad_norm": 1.118465781211853, "learning_rate": 9.307743423445558e-06, "loss": 0.5419, "step": 3012 }, { "epoch": 0.19, "grad_norm": 1.2188332080841064, "learning_rate": 9.307212648560814e-06, "loss": 0.5509, "step": 3013 }, { "epoch": 0.19, "grad_norm": 1.2364872694015503, "learning_rate": 9.306681685418918e-06, "loss": 0.5904, "step": 3014 }, { "epoch": 0.19, "grad_norm": 1.3011926412582397, "learning_rate": 9.30615053404308e-06, "loss": 0.5953, "step": 3015 }, { "epoch": 0.19, "grad_norm": 1.263076901435852, "learning_rate": 9.30561919445651e-06, "loss": 0.5631, "step": 3016 }, { "epoch": 0.19, "grad_norm": 1.1906912326812744, "learning_rate": 9.305087666682432e-06, "loss": 0.5798, "step": 3017 }, { "epoch": 0.19, "grad_norm": 1.1447947025299072, "learning_rate": 9.30455595074408e-06, "loss": 0.5769, "step": 3018 }, { "epoch": 0.19, "grad_norm": 1.1908756494522095, "learning_rate": 9.304024046664695e-06, "loss": 0.6019, "step": 3019 }, { "epoch": 0.19, "grad_norm": 1.204148292541504, "learning_rate": 9.303491954467523e-06, "loss": 0.5979, "step": 3020 }, { "epoch": 0.2, "grad_norm": 1.182617425918579, "learning_rate": 9.30295967417582e-06, "loss": 0.6354, "step": 3021 }, { "epoch": 0.2, "grad_norm": 1.1092060804367065, "learning_rate": 9.302427205812851e-06, "loss": 0.5832, "step": 3022 }, { "epoch": 0.2, "grad_norm": 1.2104548215866089, "learning_rate": 9.30189454940189e-06, "loss": 0.6054, "step": 3023 }, { "epoch": 0.2, "grad_norm": 1.2978227138519287, "learning_rate": 9.301361704966219e-06, "loss": 0.5435, "step": 3024 }, { "epoch": 0.2, "grad_norm": 1.1844843626022339, "learning_rate": 9.300828672529123e-06, "loss": 0.5429, "step": 3025 }, { "epoch": 0.2, "grad_norm": 1.1885480880737305, "learning_rate": 9.300295452113903e-06, "loss": 0.5715, "step": 3026 }, { "epoch": 0.2, "grad_norm": 1.3221757411956787, "learning_rate": 9.299762043743863e-06, "loss": 0.559, "step": 3027 }, { "epoch": 0.2, "grad_norm": 1.5773168802261353, "learning_rate": 9.299228447442317e-06, "loss": 0.5759, "step": 3028 }, { "epoch": 0.2, "grad_norm": 1.2749098539352417, "learning_rate": 9.29869466323259e-06, "loss": 0.5976, "step": 3029 }, { "epoch": 0.2, "grad_norm": 1.0603853464126587, "learning_rate": 9.29816069113801e-06, "loss": 0.5573, "step": 3030 }, { "epoch": 0.2, "grad_norm": 1.1376712322235107, "learning_rate": 9.297626531181913e-06, "loss": 0.5433, "step": 3031 }, { "epoch": 0.2, "grad_norm": 1.208250880241394, "learning_rate": 9.29709218338765e-06, "loss": 0.5845, "step": 3032 }, { "epoch": 0.2, "grad_norm": 1.193292498588562, "learning_rate": 9.296557647778574e-06, "loss": 0.5866, "step": 3033 }, { "epoch": 0.2, "grad_norm": 1.115847110748291, "learning_rate": 9.29602292437805e-06, "loss": 0.5608, "step": 3034 }, { "epoch": 0.2, "grad_norm": 1.238458275794983, "learning_rate": 9.295488013209445e-06, "loss": 0.5705, "step": 3035 }, { "epoch": 0.2, "grad_norm": 1.0180041790008545, "learning_rate": 9.294952914296142e-06, "loss": 0.5449, "step": 3036 }, { "epoch": 0.2, "grad_norm": 1.115996241569519, "learning_rate": 9.294417627661531e-06, "loss": 0.521, "step": 3037 }, { "epoch": 0.2, "grad_norm": 1.3330835103988647, "learning_rate": 9.293882153329003e-06, "loss": 0.5853, "step": 3038 }, { "epoch": 0.2, "grad_norm": 1.0698211193084717, "learning_rate": 9.293346491321965e-06, "loss": 0.5526, "step": 3039 }, { "epoch": 0.2, "grad_norm": 1.2698723077774048, "learning_rate": 9.292810641663828e-06, "loss": 0.6264, "step": 3040 }, { "epoch": 0.2, "grad_norm": 1.1035642623901367, "learning_rate": 9.292274604378014e-06, "loss": 0.5675, "step": 3041 }, { "epoch": 0.2, "grad_norm": 1.1591750383377075, "learning_rate": 9.291738379487952e-06, "loss": 0.5751, "step": 3042 }, { "epoch": 0.2, "grad_norm": 1.1899405717849731, "learning_rate": 9.291201967017078e-06, "loss": 0.6132, "step": 3043 }, { "epoch": 0.2, "grad_norm": 1.2209100723266602, "learning_rate": 9.290665366988835e-06, "loss": 0.6155, "step": 3044 }, { "epoch": 0.2, "grad_norm": 1.1243348121643066, "learning_rate": 9.290128579426683e-06, "loss": 0.5541, "step": 3045 }, { "epoch": 0.2, "grad_norm": 1.0980095863342285, "learning_rate": 9.289591604354076e-06, "loss": 0.5454, "step": 3046 }, { "epoch": 0.2, "grad_norm": 1.1246838569641113, "learning_rate": 9.289054441794489e-06, "loss": 0.6025, "step": 3047 }, { "epoch": 0.2, "grad_norm": 1.4605313539505005, "learning_rate": 9.2885170917714e-06, "loss": 0.6037, "step": 3048 }, { "epoch": 0.2, "grad_norm": 1.1088520288467407, "learning_rate": 9.28797955430829e-06, "loss": 0.5419, "step": 3049 }, { "epoch": 0.2, "grad_norm": 1.202724575996399, "learning_rate": 9.287441829428659e-06, "loss": 0.5985, "step": 3050 }, { "epoch": 0.2, "grad_norm": 1.261145830154419, "learning_rate": 9.286903917156005e-06, "loss": 0.5385, "step": 3051 }, { "epoch": 0.2, "grad_norm": 1.1214854717254639, "learning_rate": 9.286365817513845e-06, "loss": 0.5899, "step": 3052 }, { "epoch": 0.2, "grad_norm": 1.1702290773391724, "learning_rate": 9.28582753052569e-06, "loss": 0.5388, "step": 3053 }, { "epoch": 0.2, "grad_norm": 1.2340562343597412, "learning_rate": 9.285289056215075e-06, "loss": 0.6168, "step": 3054 }, { "epoch": 0.2, "grad_norm": 1.225834846496582, "learning_rate": 9.284750394605528e-06, "loss": 0.5294, "step": 3055 }, { "epoch": 0.2, "grad_norm": 1.1382027864456177, "learning_rate": 9.284211545720599e-06, "loss": 0.5029, "step": 3056 }, { "epoch": 0.2, "grad_norm": 1.2150357961654663, "learning_rate": 9.283672509583834e-06, "loss": 0.5657, "step": 3057 }, { "epoch": 0.2, "grad_norm": 1.2301353216171265, "learning_rate": 9.283133286218797e-06, "loss": 0.5552, "step": 3058 }, { "epoch": 0.2, "grad_norm": 1.2691477537155151, "learning_rate": 9.282593875649055e-06, "loss": 0.59, "step": 3059 }, { "epoch": 0.2, "grad_norm": 1.1839356422424316, "learning_rate": 9.282054277898184e-06, "loss": 0.588, "step": 3060 }, { "epoch": 0.2, "grad_norm": 1.2212755680084229, "learning_rate": 9.281514492989768e-06, "loss": 0.6149, "step": 3061 }, { "epoch": 0.2, "grad_norm": 1.098632574081421, "learning_rate": 9.280974520947401e-06, "loss": 0.6034, "step": 3062 }, { "epoch": 0.2, "grad_norm": 1.1404131650924683, "learning_rate": 9.280434361794682e-06, "loss": 0.5874, "step": 3063 }, { "epoch": 0.2, "grad_norm": 1.0942269563674927, "learning_rate": 9.279894015555219e-06, "loss": 0.55, "step": 3064 }, { "epoch": 0.2, "grad_norm": 1.3545130491256714, "learning_rate": 9.279353482252633e-06, "loss": 0.5875, "step": 3065 }, { "epoch": 0.2, "grad_norm": 1.2901129722595215, "learning_rate": 9.278812761910547e-06, "loss": 0.5926, "step": 3066 }, { "epoch": 0.2, "grad_norm": 1.2716058492660522, "learning_rate": 9.278271854552594e-06, "loss": 0.5859, "step": 3067 }, { "epoch": 0.2, "grad_norm": 1.170413613319397, "learning_rate": 9.277730760202417e-06, "loss": 0.526, "step": 3068 }, { "epoch": 0.2, "grad_norm": 1.2183921337127686, "learning_rate": 9.277189478883663e-06, "loss": 0.5738, "step": 3069 }, { "epoch": 0.2, "grad_norm": 1.4036601781845093, "learning_rate": 9.276648010619996e-06, "loss": 0.6182, "step": 3070 }, { "epoch": 0.2, "grad_norm": 1.1547746658325195, "learning_rate": 9.276106355435075e-06, "loss": 0.5522, "step": 3071 }, { "epoch": 0.2, "grad_norm": 1.1385976076126099, "learning_rate": 9.27556451335258e-06, "loss": 0.5868, "step": 3072 }, { "epoch": 0.2, "grad_norm": 1.1587268114089966, "learning_rate": 9.27502248439619e-06, "loss": 0.6209, "step": 3073 }, { "epoch": 0.2, "grad_norm": 1.1533708572387695, "learning_rate": 9.274480268589597e-06, "loss": 0.5461, "step": 3074 }, { "epoch": 0.2, "grad_norm": 1.1614896059036255, "learning_rate": 9.273937865956499e-06, "loss": 0.5658, "step": 3075 }, { "epoch": 0.2, "grad_norm": 1.2673977613449097, "learning_rate": 9.273395276520605e-06, "loss": 0.5812, "step": 3076 }, { "epoch": 0.2, "grad_norm": 1.2389756441116333, "learning_rate": 9.272852500305629e-06, "loss": 0.5545, "step": 3077 }, { "epoch": 0.2, "grad_norm": 1.1499866247177124, "learning_rate": 9.272309537335293e-06, "loss": 0.628, "step": 3078 }, { "epoch": 0.2, "grad_norm": 1.1667910814285278, "learning_rate": 9.271766387633329e-06, "loss": 0.6075, "step": 3079 }, { "epoch": 0.2, "grad_norm": 1.2398629188537598, "learning_rate": 9.271223051223482e-06, "loss": 0.6133, "step": 3080 }, { "epoch": 0.2, "grad_norm": 1.2859171628952026, "learning_rate": 9.270679528129491e-06, "loss": 0.5454, "step": 3081 }, { "epoch": 0.2, "grad_norm": 1.0893192291259766, "learning_rate": 9.270135818375118e-06, "loss": 0.5017, "step": 3082 }, { "epoch": 0.2, "grad_norm": 1.1965618133544922, "learning_rate": 9.269591921984125e-06, "loss": 0.6198, "step": 3083 }, { "epoch": 0.2, "grad_norm": 1.2517597675323486, "learning_rate": 9.269047838980285e-06, "loss": 0.5545, "step": 3084 }, { "epoch": 0.2, "grad_norm": 1.1563276052474976, "learning_rate": 9.26850356938738e-06, "loss": 0.5546, "step": 3085 }, { "epoch": 0.2, "grad_norm": 1.1209627389907837, "learning_rate": 9.267959113229194e-06, "loss": 0.5448, "step": 3086 }, { "epoch": 0.2, "grad_norm": 1.2530274391174316, "learning_rate": 9.267414470529528e-06, "loss": 0.563, "step": 3087 }, { "epoch": 0.2, "grad_norm": 1.1420531272888184, "learning_rate": 9.266869641312186e-06, "loss": 0.5224, "step": 3088 }, { "epoch": 0.2, "grad_norm": 1.1945964097976685, "learning_rate": 9.26632462560098e-06, "loss": 0.5863, "step": 3089 }, { "epoch": 0.2, "grad_norm": 1.2304948568344116, "learning_rate": 9.265779423419732e-06, "loss": 0.5907, "step": 3090 }, { "epoch": 0.2, "grad_norm": 1.1841979026794434, "learning_rate": 9.265234034792272e-06, "loss": 0.5614, "step": 3091 }, { "epoch": 0.2, "grad_norm": 1.1420583724975586, "learning_rate": 9.264688459742435e-06, "loss": 0.569, "step": 3092 }, { "epoch": 0.2, "grad_norm": 1.226837396621704, "learning_rate": 9.26414269829407e-06, "loss": 0.565, "step": 3093 }, { "epoch": 0.2, "grad_norm": 1.1481131315231323, "learning_rate": 9.263596750471028e-06, "loss": 0.5534, "step": 3094 }, { "epoch": 0.2, "grad_norm": 1.2474517822265625, "learning_rate": 9.263050616297174e-06, "loss": 0.5393, "step": 3095 }, { "epoch": 0.2, "grad_norm": 1.060219168663025, "learning_rate": 9.262504295796374e-06, "loss": 0.5645, "step": 3096 }, { "epoch": 0.2, "grad_norm": 1.1034587621688843, "learning_rate": 9.261957788992511e-06, "loss": 0.5369, "step": 3097 }, { "epoch": 0.2, "grad_norm": 1.2520973682403564, "learning_rate": 9.261411095909467e-06, "loss": 0.5771, "step": 3098 }, { "epoch": 0.2, "grad_norm": 1.2227691411972046, "learning_rate": 9.26086421657114e-06, "loss": 0.5856, "step": 3099 }, { "epoch": 0.2, "grad_norm": 1.2240806818008423, "learning_rate": 9.260317151001432e-06, "loss": 0.5862, "step": 3100 }, { "epoch": 0.2, "grad_norm": 1.3179073333740234, "learning_rate": 9.259769899224252e-06, "loss": 0.567, "step": 3101 }, { "epoch": 0.2, "grad_norm": 1.1904536485671997, "learning_rate": 9.259222461263519e-06, "loss": 0.6121, "step": 3102 }, { "epoch": 0.2, "grad_norm": 1.2896807193756104, "learning_rate": 9.258674837143162e-06, "loss": 0.6074, "step": 3103 }, { "epoch": 0.2, "grad_norm": 1.153404712677002, "learning_rate": 9.258127026887115e-06, "loss": 0.5655, "step": 3104 }, { "epoch": 0.2, "grad_norm": 1.200103759765625, "learning_rate": 9.257579030519324e-06, "loss": 0.5785, "step": 3105 }, { "epoch": 0.2, "grad_norm": 1.2146048545837402, "learning_rate": 9.257030848063737e-06, "loss": 0.5825, "step": 3106 }, { "epoch": 0.2, "grad_norm": 1.2679156064987183, "learning_rate": 9.256482479544314e-06, "loss": 0.5603, "step": 3107 }, { "epoch": 0.2, "grad_norm": 1.2259807586669922, "learning_rate": 9.255933924985024e-06, "loss": 0.5563, "step": 3108 }, { "epoch": 0.2, "grad_norm": 1.5176646709442139, "learning_rate": 9.255385184409844e-06, "loss": 0.5643, "step": 3109 }, { "epoch": 0.2, "grad_norm": 1.242699384689331, "learning_rate": 9.254836257842755e-06, "loss": 0.564, "step": 3110 }, { "epoch": 0.2, "grad_norm": 1.166407585144043, "learning_rate": 9.254287145307752e-06, "loss": 0.5885, "step": 3111 }, { "epoch": 0.2, "grad_norm": 1.5741389989852905, "learning_rate": 9.253737846828833e-06, "loss": 0.5841, "step": 3112 }, { "epoch": 0.2, "grad_norm": 1.2254014015197754, "learning_rate": 9.253188362430008e-06, "loss": 0.5246, "step": 3113 }, { "epoch": 0.2, "grad_norm": 1.1280170679092407, "learning_rate": 9.252638692135293e-06, "loss": 0.5499, "step": 3114 }, { "epoch": 0.2, "grad_norm": 1.1870758533477783, "learning_rate": 9.252088835968713e-06, "loss": 0.5667, "step": 3115 }, { "epoch": 0.2, "grad_norm": 1.2024322748184204, "learning_rate": 9.2515387939543e-06, "loss": 0.5601, "step": 3116 }, { "epoch": 0.2, "grad_norm": 1.232771396636963, "learning_rate": 9.250988566116095e-06, "loss": 0.608, "step": 3117 }, { "epoch": 0.2, "grad_norm": 1.2011046409606934, "learning_rate": 9.250438152478149e-06, "loss": 0.5322, "step": 3118 }, { "epoch": 0.2, "grad_norm": 1.0950895547866821, "learning_rate": 9.249887553064515e-06, "loss": 0.5804, "step": 3119 }, { "epoch": 0.2, "grad_norm": 1.1692290306091309, "learning_rate": 9.249336767899263e-06, "loss": 0.5892, "step": 3120 }, { "epoch": 0.2, "grad_norm": 1.3078993558883667, "learning_rate": 9.248785797006466e-06, "loss": 0.6005, "step": 3121 }, { "epoch": 0.2, "grad_norm": 1.1862350702285767, "learning_rate": 9.2482346404102e-06, "loss": 0.6049, "step": 3122 }, { "epoch": 0.2, "grad_norm": 1.1863757371902466, "learning_rate": 9.24768329813456e-06, "loss": 0.5306, "step": 3123 }, { "epoch": 0.2, "grad_norm": 1.282405138015747, "learning_rate": 9.247131770203643e-06, "loss": 0.5769, "step": 3124 }, { "epoch": 0.2, "grad_norm": 1.362438440322876, "learning_rate": 9.246580056641555e-06, "loss": 0.6167, "step": 3125 }, { "epoch": 0.2, "grad_norm": 1.176234483718872, "learning_rate": 9.246028157472406e-06, "loss": 0.5424, "step": 3126 }, { "epoch": 0.2, "grad_norm": 1.1148561239242554, "learning_rate": 9.245476072720322e-06, "loss": 0.5813, "step": 3127 }, { "epoch": 0.2, "grad_norm": 1.21293044090271, "learning_rate": 9.244923802409435e-06, "loss": 0.6077, "step": 3128 }, { "epoch": 0.2, "grad_norm": 1.1458754539489746, "learning_rate": 9.244371346563879e-06, "loss": 0.5527, "step": 3129 }, { "epoch": 0.2, "grad_norm": 1.2727504968643188, "learning_rate": 9.2438187052078e-06, "loss": 0.6013, "step": 3130 }, { "epoch": 0.2, "grad_norm": 1.2853513956069946, "learning_rate": 9.243265878365358e-06, "loss": 0.5643, "step": 3131 }, { "epoch": 0.2, "grad_norm": 1.1593635082244873, "learning_rate": 9.24271286606071e-06, "loss": 0.5642, "step": 3132 }, { "epoch": 0.2, "grad_norm": 1.1768016815185547, "learning_rate": 9.242159668318029e-06, "loss": 0.5223, "step": 3133 }, { "epoch": 0.2, "grad_norm": 1.1358944177627563, "learning_rate": 9.241606285161495e-06, "loss": 0.5339, "step": 3134 }, { "epoch": 0.2, "grad_norm": 1.2683675289154053, "learning_rate": 9.241052716615294e-06, "loss": 0.5278, "step": 3135 }, { "epoch": 0.2, "grad_norm": 1.2040568590164185, "learning_rate": 9.24049896270362e-06, "loss": 0.572, "step": 3136 }, { "epoch": 0.2, "grad_norm": 1.0954809188842773, "learning_rate": 9.239945023450676e-06, "loss": 0.4931, "step": 3137 }, { "epoch": 0.2, "grad_norm": 1.2793693542480469, "learning_rate": 9.239390898880675e-06, "loss": 0.5486, "step": 3138 }, { "epoch": 0.2, "grad_norm": 1.2387056350708008, "learning_rate": 9.238836589017835e-06, "loss": 0.5513, "step": 3139 }, { "epoch": 0.2, "grad_norm": 1.1596306562423706, "learning_rate": 9.238282093886386e-06, "loss": 0.6126, "step": 3140 }, { "epoch": 0.2, "grad_norm": 1.2129570245742798, "learning_rate": 9.23772741351056e-06, "loss": 0.609, "step": 3141 }, { "epoch": 0.2, "grad_norm": 1.2449871301651, "learning_rate": 9.237172547914604e-06, "loss": 0.5751, "step": 3142 }, { "epoch": 0.2, "grad_norm": 1.3400845527648926, "learning_rate": 9.236617497122765e-06, "loss": 0.6106, "step": 3143 }, { "epoch": 0.2, "grad_norm": 1.2310261726379395, "learning_rate": 9.236062261159308e-06, "loss": 0.5224, "step": 3144 }, { "epoch": 0.2, "grad_norm": 1.1320041418075562, "learning_rate": 9.235506840048498e-06, "loss": 0.5693, "step": 3145 }, { "epoch": 0.2, "grad_norm": 1.217547059059143, "learning_rate": 9.234951233814612e-06, "loss": 0.5733, "step": 3146 }, { "epoch": 0.2, "grad_norm": 1.2469518184661865, "learning_rate": 9.234395442481933e-06, "loss": 0.5408, "step": 3147 }, { "epoch": 0.2, "grad_norm": 1.3814339637756348, "learning_rate": 9.233839466074756e-06, "loss": 0.5927, "step": 3148 }, { "epoch": 0.2, "grad_norm": 1.1280173063278198, "learning_rate": 9.233283304617378e-06, "loss": 0.5369, "step": 3149 }, { "epoch": 0.2, "grad_norm": 1.1675474643707275, "learning_rate": 9.232726958134108e-06, "loss": 0.5673, "step": 3150 }, { "epoch": 0.2, "grad_norm": 1.133532166481018, "learning_rate": 9.232170426649265e-06, "loss": 0.5523, "step": 3151 }, { "epoch": 0.2, "grad_norm": 1.2565594911575317, "learning_rate": 9.231613710187172e-06, "loss": 0.5523, "step": 3152 }, { "epoch": 0.2, "grad_norm": 1.2449867725372314, "learning_rate": 9.23105680877216e-06, "loss": 0.618, "step": 3153 }, { "epoch": 0.2, "grad_norm": 1.9856747388839722, "learning_rate": 9.230499722428571e-06, "loss": 0.5663, "step": 3154 }, { "epoch": 0.2, "grad_norm": 1.366297960281372, "learning_rate": 9.229942451180755e-06, "loss": 0.5813, "step": 3155 }, { "epoch": 0.2, "grad_norm": 1.3256531953811646, "learning_rate": 9.229384995053067e-06, "loss": 0.6084, "step": 3156 }, { "epoch": 0.2, "grad_norm": 1.1897234916687012, "learning_rate": 9.228827354069873e-06, "loss": 0.5848, "step": 3157 }, { "epoch": 0.2, "grad_norm": 1.2810373306274414, "learning_rate": 9.228269528255546e-06, "loss": 0.6374, "step": 3158 }, { "epoch": 0.2, "grad_norm": 1.2191492319107056, "learning_rate": 9.227711517634468e-06, "loss": 0.545, "step": 3159 }, { "epoch": 0.2, "grad_norm": 1.291992425918579, "learning_rate": 9.227153322231027e-06, "loss": 0.5534, "step": 3160 }, { "epoch": 0.2, "grad_norm": 1.2392547130584717, "learning_rate": 9.22659494206962e-06, "loss": 0.5758, "step": 3161 }, { "epoch": 0.2, "grad_norm": 1.2138727903366089, "learning_rate": 9.226036377174654e-06, "loss": 0.5447, "step": 3162 }, { "epoch": 0.2, "grad_norm": 1.1750915050506592, "learning_rate": 9.22547762757054e-06, "loss": 0.5904, "step": 3163 }, { "epoch": 0.2, "grad_norm": 1.21768319606781, "learning_rate": 9.2249186932817e-06, "loss": 0.557, "step": 3164 }, { "epoch": 0.2, "grad_norm": 1.2026715278625488, "learning_rate": 9.224359574332564e-06, "loss": 0.6422, "step": 3165 }, { "epoch": 0.2, "grad_norm": 1.2104121446609497, "learning_rate": 9.223800270747571e-06, "loss": 0.6157, "step": 3166 }, { "epoch": 0.2, "grad_norm": 1.4290170669555664, "learning_rate": 9.223240782551168e-06, "loss": 0.5507, "step": 3167 }, { "epoch": 0.2, "grad_norm": 1.1749558448791504, "learning_rate": 9.222681109767803e-06, "loss": 0.5356, "step": 3168 }, { "epoch": 0.2, "grad_norm": 1.1751580238342285, "learning_rate": 9.222121252421942e-06, "loss": 0.5612, "step": 3169 }, { "epoch": 0.2, "grad_norm": 1.2719545364379883, "learning_rate": 9.221561210538057e-06, "loss": 0.6228, "step": 3170 }, { "epoch": 0.2, "grad_norm": 1.0592013597488403, "learning_rate": 9.22100098414062e-06, "loss": 0.5478, "step": 3171 }, { "epoch": 0.2, "grad_norm": 1.0779725313186646, "learning_rate": 9.220440573254123e-06, "loss": 0.6089, "step": 3172 }, { "epoch": 0.2, "grad_norm": 1.336137056350708, "learning_rate": 9.219879977903055e-06, "loss": 0.6171, "step": 3173 }, { "epoch": 0.2, "grad_norm": 1.2628681659698486, "learning_rate": 9.219319198111923e-06, "loss": 0.5971, "step": 3174 }, { "epoch": 0.2, "grad_norm": 1.1908735036849976, "learning_rate": 9.218758233905232e-06, "loss": 0.5477, "step": 3175 }, { "epoch": 0.21, "grad_norm": 1.2320318222045898, "learning_rate": 9.218197085307505e-06, "loss": 0.58, "step": 3176 }, { "epoch": 0.21, "grad_norm": 1.2913347482681274, "learning_rate": 9.217635752343267e-06, "loss": 0.5659, "step": 3177 }, { "epoch": 0.21, "grad_norm": 1.57328200340271, "learning_rate": 9.217074235037051e-06, "loss": 0.5721, "step": 3178 }, { "epoch": 0.21, "grad_norm": 1.2339264154434204, "learning_rate": 9.2165125334134e-06, "loss": 0.5902, "step": 3179 }, { "epoch": 0.21, "grad_norm": 1.1812976598739624, "learning_rate": 9.215950647496865e-06, "loss": 0.5553, "step": 3180 }, { "epoch": 0.21, "grad_norm": 1.155061960220337, "learning_rate": 9.215388577312004e-06, "loss": 0.5426, "step": 3181 }, { "epoch": 0.21, "grad_norm": 1.279393196105957, "learning_rate": 9.214826322883386e-06, "loss": 0.5895, "step": 3182 }, { "epoch": 0.21, "grad_norm": 1.527185320854187, "learning_rate": 9.214263884235581e-06, "loss": 0.5351, "step": 3183 }, { "epoch": 0.21, "grad_norm": 1.3971984386444092, "learning_rate": 9.213701261393177e-06, "loss": 0.6391, "step": 3184 }, { "epoch": 0.21, "grad_norm": 1.1661394834518433, "learning_rate": 9.213138454380762e-06, "loss": 0.5594, "step": 3185 }, { "epoch": 0.21, "grad_norm": 1.238395094871521, "learning_rate": 9.212575463222934e-06, "loss": 0.5774, "step": 3186 }, { "epoch": 0.21, "grad_norm": 1.162309169769287, "learning_rate": 9.212012287944302e-06, "loss": 0.5525, "step": 3187 }, { "epoch": 0.21, "grad_norm": 1.241858959197998, "learning_rate": 9.211448928569481e-06, "loss": 0.617, "step": 3188 }, { "epoch": 0.21, "grad_norm": 1.1788252592086792, "learning_rate": 9.210885385123092e-06, "loss": 0.5484, "step": 3189 }, { "epoch": 0.21, "grad_norm": 1.2766435146331787, "learning_rate": 9.210321657629768e-06, "loss": 0.601, "step": 3190 }, { "epoch": 0.21, "grad_norm": 1.202851414680481, "learning_rate": 9.209757746114145e-06, "loss": 0.586, "step": 3191 }, { "epoch": 0.21, "grad_norm": 1.2497546672821045, "learning_rate": 9.209193650600874e-06, "loss": 0.5802, "step": 3192 }, { "epoch": 0.21, "grad_norm": 1.2120978832244873, "learning_rate": 9.208629371114609e-06, "loss": 0.5468, "step": 3193 }, { "epoch": 0.21, "grad_norm": 1.1653333902359009, "learning_rate": 9.20806490768001e-06, "loss": 0.5323, "step": 3194 }, { "epoch": 0.21, "grad_norm": 1.2423603534698486, "learning_rate": 9.207500260321754e-06, "loss": 0.5854, "step": 3195 }, { "epoch": 0.21, "grad_norm": 1.2802413702011108, "learning_rate": 9.206935429064515e-06, "loss": 0.5751, "step": 3196 }, { "epoch": 0.21, "grad_norm": 1.3120566606521606, "learning_rate": 9.206370413932982e-06, "loss": 0.5877, "step": 3197 }, { "epoch": 0.21, "grad_norm": 1.1930646896362305, "learning_rate": 9.20580521495185e-06, "loss": 0.5264, "step": 3198 }, { "epoch": 0.21, "grad_norm": 1.2261452674865723, "learning_rate": 9.205239832145826e-06, "loss": 0.5944, "step": 3199 }, { "epoch": 0.21, "grad_norm": 1.189013123512268, "learning_rate": 9.204674265539617e-06, "loss": 0.5521, "step": 3200 }, { "epoch": 0.21, "grad_norm": 1.3942326307296753, "learning_rate": 9.204108515157945e-06, "loss": 0.5965, "step": 3201 }, { "epoch": 0.21, "grad_norm": 1.2205475568771362, "learning_rate": 9.203542581025536e-06, "loss": 0.5721, "step": 3202 }, { "epoch": 0.21, "grad_norm": 1.5473785400390625, "learning_rate": 9.202976463167125e-06, "loss": 0.5935, "step": 3203 }, { "epoch": 0.21, "grad_norm": 1.242194652557373, "learning_rate": 9.202410161607458e-06, "loss": 0.5492, "step": 3204 }, { "epoch": 0.21, "grad_norm": 1.2715033292770386, "learning_rate": 9.201843676371285e-06, "loss": 0.5794, "step": 3205 }, { "epoch": 0.21, "grad_norm": 1.1847392320632935, "learning_rate": 9.201277007483365e-06, "loss": 0.519, "step": 3206 }, { "epoch": 0.21, "grad_norm": 1.1556737422943115, "learning_rate": 9.200710154968468e-06, "loss": 0.5661, "step": 3207 }, { "epoch": 0.21, "grad_norm": 1.1237311363220215, "learning_rate": 9.200143118851367e-06, "loss": 0.4972, "step": 3208 }, { "epoch": 0.21, "grad_norm": 1.2016903162002563, "learning_rate": 9.199575899156847e-06, "loss": 0.59, "step": 3209 }, { "epoch": 0.21, "grad_norm": 1.3927706480026245, "learning_rate": 9.199008495909701e-06, "loss": 0.5747, "step": 3210 }, { "epoch": 0.21, "grad_norm": 1.2326676845550537, "learning_rate": 9.198440909134726e-06, "loss": 0.6098, "step": 3211 }, { "epoch": 0.21, "grad_norm": 1.2626922130584717, "learning_rate": 9.197873138856728e-06, "loss": 0.5385, "step": 3212 }, { "epoch": 0.21, "grad_norm": 1.1958080530166626, "learning_rate": 9.197305185100529e-06, "loss": 0.6488, "step": 3213 }, { "epoch": 0.21, "grad_norm": 1.2457338571548462, "learning_rate": 9.196737047890949e-06, "loss": 0.5994, "step": 3214 }, { "epoch": 0.21, "grad_norm": 1.1240876913070679, "learning_rate": 9.196168727252821e-06, "loss": 0.6064, "step": 3215 }, { "epoch": 0.21, "grad_norm": 1.2801575660705566, "learning_rate": 9.195600223210983e-06, "loss": 0.6172, "step": 3216 }, { "epoch": 0.21, "grad_norm": 1.2687699794769287, "learning_rate": 9.195031535790285e-06, "loss": 0.5801, "step": 3217 }, { "epoch": 0.21, "grad_norm": 1.2943322658538818, "learning_rate": 9.194462665015581e-06, "loss": 0.5393, "step": 3218 }, { "epoch": 0.21, "grad_norm": 1.326707124710083, "learning_rate": 9.193893610911737e-06, "loss": 0.5842, "step": 3219 }, { "epoch": 0.21, "grad_norm": 1.1659423112869263, "learning_rate": 9.193324373503623e-06, "loss": 0.5343, "step": 3220 }, { "epoch": 0.21, "grad_norm": 1.1552953720092773, "learning_rate": 9.19275495281612e-06, "loss": 0.5866, "step": 3221 }, { "epoch": 0.21, "grad_norm": 1.2007992267608643, "learning_rate": 9.192185348874115e-06, "loss": 0.5805, "step": 3222 }, { "epoch": 0.21, "grad_norm": 1.2805343866348267, "learning_rate": 9.191615561702504e-06, "loss": 0.6344, "step": 3223 }, { "epoch": 0.21, "grad_norm": 1.1210075616836548, "learning_rate": 9.191045591326191e-06, "loss": 0.5946, "step": 3224 }, { "epoch": 0.21, "grad_norm": 1.1996288299560547, "learning_rate": 9.19047543777009e-06, "loss": 0.5732, "step": 3225 }, { "epoch": 0.21, "grad_norm": 1.0973951816558838, "learning_rate": 9.189905101059118e-06, "loss": 0.523, "step": 3226 }, { "epoch": 0.21, "grad_norm": 1.3126815557479858, "learning_rate": 9.189334581218203e-06, "loss": 0.6291, "step": 3227 }, { "epoch": 0.21, "grad_norm": 1.305846095085144, "learning_rate": 9.188763878272284e-06, "loss": 0.6113, "step": 3228 }, { "epoch": 0.21, "grad_norm": 1.1899962425231934, "learning_rate": 9.188192992246301e-06, "loss": 0.5473, "step": 3229 }, { "epoch": 0.21, "grad_norm": 1.3014800548553467, "learning_rate": 9.187621923165211e-06, "loss": 0.5744, "step": 3230 }, { "epoch": 0.21, "grad_norm": 1.28195059299469, "learning_rate": 9.187050671053969e-06, "loss": 0.5665, "step": 3231 }, { "epoch": 0.21, "grad_norm": 1.1685539484024048, "learning_rate": 9.186479235937545e-06, "loss": 0.56, "step": 3232 }, { "epoch": 0.21, "grad_norm": 1.2888380289077759, "learning_rate": 9.185907617840914e-06, "loss": 0.5851, "step": 3233 }, { "epoch": 0.21, "grad_norm": 1.3450217247009277, "learning_rate": 9.185335816789062e-06, "loss": 0.5668, "step": 3234 }, { "epoch": 0.21, "grad_norm": 1.4762879610061646, "learning_rate": 9.184763832806979e-06, "loss": 0.6002, "step": 3235 }, { "epoch": 0.21, "grad_norm": 1.1459236145019531, "learning_rate": 9.184191665919668e-06, "loss": 0.6122, "step": 3236 }, { "epoch": 0.21, "grad_norm": 1.363157868385315, "learning_rate": 9.183619316152132e-06, "loss": 0.5363, "step": 3237 }, { "epoch": 0.21, "grad_norm": 1.2686368227005005, "learning_rate": 9.18304678352939e-06, "loss": 0.5849, "step": 3238 }, { "epoch": 0.21, "grad_norm": 1.4138545989990234, "learning_rate": 9.182474068076468e-06, "loss": 0.5276, "step": 3239 }, { "epoch": 0.21, "grad_norm": 1.3664382696151733, "learning_rate": 9.181901169818392e-06, "loss": 0.5695, "step": 3240 }, { "epoch": 0.21, "grad_norm": 1.1814804077148438, "learning_rate": 9.181328088780208e-06, "loss": 0.5731, "step": 3241 }, { "epoch": 0.21, "grad_norm": 1.1912208795547485, "learning_rate": 9.18075482498696e-06, "loss": 0.5954, "step": 3242 }, { "epoch": 0.21, "grad_norm": 1.1862224340438843, "learning_rate": 9.180181378463705e-06, "loss": 0.5655, "step": 3243 }, { "epoch": 0.21, "grad_norm": 1.125704288482666, "learning_rate": 9.179607749235506e-06, "loss": 0.5362, "step": 3244 }, { "epoch": 0.21, "grad_norm": 1.2126131057739258, "learning_rate": 9.179033937327437e-06, "loss": 0.5758, "step": 3245 }, { "epoch": 0.21, "grad_norm": 1.232466459274292, "learning_rate": 9.178459942764577e-06, "loss": 0.5152, "step": 3246 }, { "epoch": 0.21, "grad_norm": 1.1146864891052246, "learning_rate": 9.177885765572013e-06, "loss": 0.574, "step": 3247 }, { "epoch": 0.21, "grad_norm": 1.142322063446045, "learning_rate": 9.17731140577484e-06, "loss": 0.565, "step": 3248 }, { "epoch": 0.21, "grad_norm": 1.3318791389465332, "learning_rate": 9.176736863398164e-06, "loss": 0.5411, "step": 3249 }, { "epoch": 0.21, "grad_norm": 1.1368727684020996, "learning_rate": 9.176162138467098e-06, "loss": 0.5404, "step": 3250 }, { "epoch": 0.21, "grad_norm": 1.1240506172180176, "learning_rate": 9.175587231006757e-06, "loss": 0.5155, "step": 3251 }, { "epoch": 0.21, "grad_norm": 1.2712898254394531, "learning_rate": 9.175012141042272e-06, "loss": 0.5866, "step": 3252 }, { "epoch": 0.21, "grad_norm": 1.1614004373550415, "learning_rate": 9.174436868598777e-06, "loss": 0.5514, "step": 3253 }, { "epoch": 0.21, "grad_norm": 1.1111935377120972, "learning_rate": 9.173861413701419e-06, "loss": 0.5175, "step": 3254 }, { "epoch": 0.21, "grad_norm": 1.1777615547180176, "learning_rate": 9.173285776375344e-06, "loss": 0.5538, "step": 3255 }, { "epoch": 0.21, "grad_norm": 1.2240502834320068, "learning_rate": 9.172709956645718e-06, "loss": 0.5746, "step": 3256 }, { "epoch": 0.21, "grad_norm": 1.2722468376159668, "learning_rate": 9.172133954537705e-06, "loss": 0.5768, "step": 3257 }, { "epoch": 0.21, "grad_norm": 1.2131973505020142, "learning_rate": 9.17155777007648e-06, "loss": 0.5705, "step": 3258 }, { "epoch": 0.21, "grad_norm": 1.1751967668533325, "learning_rate": 9.170981403287228e-06, "loss": 0.5331, "step": 3259 }, { "epoch": 0.21, "grad_norm": 1.2388476133346558, "learning_rate": 9.170404854195142e-06, "loss": 0.5874, "step": 3260 }, { "epoch": 0.21, "grad_norm": 2.0257680416107178, "learning_rate": 9.169828122825417e-06, "loss": 0.5661, "step": 3261 }, { "epoch": 0.21, "grad_norm": 1.2512621879577637, "learning_rate": 9.169251209203263e-06, "loss": 0.6367, "step": 3262 }, { "epoch": 0.21, "grad_norm": 2.77243971824646, "learning_rate": 9.168674113353898e-06, "loss": 0.5747, "step": 3263 }, { "epoch": 0.21, "grad_norm": 1.174136757850647, "learning_rate": 9.16809683530254e-06, "loss": 0.5337, "step": 3264 }, { "epoch": 0.21, "grad_norm": 1.3985810279846191, "learning_rate": 9.167519375074424e-06, "loss": 0.5459, "step": 3265 }, { "epoch": 0.21, "grad_norm": 1.364342451095581, "learning_rate": 9.16694173269479e-06, "loss": 0.6048, "step": 3266 }, { "epoch": 0.21, "grad_norm": 1.201115369796753, "learning_rate": 9.166363908188882e-06, "loss": 0.6003, "step": 3267 }, { "epoch": 0.21, "grad_norm": 1.0660287141799927, "learning_rate": 9.165785901581956e-06, "loss": 0.5561, "step": 3268 }, { "epoch": 0.21, "grad_norm": 1.2646626234054565, "learning_rate": 9.165207712899277e-06, "loss": 0.6103, "step": 3269 }, { "epoch": 0.21, "grad_norm": 1.1312966346740723, "learning_rate": 9.164629342166118e-06, "loss": 0.5685, "step": 3270 }, { "epoch": 0.21, "grad_norm": 1.3114506006240845, "learning_rate": 9.164050789407752e-06, "loss": 0.5889, "step": 3271 }, { "epoch": 0.21, "grad_norm": 1.1000677347183228, "learning_rate": 9.163472054649471e-06, "loss": 0.5461, "step": 3272 }, { "epoch": 0.21, "grad_norm": 1.2345082759857178, "learning_rate": 9.162893137916568e-06, "loss": 0.6049, "step": 3273 }, { "epoch": 0.21, "grad_norm": 1.1921223402023315, "learning_rate": 9.162314039234346e-06, "loss": 0.5865, "step": 3274 }, { "epoch": 0.21, "grad_norm": 1.3029680252075195, "learning_rate": 9.161734758628117e-06, "loss": 0.6085, "step": 3275 }, { "epoch": 0.21, "grad_norm": 1.2463217973709106, "learning_rate": 9.1611552961232e-06, "loss": 0.5809, "step": 3276 }, { "epoch": 0.21, "grad_norm": 1.213276982307434, "learning_rate": 9.160575651744919e-06, "loss": 0.5378, "step": 3277 }, { "epoch": 0.21, "grad_norm": 1.1138880252838135, "learning_rate": 9.15999582551861e-06, "loss": 0.5837, "step": 3278 }, { "epoch": 0.21, "grad_norm": 1.2306404113769531, "learning_rate": 9.159415817469617e-06, "loss": 0.5392, "step": 3279 }, { "epoch": 0.21, "grad_norm": 1.1619255542755127, "learning_rate": 9.158835627623293e-06, "loss": 0.5476, "step": 3280 }, { "epoch": 0.21, "grad_norm": 1.1789219379425049, "learning_rate": 9.15825525600499e-06, "loss": 0.551, "step": 3281 }, { "epoch": 0.21, "grad_norm": 1.1690165996551514, "learning_rate": 9.15767470264008e-06, "loss": 0.5808, "step": 3282 }, { "epoch": 0.21, "grad_norm": 1.1859601736068726, "learning_rate": 9.157093967553935e-06, "loss": 0.5747, "step": 3283 }, { "epoch": 0.21, "grad_norm": 1.2261254787445068, "learning_rate": 9.156513050771938e-06, "loss": 0.565, "step": 3284 }, { "epoch": 0.21, "grad_norm": 1.228365182876587, "learning_rate": 9.155931952319481e-06, "loss": 0.5745, "step": 3285 }, { "epoch": 0.21, "grad_norm": 1.2197307348251343, "learning_rate": 9.15535067222196e-06, "loss": 0.5659, "step": 3286 }, { "epoch": 0.21, "grad_norm": 1.2443053722381592, "learning_rate": 9.154769210504782e-06, "loss": 0.5726, "step": 3287 }, { "epoch": 0.21, "grad_norm": 1.1320019960403442, "learning_rate": 9.15418756719336e-06, "loss": 0.5222, "step": 3288 }, { "epoch": 0.21, "grad_norm": 1.1971169710159302, "learning_rate": 9.153605742313119e-06, "loss": 0.5626, "step": 3289 }, { "epoch": 0.21, "grad_norm": 1.2094495296478271, "learning_rate": 9.153023735889485e-06, "loss": 0.5789, "step": 3290 }, { "epoch": 0.21, "grad_norm": 1.1274956464767456, "learning_rate": 9.152441547947902e-06, "loss": 0.5112, "step": 3291 }, { "epoch": 0.21, "grad_norm": 1.2734386920928955, "learning_rate": 9.15185917851381e-06, "loss": 0.5563, "step": 3292 }, { "epoch": 0.21, "grad_norm": 1.1931273937225342, "learning_rate": 9.151276627612667e-06, "loss": 0.6121, "step": 3293 }, { "epoch": 0.21, "grad_norm": 1.1216012239456177, "learning_rate": 9.150693895269931e-06, "loss": 0.551, "step": 3294 }, { "epoch": 0.21, "grad_norm": 1.225411295890808, "learning_rate": 9.150110981511076e-06, "loss": 0.5531, "step": 3295 }, { "epoch": 0.21, "grad_norm": 1.3127251863479614, "learning_rate": 9.149527886361576e-06, "loss": 0.5885, "step": 3296 }, { "epoch": 0.21, "grad_norm": 1.1828556060791016, "learning_rate": 9.148944609846917e-06, "loss": 0.5163, "step": 3297 }, { "epoch": 0.21, "grad_norm": 1.584447979927063, "learning_rate": 9.148361151992595e-06, "loss": 0.5398, "step": 3298 }, { "epoch": 0.21, "grad_norm": 1.2711509466171265, "learning_rate": 9.14777751282411e-06, "loss": 0.5947, "step": 3299 }, { "epoch": 0.21, "grad_norm": 1.2207423448562622, "learning_rate": 9.147193692366971e-06, "loss": 0.5979, "step": 3300 }, { "epoch": 0.21, "grad_norm": 1.261200189590454, "learning_rate": 9.146609690646697e-06, "loss": 0.5409, "step": 3301 }, { "epoch": 0.21, "grad_norm": 1.2165231704711914, "learning_rate": 9.14602550768881e-06, "loss": 0.5402, "step": 3302 }, { "epoch": 0.21, "grad_norm": 1.0806571245193481, "learning_rate": 9.145441143518845e-06, "loss": 0.5491, "step": 3303 }, { "epoch": 0.21, "grad_norm": 1.1135239601135254, "learning_rate": 9.144856598162344e-06, "loss": 0.5337, "step": 3304 }, { "epoch": 0.21, "grad_norm": 1.2031333446502686, "learning_rate": 9.144271871644854e-06, "loss": 0.5529, "step": 3305 }, { "epoch": 0.21, "grad_norm": 1.2339937686920166, "learning_rate": 9.143686963991933e-06, "loss": 0.5537, "step": 3306 }, { "epoch": 0.21, "grad_norm": 1.1834051609039307, "learning_rate": 9.143101875229146e-06, "loss": 0.5636, "step": 3307 }, { "epoch": 0.21, "grad_norm": 1.1311811208724976, "learning_rate": 9.142516605382065e-06, "loss": 0.5525, "step": 3308 }, { "epoch": 0.21, "grad_norm": 1.083512783050537, "learning_rate": 9.141931154476271e-06, "loss": 0.476, "step": 3309 }, { "epoch": 0.21, "grad_norm": 1.3622775077819824, "learning_rate": 9.141345522537352e-06, "loss": 0.55, "step": 3310 }, { "epoch": 0.21, "grad_norm": 1.183584451675415, "learning_rate": 9.140759709590908e-06, "loss": 0.5744, "step": 3311 }, { "epoch": 0.21, "grad_norm": 1.168188214302063, "learning_rate": 9.140173715662537e-06, "loss": 0.5502, "step": 3312 }, { "epoch": 0.21, "grad_norm": 1.3496763706207275, "learning_rate": 9.139587540777857e-06, "loss": 0.5654, "step": 3313 }, { "epoch": 0.21, "grad_norm": 1.1359775066375732, "learning_rate": 9.139001184962485e-06, "loss": 0.5556, "step": 3314 }, { "epoch": 0.21, "grad_norm": 1.1893349885940552, "learning_rate": 9.138414648242048e-06, "loss": 0.5871, "step": 3315 }, { "epoch": 0.21, "grad_norm": 1.2179439067840576, "learning_rate": 9.137827930642187e-06, "loss": 0.5991, "step": 3316 }, { "epoch": 0.21, "grad_norm": 1.2648370265960693, "learning_rate": 9.137241032188541e-06, "loss": 0.5603, "step": 3317 }, { "epoch": 0.21, "grad_norm": 1.1682921648025513, "learning_rate": 9.136653952906765e-06, "loss": 0.6156, "step": 3318 }, { "epoch": 0.21, "grad_norm": 1.3237451314926147, "learning_rate": 9.136066692822516e-06, "loss": 0.5992, "step": 3319 }, { "epoch": 0.21, "grad_norm": 1.300005316734314, "learning_rate": 9.135479251961465e-06, "loss": 0.5221, "step": 3320 }, { "epoch": 0.21, "grad_norm": 1.2731091976165771, "learning_rate": 9.134891630349283e-06, "loss": 0.552, "step": 3321 }, { "epoch": 0.21, "grad_norm": 1.2213120460510254, "learning_rate": 9.134303828011658e-06, "loss": 0.5395, "step": 3322 }, { "epoch": 0.21, "grad_norm": 1.099104404449463, "learning_rate": 9.133715844974277e-06, "loss": 0.5427, "step": 3323 }, { "epoch": 0.21, "grad_norm": 1.2346879243850708, "learning_rate": 9.133127681262846e-06, "loss": 0.6153, "step": 3324 }, { "epoch": 0.21, "grad_norm": 1.1816496849060059, "learning_rate": 9.132539336903063e-06, "loss": 0.5658, "step": 3325 }, { "epoch": 0.21, "grad_norm": 1.381506323814392, "learning_rate": 9.13195081192065e-06, "loss": 0.6004, "step": 3326 }, { "epoch": 0.21, "grad_norm": 1.2298451662063599, "learning_rate": 9.131362106341329e-06, "loss": 0.5367, "step": 3327 }, { "epoch": 0.21, "grad_norm": 1.2213636636734009, "learning_rate": 9.130773220190826e-06, "loss": 0.5966, "step": 3328 }, { "epoch": 0.21, "grad_norm": 1.4005558490753174, "learning_rate": 9.130184153494885e-06, "loss": 0.6019, "step": 3329 }, { "epoch": 0.21, "grad_norm": 1.1884208917617798, "learning_rate": 9.129594906279251e-06, "loss": 0.5766, "step": 3330 }, { "epoch": 0.22, "grad_norm": 1.2754526138305664, "learning_rate": 9.129005478569677e-06, "loss": 0.5555, "step": 3331 }, { "epoch": 0.22, "grad_norm": 1.2525064945220947, "learning_rate": 9.128415870391927e-06, "loss": 0.5567, "step": 3332 }, { "epoch": 0.22, "grad_norm": 1.2152231931686401, "learning_rate": 9.127826081771772e-06, "loss": 0.6034, "step": 3333 }, { "epoch": 0.22, "grad_norm": 1.519621729850769, "learning_rate": 9.127236112734986e-06, "loss": 0.6062, "step": 3334 }, { "epoch": 0.22, "grad_norm": 1.16194748878479, "learning_rate": 9.12664596330736e-06, "loss": 0.5488, "step": 3335 }, { "epoch": 0.22, "grad_norm": 1.2370996475219727, "learning_rate": 9.126055633514685e-06, "loss": 0.5619, "step": 3336 }, { "epoch": 0.22, "grad_norm": 1.2171757221221924, "learning_rate": 9.125465123382765e-06, "loss": 0.5834, "step": 3337 }, { "epoch": 0.22, "grad_norm": 1.240556240081787, "learning_rate": 9.124874432937408e-06, "loss": 0.5675, "step": 3338 }, { "epoch": 0.22, "grad_norm": 1.2205790281295776, "learning_rate": 9.124283562204431e-06, "loss": 0.5927, "step": 3339 }, { "epoch": 0.22, "grad_norm": 1.1878061294555664, "learning_rate": 9.12369251120966e-06, "loss": 0.5241, "step": 3340 }, { "epoch": 0.22, "grad_norm": 1.3554977178573608, "learning_rate": 9.123101279978928e-06, "loss": 0.647, "step": 3341 }, { "epoch": 0.22, "grad_norm": 1.24547278881073, "learning_rate": 9.122509868538078e-06, "loss": 0.5883, "step": 3342 }, { "epoch": 0.22, "grad_norm": 1.2205818891525269, "learning_rate": 9.121918276912957e-06, "loss": 0.523, "step": 3343 }, { "epoch": 0.22, "grad_norm": 1.433243751525879, "learning_rate": 9.121326505129424e-06, "loss": 0.5478, "step": 3344 }, { "epoch": 0.22, "grad_norm": 1.237426996231079, "learning_rate": 9.120734553213342e-06, "loss": 0.6147, "step": 3345 }, { "epoch": 0.22, "grad_norm": 1.3273146152496338, "learning_rate": 9.120142421190585e-06, "loss": 0.5981, "step": 3346 }, { "epoch": 0.22, "grad_norm": 1.365454077720642, "learning_rate": 9.119550109087032e-06, "loss": 0.5484, "step": 3347 }, { "epoch": 0.22, "grad_norm": 1.3098211288452148, "learning_rate": 9.118957616928572e-06, "loss": 0.5217, "step": 3348 }, { "epoch": 0.22, "grad_norm": 1.2910627126693726, "learning_rate": 9.118364944741104e-06, "loss": 0.5662, "step": 3349 }, { "epoch": 0.22, "grad_norm": 1.3541970252990723, "learning_rate": 9.117772092550528e-06, "loss": 0.5856, "step": 3350 }, { "epoch": 0.22, "grad_norm": 1.2411192655563354, "learning_rate": 9.117179060382757e-06, "loss": 0.6046, "step": 3351 }, { "epoch": 0.22, "grad_norm": 1.3590519428253174, "learning_rate": 9.116585848263712e-06, "loss": 0.5542, "step": 3352 }, { "epoch": 0.22, "grad_norm": 1.1394646167755127, "learning_rate": 9.115992456219323e-06, "loss": 0.5461, "step": 3353 }, { "epoch": 0.22, "grad_norm": 1.3534663915634155, "learning_rate": 9.115398884275521e-06, "loss": 0.6511, "step": 3354 }, { "epoch": 0.22, "grad_norm": 1.137691855430603, "learning_rate": 9.114805132458252e-06, "loss": 0.5769, "step": 3355 }, { "epoch": 0.22, "grad_norm": 1.1583060026168823, "learning_rate": 9.114211200793466e-06, "loss": 0.5942, "step": 3356 }, { "epoch": 0.22, "grad_norm": 1.2199069261550903, "learning_rate": 9.113617089307126e-06, "loss": 0.6189, "step": 3357 }, { "epoch": 0.22, "grad_norm": 1.3968993425369263, "learning_rate": 9.113022798025192e-06, "loss": 0.5441, "step": 3358 }, { "epoch": 0.22, "grad_norm": 1.3259552717208862, "learning_rate": 9.112428326973646e-06, "loss": 0.5618, "step": 3359 }, { "epoch": 0.22, "grad_norm": 1.2219535112380981, "learning_rate": 9.111833676178468e-06, "loss": 0.5853, "step": 3360 }, { "epoch": 0.22, "grad_norm": 1.2070176601409912, "learning_rate": 9.111238845665649e-06, "loss": 0.5886, "step": 3361 }, { "epoch": 0.22, "grad_norm": 1.2042232751846313, "learning_rate": 9.110643835461186e-06, "loss": 0.5403, "step": 3362 }, { "epoch": 0.22, "grad_norm": 1.3294527530670166, "learning_rate": 9.110048645591088e-06, "loss": 0.6168, "step": 3363 }, { "epoch": 0.22, "grad_norm": 1.2276585102081299, "learning_rate": 9.109453276081366e-06, "loss": 0.5886, "step": 3364 }, { "epoch": 0.22, "grad_norm": 1.1552995443344116, "learning_rate": 9.108857726958045e-06, "loss": 0.528, "step": 3365 }, { "epoch": 0.22, "grad_norm": 1.1561810970306396, "learning_rate": 9.108261998247155e-06, "loss": 0.574, "step": 3366 }, { "epoch": 0.22, "grad_norm": 1.224009394645691, "learning_rate": 9.10766608997473e-06, "loss": 0.6068, "step": 3367 }, { "epoch": 0.22, "grad_norm": 1.2173620462417603, "learning_rate": 9.10707000216682e-06, "loss": 0.5651, "step": 3368 }, { "epoch": 0.22, "grad_norm": 1.1569442749023438, "learning_rate": 9.106473734849476e-06, "loss": 0.5363, "step": 3369 }, { "epoch": 0.22, "grad_norm": 1.294775366783142, "learning_rate": 9.105877288048759e-06, "loss": 0.5392, "step": 3370 }, { "epoch": 0.22, "grad_norm": 1.3254109621047974, "learning_rate": 9.105280661790739e-06, "loss": 0.5751, "step": 3371 }, { "epoch": 0.22, "grad_norm": 1.1831724643707275, "learning_rate": 9.104683856101493e-06, "loss": 0.5846, "step": 3372 }, { "epoch": 0.22, "grad_norm": 1.311856746673584, "learning_rate": 9.104086871007107e-06, "loss": 0.5646, "step": 3373 }, { "epoch": 0.22, "grad_norm": 1.1754685640335083, "learning_rate": 9.103489706533673e-06, "loss": 0.601, "step": 3374 }, { "epoch": 0.22, "grad_norm": 1.1787011623382568, "learning_rate": 9.10289236270729e-06, "loss": 0.5451, "step": 3375 }, { "epoch": 0.22, "grad_norm": 1.1376912593841553, "learning_rate": 9.102294839554068e-06, "loss": 0.5564, "step": 3376 }, { "epoch": 0.22, "grad_norm": 1.183764934539795, "learning_rate": 9.101697137100122e-06, "loss": 0.5458, "step": 3377 }, { "epoch": 0.22, "grad_norm": 1.2523541450500488, "learning_rate": 9.101099255371578e-06, "loss": 0.5499, "step": 3378 }, { "epoch": 0.22, "grad_norm": 1.237389087677002, "learning_rate": 9.100501194394564e-06, "loss": 0.5685, "step": 3379 }, { "epoch": 0.22, "grad_norm": 1.2199842929840088, "learning_rate": 9.099902954195224e-06, "loss": 0.5707, "step": 3380 }, { "epoch": 0.22, "grad_norm": 1.1703287363052368, "learning_rate": 9.099304534799703e-06, "loss": 0.5529, "step": 3381 }, { "epoch": 0.22, "grad_norm": 1.3177540302276611, "learning_rate": 9.098705936234158e-06, "loss": 0.6132, "step": 3382 }, { "epoch": 0.22, "grad_norm": 1.0627267360687256, "learning_rate": 9.098107158524751e-06, "loss": 0.5352, "step": 3383 }, { "epoch": 0.22, "grad_norm": 1.0912359952926636, "learning_rate": 9.097508201697653e-06, "loss": 0.5386, "step": 3384 }, { "epoch": 0.22, "grad_norm": 1.1586511135101318, "learning_rate": 9.096909065779043e-06, "loss": 0.5587, "step": 3385 }, { "epoch": 0.22, "grad_norm": 1.0696500539779663, "learning_rate": 9.09630975079511e-06, "loss": 0.5734, "step": 3386 }, { "epoch": 0.22, "grad_norm": 1.2109841108322144, "learning_rate": 9.095710256772043e-06, "loss": 0.5867, "step": 3387 }, { "epoch": 0.22, "grad_norm": 1.3736355304718018, "learning_rate": 9.09511058373605e-06, "loss": 0.6029, "step": 3388 }, { "epoch": 0.22, "grad_norm": 1.2479972839355469, "learning_rate": 9.094510731713338e-06, "loss": 0.5218, "step": 3389 }, { "epoch": 0.22, "grad_norm": 1.4497084617614746, "learning_rate": 9.093910700730127e-06, "loss": 0.5671, "step": 3390 }, { "epoch": 0.22, "grad_norm": 1.1594645977020264, "learning_rate": 9.093310490812642e-06, "loss": 0.537, "step": 3391 }, { "epoch": 0.22, "grad_norm": 1.2901784181594849, "learning_rate": 9.092710101987115e-06, "loss": 0.6081, "step": 3392 }, { "epoch": 0.22, "grad_norm": 1.0957826375961304, "learning_rate": 9.092109534279787e-06, "loss": 0.5703, "step": 3393 }, { "epoch": 0.22, "grad_norm": 1.1658477783203125, "learning_rate": 9.091508787716912e-06, "loss": 0.5376, "step": 3394 }, { "epoch": 0.22, "grad_norm": 1.1101865768432617, "learning_rate": 9.090907862324744e-06, "loss": 0.5996, "step": 3395 }, { "epoch": 0.22, "grad_norm": 1.1986188888549805, "learning_rate": 9.090306758129548e-06, "loss": 0.5808, "step": 3396 }, { "epoch": 0.22, "grad_norm": 1.342271089553833, "learning_rate": 9.089705475157595e-06, "loss": 0.6249, "step": 3397 }, { "epoch": 0.22, "grad_norm": 1.1814051866531372, "learning_rate": 9.089104013435167e-06, "loss": 0.6125, "step": 3398 }, { "epoch": 0.22, "grad_norm": 1.3399453163146973, "learning_rate": 9.088502372988556e-06, "loss": 0.5906, "step": 3399 }, { "epoch": 0.22, "grad_norm": 1.2102042436599731, "learning_rate": 9.087900553844053e-06, "loss": 0.5239, "step": 3400 }, { "epoch": 0.22, "grad_norm": 1.2343536615371704, "learning_rate": 9.087298556027964e-06, "loss": 0.5954, "step": 3401 }, { "epoch": 0.22, "grad_norm": 1.2134275436401367, "learning_rate": 9.086696379566598e-06, "loss": 0.552, "step": 3402 }, { "epoch": 0.22, "grad_norm": 1.2390857934951782, "learning_rate": 9.086094024486279e-06, "loss": 0.619, "step": 3403 }, { "epoch": 0.22, "grad_norm": 1.2450799942016602, "learning_rate": 9.085491490813333e-06, "loss": 0.4842, "step": 3404 }, { "epoch": 0.22, "grad_norm": 1.2400490045547485, "learning_rate": 9.084888778574095e-06, "loss": 0.5228, "step": 3405 }, { "epoch": 0.22, "grad_norm": 1.1139897108078003, "learning_rate": 9.084285887794906e-06, "loss": 0.5579, "step": 3406 }, { "epoch": 0.22, "grad_norm": 1.3204652070999146, "learning_rate": 9.083682818502118e-06, "loss": 0.5635, "step": 3407 }, { "epoch": 0.22, "grad_norm": 1.2658946514129639, "learning_rate": 9.083079570722093e-06, "loss": 0.5188, "step": 3408 }, { "epoch": 0.22, "grad_norm": 1.2543385028839111, "learning_rate": 9.082476144481192e-06, "loss": 0.5981, "step": 3409 }, { "epoch": 0.22, "grad_norm": 1.2104966640472412, "learning_rate": 9.081872539805792e-06, "loss": 0.591, "step": 3410 }, { "epoch": 0.22, "grad_norm": 1.251417875289917, "learning_rate": 9.081268756722274e-06, "loss": 0.6036, "step": 3411 }, { "epoch": 0.22, "grad_norm": 1.195966124534607, "learning_rate": 9.080664795257031e-06, "loss": 0.5667, "step": 3412 }, { "epoch": 0.22, "grad_norm": 1.2317850589752197, "learning_rate": 9.080060655436456e-06, "loss": 0.616, "step": 3413 }, { "epoch": 0.22, "grad_norm": 1.2039223909378052, "learning_rate": 9.079456337286956e-06, "loss": 0.5554, "step": 3414 }, { "epoch": 0.22, "grad_norm": 1.2074543237686157, "learning_rate": 9.078851840834946e-06, "loss": 0.5879, "step": 3415 }, { "epoch": 0.22, "grad_norm": 1.1357425451278687, "learning_rate": 9.078247166106845e-06, "loss": 0.5253, "step": 3416 }, { "epoch": 0.22, "grad_norm": 1.2768242359161377, "learning_rate": 9.077642313129083e-06, "loss": 0.6234, "step": 3417 }, { "epoch": 0.22, "grad_norm": 1.1601399183273315, "learning_rate": 9.077037281928095e-06, "loss": 0.5684, "step": 3418 }, { "epoch": 0.22, "grad_norm": 1.0549405813217163, "learning_rate": 9.076432072530326e-06, "loss": 0.497, "step": 3419 }, { "epoch": 0.22, "grad_norm": 1.2365461587905884, "learning_rate": 9.07582668496223e-06, "loss": 0.5521, "step": 3420 }, { "epoch": 0.22, "grad_norm": 1.11464262008667, "learning_rate": 9.075221119250265e-06, "loss": 0.568, "step": 3421 }, { "epoch": 0.22, "grad_norm": 1.2383155822753906, "learning_rate": 9.074615375420898e-06, "loss": 0.5473, "step": 3422 }, { "epoch": 0.22, "grad_norm": 1.195481777191162, "learning_rate": 9.074009453500608e-06, "loss": 0.6154, "step": 3423 }, { "epoch": 0.22, "grad_norm": 1.1253453493118286, "learning_rate": 9.073403353515874e-06, "loss": 0.5938, "step": 3424 }, { "epoch": 0.22, "grad_norm": 1.3476852178573608, "learning_rate": 9.072797075493188e-06, "loss": 0.5369, "step": 3425 }, { "epoch": 0.22, "grad_norm": 1.2337161302566528, "learning_rate": 9.072190619459052e-06, "loss": 0.5543, "step": 3426 }, { "epoch": 0.22, "grad_norm": 1.3331717252731323, "learning_rate": 9.071583985439969e-06, "loss": 0.5432, "step": 3427 }, { "epoch": 0.22, "grad_norm": 1.2257176637649536, "learning_rate": 9.070977173462455e-06, "loss": 0.5754, "step": 3428 }, { "epoch": 0.22, "grad_norm": 1.1770446300506592, "learning_rate": 9.070370183553032e-06, "loss": 0.5475, "step": 3429 }, { "epoch": 0.22, "grad_norm": 1.3582412004470825, "learning_rate": 9.06976301573823e-06, "loss": 0.5555, "step": 3430 }, { "epoch": 0.22, "grad_norm": 1.1617169380187988, "learning_rate": 9.069155670044588e-06, "loss": 0.5825, "step": 3431 }, { "epoch": 0.22, "grad_norm": 1.3239657878875732, "learning_rate": 9.068548146498649e-06, "loss": 0.529, "step": 3432 }, { "epoch": 0.22, "grad_norm": 1.4919397830963135, "learning_rate": 9.067940445126965e-06, "loss": 0.5967, "step": 3433 }, { "epoch": 0.22, "grad_norm": 1.308070182800293, "learning_rate": 9.067332565956104e-06, "loss": 0.6098, "step": 3434 }, { "epoch": 0.22, "grad_norm": 1.1814504861831665, "learning_rate": 9.066724509012628e-06, "loss": 0.588, "step": 3435 }, { "epoch": 0.22, "grad_norm": 1.1702558994293213, "learning_rate": 9.066116274323116e-06, "loss": 0.537, "step": 3436 }, { "epoch": 0.22, "grad_norm": 1.5144407749176025, "learning_rate": 9.065507861914153e-06, "loss": 0.537, "step": 3437 }, { "epoch": 0.22, "grad_norm": 1.1525070667266846, "learning_rate": 9.064899271812332e-06, "loss": 0.5769, "step": 3438 }, { "epoch": 0.22, "grad_norm": 1.1153249740600586, "learning_rate": 9.064290504044248e-06, "loss": 0.5634, "step": 3439 }, { "epoch": 0.22, "grad_norm": 1.1262915134429932, "learning_rate": 9.063681558636515e-06, "loss": 0.5904, "step": 3440 }, { "epoch": 0.22, "grad_norm": 1.2399790287017822, "learning_rate": 9.063072435615743e-06, "loss": 0.5591, "step": 3441 }, { "epoch": 0.22, "grad_norm": 1.2999075651168823, "learning_rate": 9.062463135008558e-06, "loss": 0.6168, "step": 3442 }, { "epoch": 0.22, "grad_norm": 1.3405550718307495, "learning_rate": 9.061853656841594e-06, "loss": 0.5161, "step": 3443 }, { "epoch": 0.22, "grad_norm": 1.3262947797775269, "learning_rate": 9.061244001141483e-06, "loss": 0.5842, "step": 3444 }, { "epoch": 0.22, "grad_norm": 1.2320655584335327, "learning_rate": 9.060634167934877e-06, "loss": 0.6196, "step": 3445 }, { "epoch": 0.22, "grad_norm": 1.2369860410690308, "learning_rate": 9.060024157248426e-06, "loss": 0.5421, "step": 3446 }, { "epoch": 0.22, "grad_norm": 1.243093490600586, "learning_rate": 9.059413969108796e-06, "loss": 0.5958, "step": 3447 }, { "epoch": 0.22, "grad_norm": 1.2053123712539673, "learning_rate": 9.058803603542654e-06, "loss": 0.5184, "step": 3448 }, { "epoch": 0.22, "grad_norm": 1.2504795789718628, "learning_rate": 9.05819306057668e-06, "loss": 0.5809, "step": 3449 }, { "epoch": 0.22, "grad_norm": 1.1286747455596924, "learning_rate": 9.057582340237555e-06, "loss": 0.5701, "step": 3450 }, { "epoch": 0.22, "grad_norm": 1.3050148487091064, "learning_rate": 9.056971442551974e-06, "loss": 0.591, "step": 3451 }, { "epoch": 0.22, "grad_norm": 1.2564215660095215, "learning_rate": 9.056360367546641e-06, "loss": 0.5795, "step": 3452 }, { "epoch": 0.22, "grad_norm": 1.2225559949874878, "learning_rate": 9.05574911524826e-06, "loss": 0.6064, "step": 3453 }, { "epoch": 0.22, "grad_norm": 1.218198299407959, "learning_rate": 9.05513768568355e-06, "loss": 0.5816, "step": 3454 }, { "epoch": 0.22, "grad_norm": 1.1946271657943726, "learning_rate": 9.054526078879236e-06, "loss": 0.5575, "step": 3455 }, { "epoch": 0.22, "grad_norm": 1.1461219787597656, "learning_rate": 9.053914294862045e-06, "loss": 0.573, "step": 3456 }, { "epoch": 0.22, "grad_norm": 1.3616241216659546, "learning_rate": 9.05330233365872e-06, "loss": 0.5841, "step": 3457 }, { "epoch": 0.22, "grad_norm": 1.3149510622024536, "learning_rate": 9.052690195296007e-06, "loss": 0.5435, "step": 3458 }, { "epoch": 0.22, "grad_norm": 1.2100763320922852, "learning_rate": 9.052077879800666e-06, "loss": 0.5312, "step": 3459 }, { "epoch": 0.22, "grad_norm": 1.166896104812622, "learning_rate": 9.05146538719945e-06, "loss": 0.5473, "step": 3460 }, { "epoch": 0.22, "grad_norm": 1.1554473638534546, "learning_rate": 9.050852717519137e-06, "loss": 0.5774, "step": 3461 }, { "epoch": 0.22, "grad_norm": 1.3468364477157593, "learning_rate": 9.050239870786504e-06, "loss": 0.5714, "step": 3462 }, { "epoch": 0.22, "grad_norm": 1.2460333108901978, "learning_rate": 9.049626847028334e-06, "loss": 0.5801, "step": 3463 }, { "epoch": 0.22, "grad_norm": 1.1861591339111328, "learning_rate": 9.049013646271424e-06, "loss": 0.5604, "step": 3464 }, { "epoch": 0.22, "grad_norm": 1.332926630973816, "learning_rate": 9.048400268542575e-06, "loss": 0.5621, "step": 3465 }, { "epoch": 0.22, "grad_norm": 1.1964319944381714, "learning_rate": 9.047786713868592e-06, "loss": 0.5335, "step": 3466 }, { "epoch": 0.22, "grad_norm": 1.3548600673675537, "learning_rate": 9.047172982276298e-06, "loss": 0.5692, "step": 3467 }, { "epoch": 0.22, "grad_norm": 1.1920245885849, "learning_rate": 9.046559073792513e-06, "loss": 0.5151, "step": 3468 }, { "epoch": 0.22, "grad_norm": 1.2754424810409546, "learning_rate": 9.045944988444072e-06, "loss": 0.5935, "step": 3469 }, { "epoch": 0.22, "grad_norm": 1.2671390771865845, "learning_rate": 9.045330726257816e-06, "loss": 0.5878, "step": 3470 }, { "epoch": 0.22, "grad_norm": 1.408695101737976, "learning_rate": 9.044716287260589e-06, "loss": 0.5707, "step": 3471 }, { "epoch": 0.22, "grad_norm": 1.1191415786743164, "learning_rate": 9.04410167147925e-06, "loss": 0.5661, "step": 3472 }, { "epoch": 0.22, "grad_norm": 1.1092541217803955, "learning_rate": 9.043486878940659e-06, "loss": 0.5629, "step": 3473 }, { "epoch": 0.22, "grad_norm": 1.3368600606918335, "learning_rate": 9.042871909671691e-06, "loss": 0.5764, "step": 3474 }, { "epoch": 0.22, "grad_norm": 1.26378333568573, "learning_rate": 9.042256763699221e-06, "loss": 0.5363, "step": 3475 }, { "epoch": 0.22, "grad_norm": 1.1502121686935425, "learning_rate": 9.041641441050137e-06, "loss": 0.56, "step": 3476 }, { "epoch": 0.22, "grad_norm": 1.1501450538635254, "learning_rate": 9.041025941751333e-06, "loss": 0.5484, "step": 3477 }, { "epoch": 0.22, "grad_norm": 1.0703470706939697, "learning_rate": 9.040410265829713e-06, "loss": 0.5136, "step": 3478 }, { "epoch": 0.22, "grad_norm": 1.1986662149429321, "learning_rate": 9.039794413312183e-06, "loss": 0.5356, "step": 3479 }, { "epoch": 0.22, "grad_norm": 1.0929728746414185, "learning_rate": 9.039178384225662e-06, "loss": 0.5656, "step": 3480 }, { "epoch": 0.22, "grad_norm": 1.8985852003097534, "learning_rate": 9.038562178597076e-06, "loss": 0.568, "step": 3481 }, { "epoch": 0.22, "grad_norm": 1.2187508344650269, "learning_rate": 9.037945796453356e-06, "loss": 0.5596, "step": 3482 }, { "epoch": 0.22, "grad_norm": 1.1087236404418945, "learning_rate": 9.037329237821442e-06, "loss": 0.5199, "step": 3483 }, { "epoch": 0.22, "grad_norm": 1.268444538116455, "learning_rate": 9.036712502728287e-06, "loss": 0.5493, "step": 3484 }, { "epoch": 0.22, "grad_norm": 1.2265454530715942, "learning_rate": 9.03609559120084e-06, "loss": 0.5693, "step": 3485 }, { "epoch": 0.23, "grad_norm": 1.2051042318344116, "learning_rate": 9.035478503266069e-06, "loss": 0.576, "step": 3486 }, { "epoch": 0.23, "grad_norm": 1.2575733661651611, "learning_rate": 9.034861238950944e-06, "loss": 0.5869, "step": 3487 }, { "epoch": 0.23, "grad_norm": 1.4779446125030518, "learning_rate": 9.034243798282445e-06, "loss": 0.6048, "step": 3488 }, { "epoch": 0.23, "grad_norm": 1.256517767906189, "learning_rate": 9.033626181287559e-06, "loss": 0.55, "step": 3489 }, { "epoch": 0.23, "grad_norm": 1.230330467224121, "learning_rate": 9.033008387993279e-06, "loss": 0.5945, "step": 3490 }, { "epoch": 0.23, "grad_norm": 1.0727907419204712, "learning_rate": 9.032390418426606e-06, "loss": 0.5033, "step": 3491 }, { "epoch": 0.23, "grad_norm": 1.1858004331588745, "learning_rate": 9.031772272614554e-06, "loss": 0.5616, "step": 3492 }, { "epoch": 0.23, "grad_norm": 1.1735066175460815, "learning_rate": 9.031153950584137e-06, "loss": 0.5462, "step": 3493 }, { "epoch": 0.23, "grad_norm": 1.209593415260315, "learning_rate": 9.03053545236238e-06, "loss": 0.5705, "step": 3494 }, { "epoch": 0.23, "grad_norm": 1.2137794494628906, "learning_rate": 9.029916777976318e-06, "loss": 0.555, "step": 3495 }, { "epoch": 0.23, "grad_norm": 1.1812242269515991, "learning_rate": 9.029297927452991e-06, "loss": 0.555, "step": 3496 }, { "epoch": 0.23, "grad_norm": 1.233926773071289, "learning_rate": 9.028678900819448e-06, "loss": 0.5708, "step": 3497 }, { "epoch": 0.23, "grad_norm": 1.1238843202590942, "learning_rate": 9.028059698102744e-06, "loss": 0.51, "step": 3498 }, { "epoch": 0.23, "grad_norm": 1.1296669244766235, "learning_rate": 9.027440319329943e-06, "loss": 0.5279, "step": 3499 }, { "epoch": 0.23, "grad_norm": 1.2949450016021729, "learning_rate": 9.026820764528117e-06, "loss": 0.5424, "step": 3500 }, { "epoch": 0.23, "grad_norm": 1.4331763982772827, "learning_rate": 9.026201033724345e-06, "loss": 0.5781, "step": 3501 }, { "epoch": 0.23, "grad_norm": 1.2980660200119019, "learning_rate": 9.025581126945712e-06, "loss": 0.5195, "step": 3502 }, { "epoch": 0.23, "grad_norm": 1.1737911701202393, "learning_rate": 9.024961044219315e-06, "loss": 0.5884, "step": 3503 }, { "epoch": 0.23, "grad_norm": 1.1557012796401978, "learning_rate": 9.024340785572256e-06, "loss": 0.5619, "step": 3504 }, { "epoch": 0.23, "grad_norm": 1.300521969795227, "learning_rate": 9.023720351031644e-06, "loss": 0.5626, "step": 3505 }, { "epoch": 0.23, "grad_norm": 1.4591643810272217, "learning_rate": 9.023099740624597e-06, "loss": 0.6035, "step": 3506 }, { "epoch": 0.23, "grad_norm": 1.1692585945129395, "learning_rate": 9.022478954378238e-06, "loss": 0.5784, "step": 3507 }, { "epoch": 0.23, "grad_norm": 1.291734218597412, "learning_rate": 9.021857992319704e-06, "loss": 0.5607, "step": 3508 }, { "epoch": 0.23, "grad_norm": 1.3107160329818726, "learning_rate": 9.021236854476132e-06, "loss": 0.6281, "step": 3509 }, { "epoch": 0.23, "grad_norm": 1.276250958442688, "learning_rate": 9.020615540874673e-06, "loss": 0.5326, "step": 3510 }, { "epoch": 0.23, "grad_norm": 1.1599657535552979, "learning_rate": 9.019994051542482e-06, "loss": 0.55, "step": 3511 }, { "epoch": 0.23, "grad_norm": 1.9957512617111206, "learning_rate": 9.019372386506725e-06, "loss": 0.5486, "step": 3512 }, { "epoch": 0.23, "grad_norm": 1.1361147165298462, "learning_rate": 9.018750545794567e-06, "loss": 0.5155, "step": 3513 }, { "epoch": 0.23, "grad_norm": 1.2562047243118286, "learning_rate": 9.018128529433194e-06, "loss": 0.593, "step": 3514 }, { "epoch": 0.23, "grad_norm": 1.264703631401062, "learning_rate": 9.017506337449789e-06, "loss": 0.557, "step": 3515 }, { "epoch": 0.23, "grad_norm": 1.2972403764724731, "learning_rate": 9.016883969871548e-06, "loss": 0.6216, "step": 3516 }, { "epoch": 0.23, "grad_norm": 1.3363595008850098, "learning_rate": 9.016261426725672e-06, "loss": 0.5181, "step": 3517 }, { "epoch": 0.23, "grad_norm": 1.3686448335647583, "learning_rate": 9.01563870803937e-06, "loss": 0.5672, "step": 3518 }, { "epoch": 0.23, "grad_norm": 1.2133761644363403, "learning_rate": 9.015015813839863e-06, "loss": 0.6352, "step": 3519 }, { "epoch": 0.23, "grad_norm": 1.1429916620254517, "learning_rate": 9.014392744154371e-06, "loss": 0.5939, "step": 3520 }, { "epoch": 0.23, "grad_norm": 1.408186435699463, "learning_rate": 9.01376949901013e-06, "loss": 0.5907, "step": 3521 }, { "epoch": 0.23, "grad_norm": 1.267733097076416, "learning_rate": 9.013146078434381e-06, "loss": 0.5954, "step": 3522 }, { "epoch": 0.23, "grad_norm": 1.2277287244796753, "learning_rate": 9.01252248245437e-06, "loss": 0.5603, "step": 3523 }, { "epoch": 0.23, "grad_norm": 1.2062709331512451, "learning_rate": 9.011898711097355e-06, "loss": 0.551, "step": 3524 }, { "epoch": 0.23, "grad_norm": 1.1865904331207275, "learning_rate": 9.011274764390596e-06, "loss": 0.5759, "step": 3525 }, { "epoch": 0.23, "grad_norm": 1.3010691404342651, "learning_rate": 9.010650642361367e-06, "loss": 0.5444, "step": 3526 }, { "epoch": 0.23, "grad_norm": 1.1853318214416504, "learning_rate": 9.010026345036948e-06, "loss": 0.5869, "step": 3527 }, { "epoch": 0.23, "grad_norm": 1.3832709789276123, "learning_rate": 9.009401872444622e-06, "loss": 0.535, "step": 3528 }, { "epoch": 0.23, "grad_norm": 1.31244957447052, "learning_rate": 9.008777224611685e-06, "loss": 0.5975, "step": 3529 }, { "epoch": 0.23, "grad_norm": 1.3251399993896484, "learning_rate": 9.008152401565437e-06, "loss": 0.5679, "step": 3530 }, { "epoch": 0.23, "grad_norm": 1.340275526046753, "learning_rate": 9.00752740333319e-06, "loss": 0.5032, "step": 3531 }, { "epoch": 0.23, "grad_norm": 1.1940977573394775, "learning_rate": 9.006902229942258e-06, "loss": 0.5531, "step": 3532 }, { "epoch": 0.23, "grad_norm": 1.1531860828399658, "learning_rate": 9.006276881419969e-06, "loss": 0.5632, "step": 3533 }, { "epoch": 0.23, "grad_norm": 1.264479637145996, "learning_rate": 9.005651357793654e-06, "loss": 0.5754, "step": 3534 }, { "epoch": 0.23, "grad_norm": 1.2364400625228882, "learning_rate": 9.005025659090652e-06, "loss": 0.6121, "step": 3535 }, { "epoch": 0.23, "grad_norm": 1.3099560737609863, "learning_rate": 9.004399785338314e-06, "loss": 0.5739, "step": 3536 }, { "epoch": 0.23, "grad_norm": 1.433181643486023, "learning_rate": 9.00377373656399e-06, "loss": 0.5762, "step": 3537 }, { "epoch": 0.23, "grad_norm": 1.1128193140029907, "learning_rate": 9.003147512795048e-06, "loss": 0.5525, "step": 3538 }, { "epoch": 0.23, "grad_norm": 1.1925426721572876, "learning_rate": 9.002521114058854e-06, "loss": 0.5452, "step": 3539 }, { "epoch": 0.23, "grad_norm": 1.1911181211471558, "learning_rate": 9.00189454038279e-06, "loss": 0.5913, "step": 3540 }, { "epoch": 0.23, "grad_norm": 1.4492920637130737, "learning_rate": 9.001267791794242e-06, "loss": 0.589, "step": 3541 }, { "epoch": 0.23, "grad_norm": 1.3508026599884033, "learning_rate": 9.0006408683206e-06, "loss": 0.5418, "step": 3542 }, { "epoch": 0.23, "grad_norm": 1.2412409782409668, "learning_rate": 9.000013769989269e-06, "loss": 0.5781, "step": 3543 }, { "epoch": 0.23, "grad_norm": 1.298175573348999, "learning_rate": 8.999386496827655e-06, "loss": 0.5737, "step": 3544 }, { "epoch": 0.23, "grad_norm": 1.2928717136383057, "learning_rate": 8.998759048863176e-06, "loss": 0.5507, "step": 3545 }, { "epoch": 0.23, "grad_norm": 1.236097812652588, "learning_rate": 8.998131426123257e-06, "loss": 0.5473, "step": 3546 }, { "epoch": 0.23, "grad_norm": 1.293396234512329, "learning_rate": 8.997503628635327e-06, "loss": 0.5523, "step": 3547 }, { "epoch": 0.23, "grad_norm": 1.2536765336990356, "learning_rate": 8.99687565642683e-06, "loss": 0.5901, "step": 3548 }, { "epoch": 0.23, "grad_norm": 1.197483777999878, "learning_rate": 8.996247509525206e-06, "loss": 0.571, "step": 3549 }, { "epoch": 0.23, "grad_norm": 1.2687698602676392, "learning_rate": 8.995619187957916e-06, "loss": 0.5529, "step": 3550 }, { "epoch": 0.23, "grad_norm": 1.1963809728622437, "learning_rate": 8.994990691752422e-06, "loss": 0.5593, "step": 3551 }, { "epoch": 0.23, "grad_norm": 1.13083815574646, "learning_rate": 8.994362020936189e-06, "loss": 0.5514, "step": 3552 }, { "epoch": 0.23, "grad_norm": 1.226601004600525, "learning_rate": 8.9937331755367e-06, "loss": 0.551, "step": 3553 }, { "epoch": 0.23, "grad_norm": 1.230892300605774, "learning_rate": 8.993104155581436e-06, "loss": 0.5947, "step": 3554 }, { "epoch": 0.23, "grad_norm": 1.1737644672393799, "learning_rate": 8.992474961097892e-06, "loss": 0.5588, "step": 3555 }, { "epoch": 0.23, "grad_norm": 1.2551031112670898, "learning_rate": 8.991845592113568e-06, "loss": 0.552, "step": 3556 }, { "epoch": 0.23, "grad_norm": 1.4428449869155884, "learning_rate": 8.991216048655973e-06, "loss": 0.5689, "step": 3557 }, { "epoch": 0.23, "grad_norm": 1.2583400011062622, "learning_rate": 8.990586330752622e-06, "loss": 0.607, "step": 3558 }, { "epoch": 0.23, "grad_norm": 1.2305209636688232, "learning_rate": 8.989956438431038e-06, "loss": 0.617, "step": 3559 }, { "epoch": 0.23, "grad_norm": 1.1473618745803833, "learning_rate": 8.989326371718754e-06, "loss": 0.5507, "step": 3560 }, { "epoch": 0.23, "grad_norm": 1.1809051036834717, "learning_rate": 8.988696130643306e-06, "loss": 0.5354, "step": 3561 }, { "epoch": 0.23, "grad_norm": 1.2732359170913696, "learning_rate": 8.98806571523224e-06, "loss": 0.5276, "step": 3562 }, { "epoch": 0.23, "grad_norm": 1.3486616611480713, "learning_rate": 8.98743512551311e-06, "loss": 0.5692, "step": 3563 }, { "epoch": 0.23, "grad_norm": 1.255086898803711, "learning_rate": 8.986804361513482e-06, "loss": 0.6077, "step": 3564 }, { "epoch": 0.23, "grad_norm": 1.2901512384414673, "learning_rate": 8.986173423260917e-06, "loss": 0.5635, "step": 3565 }, { "epoch": 0.23, "grad_norm": 1.1116397380828857, "learning_rate": 8.985542310782999e-06, "loss": 0.5878, "step": 3566 }, { "epoch": 0.23, "grad_norm": 1.2086284160614014, "learning_rate": 8.98491102410731e-06, "loss": 0.5385, "step": 3567 }, { "epoch": 0.23, "grad_norm": 1.2622050046920776, "learning_rate": 8.98427956326144e-06, "loss": 0.6019, "step": 3568 }, { "epoch": 0.23, "grad_norm": 1.155648112297058, "learning_rate": 8.983647928272989e-06, "loss": 0.5927, "step": 3569 }, { "epoch": 0.23, "grad_norm": 1.2028125524520874, "learning_rate": 8.983016119169567e-06, "loss": 0.5425, "step": 3570 }, { "epoch": 0.23, "grad_norm": 1.3604377508163452, "learning_rate": 8.982384135978785e-06, "loss": 0.5796, "step": 3571 }, { "epoch": 0.23, "grad_norm": 1.2143384218215942, "learning_rate": 8.981751978728268e-06, "loss": 0.6008, "step": 3572 }, { "epoch": 0.23, "grad_norm": 1.22402822971344, "learning_rate": 8.981119647445645e-06, "loss": 0.6076, "step": 3573 }, { "epoch": 0.23, "grad_norm": 1.2570991516113281, "learning_rate": 8.980487142158554e-06, "loss": 0.6158, "step": 3574 }, { "epoch": 0.23, "grad_norm": 1.1755892038345337, "learning_rate": 8.97985446289464e-06, "loss": 0.5568, "step": 3575 }, { "epoch": 0.23, "grad_norm": 1.40088951587677, "learning_rate": 8.979221609681553e-06, "loss": 0.5464, "step": 3576 }, { "epoch": 0.23, "grad_norm": 1.2116270065307617, "learning_rate": 8.97858858254696e-06, "loss": 0.592, "step": 3577 }, { "epoch": 0.23, "grad_norm": 1.1904971599578857, "learning_rate": 8.977955381518523e-06, "loss": 0.524, "step": 3578 }, { "epoch": 0.23, "grad_norm": 2.104623794555664, "learning_rate": 8.97732200662392e-06, "loss": 0.5484, "step": 3579 }, { "epoch": 0.23, "grad_norm": 1.1586856842041016, "learning_rate": 8.976688457890831e-06, "loss": 0.5379, "step": 3580 }, { "epoch": 0.23, "grad_norm": 1.4386885166168213, "learning_rate": 8.976054735346953e-06, "loss": 0.5671, "step": 3581 }, { "epoch": 0.23, "grad_norm": 1.3405455350875854, "learning_rate": 8.975420839019979e-06, "loss": 0.551, "step": 3582 }, { "epoch": 0.23, "grad_norm": 1.2581846714019775, "learning_rate": 8.974786768937618e-06, "loss": 0.5394, "step": 3583 }, { "epoch": 0.23, "grad_norm": 1.3081377744674683, "learning_rate": 8.974152525127583e-06, "loss": 0.5965, "step": 3584 }, { "epoch": 0.23, "grad_norm": 1.2652215957641602, "learning_rate": 8.973518107617593e-06, "loss": 0.5778, "step": 3585 }, { "epoch": 0.23, "grad_norm": 1.1571736335754395, "learning_rate": 8.972883516435378e-06, "loss": 0.576, "step": 3586 }, { "epoch": 0.23, "grad_norm": 1.1772489547729492, "learning_rate": 8.972248751608677e-06, "loss": 0.6084, "step": 3587 }, { "epoch": 0.23, "grad_norm": 1.1778663396835327, "learning_rate": 8.971613813165229e-06, "loss": 0.5655, "step": 3588 }, { "epoch": 0.23, "grad_norm": 1.3005980253219604, "learning_rate": 8.970978701132788e-06, "loss": 0.5947, "step": 3589 }, { "epoch": 0.23, "grad_norm": 1.3170580863952637, "learning_rate": 8.970343415539116e-06, "loss": 0.6235, "step": 3590 }, { "epoch": 0.23, "grad_norm": 1.2503340244293213, "learning_rate": 8.969707956411974e-06, "loss": 0.5539, "step": 3591 }, { "epoch": 0.23, "grad_norm": 1.2405260801315308, "learning_rate": 8.96907232377914e-06, "loss": 0.5616, "step": 3592 }, { "epoch": 0.23, "grad_norm": 1.206490397453308, "learning_rate": 8.968436517668398e-06, "loss": 0.5695, "step": 3593 }, { "epoch": 0.23, "grad_norm": 1.1989610195159912, "learning_rate": 8.967800538107532e-06, "loss": 0.5849, "step": 3594 }, { "epoch": 0.23, "grad_norm": 1.2336804866790771, "learning_rate": 8.96716438512434e-06, "loss": 0.5505, "step": 3595 }, { "epoch": 0.23, "grad_norm": 1.0975446701049805, "learning_rate": 8.96652805874663e-06, "loss": 0.4979, "step": 3596 }, { "epoch": 0.23, "grad_norm": 1.0905932188034058, "learning_rate": 8.965891559002213e-06, "loss": 0.5984, "step": 3597 }, { "epoch": 0.23, "grad_norm": 1.2475956678390503, "learning_rate": 8.965254885918906e-06, "loss": 0.5571, "step": 3598 }, { "epoch": 0.23, "grad_norm": 1.211595892906189, "learning_rate": 8.964618039524539e-06, "loss": 0.5548, "step": 3599 }, { "epoch": 0.23, "grad_norm": 1.2695186138153076, "learning_rate": 8.963981019846948e-06, "loss": 0.5352, "step": 3600 }, { "epoch": 0.23, "grad_norm": 1.2463698387145996, "learning_rate": 8.963343826913972e-06, "loss": 0.5381, "step": 3601 }, { "epoch": 0.23, "grad_norm": 3.1991071701049805, "learning_rate": 8.962706460753465e-06, "loss": 0.5688, "step": 3602 }, { "epoch": 0.23, "grad_norm": 1.195000171661377, "learning_rate": 8.962068921393281e-06, "loss": 0.5693, "step": 3603 }, { "epoch": 0.23, "grad_norm": 1.330970287322998, "learning_rate": 8.961431208861286e-06, "loss": 0.5674, "step": 3604 }, { "epoch": 0.23, "grad_norm": 1.3849122524261475, "learning_rate": 8.960793323185355e-06, "loss": 0.5741, "step": 3605 }, { "epoch": 0.23, "grad_norm": 1.0396184921264648, "learning_rate": 8.960155264393367e-06, "loss": 0.5482, "step": 3606 }, { "epoch": 0.23, "grad_norm": 1.1665830612182617, "learning_rate": 8.959517032513208e-06, "loss": 0.5758, "step": 3607 }, { "epoch": 0.23, "grad_norm": 1.2510324716567993, "learning_rate": 8.958878627572778e-06, "loss": 0.5841, "step": 3608 }, { "epoch": 0.23, "grad_norm": 1.272153615951538, "learning_rate": 8.958240049599974e-06, "loss": 0.5326, "step": 3609 }, { "epoch": 0.23, "grad_norm": 1.2974604368209839, "learning_rate": 8.95760129862271e-06, "loss": 0.6287, "step": 3610 }, { "epoch": 0.23, "grad_norm": 1.210016131401062, "learning_rate": 8.956962374668907e-06, "loss": 0.6207, "step": 3611 }, { "epoch": 0.23, "grad_norm": 1.4693936109542847, "learning_rate": 8.956323277766485e-06, "loss": 0.5952, "step": 3612 }, { "epoch": 0.23, "grad_norm": 1.191937804222107, "learning_rate": 8.955684007943381e-06, "loss": 0.5837, "step": 3613 }, { "epoch": 0.23, "grad_norm": 1.3005820512771606, "learning_rate": 8.955044565227536e-06, "loss": 0.5948, "step": 3614 }, { "epoch": 0.23, "grad_norm": 1.222990870475769, "learning_rate": 8.954404949646895e-06, "loss": 0.5904, "step": 3615 }, { "epoch": 0.23, "grad_norm": 1.413400650024414, "learning_rate": 8.953765161229418e-06, "loss": 0.5858, "step": 3616 }, { "epoch": 0.23, "grad_norm": 1.1496944427490234, "learning_rate": 8.953125200003065e-06, "loss": 0.5917, "step": 3617 }, { "epoch": 0.23, "grad_norm": 1.1768110990524292, "learning_rate": 8.952485065995812e-06, "loss": 0.5863, "step": 3618 }, { "epoch": 0.23, "grad_norm": 1.0945119857788086, "learning_rate": 8.95184475923563e-06, "loss": 0.4938, "step": 3619 }, { "epoch": 0.23, "grad_norm": 1.1786479949951172, "learning_rate": 8.951204279750511e-06, "loss": 0.6, "step": 3620 }, { "epoch": 0.23, "grad_norm": 1.0731433629989624, "learning_rate": 8.950563627568448e-06, "loss": 0.5892, "step": 3621 }, { "epoch": 0.23, "grad_norm": 1.1446269750595093, "learning_rate": 8.94992280271744e-06, "loss": 0.5525, "step": 3622 }, { "epoch": 0.23, "grad_norm": 1.1923633813858032, "learning_rate": 8.949281805225499e-06, "loss": 0.5824, "step": 3623 }, { "epoch": 0.23, "grad_norm": 1.177105188369751, "learning_rate": 8.948640635120637e-06, "loss": 0.5685, "step": 3624 }, { "epoch": 0.23, "grad_norm": 1.1896719932556152, "learning_rate": 8.947999292430883e-06, "loss": 0.5409, "step": 3625 }, { "epoch": 0.23, "grad_norm": 1.1834403276443481, "learning_rate": 8.947357777184265e-06, "loss": 0.5114, "step": 3626 }, { "epoch": 0.23, "grad_norm": 1.2680052518844604, "learning_rate": 8.946716089408823e-06, "loss": 0.5892, "step": 3627 }, { "epoch": 0.23, "grad_norm": 1.2104241847991943, "learning_rate": 8.946074229132603e-06, "loss": 0.5807, "step": 3628 }, { "epoch": 0.23, "grad_norm": 1.2050000429153442, "learning_rate": 8.94543219638366e-06, "loss": 0.5302, "step": 3629 }, { "epoch": 0.23, "grad_norm": 1.2470394372940063, "learning_rate": 8.944789991190055e-06, "loss": 0.6099, "step": 3630 }, { "epoch": 0.23, "grad_norm": 1.0471371412277222, "learning_rate": 8.944147613579857e-06, "loss": 0.5576, "step": 3631 }, { "epoch": 0.23, "grad_norm": 1.3477920293807983, "learning_rate": 8.943505063581143e-06, "loss": 0.6073, "step": 3632 }, { "epoch": 0.23, "grad_norm": 1.2073179483413696, "learning_rate": 8.942862341221996e-06, "loss": 0.5761, "step": 3633 }, { "epoch": 0.23, "grad_norm": 1.269831895828247, "learning_rate": 8.94221944653051e-06, "loss": 0.5961, "step": 3634 }, { "epoch": 0.23, "grad_norm": 1.1357709169387817, "learning_rate": 8.941576379534783e-06, "loss": 0.5666, "step": 3635 }, { "epoch": 0.23, "grad_norm": 1.2436250448226929, "learning_rate": 8.940933140262921e-06, "loss": 0.5491, "step": 3636 }, { "epoch": 0.23, "grad_norm": 1.1695176362991333, "learning_rate": 8.94028972874304e-06, "loss": 0.6052, "step": 3637 }, { "epoch": 0.23, "grad_norm": 1.2201786041259766, "learning_rate": 8.939646145003261e-06, "loss": 0.612, "step": 3638 }, { "epoch": 0.23, "grad_norm": 1.1301679611206055, "learning_rate": 8.939002389071714e-06, "loss": 0.5021, "step": 3639 }, { "epoch": 0.23, "grad_norm": 1.099530577659607, "learning_rate": 8.938358460976534e-06, "loss": 0.5728, "step": 3640 }, { "epoch": 0.24, "grad_norm": 1.2058912515640259, "learning_rate": 8.937714360745868e-06, "loss": 0.6093, "step": 3641 }, { "epoch": 0.24, "grad_norm": 1.1088035106658936, "learning_rate": 8.937070088407865e-06, "loss": 0.5477, "step": 3642 }, { "epoch": 0.24, "grad_norm": 1.141484022140503, "learning_rate": 8.936425643990688e-06, "loss": 0.57, "step": 3643 }, { "epoch": 0.24, "grad_norm": 1.2335926294326782, "learning_rate": 8.9357810275225e-06, "loss": 0.5448, "step": 3644 }, { "epoch": 0.24, "grad_norm": 1.2889320850372314, "learning_rate": 8.935136239031482e-06, "loss": 0.6078, "step": 3645 }, { "epoch": 0.24, "grad_norm": 1.2143715620040894, "learning_rate": 8.934491278545807e-06, "loss": 0.5588, "step": 3646 }, { "epoch": 0.24, "grad_norm": 1.3000459671020508, "learning_rate": 8.933846146093673e-06, "loss": 0.6314, "step": 3647 }, { "epoch": 0.24, "grad_norm": 1.2866578102111816, "learning_rate": 8.933200841703271e-06, "loss": 0.5809, "step": 3648 }, { "epoch": 0.24, "grad_norm": 1.207376480102539, "learning_rate": 8.932555365402809e-06, "loss": 0.5806, "step": 3649 }, { "epoch": 0.24, "grad_norm": 1.115464210510254, "learning_rate": 8.931909717220497e-06, "loss": 0.5398, "step": 3650 }, { "epoch": 0.24, "grad_norm": 1.1558036804199219, "learning_rate": 8.931263897184556e-06, "loss": 0.557, "step": 3651 }, { "epoch": 0.24, "grad_norm": 1.2026220560073853, "learning_rate": 8.930617905323214e-06, "loss": 0.5372, "step": 3652 }, { "epoch": 0.24, "grad_norm": 1.1625785827636719, "learning_rate": 8.929971741664703e-06, "loss": 0.6146, "step": 3653 }, { "epoch": 0.24, "grad_norm": 1.2889100313186646, "learning_rate": 8.929325406237269e-06, "loss": 0.5797, "step": 3654 }, { "epoch": 0.24, "grad_norm": 1.2443420886993408, "learning_rate": 8.928678899069157e-06, "loss": 0.5488, "step": 3655 }, { "epoch": 0.24, "grad_norm": 1.1963366270065308, "learning_rate": 8.928032220188628e-06, "loss": 0.6299, "step": 3656 }, { "epoch": 0.24, "grad_norm": 1.2685742378234863, "learning_rate": 8.927385369623944e-06, "loss": 0.5814, "step": 3657 }, { "epoch": 0.24, "grad_norm": 1.1511744260787964, "learning_rate": 8.926738347403378e-06, "loss": 0.5787, "step": 3658 }, { "epoch": 0.24, "grad_norm": 1.2253482341766357, "learning_rate": 8.926091153555213e-06, "loss": 0.5796, "step": 3659 }, { "epoch": 0.24, "grad_norm": 1.3016078472137451, "learning_rate": 8.92544378810773e-06, "loss": 0.6092, "step": 3660 }, { "epoch": 0.24, "grad_norm": 1.1647098064422607, "learning_rate": 8.924796251089228e-06, "loss": 0.5358, "step": 3661 }, { "epoch": 0.24, "grad_norm": 1.227901816368103, "learning_rate": 8.924148542528006e-06, "loss": 0.5563, "step": 3662 }, { "epoch": 0.24, "grad_norm": 1.2945066690444946, "learning_rate": 8.923500662452378e-06, "loss": 0.5475, "step": 3663 }, { "epoch": 0.24, "grad_norm": 1.4390311241149902, "learning_rate": 8.922852610890657e-06, "loss": 0.579, "step": 3664 }, { "epoch": 0.24, "grad_norm": 1.1074755191802979, "learning_rate": 8.92220438787117e-06, "loss": 0.5269, "step": 3665 }, { "epoch": 0.24, "grad_norm": 1.2902731895446777, "learning_rate": 8.921555993422251e-06, "loss": 0.6141, "step": 3666 }, { "epoch": 0.24, "grad_norm": 1.2078657150268555, "learning_rate": 8.920907427572234e-06, "loss": 0.5729, "step": 3667 }, { "epoch": 0.24, "grad_norm": 1.2649637460708618, "learning_rate": 8.92025869034947e-06, "loss": 0.5856, "step": 3668 }, { "epoch": 0.24, "grad_norm": 1.1725353002548218, "learning_rate": 8.919609781782314e-06, "loss": 0.5452, "step": 3669 }, { "epoch": 0.24, "grad_norm": 1.33144211769104, "learning_rate": 8.918960701899128e-06, "loss": 0.6373, "step": 3670 }, { "epoch": 0.24, "grad_norm": 1.3370091915130615, "learning_rate": 8.918311450728279e-06, "loss": 0.5402, "step": 3671 }, { "epoch": 0.24, "grad_norm": 1.259839653968811, "learning_rate": 8.917662028298148e-06, "loss": 0.5681, "step": 3672 }, { "epoch": 0.24, "grad_norm": 1.1266422271728516, "learning_rate": 8.917012434637116e-06, "loss": 0.5898, "step": 3673 }, { "epoch": 0.24, "grad_norm": 1.1631824970245361, "learning_rate": 8.916362669773576e-06, "loss": 0.5684, "step": 3674 }, { "epoch": 0.24, "grad_norm": 1.2862550020217896, "learning_rate": 8.915712733735929e-06, "loss": 0.5342, "step": 3675 }, { "epoch": 0.24, "grad_norm": 1.364203929901123, "learning_rate": 8.91506262655258e-06, "loss": 0.5736, "step": 3676 }, { "epoch": 0.24, "grad_norm": 1.3307965993881226, "learning_rate": 8.914412348251947e-06, "loss": 0.5859, "step": 3677 }, { "epoch": 0.24, "grad_norm": 1.2251602411270142, "learning_rate": 8.913761898862447e-06, "loss": 0.5461, "step": 3678 }, { "epoch": 0.24, "grad_norm": 1.2649340629577637, "learning_rate": 8.913111278412513e-06, "loss": 0.5757, "step": 3679 }, { "epoch": 0.24, "grad_norm": 1.1998591423034668, "learning_rate": 8.912460486930582e-06, "loss": 0.52, "step": 3680 }, { "epoch": 0.24, "grad_norm": 1.309250831604004, "learning_rate": 8.911809524445094e-06, "loss": 0.5232, "step": 3681 }, { "epoch": 0.24, "grad_norm": 1.2841871976852417, "learning_rate": 8.911158390984508e-06, "loss": 0.5648, "step": 3682 }, { "epoch": 0.24, "grad_norm": 1.1427396535873413, "learning_rate": 8.910507086577279e-06, "loss": 0.5699, "step": 3683 }, { "epoch": 0.24, "grad_norm": 1.2796101570129395, "learning_rate": 8.909855611251871e-06, "loss": 0.5691, "step": 3684 }, { "epoch": 0.24, "grad_norm": 1.5045236349105835, "learning_rate": 8.909203965036766e-06, "loss": 0.5513, "step": 3685 }, { "epoch": 0.24, "grad_norm": 1.2616832256317139, "learning_rate": 8.90855214796044e-06, "loss": 0.6201, "step": 3686 }, { "epoch": 0.24, "grad_norm": 1.1917073726654053, "learning_rate": 8.907900160051383e-06, "loss": 0.5918, "step": 3687 }, { "epoch": 0.24, "grad_norm": 1.2186131477355957, "learning_rate": 8.907248001338095e-06, "loss": 0.5575, "step": 3688 }, { "epoch": 0.24, "grad_norm": 1.175803780555725, "learning_rate": 8.906595671849076e-06, "loss": 0.5613, "step": 3689 }, { "epoch": 0.24, "grad_norm": 1.2628415822982788, "learning_rate": 8.905943171612839e-06, "loss": 0.5787, "step": 3690 }, { "epoch": 0.24, "grad_norm": 1.4495763778686523, "learning_rate": 8.905290500657904e-06, "loss": 0.5692, "step": 3691 }, { "epoch": 0.24, "grad_norm": 1.2580089569091797, "learning_rate": 8.904637659012798e-06, "loss": 0.6245, "step": 3692 }, { "epoch": 0.24, "grad_norm": 1.3527129888534546, "learning_rate": 8.903984646706051e-06, "loss": 0.5657, "step": 3693 }, { "epoch": 0.24, "grad_norm": 1.2280699014663696, "learning_rate": 8.90333146376621e-06, "loss": 0.5911, "step": 3694 }, { "epoch": 0.24, "grad_norm": 1.134678840637207, "learning_rate": 8.902678110221821e-06, "loss": 0.5314, "step": 3695 }, { "epoch": 0.24, "grad_norm": 1.2814154624938965, "learning_rate": 8.90202458610144e-06, "loss": 0.5809, "step": 3696 }, { "epoch": 0.24, "grad_norm": 2.3206639289855957, "learning_rate": 8.901370891433634e-06, "loss": 0.5679, "step": 3697 }, { "epoch": 0.24, "grad_norm": 1.105638027191162, "learning_rate": 8.90071702624697e-06, "loss": 0.5261, "step": 3698 }, { "epoch": 0.24, "grad_norm": 1.1771490573883057, "learning_rate": 8.900062990570028e-06, "loss": 0.56, "step": 3699 }, { "epoch": 0.24, "grad_norm": 1.2693557739257812, "learning_rate": 8.899408784431399e-06, "loss": 0.5156, "step": 3700 }, { "epoch": 0.24, "grad_norm": 1.2465630769729614, "learning_rate": 8.898754407859669e-06, "loss": 0.5715, "step": 3701 }, { "epoch": 0.24, "grad_norm": 1.164779543876648, "learning_rate": 8.898099860883443e-06, "loss": 0.5606, "step": 3702 }, { "epoch": 0.24, "grad_norm": 1.3213862180709839, "learning_rate": 8.897445143531329e-06, "loss": 0.571, "step": 3703 }, { "epoch": 0.24, "grad_norm": 1.1980911493301392, "learning_rate": 8.896790255831945e-06, "loss": 0.568, "step": 3704 }, { "epoch": 0.24, "grad_norm": 1.1390535831451416, "learning_rate": 8.896135197813911e-06, "loss": 0.5426, "step": 3705 }, { "epoch": 0.24, "grad_norm": 1.2250341176986694, "learning_rate": 8.895479969505862e-06, "loss": 0.5476, "step": 3706 }, { "epoch": 0.24, "grad_norm": 1.3084235191345215, "learning_rate": 8.894824570936433e-06, "loss": 0.5893, "step": 3707 }, { "epoch": 0.24, "grad_norm": 1.3049858808517456, "learning_rate": 8.89416900213427e-06, "loss": 0.5533, "step": 3708 }, { "epoch": 0.24, "grad_norm": 1.208743691444397, "learning_rate": 8.893513263128026e-06, "loss": 0.5735, "step": 3709 }, { "epoch": 0.24, "grad_norm": 1.5364540815353394, "learning_rate": 8.892857353946364e-06, "loss": 0.5585, "step": 3710 }, { "epoch": 0.24, "grad_norm": 1.1470948457717896, "learning_rate": 8.892201274617952e-06, "loss": 0.5563, "step": 3711 }, { "epoch": 0.24, "grad_norm": 1.2374919652938843, "learning_rate": 8.891545025171464e-06, "loss": 0.6034, "step": 3712 }, { "epoch": 0.24, "grad_norm": 1.2064156532287598, "learning_rate": 8.890888605635581e-06, "loss": 0.5879, "step": 3713 }, { "epoch": 0.24, "grad_norm": 1.2384380102157593, "learning_rate": 8.890232016038998e-06, "loss": 0.5439, "step": 3714 }, { "epoch": 0.24, "grad_norm": 1.1228376626968384, "learning_rate": 8.889575256410409e-06, "loss": 0.5734, "step": 3715 }, { "epoch": 0.24, "grad_norm": 1.1075273752212524, "learning_rate": 8.888918326778521e-06, "loss": 0.5696, "step": 3716 }, { "epoch": 0.24, "grad_norm": 1.154042363166809, "learning_rate": 8.888261227172049e-06, "loss": 0.5483, "step": 3717 }, { "epoch": 0.24, "grad_norm": 1.363288164138794, "learning_rate": 8.887603957619706e-06, "loss": 0.5826, "step": 3718 }, { "epoch": 0.24, "grad_norm": 1.283308744430542, "learning_rate": 8.886946518150227e-06, "loss": 0.5936, "step": 3719 }, { "epoch": 0.24, "grad_norm": 1.2080562114715576, "learning_rate": 8.886288908792344e-06, "loss": 0.5933, "step": 3720 }, { "epoch": 0.24, "grad_norm": 1.2338250875473022, "learning_rate": 8.8856311295748e-06, "loss": 0.5072, "step": 3721 }, { "epoch": 0.24, "grad_norm": 1.2376129627227783, "learning_rate": 8.884973180526345e-06, "loss": 0.533, "step": 3722 }, { "epoch": 0.24, "grad_norm": 1.096683382987976, "learning_rate": 8.884315061675733e-06, "loss": 0.5573, "step": 3723 }, { "epoch": 0.24, "grad_norm": 1.2990609407424927, "learning_rate": 8.883656773051733e-06, "loss": 0.5843, "step": 3724 }, { "epoch": 0.24, "grad_norm": 1.2940970659255981, "learning_rate": 8.882998314683115e-06, "loss": 0.5976, "step": 3725 }, { "epoch": 0.24, "grad_norm": 1.1625884771347046, "learning_rate": 8.88233968659866e-06, "loss": 0.5602, "step": 3726 }, { "epoch": 0.24, "grad_norm": 1.1884493827819824, "learning_rate": 8.881680888827152e-06, "loss": 0.5554, "step": 3727 }, { "epoch": 0.24, "grad_norm": 1.1323477029800415, "learning_rate": 8.881021921397388e-06, "loss": 0.5508, "step": 3728 }, { "epoch": 0.24, "grad_norm": 1.224126935005188, "learning_rate": 8.88036278433817e-06, "loss": 0.5878, "step": 3729 }, { "epoch": 0.24, "grad_norm": 1.2119860649108887, "learning_rate": 8.879703477678305e-06, "loss": 0.585, "step": 3730 }, { "epoch": 0.24, "grad_norm": 1.1622562408447266, "learning_rate": 8.87904400144661e-06, "loss": 0.5767, "step": 3731 }, { "epoch": 0.24, "grad_norm": 1.2632945775985718, "learning_rate": 8.878384355671912e-06, "loss": 0.5685, "step": 3732 }, { "epoch": 0.24, "grad_norm": 1.1207679510116577, "learning_rate": 8.87772454038304e-06, "loss": 0.5578, "step": 3733 }, { "epoch": 0.24, "grad_norm": 1.1854729652404785, "learning_rate": 8.87706455560883e-06, "loss": 0.5818, "step": 3734 }, { "epoch": 0.24, "grad_norm": 1.172262191772461, "learning_rate": 8.876404401378134e-06, "loss": 0.5483, "step": 3735 }, { "epoch": 0.24, "grad_norm": 1.9560728073120117, "learning_rate": 8.875744077719802e-06, "loss": 0.5457, "step": 3736 }, { "epoch": 0.24, "grad_norm": 1.1187926530838013, "learning_rate": 8.875083584662695e-06, "loss": 0.5587, "step": 3737 }, { "epoch": 0.24, "grad_norm": 1.195417881011963, "learning_rate": 8.874422922235684e-06, "loss": 0.5811, "step": 3738 }, { "epoch": 0.24, "grad_norm": 1.196449637413025, "learning_rate": 8.873762090467644e-06, "loss": 0.5361, "step": 3739 }, { "epoch": 0.24, "grad_norm": 1.2436561584472656, "learning_rate": 8.873101089387456e-06, "loss": 0.5917, "step": 3740 }, { "epoch": 0.24, "grad_norm": 1.2150909900665283, "learning_rate": 8.872439919024012e-06, "loss": 0.5932, "step": 3741 }, { "epoch": 0.24, "grad_norm": 1.1637135744094849, "learning_rate": 8.87177857940621e-06, "loss": 0.5293, "step": 3742 }, { "epoch": 0.24, "grad_norm": 1.2207852602005005, "learning_rate": 8.871117070562956e-06, "loss": 0.5523, "step": 3743 }, { "epoch": 0.24, "grad_norm": 1.259633183479309, "learning_rate": 8.870455392523163e-06, "loss": 0.5571, "step": 3744 }, { "epoch": 0.24, "grad_norm": 1.2637687921524048, "learning_rate": 8.869793545315751e-06, "loss": 0.5434, "step": 3745 }, { "epoch": 0.24, "grad_norm": 1.225131630897522, "learning_rate": 8.869131528969646e-06, "loss": 0.5483, "step": 3746 }, { "epoch": 0.24, "grad_norm": 1.1363645792007446, "learning_rate": 8.868469343513783e-06, "loss": 0.5511, "step": 3747 }, { "epoch": 0.24, "grad_norm": 1.1933754682540894, "learning_rate": 8.867806988977109e-06, "loss": 0.5849, "step": 3748 }, { "epoch": 0.24, "grad_norm": 1.2763655185699463, "learning_rate": 8.867144465388569e-06, "loss": 0.5594, "step": 3749 }, { "epoch": 0.24, "grad_norm": 1.1482094526290894, "learning_rate": 8.866481772777123e-06, "loss": 0.5447, "step": 3750 }, { "epoch": 0.24, "grad_norm": 1.2339028120040894, "learning_rate": 8.865818911171734e-06, "loss": 0.5959, "step": 3751 }, { "epoch": 0.24, "grad_norm": 1.4560091495513916, "learning_rate": 8.865155880601373e-06, "loss": 0.5525, "step": 3752 }, { "epoch": 0.24, "grad_norm": 1.2205772399902344, "learning_rate": 8.86449268109502e-06, "loss": 0.5889, "step": 3753 }, { "epoch": 0.24, "grad_norm": 1.325608491897583, "learning_rate": 8.863829312681664e-06, "loss": 0.6098, "step": 3754 }, { "epoch": 0.24, "grad_norm": 1.087243914604187, "learning_rate": 8.863165775390299e-06, "loss": 0.5567, "step": 3755 }, { "epoch": 0.24, "grad_norm": 1.3113672733306885, "learning_rate": 8.862502069249921e-06, "loss": 0.5741, "step": 3756 }, { "epoch": 0.24, "grad_norm": 1.157629132270813, "learning_rate": 8.861838194289547e-06, "loss": 0.5374, "step": 3757 }, { "epoch": 0.24, "grad_norm": 1.2328928709030151, "learning_rate": 8.861174150538188e-06, "loss": 0.5151, "step": 3758 }, { "epoch": 0.24, "grad_norm": 1.202149510383606, "learning_rate": 8.860509938024868e-06, "loss": 0.5419, "step": 3759 }, { "epoch": 0.24, "grad_norm": 1.0942474603652954, "learning_rate": 8.85984555677862e-06, "loss": 0.5424, "step": 3760 }, { "epoch": 0.24, "grad_norm": 1.0563100576400757, "learning_rate": 8.85918100682848e-06, "loss": 0.545, "step": 3761 }, { "epoch": 0.24, "grad_norm": 1.1577180624008179, "learning_rate": 8.858516288203494e-06, "loss": 0.5327, "step": 3762 }, { "epoch": 0.24, "grad_norm": 1.2536931037902832, "learning_rate": 8.857851400932718e-06, "loss": 0.5609, "step": 3763 }, { "epoch": 0.24, "grad_norm": 1.1997034549713135, "learning_rate": 8.85718634504521e-06, "loss": 0.5774, "step": 3764 }, { "epoch": 0.24, "grad_norm": 1.1557927131652832, "learning_rate": 8.856521120570039e-06, "loss": 0.5446, "step": 3765 }, { "epoch": 0.24, "grad_norm": 1.1630181074142456, "learning_rate": 8.85585572753628e-06, "loss": 0.5543, "step": 3766 }, { "epoch": 0.24, "grad_norm": 1.2842705249786377, "learning_rate": 8.855190165973015e-06, "loss": 0.5494, "step": 3767 }, { "epoch": 0.24, "grad_norm": 1.4085006713867188, "learning_rate": 8.854524435909335e-06, "loss": 0.545, "step": 3768 }, { "epoch": 0.24, "grad_norm": 1.1464823484420776, "learning_rate": 8.853858537374336e-06, "loss": 0.5347, "step": 3769 }, { "epoch": 0.24, "grad_norm": 1.185293436050415, "learning_rate": 8.853192470397123e-06, "loss": 0.5646, "step": 3770 }, { "epoch": 0.24, "grad_norm": 1.2752511501312256, "learning_rate": 8.85252623500681e-06, "loss": 0.5584, "step": 3771 }, { "epoch": 0.24, "grad_norm": 1.4375927448272705, "learning_rate": 8.851859831232515e-06, "loss": 0.5636, "step": 3772 }, { "epoch": 0.24, "grad_norm": 1.27762770652771, "learning_rate": 8.851193259103362e-06, "loss": 0.5784, "step": 3773 }, { "epoch": 0.24, "grad_norm": 1.3238850831985474, "learning_rate": 8.85052651864849e-06, "loss": 0.5834, "step": 3774 }, { "epoch": 0.24, "grad_norm": 1.2855148315429688, "learning_rate": 8.849859609897038e-06, "loss": 0.6213, "step": 3775 }, { "epoch": 0.24, "grad_norm": 1.2414981126785278, "learning_rate": 8.849192532878154e-06, "loss": 0.5566, "step": 3776 }, { "epoch": 0.24, "grad_norm": 1.3112491369247437, "learning_rate": 8.848525287620996e-06, "loss": 0.5733, "step": 3777 }, { "epoch": 0.24, "grad_norm": 1.1139386892318726, "learning_rate": 8.847857874154728e-06, "loss": 0.5162, "step": 3778 }, { "epoch": 0.24, "grad_norm": 1.3391157388687134, "learning_rate": 8.84719029250852e-06, "loss": 0.5356, "step": 3779 }, { "epoch": 0.24, "grad_norm": 1.378156065940857, "learning_rate": 8.84652254271155e-06, "loss": 0.6069, "step": 3780 }, { "epoch": 0.24, "grad_norm": 1.21543550491333, "learning_rate": 8.845854624793003e-06, "loss": 0.486, "step": 3781 }, { "epoch": 0.24, "grad_norm": 1.140283465385437, "learning_rate": 8.845186538782074e-06, "loss": 0.5059, "step": 3782 }, { "epoch": 0.24, "grad_norm": 1.1641535758972168, "learning_rate": 8.84451828470796e-06, "loss": 0.5461, "step": 3783 }, { "epoch": 0.24, "grad_norm": 1.2544349431991577, "learning_rate": 8.843849862599874e-06, "loss": 0.5406, "step": 3784 }, { "epoch": 0.24, "grad_norm": 1.2205764055252075, "learning_rate": 8.843181272487025e-06, "loss": 0.6036, "step": 3785 }, { "epoch": 0.24, "grad_norm": 1.1348648071289062, "learning_rate": 8.842512514398638e-06, "loss": 0.5726, "step": 3786 }, { "epoch": 0.24, "grad_norm": 1.2896801233291626, "learning_rate": 8.841843588363945e-06, "loss": 0.5605, "step": 3787 }, { "epoch": 0.24, "grad_norm": 1.1271148920059204, "learning_rate": 8.841174494412179e-06, "loss": 0.5084, "step": 3788 }, { "epoch": 0.24, "grad_norm": 1.1271013021469116, "learning_rate": 8.840505232572587e-06, "loss": 0.5818, "step": 3789 }, { "epoch": 0.24, "grad_norm": 1.271979808807373, "learning_rate": 8.83983580287442e-06, "loss": 0.6038, "step": 3790 }, { "epoch": 0.24, "grad_norm": 1.3116223812103271, "learning_rate": 8.839166205346936e-06, "loss": 0.6177, "step": 3791 }, { "epoch": 0.24, "grad_norm": 1.3282017707824707, "learning_rate": 8.838496440019404e-06, "loss": 0.5624, "step": 3792 }, { "epoch": 0.24, "grad_norm": 1.1313327550888062, "learning_rate": 8.837826506921096e-06, "loss": 0.5787, "step": 3793 }, { "epoch": 0.24, "grad_norm": 1.2031514644622803, "learning_rate": 8.837156406081292e-06, "loss": 0.5947, "step": 3794 }, { "epoch": 0.24, "grad_norm": 1.2128450870513916, "learning_rate": 8.836486137529281e-06, "loss": 0.5575, "step": 3795 }, { "epoch": 0.25, "grad_norm": 1.2262444496154785, "learning_rate": 8.835815701294361e-06, "loss": 0.5866, "step": 3796 }, { "epoch": 0.25, "grad_norm": 1.2118840217590332, "learning_rate": 8.835145097405832e-06, "loss": 0.5822, "step": 3797 }, { "epoch": 0.25, "grad_norm": 1.2792487144470215, "learning_rate": 8.834474325893006e-06, "loss": 0.5518, "step": 3798 }, { "epoch": 0.25, "grad_norm": 1.3870594501495361, "learning_rate": 8.8338033867852e-06, "loss": 0.5646, "step": 3799 }, { "epoch": 0.25, "grad_norm": 1.184407353401184, "learning_rate": 8.833132280111738e-06, "loss": 0.5604, "step": 3800 }, { "epoch": 0.25, "grad_norm": 1.2311209440231323, "learning_rate": 8.832461005901955e-06, "loss": 0.5791, "step": 3801 }, { "epoch": 0.25, "grad_norm": 1.2228103876113892, "learning_rate": 8.83178956418519e-06, "loss": 0.5352, "step": 3802 }, { "epoch": 0.25, "grad_norm": 1.1320029497146606, "learning_rate": 8.831117954990789e-06, "loss": 0.5382, "step": 3803 }, { "epoch": 0.25, "grad_norm": 1.206002950668335, "learning_rate": 8.830446178348107e-06, "loss": 0.5419, "step": 3804 }, { "epoch": 0.25, "grad_norm": 1.3752858638763428, "learning_rate": 8.829774234286503e-06, "loss": 0.6045, "step": 3805 }, { "epoch": 0.25, "grad_norm": 1.272305965423584, "learning_rate": 8.829102122835351e-06, "loss": 0.5542, "step": 3806 }, { "epoch": 0.25, "grad_norm": 1.203271746635437, "learning_rate": 8.828429844024023e-06, "loss": 0.5541, "step": 3807 }, { "epoch": 0.25, "grad_norm": 1.2019383907318115, "learning_rate": 8.827757397881904e-06, "loss": 0.5566, "step": 3808 }, { "epoch": 0.25, "grad_norm": 1.2941473722457886, "learning_rate": 8.827084784438385e-06, "loss": 0.5341, "step": 3809 }, { "epoch": 0.25, "grad_norm": 1.169275164604187, "learning_rate": 8.826412003722864e-06, "loss": 0.5984, "step": 3810 }, { "epoch": 0.25, "grad_norm": 1.0848134756088257, "learning_rate": 8.825739055764745e-06, "loss": 0.485, "step": 3811 }, { "epoch": 0.25, "grad_norm": 1.2795578241348267, "learning_rate": 8.825065940593444e-06, "loss": 0.5306, "step": 3812 }, { "epoch": 0.25, "grad_norm": 1.2309701442718506, "learning_rate": 8.824392658238381e-06, "loss": 0.5408, "step": 3813 }, { "epoch": 0.25, "grad_norm": 1.260751724243164, "learning_rate": 8.82371920872898e-06, "loss": 0.5481, "step": 3814 }, { "epoch": 0.25, "grad_norm": 1.1370011568069458, "learning_rate": 8.823045592094678e-06, "loss": 0.635, "step": 3815 }, { "epoch": 0.25, "grad_norm": 1.1770859956741333, "learning_rate": 8.822371808364919e-06, "loss": 0.589, "step": 3816 }, { "epoch": 0.25, "grad_norm": 1.3455657958984375, "learning_rate": 8.821697857569147e-06, "loss": 0.5998, "step": 3817 }, { "epoch": 0.25, "grad_norm": 1.0782071352005005, "learning_rate": 8.821023739736823e-06, "loss": 0.5346, "step": 3818 }, { "epoch": 0.25, "grad_norm": 1.2268821001052856, "learning_rate": 8.82034945489741e-06, "loss": 0.5913, "step": 3819 }, { "epoch": 0.25, "grad_norm": 1.1678768396377563, "learning_rate": 8.819675003080379e-06, "loss": 0.5517, "step": 3820 }, { "epoch": 0.25, "grad_norm": 1.109872579574585, "learning_rate": 8.819000384315208e-06, "loss": 0.5701, "step": 3821 }, { "epoch": 0.25, "grad_norm": 1.4456909894943237, "learning_rate": 8.818325598631382e-06, "loss": 0.5838, "step": 3822 }, { "epoch": 0.25, "grad_norm": 1.1845611333847046, "learning_rate": 8.817650646058397e-06, "loss": 0.5515, "step": 3823 }, { "epoch": 0.25, "grad_norm": 1.2595956325531006, "learning_rate": 8.816975526625752e-06, "loss": 0.5102, "step": 3824 }, { "epoch": 0.25, "grad_norm": 1.2451354265213013, "learning_rate": 8.816300240362955e-06, "loss": 0.5221, "step": 3825 }, { "epoch": 0.25, "grad_norm": 1.1490252017974854, "learning_rate": 8.815624787299521e-06, "loss": 0.5371, "step": 3826 }, { "epoch": 0.25, "grad_norm": 1.1326720714569092, "learning_rate": 8.81494916746497e-06, "loss": 0.5729, "step": 3827 }, { "epoch": 0.25, "grad_norm": 1.2144536972045898, "learning_rate": 8.814273380888837e-06, "loss": 0.5692, "step": 3828 }, { "epoch": 0.25, "grad_norm": 1.2656124830245972, "learning_rate": 8.813597427600651e-06, "loss": 0.5798, "step": 3829 }, { "epoch": 0.25, "grad_norm": 1.225560188293457, "learning_rate": 8.812921307629963e-06, "loss": 0.513, "step": 3830 }, { "epoch": 0.25, "grad_norm": 1.1653681993484497, "learning_rate": 8.812245021006322e-06, "loss": 0.5732, "step": 3831 }, { "epoch": 0.25, "grad_norm": 1.1681710481643677, "learning_rate": 8.811568567759288e-06, "loss": 0.5397, "step": 3832 }, { "epoch": 0.25, "grad_norm": 1.1355681419372559, "learning_rate": 8.810891947918425e-06, "loss": 0.5547, "step": 3833 }, { "epoch": 0.25, "grad_norm": 1.2398431301116943, "learning_rate": 8.810215161513306e-06, "loss": 0.5837, "step": 3834 }, { "epoch": 0.25, "grad_norm": 1.2276116609573364, "learning_rate": 8.809538208573513e-06, "loss": 0.6141, "step": 3835 }, { "epoch": 0.25, "grad_norm": 1.6631081104278564, "learning_rate": 8.808861089128635e-06, "loss": 0.6045, "step": 3836 }, { "epoch": 0.25, "grad_norm": 1.0802605152130127, "learning_rate": 8.808183803208266e-06, "loss": 0.5047, "step": 3837 }, { "epoch": 0.25, "grad_norm": 1.1553936004638672, "learning_rate": 8.807506350842007e-06, "loss": 0.5975, "step": 3838 }, { "epoch": 0.25, "grad_norm": 1.19140625, "learning_rate": 8.80682873205947e-06, "loss": 0.555, "step": 3839 }, { "epoch": 0.25, "grad_norm": 1.2107505798339844, "learning_rate": 8.806150946890268e-06, "loss": 0.5682, "step": 3840 }, { "epoch": 0.25, "grad_norm": 1.2002856731414795, "learning_rate": 8.805472995364031e-06, "loss": 0.611, "step": 3841 }, { "epoch": 0.25, "grad_norm": 1.1893727779388428, "learning_rate": 8.804794877510388e-06, "loss": 0.5675, "step": 3842 }, { "epoch": 0.25, "grad_norm": 1.2047946453094482, "learning_rate": 8.804116593358976e-06, "loss": 0.5479, "step": 3843 }, { "epoch": 0.25, "grad_norm": 1.19058358669281, "learning_rate": 8.803438142939442e-06, "loss": 0.5249, "step": 3844 }, { "epoch": 0.25, "grad_norm": 1.1280372142791748, "learning_rate": 8.80275952628144e-06, "loss": 0.5332, "step": 3845 }, { "epoch": 0.25, "grad_norm": 1.2497825622558594, "learning_rate": 8.802080743414631e-06, "loss": 0.5817, "step": 3846 }, { "epoch": 0.25, "grad_norm": 1.2797229290008545, "learning_rate": 8.801401794368682e-06, "loss": 0.5752, "step": 3847 }, { "epoch": 0.25, "grad_norm": 1.2418112754821777, "learning_rate": 8.800722679173269e-06, "loss": 0.5367, "step": 3848 }, { "epoch": 0.25, "grad_norm": 1.144011378288269, "learning_rate": 8.800043397858073e-06, "loss": 0.5284, "step": 3849 }, { "epoch": 0.25, "grad_norm": 1.1714195013046265, "learning_rate": 8.799363950452785e-06, "loss": 0.5022, "step": 3850 }, { "epoch": 0.25, "grad_norm": 1.2461236715316772, "learning_rate": 8.7986843369871e-06, "loss": 0.5825, "step": 3851 }, { "epoch": 0.25, "grad_norm": 1.164200782775879, "learning_rate": 8.798004557490725e-06, "loss": 0.547, "step": 3852 }, { "epoch": 0.25, "grad_norm": 1.4610264301300049, "learning_rate": 8.797324611993368e-06, "loss": 0.6002, "step": 3853 }, { "epoch": 0.25, "grad_norm": 1.2486541271209717, "learning_rate": 8.796644500524749e-06, "loss": 0.522, "step": 3854 }, { "epoch": 0.25, "grad_norm": 1.2434566020965576, "learning_rate": 8.795964223114597e-06, "loss": 0.5522, "step": 3855 }, { "epoch": 0.25, "grad_norm": 1.134519100189209, "learning_rate": 8.79528377979264e-06, "loss": 0.5589, "step": 3856 }, { "epoch": 0.25, "grad_norm": 1.1818419694900513, "learning_rate": 8.794603170588623e-06, "loss": 0.5477, "step": 3857 }, { "epoch": 0.25, "grad_norm": 1.2256478071212769, "learning_rate": 8.79392239553229e-06, "loss": 0.5751, "step": 3858 }, { "epoch": 0.25, "grad_norm": 1.2718076705932617, "learning_rate": 8.793241454653398e-06, "loss": 0.5529, "step": 3859 }, { "epoch": 0.25, "grad_norm": 1.2789033651351929, "learning_rate": 8.79256034798171e-06, "loss": 0.5293, "step": 3860 }, { "epoch": 0.25, "grad_norm": 1.1363581418991089, "learning_rate": 8.791879075546991e-06, "loss": 0.5775, "step": 3861 }, { "epoch": 0.25, "grad_norm": 1.1704951524734497, "learning_rate": 8.791197637379025e-06, "loss": 0.5709, "step": 3862 }, { "epoch": 0.25, "grad_norm": 1.3532160520553589, "learning_rate": 8.790516033507589e-06, "loss": 0.5462, "step": 3863 }, { "epoch": 0.25, "grad_norm": 1.1839840412139893, "learning_rate": 8.789834263962478e-06, "loss": 0.6126, "step": 3864 }, { "epoch": 0.25, "grad_norm": 1.2334038019180298, "learning_rate": 8.789152328773486e-06, "loss": 0.6141, "step": 3865 }, { "epoch": 0.25, "grad_norm": 1.2877147197723389, "learning_rate": 8.788470227970426e-06, "loss": 0.581, "step": 3866 }, { "epoch": 0.25, "grad_norm": 1.1209884881973267, "learning_rate": 8.787787961583105e-06, "loss": 0.5281, "step": 3867 }, { "epoch": 0.25, "grad_norm": 1.0825968980789185, "learning_rate": 8.787105529641345e-06, "loss": 0.5465, "step": 3868 }, { "epoch": 0.25, "grad_norm": 1.20197331905365, "learning_rate": 8.786422932174974e-06, "loss": 0.6018, "step": 3869 }, { "epoch": 0.25, "grad_norm": 1.1381710767745972, "learning_rate": 8.785740169213824e-06, "loss": 0.5765, "step": 3870 }, { "epoch": 0.25, "grad_norm": 1.1973110437393188, "learning_rate": 8.78505724078774e-06, "loss": 0.5198, "step": 3871 }, { "epoch": 0.25, "grad_norm": 1.1364659070968628, "learning_rate": 8.78437414692657e-06, "loss": 0.556, "step": 3872 }, { "epoch": 0.25, "grad_norm": 1.1565792560577393, "learning_rate": 8.783690887660167e-06, "loss": 0.5496, "step": 3873 }, { "epoch": 0.25, "grad_norm": 1.3805660009384155, "learning_rate": 8.783007463018398e-06, "loss": 0.6011, "step": 3874 }, { "epoch": 0.25, "grad_norm": 1.2383497953414917, "learning_rate": 8.782323873031136e-06, "loss": 0.5915, "step": 3875 }, { "epoch": 0.25, "grad_norm": 1.1441603899002075, "learning_rate": 8.781640117728254e-06, "loss": 0.5895, "step": 3876 }, { "epoch": 0.25, "grad_norm": 1.1583365201950073, "learning_rate": 8.78095619713964e-06, "loss": 0.555, "step": 3877 }, { "epoch": 0.25, "grad_norm": 1.3020660877227783, "learning_rate": 8.780272111295186e-06, "loss": 0.5649, "step": 3878 }, { "epoch": 0.25, "grad_norm": 1.2643616199493408, "learning_rate": 8.779587860224788e-06, "loss": 0.5785, "step": 3879 }, { "epoch": 0.25, "grad_norm": 1.2110109329223633, "learning_rate": 8.77890344395836e-06, "loss": 0.5807, "step": 3880 }, { "epoch": 0.25, "grad_norm": 1.2454850673675537, "learning_rate": 8.778218862525811e-06, "loss": 0.5676, "step": 3881 }, { "epoch": 0.25, "grad_norm": 1.2118897438049316, "learning_rate": 8.777534115957062e-06, "loss": 0.6, "step": 3882 }, { "epoch": 0.25, "grad_norm": 1.151107907295227, "learning_rate": 8.776849204282044e-06, "loss": 0.5775, "step": 3883 }, { "epoch": 0.25, "grad_norm": 1.1715706586837769, "learning_rate": 8.776164127530692e-06, "loss": 0.549, "step": 3884 }, { "epoch": 0.25, "grad_norm": 1.158830165863037, "learning_rate": 8.775478885732948e-06, "loss": 0.5668, "step": 3885 }, { "epoch": 0.25, "grad_norm": 1.2075433731079102, "learning_rate": 8.774793478918764e-06, "loss": 0.566, "step": 3886 }, { "epoch": 0.25, "grad_norm": 1.3821923732757568, "learning_rate": 8.774107907118094e-06, "loss": 0.5429, "step": 3887 }, { "epoch": 0.25, "grad_norm": 1.1859681606292725, "learning_rate": 8.773422170360907e-06, "loss": 0.5511, "step": 3888 }, { "epoch": 0.25, "grad_norm": 1.5051188468933105, "learning_rate": 8.77273626867717e-06, "loss": 0.5631, "step": 3889 }, { "epoch": 0.25, "grad_norm": 1.161070466041565, "learning_rate": 8.772050202096867e-06, "loss": 0.5226, "step": 3890 }, { "epoch": 0.25, "grad_norm": 1.2094565629959106, "learning_rate": 8.77136397064998e-06, "loss": 0.5668, "step": 3891 }, { "epoch": 0.25, "grad_norm": 1.041316032409668, "learning_rate": 8.770677574366507e-06, "loss": 0.536, "step": 3892 }, { "epoch": 0.25, "grad_norm": 1.2038241624832153, "learning_rate": 8.769991013276441e-06, "loss": 0.5713, "step": 3893 }, { "epoch": 0.25, "grad_norm": 1.122144103050232, "learning_rate": 8.769304287409797e-06, "loss": 0.579, "step": 3894 }, { "epoch": 0.25, "grad_norm": 1.0987216234207153, "learning_rate": 8.768617396796588e-06, "loss": 0.5876, "step": 3895 }, { "epoch": 0.25, "grad_norm": 1.189388632774353, "learning_rate": 8.767930341466835e-06, "loss": 0.6007, "step": 3896 }, { "epoch": 0.25, "grad_norm": 1.3521099090576172, "learning_rate": 8.767243121450568e-06, "loss": 0.5739, "step": 3897 }, { "epoch": 0.25, "grad_norm": 1.2637646198272705, "learning_rate": 8.766555736777825e-06, "loss": 0.5928, "step": 3898 }, { "epoch": 0.25, "grad_norm": 1.1582218408584595, "learning_rate": 8.765868187478648e-06, "loss": 0.5273, "step": 3899 }, { "epoch": 0.25, "grad_norm": 1.2149618864059448, "learning_rate": 8.765180473583087e-06, "loss": 0.5893, "step": 3900 }, { "epoch": 0.25, "grad_norm": 1.1978085041046143, "learning_rate": 8.764492595121204e-06, "loss": 0.5915, "step": 3901 }, { "epoch": 0.25, "grad_norm": 1.3351151943206787, "learning_rate": 8.763804552123061e-06, "loss": 0.6083, "step": 3902 }, { "epoch": 0.25, "grad_norm": 1.1064995527267456, "learning_rate": 8.763116344618732e-06, "loss": 0.5835, "step": 3903 }, { "epoch": 0.25, "grad_norm": 1.165600061416626, "learning_rate": 8.762427972638297e-06, "loss": 0.5383, "step": 3904 }, { "epoch": 0.25, "grad_norm": 1.1636412143707275, "learning_rate": 8.761739436211842e-06, "loss": 0.5102, "step": 3905 }, { "epoch": 0.25, "grad_norm": 1.254989743232727, "learning_rate": 8.761050735369463e-06, "loss": 0.5253, "step": 3906 }, { "epoch": 0.25, "grad_norm": 1.1725060939788818, "learning_rate": 8.760361870141258e-06, "loss": 0.5113, "step": 3907 }, { "epoch": 0.25, "grad_norm": 1.3239696025848389, "learning_rate": 8.759672840557338e-06, "loss": 0.5801, "step": 3908 }, { "epoch": 0.25, "grad_norm": 1.1344033479690552, "learning_rate": 8.758983646647819e-06, "loss": 0.5931, "step": 3909 }, { "epoch": 0.25, "grad_norm": 1.158051609992981, "learning_rate": 8.758294288442824e-06, "loss": 0.5166, "step": 3910 }, { "epoch": 0.25, "grad_norm": 1.2004112005233765, "learning_rate": 8.75760476597248e-06, "loss": 0.576, "step": 3911 }, { "epoch": 0.25, "grad_norm": 1.12273108959198, "learning_rate": 8.756915079266927e-06, "loss": 0.529, "step": 3912 }, { "epoch": 0.25, "grad_norm": 1.1795262098312378, "learning_rate": 8.75622522835631e-06, "loss": 0.5476, "step": 3913 }, { "epoch": 0.25, "grad_norm": 1.1113086938858032, "learning_rate": 8.755535213270779e-06, "loss": 0.5011, "step": 3914 }, { "epoch": 0.25, "grad_norm": 1.3286712169647217, "learning_rate": 8.754845034040492e-06, "loss": 0.5552, "step": 3915 }, { "epoch": 0.25, "grad_norm": 1.1792280673980713, "learning_rate": 8.754154690695616e-06, "loss": 0.6113, "step": 3916 }, { "epoch": 0.25, "grad_norm": 1.1054009199142456, "learning_rate": 8.753464183266325e-06, "loss": 0.5269, "step": 3917 }, { "epoch": 0.25, "grad_norm": 1.2188798189163208, "learning_rate": 8.752773511782799e-06, "loss": 0.5657, "step": 3918 }, { "epoch": 0.25, "grad_norm": 1.168470859527588, "learning_rate": 8.752082676275225e-06, "loss": 0.5234, "step": 3919 }, { "epoch": 0.25, "grad_norm": 1.2282370328903198, "learning_rate": 8.751391676773798e-06, "loss": 0.5526, "step": 3920 }, { "epoch": 0.25, "grad_norm": 1.2214038372039795, "learning_rate": 8.75070051330872e-06, "loss": 0.6169, "step": 3921 }, { "epoch": 0.25, "grad_norm": 1.238123893737793, "learning_rate": 8.750009185910197e-06, "loss": 0.5873, "step": 3922 }, { "epoch": 0.25, "grad_norm": 1.420828938484192, "learning_rate": 8.74931769460845e-06, "loss": 0.5342, "step": 3923 }, { "epoch": 0.25, "grad_norm": 1.201955795288086, "learning_rate": 8.748626039433701e-06, "loss": 0.5876, "step": 3924 }, { "epoch": 0.25, "grad_norm": 1.2814263105392456, "learning_rate": 8.747934220416177e-06, "loss": 0.5428, "step": 3925 }, { "epoch": 0.25, "grad_norm": 1.137593388557434, "learning_rate": 8.74724223758612e-06, "loss": 0.584, "step": 3926 }, { "epoch": 0.25, "grad_norm": 1.2029062509536743, "learning_rate": 8.746550090973771e-06, "loss": 0.5592, "step": 3927 }, { "epoch": 0.25, "grad_norm": 1.3499956130981445, "learning_rate": 8.745857780609384e-06, "loss": 0.5688, "step": 3928 }, { "epoch": 0.25, "grad_norm": 1.2081423997879028, "learning_rate": 8.745165306523217e-06, "loss": 0.5637, "step": 3929 }, { "epoch": 0.25, "grad_norm": 1.2353421449661255, "learning_rate": 8.74447266874554e-06, "loss": 0.5938, "step": 3930 }, { "epoch": 0.25, "grad_norm": 1.211301565170288, "learning_rate": 8.743779867306622e-06, "loss": 0.5454, "step": 3931 }, { "epoch": 0.25, "grad_norm": 1.1324204206466675, "learning_rate": 8.743086902236743e-06, "loss": 0.5075, "step": 3932 }, { "epoch": 0.25, "grad_norm": 1.1191922426223755, "learning_rate": 8.742393773566195e-06, "loss": 0.5323, "step": 3933 }, { "epoch": 0.25, "grad_norm": 1.540982723236084, "learning_rate": 8.741700481325271e-06, "loss": 0.6167, "step": 3934 }, { "epoch": 0.25, "grad_norm": 1.3550564050674438, "learning_rate": 8.741007025544273e-06, "loss": 0.5544, "step": 3935 }, { "epoch": 0.25, "grad_norm": 1.2600338459014893, "learning_rate": 8.740313406253509e-06, "loss": 0.5811, "step": 3936 }, { "epoch": 0.25, "grad_norm": 1.2689043283462524, "learning_rate": 8.739619623483296e-06, "loss": 0.5606, "step": 3937 }, { "epoch": 0.25, "grad_norm": 1.217634916305542, "learning_rate": 8.738925677263957e-06, "loss": 0.5541, "step": 3938 }, { "epoch": 0.25, "grad_norm": 1.309937834739685, "learning_rate": 8.738231567625823e-06, "loss": 0.5601, "step": 3939 }, { "epoch": 0.25, "grad_norm": 1.2557153701782227, "learning_rate": 8.737537294599235e-06, "loss": 0.5862, "step": 3940 }, { "epoch": 0.25, "grad_norm": 1.2593830823898315, "learning_rate": 8.736842858214532e-06, "loss": 0.5421, "step": 3941 }, { "epoch": 0.25, "grad_norm": 1.2987676858901978, "learning_rate": 8.736148258502069e-06, "loss": 0.5688, "step": 3942 }, { "epoch": 0.25, "grad_norm": 1.1952149868011475, "learning_rate": 8.735453495492205e-06, "loss": 0.6017, "step": 3943 }, { "epoch": 0.25, "grad_norm": 1.155908226966858, "learning_rate": 8.734758569215307e-06, "loss": 0.4903, "step": 3944 }, { "epoch": 0.25, "grad_norm": 1.2433099746704102, "learning_rate": 8.734063479701747e-06, "loss": 0.5727, "step": 3945 }, { "epoch": 0.25, "grad_norm": 1.3704736232757568, "learning_rate": 8.733368226981907e-06, "loss": 0.5637, "step": 3946 }, { "epoch": 0.25, "grad_norm": 1.1714437007904053, "learning_rate": 8.732672811086174e-06, "loss": 0.5031, "step": 3947 }, { "epoch": 0.25, "grad_norm": 1.1727384328842163, "learning_rate": 8.731977232044943e-06, "loss": 0.535, "step": 3948 }, { "epoch": 0.25, "grad_norm": 1.266784429550171, "learning_rate": 8.731281489888614e-06, "loss": 0.5238, "step": 3949 }, { "epoch": 0.25, "grad_norm": 1.428558111190796, "learning_rate": 8.7305855846476e-06, "loss": 0.5144, "step": 3950 }, { "epoch": 0.26, "grad_norm": 1.2353821992874146, "learning_rate": 8.729889516352314e-06, "loss": 0.5149, "step": 3951 }, { "epoch": 0.26, "grad_norm": 1.149503469467163, "learning_rate": 8.729193285033182e-06, "loss": 0.5532, "step": 3952 }, { "epoch": 0.26, "grad_norm": 1.2599438428878784, "learning_rate": 8.728496890720632e-06, "loss": 0.5849, "step": 3953 }, { "epoch": 0.26, "grad_norm": 1.1821272373199463, "learning_rate": 8.727800333445102e-06, "loss": 0.5634, "step": 3954 }, { "epoch": 0.26, "grad_norm": 1.1894230842590332, "learning_rate": 8.727103613237037e-06, "loss": 0.5776, "step": 3955 }, { "epoch": 0.26, "grad_norm": 1.20229172706604, "learning_rate": 8.726406730126891e-06, "loss": 0.61, "step": 3956 }, { "epoch": 0.26, "grad_norm": 1.359997034072876, "learning_rate": 8.72570968414512e-06, "loss": 0.5584, "step": 3957 }, { "epoch": 0.26, "grad_norm": 1.3391788005828857, "learning_rate": 8.725012475322193e-06, "loss": 0.568, "step": 3958 }, { "epoch": 0.26, "grad_norm": 1.2491588592529297, "learning_rate": 8.72431510368858e-06, "loss": 0.5928, "step": 3959 }, { "epoch": 0.26, "grad_norm": 1.3001607656478882, "learning_rate": 8.723617569274761e-06, "loss": 0.5323, "step": 3960 }, { "epoch": 0.26, "grad_norm": 1.4650894403457642, "learning_rate": 8.722919872111226e-06, "loss": 0.6037, "step": 3961 }, { "epoch": 0.26, "grad_norm": 1.1530565023422241, "learning_rate": 8.72222201222847e-06, "loss": 0.5367, "step": 3962 }, { "epoch": 0.26, "grad_norm": 1.2734497785568237, "learning_rate": 8.721523989656991e-06, "loss": 0.5492, "step": 3963 }, { "epoch": 0.26, "grad_norm": 1.1838091611862183, "learning_rate": 8.720825804427301e-06, "loss": 0.5632, "step": 3964 }, { "epoch": 0.26, "grad_norm": 1.1997613906860352, "learning_rate": 8.720127456569915e-06, "loss": 0.5695, "step": 3965 }, { "epoch": 0.26, "grad_norm": 1.1922858953475952, "learning_rate": 8.719428946115358e-06, "loss": 0.5875, "step": 3966 }, { "epoch": 0.26, "grad_norm": 1.1875282526016235, "learning_rate": 8.718730273094156e-06, "loss": 0.5492, "step": 3967 }, { "epoch": 0.26, "grad_norm": 1.1868394613265991, "learning_rate": 8.71803143753685e-06, "loss": 0.5781, "step": 3968 }, { "epoch": 0.26, "grad_norm": 1.4025242328643799, "learning_rate": 8.71733243947398e-06, "loss": 0.5767, "step": 3969 }, { "epoch": 0.26, "grad_norm": 1.1298694610595703, "learning_rate": 8.7166332789361e-06, "loss": 0.5899, "step": 3970 }, { "epoch": 0.26, "grad_norm": 1.03023099899292, "learning_rate": 8.71593395595377e-06, "loss": 0.593, "step": 3971 }, { "epoch": 0.26, "grad_norm": 1.1224298477172852, "learning_rate": 8.715234470557553e-06, "loss": 0.5725, "step": 3972 }, { "epoch": 0.26, "grad_norm": 1.1172308921813965, "learning_rate": 8.714534822778022e-06, "loss": 0.5602, "step": 3973 }, { "epoch": 0.26, "grad_norm": 1.2637042999267578, "learning_rate": 8.71383501264576e-06, "loss": 0.5146, "step": 3974 }, { "epoch": 0.26, "grad_norm": 1.2087706327438354, "learning_rate": 8.71313504019135e-06, "loss": 0.5712, "step": 3975 }, { "epoch": 0.26, "grad_norm": 1.26228666305542, "learning_rate": 8.712434905445387e-06, "loss": 0.577, "step": 3976 }, { "epoch": 0.26, "grad_norm": 1.1941555738449097, "learning_rate": 8.711734608438474e-06, "loss": 0.5844, "step": 3977 }, { "epoch": 0.26, "grad_norm": 1.1792997121810913, "learning_rate": 8.711034149201216e-06, "loss": 0.5967, "step": 3978 }, { "epoch": 0.26, "grad_norm": 1.392087459564209, "learning_rate": 8.71033352776423e-06, "loss": 0.5982, "step": 3979 }, { "epoch": 0.26, "grad_norm": 1.2969837188720703, "learning_rate": 8.709632744158139e-06, "loss": 0.5506, "step": 3980 }, { "epoch": 0.26, "grad_norm": 1.3845887184143066, "learning_rate": 8.708931798413572e-06, "loss": 0.5789, "step": 3981 }, { "epoch": 0.26, "grad_norm": 1.1897211074829102, "learning_rate": 8.708230690561165e-06, "loss": 0.5423, "step": 3982 }, { "epoch": 0.26, "grad_norm": 1.1643797159194946, "learning_rate": 8.707529420631563e-06, "loss": 0.5235, "step": 3983 }, { "epoch": 0.26, "grad_norm": 1.2630664110183716, "learning_rate": 8.706827988655413e-06, "loss": 0.5569, "step": 3984 }, { "epoch": 0.26, "grad_norm": 1.5115811824798584, "learning_rate": 8.706126394663376e-06, "loss": 0.5783, "step": 3985 }, { "epoch": 0.26, "grad_norm": 1.313165307044983, "learning_rate": 8.705424638686117e-06, "loss": 0.5899, "step": 3986 }, { "epoch": 0.26, "grad_norm": 1.3437464237213135, "learning_rate": 8.704722720754308e-06, "loss": 0.5289, "step": 3987 }, { "epoch": 0.26, "grad_norm": 1.272902250289917, "learning_rate": 8.704020640898626e-06, "loss": 0.5577, "step": 3988 }, { "epoch": 0.26, "grad_norm": 1.1661434173583984, "learning_rate": 8.703318399149758e-06, "loss": 0.5247, "step": 3989 }, { "epoch": 0.26, "grad_norm": 1.2513164281845093, "learning_rate": 8.702615995538399e-06, "loss": 0.5971, "step": 3990 }, { "epoch": 0.26, "grad_norm": 1.2849024534225464, "learning_rate": 8.701913430095246e-06, "loss": 0.5984, "step": 3991 }, { "epoch": 0.26, "grad_norm": 1.2019726037979126, "learning_rate": 8.701210702851009e-06, "loss": 0.5788, "step": 3992 }, { "epoch": 0.26, "grad_norm": 1.3217267990112305, "learning_rate": 8.700507813836401e-06, "loss": 0.5674, "step": 3993 }, { "epoch": 0.26, "grad_norm": 1.6744028329849243, "learning_rate": 8.699804763082145e-06, "loss": 0.49, "step": 3994 }, { "epoch": 0.26, "grad_norm": 1.258581519126892, "learning_rate": 8.699101550618967e-06, "loss": 0.5233, "step": 3995 }, { "epoch": 0.26, "grad_norm": 1.4075610637664795, "learning_rate": 8.698398176477604e-06, "loss": 0.6035, "step": 3996 }, { "epoch": 0.26, "grad_norm": 1.2906317710876465, "learning_rate": 8.6976946406888e-06, "loss": 0.5697, "step": 3997 }, { "epoch": 0.26, "grad_norm": 1.1399260759353638, "learning_rate": 8.696990943283301e-06, "loss": 0.4998, "step": 3998 }, { "epoch": 0.26, "grad_norm": 1.2914701700210571, "learning_rate": 8.696287084291868e-06, "loss": 0.5991, "step": 3999 }, { "epoch": 0.26, "grad_norm": 1.277255654335022, "learning_rate": 8.695583063745264e-06, "loss": 0.5664, "step": 4000 }, { "epoch": 0.26, "grad_norm": 1.1018397808074951, "learning_rate": 8.694878881674257e-06, "loss": 0.5252, "step": 4001 }, { "epoch": 0.26, "grad_norm": 1.155687928199768, "learning_rate": 8.69417453810963e-06, "loss": 0.5717, "step": 4002 }, { "epoch": 0.26, "grad_norm": 1.1717098951339722, "learning_rate": 8.693470033082161e-06, "loss": 0.5207, "step": 4003 }, { "epoch": 0.26, "grad_norm": 1.2473832368850708, "learning_rate": 8.692765366622648e-06, "loss": 0.5792, "step": 4004 }, { "epoch": 0.26, "grad_norm": 1.2450724840164185, "learning_rate": 8.69206053876189e-06, "loss": 0.5416, "step": 4005 }, { "epoch": 0.26, "grad_norm": 1.6606662273406982, "learning_rate": 8.69135554953069e-06, "loss": 0.5387, "step": 4006 }, { "epoch": 0.26, "grad_norm": 1.3594588041305542, "learning_rate": 8.690650398959861e-06, "loss": 0.5962, "step": 4007 }, { "epoch": 0.26, "grad_norm": 1.3611509799957275, "learning_rate": 8.689945087080228e-06, "loss": 0.596, "step": 4008 }, { "epoch": 0.26, "grad_norm": 1.3245444297790527, "learning_rate": 8.689239613922614e-06, "loss": 0.6037, "step": 4009 }, { "epoch": 0.26, "grad_norm": 1.2097152471542358, "learning_rate": 8.688533979517855e-06, "loss": 0.549, "step": 4010 }, { "epoch": 0.26, "grad_norm": 1.2649396657943726, "learning_rate": 8.687828183896792e-06, "loss": 0.6419, "step": 4011 }, { "epoch": 0.26, "grad_norm": 1.1137644052505493, "learning_rate": 8.687122227090274e-06, "loss": 0.5483, "step": 4012 }, { "epoch": 0.26, "grad_norm": 1.2079235315322876, "learning_rate": 8.686416109129156e-06, "loss": 0.5203, "step": 4013 }, { "epoch": 0.26, "grad_norm": 1.272394061088562, "learning_rate": 8.685709830044302e-06, "loss": 0.6102, "step": 4014 }, { "epoch": 0.26, "grad_norm": 1.2204742431640625, "learning_rate": 8.68500338986658e-06, "loss": 0.5363, "step": 4015 }, { "epoch": 0.26, "grad_norm": 1.1404880285263062, "learning_rate": 8.684296788626868e-06, "loss": 0.5583, "step": 4016 }, { "epoch": 0.26, "grad_norm": 1.1167899370193481, "learning_rate": 8.683590026356049e-06, "loss": 0.522, "step": 4017 }, { "epoch": 0.26, "grad_norm": 1.2481938600540161, "learning_rate": 8.682883103085012e-06, "loss": 0.5862, "step": 4018 }, { "epoch": 0.26, "grad_norm": 1.5158933401107788, "learning_rate": 8.682176018844658e-06, "loss": 0.5892, "step": 4019 }, { "epoch": 0.26, "grad_norm": 1.2465293407440186, "learning_rate": 8.681468773665888e-06, "loss": 0.5338, "step": 4020 }, { "epoch": 0.26, "grad_norm": 1.249934434890747, "learning_rate": 8.68076136757962e-06, "loss": 0.5406, "step": 4021 }, { "epoch": 0.26, "grad_norm": 1.195595622062683, "learning_rate": 8.680053800616766e-06, "loss": 0.6022, "step": 4022 }, { "epoch": 0.26, "grad_norm": 1.0866607427597046, "learning_rate": 8.679346072808256e-06, "loss": 0.5412, "step": 4023 }, { "epoch": 0.26, "grad_norm": 1.808372974395752, "learning_rate": 8.678638184185023e-06, "loss": 0.5278, "step": 4024 }, { "epoch": 0.26, "grad_norm": 1.1793465614318848, "learning_rate": 8.677930134778005e-06, "loss": 0.5442, "step": 4025 }, { "epoch": 0.26, "grad_norm": 1.3217717409133911, "learning_rate": 8.677221924618151e-06, "loss": 0.6417, "step": 4026 }, { "epoch": 0.26, "grad_norm": 1.374787449836731, "learning_rate": 8.676513553736415e-06, "loss": 0.6007, "step": 4027 }, { "epoch": 0.26, "grad_norm": 1.290431261062622, "learning_rate": 8.675805022163755e-06, "loss": 0.5615, "step": 4028 }, { "epoch": 0.26, "grad_norm": 1.2384248971939087, "learning_rate": 8.67509632993114e-06, "loss": 0.6052, "step": 4029 }, { "epoch": 0.26, "grad_norm": 1.1653894186019897, "learning_rate": 8.674387477069548e-06, "loss": 0.534, "step": 4030 }, { "epoch": 0.26, "grad_norm": 1.23048996925354, "learning_rate": 8.67367846360996e-06, "loss": 0.5785, "step": 4031 }, { "epoch": 0.26, "grad_norm": 1.2755146026611328, "learning_rate": 8.672969289583363e-06, "loss": 0.5546, "step": 4032 }, { "epoch": 0.26, "grad_norm": 1.2723993062973022, "learning_rate": 8.672259955020757e-06, "loss": 0.5226, "step": 4033 }, { "epoch": 0.26, "grad_norm": 1.27908194065094, "learning_rate": 8.67155045995314e-06, "loss": 0.5369, "step": 4034 }, { "epoch": 0.26, "grad_norm": 1.156853437423706, "learning_rate": 8.670840804411526e-06, "loss": 0.5332, "step": 4035 }, { "epoch": 0.26, "grad_norm": 1.0913419723510742, "learning_rate": 8.670130988426933e-06, "loss": 0.5181, "step": 4036 }, { "epoch": 0.26, "grad_norm": 1.1994889974594116, "learning_rate": 8.669421012030383e-06, "loss": 0.5555, "step": 4037 }, { "epoch": 0.26, "grad_norm": 1.2396610975265503, "learning_rate": 8.668710875252907e-06, "loss": 0.5332, "step": 4038 }, { "epoch": 0.26, "grad_norm": 1.2211873531341553, "learning_rate": 8.668000578125544e-06, "loss": 0.5746, "step": 4039 }, { "epoch": 0.26, "grad_norm": 1.217777967453003, "learning_rate": 8.667290120679339e-06, "loss": 0.5368, "step": 4040 }, { "epoch": 0.26, "grad_norm": 1.3222877979278564, "learning_rate": 8.666579502945347e-06, "loss": 0.6264, "step": 4041 }, { "epoch": 0.26, "grad_norm": 1.3055462837219238, "learning_rate": 8.665868724954622e-06, "loss": 0.5681, "step": 4042 }, { "epoch": 0.26, "grad_norm": 2.0392541885375977, "learning_rate": 8.665157786738234e-06, "loss": 0.6162, "step": 4043 }, { "epoch": 0.26, "grad_norm": 1.2650147676467896, "learning_rate": 8.664446688327256e-06, "loss": 0.6014, "step": 4044 }, { "epoch": 0.26, "grad_norm": 1.1131746768951416, "learning_rate": 8.663735429752766e-06, "loss": 0.5386, "step": 4045 }, { "epoch": 0.26, "grad_norm": 1.1304672956466675, "learning_rate": 8.663024011045856e-06, "loss": 0.5436, "step": 4046 }, { "epoch": 0.26, "grad_norm": 1.192566156387329, "learning_rate": 8.662312432237614e-06, "loss": 0.528, "step": 4047 }, { "epoch": 0.26, "grad_norm": 1.2743077278137207, "learning_rate": 8.661600693359146e-06, "loss": 0.6192, "step": 4048 }, { "epoch": 0.26, "grad_norm": 1.1942932605743408, "learning_rate": 8.66088879444156e-06, "loss": 0.523, "step": 4049 }, { "epoch": 0.26, "grad_norm": 1.2315644025802612, "learning_rate": 8.660176735515969e-06, "loss": 0.548, "step": 4050 }, { "epoch": 0.26, "grad_norm": 1.3273383378982544, "learning_rate": 8.659464516613494e-06, "loss": 0.5554, "step": 4051 }, { "epoch": 0.26, "grad_norm": 1.1753509044647217, "learning_rate": 8.65875213776527e-06, "loss": 0.6141, "step": 4052 }, { "epoch": 0.26, "grad_norm": 1.3472990989685059, "learning_rate": 8.658039599002426e-06, "loss": 0.5598, "step": 4053 }, { "epoch": 0.26, "grad_norm": 1.2434310913085938, "learning_rate": 8.657326900356113e-06, "loss": 0.5581, "step": 4054 }, { "epoch": 0.26, "grad_norm": 1.0632684230804443, "learning_rate": 8.656614041857474e-06, "loss": 0.5629, "step": 4055 }, { "epoch": 0.26, "grad_norm": 1.2091089487075806, "learning_rate": 8.655901023537672e-06, "loss": 0.561, "step": 4056 }, { "epoch": 0.26, "grad_norm": 1.4080921411514282, "learning_rate": 8.655187845427866e-06, "loss": 0.573, "step": 4057 }, { "epoch": 0.26, "grad_norm": 1.3205575942993164, "learning_rate": 8.654474507559232e-06, "loss": 0.5275, "step": 4058 }, { "epoch": 0.26, "grad_norm": 1.130042552947998, "learning_rate": 8.653761009962944e-06, "loss": 0.587, "step": 4059 }, { "epoch": 0.26, "grad_norm": 2.7960896492004395, "learning_rate": 8.653047352670191e-06, "loss": 0.5438, "step": 4060 }, { "epoch": 0.26, "grad_norm": 1.2564643621444702, "learning_rate": 8.652333535712161e-06, "loss": 0.5739, "step": 4061 }, { "epoch": 0.26, "grad_norm": 1.1578497886657715, "learning_rate": 8.651619559120057e-06, "loss": 0.5692, "step": 4062 }, { "epoch": 0.26, "grad_norm": 1.1646075248718262, "learning_rate": 8.650905422925085e-06, "loss": 0.6002, "step": 4063 }, { "epoch": 0.26, "grad_norm": 1.21863853931427, "learning_rate": 8.650191127158454e-06, "loss": 0.567, "step": 4064 }, { "epoch": 0.26, "grad_norm": 1.2270140647888184, "learning_rate": 8.649476671851387e-06, "loss": 0.5717, "step": 4065 }, { "epoch": 0.26, "grad_norm": 1.3064275979995728, "learning_rate": 8.64876205703511e-06, "loss": 0.5504, "step": 4066 }, { "epoch": 0.26, "grad_norm": 1.2055292129516602, "learning_rate": 8.648047282740858e-06, "loss": 0.5048, "step": 4067 }, { "epoch": 0.26, "grad_norm": 1.3696569204330444, "learning_rate": 8.64733234899987e-06, "loss": 0.6121, "step": 4068 }, { "epoch": 0.26, "grad_norm": 1.5945011377334595, "learning_rate": 8.646617255843397e-06, "loss": 0.5716, "step": 4069 }, { "epoch": 0.26, "grad_norm": 1.212559700012207, "learning_rate": 8.645902003302692e-06, "loss": 0.5262, "step": 4070 }, { "epoch": 0.26, "grad_norm": 1.2647544145584106, "learning_rate": 8.645186591409015e-06, "loss": 0.5392, "step": 4071 }, { "epoch": 0.26, "grad_norm": 1.1561442613601685, "learning_rate": 8.64447102019364e-06, "loss": 0.5708, "step": 4072 }, { "epoch": 0.26, "grad_norm": 1.2343194484710693, "learning_rate": 8.643755289687837e-06, "loss": 0.5477, "step": 4073 }, { "epoch": 0.26, "grad_norm": 1.2162854671478271, "learning_rate": 8.643039399922893e-06, "loss": 0.5981, "step": 4074 }, { "epoch": 0.26, "grad_norm": 1.125133991241455, "learning_rate": 8.642323350930095e-06, "loss": 0.5289, "step": 4075 }, { "epoch": 0.26, "grad_norm": 1.0666342973709106, "learning_rate": 8.641607142740742e-06, "loss": 0.5317, "step": 4076 }, { "epoch": 0.26, "grad_norm": 1.1278069019317627, "learning_rate": 8.640890775386135e-06, "loss": 0.5385, "step": 4077 }, { "epoch": 0.26, "grad_norm": 1.407252550125122, "learning_rate": 8.640174248897585e-06, "loss": 0.5698, "step": 4078 }, { "epoch": 0.26, "grad_norm": 1.2571734189987183, "learning_rate": 8.639457563306412e-06, "loss": 0.5612, "step": 4079 }, { "epoch": 0.26, "grad_norm": 16.977657318115234, "learning_rate": 8.638740718643937e-06, "loss": 0.5285, "step": 4080 }, { "epoch": 0.26, "grad_norm": 1.2184888124465942, "learning_rate": 8.638023714941495e-06, "loss": 0.5618, "step": 4081 }, { "epoch": 0.26, "grad_norm": 1.6986840963363647, "learning_rate": 8.637306552230422e-06, "loss": 0.5474, "step": 4082 }, { "epoch": 0.26, "grad_norm": 1.246151328086853, "learning_rate": 8.636589230542064e-06, "loss": 0.5184, "step": 4083 }, { "epoch": 0.26, "grad_norm": 1.1961897611618042, "learning_rate": 8.635871749907774e-06, "loss": 0.5927, "step": 4084 }, { "epoch": 0.26, "grad_norm": 1.4523459672927856, "learning_rate": 8.63515411035891e-06, "loss": 0.5904, "step": 4085 }, { "epoch": 0.26, "grad_norm": 1.2095180749893188, "learning_rate": 8.634436311926837e-06, "loss": 0.5516, "step": 4086 }, { "epoch": 0.26, "grad_norm": 1.2480319738388062, "learning_rate": 8.633718354642931e-06, "loss": 0.5492, "step": 4087 }, { "epoch": 0.26, "grad_norm": 1.2636150121688843, "learning_rate": 8.633000238538571e-06, "loss": 0.5912, "step": 4088 }, { "epoch": 0.26, "grad_norm": 1.2436060905456543, "learning_rate": 8.632281963645144e-06, "loss": 0.5293, "step": 4089 }, { "epoch": 0.26, "grad_norm": 1.229607343673706, "learning_rate": 8.631563529994045e-06, "loss": 0.5718, "step": 4090 }, { "epoch": 0.26, "grad_norm": 1.2721407413482666, "learning_rate": 8.63084493761667e-06, "loss": 0.6111, "step": 4091 }, { "epoch": 0.26, "grad_norm": 1.6589717864990234, "learning_rate": 8.630126186544434e-06, "loss": 0.5407, "step": 4092 }, { "epoch": 0.26, "grad_norm": 1.2945435047149658, "learning_rate": 8.629407276808748e-06, "loss": 0.6163, "step": 4093 }, { "epoch": 0.26, "grad_norm": 1.1733886003494263, "learning_rate": 8.628688208441034e-06, "loss": 0.5373, "step": 4094 }, { "epoch": 0.26, "grad_norm": 1.2149401903152466, "learning_rate": 8.62796898147272e-06, "loss": 0.5695, "step": 4095 }, { "epoch": 0.26, "grad_norm": 1.2240813970565796, "learning_rate": 8.62724959593524e-06, "loss": 0.5672, "step": 4096 }, { "epoch": 0.26, "grad_norm": 1.0848031044006348, "learning_rate": 8.626530051860041e-06, "loss": 0.5293, "step": 4097 }, { "epoch": 0.26, "grad_norm": 1.2782719135284424, "learning_rate": 8.62581034927857e-06, "loss": 0.5543, "step": 4098 }, { "epoch": 0.26, "grad_norm": 1.313409447669983, "learning_rate": 8.62509048822228e-06, "loss": 0.5759, "step": 4099 }, { "epoch": 0.26, "grad_norm": 1.3369966745376587, "learning_rate": 8.624370468722642e-06, "loss": 0.5569, "step": 4100 }, { "epoch": 0.26, "grad_norm": 1.1516395807266235, "learning_rate": 8.62365029081112e-06, "loss": 0.573, "step": 4101 }, { "epoch": 0.26, "grad_norm": 1.244042158126831, "learning_rate": 8.622929954519193e-06, "loss": 0.5421, "step": 4102 }, { "epoch": 0.26, "grad_norm": 1.153930425643921, "learning_rate": 8.622209459878344e-06, "loss": 0.5709, "step": 4103 }, { "epoch": 0.26, "grad_norm": 1.1611562967300415, "learning_rate": 8.621488806920066e-06, "loss": 0.541, "step": 4104 }, { "epoch": 0.26, "grad_norm": 1.1870728731155396, "learning_rate": 8.620767995675856e-06, "loss": 0.5586, "step": 4105 }, { "epoch": 0.27, "grad_norm": 1.2929238080978394, "learning_rate": 8.620047026177219e-06, "loss": 0.5926, "step": 4106 }, { "epoch": 0.27, "grad_norm": 1.1017757654190063, "learning_rate": 8.619325898455664e-06, "loss": 0.5053, "step": 4107 }, { "epoch": 0.27, "grad_norm": 1.253713846206665, "learning_rate": 8.618604612542713e-06, "loss": 0.5546, "step": 4108 }, { "epoch": 0.27, "grad_norm": 1.1647475957870483, "learning_rate": 8.617883168469892e-06, "loss": 0.5413, "step": 4109 }, { "epoch": 0.27, "grad_norm": 1.183211088180542, "learning_rate": 8.617161566268731e-06, "loss": 0.5319, "step": 4110 }, { "epoch": 0.27, "grad_norm": 1.0488322973251343, "learning_rate": 8.616439805970771e-06, "loss": 0.5913, "step": 4111 }, { "epoch": 0.27, "grad_norm": 1.2133508920669556, "learning_rate": 8.615717887607556e-06, "loss": 0.5851, "step": 4112 }, { "epoch": 0.27, "grad_norm": 1.1394673585891724, "learning_rate": 8.614995811210643e-06, "loss": 0.542, "step": 4113 }, { "epoch": 0.27, "grad_norm": 1.193429946899414, "learning_rate": 8.614273576811588e-06, "loss": 0.5647, "step": 4114 }, { "epoch": 0.27, "grad_norm": 1.130056381225586, "learning_rate": 8.61355118444196e-06, "loss": 0.5521, "step": 4115 }, { "epoch": 0.27, "grad_norm": 1.3761135339736938, "learning_rate": 8.612828634133335e-06, "loss": 0.5855, "step": 4116 }, { "epoch": 0.27, "grad_norm": 1.3510661125183105, "learning_rate": 8.612105925917289e-06, "loss": 0.6027, "step": 4117 }, { "epoch": 0.27, "grad_norm": 1.2859174013137817, "learning_rate": 8.611383059825414e-06, "loss": 0.5617, "step": 4118 }, { "epoch": 0.27, "grad_norm": 1.2857476472854614, "learning_rate": 8.610660035889302e-06, "loss": 0.5546, "step": 4119 }, { "epoch": 0.27, "grad_norm": 1.109472393989563, "learning_rate": 8.609936854140557e-06, "loss": 0.5227, "step": 4120 }, { "epoch": 0.27, "grad_norm": 1.36490797996521, "learning_rate": 8.609213514610784e-06, "loss": 0.583, "step": 4121 }, { "epoch": 0.27, "grad_norm": 1.122012734413147, "learning_rate": 8.608490017331602e-06, "loss": 0.5218, "step": 4122 }, { "epoch": 0.27, "grad_norm": 1.2092286348342896, "learning_rate": 8.60776636233463e-06, "loss": 0.5656, "step": 4123 }, { "epoch": 0.27, "grad_norm": 1.2125571966171265, "learning_rate": 8.607042549651498e-06, "loss": 0.573, "step": 4124 }, { "epoch": 0.27, "grad_norm": 1.2220587730407715, "learning_rate": 8.606318579313842e-06, "loss": 0.5482, "step": 4125 }, { "epoch": 0.27, "grad_norm": 1.203147053718567, "learning_rate": 8.605594451353308e-06, "loss": 0.5744, "step": 4126 }, { "epoch": 0.27, "grad_norm": 1.2100011110305786, "learning_rate": 8.60487016580154e-06, "loss": 0.5776, "step": 4127 }, { "epoch": 0.27, "grad_norm": 1.2070075273513794, "learning_rate": 8.6041457226902e-06, "loss": 0.5932, "step": 4128 }, { "epoch": 0.27, "grad_norm": 1.2795183658599854, "learning_rate": 8.60342112205095e-06, "loss": 0.5395, "step": 4129 }, { "epoch": 0.27, "grad_norm": 1.0966876745224, "learning_rate": 8.602696363915457e-06, "loss": 0.6179, "step": 4130 }, { "epoch": 0.27, "grad_norm": 1.130291223526001, "learning_rate": 8.601971448315403e-06, "loss": 0.5521, "step": 4131 }, { "epoch": 0.27, "grad_norm": 1.174310564994812, "learning_rate": 8.601246375282468e-06, "loss": 0.5399, "step": 4132 }, { "epoch": 0.27, "grad_norm": 1.2354316711425781, "learning_rate": 8.600521144848347e-06, "loss": 0.5588, "step": 4133 }, { "epoch": 0.27, "grad_norm": 1.1033289432525635, "learning_rate": 8.599795757044736e-06, "loss": 0.5305, "step": 4134 }, { "epoch": 0.27, "grad_norm": 1.399173617362976, "learning_rate": 8.59907021190334e-06, "loss": 0.6031, "step": 4135 }, { "epoch": 0.27, "grad_norm": 1.2756210565567017, "learning_rate": 8.598344509455871e-06, "loss": 0.5763, "step": 4136 }, { "epoch": 0.27, "grad_norm": 1.17406165599823, "learning_rate": 8.597618649734047e-06, "loss": 0.5794, "step": 4137 }, { "epoch": 0.27, "grad_norm": 1.1995811462402344, "learning_rate": 8.596892632769594e-06, "loss": 0.5557, "step": 4138 }, { "epoch": 0.27, "grad_norm": 1.4989738464355469, "learning_rate": 8.596166458594244e-06, "loss": 0.623, "step": 4139 }, { "epoch": 0.27, "grad_norm": 1.1544581651687622, "learning_rate": 8.595440127239738e-06, "loss": 0.5278, "step": 4140 }, { "epoch": 0.27, "grad_norm": 1.2327934503555298, "learning_rate": 8.594713638737818e-06, "loss": 0.5219, "step": 4141 }, { "epoch": 0.27, "grad_norm": 1.2161445617675781, "learning_rate": 8.59398699312024e-06, "loss": 0.5843, "step": 4142 }, { "epoch": 0.27, "grad_norm": 1.223368525505066, "learning_rate": 8.593260190418765e-06, "loss": 0.5762, "step": 4143 }, { "epoch": 0.27, "grad_norm": 1.2442775964736938, "learning_rate": 8.592533230665154e-06, "loss": 0.5648, "step": 4144 }, { "epoch": 0.27, "grad_norm": 1.230576753616333, "learning_rate": 8.591806113891187e-06, "loss": 0.6001, "step": 4145 }, { "epoch": 0.27, "grad_norm": 1.3843586444854736, "learning_rate": 8.591078840128642e-06, "loss": 0.549, "step": 4146 }, { "epoch": 0.27, "grad_norm": 1.1411439180374146, "learning_rate": 8.590351409409305e-06, "loss": 0.5392, "step": 4147 }, { "epoch": 0.27, "grad_norm": 1.1927884817123413, "learning_rate": 8.589623821764971e-06, "loss": 0.4992, "step": 4148 }, { "epoch": 0.27, "grad_norm": 1.224609613418579, "learning_rate": 8.58889607722744e-06, "loss": 0.5168, "step": 4149 }, { "epoch": 0.27, "grad_norm": 1.190293788909912, "learning_rate": 8.588168175828523e-06, "loss": 0.5765, "step": 4150 }, { "epoch": 0.27, "grad_norm": 1.0747616291046143, "learning_rate": 8.587440117600033e-06, "loss": 0.523, "step": 4151 }, { "epoch": 0.27, "grad_norm": 1.1856069564819336, "learning_rate": 8.58671190257379e-06, "loss": 0.5778, "step": 4152 }, { "epoch": 0.27, "grad_norm": 1.3347018957138062, "learning_rate": 8.585983530781623e-06, "loss": 0.6263, "step": 4153 }, { "epoch": 0.27, "grad_norm": 1.1689285039901733, "learning_rate": 8.58525500225537e-06, "loss": 0.5343, "step": 4154 }, { "epoch": 0.27, "grad_norm": 1.1984418630599976, "learning_rate": 8.584526317026868e-06, "loss": 0.6001, "step": 4155 }, { "epoch": 0.27, "grad_norm": 1.1100916862487793, "learning_rate": 8.58379747512797e-06, "loss": 0.5669, "step": 4156 }, { "epoch": 0.27, "grad_norm": 1.2458772659301758, "learning_rate": 8.583068476590533e-06, "loss": 0.5686, "step": 4157 }, { "epoch": 0.27, "grad_norm": 1.4719551801681519, "learning_rate": 8.582339321446414e-06, "loss": 0.5713, "step": 4158 }, { "epoch": 0.27, "grad_norm": 1.255275845527649, "learning_rate": 8.581610009727487e-06, "loss": 0.5684, "step": 4159 }, { "epoch": 0.27, "grad_norm": 1.2063084840774536, "learning_rate": 8.580880541465628e-06, "loss": 0.5303, "step": 4160 }, { "epoch": 0.27, "grad_norm": 1.1766034364700317, "learning_rate": 8.58015091669272e-06, "loss": 0.4961, "step": 4161 }, { "epoch": 0.27, "grad_norm": 1.15731942653656, "learning_rate": 8.57942113544065e-06, "loss": 0.5425, "step": 4162 }, { "epoch": 0.27, "grad_norm": 1.1113885641098022, "learning_rate": 8.578691197741317e-06, "loss": 0.5778, "step": 4163 }, { "epoch": 0.27, "grad_norm": 1.2103408575057983, "learning_rate": 8.577961103626627e-06, "loss": 0.582, "step": 4164 }, { "epoch": 0.27, "grad_norm": 1.1001824140548706, "learning_rate": 8.577230853128488e-06, "loss": 0.5627, "step": 4165 }, { "epoch": 0.27, "grad_norm": 1.1804460287094116, "learning_rate": 8.576500446278817e-06, "loss": 0.554, "step": 4166 }, { "epoch": 0.27, "grad_norm": 1.2220643758773804, "learning_rate": 8.57576988310954e-06, "loss": 0.5619, "step": 4167 }, { "epoch": 0.27, "grad_norm": 1.1890196800231934, "learning_rate": 8.575039163652585e-06, "loss": 0.5644, "step": 4168 }, { "epoch": 0.27, "grad_norm": 1.1223632097244263, "learning_rate": 8.574308287939894e-06, "loss": 0.5392, "step": 4169 }, { "epoch": 0.27, "grad_norm": 1.2729123830795288, "learning_rate": 8.573577256003408e-06, "loss": 0.5791, "step": 4170 }, { "epoch": 0.27, "grad_norm": 1.165684700012207, "learning_rate": 8.572846067875082e-06, "loss": 0.557, "step": 4171 }, { "epoch": 0.27, "grad_norm": 1.2308602333068848, "learning_rate": 8.572114723586872e-06, "loss": 0.5517, "step": 4172 }, { "epoch": 0.27, "grad_norm": 1.208164095878601, "learning_rate": 8.571383223170743e-06, "loss": 0.5851, "step": 4173 }, { "epoch": 0.27, "grad_norm": 1.1634598970413208, "learning_rate": 8.570651566658667e-06, "loss": 0.5945, "step": 4174 }, { "epoch": 0.27, "grad_norm": 1.0628231763839722, "learning_rate": 8.569919754082624e-06, "loss": 0.5397, "step": 4175 }, { "epoch": 0.27, "grad_norm": 1.0852402448654175, "learning_rate": 8.5691877854746e-06, "loss": 0.5214, "step": 4176 }, { "epoch": 0.27, "grad_norm": 1.3403525352478027, "learning_rate": 8.568455660866584e-06, "loss": 0.5884, "step": 4177 }, { "epoch": 0.27, "grad_norm": 1.2926851511001587, "learning_rate": 8.56772338029058e-06, "loss": 0.5584, "step": 4178 }, { "epoch": 0.27, "grad_norm": 1.1332300901412964, "learning_rate": 8.566990943778591e-06, "loss": 0.562, "step": 4179 }, { "epoch": 0.27, "grad_norm": 1.268455147743225, "learning_rate": 8.566258351362632e-06, "loss": 0.5839, "step": 4180 }, { "epoch": 0.27, "grad_norm": 1.1007084846496582, "learning_rate": 8.56552560307472e-06, "loss": 0.5781, "step": 4181 }, { "epoch": 0.27, "grad_norm": 1.1924675703048706, "learning_rate": 8.564792698946885e-06, "loss": 0.592, "step": 4182 }, { "epoch": 0.27, "grad_norm": 1.171140432357788, "learning_rate": 8.564059639011155e-06, "loss": 0.571, "step": 4183 }, { "epoch": 0.27, "grad_norm": 1.1210906505584717, "learning_rate": 8.563326423299577e-06, "loss": 0.5697, "step": 4184 }, { "epoch": 0.27, "grad_norm": 1.250093936920166, "learning_rate": 8.562593051844192e-06, "loss": 0.5346, "step": 4185 }, { "epoch": 0.27, "grad_norm": 1.2119371891021729, "learning_rate": 8.561859524677059e-06, "loss": 0.6102, "step": 4186 }, { "epoch": 0.27, "grad_norm": 1.1047676801681519, "learning_rate": 8.561125841830234e-06, "loss": 0.5762, "step": 4187 }, { "epoch": 0.27, "grad_norm": 1.0656180381774902, "learning_rate": 8.560392003335787e-06, "loss": 0.5301, "step": 4188 }, { "epoch": 0.27, "grad_norm": 1.2671289443969727, "learning_rate": 8.55965800922579e-06, "loss": 0.6011, "step": 4189 }, { "epoch": 0.27, "grad_norm": 1.1189066171646118, "learning_rate": 8.558923859532328e-06, "loss": 0.5006, "step": 4190 }, { "epoch": 0.27, "grad_norm": 1.1749025583267212, "learning_rate": 8.558189554287483e-06, "loss": 0.5533, "step": 4191 }, { "epoch": 0.27, "grad_norm": 1.3333261013031006, "learning_rate": 8.557455093523357e-06, "loss": 0.5562, "step": 4192 }, { "epoch": 0.27, "grad_norm": 1.130499005317688, "learning_rate": 8.556720477272044e-06, "loss": 0.5461, "step": 4193 }, { "epoch": 0.27, "grad_norm": 1.20895254611969, "learning_rate": 8.555985705565656e-06, "loss": 0.6011, "step": 4194 }, { "epoch": 0.27, "grad_norm": 1.100459337234497, "learning_rate": 8.555250778436308e-06, "loss": 0.4955, "step": 4195 }, { "epoch": 0.27, "grad_norm": 1.1558326482772827, "learning_rate": 8.554515695916122e-06, "loss": 0.5565, "step": 4196 }, { "epoch": 0.27, "grad_norm": 1.2365931272506714, "learning_rate": 8.553780458037225e-06, "loss": 0.5633, "step": 4197 }, { "epoch": 0.27, "grad_norm": 1.2364509105682373, "learning_rate": 8.553045064831752e-06, "loss": 0.5639, "step": 4198 }, { "epoch": 0.27, "grad_norm": 1.1897644996643066, "learning_rate": 8.552309516331846e-06, "loss": 0.5902, "step": 4199 }, { "epoch": 0.27, "grad_norm": 1.2356752157211304, "learning_rate": 8.551573812569657e-06, "loss": 0.5693, "step": 4200 }, { "epoch": 0.27, "grad_norm": 1.1759861707687378, "learning_rate": 8.55083795357734e-06, "loss": 0.5464, "step": 4201 }, { "epoch": 0.27, "grad_norm": 1.3457356691360474, "learning_rate": 8.550101939387056e-06, "loss": 0.5966, "step": 4202 }, { "epoch": 0.27, "grad_norm": 1.2749825716018677, "learning_rate": 8.549365770030977e-06, "loss": 0.5182, "step": 4203 }, { "epoch": 0.27, "grad_norm": 1.0529775619506836, "learning_rate": 8.548629445541278e-06, "loss": 0.5441, "step": 4204 }, { "epoch": 0.27, "grad_norm": 1.1320489645004272, "learning_rate": 8.54789296595014e-06, "loss": 0.5499, "step": 4205 }, { "epoch": 0.27, "grad_norm": 1.185150384902954, "learning_rate": 8.547156331289756e-06, "loss": 0.5751, "step": 4206 }, { "epoch": 0.27, "grad_norm": 1.4077160358428955, "learning_rate": 8.54641954159232e-06, "loss": 0.5915, "step": 4207 }, { "epoch": 0.27, "grad_norm": 1.2817683219909668, "learning_rate": 8.545682596890034e-06, "loss": 0.5421, "step": 4208 }, { "epoch": 0.27, "grad_norm": 1.2390251159667969, "learning_rate": 8.544945497215111e-06, "loss": 0.5838, "step": 4209 }, { "epoch": 0.27, "grad_norm": 1.1715396642684937, "learning_rate": 8.544208242599767e-06, "loss": 0.561, "step": 4210 }, { "epoch": 0.27, "grad_norm": 1.2543964385986328, "learning_rate": 8.543470833076224e-06, "loss": 0.5498, "step": 4211 }, { "epoch": 0.27, "grad_norm": 1.2232125997543335, "learning_rate": 8.542733268676714e-06, "loss": 0.5614, "step": 4212 }, { "epoch": 0.27, "grad_norm": 1.3583402633666992, "learning_rate": 8.541995549433473e-06, "loss": 0.5943, "step": 4213 }, { "epoch": 0.27, "grad_norm": 1.1546661853790283, "learning_rate": 8.541257675378745e-06, "loss": 0.5158, "step": 4214 }, { "epoch": 0.27, "grad_norm": 1.1734999418258667, "learning_rate": 8.540519646544781e-06, "loss": 0.5632, "step": 4215 }, { "epoch": 0.27, "grad_norm": 1.2276109457015991, "learning_rate": 8.539781462963837e-06, "loss": 0.5488, "step": 4216 }, { "epoch": 0.27, "grad_norm": 1.126416802406311, "learning_rate": 8.539043124668178e-06, "loss": 0.535, "step": 4217 }, { "epoch": 0.27, "grad_norm": 1.2512520551681519, "learning_rate": 8.538304631690074e-06, "loss": 0.6063, "step": 4218 }, { "epoch": 0.27, "grad_norm": 1.3271247148513794, "learning_rate": 8.537565984061804e-06, "loss": 0.5873, "step": 4219 }, { "epoch": 0.27, "grad_norm": 1.2697441577911377, "learning_rate": 8.536827181815654e-06, "loss": 0.6023, "step": 4220 }, { "epoch": 0.27, "grad_norm": 1.0913180112838745, "learning_rate": 8.536088224983911e-06, "loss": 0.5119, "step": 4221 }, { "epoch": 0.27, "grad_norm": 1.1755743026733398, "learning_rate": 8.535349113598875e-06, "loss": 0.5174, "step": 4222 }, { "epoch": 0.27, "grad_norm": 1.3538233041763306, "learning_rate": 8.534609847692851e-06, "loss": 0.6241, "step": 4223 }, { "epoch": 0.27, "grad_norm": 1.33254873752594, "learning_rate": 8.53387042729815e-06, "loss": 0.5779, "step": 4224 }, { "epoch": 0.27, "grad_norm": 1.147516131401062, "learning_rate": 8.53313085244709e-06, "loss": 0.553, "step": 4225 }, { "epoch": 0.27, "grad_norm": 1.073009967803955, "learning_rate": 8.532391123171996e-06, "loss": 0.5555, "step": 4226 }, { "epoch": 0.27, "grad_norm": 1.362396001815796, "learning_rate": 8.531651239505199e-06, "loss": 0.5339, "step": 4227 }, { "epoch": 0.27, "grad_norm": 1.3155646324157715, "learning_rate": 8.53091120147904e-06, "loss": 0.5572, "step": 4228 }, { "epoch": 0.27, "grad_norm": 1.316593885421753, "learning_rate": 8.530171009125861e-06, "loss": 0.6031, "step": 4229 }, { "epoch": 0.27, "grad_norm": 1.169567584991455, "learning_rate": 8.529430662478014e-06, "loss": 0.5492, "step": 4230 }, { "epoch": 0.27, "grad_norm": 1.2040883302688599, "learning_rate": 8.528690161567861e-06, "loss": 0.5529, "step": 4231 }, { "epoch": 0.27, "grad_norm": 1.2084248065948486, "learning_rate": 8.527949506427764e-06, "loss": 0.5399, "step": 4232 }, { "epoch": 0.27, "grad_norm": 1.1987972259521484, "learning_rate": 8.527208697090096e-06, "loss": 0.5291, "step": 4233 }, { "epoch": 0.27, "grad_norm": 1.2441487312316895, "learning_rate": 8.526467733587238e-06, "loss": 0.5438, "step": 4234 }, { "epoch": 0.27, "grad_norm": 1.3951441049575806, "learning_rate": 8.525726615951571e-06, "loss": 0.5817, "step": 4235 }, { "epoch": 0.27, "grad_norm": 1.2882983684539795, "learning_rate": 8.524985344215494e-06, "loss": 0.5711, "step": 4236 }, { "epoch": 0.27, "grad_norm": 1.3366115093231201, "learning_rate": 8.524243918411399e-06, "loss": 0.5467, "step": 4237 }, { "epoch": 0.27, "grad_norm": 1.1821403503417969, "learning_rate": 8.523502338571696e-06, "loss": 0.5974, "step": 4238 }, { "epoch": 0.27, "grad_norm": 1.1839768886566162, "learning_rate": 8.522760604728795e-06, "loss": 0.5999, "step": 4239 }, { "epoch": 0.27, "grad_norm": 1.1066842079162598, "learning_rate": 8.522018716915119e-06, "loss": 0.538, "step": 4240 }, { "epoch": 0.27, "grad_norm": 1.2849642038345337, "learning_rate": 8.52127667516309e-06, "loss": 0.5549, "step": 4241 }, { "epoch": 0.27, "grad_norm": 1.542620062828064, "learning_rate": 8.520534479505144e-06, "loss": 0.6041, "step": 4242 }, { "epoch": 0.27, "grad_norm": 1.1956439018249512, "learning_rate": 8.519792129973718e-06, "loss": 0.5593, "step": 4243 }, { "epoch": 0.27, "grad_norm": 1.1786779165267944, "learning_rate": 8.51904962660126e-06, "loss": 0.5208, "step": 4244 }, { "epoch": 0.27, "grad_norm": 1.149571418762207, "learning_rate": 8.518306969420222e-06, "loss": 0.5442, "step": 4245 }, { "epoch": 0.27, "grad_norm": 1.2280182838439941, "learning_rate": 8.517564158463064e-06, "loss": 0.5797, "step": 4246 }, { "epoch": 0.27, "grad_norm": 1.118092656135559, "learning_rate": 8.51682119376225e-06, "loss": 0.5295, "step": 4247 }, { "epoch": 0.27, "grad_norm": 1.132513165473938, "learning_rate": 8.516078075350256e-06, "loss": 0.548, "step": 4248 }, { "epoch": 0.27, "grad_norm": 1.2361090183258057, "learning_rate": 8.515334803259563e-06, "loss": 0.6253, "step": 4249 }, { "epoch": 0.27, "grad_norm": 1.2044657468795776, "learning_rate": 8.514591377522654e-06, "loss": 0.5841, "step": 4250 }, { "epoch": 0.27, "grad_norm": 1.2948352098464966, "learning_rate": 8.513847798172023e-06, "loss": 0.5921, "step": 4251 }, { "epoch": 0.27, "grad_norm": 1.234932541847229, "learning_rate": 8.513104065240172e-06, "loss": 0.4815, "step": 4252 }, { "epoch": 0.27, "grad_norm": 1.3436617851257324, "learning_rate": 8.512360178759606e-06, "loss": 0.5878, "step": 4253 }, { "epoch": 0.27, "grad_norm": 1.3124040365219116, "learning_rate": 8.511616138762839e-06, "loss": 0.5675, "step": 4254 }, { "epoch": 0.27, "grad_norm": 1.3947901725769043, "learning_rate": 8.510871945282388e-06, "loss": 0.5968, "step": 4255 }, { "epoch": 0.27, "grad_norm": 1.373995304107666, "learning_rate": 8.510127598350786e-06, "loss": 0.6062, "step": 4256 }, { "epoch": 0.27, "grad_norm": 1.134596824645996, "learning_rate": 8.50938309800056e-06, "loss": 0.59, "step": 4257 }, { "epoch": 0.27, "grad_norm": 1.145981788635254, "learning_rate": 8.508638444264255e-06, "loss": 0.5698, "step": 4258 }, { "epoch": 0.27, "grad_norm": 1.121445894241333, "learning_rate": 8.507893637174415e-06, "loss": 0.5032, "step": 4259 }, { "epoch": 0.27, "grad_norm": 1.1211237907409668, "learning_rate": 8.507148676763595e-06, "loss": 0.5548, "step": 4260 }, { "epoch": 0.28, "grad_norm": 1.1662217378616333, "learning_rate": 8.506403563064354e-06, "loss": 0.5762, "step": 4261 }, { "epoch": 0.28, "grad_norm": 1.1905170679092407, "learning_rate": 8.50565829610926e-06, "loss": 0.5787, "step": 4262 }, { "epoch": 0.28, "grad_norm": 1.214615821838379, "learning_rate": 8.504912875930889e-06, "loss": 0.5757, "step": 4263 }, { "epoch": 0.28, "grad_norm": 1.2225399017333984, "learning_rate": 8.504167302561816e-06, "loss": 0.5377, "step": 4264 }, { "epoch": 0.28, "grad_norm": 1.1274292469024658, "learning_rate": 8.503421576034634e-06, "loss": 0.5766, "step": 4265 }, { "epoch": 0.28, "grad_norm": 1.1681623458862305, "learning_rate": 8.502675696381933e-06, "loss": 0.5646, "step": 4266 }, { "epoch": 0.28, "grad_norm": 1.3049132823944092, "learning_rate": 8.501929663636313e-06, "loss": 0.5343, "step": 4267 }, { "epoch": 0.28, "grad_norm": 1.7614610195159912, "learning_rate": 8.501183477830382e-06, "loss": 0.5341, "step": 4268 }, { "epoch": 0.28, "grad_norm": 1.3072841167449951, "learning_rate": 8.500437138996755e-06, "loss": 0.5914, "step": 4269 }, { "epoch": 0.28, "grad_norm": 1.1902077198028564, "learning_rate": 8.499690647168053e-06, "loss": 0.5891, "step": 4270 }, { "epoch": 0.28, "grad_norm": 1.2279813289642334, "learning_rate": 8.498944002376901e-06, "loss": 0.5581, "step": 4271 }, { "epoch": 0.28, "grad_norm": 1.23380446434021, "learning_rate": 8.498197204655936e-06, "loss": 0.5709, "step": 4272 }, { "epoch": 0.28, "grad_norm": 1.2122764587402344, "learning_rate": 8.497450254037793e-06, "loss": 0.5618, "step": 4273 }, { "epoch": 0.28, "grad_norm": 1.3796833753585815, "learning_rate": 8.496703150555125e-06, "loss": 0.5947, "step": 4274 }, { "epoch": 0.28, "grad_norm": 1.2503708600997925, "learning_rate": 8.495955894240585e-06, "loss": 0.5611, "step": 4275 }, { "epoch": 0.28, "grad_norm": 1.295320749282837, "learning_rate": 8.495208485126831e-06, "loss": 0.5566, "step": 4276 }, { "epoch": 0.28, "grad_norm": 1.229204535484314, "learning_rate": 8.494460923246533e-06, "loss": 0.5548, "step": 4277 }, { "epoch": 0.28, "grad_norm": 1.1587477922439575, "learning_rate": 8.493713208632364e-06, "loss": 0.5762, "step": 4278 }, { "epoch": 0.28, "grad_norm": 1.1587098836898804, "learning_rate": 8.492965341317004e-06, "loss": 0.5657, "step": 4279 }, { "epoch": 0.28, "grad_norm": 1.207153081893921, "learning_rate": 8.49221732133314e-06, "loss": 0.5866, "step": 4280 }, { "epoch": 0.28, "grad_norm": 1.122913122177124, "learning_rate": 8.491469148713466e-06, "loss": 0.5671, "step": 4281 }, { "epoch": 0.28, "grad_norm": 1.1337535381317139, "learning_rate": 8.490720823490688e-06, "loss": 0.5462, "step": 4282 }, { "epoch": 0.28, "grad_norm": 1.3074039220809937, "learning_rate": 8.489972345697505e-06, "loss": 0.6002, "step": 4283 }, { "epoch": 0.28, "grad_norm": 1.1337559223175049, "learning_rate": 8.489223715366637e-06, "loss": 0.5274, "step": 4284 }, { "epoch": 0.28, "grad_norm": 1.1027885675430298, "learning_rate": 8.488474932530802e-06, "loss": 0.5567, "step": 4285 }, { "epoch": 0.28, "grad_norm": 1.2525464296340942, "learning_rate": 8.487725997222728e-06, "loss": 0.5864, "step": 4286 }, { "epoch": 0.28, "grad_norm": 1.1066358089447021, "learning_rate": 8.486976909475148e-06, "loss": 0.5454, "step": 4287 }, { "epoch": 0.28, "grad_norm": 1.2030760049819946, "learning_rate": 8.486227669320806e-06, "loss": 0.5842, "step": 4288 }, { "epoch": 0.28, "grad_norm": 1.1989573240280151, "learning_rate": 8.485478276792446e-06, "loss": 0.5534, "step": 4289 }, { "epoch": 0.28, "grad_norm": 1.1113778352737427, "learning_rate": 8.484728731922825e-06, "loss": 0.4985, "step": 4290 }, { "epoch": 0.28, "grad_norm": 1.1136823892593384, "learning_rate": 8.483979034744698e-06, "loss": 0.5592, "step": 4291 }, { "epoch": 0.28, "grad_norm": 1.177512526512146, "learning_rate": 8.483229185290839e-06, "loss": 0.538, "step": 4292 }, { "epoch": 0.28, "grad_norm": 1.3364275693893433, "learning_rate": 8.482479183594018e-06, "loss": 0.5607, "step": 4293 }, { "epoch": 0.28, "grad_norm": 1.3073933124542236, "learning_rate": 8.481729029687019e-06, "loss": 0.5561, "step": 4294 }, { "epoch": 0.28, "grad_norm": 1.2014325857162476, "learning_rate": 8.480978723602624e-06, "loss": 0.5425, "step": 4295 }, { "epoch": 0.28, "grad_norm": 1.0555912256240845, "learning_rate": 8.480228265373632e-06, "loss": 0.5021, "step": 4296 }, { "epoch": 0.28, "grad_norm": 1.259839415550232, "learning_rate": 8.479477655032841e-06, "loss": 0.5727, "step": 4297 }, { "epoch": 0.28, "grad_norm": 1.170598030090332, "learning_rate": 8.478726892613059e-06, "loss": 0.5483, "step": 4298 }, { "epoch": 0.28, "grad_norm": 1.3816595077514648, "learning_rate": 8.477975978147099e-06, "loss": 0.5447, "step": 4299 }, { "epoch": 0.28, "grad_norm": 1.152985692024231, "learning_rate": 8.477224911667783e-06, "loss": 0.5757, "step": 4300 }, { "epoch": 0.28, "grad_norm": 1.2040446996688843, "learning_rate": 8.476473693207938e-06, "loss": 0.5862, "step": 4301 }, { "epoch": 0.28, "grad_norm": 1.3932102918624878, "learning_rate": 8.475722322800396e-06, "loss": 0.5706, "step": 4302 }, { "epoch": 0.28, "grad_norm": 1.3080824613571167, "learning_rate": 8.474970800478001e-06, "loss": 0.5919, "step": 4303 }, { "epoch": 0.28, "grad_norm": 1.1619691848754883, "learning_rate": 8.474219126273596e-06, "loss": 0.552, "step": 4304 }, { "epoch": 0.28, "grad_norm": 1.2846269607543945, "learning_rate": 8.473467300220038e-06, "loss": 0.5668, "step": 4305 }, { "epoch": 0.28, "grad_norm": 1.0909277200698853, "learning_rate": 8.472715322350184e-06, "loss": 0.5327, "step": 4306 }, { "epoch": 0.28, "grad_norm": 1.2327356338500977, "learning_rate": 8.471963192696904e-06, "loss": 0.5792, "step": 4307 }, { "epoch": 0.28, "grad_norm": 1.3479145765304565, "learning_rate": 8.471210911293071e-06, "loss": 0.5551, "step": 4308 }, { "epoch": 0.28, "grad_norm": 1.1423249244689941, "learning_rate": 8.470458478171564e-06, "loss": 0.5022, "step": 4309 }, { "epoch": 0.28, "grad_norm": 1.2282154560089111, "learning_rate": 8.469705893365272e-06, "loss": 0.5828, "step": 4310 }, { "epoch": 0.28, "grad_norm": 1.273921251296997, "learning_rate": 8.468953156907086e-06, "loss": 0.587, "step": 4311 }, { "epoch": 0.28, "grad_norm": 1.1622263193130493, "learning_rate": 8.468200268829909e-06, "loss": 0.5714, "step": 4312 }, { "epoch": 0.28, "grad_norm": 1.2341233491897583, "learning_rate": 8.467447229166646e-06, "loss": 0.546, "step": 4313 }, { "epoch": 0.28, "grad_norm": 1.1553407907485962, "learning_rate": 8.46669403795021e-06, "loss": 0.5832, "step": 4314 }, { "epoch": 0.28, "grad_norm": 1.137056827545166, "learning_rate": 8.465940695213522e-06, "loss": 0.569, "step": 4315 }, { "epoch": 0.28, "grad_norm": 1.252094030380249, "learning_rate": 8.465187200989508e-06, "loss": 0.5791, "step": 4316 }, { "epoch": 0.28, "grad_norm": 1.3898731470108032, "learning_rate": 8.464433555311102e-06, "loss": 0.5982, "step": 4317 }, { "epoch": 0.28, "grad_norm": 1.2338894605636597, "learning_rate": 8.463679758211244e-06, "loss": 0.619, "step": 4318 }, { "epoch": 0.28, "grad_norm": 1.1329847574234009, "learning_rate": 8.46292580972288e-06, "loss": 0.496, "step": 4319 }, { "epoch": 0.28, "grad_norm": 1.3131792545318604, "learning_rate": 8.462171709878964e-06, "loss": 0.5631, "step": 4320 }, { "epoch": 0.28, "grad_norm": 1.3418912887573242, "learning_rate": 8.461417458712454e-06, "loss": 0.559, "step": 4321 }, { "epoch": 0.28, "grad_norm": 1.2500457763671875, "learning_rate": 8.460663056256319e-06, "loss": 0.5302, "step": 4322 }, { "epoch": 0.28, "grad_norm": 1.139854073524475, "learning_rate": 8.459908502543528e-06, "loss": 0.5794, "step": 4323 }, { "epoch": 0.28, "grad_norm": 1.4102133512496948, "learning_rate": 8.459153797607065e-06, "loss": 0.533, "step": 4324 }, { "epoch": 0.28, "grad_norm": 1.0967340469360352, "learning_rate": 8.458398941479914e-06, "loss": 0.5386, "step": 4325 }, { "epoch": 0.28, "grad_norm": 1.0417659282684326, "learning_rate": 8.457643934195068e-06, "loss": 0.5352, "step": 4326 }, { "epoch": 0.28, "grad_norm": 1.099289059638977, "learning_rate": 8.456888775785526e-06, "loss": 0.5583, "step": 4327 }, { "epoch": 0.28, "grad_norm": 1.1354169845581055, "learning_rate": 8.456133466284296e-06, "loss": 0.5517, "step": 4328 }, { "epoch": 0.28, "grad_norm": 1.1954598426818848, "learning_rate": 8.455378005724386e-06, "loss": 0.6016, "step": 4329 }, { "epoch": 0.28, "grad_norm": 1.1985307931900024, "learning_rate": 8.45462239413882e-06, "loss": 0.5172, "step": 4330 }, { "epoch": 0.28, "grad_norm": 1.1541074514389038, "learning_rate": 8.453866631560623e-06, "loss": 0.6206, "step": 4331 }, { "epoch": 0.28, "grad_norm": 1.156216025352478, "learning_rate": 8.453110718022826e-06, "loss": 0.5265, "step": 4332 }, { "epoch": 0.28, "grad_norm": 1.3581246137619019, "learning_rate": 8.452354653558469e-06, "loss": 0.5523, "step": 4333 }, { "epoch": 0.28, "grad_norm": 1.2852914333343506, "learning_rate": 8.451598438200596e-06, "loss": 0.5589, "step": 4334 }, { "epoch": 0.28, "grad_norm": 1.1934353113174438, "learning_rate": 8.450842071982263e-06, "loss": 0.5532, "step": 4335 }, { "epoch": 0.28, "grad_norm": 1.1514145135879517, "learning_rate": 8.450085554936525e-06, "loss": 0.5791, "step": 4336 }, { "epoch": 0.28, "grad_norm": 1.2718291282653809, "learning_rate": 8.449328887096449e-06, "loss": 0.5433, "step": 4337 }, { "epoch": 0.28, "grad_norm": 1.1368069648742676, "learning_rate": 8.448572068495105e-06, "loss": 0.5522, "step": 4338 }, { "epoch": 0.28, "grad_norm": 2.198578119277954, "learning_rate": 8.447815099165574e-06, "loss": 0.6428, "step": 4339 }, { "epoch": 0.28, "grad_norm": 1.1175074577331543, "learning_rate": 8.447057979140942e-06, "loss": 0.5132, "step": 4340 }, { "epoch": 0.28, "grad_norm": 1.122523546218872, "learning_rate": 8.446300708454299e-06, "loss": 0.5501, "step": 4341 }, { "epoch": 0.28, "grad_norm": 1.2243826389312744, "learning_rate": 8.445543287138741e-06, "loss": 0.5518, "step": 4342 }, { "epoch": 0.28, "grad_norm": 1.14012610912323, "learning_rate": 8.44478571522738e-06, "loss": 0.5489, "step": 4343 }, { "epoch": 0.28, "grad_norm": 1.2549008131027222, "learning_rate": 8.444027992753319e-06, "loss": 0.5535, "step": 4344 }, { "epoch": 0.28, "grad_norm": 1.4281281232833862, "learning_rate": 8.443270119749683e-06, "loss": 0.5432, "step": 4345 }, { "epoch": 0.28, "grad_norm": 1.4095577001571655, "learning_rate": 8.442512096249593e-06, "loss": 0.6008, "step": 4346 }, { "epoch": 0.28, "grad_norm": 1.5576348304748535, "learning_rate": 8.441753922286179e-06, "loss": 0.6074, "step": 4347 }, { "epoch": 0.28, "grad_norm": 1.1735895872116089, "learning_rate": 8.440995597892582e-06, "loss": 0.5564, "step": 4348 }, { "epoch": 0.28, "grad_norm": 1.2823834419250488, "learning_rate": 8.440237123101947e-06, "loss": 0.5733, "step": 4349 }, { "epoch": 0.28, "grad_norm": 1.2837876081466675, "learning_rate": 8.439478497947421e-06, "loss": 0.5637, "step": 4350 }, { "epoch": 0.28, "grad_norm": 1.1797451972961426, "learning_rate": 8.438719722462166e-06, "loss": 0.5049, "step": 4351 }, { "epoch": 0.28, "grad_norm": 1.3599703311920166, "learning_rate": 8.437960796679342e-06, "loss": 0.5914, "step": 4352 }, { "epoch": 0.28, "grad_norm": 1.2541788816452026, "learning_rate": 8.437201720632124e-06, "loss": 0.5188, "step": 4353 }, { "epoch": 0.28, "grad_norm": 1.141904592514038, "learning_rate": 8.436442494353684e-06, "loss": 0.5878, "step": 4354 }, { "epoch": 0.28, "grad_norm": 1.1709290742874146, "learning_rate": 8.435683117877211e-06, "loss": 0.5721, "step": 4355 }, { "epoch": 0.28, "grad_norm": 1.2246443033218384, "learning_rate": 8.434923591235891e-06, "loss": 0.5606, "step": 4356 }, { "epoch": 0.28, "grad_norm": 1.3484134674072266, "learning_rate": 8.434163914462925e-06, "loss": 0.5977, "step": 4357 }, { "epoch": 0.28, "grad_norm": 1.2133268117904663, "learning_rate": 8.433404087591515e-06, "loss": 0.5541, "step": 4358 }, { "epoch": 0.28, "grad_norm": 1.3657267093658447, "learning_rate": 8.432644110654868e-06, "loss": 0.5875, "step": 4359 }, { "epoch": 0.28, "grad_norm": 1.2457678318023682, "learning_rate": 8.431883983686206e-06, "loss": 0.6052, "step": 4360 }, { "epoch": 0.28, "grad_norm": 1.252913475036621, "learning_rate": 8.431123706718747e-06, "loss": 0.5534, "step": 4361 }, { "epoch": 0.28, "grad_norm": 1.3320125341415405, "learning_rate": 8.430363279785725e-06, "loss": 0.5977, "step": 4362 }, { "epoch": 0.28, "grad_norm": 1.3334496021270752, "learning_rate": 8.429602702920375e-06, "loss": 0.6129, "step": 4363 }, { "epoch": 0.28, "grad_norm": 1.2421033382415771, "learning_rate": 8.428841976155937e-06, "loss": 0.5889, "step": 4364 }, { "epoch": 0.28, "grad_norm": 1.1227086782455444, "learning_rate": 8.428081099525663e-06, "loss": 0.5398, "step": 4365 }, { "epoch": 0.28, "grad_norm": 1.2490116357803345, "learning_rate": 8.42732007306281e-06, "loss": 0.5335, "step": 4366 }, { "epoch": 0.28, "grad_norm": 1.1360403299331665, "learning_rate": 8.42655889680064e-06, "loss": 0.5291, "step": 4367 }, { "epoch": 0.28, "grad_norm": 1.192284345626831, "learning_rate": 8.42579757077242e-06, "loss": 0.5904, "step": 4368 }, { "epoch": 0.28, "grad_norm": 1.1190905570983887, "learning_rate": 8.425036095011428e-06, "loss": 0.5369, "step": 4369 }, { "epoch": 0.28, "grad_norm": 1.277560830116272, "learning_rate": 8.424274469550943e-06, "loss": 0.6042, "step": 4370 }, { "epoch": 0.28, "grad_norm": 1.2904154062271118, "learning_rate": 8.423512694424256e-06, "loss": 0.5922, "step": 4371 }, { "epoch": 0.28, "grad_norm": 1.082527756690979, "learning_rate": 8.422750769664663e-06, "loss": 0.5364, "step": 4372 }, { "epoch": 0.28, "grad_norm": 1.2179927825927734, "learning_rate": 8.421988695305464e-06, "loss": 0.5563, "step": 4373 }, { "epoch": 0.28, "grad_norm": 1.090315818786621, "learning_rate": 8.421226471379969e-06, "loss": 0.5201, "step": 4374 }, { "epoch": 0.28, "grad_norm": 1.2313051223754883, "learning_rate": 8.42046409792149e-06, "loss": 0.5442, "step": 4375 }, { "epoch": 0.28, "grad_norm": 1.1929001808166504, "learning_rate": 8.419701574963352e-06, "loss": 0.5568, "step": 4376 }, { "epoch": 0.28, "grad_norm": 1.1260827779769897, "learning_rate": 8.41893890253888e-06, "loss": 0.5782, "step": 4377 }, { "epoch": 0.28, "grad_norm": 1.1154929399490356, "learning_rate": 8.41817608068141e-06, "loss": 0.5423, "step": 4378 }, { "epoch": 0.28, "grad_norm": 1.1937830448150635, "learning_rate": 8.417413109424282e-06, "loss": 0.5261, "step": 4379 }, { "epoch": 0.28, "grad_norm": 1.3604422807693481, "learning_rate": 8.416649988800844e-06, "loss": 0.6065, "step": 4380 }, { "epoch": 0.28, "grad_norm": 1.167065978050232, "learning_rate": 8.415886718844452e-06, "loss": 0.5628, "step": 4381 }, { "epoch": 0.28, "grad_norm": 1.2616500854492188, "learning_rate": 8.415123299588462e-06, "loss": 0.5544, "step": 4382 }, { "epoch": 0.28, "grad_norm": 1.200610637664795, "learning_rate": 8.414359731066246e-06, "loss": 0.6021, "step": 4383 }, { "epoch": 0.28, "grad_norm": 1.0817440748214722, "learning_rate": 8.413596013311175e-06, "loss": 0.5122, "step": 4384 }, { "epoch": 0.28, "grad_norm": 1.154176950454712, "learning_rate": 8.412832146356627e-06, "loss": 0.5386, "step": 4385 }, { "epoch": 0.28, "grad_norm": 1.2219167947769165, "learning_rate": 8.412068130235992e-06, "loss": 0.587, "step": 4386 }, { "epoch": 0.28, "grad_norm": 3.127368927001953, "learning_rate": 8.411303964982665e-06, "loss": 0.5438, "step": 4387 }, { "epoch": 0.28, "grad_norm": 1.1463903188705444, "learning_rate": 8.410539650630039e-06, "loss": 0.5341, "step": 4388 }, { "epoch": 0.28, "grad_norm": 1.3381551504135132, "learning_rate": 8.409775187211527e-06, "loss": 0.5571, "step": 4389 }, { "epoch": 0.28, "grad_norm": 1.1205806732177734, "learning_rate": 8.409010574760538e-06, "loss": 0.5623, "step": 4390 }, { "epoch": 0.28, "grad_norm": 1.1469701528549194, "learning_rate": 8.408245813310492e-06, "loss": 0.6042, "step": 4391 }, { "epoch": 0.28, "grad_norm": 1.2237600088119507, "learning_rate": 8.407480902894815e-06, "loss": 0.5351, "step": 4392 }, { "epoch": 0.28, "grad_norm": 1.2601127624511719, "learning_rate": 8.40671584354694e-06, "loss": 0.5172, "step": 4393 }, { "epoch": 0.28, "grad_norm": 1.1949927806854248, "learning_rate": 8.405950635300305e-06, "loss": 0.5644, "step": 4394 }, { "epoch": 0.28, "grad_norm": 1.2420339584350586, "learning_rate": 8.405185278188354e-06, "loss": 0.5919, "step": 4395 }, { "epoch": 0.28, "grad_norm": 1.2809420824050903, "learning_rate": 8.40441977224454e-06, "loss": 0.5735, "step": 4396 }, { "epoch": 0.28, "grad_norm": 1.097718596458435, "learning_rate": 8.403654117502323e-06, "loss": 0.56, "step": 4397 }, { "epoch": 0.28, "grad_norm": 1.1785484552383423, "learning_rate": 8.402888313995164e-06, "loss": 0.5139, "step": 4398 }, { "epoch": 0.28, "grad_norm": 1.1875569820404053, "learning_rate": 8.402122361756537e-06, "loss": 0.5441, "step": 4399 }, { "epoch": 0.28, "grad_norm": 1.0583492517471313, "learning_rate": 8.401356260819918e-06, "loss": 0.5126, "step": 4400 }, { "epoch": 0.28, "grad_norm": 1.142875075340271, "learning_rate": 8.400590011218794e-06, "loss": 0.5311, "step": 4401 }, { "epoch": 0.28, "grad_norm": 1.145861268043518, "learning_rate": 8.399823612986655e-06, "loss": 0.5099, "step": 4402 }, { "epoch": 0.28, "grad_norm": 1.143134593963623, "learning_rate": 8.399057066156995e-06, "loss": 0.5371, "step": 4403 }, { "epoch": 0.28, "grad_norm": 1.2730233669281006, "learning_rate": 8.398290370763323e-06, "loss": 0.534, "step": 4404 }, { "epoch": 0.28, "grad_norm": 1.2019233703613281, "learning_rate": 8.397523526839145e-06, "loss": 0.5481, "step": 4405 }, { "epoch": 0.28, "grad_norm": 1.1144300699234009, "learning_rate": 8.396756534417981e-06, "loss": 0.5476, "step": 4406 }, { "epoch": 0.28, "grad_norm": 1.195786476135254, "learning_rate": 8.395989393533351e-06, "loss": 0.5586, "step": 4407 }, { "epoch": 0.28, "grad_norm": 1.16204035282135, "learning_rate": 8.395222104218785e-06, "loss": 0.5599, "step": 4408 }, { "epoch": 0.28, "grad_norm": 1.2950773239135742, "learning_rate": 8.394454666507824e-06, "loss": 0.5533, "step": 4409 }, { "epoch": 0.28, "grad_norm": 1.0779892206192017, "learning_rate": 8.393687080434005e-06, "loss": 0.5311, "step": 4410 }, { "epoch": 0.28, "grad_norm": 1.1826475858688354, "learning_rate": 8.392919346030881e-06, "loss": 0.5981, "step": 4411 }, { "epoch": 0.28, "grad_norm": 1.1138031482696533, "learning_rate": 8.392151463332004e-06, "loss": 0.5397, "step": 4412 }, { "epoch": 0.28, "grad_norm": 1.0730658769607544, "learning_rate": 8.39138343237094e-06, "loss": 0.6093, "step": 4413 }, { "epoch": 0.28, "grad_norm": 1.1687285900115967, "learning_rate": 8.390615253181257e-06, "loss": 0.5399, "step": 4414 }, { "epoch": 0.28, "grad_norm": 1.0826259851455688, "learning_rate": 8.389846925796528e-06, "loss": 0.5497, "step": 4415 }, { "epoch": 0.29, "grad_norm": 1.0435688495635986, "learning_rate": 8.389078450250337e-06, "loss": 0.5152, "step": 4416 }, { "epoch": 0.29, "grad_norm": 1.2198114395141602, "learning_rate": 8.388309826576268e-06, "loss": 0.5514, "step": 4417 }, { "epoch": 0.29, "grad_norm": 1.3538388013839722, "learning_rate": 8.387541054807922e-06, "loss": 0.5885, "step": 4418 }, { "epoch": 0.29, "grad_norm": 1.1545196771621704, "learning_rate": 8.386772134978894e-06, "loss": 0.543, "step": 4419 }, { "epoch": 0.29, "grad_norm": 1.2049407958984375, "learning_rate": 8.386003067122794e-06, "loss": 0.5602, "step": 4420 }, { "epoch": 0.29, "grad_norm": 1.128703236579895, "learning_rate": 8.385233851273237e-06, "loss": 0.599, "step": 4421 }, { "epoch": 0.29, "grad_norm": 1.1883424520492554, "learning_rate": 8.384464487463843e-06, "loss": 0.5342, "step": 4422 }, { "epoch": 0.29, "grad_norm": 1.2796025276184082, "learning_rate": 8.383694975728236e-06, "loss": 0.5805, "step": 4423 }, { "epoch": 0.29, "grad_norm": 1.1565303802490234, "learning_rate": 8.382925316100054e-06, "loss": 0.5579, "step": 4424 }, { "epoch": 0.29, "grad_norm": 1.1632329225540161, "learning_rate": 8.382155508612933e-06, "loss": 0.5511, "step": 4425 }, { "epoch": 0.29, "grad_norm": 1.1658861637115479, "learning_rate": 8.38138555330052e-06, "loss": 0.615, "step": 4426 }, { "epoch": 0.29, "grad_norm": 1.1907799243927002, "learning_rate": 8.380615450196472e-06, "loss": 0.5396, "step": 4427 }, { "epoch": 0.29, "grad_norm": 1.1145131587982178, "learning_rate": 8.379845199334442e-06, "loss": 0.5613, "step": 4428 }, { "epoch": 0.29, "grad_norm": 1.2691993713378906, "learning_rate": 8.379074800748099e-06, "loss": 0.5605, "step": 4429 }, { "epoch": 0.29, "grad_norm": 1.135455846786499, "learning_rate": 8.378304254471115e-06, "loss": 0.565, "step": 4430 }, { "epoch": 0.29, "grad_norm": 1.1450313329696655, "learning_rate": 8.377533560537168e-06, "loss": 0.5412, "step": 4431 }, { "epoch": 0.29, "grad_norm": 1.2082998752593994, "learning_rate": 8.376762718979943e-06, "loss": 0.58, "step": 4432 }, { "epoch": 0.29, "grad_norm": 1.190650463104248, "learning_rate": 8.375991729833131e-06, "loss": 0.5529, "step": 4433 }, { "epoch": 0.29, "grad_norm": 1.2873804569244385, "learning_rate": 8.37522059313043e-06, "loss": 0.5348, "step": 4434 }, { "epoch": 0.29, "grad_norm": 1.2104816436767578, "learning_rate": 8.374449308905548e-06, "loss": 0.5096, "step": 4435 }, { "epoch": 0.29, "grad_norm": 1.1519427299499512, "learning_rate": 8.373677877192192e-06, "loss": 0.5358, "step": 4436 }, { "epoch": 0.29, "grad_norm": 1.168603777885437, "learning_rate": 8.372906298024079e-06, "loss": 0.545, "step": 4437 }, { "epoch": 0.29, "grad_norm": 1.2531343698501587, "learning_rate": 8.372134571434934e-06, "loss": 0.5871, "step": 4438 }, { "epoch": 0.29, "grad_norm": 1.3223743438720703, "learning_rate": 8.371362697458488e-06, "loss": 0.5767, "step": 4439 }, { "epoch": 0.29, "grad_norm": 1.1685155630111694, "learning_rate": 8.370590676128477e-06, "loss": 0.5394, "step": 4440 }, { "epoch": 0.29, "grad_norm": 1.1464117765426636, "learning_rate": 8.369818507478642e-06, "loss": 0.5699, "step": 4441 }, { "epoch": 0.29, "grad_norm": 1.2308666706085205, "learning_rate": 8.369046191542735e-06, "loss": 0.5379, "step": 4442 }, { "epoch": 0.29, "grad_norm": 1.115913987159729, "learning_rate": 8.368273728354512e-06, "loss": 0.5251, "step": 4443 }, { "epoch": 0.29, "grad_norm": 1.223080039024353, "learning_rate": 8.367501117947734e-06, "loss": 0.5432, "step": 4444 }, { "epoch": 0.29, "grad_norm": 1.2582674026489258, "learning_rate": 8.366728360356172e-06, "loss": 0.5728, "step": 4445 }, { "epoch": 0.29, "grad_norm": 1.2285903692245483, "learning_rate": 8.365955455613597e-06, "loss": 0.5813, "step": 4446 }, { "epoch": 0.29, "grad_norm": 1.0493723154067993, "learning_rate": 8.365182403753796e-06, "loss": 0.541, "step": 4447 }, { "epoch": 0.29, "grad_norm": 1.258725643157959, "learning_rate": 8.364409204810553e-06, "loss": 0.5404, "step": 4448 }, { "epoch": 0.29, "grad_norm": 1.3611749410629272, "learning_rate": 8.363635858817664e-06, "loss": 0.564, "step": 4449 }, { "epoch": 0.29, "grad_norm": 1.2341454029083252, "learning_rate": 8.362862365808929e-06, "loss": 0.5935, "step": 4450 }, { "epoch": 0.29, "grad_norm": 1.1483862400054932, "learning_rate": 8.362088725818158e-06, "loss": 0.524, "step": 4451 }, { "epoch": 0.29, "grad_norm": 1.1975948810577393, "learning_rate": 8.361314938879161e-06, "loss": 0.5738, "step": 4452 }, { "epoch": 0.29, "grad_norm": 1.273173451423645, "learning_rate": 8.360541005025761e-06, "loss": 0.5602, "step": 4453 }, { "epoch": 0.29, "grad_norm": 1.2478461265563965, "learning_rate": 8.359766924291786e-06, "loss": 0.5806, "step": 4454 }, { "epoch": 0.29, "grad_norm": 1.3617653846740723, "learning_rate": 8.358992696711066e-06, "loss": 0.5693, "step": 4455 }, { "epoch": 0.29, "grad_norm": 1.0896553993225098, "learning_rate": 8.35821832231744e-06, "loss": 0.5066, "step": 4456 }, { "epoch": 0.29, "grad_norm": 1.1456842422485352, "learning_rate": 8.357443801144758e-06, "loss": 0.5615, "step": 4457 }, { "epoch": 0.29, "grad_norm": 1.2839725017547607, "learning_rate": 8.356669133226867e-06, "loss": 0.5424, "step": 4458 }, { "epoch": 0.29, "grad_norm": 1.1407408714294434, "learning_rate": 8.35589431859763e-06, "loss": 0.5467, "step": 4459 }, { "epoch": 0.29, "grad_norm": 1.1908165216445923, "learning_rate": 8.35511935729091e-06, "loss": 0.5633, "step": 4460 }, { "epoch": 0.29, "grad_norm": 1.221361756324768, "learning_rate": 8.35434424934058e-06, "loss": 0.5577, "step": 4461 }, { "epoch": 0.29, "grad_norm": 1.2457184791564941, "learning_rate": 8.353568994780518e-06, "loss": 0.5913, "step": 4462 }, { "epoch": 0.29, "grad_norm": 1.1312233209609985, "learning_rate": 8.352793593644606e-06, "loss": 0.5469, "step": 4463 }, { "epoch": 0.29, "grad_norm": 1.351129174232483, "learning_rate": 8.352018045966736e-06, "loss": 0.5676, "step": 4464 }, { "epoch": 0.29, "grad_norm": 1.2408804893493652, "learning_rate": 8.351242351780807e-06, "loss": 0.595, "step": 4465 }, { "epoch": 0.29, "grad_norm": 1.1289823055267334, "learning_rate": 8.35046651112072e-06, "loss": 0.5432, "step": 4466 }, { "epoch": 0.29, "grad_norm": 1.2398422956466675, "learning_rate": 8.349690524020389e-06, "loss": 0.6064, "step": 4467 }, { "epoch": 0.29, "grad_norm": 1.5065699815750122, "learning_rate": 8.348914390513724e-06, "loss": 0.5401, "step": 4468 }, { "epoch": 0.29, "grad_norm": 1.3095539808273315, "learning_rate": 8.348138110634652e-06, "loss": 0.539, "step": 4469 }, { "epoch": 0.29, "grad_norm": 1.2985461950302124, "learning_rate": 8.347361684417103e-06, "loss": 0.5287, "step": 4470 }, { "epoch": 0.29, "grad_norm": 1.2284092903137207, "learning_rate": 8.346585111895009e-06, "loss": 0.5668, "step": 4471 }, { "epoch": 0.29, "grad_norm": 1.2171210050582886, "learning_rate": 8.345808393102315e-06, "loss": 0.595, "step": 4472 }, { "epoch": 0.29, "grad_norm": 1.1701477766036987, "learning_rate": 8.345031528072969e-06, "loss": 0.5264, "step": 4473 }, { "epoch": 0.29, "grad_norm": 1.1562154293060303, "learning_rate": 8.344254516840925e-06, "loss": 0.5501, "step": 4474 }, { "epoch": 0.29, "grad_norm": 1.1601914167404175, "learning_rate": 8.343477359440145e-06, "loss": 0.5807, "step": 4475 }, { "epoch": 0.29, "grad_norm": 1.2059197425842285, "learning_rate": 8.342700055904594e-06, "loss": 0.554, "step": 4476 }, { "epoch": 0.29, "grad_norm": 1.1238696575164795, "learning_rate": 8.34192260626825e-06, "loss": 0.5655, "step": 4477 }, { "epoch": 0.29, "grad_norm": 1.169754981994629, "learning_rate": 8.341145010565088e-06, "loss": 0.5252, "step": 4478 }, { "epoch": 0.29, "grad_norm": 1.2274432182312012, "learning_rate": 8.3403672688291e-06, "loss": 0.549, "step": 4479 }, { "epoch": 0.29, "grad_norm": 1.2807599306106567, "learning_rate": 8.339589381094277e-06, "loss": 0.6113, "step": 4480 }, { "epoch": 0.29, "grad_norm": 1.121805191040039, "learning_rate": 8.33881134739462e-06, "loss": 0.5692, "step": 4481 }, { "epoch": 0.29, "grad_norm": 1.0986825227737427, "learning_rate": 8.33803316776413e-06, "loss": 0.5818, "step": 4482 }, { "epoch": 0.29, "grad_norm": 1.2628728151321411, "learning_rate": 8.337254842236825e-06, "loss": 0.5896, "step": 4483 }, { "epoch": 0.29, "grad_norm": 1.2465251684188843, "learning_rate": 8.33647637084672e-06, "loss": 0.5425, "step": 4484 }, { "epoch": 0.29, "grad_norm": 1.1588467359542847, "learning_rate": 8.33569775362784e-06, "loss": 0.518, "step": 4485 }, { "epoch": 0.29, "grad_norm": 1.0973247289657593, "learning_rate": 8.334918990614218e-06, "loss": 0.5269, "step": 4486 }, { "epoch": 0.29, "grad_norm": 1.3098986148834229, "learning_rate": 8.334140081839893e-06, "loss": 0.5695, "step": 4487 }, { "epoch": 0.29, "grad_norm": 1.225354790687561, "learning_rate": 8.333361027338904e-06, "loss": 0.6049, "step": 4488 }, { "epoch": 0.29, "grad_norm": 1.0920705795288086, "learning_rate": 8.332581827145307e-06, "loss": 0.4982, "step": 4489 }, { "epoch": 0.29, "grad_norm": 1.208522081375122, "learning_rate": 8.331802481293156e-06, "loss": 0.5779, "step": 4490 }, { "epoch": 0.29, "grad_norm": 1.120725154876709, "learning_rate": 8.331022989816516e-06, "loss": 0.5263, "step": 4491 }, { "epoch": 0.29, "grad_norm": 1.217225432395935, "learning_rate": 8.330243352749454e-06, "loss": 0.5408, "step": 4492 }, { "epoch": 0.29, "grad_norm": 1.2391902208328247, "learning_rate": 8.329463570126046e-06, "loss": 0.5754, "step": 4493 }, { "epoch": 0.29, "grad_norm": 1.2322036027908325, "learning_rate": 8.328683641980378e-06, "loss": 0.5952, "step": 4494 }, { "epoch": 0.29, "grad_norm": 1.1114485263824463, "learning_rate": 8.327903568346534e-06, "loss": 0.5326, "step": 4495 }, { "epoch": 0.29, "grad_norm": 1.216130018234253, "learning_rate": 8.327123349258613e-06, "loss": 0.5199, "step": 4496 }, { "epoch": 0.29, "grad_norm": 1.1062705516815186, "learning_rate": 8.326342984750711e-06, "loss": 0.5911, "step": 4497 }, { "epoch": 0.29, "grad_norm": 1.104170560836792, "learning_rate": 8.325562474856943e-06, "loss": 0.5465, "step": 4498 }, { "epoch": 0.29, "grad_norm": 1.1542696952819824, "learning_rate": 8.324781819611418e-06, "loss": 0.6396, "step": 4499 }, { "epoch": 0.29, "grad_norm": 1.1830660104751587, "learning_rate": 8.324001019048256e-06, "loss": 0.5905, "step": 4500 }, { "epoch": 0.29, "grad_norm": 1.1471025943756104, "learning_rate": 8.323220073201588e-06, "loss": 0.5612, "step": 4501 }, { "epoch": 0.29, "grad_norm": 1.123978614807129, "learning_rate": 8.322438982105545e-06, "loss": 0.5549, "step": 4502 }, { "epoch": 0.29, "grad_norm": 1.1690003871917725, "learning_rate": 8.321657745794264e-06, "loss": 0.5133, "step": 4503 }, { "epoch": 0.29, "grad_norm": 1.3359261751174927, "learning_rate": 8.320876364301894e-06, "loss": 0.5792, "step": 4504 }, { "epoch": 0.29, "grad_norm": 1.201876163482666, "learning_rate": 8.320094837662586e-06, "loss": 0.5505, "step": 4505 }, { "epoch": 0.29, "grad_norm": 1.109093427658081, "learning_rate": 8.319313165910497e-06, "loss": 0.5509, "step": 4506 }, { "epoch": 0.29, "grad_norm": 1.1681101322174072, "learning_rate": 8.318531349079797e-06, "loss": 0.5742, "step": 4507 }, { "epoch": 0.29, "grad_norm": 1.1534544229507446, "learning_rate": 8.31774938720465e-06, "loss": 0.5622, "step": 4508 }, { "epoch": 0.29, "grad_norm": 1.3569406270980835, "learning_rate": 8.31696728031924e-06, "loss": 0.544, "step": 4509 }, { "epoch": 0.29, "grad_norm": 1.136751651763916, "learning_rate": 8.316185028457746e-06, "loss": 0.5478, "step": 4510 }, { "epoch": 0.29, "grad_norm": 1.258293867111206, "learning_rate": 8.315402631654362e-06, "loss": 0.6205, "step": 4511 }, { "epoch": 0.29, "grad_norm": 1.2285239696502686, "learning_rate": 8.31462008994328e-06, "loss": 0.5718, "step": 4512 }, { "epoch": 0.29, "grad_norm": 1.1700971126556396, "learning_rate": 8.313837403358708e-06, "loss": 0.5558, "step": 4513 }, { "epoch": 0.29, "grad_norm": 1.1870357990264893, "learning_rate": 8.313054571934853e-06, "loss": 0.5854, "step": 4514 }, { "epoch": 0.29, "grad_norm": 1.0766249895095825, "learning_rate": 8.312271595705931e-06, "loss": 0.5316, "step": 4515 }, { "epoch": 0.29, "grad_norm": 1.192746639251709, "learning_rate": 8.311488474706161e-06, "loss": 0.5014, "step": 4516 }, { "epoch": 0.29, "grad_norm": 1.145533561706543, "learning_rate": 8.310705208969776e-06, "loss": 0.5901, "step": 4517 }, { "epoch": 0.29, "grad_norm": 1.0749925374984741, "learning_rate": 8.309921798531006e-06, "loss": 0.5562, "step": 4518 }, { "epoch": 0.29, "grad_norm": 1.1359297037124634, "learning_rate": 8.309138243424095e-06, "loss": 0.5196, "step": 4519 }, { "epoch": 0.29, "grad_norm": 1.1546469926834106, "learning_rate": 8.30835454368329e-06, "loss": 0.5338, "step": 4520 }, { "epoch": 0.29, "grad_norm": 1.1877328157424927, "learning_rate": 8.307570699342842e-06, "loss": 0.5495, "step": 4521 }, { "epoch": 0.29, "grad_norm": 1.1040258407592773, "learning_rate": 8.306786710437014e-06, "loss": 0.5424, "step": 4522 }, { "epoch": 0.29, "grad_norm": 1.265176773071289, "learning_rate": 8.306002577000067e-06, "loss": 0.5498, "step": 4523 }, { "epoch": 0.29, "grad_norm": 1.2730215787887573, "learning_rate": 8.30521829906628e-06, "loss": 0.5452, "step": 4524 }, { "epoch": 0.29, "grad_norm": 1.113263726234436, "learning_rate": 8.304433876669929e-06, "loss": 0.5427, "step": 4525 }, { "epoch": 0.29, "grad_norm": 1.3049427270889282, "learning_rate": 8.303649309845296e-06, "loss": 0.5677, "step": 4526 }, { "epoch": 0.29, "grad_norm": 1.1717838048934937, "learning_rate": 8.30286459862668e-06, "loss": 0.5533, "step": 4527 }, { "epoch": 0.29, "grad_norm": 1.1406105756759644, "learning_rate": 8.30207974304837e-06, "loss": 0.6105, "step": 4528 }, { "epoch": 0.29, "grad_norm": 1.3769278526306152, "learning_rate": 8.301294743144675e-06, "loss": 0.5388, "step": 4529 }, { "epoch": 0.29, "grad_norm": 1.0883797407150269, "learning_rate": 8.300509598949903e-06, "loss": 0.5156, "step": 4530 }, { "epoch": 0.29, "grad_norm": 1.1941035985946655, "learning_rate": 8.299724310498374e-06, "loss": 0.5231, "step": 4531 }, { "epoch": 0.29, "grad_norm": 1.1313318014144897, "learning_rate": 8.298938877824408e-06, "loss": 0.568, "step": 4532 }, { "epoch": 0.29, "grad_norm": 1.2587964534759521, "learning_rate": 8.298153300962334e-06, "loss": 0.5983, "step": 4533 }, { "epoch": 0.29, "grad_norm": 1.1881377696990967, "learning_rate": 8.297367579946489e-06, "loss": 0.5213, "step": 4534 }, { "epoch": 0.29, "grad_norm": 1.249639630317688, "learning_rate": 8.296581714811213e-06, "loss": 0.5736, "step": 4535 }, { "epoch": 0.29, "grad_norm": 1.1349403858184814, "learning_rate": 8.295795705590858e-06, "loss": 0.5271, "step": 4536 }, { "epoch": 0.29, "grad_norm": 1.1547362804412842, "learning_rate": 8.295009552319777e-06, "loss": 0.5822, "step": 4537 }, { "epoch": 0.29, "grad_norm": 1.1046096086502075, "learning_rate": 8.294223255032327e-06, "loss": 0.4974, "step": 4538 }, { "epoch": 0.29, "grad_norm": 1.2272088527679443, "learning_rate": 8.293436813762878e-06, "loss": 0.6061, "step": 4539 }, { "epoch": 0.29, "grad_norm": 1.1646167039871216, "learning_rate": 8.292650228545804e-06, "loss": 0.5955, "step": 4540 }, { "epoch": 0.29, "grad_norm": 1.0622018575668335, "learning_rate": 8.291863499415484e-06, "loss": 0.49, "step": 4541 }, { "epoch": 0.29, "grad_norm": 1.1602647304534912, "learning_rate": 8.291076626406303e-06, "loss": 0.5403, "step": 4542 }, { "epoch": 0.29, "grad_norm": 1.2049769163131714, "learning_rate": 8.290289609552653e-06, "loss": 0.5616, "step": 4543 }, { "epoch": 0.29, "grad_norm": 1.1946330070495605, "learning_rate": 8.289502448888934e-06, "loss": 0.572, "step": 4544 }, { "epoch": 0.29, "grad_norm": 1.1497628688812256, "learning_rate": 8.28871514444955e-06, "loss": 0.5368, "step": 4545 }, { "epoch": 0.29, "grad_norm": 1.2038556337356567, "learning_rate": 8.287927696268912e-06, "loss": 0.5331, "step": 4546 }, { "epoch": 0.29, "grad_norm": 1.217847466468811, "learning_rate": 8.287140104381437e-06, "loss": 0.5461, "step": 4547 }, { "epoch": 0.29, "grad_norm": 1.241492748260498, "learning_rate": 8.28635236882155e-06, "loss": 0.5285, "step": 4548 }, { "epoch": 0.29, "grad_norm": 1.1649178266525269, "learning_rate": 8.285564489623679e-06, "loss": 0.5456, "step": 4549 }, { "epoch": 0.29, "grad_norm": 1.3290579319000244, "learning_rate": 8.284776466822261e-06, "loss": 0.5991, "step": 4550 }, { "epoch": 0.29, "grad_norm": 1.2409977912902832, "learning_rate": 8.283988300451739e-06, "loss": 0.5887, "step": 4551 }, { "epoch": 0.29, "grad_norm": 1.1504868268966675, "learning_rate": 8.28319999054656e-06, "loss": 0.5696, "step": 4552 }, { "epoch": 0.29, "grad_norm": 1.0829637050628662, "learning_rate": 8.282411537141182e-06, "loss": 0.5346, "step": 4553 }, { "epoch": 0.29, "grad_norm": 1.1726362705230713, "learning_rate": 8.281622940270064e-06, "loss": 0.5323, "step": 4554 }, { "epoch": 0.29, "grad_norm": 1.1043903827667236, "learning_rate": 8.280834199967672e-06, "loss": 0.5471, "step": 4555 }, { "epoch": 0.29, "grad_norm": 1.0792526006698608, "learning_rate": 8.280045316268484e-06, "loss": 0.5014, "step": 4556 }, { "epoch": 0.29, "grad_norm": 1.1639248132705688, "learning_rate": 8.279256289206976e-06, "loss": 0.5292, "step": 4557 }, { "epoch": 0.29, "grad_norm": 1.361997365951538, "learning_rate": 8.278467118817638e-06, "loss": 0.5577, "step": 4558 }, { "epoch": 0.29, "grad_norm": 1.2197903394699097, "learning_rate": 8.27767780513496e-06, "loss": 0.5894, "step": 4559 }, { "epoch": 0.29, "grad_norm": 1.297418236732483, "learning_rate": 8.276888348193441e-06, "loss": 0.6259, "step": 4560 }, { "epoch": 0.29, "grad_norm": 1.3387787342071533, "learning_rate": 8.276098748027587e-06, "loss": 0.5563, "step": 4561 }, { "epoch": 0.29, "grad_norm": 1.151066780090332, "learning_rate": 8.275309004671912e-06, "loss": 0.5314, "step": 4562 }, { "epoch": 0.29, "grad_norm": 1.183496356010437, "learning_rate": 8.274519118160927e-06, "loss": 0.56, "step": 4563 }, { "epoch": 0.29, "grad_norm": 1.2107056379318237, "learning_rate": 8.27372908852916e-06, "loss": 0.5668, "step": 4564 }, { "epoch": 0.29, "grad_norm": 1.1977331638336182, "learning_rate": 8.272938915811144e-06, "loss": 0.5323, "step": 4565 }, { "epoch": 0.29, "grad_norm": 1.204788088798523, "learning_rate": 8.272148600041409e-06, "loss": 0.6113, "step": 4566 }, { "epoch": 0.29, "grad_norm": 1.1794337034225464, "learning_rate": 8.271358141254503e-06, "loss": 0.5601, "step": 4567 }, { "epoch": 0.29, "grad_norm": 1.2894701957702637, "learning_rate": 8.270567539484972e-06, "loss": 0.6041, "step": 4568 }, { "epoch": 0.29, "grad_norm": 1.3194565773010254, "learning_rate": 8.269776794767373e-06, "loss": 0.5751, "step": 4569 }, { "epoch": 0.29, "grad_norm": 1.2876911163330078, "learning_rate": 8.268985907136264e-06, "loss": 0.5629, "step": 4570 }, { "epoch": 0.3, "grad_norm": 1.182350993156433, "learning_rate": 8.268194876626219e-06, "loss": 0.569, "step": 4571 }, { "epoch": 0.3, "grad_norm": 1.1972376108169556, "learning_rate": 8.267403703271805e-06, "loss": 0.5096, "step": 4572 }, { "epoch": 0.3, "grad_norm": 1.2139478921890259, "learning_rate": 8.266612387107605e-06, "loss": 0.5825, "step": 4573 }, { "epoch": 0.3, "grad_norm": 1.4279924631118774, "learning_rate": 8.265820928168207e-06, "loss": 0.5548, "step": 4574 }, { "epoch": 0.3, "grad_norm": 1.2649853229522705, "learning_rate": 8.265029326488201e-06, "loss": 0.5886, "step": 4575 }, { "epoch": 0.3, "grad_norm": 1.0909173488616943, "learning_rate": 8.264237582102187e-06, "loss": 0.5015, "step": 4576 }, { "epoch": 0.3, "grad_norm": 1.3321714401245117, "learning_rate": 8.263445695044771e-06, "loss": 0.5655, "step": 4577 }, { "epoch": 0.3, "grad_norm": 4.340737342834473, "learning_rate": 8.262653665350565e-06, "loss": 0.5749, "step": 4578 }, { "epoch": 0.3, "grad_norm": 1.1523542404174805, "learning_rate": 8.261861493054182e-06, "loss": 0.6076, "step": 4579 }, { "epoch": 0.3, "grad_norm": 1.1335046291351318, "learning_rate": 8.261069178190251e-06, "loss": 0.5005, "step": 4580 }, { "epoch": 0.3, "grad_norm": 1.116296648979187, "learning_rate": 8.260276720793399e-06, "loss": 0.5389, "step": 4581 }, { "epoch": 0.3, "grad_norm": 1.1398544311523438, "learning_rate": 8.259484120898262e-06, "loss": 0.5688, "step": 4582 }, { "epoch": 0.3, "grad_norm": 1.2547261714935303, "learning_rate": 8.258691378539485e-06, "loss": 0.5127, "step": 4583 }, { "epoch": 0.3, "grad_norm": 1.1179065704345703, "learning_rate": 8.257898493751716e-06, "loss": 0.5706, "step": 4584 }, { "epoch": 0.3, "grad_norm": 1.136576533317566, "learning_rate": 8.257105466569608e-06, "loss": 0.5803, "step": 4585 }, { "epoch": 0.3, "grad_norm": 1.2517555952072144, "learning_rate": 8.256312297027824e-06, "loss": 0.5966, "step": 4586 }, { "epoch": 0.3, "grad_norm": 1.265546202659607, "learning_rate": 8.25551898516103e-06, "loss": 0.5629, "step": 4587 }, { "epoch": 0.3, "grad_norm": 1.1465169191360474, "learning_rate": 8.254725531003902e-06, "loss": 0.5323, "step": 4588 }, { "epoch": 0.3, "grad_norm": 1.163526177406311, "learning_rate": 8.253931934591118e-06, "loss": 0.5916, "step": 4589 }, { "epoch": 0.3, "grad_norm": 1.2537963390350342, "learning_rate": 8.253138195957364e-06, "loss": 0.5208, "step": 4590 }, { "epoch": 0.3, "grad_norm": 1.3935790061950684, "learning_rate": 8.252344315137333e-06, "loss": 0.5544, "step": 4591 }, { "epoch": 0.3, "grad_norm": 1.2931629419326782, "learning_rate": 8.251550292165725e-06, "loss": 0.5272, "step": 4592 }, { "epoch": 0.3, "grad_norm": 1.1680996417999268, "learning_rate": 8.25075612707724e-06, "loss": 0.5119, "step": 4593 }, { "epoch": 0.3, "grad_norm": 1.0916708707809448, "learning_rate": 8.24996181990659e-06, "loss": 0.5156, "step": 4594 }, { "epoch": 0.3, "grad_norm": 1.186858892440796, "learning_rate": 8.249167370688498e-06, "loss": 0.5762, "step": 4595 }, { "epoch": 0.3, "grad_norm": 1.2373336553573608, "learning_rate": 8.248372779457683e-06, "loss": 0.5469, "step": 4596 }, { "epoch": 0.3, "grad_norm": 1.2967504262924194, "learning_rate": 8.247578046248872e-06, "loss": 0.5464, "step": 4597 }, { "epoch": 0.3, "grad_norm": 1.2403796911239624, "learning_rate": 8.246783171096805e-06, "loss": 0.5829, "step": 4598 }, { "epoch": 0.3, "grad_norm": 1.1276071071624756, "learning_rate": 8.245988154036222e-06, "loss": 0.5656, "step": 4599 }, { "epoch": 0.3, "grad_norm": 1.0987221002578735, "learning_rate": 8.245192995101872e-06, "loss": 0.5094, "step": 4600 }, { "epoch": 0.3, "grad_norm": 1.2078455686569214, "learning_rate": 8.244397694328508e-06, "loss": 0.5119, "step": 4601 }, { "epoch": 0.3, "grad_norm": 1.224029779434204, "learning_rate": 8.243602251750893e-06, "loss": 0.5927, "step": 4602 }, { "epoch": 0.3, "grad_norm": 1.3256176710128784, "learning_rate": 8.242806667403791e-06, "loss": 0.5662, "step": 4603 }, { "epoch": 0.3, "grad_norm": 1.139109492301941, "learning_rate": 8.242010941321976e-06, "loss": 0.5312, "step": 4604 }, { "epoch": 0.3, "grad_norm": 1.182826280593872, "learning_rate": 8.241215073540228e-06, "loss": 0.5747, "step": 4605 }, { "epoch": 0.3, "grad_norm": 1.0931886434555054, "learning_rate": 8.240419064093332e-06, "loss": 0.549, "step": 4606 }, { "epoch": 0.3, "grad_norm": 1.1133811473846436, "learning_rate": 8.239622913016077e-06, "loss": 0.5554, "step": 4607 }, { "epoch": 0.3, "grad_norm": 1.1833308935165405, "learning_rate": 8.238826620343264e-06, "loss": 0.6013, "step": 4608 }, { "epoch": 0.3, "grad_norm": 1.2283267974853516, "learning_rate": 8.238030186109696e-06, "loss": 0.59, "step": 4609 }, { "epoch": 0.3, "grad_norm": 1.1075819730758667, "learning_rate": 8.237233610350183e-06, "loss": 0.5351, "step": 4610 }, { "epoch": 0.3, "grad_norm": 1.158838152885437, "learning_rate": 8.23643689309954e-06, "loss": 0.5578, "step": 4611 }, { "epoch": 0.3, "grad_norm": 1.2496306896209717, "learning_rate": 8.235640034392593e-06, "loss": 0.5848, "step": 4612 }, { "epoch": 0.3, "grad_norm": 1.1331990957260132, "learning_rate": 8.234843034264165e-06, "loss": 0.5577, "step": 4613 }, { "epoch": 0.3, "grad_norm": 1.208170771598816, "learning_rate": 8.234045892749097e-06, "loss": 0.5503, "step": 4614 }, { "epoch": 0.3, "grad_norm": 1.3003854751586914, "learning_rate": 8.233248609882225e-06, "loss": 0.5191, "step": 4615 }, { "epoch": 0.3, "grad_norm": 1.0089826583862305, "learning_rate": 8.232451185698398e-06, "loss": 0.4991, "step": 4616 }, { "epoch": 0.3, "grad_norm": 1.1870423555374146, "learning_rate": 8.23165362023247e-06, "loss": 0.5881, "step": 4617 }, { "epoch": 0.3, "grad_norm": 1.095381259918213, "learning_rate": 8.2308559135193e-06, "loss": 0.5577, "step": 4618 }, { "epoch": 0.3, "grad_norm": 1.2263062000274658, "learning_rate": 8.230058065593753e-06, "loss": 0.6245, "step": 4619 }, { "epoch": 0.3, "grad_norm": 1.1642780303955078, "learning_rate": 8.229260076490702e-06, "loss": 0.5889, "step": 4620 }, { "epoch": 0.3, "grad_norm": 1.1200428009033203, "learning_rate": 8.228461946245026e-06, "loss": 0.5156, "step": 4621 }, { "epoch": 0.3, "grad_norm": 1.1109298467636108, "learning_rate": 8.227663674891607e-06, "loss": 0.5294, "step": 4622 }, { "epoch": 0.3, "grad_norm": 1.1323877573013306, "learning_rate": 8.226865262465336e-06, "loss": 0.5694, "step": 4623 }, { "epoch": 0.3, "grad_norm": 1.2597614526748657, "learning_rate": 8.22606670900111e-06, "loss": 0.5688, "step": 4624 }, { "epoch": 0.3, "grad_norm": 1.2110520601272583, "learning_rate": 8.225268014533832e-06, "loss": 0.574, "step": 4625 }, { "epoch": 0.3, "grad_norm": 1.1099066734313965, "learning_rate": 8.224469179098409e-06, "loss": 0.5161, "step": 4626 }, { "epoch": 0.3, "grad_norm": 1.1567450761795044, "learning_rate": 8.223670202729759e-06, "loss": 0.572, "step": 4627 }, { "epoch": 0.3, "grad_norm": 1.0552736520767212, "learning_rate": 8.2228710854628e-06, "loss": 0.577, "step": 4628 }, { "epoch": 0.3, "grad_norm": 1.1969269514083862, "learning_rate": 8.222071827332463e-06, "loss": 0.5793, "step": 4629 }, { "epoch": 0.3, "grad_norm": 1.3046931028366089, "learning_rate": 8.22127242837368e-06, "loss": 0.6046, "step": 4630 }, { "epoch": 0.3, "grad_norm": 1.2884982824325562, "learning_rate": 8.220472888621386e-06, "loss": 0.5644, "step": 4631 }, { "epoch": 0.3, "grad_norm": 1.2787103652954102, "learning_rate": 8.219673208110533e-06, "loss": 0.5777, "step": 4632 }, { "epoch": 0.3, "grad_norm": 1.2546842098236084, "learning_rate": 8.218873386876071e-06, "loss": 0.5357, "step": 4633 }, { "epoch": 0.3, "grad_norm": 1.2300540208816528, "learning_rate": 8.21807342495296e-06, "loss": 0.6093, "step": 4634 }, { "epoch": 0.3, "grad_norm": 1.1780805587768555, "learning_rate": 8.21727332237616e-06, "loss": 0.5588, "step": 4635 }, { "epoch": 0.3, "grad_norm": 1.1764098405838013, "learning_rate": 8.216473079180644e-06, "loss": 0.5718, "step": 4636 }, { "epoch": 0.3, "grad_norm": 1.0584112405776978, "learning_rate": 8.21567269540139e-06, "loss": 0.5247, "step": 4637 }, { "epoch": 0.3, "grad_norm": 1.1437177658081055, "learning_rate": 8.214872171073377e-06, "loss": 0.5544, "step": 4638 }, { "epoch": 0.3, "grad_norm": 1.1271122694015503, "learning_rate": 8.214071506231598e-06, "loss": 0.5522, "step": 4639 }, { "epoch": 0.3, "grad_norm": 1.1685011386871338, "learning_rate": 8.213270700911044e-06, "loss": 0.5501, "step": 4640 }, { "epoch": 0.3, "grad_norm": 1.2361189126968384, "learning_rate": 8.21246975514672e-06, "loss": 0.5462, "step": 4641 }, { "epoch": 0.3, "grad_norm": 1.3935726881027222, "learning_rate": 8.211668668973629e-06, "loss": 0.5527, "step": 4642 }, { "epoch": 0.3, "grad_norm": 1.1891711950302124, "learning_rate": 8.210867442426787e-06, "loss": 0.551, "step": 4643 }, { "epoch": 0.3, "grad_norm": 1.1992086172103882, "learning_rate": 8.210066075541215e-06, "loss": 0.5658, "step": 4644 }, { "epoch": 0.3, "grad_norm": 1.2304579019546509, "learning_rate": 8.209264568351936e-06, "loss": 0.5341, "step": 4645 }, { "epoch": 0.3, "grad_norm": 1.1920418739318848, "learning_rate": 8.208462920893982e-06, "loss": 0.5759, "step": 4646 }, { "epoch": 0.3, "grad_norm": 1.3043125867843628, "learning_rate": 8.207661133202393e-06, "loss": 0.5829, "step": 4647 }, { "epoch": 0.3, "grad_norm": 1.1681187152862549, "learning_rate": 8.20685920531221e-06, "loss": 0.5813, "step": 4648 }, { "epoch": 0.3, "grad_norm": 1.1884715557098389, "learning_rate": 8.206057137258486e-06, "loss": 0.581, "step": 4649 }, { "epoch": 0.3, "grad_norm": 1.2033469676971436, "learning_rate": 8.205254929076276e-06, "loss": 0.5749, "step": 4650 }, { "epoch": 0.3, "grad_norm": 1.2648438215255737, "learning_rate": 8.204452580800644e-06, "loss": 0.5973, "step": 4651 }, { "epoch": 0.3, "grad_norm": 1.2000646591186523, "learning_rate": 8.203650092466656e-06, "loss": 0.5789, "step": 4652 }, { "epoch": 0.3, "grad_norm": 1.2273602485656738, "learning_rate": 8.202847464109388e-06, "loss": 0.6358, "step": 4653 }, { "epoch": 0.3, "grad_norm": 1.2558614015579224, "learning_rate": 8.20204469576392e-06, "loss": 0.5836, "step": 4654 }, { "epoch": 0.3, "grad_norm": 1.2155418395996094, "learning_rate": 8.20124178746534e-06, "loss": 0.5499, "step": 4655 }, { "epoch": 0.3, "grad_norm": 1.1428732872009277, "learning_rate": 8.200438739248744e-06, "loss": 0.5301, "step": 4656 }, { "epoch": 0.3, "grad_norm": 1.2005106210708618, "learning_rate": 8.199635551149225e-06, "loss": 0.543, "step": 4657 }, { "epoch": 0.3, "grad_norm": 1.1234712600708008, "learning_rate": 8.198832223201892e-06, "loss": 0.6072, "step": 4658 }, { "epoch": 0.3, "grad_norm": 1.1257047653198242, "learning_rate": 8.198028755441855e-06, "loss": 0.5371, "step": 4659 }, { "epoch": 0.3, "grad_norm": 1.1660288572311401, "learning_rate": 8.197225147904233e-06, "loss": 0.5321, "step": 4660 }, { "epoch": 0.3, "grad_norm": 1.214680790901184, "learning_rate": 8.196421400624147e-06, "loss": 0.627, "step": 4661 }, { "epoch": 0.3, "grad_norm": 1.132054090499878, "learning_rate": 8.195617513636732e-06, "loss": 0.5305, "step": 4662 }, { "epoch": 0.3, "grad_norm": 1.2159831523895264, "learning_rate": 8.194813486977116e-06, "loss": 0.6096, "step": 4663 }, { "epoch": 0.3, "grad_norm": 1.1698857545852661, "learning_rate": 8.194009320680448e-06, "loss": 0.5443, "step": 4664 }, { "epoch": 0.3, "grad_norm": 1.18265962600708, "learning_rate": 8.19320501478187e-06, "loss": 0.5147, "step": 4665 }, { "epoch": 0.3, "grad_norm": 1.1785368919372559, "learning_rate": 8.192400569316543e-06, "loss": 0.5657, "step": 4666 }, { "epoch": 0.3, "grad_norm": 1.1325395107269287, "learning_rate": 8.191595984319623e-06, "loss": 0.5004, "step": 4667 }, { "epoch": 0.3, "grad_norm": 1.1000217199325562, "learning_rate": 8.190791259826276e-06, "loss": 0.5315, "step": 4668 }, { "epoch": 0.3, "grad_norm": 1.223199486732483, "learning_rate": 8.189986395871676e-06, "loss": 0.5461, "step": 4669 }, { "epoch": 0.3, "grad_norm": 1.291399359703064, "learning_rate": 8.189181392491001e-06, "loss": 0.5809, "step": 4670 }, { "epoch": 0.3, "grad_norm": 1.139078974723816, "learning_rate": 8.188376249719433e-06, "loss": 0.5348, "step": 4671 }, { "epoch": 0.3, "grad_norm": 1.1930545568466187, "learning_rate": 8.187570967592169e-06, "loss": 0.5457, "step": 4672 }, { "epoch": 0.3, "grad_norm": 1.1127091646194458, "learning_rate": 8.1867655461444e-06, "loss": 0.5492, "step": 4673 }, { "epoch": 0.3, "grad_norm": 1.124065637588501, "learning_rate": 8.185959985411331e-06, "loss": 0.6102, "step": 4674 }, { "epoch": 0.3, "grad_norm": 1.1206517219543457, "learning_rate": 8.185154285428172e-06, "loss": 0.5387, "step": 4675 }, { "epoch": 0.3, "grad_norm": 1.1966100931167603, "learning_rate": 8.184348446230137e-06, "loss": 0.5931, "step": 4676 }, { "epoch": 0.3, "grad_norm": 1.108575701713562, "learning_rate": 8.183542467852445e-06, "loss": 0.533, "step": 4677 }, { "epoch": 0.3, "grad_norm": 1.2587897777557373, "learning_rate": 8.182736350330327e-06, "loss": 0.6003, "step": 4678 }, { "epoch": 0.3, "grad_norm": 1.1579556465148926, "learning_rate": 8.181930093699016e-06, "loss": 0.5853, "step": 4679 }, { "epoch": 0.3, "grad_norm": 1.163693904876709, "learning_rate": 8.181123697993748e-06, "loss": 0.5413, "step": 4680 }, { "epoch": 0.3, "grad_norm": 1.2570098638534546, "learning_rate": 8.180317163249771e-06, "loss": 0.6088, "step": 4681 }, { "epoch": 0.3, "grad_norm": 1.4902937412261963, "learning_rate": 8.179510489502337e-06, "loss": 0.5949, "step": 4682 }, { "epoch": 0.3, "grad_norm": 1.6357629299163818, "learning_rate": 8.1787036767867e-06, "loss": 0.5531, "step": 4683 }, { "epoch": 0.3, "grad_norm": 1.2893965244293213, "learning_rate": 8.17789672513813e-06, "loss": 0.5523, "step": 4684 }, { "epoch": 0.3, "grad_norm": 1.109093427658081, "learning_rate": 8.177089634591893e-06, "loss": 0.5164, "step": 4685 }, { "epoch": 0.3, "grad_norm": 1.3369598388671875, "learning_rate": 8.176282405183263e-06, "loss": 0.5654, "step": 4686 }, { "epoch": 0.3, "grad_norm": 1.0912548303604126, "learning_rate": 8.175475036947524e-06, "loss": 0.5115, "step": 4687 }, { "epoch": 0.3, "grad_norm": 1.2386709451675415, "learning_rate": 8.174667529919966e-06, "loss": 0.5406, "step": 4688 }, { "epoch": 0.3, "grad_norm": 1.253661870956421, "learning_rate": 8.17385988413588e-06, "loss": 0.5796, "step": 4689 }, { "epoch": 0.3, "grad_norm": 1.0995664596557617, "learning_rate": 8.173052099630565e-06, "loss": 0.5334, "step": 4690 }, { "epoch": 0.3, "grad_norm": 1.8532745838165283, "learning_rate": 8.172244176439333e-06, "loss": 0.5103, "step": 4691 }, { "epoch": 0.3, "grad_norm": 1.1652653217315674, "learning_rate": 8.171436114597489e-06, "loss": 0.5676, "step": 4692 }, { "epoch": 0.3, "grad_norm": 1.127484917640686, "learning_rate": 8.170627914140357e-06, "loss": 0.5683, "step": 4693 }, { "epoch": 0.3, "grad_norm": 1.1613678932189941, "learning_rate": 8.16981957510326e-06, "loss": 0.5734, "step": 4694 }, { "epoch": 0.3, "grad_norm": 1.2900794744491577, "learning_rate": 8.169011097521525e-06, "loss": 0.5777, "step": 4695 }, { "epoch": 0.3, "grad_norm": 1.236256718635559, "learning_rate": 8.168202481430494e-06, "loss": 0.5659, "step": 4696 }, { "epoch": 0.3, "grad_norm": 1.10580575466156, "learning_rate": 8.167393726865504e-06, "loss": 0.5423, "step": 4697 }, { "epoch": 0.3, "grad_norm": 1.1794646978378296, "learning_rate": 8.166584833861909e-06, "loss": 0.5518, "step": 4698 }, { "epoch": 0.3, "grad_norm": 1.2766611576080322, "learning_rate": 8.165775802455057e-06, "loss": 0.6076, "step": 4699 }, { "epoch": 0.3, "grad_norm": 1.1748130321502686, "learning_rate": 8.164966632680316e-06, "loss": 0.5815, "step": 4700 }, { "epoch": 0.3, "grad_norm": 1.2579673528671265, "learning_rate": 8.164157324573047e-06, "loss": 0.6179, "step": 4701 }, { "epoch": 0.3, "grad_norm": 1.1621860265731812, "learning_rate": 8.163347878168625e-06, "loss": 0.527, "step": 4702 }, { "epoch": 0.3, "grad_norm": 1.2476063966751099, "learning_rate": 8.162538293502431e-06, "loss": 0.5628, "step": 4703 }, { "epoch": 0.3, "grad_norm": 1.0878885984420776, "learning_rate": 8.161728570609845e-06, "loss": 0.5358, "step": 4704 }, { "epoch": 0.3, "grad_norm": 1.1814382076263428, "learning_rate": 8.160918709526262e-06, "loss": 0.5717, "step": 4705 }, { "epoch": 0.3, "grad_norm": 1.1806789636611938, "learning_rate": 8.160108710287078e-06, "loss": 0.5377, "step": 4706 }, { "epoch": 0.3, "grad_norm": 1.1445441246032715, "learning_rate": 8.159298572927694e-06, "loss": 0.5532, "step": 4707 }, { "epoch": 0.3, "grad_norm": 1.1655067205429077, "learning_rate": 8.158488297483523e-06, "loss": 0.5587, "step": 4708 }, { "epoch": 0.3, "grad_norm": 1.1586674451828003, "learning_rate": 8.157677883989976e-06, "loss": 0.5543, "step": 4709 }, { "epoch": 0.3, "grad_norm": 1.2779467105865479, "learning_rate": 8.156867332482474e-06, "loss": 0.5967, "step": 4710 }, { "epoch": 0.3, "grad_norm": 1.1892098188400269, "learning_rate": 8.156056642996447e-06, "loss": 0.5539, "step": 4711 }, { "epoch": 0.3, "grad_norm": 1.2009342908859253, "learning_rate": 8.155245815567328e-06, "loss": 0.5534, "step": 4712 }, { "epoch": 0.3, "grad_norm": 1.2409327030181885, "learning_rate": 8.154434850230553e-06, "loss": 0.5892, "step": 4713 }, { "epoch": 0.3, "grad_norm": 1.2352732419967651, "learning_rate": 8.153623747021569e-06, "loss": 0.5632, "step": 4714 }, { "epoch": 0.3, "grad_norm": 1.1990265846252441, "learning_rate": 8.15281250597583e-06, "loss": 0.5325, "step": 4715 }, { "epoch": 0.3, "grad_norm": 1.1536870002746582, "learning_rate": 8.152001127128788e-06, "loss": 0.5656, "step": 4716 }, { "epoch": 0.3, "grad_norm": 1.1392135620117188, "learning_rate": 8.151189610515911e-06, "loss": 0.5539, "step": 4717 }, { "epoch": 0.3, "grad_norm": 1.0866395235061646, "learning_rate": 8.150377956172666e-06, "loss": 0.537, "step": 4718 }, { "epoch": 0.3, "grad_norm": 1.175874948501587, "learning_rate": 8.149566164134529e-06, "loss": 0.5842, "step": 4719 }, { "epoch": 0.3, "grad_norm": 1.27848219871521, "learning_rate": 8.14875423443698e-06, "loss": 0.5592, "step": 4720 }, { "epoch": 0.3, "grad_norm": 1.3792237043380737, "learning_rate": 8.147942167115508e-06, "loss": 0.5694, "step": 4721 }, { "epoch": 0.3, "grad_norm": 1.2117499113082886, "learning_rate": 8.147129962205606e-06, "loss": 0.5598, "step": 4722 }, { "epoch": 0.3, "grad_norm": 1.1605632305145264, "learning_rate": 8.146317619742772e-06, "loss": 0.5838, "step": 4723 }, { "epoch": 0.3, "grad_norm": 1.1604715585708618, "learning_rate": 8.145505139762512e-06, "loss": 0.5643, "step": 4724 }, { "epoch": 0.3, "grad_norm": 1.1448249816894531, "learning_rate": 8.144692522300339e-06, "loss": 0.609, "step": 4725 }, { "epoch": 0.31, "grad_norm": 1.1635793447494507, "learning_rate": 8.14387976739177e-06, "loss": 0.5537, "step": 4726 }, { "epoch": 0.31, "grad_norm": 1.3545691967010498, "learning_rate": 8.143066875072325e-06, "loss": 0.4967, "step": 4727 }, { "epoch": 0.31, "grad_norm": 1.3797305822372437, "learning_rate": 8.142253845377538e-06, "loss": 0.5488, "step": 4728 }, { "epoch": 0.31, "grad_norm": 1.326597809791565, "learning_rate": 8.141440678342942e-06, "loss": 0.5474, "step": 4729 }, { "epoch": 0.31, "grad_norm": 1.2443865537643433, "learning_rate": 8.14062737400408e-06, "loss": 0.6047, "step": 4730 }, { "epoch": 0.31, "grad_norm": 1.1332453489303589, "learning_rate": 8.139813932396496e-06, "loss": 0.5463, "step": 4731 }, { "epoch": 0.31, "grad_norm": 1.1784067153930664, "learning_rate": 8.13900035355575e-06, "loss": 0.5648, "step": 4732 }, { "epoch": 0.31, "grad_norm": 1.253675937652588, "learning_rate": 8.138186637517394e-06, "loss": 0.5321, "step": 4733 }, { "epoch": 0.31, "grad_norm": 1.2567106485366821, "learning_rate": 8.137372784316998e-06, "loss": 0.5174, "step": 4734 }, { "epoch": 0.31, "grad_norm": 1.1438205242156982, "learning_rate": 8.13655879399013e-06, "loss": 0.5588, "step": 4735 }, { "epoch": 0.31, "grad_norm": 1.2103912830352783, "learning_rate": 8.135744666572372e-06, "loss": 0.5808, "step": 4736 }, { "epoch": 0.31, "grad_norm": 1.1364549398422241, "learning_rate": 8.134930402099304e-06, "loss": 0.5235, "step": 4737 }, { "epoch": 0.31, "grad_norm": 1.1285966634750366, "learning_rate": 8.134116000606517e-06, "loss": 0.5858, "step": 4738 }, { "epoch": 0.31, "grad_norm": 1.1442358493804932, "learning_rate": 8.133301462129605e-06, "loss": 0.5073, "step": 4739 }, { "epoch": 0.31, "grad_norm": 1.202022671699524, "learning_rate": 8.13248678670417e-06, "loss": 0.5486, "step": 4740 }, { "epoch": 0.31, "grad_norm": 1.1873443126678467, "learning_rate": 8.13167197436582e-06, "loss": 0.5761, "step": 4741 }, { "epoch": 0.31, "grad_norm": 1.1758934259414673, "learning_rate": 8.130857025150168e-06, "loss": 0.5756, "step": 4742 }, { "epoch": 0.31, "grad_norm": 1.1404697895050049, "learning_rate": 8.130041939092832e-06, "loss": 0.5445, "step": 4743 }, { "epoch": 0.31, "grad_norm": 1.1637436151504517, "learning_rate": 8.129226716229438e-06, "loss": 0.5828, "step": 4744 }, { "epoch": 0.31, "grad_norm": 1.0955870151519775, "learning_rate": 8.128411356595621e-06, "loss": 0.5522, "step": 4745 }, { "epoch": 0.31, "grad_norm": 1.1560527086257935, "learning_rate": 8.127595860227013e-06, "loss": 0.534, "step": 4746 }, { "epoch": 0.31, "grad_norm": 1.1091018915176392, "learning_rate": 8.12678022715926e-06, "loss": 0.5409, "step": 4747 }, { "epoch": 0.31, "grad_norm": 1.213171362876892, "learning_rate": 8.125964457428008e-06, "loss": 0.5497, "step": 4748 }, { "epoch": 0.31, "grad_norm": 1.1982417106628418, "learning_rate": 8.125148551068916e-06, "loss": 0.5849, "step": 4749 }, { "epoch": 0.31, "grad_norm": 1.3388404846191406, "learning_rate": 8.124332508117645e-06, "loss": 0.6172, "step": 4750 }, { "epoch": 0.31, "grad_norm": 1.2191790342330933, "learning_rate": 8.123516328609859e-06, "loss": 0.5757, "step": 4751 }, { "epoch": 0.31, "grad_norm": 1.1397674083709717, "learning_rate": 8.122700012581235e-06, "loss": 0.5947, "step": 4752 }, { "epoch": 0.31, "grad_norm": 1.261610984802246, "learning_rate": 8.121883560067449e-06, "loss": 0.5396, "step": 4753 }, { "epoch": 0.31, "grad_norm": 1.0748101472854614, "learning_rate": 8.121066971104188e-06, "loss": 0.5678, "step": 4754 }, { "epoch": 0.31, "grad_norm": 1.1949694156646729, "learning_rate": 8.120250245727142e-06, "loss": 0.5766, "step": 4755 }, { "epoch": 0.31, "grad_norm": 1.141997218132019, "learning_rate": 8.119433383972008e-06, "loss": 0.5866, "step": 4756 }, { "epoch": 0.31, "grad_norm": 1.1595149040222168, "learning_rate": 8.118616385874486e-06, "loss": 0.5523, "step": 4757 }, { "epoch": 0.31, "grad_norm": 1.2570453882217407, "learning_rate": 8.117799251470292e-06, "loss": 0.5754, "step": 4758 }, { "epoch": 0.31, "grad_norm": 1.1170960664749146, "learning_rate": 8.116981980795135e-06, "loss": 0.5579, "step": 4759 }, { "epoch": 0.31, "grad_norm": 1.0857913494110107, "learning_rate": 8.116164573884739e-06, "loss": 0.5127, "step": 4760 }, { "epoch": 0.31, "grad_norm": 1.1019023656845093, "learning_rate": 8.115347030774827e-06, "loss": 0.5767, "step": 4761 }, { "epoch": 0.31, "grad_norm": 1.2641558647155762, "learning_rate": 8.114529351501137e-06, "loss": 0.6055, "step": 4762 }, { "epoch": 0.31, "grad_norm": 1.178169846534729, "learning_rate": 8.113711536099402e-06, "loss": 0.5664, "step": 4763 }, { "epoch": 0.31, "grad_norm": 1.0843462944030762, "learning_rate": 8.11289358460537e-06, "loss": 0.5429, "step": 4764 }, { "epoch": 0.31, "grad_norm": 1.1442373991012573, "learning_rate": 8.112075497054791e-06, "loss": 0.5826, "step": 4765 }, { "epoch": 0.31, "grad_norm": 1.1468989849090576, "learning_rate": 8.111257273483422e-06, "loss": 0.5885, "step": 4766 }, { "epoch": 0.31, "grad_norm": 1.2675052881240845, "learning_rate": 8.110438913927024e-06, "loss": 0.6303, "step": 4767 }, { "epoch": 0.31, "grad_norm": 1.3531692028045654, "learning_rate": 8.109620418421365e-06, "loss": 0.5972, "step": 4768 }, { "epoch": 0.31, "grad_norm": 1.1569324731826782, "learning_rate": 8.108801787002222e-06, "loss": 0.5345, "step": 4769 }, { "epoch": 0.31, "grad_norm": 1.141838550567627, "learning_rate": 8.107983019705374e-06, "loss": 0.5957, "step": 4770 }, { "epoch": 0.31, "grad_norm": 1.1752783060073853, "learning_rate": 8.107164116566607e-06, "loss": 0.625, "step": 4771 }, { "epoch": 0.31, "grad_norm": 1.1432164907455444, "learning_rate": 8.10634507762171e-06, "loss": 0.5167, "step": 4772 }, { "epoch": 0.31, "grad_norm": 1.7900840044021606, "learning_rate": 8.105525902906487e-06, "loss": 0.5753, "step": 4773 }, { "epoch": 0.31, "grad_norm": 1.121266484260559, "learning_rate": 8.104706592456738e-06, "loss": 0.5366, "step": 4774 }, { "epoch": 0.31, "grad_norm": 1.1768122911453247, "learning_rate": 8.103887146308274e-06, "loss": 0.5754, "step": 4775 }, { "epoch": 0.31, "grad_norm": 1.224038004875183, "learning_rate": 8.103067564496912e-06, "loss": 0.5538, "step": 4776 }, { "epoch": 0.31, "grad_norm": 1.1069658994674683, "learning_rate": 8.102247847058472e-06, "loss": 0.5319, "step": 4777 }, { "epoch": 0.31, "grad_norm": 1.2260096073150635, "learning_rate": 8.101427994028784e-06, "loss": 0.5433, "step": 4778 }, { "epoch": 0.31, "grad_norm": 1.1361298561096191, "learning_rate": 8.100608005443678e-06, "loss": 0.549, "step": 4779 }, { "epoch": 0.31, "grad_norm": 1.1826860904693604, "learning_rate": 8.099787881338996e-06, "loss": 0.5574, "step": 4780 }, { "epoch": 0.31, "grad_norm": 1.2219860553741455, "learning_rate": 8.098967621750584e-06, "loss": 0.5407, "step": 4781 }, { "epoch": 0.31, "grad_norm": 1.2086524963378906, "learning_rate": 8.098147226714294e-06, "loss": 0.5126, "step": 4782 }, { "epoch": 0.31, "grad_norm": 1.2002277374267578, "learning_rate": 8.09732669626598e-06, "loss": 0.5074, "step": 4783 }, { "epoch": 0.31, "grad_norm": 1.1540921926498413, "learning_rate": 8.096506030441508e-06, "loss": 0.5359, "step": 4784 }, { "epoch": 0.31, "grad_norm": 1.1439553499221802, "learning_rate": 8.095685229276747e-06, "loss": 0.5456, "step": 4785 }, { "epoch": 0.31, "grad_norm": 1.2751400470733643, "learning_rate": 8.094864292807571e-06, "loss": 0.6106, "step": 4786 }, { "epoch": 0.31, "grad_norm": 1.1295043230056763, "learning_rate": 8.09404322106986e-06, "loss": 0.4952, "step": 4787 }, { "epoch": 0.31, "grad_norm": 1.0922449827194214, "learning_rate": 8.093222014099505e-06, "loss": 0.531, "step": 4788 }, { "epoch": 0.31, "grad_norm": 1.217063546180725, "learning_rate": 8.092400671932396e-06, "loss": 0.5544, "step": 4789 }, { "epoch": 0.31, "grad_norm": 1.2767672538757324, "learning_rate": 8.091579194604432e-06, "loss": 0.5549, "step": 4790 }, { "epoch": 0.31, "grad_norm": 1.126543402671814, "learning_rate": 8.090757582151519e-06, "loss": 0.558, "step": 4791 }, { "epoch": 0.31, "grad_norm": 1.261699914932251, "learning_rate": 8.089935834609563e-06, "loss": 0.5813, "step": 4792 }, { "epoch": 0.31, "grad_norm": 1.1013730764389038, "learning_rate": 8.089113952014488e-06, "loss": 0.5728, "step": 4793 }, { "epoch": 0.31, "grad_norm": 1.2076008319854736, "learning_rate": 8.088291934402208e-06, "loss": 0.614, "step": 4794 }, { "epoch": 0.31, "grad_norm": 1.1680707931518555, "learning_rate": 8.087469781808658e-06, "loss": 0.5376, "step": 4795 }, { "epoch": 0.31, "grad_norm": 1.1556686162948608, "learning_rate": 8.086647494269768e-06, "loss": 0.531, "step": 4796 }, { "epoch": 0.31, "grad_norm": 1.1423882246017456, "learning_rate": 8.085825071821482e-06, "loss": 0.527, "step": 4797 }, { "epoch": 0.31, "grad_norm": 1.2026389837265015, "learning_rate": 8.085002514499742e-06, "loss": 0.5754, "step": 4798 }, { "epoch": 0.31, "grad_norm": 1.2175016403198242, "learning_rate": 8.0841798223405e-06, "loss": 0.5947, "step": 4799 }, { "epoch": 0.31, "grad_norm": 1.1261415481567383, "learning_rate": 8.083356995379718e-06, "loss": 0.5582, "step": 4800 }, { "epoch": 0.31, "grad_norm": 1.2430384159088135, "learning_rate": 8.082534033653357e-06, "loss": 0.6105, "step": 4801 }, { "epoch": 0.31, "grad_norm": 1.28162682056427, "learning_rate": 8.081710937197385e-06, "loss": 0.5474, "step": 4802 }, { "epoch": 0.31, "grad_norm": 1.2285574674606323, "learning_rate": 8.08088770604778e-06, "loss": 0.5343, "step": 4803 }, { "epoch": 0.31, "grad_norm": 1.1517605781555176, "learning_rate": 8.080064340240522e-06, "loss": 0.5181, "step": 4804 }, { "epoch": 0.31, "grad_norm": 1.2070428133010864, "learning_rate": 8.079240839811595e-06, "loss": 0.5241, "step": 4805 }, { "epoch": 0.31, "grad_norm": 1.2504311800003052, "learning_rate": 8.078417204797e-06, "loss": 0.5539, "step": 4806 }, { "epoch": 0.31, "grad_norm": 1.3102046251296997, "learning_rate": 8.07759343523273e-06, "loss": 0.598, "step": 4807 }, { "epoch": 0.31, "grad_norm": 1.1355053186416626, "learning_rate": 8.076769531154789e-06, "loss": 0.547, "step": 4808 }, { "epoch": 0.31, "grad_norm": 1.2476634979248047, "learning_rate": 8.075945492599193e-06, "loss": 0.5189, "step": 4809 }, { "epoch": 0.31, "grad_norm": 1.2958557605743408, "learning_rate": 8.075121319601954e-06, "loss": 0.5624, "step": 4810 }, { "epoch": 0.31, "grad_norm": 1.194962501525879, "learning_rate": 8.074297012199097e-06, "loss": 0.5613, "step": 4811 }, { "epoch": 0.31, "grad_norm": 1.2269563674926758, "learning_rate": 8.073472570426649e-06, "loss": 0.5704, "step": 4812 }, { "epoch": 0.31, "grad_norm": 1.205365538597107, "learning_rate": 8.072647994320643e-06, "loss": 0.5171, "step": 4813 }, { "epoch": 0.31, "grad_norm": 1.145334005355835, "learning_rate": 8.071823283917123e-06, "loss": 0.5501, "step": 4814 }, { "epoch": 0.31, "grad_norm": 1.2180490493774414, "learning_rate": 8.070998439252132e-06, "loss": 0.4899, "step": 4815 }, { "epoch": 0.31, "grad_norm": 1.203458309173584, "learning_rate": 8.070173460361721e-06, "loss": 0.5685, "step": 4816 }, { "epoch": 0.31, "grad_norm": 1.2711193561553955, "learning_rate": 8.069348347281952e-06, "loss": 0.5733, "step": 4817 }, { "epoch": 0.31, "grad_norm": 1.2204780578613281, "learning_rate": 8.068523100048884e-06, "loss": 0.5651, "step": 4818 }, { "epoch": 0.31, "grad_norm": 1.1658719778060913, "learning_rate": 8.067697718698588e-06, "loss": 0.5216, "step": 4819 }, { "epoch": 0.31, "grad_norm": 1.1030998229980469, "learning_rate": 8.06687220326714e-06, "loss": 0.4927, "step": 4820 }, { "epoch": 0.31, "grad_norm": 1.1302589178085327, "learning_rate": 8.066046553790622e-06, "loss": 0.5524, "step": 4821 }, { "epoch": 0.31, "grad_norm": 1.136967420578003, "learning_rate": 8.065220770305118e-06, "loss": 0.5261, "step": 4822 }, { "epoch": 0.31, "grad_norm": 1.3746875524520874, "learning_rate": 8.064394852846723e-06, "loss": 0.542, "step": 4823 }, { "epoch": 0.31, "grad_norm": 1.164105772972107, "learning_rate": 8.063568801451534e-06, "loss": 0.5769, "step": 4824 }, { "epoch": 0.31, "grad_norm": 1.2894548177719116, "learning_rate": 8.062742616155657e-06, "loss": 0.5494, "step": 4825 }, { "epoch": 0.31, "grad_norm": 1.2309305667877197, "learning_rate": 8.061916296995205e-06, "loss": 0.6426, "step": 4826 }, { "epoch": 0.31, "grad_norm": 1.1487014293670654, "learning_rate": 8.06108984400629e-06, "loss": 0.5299, "step": 4827 }, { "epoch": 0.31, "grad_norm": 1.2631645202636719, "learning_rate": 8.060263257225033e-06, "loss": 0.6015, "step": 4828 }, { "epoch": 0.31, "grad_norm": 1.1041913032531738, "learning_rate": 8.059436536687568e-06, "loss": 0.5383, "step": 4829 }, { "epoch": 0.31, "grad_norm": 1.2645796537399292, "learning_rate": 8.058609682430023e-06, "loss": 0.575, "step": 4830 }, { "epoch": 0.31, "grad_norm": 1.244094967842102, "learning_rate": 8.057782694488541e-06, "loss": 0.5149, "step": 4831 }, { "epoch": 0.31, "grad_norm": 1.1459159851074219, "learning_rate": 8.056955572899268e-06, "loss": 0.5517, "step": 4832 }, { "epoch": 0.31, "grad_norm": 1.0835777521133423, "learning_rate": 8.056128317698352e-06, "loss": 0.5255, "step": 4833 }, { "epoch": 0.31, "grad_norm": 1.2345218658447266, "learning_rate": 8.055300928921954e-06, "loss": 0.5818, "step": 4834 }, { "epoch": 0.31, "grad_norm": 1.2574043273925781, "learning_rate": 8.054473406606233e-06, "loss": 0.5398, "step": 4835 }, { "epoch": 0.31, "grad_norm": 1.0972998142242432, "learning_rate": 8.053645750787362e-06, "loss": 0.4624, "step": 4836 }, { "epoch": 0.31, "grad_norm": 1.3288711309432983, "learning_rate": 8.052817961501512e-06, "loss": 0.5518, "step": 4837 }, { "epoch": 0.31, "grad_norm": 1.2062323093414307, "learning_rate": 8.051990038784867e-06, "loss": 0.5834, "step": 4838 }, { "epoch": 0.31, "grad_norm": 1.1609795093536377, "learning_rate": 8.05116198267361e-06, "loss": 0.5206, "step": 4839 }, { "epoch": 0.31, "grad_norm": 1.1027374267578125, "learning_rate": 8.050333793203936e-06, "loss": 0.5667, "step": 4840 }, { "epoch": 0.31, "grad_norm": 1.2131413221359253, "learning_rate": 8.049505470412043e-06, "loss": 0.5941, "step": 4841 }, { "epoch": 0.31, "grad_norm": 1.3073593378067017, "learning_rate": 8.048677014334132e-06, "loss": 0.5303, "step": 4842 }, { "epoch": 0.31, "grad_norm": 1.1713441610336304, "learning_rate": 8.047848425006414e-06, "loss": 0.5463, "step": 4843 }, { "epoch": 0.31, "grad_norm": 1.2641557455062866, "learning_rate": 8.047019702465106e-06, "loss": 0.5331, "step": 4844 }, { "epoch": 0.31, "grad_norm": 1.1899950504302979, "learning_rate": 8.046190846746427e-06, "loss": 0.5361, "step": 4845 }, { "epoch": 0.31, "grad_norm": 1.273436188697815, "learning_rate": 8.045361857886606e-06, "loss": 0.5698, "step": 4846 }, { "epoch": 0.31, "grad_norm": 1.2640095949172974, "learning_rate": 8.044532735921876e-06, "loss": 0.5546, "step": 4847 }, { "epoch": 0.31, "grad_norm": 1.26991868019104, "learning_rate": 8.043703480888474e-06, "loss": 0.5722, "step": 4848 }, { "epoch": 0.31, "grad_norm": 1.1351374387741089, "learning_rate": 8.042874092822647e-06, "loss": 0.5563, "step": 4849 }, { "epoch": 0.31, "grad_norm": 1.2538623809814453, "learning_rate": 8.042044571760643e-06, "loss": 0.5097, "step": 4850 }, { "epoch": 0.31, "grad_norm": 1.242449164390564, "learning_rate": 8.04121491773872e-06, "loss": 0.5741, "step": 4851 }, { "epoch": 0.31, "grad_norm": 1.2330896854400635, "learning_rate": 8.04038513079314e-06, "loss": 0.5295, "step": 4852 }, { "epoch": 0.31, "grad_norm": 1.286089301109314, "learning_rate": 8.039555210960168e-06, "loss": 0.6013, "step": 4853 }, { "epoch": 0.31, "grad_norm": 1.358383297920227, "learning_rate": 8.038725158276082e-06, "loss": 0.6212, "step": 4854 }, { "epoch": 0.31, "grad_norm": 1.065422534942627, "learning_rate": 8.03789497277716e-06, "loss": 0.4771, "step": 4855 }, { "epoch": 0.31, "grad_norm": 1.170329213142395, "learning_rate": 8.037064654499685e-06, "loss": 0.5939, "step": 4856 }, { "epoch": 0.31, "grad_norm": 1.166460394859314, "learning_rate": 8.036234203479949e-06, "loss": 0.5737, "step": 4857 }, { "epoch": 0.31, "grad_norm": 1.2505791187286377, "learning_rate": 8.03540361975425e-06, "loss": 0.5741, "step": 4858 }, { "epoch": 0.31, "grad_norm": 1.2031705379486084, "learning_rate": 8.034572903358891e-06, "loss": 0.5659, "step": 4859 }, { "epoch": 0.31, "grad_norm": 1.1904239654541016, "learning_rate": 8.03374205433018e-06, "loss": 0.5804, "step": 4860 }, { "epoch": 0.31, "grad_norm": 1.1714214086532593, "learning_rate": 8.03291107270443e-06, "loss": 0.6068, "step": 4861 }, { "epoch": 0.31, "grad_norm": 1.2360029220581055, "learning_rate": 8.032079958517962e-06, "loss": 0.5408, "step": 4862 }, { "epoch": 0.31, "grad_norm": 1.0930944681167603, "learning_rate": 8.031248711807102e-06, "loss": 0.5199, "step": 4863 }, { "epoch": 0.31, "grad_norm": 1.1370861530303955, "learning_rate": 8.030417332608183e-06, "loss": 0.5496, "step": 4864 }, { "epoch": 0.31, "grad_norm": 1.1494284868240356, "learning_rate": 8.02958582095754e-06, "loss": 0.5536, "step": 4865 }, { "epoch": 0.31, "grad_norm": 1.2370753288269043, "learning_rate": 8.028754176891516e-06, "loss": 0.5577, "step": 4866 }, { "epoch": 0.31, "grad_norm": 1.164368987083435, "learning_rate": 8.027922400446462e-06, "loss": 0.5517, "step": 4867 }, { "epoch": 0.31, "grad_norm": 1.0826694965362549, "learning_rate": 8.027090491658733e-06, "loss": 0.5547, "step": 4868 }, { "epoch": 0.31, "grad_norm": 1.1347674131393433, "learning_rate": 8.026258450564688e-06, "loss": 0.5363, "step": 4869 }, { "epoch": 0.31, "grad_norm": 1.2215193510055542, "learning_rate": 8.025426277200694e-06, "loss": 0.5809, "step": 4870 }, { "epoch": 0.31, "grad_norm": 1.1046028137207031, "learning_rate": 8.024593971603124e-06, "loss": 0.5658, "step": 4871 }, { "epoch": 0.31, "grad_norm": 1.3429113626480103, "learning_rate": 8.023761533808352e-06, "loss": 0.5343, "step": 4872 }, { "epoch": 0.31, "grad_norm": 1.2111319303512573, "learning_rate": 8.022928963852767e-06, "loss": 0.5461, "step": 4873 }, { "epoch": 0.31, "grad_norm": 1.427259087562561, "learning_rate": 8.022096261772755e-06, "loss": 0.5736, "step": 4874 }, { "epoch": 0.31, "grad_norm": 1.411470651626587, "learning_rate": 8.021263427604715e-06, "loss": 0.5641, "step": 4875 }, { "epoch": 0.31, "grad_norm": 1.1503300666809082, "learning_rate": 8.020430461385044e-06, "loss": 0.5638, "step": 4876 }, { "epoch": 0.31, "grad_norm": 1.1791008710861206, "learning_rate": 8.01959736315015e-06, "loss": 0.5635, "step": 4877 }, { "epoch": 0.31, "grad_norm": 1.1704092025756836, "learning_rate": 8.018764132936446e-06, "loss": 0.6063, "step": 4878 }, { "epoch": 0.31, "grad_norm": 1.0780540704727173, "learning_rate": 8.017930770780351e-06, "loss": 0.6154, "step": 4879 }, { "epoch": 0.32, "grad_norm": 1.361838698387146, "learning_rate": 8.017097276718288e-06, "loss": 0.5488, "step": 4880 }, { "epoch": 0.32, "grad_norm": 1.1727272272109985, "learning_rate": 8.016263650786688e-06, "loss": 0.5476, "step": 4881 }, { "epoch": 0.32, "grad_norm": 1.2041620016098022, "learning_rate": 8.015429893021985e-06, "loss": 0.5452, "step": 4882 }, { "epoch": 0.32, "grad_norm": 1.2255407571792603, "learning_rate": 8.014596003460621e-06, "loss": 0.5786, "step": 4883 }, { "epoch": 0.32, "grad_norm": 1.211143136024475, "learning_rate": 8.013761982139044e-06, "loss": 0.5955, "step": 4884 }, { "epoch": 0.32, "grad_norm": 1.152226448059082, "learning_rate": 8.012927829093706e-06, "loss": 0.58, "step": 4885 }, { "epoch": 0.32, "grad_norm": 1.0655144453048706, "learning_rate": 8.012093544361067e-06, "loss": 0.5413, "step": 4886 }, { "epoch": 0.32, "grad_norm": 1.1036145687103271, "learning_rate": 8.011259127977588e-06, "loss": 0.5366, "step": 4887 }, { "epoch": 0.32, "grad_norm": 1.2166601419448853, "learning_rate": 8.010424579979745e-06, "loss": 0.5398, "step": 4888 }, { "epoch": 0.32, "grad_norm": 1.1878069639205933, "learning_rate": 8.009589900404009e-06, "loss": 0.5284, "step": 4889 }, { "epoch": 0.32, "grad_norm": 1.2134085893630981, "learning_rate": 8.008755089286863e-06, "loss": 0.5689, "step": 4890 }, { "epoch": 0.32, "grad_norm": 1.2239207029342651, "learning_rate": 8.007920146664796e-06, "loss": 0.551, "step": 4891 }, { "epoch": 0.32, "grad_norm": 1.2550135850906372, "learning_rate": 8.007085072574299e-06, "loss": 0.5853, "step": 4892 }, { "epoch": 0.32, "grad_norm": 1.169715404510498, "learning_rate": 8.00624986705187e-06, "loss": 0.5466, "step": 4893 }, { "epoch": 0.32, "grad_norm": 1.2139688730239868, "learning_rate": 8.00541453013402e-06, "loss": 0.5729, "step": 4894 }, { "epoch": 0.32, "grad_norm": 1.1906354427337646, "learning_rate": 8.004579061857251e-06, "loss": 0.577, "step": 4895 }, { "epoch": 0.32, "grad_norm": 1.2433615922927856, "learning_rate": 8.003743462258084e-06, "loss": 0.5322, "step": 4896 }, { "epoch": 0.32, "grad_norm": 1.1780591011047363, "learning_rate": 8.002907731373041e-06, "loss": 0.5075, "step": 4897 }, { "epoch": 0.32, "grad_norm": 1.1830087900161743, "learning_rate": 8.002071869238648e-06, "loss": 0.5446, "step": 4898 }, { "epoch": 0.32, "grad_norm": 1.1497528553009033, "learning_rate": 8.00123587589144e-06, "loss": 0.5724, "step": 4899 }, { "epoch": 0.32, "grad_norm": 1.2220377922058105, "learning_rate": 8.000399751367953e-06, "loss": 0.5524, "step": 4900 }, { "epoch": 0.32, "grad_norm": 1.235679030418396, "learning_rate": 7.999563495704736e-06, "loss": 0.5875, "step": 4901 }, { "epoch": 0.32, "grad_norm": 1.1310583353042603, "learning_rate": 7.998727108938338e-06, "loss": 0.5115, "step": 4902 }, { "epoch": 0.32, "grad_norm": 1.100757122039795, "learning_rate": 7.997890591105313e-06, "loss": 0.5384, "step": 4903 }, { "epoch": 0.32, "grad_norm": 1.1125706434249878, "learning_rate": 7.997053942242225e-06, "loss": 0.5357, "step": 4904 }, { "epoch": 0.32, "grad_norm": 1.0546624660491943, "learning_rate": 7.996217162385643e-06, "loss": 0.5559, "step": 4905 }, { "epoch": 0.32, "grad_norm": 1.1107679605484009, "learning_rate": 7.995380251572138e-06, "loss": 0.5361, "step": 4906 }, { "epoch": 0.32, "grad_norm": 1.1266738176345825, "learning_rate": 7.994543209838293e-06, "loss": 0.5151, "step": 4907 }, { "epoch": 0.32, "grad_norm": 1.2056986093521118, "learning_rate": 7.993706037220687e-06, "loss": 0.491, "step": 4908 }, { "epoch": 0.32, "grad_norm": 1.1545755863189697, "learning_rate": 7.992868733755916e-06, "loss": 0.5462, "step": 4909 }, { "epoch": 0.32, "grad_norm": 1.214752197265625, "learning_rate": 7.992031299480574e-06, "loss": 0.5359, "step": 4910 }, { "epoch": 0.32, "grad_norm": 1.1303656101226807, "learning_rate": 7.991193734431264e-06, "loss": 0.5371, "step": 4911 }, { "epoch": 0.32, "grad_norm": 1.176766037940979, "learning_rate": 7.990356038644591e-06, "loss": 0.5694, "step": 4912 }, { "epoch": 0.32, "grad_norm": 1.165892481803894, "learning_rate": 7.989518212157172e-06, "loss": 0.5453, "step": 4913 }, { "epoch": 0.32, "grad_norm": 1.0498501062393188, "learning_rate": 7.988680255005625e-06, "loss": 0.5085, "step": 4914 }, { "epoch": 0.32, "grad_norm": 1.2225576639175415, "learning_rate": 7.987842167226576e-06, "loss": 0.507, "step": 4915 }, { "epoch": 0.32, "grad_norm": 1.4103739261627197, "learning_rate": 7.987003948856653e-06, "loss": 0.5464, "step": 4916 }, { "epoch": 0.32, "grad_norm": 1.3264169692993164, "learning_rate": 7.986165599932495e-06, "loss": 0.5458, "step": 4917 }, { "epoch": 0.32, "grad_norm": 1.1414934396743774, "learning_rate": 7.985327120490742e-06, "loss": 0.52, "step": 4918 }, { "epoch": 0.32, "grad_norm": 1.2376128435134888, "learning_rate": 7.984488510568042e-06, "loss": 0.5493, "step": 4919 }, { "epoch": 0.32, "grad_norm": 1.2251993417739868, "learning_rate": 7.983649770201053e-06, "loss": 0.5786, "step": 4920 }, { "epoch": 0.32, "grad_norm": 1.3648293018341064, "learning_rate": 7.982810899426428e-06, "loss": 0.5964, "step": 4921 }, { "epoch": 0.32, "grad_norm": 1.1636536121368408, "learning_rate": 7.981971898280834e-06, "loss": 0.5479, "step": 4922 }, { "epoch": 0.32, "grad_norm": 1.3040881156921387, "learning_rate": 7.981132766800943e-06, "loss": 0.5823, "step": 4923 }, { "epoch": 0.32, "grad_norm": 1.1873304843902588, "learning_rate": 7.980293505023428e-06, "loss": 0.5211, "step": 4924 }, { "epoch": 0.32, "grad_norm": 1.2697855234146118, "learning_rate": 7.979454112984977e-06, "loss": 0.5102, "step": 4925 }, { "epoch": 0.32, "grad_norm": 1.1436057090759277, "learning_rate": 7.978614590722272e-06, "loss": 0.5784, "step": 4926 }, { "epoch": 0.32, "grad_norm": 1.2705270051956177, "learning_rate": 7.977774938272006e-06, "loss": 0.5538, "step": 4927 }, { "epoch": 0.32, "grad_norm": 1.147097110748291, "learning_rate": 7.976935155670884e-06, "loss": 0.6018, "step": 4928 }, { "epoch": 0.32, "grad_norm": 1.317120909690857, "learning_rate": 7.976095242955603e-06, "loss": 0.5425, "step": 4929 }, { "epoch": 0.32, "grad_norm": 1.1322399377822876, "learning_rate": 7.975255200162881e-06, "loss": 0.5266, "step": 4930 }, { "epoch": 0.32, "grad_norm": 1.0383809804916382, "learning_rate": 7.974415027329427e-06, "loss": 0.542, "step": 4931 }, { "epoch": 0.32, "grad_norm": 1.2109875679016113, "learning_rate": 7.97357472449197e-06, "loss": 0.5634, "step": 4932 }, { "epoch": 0.32, "grad_norm": 1.126489520072937, "learning_rate": 7.972734291687232e-06, "loss": 0.5075, "step": 4933 }, { "epoch": 0.32, "grad_norm": 1.1137895584106445, "learning_rate": 7.971893728951946e-06, "loss": 0.6134, "step": 4934 }, { "epoch": 0.32, "grad_norm": 1.1568834781646729, "learning_rate": 7.971053036322855e-06, "loss": 0.5825, "step": 4935 }, { "epoch": 0.32, "grad_norm": 1.1015315055847168, "learning_rate": 7.970212213836701e-06, "loss": 0.5153, "step": 4936 }, { "epoch": 0.32, "grad_norm": 1.1080000400543213, "learning_rate": 7.969371261530233e-06, "loss": 0.5849, "step": 4937 }, { "epoch": 0.32, "grad_norm": 1.3194074630737305, "learning_rate": 7.96853017944021e-06, "loss": 0.6047, "step": 4938 }, { "epoch": 0.32, "grad_norm": 1.122412085533142, "learning_rate": 7.96768896760339e-06, "loss": 0.5846, "step": 4939 }, { "epoch": 0.32, "grad_norm": 1.2197251319885254, "learning_rate": 7.966847626056544e-06, "loss": 0.5589, "step": 4940 }, { "epoch": 0.32, "grad_norm": 1.1305344104766846, "learning_rate": 7.966006154836441e-06, "loss": 0.5299, "step": 4941 }, { "epoch": 0.32, "grad_norm": 1.225904107093811, "learning_rate": 7.965164553979862e-06, "loss": 0.5936, "step": 4942 }, { "epoch": 0.32, "grad_norm": 1.2886260747909546, "learning_rate": 7.964322823523591e-06, "loss": 0.5667, "step": 4943 }, { "epoch": 0.32, "grad_norm": 1.2092167139053345, "learning_rate": 7.963480963504417e-06, "loss": 0.53, "step": 4944 }, { "epoch": 0.32, "grad_norm": 1.238144040107727, "learning_rate": 7.962638973959137e-06, "loss": 0.5593, "step": 4945 }, { "epoch": 0.32, "grad_norm": 1.1079981327056885, "learning_rate": 7.961796854924551e-06, "loss": 0.5482, "step": 4946 }, { "epoch": 0.32, "grad_norm": 1.1242305040359497, "learning_rate": 7.960954606437463e-06, "loss": 0.4865, "step": 4947 }, { "epoch": 0.32, "grad_norm": 1.1387194395065308, "learning_rate": 7.960112228534691e-06, "loss": 0.5645, "step": 4948 }, { "epoch": 0.32, "grad_norm": 1.176944375038147, "learning_rate": 7.959269721253051e-06, "loss": 0.5592, "step": 4949 }, { "epoch": 0.32, "grad_norm": 1.1967841386795044, "learning_rate": 7.958427084629366e-06, "loss": 0.5695, "step": 4950 }, { "epoch": 0.32, "grad_norm": 1.475308895111084, "learning_rate": 7.957584318700466e-06, "loss": 0.5549, "step": 4951 }, { "epoch": 0.32, "grad_norm": 1.2648130655288696, "learning_rate": 7.956741423503187e-06, "loss": 0.5579, "step": 4952 }, { "epoch": 0.32, "grad_norm": 1.150984525680542, "learning_rate": 7.955898399074368e-06, "loss": 0.5641, "step": 4953 }, { "epoch": 0.32, "grad_norm": 1.4445621967315674, "learning_rate": 7.955055245450856e-06, "loss": 0.5842, "step": 4954 }, { "epoch": 0.32, "grad_norm": 1.307831048965454, "learning_rate": 7.954211962669503e-06, "loss": 0.512, "step": 4955 }, { "epoch": 0.32, "grad_norm": 1.292117953300476, "learning_rate": 7.953368550767168e-06, "loss": 0.5924, "step": 4956 }, { "epoch": 0.32, "grad_norm": 1.1620954275131226, "learning_rate": 7.952525009780712e-06, "loss": 0.5384, "step": 4957 }, { "epoch": 0.32, "grad_norm": 1.2138440608978271, "learning_rate": 7.951681339747007e-06, "loss": 0.5542, "step": 4958 }, { "epoch": 0.32, "grad_norm": 1.1476777791976929, "learning_rate": 7.950837540702926e-06, "loss": 0.5399, "step": 4959 }, { "epoch": 0.32, "grad_norm": 1.2582504749298096, "learning_rate": 7.949993612685349e-06, "loss": 0.559, "step": 4960 }, { "epoch": 0.32, "grad_norm": 1.2244548797607422, "learning_rate": 7.949149555731163e-06, "loss": 0.5749, "step": 4961 }, { "epoch": 0.32, "grad_norm": 1.1293060779571533, "learning_rate": 7.948305369877257e-06, "loss": 0.5261, "step": 4962 }, { "epoch": 0.32, "grad_norm": 1.3332428932189941, "learning_rate": 7.94746105516053e-06, "loss": 0.5756, "step": 4963 }, { "epoch": 0.32, "grad_norm": 1.2542487382888794, "learning_rate": 7.946616611617888e-06, "loss": 0.5353, "step": 4964 }, { "epoch": 0.32, "grad_norm": 1.1492060422897339, "learning_rate": 7.945772039286233e-06, "loss": 0.588, "step": 4965 }, { "epoch": 0.32, "grad_norm": 1.1687531471252441, "learning_rate": 7.944927338202483e-06, "loss": 0.5679, "step": 4966 }, { "epoch": 0.32, "grad_norm": 1.3380881547927856, "learning_rate": 7.944082508403559e-06, "loss": 0.5714, "step": 4967 }, { "epoch": 0.32, "grad_norm": 1.3020949363708496, "learning_rate": 7.943237549926383e-06, "loss": 0.5929, "step": 4968 }, { "epoch": 0.32, "grad_norm": 1.3205201625823975, "learning_rate": 7.942392462807886e-06, "loss": 0.5708, "step": 4969 }, { "epoch": 0.32, "grad_norm": 1.271517276763916, "learning_rate": 7.941547247085008e-06, "loss": 0.547, "step": 4970 }, { "epoch": 0.32, "grad_norm": 1.2265377044677734, "learning_rate": 7.940701902794688e-06, "loss": 0.5868, "step": 4971 }, { "epoch": 0.32, "grad_norm": 1.21064293384552, "learning_rate": 7.939856429973876e-06, "loss": 0.5164, "step": 4972 }, { "epoch": 0.32, "grad_norm": 1.1513924598693848, "learning_rate": 7.939010828659524e-06, "loss": 0.5437, "step": 4973 }, { "epoch": 0.32, "grad_norm": 1.1438637971878052, "learning_rate": 7.93816509888859e-06, "loss": 0.5538, "step": 4974 }, { "epoch": 0.32, "grad_norm": 1.1639599800109863, "learning_rate": 7.937319240698042e-06, "loss": 0.5741, "step": 4975 }, { "epoch": 0.32, "grad_norm": 1.1667574644088745, "learning_rate": 7.936473254124848e-06, "loss": 0.5501, "step": 4976 }, { "epoch": 0.32, "grad_norm": 1.139879584312439, "learning_rate": 7.935627139205983e-06, "loss": 0.5009, "step": 4977 }, { "epoch": 0.32, "grad_norm": 1.0775927305221558, "learning_rate": 7.934780895978432e-06, "loss": 0.555, "step": 4978 }, { "epoch": 0.32, "grad_norm": 1.1086668968200684, "learning_rate": 7.933934524479177e-06, "loss": 0.5065, "step": 4979 }, { "epoch": 0.32, "grad_norm": 1.2642810344696045, "learning_rate": 7.933088024745216e-06, "loss": 0.5882, "step": 4980 }, { "epoch": 0.32, "grad_norm": 1.2836997509002686, "learning_rate": 7.932241396813542e-06, "loss": 0.5221, "step": 4981 }, { "epoch": 0.32, "grad_norm": 1.2740702629089355, "learning_rate": 7.931394640721166e-06, "loss": 0.5955, "step": 4982 }, { "epoch": 0.32, "grad_norm": 1.309699535369873, "learning_rate": 7.93054775650509e-06, "loss": 0.515, "step": 4983 }, { "epoch": 0.32, "grad_norm": 1.136288046836853, "learning_rate": 7.929700744202336e-06, "loss": 0.5552, "step": 4984 }, { "epoch": 0.32, "grad_norm": 1.2487801313400269, "learning_rate": 7.928853603849918e-06, "loss": 0.5856, "step": 4985 }, { "epoch": 0.32, "grad_norm": 1.3042396306991577, "learning_rate": 7.928006335484865e-06, "loss": 0.5663, "step": 4986 }, { "epoch": 0.32, "grad_norm": 1.2241251468658447, "learning_rate": 7.927158939144212e-06, "loss": 0.5554, "step": 4987 }, { "epoch": 0.32, "grad_norm": 1.1752740144729614, "learning_rate": 7.926311414864992e-06, "loss": 0.5885, "step": 4988 }, { "epoch": 0.32, "grad_norm": 1.3134164810180664, "learning_rate": 7.92546376268425e-06, "loss": 0.5762, "step": 4989 }, { "epoch": 0.32, "grad_norm": 1.300551176071167, "learning_rate": 7.924615982639037e-06, "loss": 0.6218, "step": 4990 }, { "epoch": 0.32, "grad_norm": 1.2228273153305054, "learning_rate": 7.923768074766403e-06, "loss": 0.5692, "step": 4991 }, { "epoch": 0.32, "grad_norm": 1.2238566875457764, "learning_rate": 7.92292003910341e-06, "loss": 0.5407, "step": 4992 }, { "epoch": 0.32, "grad_norm": 1.1529148817062378, "learning_rate": 7.922071875687125e-06, "loss": 0.5704, "step": 4993 }, { "epoch": 0.32, "grad_norm": 1.198419451713562, "learning_rate": 7.921223584554616e-06, "loss": 0.5532, "step": 4994 }, { "epoch": 0.32, "grad_norm": 1.1800955533981323, "learning_rate": 7.92037516574296e-06, "loss": 0.5133, "step": 4995 }, { "epoch": 0.32, "grad_norm": 1.134155511856079, "learning_rate": 7.919526619289243e-06, "loss": 0.5876, "step": 4996 }, { "epoch": 0.32, "grad_norm": 1.180943250656128, "learning_rate": 7.918677945230545e-06, "loss": 0.5112, "step": 4997 }, { "epoch": 0.32, "grad_norm": 1.1342871189117432, "learning_rate": 7.917829143603969e-06, "loss": 0.5578, "step": 4998 }, { "epoch": 0.32, "grad_norm": 2.093654155731201, "learning_rate": 7.916980214446608e-06, "loss": 0.5279, "step": 4999 }, { "epoch": 0.32, "grad_norm": 1.148263931274414, "learning_rate": 7.916131157795569e-06, "loss": 0.5769, "step": 5000 }, { "epoch": 0.32, "grad_norm": 1.1845818758010864, "learning_rate": 7.915281973687958e-06, "loss": 0.6021, "step": 5001 }, { "epoch": 0.32, "grad_norm": 1.1727209091186523, "learning_rate": 7.914432662160894e-06, "loss": 0.5385, "step": 5002 }, { "epoch": 0.32, "grad_norm": 1.0695732831954956, "learning_rate": 7.913583223251498e-06, "loss": 0.5571, "step": 5003 }, { "epoch": 0.32, "grad_norm": 1.2719733715057373, "learning_rate": 7.912733656996897e-06, "loss": 0.5784, "step": 5004 }, { "epoch": 0.32, "grad_norm": 1.1437081098556519, "learning_rate": 7.911883963434224e-06, "loss": 0.551, "step": 5005 }, { "epoch": 0.32, "grad_norm": 1.233341932296753, "learning_rate": 7.911034142600613e-06, "loss": 0.5296, "step": 5006 }, { "epoch": 0.32, "grad_norm": 1.3344475030899048, "learning_rate": 7.910184194533212e-06, "loss": 0.5806, "step": 5007 }, { "epoch": 0.32, "grad_norm": 1.1502528190612793, "learning_rate": 7.909334119269168e-06, "loss": 0.574, "step": 5008 }, { "epoch": 0.32, "grad_norm": 1.1772656440734863, "learning_rate": 7.908483916845636e-06, "loss": 0.5656, "step": 5009 }, { "epoch": 0.32, "grad_norm": 1.127251148223877, "learning_rate": 7.907633587299775e-06, "loss": 0.589, "step": 5010 }, { "epoch": 0.32, "grad_norm": 1.2931201457977295, "learning_rate": 7.906783130668753e-06, "loss": 0.5341, "step": 5011 }, { "epoch": 0.32, "grad_norm": 1.1558998823165894, "learning_rate": 7.905932546989738e-06, "loss": 0.5533, "step": 5012 }, { "epoch": 0.32, "grad_norm": 1.0928001403808594, "learning_rate": 7.905081836299911e-06, "loss": 0.5436, "step": 5013 }, { "epoch": 0.32, "grad_norm": 1.2499539852142334, "learning_rate": 7.904230998636451e-06, "loss": 0.5665, "step": 5014 }, { "epoch": 0.32, "grad_norm": 1.2070790529251099, "learning_rate": 7.903380034036547e-06, "loss": 0.5783, "step": 5015 }, { "epoch": 0.32, "grad_norm": 1.0703461170196533, "learning_rate": 7.902528942537394e-06, "loss": 0.5495, "step": 5016 }, { "epoch": 0.32, "grad_norm": 1.2471386194229126, "learning_rate": 7.901677724176188e-06, "loss": 0.558, "step": 5017 }, { "epoch": 0.32, "grad_norm": 1.1638555526733398, "learning_rate": 7.900826378990134e-06, "loss": 0.5854, "step": 5018 }, { "epoch": 0.32, "grad_norm": 1.1744548082351685, "learning_rate": 7.899974907016445e-06, "loss": 0.5462, "step": 5019 }, { "epoch": 0.32, "grad_norm": 1.1133030652999878, "learning_rate": 7.899123308292335e-06, "loss": 0.5467, "step": 5020 }, { "epoch": 0.32, "grad_norm": 1.1371208429336548, "learning_rate": 7.898271582855025e-06, "loss": 0.515, "step": 5021 }, { "epoch": 0.32, "grad_norm": 4.373544216156006, "learning_rate": 7.897419730741741e-06, "loss": 0.5646, "step": 5022 }, { "epoch": 0.32, "grad_norm": 1.3475408554077148, "learning_rate": 7.896567751989716e-06, "loss": 0.5829, "step": 5023 }, { "epoch": 0.32, "grad_norm": 1.203530192375183, "learning_rate": 7.895715646636188e-06, "loss": 0.5507, "step": 5024 }, { "epoch": 0.32, "grad_norm": 1.38631010055542, "learning_rate": 7.894863414718402e-06, "loss": 0.504, "step": 5025 }, { "epoch": 0.32, "grad_norm": 1.4718014001846313, "learning_rate": 7.894011056273602e-06, "loss": 0.5513, "step": 5026 }, { "epoch": 0.32, "grad_norm": 1.0910053253173828, "learning_rate": 7.893158571339045e-06, "loss": 0.5548, "step": 5027 }, { "epoch": 0.32, "grad_norm": 1.1070046424865723, "learning_rate": 7.892305959951996e-06, "loss": 0.602, "step": 5028 }, { "epoch": 0.32, "grad_norm": 1.0902584791183472, "learning_rate": 7.891453222149712e-06, "loss": 0.5228, "step": 5029 }, { "epoch": 0.32, "grad_norm": 1.2193580865859985, "learning_rate": 7.890600357969466e-06, "loss": 0.5642, "step": 5030 }, { "epoch": 0.32, "grad_norm": 1.2871755361557007, "learning_rate": 7.88974736744854e-06, "loss": 0.5568, "step": 5031 }, { "epoch": 0.32, "grad_norm": 1.3102178573608398, "learning_rate": 7.88889425062421e-06, "loss": 0.5211, "step": 5032 }, { "epoch": 0.32, "grad_norm": 1.1467959880828857, "learning_rate": 7.888041007533767e-06, "loss": 0.5847, "step": 5033 }, { "epoch": 0.32, "grad_norm": 1.1819415092468262, "learning_rate": 7.887187638214503e-06, "loss": 0.5587, "step": 5034 }, { "epoch": 0.33, "grad_norm": 1.2945603132247925, "learning_rate": 7.886334142703716e-06, "loss": 0.5922, "step": 5035 }, { "epoch": 0.33, "grad_norm": 1.1795501708984375, "learning_rate": 7.885480521038709e-06, "loss": 0.5199, "step": 5036 }, { "epoch": 0.33, "grad_norm": 1.0558600425720215, "learning_rate": 7.884626773256794e-06, "loss": 0.4749, "step": 5037 }, { "epoch": 0.33, "grad_norm": 1.140488624572754, "learning_rate": 7.883772899395285e-06, "loss": 0.5835, "step": 5038 }, { "epoch": 0.33, "grad_norm": 1.0891573429107666, "learning_rate": 7.882918899491505e-06, "loss": 0.5226, "step": 5039 }, { "epoch": 0.33, "grad_norm": 1.177655816078186, "learning_rate": 7.882064773582777e-06, "loss": 0.4814, "step": 5040 }, { "epoch": 0.33, "grad_norm": 1.175526738166809, "learning_rate": 7.881210521706431e-06, "loss": 0.566, "step": 5041 }, { "epoch": 0.33, "grad_norm": 1.2146832942962646, "learning_rate": 7.88035614389981e-06, "loss": 0.5511, "step": 5042 }, { "epoch": 0.33, "grad_norm": 1.396968126296997, "learning_rate": 7.879501640200252e-06, "loss": 0.5243, "step": 5043 }, { "epoch": 0.33, "grad_norm": 1.07514488697052, "learning_rate": 7.878647010645105e-06, "loss": 0.542, "step": 5044 }, { "epoch": 0.33, "grad_norm": 1.1464614868164062, "learning_rate": 7.877792255271726e-06, "loss": 0.5668, "step": 5045 }, { "epoch": 0.33, "grad_norm": 1.145301103591919, "learning_rate": 7.87693737411747e-06, "loss": 0.5182, "step": 5046 }, { "epoch": 0.33, "grad_norm": 1.0613970756530762, "learning_rate": 7.876082367219708e-06, "loss": 0.5366, "step": 5047 }, { "epoch": 0.33, "grad_norm": 1.1426258087158203, "learning_rate": 7.875227234615803e-06, "loss": 0.5577, "step": 5048 }, { "epoch": 0.33, "grad_norm": 1.1083797216415405, "learning_rate": 7.874371976343134e-06, "loss": 0.5375, "step": 5049 }, { "epoch": 0.33, "grad_norm": 1.2677854299545288, "learning_rate": 7.873516592439083e-06, "loss": 0.5779, "step": 5050 }, { "epoch": 0.33, "grad_norm": 1.116576075553894, "learning_rate": 7.872661082941036e-06, "loss": 0.5157, "step": 5051 }, { "epoch": 0.33, "grad_norm": 1.1376876831054688, "learning_rate": 7.871805447886382e-06, "loss": 0.5941, "step": 5052 }, { "epoch": 0.33, "grad_norm": 1.2142285108566284, "learning_rate": 7.870949687312524e-06, "loss": 0.6041, "step": 5053 }, { "epoch": 0.33, "grad_norm": 1.0854461193084717, "learning_rate": 7.870093801256861e-06, "loss": 0.5338, "step": 5054 }, { "epoch": 0.33, "grad_norm": 1.1629067659378052, "learning_rate": 7.869237789756803e-06, "loss": 0.5292, "step": 5055 }, { "epoch": 0.33, "grad_norm": 1.1706104278564453, "learning_rate": 7.868381652849764e-06, "loss": 0.5827, "step": 5056 }, { "epoch": 0.33, "grad_norm": 1.2277663946151733, "learning_rate": 7.867525390573162e-06, "loss": 0.5718, "step": 5057 }, { "epoch": 0.33, "grad_norm": 1.2764768600463867, "learning_rate": 7.866669002964426e-06, "loss": 0.5464, "step": 5058 }, { "epoch": 0.33, "grad_norm": 1.3366260528564453, "learning_rate": 7.865812490060984e-06, "loss": 0.563, "step": 5059 }, { "epoch": 0.33, "grad_norm": 1.151236891746521, "learning_rate": 7.864955851900272e-06, "loss": 0.5619, "step": 5060 }, { "epoch": 0.33, "grad_norm": 1.1745730638504028, "learning_rate": 7.864099088519729e-06, "loss": 0.5272, "step": 5061 }, { "epoch": 0.33, "grad_norm": 1.1261568069458008, "learning_rate": 7.863242199956806e-06, "loss": 0.5458, "step": 5062 }, { "epoch": 0.33, "grad_norm": 1.2512454986572266, "learning_rate": 7.862385186248954e-06, "loss": 0.6045, "step": 5063 }, { "epoch": 0.33, "grad_norm": 1.3241835832595825, "learning_rate": 7.861528047433629e-06, "loss": 0.5465, "step": 5064 }, { "epoch": 0.33, "grad_norm": 1.2130409479141235, "learning_rate": 7.860670783548296e-06, "loss": 0.5247, "step": 5065 }, { "epoch": 0.33, "grad_norm": 1.0572621822357178, "learning_rate": 7.859813394630425e-06, "loss": 0.5308, "step": 5066 }, { "epoch": 0.33, "grad_norm": 1.1516298055648804, "learning_rate": 7.85895588071749e-06, "loss": 0.5583, "step": 5067 }, { "epoch": 0.33, "grad_norm": 1.220655083656311, "learning_rate": 7.858098241846968e-06, "loss": 0.5546, "step": 5068 }, { "epoch": 0.33, "grad_norm": 1.1039336919784546, "learning_rate": 7.857240478056345e-06, "loss": 0.5475, "step": 5069 }, { "epoch": 0.33, "grad_norm": 1.0790553092956543, "learning_rate": 7.856382589383113e-06, "loss": 0.5381, "step": 5070 }, { "epoch": 0.33, "grad_norm": 1.1120097637176514, "learning_rate": 7.855524575864768e-06, "loss": 0.5492, "step": 5071 }, { "epoch": 0.33, "grad_norm": 1.1295300722122192, "learning_rate": 7.854666437538811e-06, "loss": 0.5209, "step": 5072 }, { "epoch": 0.33, "grad_norm": 1.2221033573150635, "learning_rate": 7.85380817444275e-06, "loss": 0.5456, "step": 5073 }, { "epoch": 0.33, "grad_norm": 1.5389958620071411, "learning_rate": 7.852949786614097e-06, "loss": 0.498, "step": 5074 }, { "epoch": 0.33, "grad_norm": 1.082659125328064, "learning_rate": 7.85209127409037e-06, "loss": 0.5708, "step": 5075 }, { "epoch": 0.33, "grad_norm": 1.1118621826171875, "learning_rate": 7.851232636909089e-06, "loss": 0.5931, "step": 5076 }, { "epoch": 0.33, "grad_norm": 1.0672868490219116, "learning_rate": 7.850373875107786e-06, "loss": 0.5409, "step": 5077 }, { "epoch": 0.33, "grad_norm": 1.288175106048584, "learning_rate": 7.849514988723997e-06, "loss": 0.5157, "step": 5078 }, { "epoch": 0.33, "grad_norm": 1.268272042274475, "learning_rate": 7.848655977795258e-06, "loss": 0.5546, "step": 5079 }, { "epoch": 0.33, "grad_norm": 1.1729846000671387, "learning_rate": 7.847796842359116e-06, "loss": 0.6027, "step": 5080 }, { "epoch": 0.33, "grad_norm": 1.1707624197006226, "learning_rate": 7.846937582453123e-06, "loss": 0.5536, "step": 5081 }, { "epoch": 0.33, "grad_norm": 1.1182889938354492, "learning_rate": 7.846078198114832e-06, "loss": 0.5309, "step": 5082 }, { "epoch": 0.33, "grad_norm": 1.214241623878479, "learning_rate": 7.845218689381806e-06, "loss": 0.5375, "step": 5083 }, { "epoch": 0.33, "grad_norm": 1.1362226009368896, "learning_rate": 7.844359056291614e-06, "loss": 0.4983, "step": 5084 }, { "epoch": 0.33, "grad_norm": 1.1534096002578735, "learning_rate": 7.843499298881824e-06, "loss": 0.5734, "step": 5085 }, { "epoch": 0.33, "grad_norm": 1.1914541721343994, "learning_rate": 7.842639417190017e-06, "loss": 0.5332, "step": 5086 }, { "epoch": 0.33, "grad_norm": 1.1342132091522217, "learning_rate": 7.841779411253774e-06, "loss": 0.5159, "step": 5087 }, { "epoch": 0.33, "grad_norm": 1.2369462251663208, "learning_rate": 7.840919281110686e-06, "loss": 0.5649, "step": 5088 }, { "epoch": 0.33, "grad_norm": 1.328018069267273, "learning_rate": 7.840059026798346e-06, "loss": 0.5585, "step": 5089 }, { "epoch": 0.33, "grad_norm": 1.1872527599334717, "learning_rate": 7.839198648354352e-06, "loss": 0.5549, "step": 5090 }, { "epoch": 0.33, "grad_norm": 1.153740644454956, "learning_rate": 7.838338145816312e-06, "loss": 0.588, "step": 5091 }, { "epoch": 0.33, "grad_norm": 1.0705684423446655, "learning_rate": 7.837477519221831e-06, "loss": 0.5345, "step": 5092 }, { "epoch": 0.33, "grad_norm": 1.3939954042434692, "learning_rate": 7.836616768608533e-06, "loss": 0.5615, "step": 5093 }, { "epoch": 0.33, "grad_norm": 1.1474653482437134, "learning_rate": 7.835755894014032e-06, "loss": 0.5255, "step": 5094 }, { "epoch": 0.33, "grad_norm": 1.2539728879928589, "learning_rate": 7.834894895475958e-06, "loss": 0.5781, "step": 5095 }, { "epoch": 0.33, "grad_norm": 1.1951441764831543, "learning_rate": 7.834033773031944e-06, "loss": 0.5343, "step": 5096 }, { "epoch": 0.33, "grad_norm": 1.0723074674606323, "learning_rate": 7.833172526719623e-06, "loss": 0.5464, "step": 5097 }, { "epoch": 0.33, "grad_norm": 1.1731706857681274, "learning_rate": 7.83231115657664e-06, "loss": 0.5683, "step": 5098 }, { "epoch": 0.33, "grad_norm": 1.1199619770050049, "learning_rate": 7.831449662640647e-06, "loss": 0.5577, "step": 5099 }, { "epoch": 0.33, "grad_norm": 1.1587587594985962, "learning_rate": 7.830588044949292e-06, "loss": 0.5583, "step": 5100 }, { "epoch": 0.33, "grad_norm": 1.2407146692276, "learning_rate": 7.829726303540237e-06, "loss": 0.553, "step": 5101 }, { "epoch": 0.33, "grad_norm": 1.1583956480026245, "learning_rate": 7.828864438451147e-06, "loss": 0.5419, "step": 5102 }, { "epoch": 0.33, "grad_norm": 1.1137869358062744, "learning_rate": 7.82800244971969e-06, "loss": 0.576, "step": 5103 }, { "epoch": 0.33, "grad_norm": 1.215285301208496, "learning_rate": 7.827140337383543e-06, "loss": 0.5417, "step": 5104 }, { "epoch": 0.33, "grad_norm": 1.2308415174484253, "learning_rate": 7.826278101480386e-06, "loss": 0.57, "step": 5105 }, { "epoch": 0.33, "grad_norm": 1.1576011180877686, "learning_rate": 7.825415742047903e-06, "loss": 0.5285, "step": 5106 }, { "epoch": 0.33, "grad_norm": 1.1464680433273315, "learning_rate": 7.82455325912379e-06, "loss": 0.5597, "step": 5107 }, { "epoch": 0.33, "grad_norm": 1.2153165340423584, "learning_rate": 7.823690652745741e-06, "loss": 0.544, "step": 5108 }, { "epoch": 0.33, "grad_norm": 1.1198325157165527, "learning_rate": 7.82282792295146e-06, "loss": 0.5183, "step": 5109 }, { "epoch": 0.33, "grad_norm": 1.098602294921875, "learning_rate": 7.82196506977865e-06, "loss": 0.5103, "step": 5110 }, { "epoch": 0.33, "grad_norm": 1.2000678777694702, "learning_rate": 7.82110209326503e-06, "loss": 0.5375, "step": 5111 }, { "epoch": 0.33, "grad_norm": 1.2562848329544067, "learning_rate": 7.820238993448315e-06, "loss": 0.5457, "step": 5112 }, { "epoch": 0.33, "grad_norm": 1.0721904039382935, "learning_rate": 7.819375770366232e-06, "loss": 0.478, "step": 5113 }, { "epoch": 0.33, "grad_norm": 1.105156421661377, "learning_rate": 7.818512424056506e-06, "loss": 0.5314, "step": 5114 }, { "epoch": 0.33, "grad_norm": 1.1576628684997559, "learning_rate": 7.817648954556874e-06, "loss": 0.5334, "step": 5115 }, { "epoch": 0.33, "grad_norm": 1.1604870557785034, "learning_rate": 7.816785361905078e-06, "loss": 0.5462, "step": 5116 }, { "epoch": 0.33, "grad_norm": 1.191596269607544, "learning_rate": 7.815921646138858e-06, "loss": 0.5801, "step": 5117 }, { "epoch": 0.33, "grad_norm": 1.300008773803711, "learning_rate": 7.81505780729597e-06, "loss": 0.5714, "step": 5118 }, { "epoch": 0.33, "grad_norm": 1.3280384540557861, "learning_rate": 7.814193845414169e-06, "loss": 0.5672, "step": 5119 }, { "epoch": 0.33, "grad_norm": 1.206352710723877, "learning_rate": 7.813329760531215e-06, "loss": 0.5839, "step": 5120 }, { "epoch": 0.33, "grad_norm": 1.2608939409255981, "learning_rate": 7.812465552684874e-06, "loss": 0.5784, "step": 5121 }, { "epoch": 0.33, "grad_norm": 1.2562730312347412, "learning_rate": 7.811601221912921e-06, "loss": 0.557, "step": 5122 }, { "epoch": 0.33, "grad_norm": 1.2049190998077393, "learning_rate": 7.810736768253133e-06, "loss": 0.5563, "step": 5123 }, { "epoch": 0.33, "grad_norm": 1.1223958730697632, "learning_rate": 7.809872191743292e-06, "loss": 0.5637, "step": 5124 }, { "epoch": 0.33, "grad_norm": 1.2851351499557495, "learning_rate": 7.809007492421188e-06, "loss": 0.5934, "step": 5125 }, { "epoch": 0.33, "grad_norm": 1.527945876121521, "learning_rate": 7.808142670324612e-06, "loss": 0.5186, "step": 5126 }, { "epoch": 0.33, "grad_norm": 1.2070574760437012, "learning_rate": 7.807277725491367e-06, "loss": 0.5895, "step": 5127 }, { "epoch": 0.33, "grad_norm": 1.1785882711410522, "learning_rate": 7.806412657959255e-06, "loss": 0.5129, "step": 5128 }, { "epoch": 0.33, "grad_norm": 1.2565780878067017, "learning_rate": 7.805547467766087e-06, "loss": 0.5875, "step": 5129 }, { "epoch": 0.33, "grad_norm": 1.0733599662780762, "learning_rate": 7.804682154949674e-06, "loss": 0.5159, "step": 5130 }, { "epoch": 0.33, "grad_norm": 1.1435465812683105, "learning_rate": 7.803816719547845e-06, "loss": 0.5882, "step": 5131 }, { "epoch": 0.33, "grad_norm": 1.336014747619629, "learning_rate": 7.802951161598418e-06, "loss": 0.5349, "step": 5132 }, { "epoch": 0.33, "grad_norm": 1.178635597229004, "learning_rate": 7.802085481139227e-06, "loss": 0.5275, "step": 5133 }, { "epoch": 0.33, "grad_norm": 1.097257137298584, "learning_rate": 7.801219678208111e-06, "loss": 0.6134, "step": 5134 }, { "epoch": 0.33, "grad_norm": 1.250836968421936, "learning_rate": 7.800353752842911e-06, "loss": 0.5582, "step": 5135 }, { "epoch": 0.33, "grad_norm": 1.0837355852127075, "learning_rate": 7.799487705081472e-06, "loss": 0.5599, "step": 5136 }, { "epoch": 0.33, "grad_norm": 1.2645714282989502, "learning_rate": 7.798621534961648e-06, "loss": 0.577, "step": 5137 }, { "epoch": 0.33, "grad_norm": 1.190323829650879, "learning_rate": 7.797755242521298e-06, "loss": 0.5425, "step": 5138 }, { "epoch": 0.33, "grad_norm": 1.3124929666519165, "learning_rate": 7.796888827798283e-06, "loss": 0.6067, "step": 5139 }, { "epoch": 0.33, "grad_norm": 1.2732770442962646, "learning_rate": 7.796022290830477e-06, "loss": 0.5603, "step": 5140 }, { "epoch": 0.33, "grad_norm": 1.170518159866333, "learning_rate": 7.795155631655748e-06, "loss": 0.5946, "step": 5141 }, { "epoch": 0.33, "grad_norm": 1.2313921451568604, "learning_rate": 7.794288850311978e-06, "loss": 0.5029, "step": 5142 }, { "epoch": 0.33, "grad_norm": 1.2240076065063477, "learning_rate": 7.793421946837052e-06, "loss": 0.5804, "step": 5143 }, { "epoch": 0.33, "grad_norm": 1.2063417434692383, "learning_rate": 7.79255492126886e-06, "loss": 0.577, "step": 5144 }, { "epoch": 0.33, "grad_norm": 1.0833731889724731, "learning_rate": 7.791687773645296e-06, "loss": 0.5542, "step": 5145 }, { "epoch": 0.33, "grad_norm": 1.1609746217727661, "learning_rate": 7.790820504004263e-06, "loss": 0.557, "step": 5146 }, { "epoch": 0.33, "grad_norm": 1.0740764141082764, "learning_rate": 7.789953112383665e-06, "loss": 0.489, "step": 5147 }, { "epoch": 0.33, "grad_norm": 1.1340893507003784, "learning_rate": 7.789085598821416e-06, "loss": 0.543, "step": 5148 }, { "epoch": 0.33, "grad_norm": 1.161586046218872, "learning_rate": 7.788217963355432e-06, "loss": 0.4997, "step": 5149 }, { "epoch": 0.33, "grad_norm": 1.1816258430480957, "learning_rate": 7.787350206023633e-06, "loss": 0.5318, "step": 5150 }, { "epoch": 0.33, "grad_norm": 1.2204798460006714, "learning_rate": 7.78648232686395e-06, "loss": 0.5927, "step": 5151 }, { "epoch": 0.33, "grad_norm": 1.1293879747390747, "learning_rate": 7.785614325914313e-06, "loss": 0.4625, "step": 5152 }, { "epoch": 0.33, "grad_norm": 1.3418506383895874, "learning_rate": 7.784746203212658e-06, "loss": 0.5517, "step": 5153 }, { "epoch": 0.33, "grad_norm": 1.1433117389678955, "learning_rate": 7.783877958796934e-06, "loss": 0.5657, "step": 5154 }, { "epoch": 0.33, "grad_norm": 1.2424012422561646, "learning_rate": 7.783009592705087e-06, "loss": 0.5535, "step": 5155 }, { "epoch": 0.33, "grad_norm": 1.094070315361023, "learning_rate": 7.78214110497507e-06, "loss": 0.5412, "step": 5156 }, { "epoch": 0.33, "grad_norm": 1.3968020677566528, "learning_rate": 7.781272495644844e-06, "loss": 0.5912, "step": 5157 }, { "epoch": 0.33, "grad_norm": 1.1516847610473633, "learning_rate": 7.78040376475237e-06, "loss": 0.5655, "step": 5158 }, { "epoch": 0.33, "grad_norm": 1.1364582777023315, "learning_rate": 7.779534912335625e-06, "loss": 0.5307, "step": 5159 }, { "epoch": 0.33, "grad_norm": 1.335952877998352, "learning_rate": 7.77866593843258e-06, "loss": 0.6117, "step": 5160 }, { "epoch": 0.33, "grad_norm": 1.281507134437561, "learning_rate": 7.777796843081213e-06, "loss": 0.5671, "step": 5161 }, { "epoch": 0.33, "grad_norm": 1.2879812717437744, "learning_rate": 7.776927626319514e-06, "loss": 0.557, "step": 5162 }, { "epoch": 0.33, "grad_norm": 1.2781227827072144, "learning_rate": 7.776058288185476e-06, "loss": 0.5878, "step": 5163 }, { "epoch": 0.33, "grad_norm": 1.2532175779342651, "learning_rate": 7.77518882871709e-06, "loss": 0.5716, "step": 5164 }, { "epoch": 0.33, "grad_norm": 1.1658508777618408, "learning_rate": 7.77431924795236e-06, "loss": 0.5287, "step": 5165 }, { "epoch": 0.33, "grad_norm": 1.3439404964447021, "learning_rate": 7.773449545929294e-06, "loss": 0.5549, "step": 5166 }, { "epoch": 0.33, "grad_norm": 1.1272157430648804, "learning_rate": 7.772579722685905e-06, "loss": 0.5812, "step": 5167 }, { "epoch": 0.33, "grad_norm": 1.2670482397079468, "learning_rate": 7.771709778260206e-06, "loss": 0.611, "step": 5168 }, { "epoch": 0.33, "grad_norm": 1.2317371368408203, "learning_rate": 7.770839712690229e-06, "loss": 0.5207, "step": 5169 }, { "epoch": 0.33, "grad_norm": 1.1785351037979126, "learning_rate": 7.769969526013993e-06, "loss": 0.5707, "step": 5170 }, { "epoch": 0.33, "grad_norm": 1.157908320426941, "learning_rate": 7.769099218269537e-06, "loss": 0.5664, "step": 5171 }, { "epoch": 0.33, "grad_norm": 1.2019044160842896, "learning_rate": 7.768228789494898e-06, "loss": 0.557, "step": 5172 }, { "epoch": 0.33, "grad_norm": 1.1353901624679565, "learning_rate": 7.767358239728122e-06, "loss": 0.5448, "step": 5173 }, { "epoch": 0.33, "grad_norm": 1.2231882810592651, "learning_rate": 7.766487569007254e-06, "loss": 0.5748, "step": 5174 }, { "epoch": 0.33, "grad_norm": 1.075085997581482, "learning_rate": 7.765616777370357e-06, "loss": 0.575, "step": 5175 }, { "epoch": 0.33, "grad_norm": 1.1575015783309937, "learning_rate": 7.764745864855483e-06, "loss": 0.5411, "step": 5176 }, { "epoch": 0.33, "grad_norm": 1.3856710195541382, "learning_rate": 7.7638748315007e-06, "loss": 0.528, "step": 5177 }, { "epoch": 0.33, "grad_norm": 1.1808604001998901, "learning_rate": 7.76300367734408e-06, "loss": 0.5371, "step": 5178 }, { "epoch": 0.33, "grad_norm": 1.3416504859924316, "learning_rate": 7.762132402423699e-06, "loss": 0.6102, "step": 5179 }, { "epoch": 0.33, "grad_norm": 1.2064752578735352, "learning_rate": 7.761261006777635e-06, "loss": 0.6052, "step": 5180 }, { "epoch": 0.33, "grad_norm": 1.4140901565551758, "learning_rate": 7.760389490443978e-06, "loss": 0.5173, "step": 5181 }, { "epoch": 0.33, "grad_norm": 1.236519455909729, "learning_rate": 7.759517853460817e-06, "loss": 0.5205, "step": 5182 }, { "epoch": 0.33, "grad_norm": 1.053006887435913, "learning_rate": 7.758646095866252e-06, "loss": 0.5384, "step": 5183 }, { "epoch": 0.33, "grad_norm": 1.1304391622543335, "learning_rate": 7.757774217698382e-06, "loss": 0.568, "step": 5184 }, { "epoch": 0.33, "grad_norm": 1.2345168590545654, "learning_rate": 7.756902218995318e-06, "loss": 0.6129, "step": 5185 }, { "epoch": 0.33, "grad_norm": 1.2637135982513428, "learning_rate": 7.756030099795169e-06, "loss": 0.5913, "step": 5186 }, { "epoch": 0.33, "grad_norm": 1.1790125370025635, "learning_rate": 7.755157860136056e-06, "loss": 0.5674, "step": 5187 }, { "epoch": 0.33, "grad_norm": 1.1748669147491455, "learning_rate": 7.7542855000561e-06, "loss": 0.5582, "step": 5188 }, { "epoch": 0.33, "grad_norm": 1.402481198310852, "learning_rate": 7.75341301959343e-06, "loss": 0.6056, "step": 5189 }, { "epoch": 0.34, "grad_norm": 1.1273576021194458, "learning_rate": 7.752540418786184e-06, "loss": 0.5785, "step": 5190 }, { "epoch": 0.34, "grad_norm": 1.1686298847198486, "learning_rate": 7.751667697672496e-06, "loss": 0.5233, "step": 5191 }, { "epoch": 0.34, "grad_norm": 1.1766633987426758, "learning_rate": 7.750794856290513e-06, "loss": 0.5117, "step": 5192 }, { "epoch": 0.34, "grad_norm": 1.2517198324203491, "learning_rate": 7.749921894678385e-06, "loss": 0.5452, "step": 5193 }, { "epoch": 0.34, "grad_norm": 1.0908817052841187, "learning_rate": 7.749048812874265e-06, "loss": 0.6029, "step": 5194 }, { "epoch": 0.34, "grad_norm": 1.162393569946289, "learning_rate": 7.748175610916313e-06, "loss": 0.5831, "step": 5195 }, { "epoch": 0.34, "grad_norm": 1.1979494094848633, "learning_rate": 7.747302288842695e-06, "loss": 0.5905, "step": 5196 }, { "epoch": 0.34, "grad_norm": 1.2658048868179321, "learning_rate": 7.746428846691585e-06, "loss": 0.5939, "step": 5197 }, { "epoch": 0.34, "grad_norm": 1.2168331146240234, "learning_rate": 7.745555284501155e-06, "loss": 0.5248, "step": 5198 }, { "epoch": 0.34, "grad_norm": 1.194274663925171, "learning_rate": 7.744681602309584e-06, "loss": 0.5568, "step": 5199 }, { "epoch": 0.34, "grad_norm": 1.1554124355316162, "learning_rate": 7.743807800155065e-06, "loss": 0.5286, "step": 5200 }, { "epoch": 0.34, "grad_norm": 1.3891206979751587, "learning_rate": 7.742933878075785e-06, "loss": 0.5385, "step": 5201 }, { "epoch": 0.34, "grad_norm": 1.175614595413208, "learning_rate": 7.742059836109944e-06, "loss": 0.5575, "step": 5202 }, { "epoch": 0.34, "grad_norm": 1.2029317617416382, "learning_rate": 7.74118567429574e-06, "loss": 0.5754, "step": 5203 }, { "epoch": 0.34, "grad_norm": 1.0839378833770752, "learning_rate": 7.740311392671382e-06, "loss": 0.5716, "step": 5204 }, { "epoch": 0.34, "grad_norm": 1.0692499876022339, "learning_rate": 7.739436991275085e-06, "loss": 0.5153, "step": 5205 }, { "epoch": 0.34, "grad_norm": 1.1564048528671265, "learning_rate": 7.738562470145063e-06, "loss": 0.5838, "step": 5206 }, { "epoch": 0.34, "grad_norm": 1.13723886013031, "learning_rate": 7.737687829319542e-06, "loss": 0.5211, "step": 5207 }, { "epoch": 0.34, "grad_norm": 1.0745831727981567, "learning_rate": 7.736813068836749e-06, "loss": 0.5293, "step": 5208 }, { "epoch": 0.34, "grad_norm": 1.106656789779663, "learning_rate": 7.735938188734918e-06, "loss": 0.5166, "step": 5209 }, { "epoch": 0.34, "grad_norm": 1.2037099599838257, "learning_rate": 7.735063189052287e-06, "loss": 0.5728, "step": 5210 }, { "epoch": 0.34, "grad_norm": 1.2121760845184326, "learning_rate": 7.734188069827102e-06, "loss": 0.5584, "step": 5211 }, { "epoch": 0.34, "grad_norm": 1.1701499223709106, "learning_rate": 7.73331283109761e-06, "loss": 0.5258, "step": 5212 }, { "epoch": 0.34, "grad_norm": 1.2088345289230347, "learning_rate": 7.732437472902067e-06, "loss": 0.5417, "step": 5213 }, { "epoch": 0.34, "grad_norm": 1.1941769123077393, "learning_rate": 7.731561995278732e-06, "loss": 0.5732, "step": 5214 }, { "epoch": 0.34, "grad_norm": 1.3076565265655518, "learning_rate": 7.730686398265868e-06, "loss": 0.6016, "step": 5215 }, { "epoch": 0.34, "grad_norm": 1.1653410196304321, "learning_rate": 7.729810681901747e-06, "loss": 0.524, "step": 5216 }, { "epoch": 0.34, "grad_norm": 1.1899231672286987, "learning_rate": 7.728934846224645e-06, "loss": 0.5626, "step": 5217 }, { "epoch": 0.34, "grad_norm": 1.3039745092391968, "learning_rate": 7.728058891272841e-06, "loss": 0.556, "step": 5218 }, { "epoch": 0.34, "grad_norm": 1.128411889076233, "learning_rate": 7.727182817084622e-06, "loss": 0.5639, "step": 5219 }, { "epoch": 0.34, "grad_norm": 1.0817105770111084, "learning_rate": 7.726306623698278e-06, "loss": 0.5142, "step": 5220 }, { "epoch": 0.34, "grad_norm": 1.1457380056381226, "learning_rate": 7.725430311152104e-06, "loss": 0.5308, "step": 5221 }, { "epoch": 0.34, "grad_norm": 1.1130809783935547, "learning_rate": 7.724553879484406e-06, "loss": 0.4993, "step": 5222 }, { "epoch": 0.34, "grad_norm": 1.1388264894485474, "learning_rate": 7.723677328733484e-06, "loss": 0.5164, "step": 5223 }, { "epoch": 0.34, "grad_norm": 1.149950623512268, "learning_rate": 7.722800658937655e-06, "loss": 0.5873, "step": 5224 }, { "epoch": 0.34, "grad_norm": 1.1761689186096191, "learning_rate": 7.721923870135235e-06, "loss": 0.5853, "step": 5225 }, { "epoch": 0.34, "grad_norm": 1.1371783018112183, "learning_rate": 7.721046962364542e-06, "loss": 0.5669, "step": 5226 }, { "epoch": 0.34, "grad_norm": 1.3789364099502563, "learning_rate": 7.72016993566391e-06, "loss": 0.5757, "step": 5227 }, { "epoch": 0.34, "grad_norm": 1.198297142982483, "learning_rate": 7.719292790071666e-06, "loss": 0.4874, "step": 5228 }, { "epoch": 0.34, "grad_norm": 1.2518327236175537, "learning_rate": 7.718415525626151e-06, "loss": 0.6149, "step": 5229 }, { "epoch": 0.34, "grad_norm": 1.178118348121643, "learning_rate": 7.717538142365709e-06, "loss": 0.5778, "step": 5230 }, { "epoch": 0.34, "grad_norm": 1.017838716506958, "learning_rate": 7.716660640328684e-06, "loss": 0.495, "step": 5231 }, { "epoch": 0.34, "grad_norm": 1.2110927104949951, "learning_rate": 7.71578301955343e-06, "loss": 0.5634, "step": 5232 }, { "epoch": 0.34, "grad_norm": 1.1371456384658813, "learning_rate": 7.714905280078308e-06, "loss": 0.516, "step": 5233 }, { "epoch": 0.34, "grad_norm": 1.0587526559829712, "learning_rate": 7.714027421941683e-06, "loss": 0.5162, "step": 5234 }, { "epoch": 0.34, "grad_norm": 1.0431768894195557, "learning_rate": 7.71314944518192e-06, "loss": 0.5247, "step": 5235 }, { "epoch": 0.34, "grad_norm": 1.300467610359192, "learning_rate": 7.712271349837395e-06, "loss": 0.5958, "step": 5236 }, { "epoch": 0.34, "grad_norm": 1.1851603984832764, "learning_rate": 7.711393135946487e-06, "loss": 0.5486, "step": 5237 }, { "epoch": 0.34, "grad_norm": 1.4721860885620117, "learning_rate": 7.71051480354758e-06, "loss": 0.5179, "step": 5238 }, { "epoch": 0.34, "grad_norm": 1.1718719005584717, "learning_rate": 7.709636352679066e-06, "loss": 0.5004, "step": 5239 }, { "epoch": 0.34, "grad_norm": 1.2281982898712158, "learning_rate": 7.708757783379337e-06, "loss": 0.5747, "step": 5240 }, { "epoch": 0.34, "grad_norm": 1.1281970739364624, "learning_rate": 7.707879095686792e-06, "loss": 0.5343, "step": 5241 }, { "epoch": 0.34, "grad_norm": 1.1065359115600586, "learning_rate": 7.70700028963984e-06, "loss": 0.5508, "step": 5242 }, { "epoch": 0.34, "grad_norm": 1.2060635089874268, "learning_rate": 7.706121365276888e-06, "loss": 0.5528, "step": 5243 }, { "epoch": 0.34, "grad_norm": 1.1004940271377563, "learning_rate": 7.705242322636354e-06, "loss": 0.5027, "step": 5244 }, { "epoch": 0.34, "grad_norm": 1.189820647239685, "learning_rate": 7.70436316175666e-06, "loss": 0.5622, "step": 5245 }, { "epoch": 0.34, "grad_norm": 1.1592702865600586, "learning_rate": 7.703483882676228e-06, "loss": 0.5415, "step": 5246 }, { "epoch": 0.34, "grad_norm": 1.2170875072479248, "learning_rate": 7.702604485433488e-06, "loss": 0.5312, "step": 5247 }, { "epoch": 0.34, "grad_norm": 1.1211724281311035, "learning_rate": 7.701724970066882e-06, "loss": 0.553, "step": 5248 }, { "epoch": 0.34, "grad_norm": 1.2499263286590576, "learning_rate": 7.700845336614846e-06, "loss": 0.6004, "step": 5249 }, { "epoch": 0.34, "grad_norm": 1.2291146516799927, "learning_rate": 7.69996558511583e-06, "loss": 0.5467, "step": 5250 }, { "epoch": 0.34, "grad_norm": 1.2065154314041138, "learning_rate": 7.699085715608284e-06, "loss": 0.5662, "step": 5251 }, { "epoch": 0.34, "grad_norm": 1.2347850799560547, "learning_rate": 7.698205728130664e-06, "loss": 0.5864, "step": 5252 }, { "epoch": 0.34, "grad_norm": 1.3459914922714233, "learning_rate": 7.697325622721435e-06, "loss": 0.5346, "step": 5253 }, { "epoch": 0.34, "grad_norm": 1.1109728813171387, "learning_rate": 7.696445399419062e-06, "loss": 0.557, "step": 5254 }, { "epoch": 0.34, "grad_norm": 1.1328694820404053, "learning_rate": 7.695565058262015e-06, "loss": 0.5507, "step": 5255 }, { "epoch": 0.34, "grad_norm": 1.1022104024887085, "learning_rate": 7.694684599288775e-06, "loss": 0.5149, "step": 5256 }, { "epoch": 0.34, "grad_norm": 1.1449378728866577, "learning_rate": 7.693804022537826e-06, "loss": 0.5511, "step": 5257 }, { "epoch": 0.34, "grad_norm": 1.1535542011260986, "learning_rate": 7.69292332804765e-06, "loss": 0.5379, "step": 5258 }, { "epoch": 0.34, "grad_norm": 1.1372566223144531, "learning_rate": 7.692042515856745e-06, "loss": 0.5225, "step": 5259 }, { "epoch": 0.34, "grad_norm": 1.2335383892059326, "learning_rate": 7.691161586003607e-06, "loss": 0.5702, "step": 5260 }, { "epoch": 0.34, "grad_norm": 1.2840890884399414, "learning_rate": 7.69028053852674e-06, "loss": 0.5536, "step": 5261 }, { "epoch": 0.34, "grad_norm": 1.2451636791229248, "learning_rate": 7.68939937346465e-06, "loss": 0.5666, "step": 5262 }, { "epoch": 0.34, "grad_norm": 1.1557427644729614, "learning_rate": 7.688518090855853e-06, "loss": 0.5417, "step": 5263 }, { "epoch": 0.34, "grad_norm": 1.069617748260498, "learning_rate": 7.687636690738867e-06, "loss": 0.5385, "step": 5264 }, { "epoch": 0.34, "grad_norm": 1.2112137079238892, "learning_rate": 7.686755173152216e-06, "loss": 0.5245, "step": 5265 }, { "epoch": 0.34, "grad_norm": 1.4415233135223389, "learning_rate": 7.685873538134427e-06, "loss": 0.534, "step": 5266 }, { "epoch": 0.34, "grad_norm": 1.0858263969421387, "learning_rate": 7.684991785724036e-06, "loss": 0.5854, "step": 5267 }, { "epoch": 0.34, "grad_norm": 1.281009554862976, "learning_rate": 7.684109915959582e-06, "loss": 0.4949, "step": 5268 }, { "epoch": 0.34, "grad_norm": 1.1527438163757324, "learning_rate": 7.683227928879608e-06, "loss": 0.5582, "step": 5269 }, { "epoch": 0.34, "grad_norm": 1.1123063564300537, "learning_rate": 7.682345824522663e-06, "loss": 0.4967, "step": 5270 }, { "epoch": 0.34, "grad_norm": 1.1892638206481934, "learning_rate": 7.681463602927305e-06, "loss": 0.5783, "step": 5271 }, { "epoch": 0.34, "grad_norm": 1.180893063545227, "learning_rate": 7.680581264132088e-06, "loss": 0.5534, "step": 5272 }, { "epoch": 0.34, "grad_norm": 1.0307234525680542, "learning_rate": 7.679698808175582e-06, "loss": 0.5428, "step": 5273 }, { "epoch": 0.34, "grad_norm": 1.1366307735443115, "learning_rate": 7.678816235096353e-06, "loss": 0.5298, "step": 5274 }, { "epoch": 0.34, "grad_norm": 1.239931583404541, "learning_rate": 7.67793354493298e-06, "loss": 0.5501, "step": 5275 }, { "epoch": 0.34, "grad_norm": 1.1006945371627808, "learning_rate": 7.67705073772404e-06, "loss": 0.5356, "step": 5276 }, { "epoch": 0.34, "grad_norm": 1.4286234378814697, "learning_rate": 7.67616781350812e-06, "loss": 0.5893, "step": 5277 }, { "epoch": 0.34, "grad_norm": 1.249405860900879, "learning_rate": 7.675284772323808e-06, "loss": 0.5401, "step": 5278 }, { "epoch": 0.34, "grad_norm": 1.1879093647003174, "learning_rate": 7.6744016142097e-06, "loss": 0.5338, "step": 5279 }, { "epoch": 0.34, "grad_norm": 1.1957992315292358, "learning_rate": 7.6735183392044e-06, "loss": 0.5209, "step": 5280 }, { "epoch": 0.34, "grad_norm": 1.14713716506958, "learning_rate": 7.672634947346512e-06, "loss": 0.5072, "step": 5281 }, { "epoch": 0.34, "grad_norm": 1.3579033613204956, "learning_rate": 7.671751438674644e-06, "loss": 0.4956, "step": 5282 }, { "epoch": 0.34, "grad_norm": 1.1090108156204224, "learning_rate": 7.670867813227415e-06, "loss": 0.5209, "step": 5283 }, { "epoch": 0.34, "grad_norm": 1.0769511461257935, "learning_rate": 7.669984071043442e-06, "loss": 0.5203, "step": 5284 }, { "epoch": 0.34, "grad_norm": 1.1424435377120972, "learning_rate": 7.669100212161356e-06, "loss": 0.5675, "step": 5285 }, { "epoch": 0.34, "grad_norm": 1.172960877418518, "learning_rate": 7.668216236619786e-06, "loss": 0.5093, "step": 5286 }, { "epoch": 0.34, "grad_norm": 1.291610836982727, "learning_rate": 7.667332144457369e-06, "loss": 0.5198, "step": 5287 }, { "epoch": 0.34, "grad_norm": 1.2642709016799927, "learning_rate": 7.666447935712743e-06, "loss": 0.5423, "step": 5288 }, { "epoch": 0.34, "grad_norm": 1.364235758781433, "learning_rate": 7.665563610424562e-06, "loss": 0.5459, "step": 5289 }, { "epoch": 0.34, "grad_norm": 1.1848087310791016, "learning_rate": 7.664679168631468e-06, "loss": 0.5796, "step": 5290 }, { "epoch": 0.34, "grad_norm": 1.102948784828186, "learning_rate": 7.663794610372124e-06, "loss": 0.5528, "step": 5291 }, { "epoch": 0.34, "grad_norm": 1.181790828704834, "learning_rate": 7.662909935685193e-06, "loss": 0.5707, "step": 5292 }, { "epoch": 0.34, "grad_norm": 1.228040099143982, "learning_rate": 7.662025144609336e-06, "loss": 0.5334, "step": 5293 }, { "epoch": 0.34, "grad_norm": 1.1052039861679077, "learning_rate": 7.661140237183228e-06, "loss": 0.5608, "step": 5294 }, { "epoch": 0.34, "grad_norm": 1.1036401987075806, "learning_rate": 7.660255213445549e-06, "loss": 0.5021, "step": 5295 }, { "epoch": 0.34, "grad_norm": 1.0387790203094482, "learning_rate": 7.659370073434974e-06, "loss": 0.5295, "step": 5296 }, { "epoch": 0.34, "grad_norm": 1.1648706197738647, "learning_rate": 7.658484817190199e-06, "loss": 0.5816, "step": 5297 }, { "epoch": 0.34, "grad_norm": 1.359817385673523, "learning_rate": 7.657599444749907e-06, "loss": 0.5564, "step": 5298 }, { "epoch": 0.34, "grad_norm": 1.2050777673721313, "learning_rate": 7.656713956152803e-06, "loss": 0.6052, "step": 5299 }, { "epoch": 0.34, "grad_norm": 1.2643369436264038, "learning_rate": 7.655828351437587e-06, "loss": 0.5899, "step": 5300 }, { "epoch": 0.34, "grad_norm": 1.1987253427505493, "learning_rate": 7.654942630642965e-06, "loss": 0.5598, "step": 5301 }, { "epoch": 0.34, "grad_norm": 1.242450475692749, "learning_rate": 7.65405679380765e-06, "loss": 0.5445, "step": 5302 }, { "epoch": 0.34, "grad_norm": 1.1432924270629883, "learning_rate": 7.65317084097036e-06, "loss": 0.5502, "step": 5303 }, { "epoch": 0.34, "grad_norm": 1.1772209405899048, "learning_rate": 7.652284772169816e-06, "loss": 0.566, "step": 5304 }, { "epoch": 0.34, "grad_norm": 1.257742166519165, "learning_rate": 7.65139858744475e-06, "loss": 0.5401, "step": 5305 }, { "epoch": 0.34, "grad_norm": 1.210862398147583, "learning_rate": 7.650512286833891e-06, "loss": 0.5795, "step": 5306 }, { "epoch": 0.34, "grad_norm": 1.1537270545959473, "learning_rate": 7.649625870375981e-06, "loss": 0.5971, "step": 5307 }, { "epoch": 0.34, "grad_norm": 1.2930058240890503, "learning_rate": 7.648739338109759e-06, "loss": 0.5315, "step": 5308 }, { "epoch": 0.34, "grad_norm": 1.6675193309783936, "learning_rate": 7.647852690073973e-06, "loss": 0.5482, "step": 5309 }, { "epoch": 0.34, "grad_norm": 1.377129316329956, "learning_rate": 7.64696592630738e-06, "loss": 0.5147, "step": 5310 }, { "epoch": 0.34, "grad_norm": 1.218926191329956, "learning_rate": 7.646079046848732e-06, "loss": 0.5267, "step": 5311 }, { "epoch": 0.34, "grad_norm": 1.1598854064941406, "learning_rate": 7.645192051736799e-06, "loss": 0.5449, "step": 5312 }, { "epoch": 0.34, "grad_norm": 1.253422498703003, "learning_rate": 7.644304941010345e-06, "loss": 0.6141, "step": 5313 }, { "epoch": 0.34, "grad_norm": 1.1907910108566284, "learning_rate": 7.643417714708144e-06, "loss": 0.5197, "step": 5314 }, { "epoch": 0.34, "grad_norm": 1.0494358539581299, "learning_rate": 7.642530372868974e-06, "loss": 0.5345, "step": 5315 }, { "epoch": 0.34, "grad_norm": 1.2018685340881348, "learning_rate": 7.641642915531622e-06, "loss": 0.5983, "step": 5316 }, { "epoch": 0.34, "grad_norm": 1.1627079248428345, "learning_rate": 7.640755342734872e-06, "loss": 0.5339, "step": 5317 }, { "epoch": 0.34, "grad_norm": 1.129647135734558, "learning_rate": 7.63986765451752e-06, "loss": 0.5167, "step": 5318 }, { "epoch": 0.34, "grad_norm": 1.1949398517608643, "learning_rate": 7.638979850918364e-06, "loss": 0.5322, "step": 5319 }, { "epoch": 0.34, "grad_norm": 1.0576891899108887, "learning_rate": 7.638091931976206e-06, "loss": 0.5435, "step": 5320 }, { "epoch": 0.34, "grad_norm": 1.2916966676712036, "learning_rate": 7.637203897729856e-06, "loss": 0.5421, "step": 5321 }, { "epoch": 0.34, "grad_norm": 1.243985652923584, "learning_rate": 7.636315748218129e-06, "loss": 0.5696, "step": 5322 }, { "epoch": 0.34, "grad_norm": 1.0794143676757812, "learning_rate": 7.63542748347984e-06, "loss": 0.5076, "step": 5323 }, { "epoch": 0.34, "grad_norm": 1.044776439666748, "learning_rate": 7.634539103553818e-06, "loss": 0.5241, "step": 5324 }, { "epoch": 0.34, "grad_norm": 1.0923532247543335, "learning_rate": 7.633650608478887e-06, "loss": 0.4997, "step": 5325 }, { "epoch": 0.34, "grad_norm": 1.1200497150421143, "learning_rate": 7.632761998293884e-06, "loss": 0.5312, "step": 5326 }, { "epoch": 0.34, "grad_norm": 1.1519956588745117, "learning_rate": 7.631873273037647e-06, "loss": 0.5487, "step": 5327 }, { "epoch": 0.34, "grad_norm": 1.245306134223938, "learning_rate": 7.630984432749017e-06, "loss": 0.5569, "step": 5328 }, { "epoch": 0.34, "grad_norm": 1.2323863506317139, "learning_rate": 7.630095477466848e-06, "loss": 0.6411, "step": 5329 }, { "epoch": 0.34, "grad_norm": 1.22800874710083, "learning_rate": 7.629206407229992e-06, "loss": 0.548, "step": 5330 }, { "epoch": 0.34, "grad_norm": 1.1481767892837524, "learning_rate": 7.6283172220773065e-06, "loss": 0.5279, "step": 5331 }, { "epoch": 0.34, "grad_norm": 1.145075798034668, "learning_rate": 7.627427922047654e-06, "loss": 0.5686, "step": 5332 }, { "epoch": 0.34, "grad_norm": 1.1402381658554077, "learning_rate": 7.62653850717991e-06, "loss": 0.5609, "step": 5333 }, { "epoch": 0.34, "grad_norm": 1.0790075063705444, "learning_rate": 7.6256489775129415e-06, "loss": 0.5054, "step": 5334 }, { "epoch": 0.34, "grad_norm": 1.280454397201538, "learning_rate": 7.624759333085632e-06, "loss": 0.5625, "step": 5335 }, { "epoch": 0.34, "grad_norm": 1.2920382022857666, "learning_rate": 7.623869573936863e-06, "loss": 0.551, "step": 5336 }, { "epoch": 0.34, "grad_norm": 1.3290413618087769, "learning_rate": 7.6229797001055235e-06, "loss": 0.5711, "step": 5337 }, { "epoch": 0.34, "grad_norm": 1.224473237991333, "learning_rate": 7.622089711630512e-06, "loss": 0.5836, "step": 5338 }, { "epoch": 0.34, "grad_norm": 1.2993401288986206, "learning_rate": 7.6211996085507224e-06, "loss": 0.5392, "step": 5339 }, { "epoch": 0.34, "grad_norm": 1.1623237133026123, "learning_rate": 7.62030939090506e-06, "loss": 0.5509, "step": 5340 }, { "epoch": 0.34, "grad_norm": 1.2175097465515137, "learning_rate": 7.619419058732434e-06, "loss": 0.5909, "step": 5341 }, { "epoch": 0.34, "grad_norm": 1.0587667226791382, "learning_rate": 7.6185286120717615e-06, "loss": 0.5838, "step": 5342 }, { "epoch": 0.34, "grad_norm": 1.2486951351165771, "learning_rate": 7.617638050961957e-06, "loss": 0.5664, "step": 5343 }, { "epoch": 0.34, "grad_norm": 1.2960596084594727, "learning_rate": 7.616747375441949e-06, "loss": 0.5222, "step": 5344 }, { "epoch": 0.35, "grad_norm": 1.303763747215271, "learning_rate": 7.6158565855506635e-06, "loss": 0.536, "step": 5345 }, { "epoch": 0.35, "grad_norm": 1.2386630773544312, "learning_rate": 7.6149656813270355e-06, "loss": 0.5165, "step": 5346 }, { "epoch": 0.35, "grad_norm": 1.1495429277420044, "learning_rate": 7.614074662810005e-06, "loss": 0.548, "step": 5347 }, { "epoch": 0.35, "grad_norm": 1.1672993898391724, "learning_rate": 7.613183530038515e-06, "loss": 0.5455, "step": 5348 }, { "epoch": 0.35, "grad_norm": 1.1597497463226318, "learning_rate": 7.612292283051515e-06, "loss": 0.5608, "step": 5349 }, { "epoch": 0.35, "grad_norm": 1.2277276515960693, "learning_rate": 7.611400921887958e-06, "loss": 0.5339, "step": 5350 }, { "epoch": 0.35, "grad_norm": 1.1816107034683228, "learning_rate": 7.610509446586806e-06, "loss": 0.5214, "step": 5351 }, { "epoch": 0.35, "grad_norm": 1.241702914237976, "learning_rate": 7.60961785718702e-06, "loss": 0.5438, "step": 5352 }, { "epoch": 0.35, "grad_norm": 1.2051866054534912, "learning_rate": 7.60872615372757e-06, "loss": 0.5668, "step": 5353 }, { "epoch": 0.35, "grad_norm": 1.2179484367370605, "learning_rate": 7.607834336247433e-06, "loss": 0.5566, "step": 5354 }, { "epoch": 0.35, "grad_norm": 1.1740131378173828, "learning_rate": 7.6069424047855824e-06, "loss": 0.5587, "step": 5355 }, { "epoch": 0.35, "grad_norm": 1.1145297288894653, "learning_rate": 7.606050359381007e-06, "loss": 0.529, "step": 5356 }, { "epoch": 0.35, "grad_norm": 1.173911690711975, "learning_rate": 7.6051582000726945e-06, "loss": 0.5781, "step": 5357 }, { "epoch": 0.35, "grad_norm": 1.169009804725647, "learning_rate": 7.604265926899639e-06, "loss": 0.5578, "step": 5358 }, { "epoch": 0.35, "grad_norm": 1.1535224914550781, "learning_rate": 7.603373539900835e-06, "loss": 0.5268, "step": 5359 }, { "epoch": 0.35, "grad_norm": 1.2758941650390625, "learning_rate": 7.602481039115295e-06, "loss": 0.5102, "step": 5360 }, { "epoch": 0.35, "grad_norm": 1.3139128684997559, "learning_rate": 7.601588424582021e-06, "loss": 0.5632, "step": 5361 }, { "epoch": 0.35, "grad_norm": 1.1841084957122803, "learning_rate": 7.600695696340029e-06, "loss": 0.5752, "step": 5362 }, { "epoch": 0.35, "grad_norm": 1.1113086938858032, "learning_rate": 7.5998028544283395e-06, "loss": 0.5398, "step": 5363 }, { "epoch": 0.35, "grad_norm": 1.174030065536499, "learning_rate": 7.598909898885973e-06, "loss": 0.5844, "step": 5364 }, { "epoch": 0.35, "grad_norm": 1.2781825065612793, "learning_rate": 7.598016829751959e-06, "loss": 0.532, "step": 5365 }, { "epoch": 0.35, "grad_norm": 1.2228753566741943, "learning_rate": 7.597123647065336e-06, "loss": 0.5534, "step": 5366 }, { "epoch": 0.35, "grad_norm": 1.152024507522583, "learning_rate": 7.596230350865137e-06, "loss": 0.5539, "step": 5367 }, { "epoch": 0.35, "grad_norm": 1.182745337486267, "learning_rate": 7.595336941190409e-06, "loss": 0.5557, "step": 5368 }, { "epoch": 0.35, "grad_norm": 1.1158287525177002, "learning_rate": 7.594443418080197e-06, "loss": 0.5452, "step": 5369 }, { "epoch": 0.35, "grad_norm": 1.3515263795852661, "learning_rate": 7.593549781573559e-06, "loss": 0.5936, "step": 5370 }, { "epoch": 0.35, "grad_norm": 1.1571816205978394, "learning_rate": 7.592656031709551e-06, "loss": 0.5351, "step": 5371 }, { "epoch": 0.35, "grad_norm": 1.1416188478469849, "learning_rate": 7.591762168527237e-06, "loss": 0.5642, "step": 5372 }, { "epoch": 0.35, "grad_norm": 1.2000631093978882, "learning_rate": 7.5908681920656834e-06, "loss": 0.5725, "step": 5373 }, { "epoch": 0.35, "grad_norm": 1.2225940227508545, "learning_rate": 7.589974102363968e-06, "loss": 0.5459, "step": 5374 }, { "epoch": 0.35, "grad_norm": 1.2091041803359985, "learning_rate": 7.589079899461167e-06, "loss": 0.5402, "step": 5375 }, { "epoch": 0.35, "grad_norm": 1.161887526512146, "learning_rate": 7.588185583396363e-06, "loss": 0.5436, "step": 5376 }, { "epoch": 0.35, "grad_norm": 1.1439707279205322, "learning_rate": 7.587291154208645e-06, "loss": 0.4959, "step": 5377 }, { "epoch": 0.35, "grad_norm": 1.2478387355804443, "learning_rate": 7.586396611937104e-06, "loss": 0.5141, "step": 5378 }, { "epoch": 0.35, "grad_norm": 1.139794945716858, "learning_rate": 7.5855019566208425e-06, "loss": 0.5891, "step": 5379 }, { "epoch": 0.35, "grad_norm": 1.2294654846191406, "learning_rate": 7.58460718829896e-06, "loss": 0.5883, "step": 5380 }, { "epoch": 0.35, "grad_norm": 1.1936397552490234, "learning_rate": 7.583712307010566e-06, "loss": 0.5704, "step": 5381 }, { "epoch": 0.35, "grad_norm": 1.3340296745300293, "learning_rate": 7.5828173127947725e-06, "loss": 0.5952, "step": 5382 }, { "epoch": 0.35, "grad_norm": 1.0791258811950684, "learning_rate": 7.5819222056907e-06, "loss": 0.5812, "step": 5383 }, { "epoch": 0.35, "grad_norm": 1.0844377279281616, "learning_rate": 7.581026985737467e-06, "loss": 0.5211, "step": 5384 }, { "epoch": 0.35, "grad_norm": 1.10100519657135, "learning_rate": 7.580131652974203e-06, "loss": 0.5232, "step": 5385 }, { "epoch": 0.35, "grad_norm": 1.2996400594711304, "learning_rate": 7.579236207440044e-06, "loss": 0.6124, "step": 5386 }, { "epoch": 0.35, "grad_norm": 1.0994715690612793, "learning_rate": 7.578340649174123e-06, "loss": 0.5724, "step": 5387 }, { "epoch": 0.35, "grad_norm": 1.3291950225830078, "learning_rate": 7.577444978215584e-06, "loss": 0.5999, "step": 5388 }, { "epoch": 0.35, "grad_norm": 1.2479634284973145, "learning_rate": 7.576549194603578e-06, "loss": 0.5312, "step": 5389 }, { "epoch": 0.35, "grad_norm": 1.1444650888442993, "learning_rate": 7.575653298377252e-06, "loss": 0.574, "step": 5390 }, { "epoch": 0.35, "grad_norm": 1.1606429815292358, "learning_rate": 7.574757289575765e-06, "loss": 0.5569, "step": 5391 }, { "epoch": 0.35, "grad_norm": 1.1558581590652466, "learning_rate": 7.5738611682382816e-06, "loss": 0.5727, "step": 5392 }, { "epoch": 0.35, "grad_norm": 1.2476056814193726, "learning_rate": 7.5729649344039655e-06, "loss": 0.5517, "step": 5393 }, { "epoch": 0.35, "grad_norm": 1.2299615144729614, "learning_rate": 7.572068588111991e-06, "loss": 0.5752, "step": 5394 }, { "epoch": 0.35, "grad_norm": 1.1296870708465576, "learning_rate": 7.5711721294015364e-06, "loss": 0.5496, "step": 5395 }, { "epoch": 0.35, "grad_norm": 1.1339993476867676, "learning_rate": 7.570275558311779e-06, "loss": 0.561, "step": 5396 }, { "epoch": 0.35, "grad_norm": 1.182433843612671, "learning_rate": 7.56937887488191e-06, "loss": 0.5249, "step": 5397 }, { "epoch": 0.35, "grad_norm": 1.1102538108825684, "learning_rate": 7.568482079151118e-06, "loss": 0.5308, "step": 5398 }, { "epoch": 0.35, "grad_norm": 1.1582437753677368, "learning_rate": 7.5675851711586025e-06, "loss": 0.5701, "step": 5399 }, { "epoch": 0.35, "grad_norm": 1.1513029336929321, "learning_rate": 7.566688150943563e-06, "loss": 0.5217, "step": 5400 }, { "epoch": 0.35, "grad_norm": 1.2099248170852661, "learning_rate": 7.565791018545208e-06, "loss": 0.566, "step": 5401 }, { "epoch": 0.35, "grad_norm": 1.2039952278137207, "learning_rate": 7.564893774002745e-06, "loss": 0.5561, "step": 5402 }, { "epoch": 0.35, "grad_norm": 1.162505030632019, "learning_rate": 7.563996417355395e-06, "loss": 0.5617, "step": 5403 }, { "epoch": 0.35, "grad_norm": 1.1225814819335938, "learning_rate": 7.563098948642374e-06, "loss": 0.5194, "step": 5404 }, { "epoch": 0.35, "grad_norm": 1.1671833992004395, "learning_rate": 7.562201367902914e-06, "loss": 0.5514, "step": 5405 }, { "epoch": 0.35, "grad_norm": 1.175070881843567, "learning_rate": 7.56130367517624e-06, "loss": 0.5608, "step": 5406 }, { "epoch": 0.35, "grad_norm": 1.152637004852295, "learning_rate": 7.5604058705015926e-06, "loss": 0.5841, "step": 5407 }, { "epoch": 0.35, "grad_norm": 1.1095777750015259, "learning_rate": 7.5595079539182106e-06, "loss": 0.5283, "step": 5408 }, { "epoch": 0.35, "grad_norm": 1.151301383972168, "learning_rate": 7.558609925465338e-06, "loss": 0.5421, "step": 5409 }, { "epoch": 0.35, "grad_norm": 1.2231439352035522, "learning_rate": 7.557711785182228e-06, "loss": 0.5273, "step": 5410 }, { "epoch": 0.35, "grad_norm": 1.100818157196045, "learning_rate": 7.556813533108134e-06, "loss": 0.5232, "step": 5411 }, { "epoch": 0.35, "grad_norm": 1.2683347463607788, "learning_rate": 7.555915169282317e-06, "loss": 0.5753, "step": 5412 }, { "epoch": 0.35, "grad_norm": 1.1708662509918213, "learning_rate": 7.555016693744044e-06, "loss": 0.5406, "step": 5413 }, { "epoch": 0.35, "grad_norm": 1.201413869857788, "learning_rate": 7.554118106532582e-06, "loss": 0.5613, "step": 5414 }, { "epoch": 0.35, "grad_norm": 1.2310246229171753, "learning_rate": 7.553219407687207e-06, "loss": 0.5826, "step": 5415 }, { "epoch": 0.35, "grad_norm": 1.1485774517059326, "learning_rate": 7.5523205972472e-06, "loss": 0.5302, "step": 5416 }, { "epoch": 0.35, "grad_norm": 1.218666434288025, "learning_rate": 7.551421675251844e-06, "loss": 0.5648, "step": 5417 }, { "epoch": 0.35, "grad_norm": 1.3142313957214355, "learning_rate": 7.55052264174043e-06, "loss": 0.5449, "step": 5418 }, { "epoch": 0.35, "grad_norm": 1.1487478017807007, "learning_rate": 7.549623496752251e-06, "loss": 0.5809, "step": 5419 }, { "epoch": 0.35, "grad_norm": 1.1371876001358032, "learning_rate": 7.548724240326607e-06, "loss": 0.5297, "step": 5420 }, { "epoch": 0.35, "grad_norm": 1.2285594940185547, "learning_rate": 7.547824872502803e-06, "loss": 0.5731, "step": 5421 }, { "epoch": 0.35, "grad_norm": 1.3289635181427002, "learning_rate": 7.546925393320147e-06, "loss": 0.577, "step": 5422 }, { "epoch": 0.35, "grad_norm": 1.1439608335494995, "learning_rate": 7.546025802817953e-06, "loss": 0.5142, "step": 5423 }, { "epoch": 0.35, "grad_norm": 1.3017923831939697, "learning_rate": 7.54512610103554e-06, "loss": 0.5379, "step": 5424 }, { "epoch": 0.35, "grad_norm": 1.3082431554794312, "learning_rate": 7.544226288012233e-06, "loss": 0.5424, "step": 5425 }, { "epoch": 0.35, "grad_norm": 1.1731765270233154, "learning_rate": 7.543326363787358e-06, "loss": 0.5238, "step": 5426 }, { "epoch": 0.35, "grad_norm": 1.1888586282730103, "learning_rate": 7.54242632840025e-06, "loss": 0.5532, "step": 5427 }, { "epoch": 0.35, "grad_norm": 1.1258538961410522, "learning_rate": 7.541526181890245e-06, "loss": 0.5542, "step": 5428 }, { "epoch": 0.35, "grad_norm": 1.176339864730835, "learning_rate": 7.540625924296689e-06, "loss": 0.4963, "step": 5429 }, { "epoch": 0.35, "grad_norm": 1.0827643871307373, "learning_rate": 7.53972555565893e-06, "loss": 0.5305, "step": 5430 }, { "epoch": 0.35, "grad_norm": 1.224753499031067, "learning_rate": 7.538825076016318e-06, "loss": 0.5208, "step": 5431 }, { "epoch": 0.35, "grad_norm": 1.2225325107574463, "learning_rate": 7.537924485408212e-06, "loss": 0.5561, "step": 5432 }, { "epoch": 0.35, "grad_norm": 1.3297946453094482, "learning_rate": 7.537023783873976e-06, "loss": 0.4921, "step": 5433 }, { "epoch": 0.35, "grad_norm": 1.1201390027999878, "learning_rate": 7.536122971452975e-06, "loss": 0.5529, "step": 5434 }, { "epoch": 0.35, "grad_norm": 1.6110578775405884, "learning_rate": 7.535222048184581e-06, "loss": 0.5502, "step": 5435 }, { "epoch": 0.35, "grad_norm": 1.2075411081314087, "learning_rate": 7.534321014108175e-06, "loss": 0.5708, "step": 5436 }, { "epoch": 0.35, "grad_norm": 1.1659936904907227, "learning_rate": 7.533419869263135e-06, "loss": 0.511, "step": 5437 }, { "epoch": 0.35, "grad_norm": 1.2025140523910522, "learning_rate": 7.532518613688847e-06, "loss": 0.586, "step": 5438 }, { "epoch": 0.35, "grad_norm": 1.0547571182250977, "learning_rate": 7.531617247424707e-06, "loss": 0.5294, "step": 5439 }, { "epoch": 0.35, "grad_norm": 1.2253226041793823, "learning_rate": 7.530715770510108e-06, "loss": 0.556, "step": 5440 }, { "epoch": 0.35, "grad_norm": 1.2382909059524536, "learning_rate": 7.529814182984451e-06, "loss": 0.5162, "step": 5441 }, { "epoch": 0.35, "grad_norm": 1.226096272468567, "learning_rate": 7.528912484887144e-06, "loss": 0.5739, "step": 5442 }, { "epoch": 0.35, "grad_norm": 1.2826918363571167, "learning_rate": 7.528010676257596e-06, "loss": 0.5673, "step": 5443 }, { "epoch": 0.35, "grad_norm": 1.1453564167022705, "learning_rate": 7.527108757135224e-06, "loss": 0.5241, "step": 5444 }, { "epoch": 0.35, "grad_norm": 1.1232718229293823, "learning_rate": 7.52620672755945e-06, "loss": 0.5707, "step": 5445 }, { "epoch": 0.35, "grad_norm": 1.2113926410675049, "learning_rate": 7.525304587569696e-06, "loss": 0.545, "step": 5446 }, { "epoch": 0.35, "grad_norm": 1.2844187021255493, "learning_rate": 7.524402337205395e-06, "loss": 0.591, "step": 5447 }, { "epoch": 0.35, "grad_norm": 1.1707981824874878, "learning_rate": 7.52349997650598e-06, "loss": 0.5062, "step": 5448 }, { "epoch": 0.35, "grad_norm": 1.0840801000595093, "learning_rate": 7.522597505510892e-06, "loss": 0.5453, "step": 5449 }, { "epoch": 0.35, "grad_norm": 1.0423074960708618, "learning_rate": 7.521694924259575e-06, "loss": 0.4659, "step": 5450 }, { "epoch": 0.35, "grad_norm": 1.2112069129943848, "learning_rate": 7.520792232791479e-06, "loss": 0.5292, "step": 5451 }, { "epoch": 0.35, "grad_norm": 1.3012075424194336, "learning_rate": 7.519889431146059e-06, "loss": 0.5506, "step": 5452 }, { "epoch": 0.35, "grad_norm": 1.2599204778671265, "learning_rate": 7.5189865193627735e-06, "loss": 0.5468, "step": 5453 }, { "epoch": 0.35, "grad_norm": 1.201534390449524, "learning_rate": 7.518083497481086e-06, "loss": 0.5415, "step": 5454 }, { "epoch": 0.35, "grad_norm": 1.1723167896270752, "learning_rate": 7.517180365540464e-06, "loss": 0.5919, "step": 5455 }, { "epoch": 0.35, "grad_norm": 1.1513060331344604, "learning_rate": 7.516277123580384e-06, "loss": 0.5618, "step": 5456 }, { "epoch": 0.35, "grad_norm": 1.2651101350784302, "learning_rate": 7.515373771640324e-06, "loss": 0.5882, "step": 5457 }, { "epoch": 0.35, "grad_norm": 1.2296417951583862, "learning_rate": 7.514470309759765e-06, "loss": 0.5721, "step": 5458 }, { "epoch": 0.35, "grad_norm": 1.2198097705841064, "learning_rate": 7.513566737978196e-06, "loss": 0.5565, "step": 5459 }, { "epoch": 0.35, "grad_norm": 1.3520342111587524, "learning_rate": 7.51266305633511e-06, "loss": 0.5295, "step": 5460 }, { "epoch": 0.35, "grad_norm": 1.1073671579360962, "learning_rate": 7.511759264870005e-06, "loss": 0.5462, "step": 5461 }, { "epoch": 0.35, "grad_norm": 1.126050353050232, "learning_rate": 7.510855363622382e-06, "loss": 0.5389, "step": 5462 }, { "epoch": 0.35, "grad_norm": 1.2934120893478394, "learning_rate": 7.50995135263175e-06, "loss": 0.5931, "step": 5463 }, { "epoch": 0.35, "grad_norm": 1.3112523555755615, "learning_rate": 7.509047231937619e-06, "loss": 0.5592, "step": 5464 }, { "epoch": 0.35, "grad_norm": 1.2232369184494019, "learning_rate": 7.508143001579508e-06, "loss": 0.5999, "step": 5465 }, { "epoch": 0.35, "grad_norm": 1.1704416275024414, "learning_rate": 7.507238661596938e-06, "loss": 0.5325, "step": 5466 }, { "epoch": 0.35, "grad_norm": 1.1404266357421875, "learning_rate": 7.506334212029433e-06, "loss": 0.6165, "step": 5467 }, { "epoch": 0.35, "grad_norm": 1.1625771522521973, "learning_rate": 7.505429652916529e-06, "loss": 0.5164, "step": 5468 }, { "epoch": 0.35, "grad_norm": 1.1691944599151611, "learning_rate": 7.5045249842977565e-06, "loss": 0.5348, "step": 5469 }, { "epoch": 0.35, "grad_norm": 1.2132251262664795, "learning_rate": 7.50362020621266e-06, "loss": 0.4998, "step": 5470 }, { "epoch": 0.35, "grad_norm": 1.141886591911316, "learning_rate": 7.502715318700784e-06, "loss": 0.5778, "step": 5471 }, { "epoch": 0.35, "grad_norm": 1.188124179840088, "learning_rate": 7.501810321801677e-06, "loss": 0.5594, "step": 5472 }, { "epoch": 0.35, "grad_norm": 1.2715905904769897, "learning_rate": 7.500905215554896e-06, "loss": 0.5922, "step": 5473 }, { "epoch": 0.35, "grad_norm": 1.1343433856964111, "learning_rate": 7.500000000000001e-06, "loss": 0.5366, "step": 5474 }, { "epoch": 0.35, "grad_norm": 1.1602458953857422, "learning_rate": 7.499094675176556e-06, "loss": 0.5156, "step": 5475 }, { "epoch": 0.35, "grad_norm": 1.1133506298065186, "learning_rate": 7.49818924112413e-06, "loss": 0.571, "step": 5476 }, { "epoch": 0.35, "grad_norm": 1.2146013975143433, "learning_rate": 7.497283697882298e-06, "loss": 0.5347, "step": 5477 }, { "epoch": 0.35, "grad_norm": 1.2748621702194214, "learning_rate": 7.4963780454906385e-06, "loss": 0.6502, "step": 5478 }, { "epoch": 0.35, "grad_norm": 1.3700312376022339, "learning_rate": 7.495472283988735e-06, "loss": 0.5934, "step": 5479 }, { "epoch": 0.35, "grad_norm": 1.2504327297210693, "learning_rate": 7.494566413416178e-06, "loss": 0.5337, "step": 5480 }, { "epoch": 0.35, "grad_norm": 1.1962692737579346, "learning_rate": 7.493660433812558e-06, "loss": 0.538, "step": 5481 }, { "epoch": 0.35, "grad_norm": 1.1060031652450562, "learning_rate": 7.4927543452174735e-06, "loss": 0.5148, "step": 5482 }, { "epoch": 0.35, "grad_norm": 1.2236384153366089, "learning_rate": 7.4918481476705285e-06, "loss": 0.5468, "step": 5483 }, { "epoch": 0.35, "grad_norm": 1.19748854637146, "learning_rate": 7.49094184121133e-06, "loss": 0.54, "step": 5484 }, { "epoch": 0.35, "grad_norm": 1.0491451025009155, "learning_rate": 7.490035425879491e-06, "loss": 0.5444, "step": 5485 }, { "epoch": 0.35, "grad_norm": 1.306011438369751, "learning_rate": 7.489128901714627e-06, "loss": 0.555, "step": 5486 }, { "epoch": 0.35, "grad_norm": 1.1994972229003906, "learning_rate": 7.488222268756361e-06, "loss": 0.5324, "step": 5487 }, { "epoch": 0.35, "grad_norm": 1.1578638553619385, "learning_rate": 7.487315527044319e-06, "loss": 0.5108, "step": 5488 }, { "epoch": 0.35, "grad_norm": 1.2470946311950684, "learning_rate": 7.486408676618135e-06, "loss": 0.5702, "step": 5489 }, { "epoch": 0.35, "grad_norm": 1.2237720489501953, "learning_rate": 7.48550171751744e-06, "loss": 0.5787, "step": 5490 }, { "epoch": 0.35, "grad_norm": 1.2372705936431885, "learning_rate": 7.484594649781878e-06, "loss": 0.5591, "step": 5491 }, { "epoch": 0.35, "grad_norm": 1.18450129032135, "learning_rate": 7.483687473451096e-06, "loss": 0.5209, "step": 5492 }, { "epoch": 0.35, "grad_norm": 1.1646240949630737, "learning_rate": 7.48278018856474e-06, "loss": 0.5455, "step": 5493 }, { "epoch": 0.35, "grad_norm": 1.2366533279418945, "learning_rate": 7.48187279516247e-06, "loss": 0.6339, "step": 5494 }, { "epoch": 0.35, "grad_norm": 1.0529205799102783, "learning_rate": 7.480965293283942e-06, "loss": 0.5233, "step": 5495 }, { "epoch": 0.35, "grad_norm": 1.2602312564849854, "learning_rate": 7.480057682968822e-06, "loss": 0.5494, "step": 5496 }, { "epoch": 0.35, "grad_norm": 1.1631699800491333, "learning_rate": 7.47914996425678e-06, "loss": 0.5669, "step": 5497 }, { "epoch": 0.35, "grad_norm": 1.1330434083938599, "learning_rate": 7.478242137187488e-06, "loss": 0.4975, "step": 5498 }, { "epoch": 0.35, "grad_norm": 1.268484354019165, "learning_rate": 7.477334201800628e-06, "loss": 0.562, "step": 5499 }, { "epoch": 0.36, "grad_norm": 1.1481913328170776, "learning_rate": 7.476426158135881e-06, "loss": 0.5064, "step": 5500 }, { "epoch": 0.36, "grad_norm": 1.1502835750579834, "learning_rate": 7.475518006232936e-06, "loss": 0.5012, "step": 5501 }, { "epoch": 0.36, "grad_norm": 1.1621520519256592, "learning_rate": 7.474609746131485e-06, "loss": 0.5462, "step": 5502 }, { "epoch": 0.36, "grad_norm": 1.3198704719543457, "learning_rate": 7.473701377871227e-06, "loss": 0.5443, "step": 5503 }, { "epoch": 0.36, "grad_norm": 1.1809096336364746, "learning_rate": 7.4727929014918655e-06, "loss": 0.554, "step": 5504 }, { "epoch": 0.36, "grad_norm": 1.1407508850097656, "learning_rate": 7.471884317033103e-06, "loss": 0.5768, "step": 5505 }, { "epoch": 0.36, "grad_norm": 1.0928750038146973, "learning_rate": 7.470975624534658e-06, "loss": 0.5457, "step": 5506 }, { "epoch": 0.36, "grad_norm": 1.1511112451553345, "learning_rate": 7.470066824036242e-06, "loss": 0.5597, "step": 5507 }, { "epoch": 0.36, "grad_norm": 1.191504955291748, "learning_rate": 7.469157915577578e-06, "loss": 0.5254, "step": 5508 }, { "epoch": 0.36, "grad_norm": 2.5938363075256348, "learning_rate": 7.468248899198392e-06, "loss": 0.562, "step": 5509 }, { "epoch": 0.36, "grad_norm": 1.2577276229858398, "learning_rate": 7.4673397749384135e-06, "loss": 0.5358, "step": 5510 }, { "epoch": 0.36, "grad_norm": 1.2401012182235718, "learning_rate": 7.46643054283738e-06, "loss": 0.4772, "step": 5511 }, { "epoch": 0.36, "grad_norm": 1.1664294004440308, "learning_rate": 7.465521202935033e-06, "loss": 0.5543, "step": 5512 }, { "epoch": 0.36, "grad_norm": 1.211308479309082, "learning_rate": 7.464611755271114e-06, "loss": 0.5507, "step": 5513 }, { "epoch": 0.36, "grad_norm": 1.1080774068832397, "learning_rate": 7.4637021998853725e-06, "loss": 0.5389, "step": 5514 }, { "epoch": 0.36, "grad_norm": 1.1108276844024658, "learning_rate": 7.462792536817567e-06, "loss": 0.5296, "step": 5515 }, { "epoch": 0.36, "grad_norm": 1.2797602415084839, "learning_rate": 7.461882766107451e-06, "loss": 0.5577, "step": 5516 }, { "epoch": 0.36, "grad_norm": 1.0594059228897095, "learning_rate": 7.460972887794793e-06, "loss": 0.5379, "step": 5517 }, { "epoch": 0.36, "grad_norm": 1.2909904718399048, "learning_rate": 7.460062901919359e-06, "loss": 0.5225, "step": 5518 }, { "epoch": 0.36, "grad_norm": 1.101715087890625, "learning_rate": 7.459152808520922e-06, "loss": 0.5283, "step": 5519 }, { "epoch": 0.36, "grad_norm": 1.1270750761032104, "learning_rate": 7.458242607639261e-06, "loss": 0.533, "step": 5520 }, { "epoch": 0.36, "grad_norm": 1.0967172384262085, "learning_rate": 7.457332299314159e-06, "loss": 0.5623, "step": 5521 }, { "epoch": 0.36, "grad_norm": 1.2117010354995728, "learning_rate": 7.4564218835854e-06, "loss": 0.5582, "step": 5522 }, { "epoch": 0.36, "grad_norm": 1.3138213157653809, "learning_rate": 7.455511360492779e-06, "loss": 0.5273, "step": 5523 }, { "epoch": 0.36, "grad_norm": 1.3105390071868896, "learning_rate": 7.454600730076093e-06, "loss": 0.5335, "step": 5524 }, { "epoch": 0.36, "grad_norm": 1.2437533140182495, "learning_rate": 7.4536899923751395e-06, "loss": 0.5243, "step": 5525 }, { "epoch": 0.36, "grad_norm": 1.0537219047546387, "learning_rate": 7.4527791474297285e-06, "loss": 0.5252, "step": 5526 }, { "epoch": 0.36, "grad_norm": 1.3049806356430054, "learning_rate": 7.4518681952796705e-06, "loss": 0.5269, "step": 5527 }, { "epoch": 0.36, "grad_norm": 1.1053842306137085, "learning_rate": 7.4509571359647785e-06, "loss": 0.5686, "step": 5528 }, { "epoch": 0.36, "grad_norm": 1.258530616760254, "learning_rate": 7.4500459695248725e-06, "loss": 0.5541, "step": 5529 }, { "epoch": 0.36, "grad_norm": 1.135095477104187, "learning_rate": 7.44913469599978e-06, "loss": 0.4805, "step": 5530 }, { "epoch": 0.36, "grad_norm": 1.1299347877502441, "learning_rate": 7.448223315429328e-06, "loss": 0.579, "step": 5531 }, { "epoch": 0.36, "grad_norm": 1.0707110166549683, "learning_rate": 7.447311827853351e-06, "loss": 0.5194, "step": 5532 }, { "epoch": 0.36, "grad_norm": 1.064680814743042, "learning_rate": 7.446400233311689e-06, "loss": 0.5541, "step": 5533 }, { "epoch": 0.36, "grad_norm": 1.1367135047912598, "learning_rate": 7.445488531844184e-06, "loss": 0.5277, "step": 5534 }, { "epoch": 0.36, "grad_norm": 1.2308048009872437, "learning_rate": 7.444576723490684e-06, "loss": 0.5613, "step": 5535 }, { "epoch": 0.36, "grad_norm": 1.2004492282867432, "learning_rate": 7.4436648082910446e-06, "loss": 0.551, "step": 5536 }, { "epoch": 0.36, "grad_norm": 1.03052818775177, "learning_rate": 7.44275278628512e-06, "loss": 0.5436, "step": 5537 }, { "epoch": 0.36, "grad_norm": 1.1295900344848633, "learning_rate": 7.4418406575127735e-06, "loss": 0.5422, "step": 5538 }, { "epoch": 0.36, "grad_norm": 1.2810769081115723, "learning_rate": 7.440928422013872e-06, "loss": 0.5255, "step": 5539 }, { "epoch": 0.36, "grad_norm": 1.1635115146636963, "learning_rate": 7.440016079828288e-06, "loss": 0.5578, "step": 5540 }, { "epoch": 0.36, "grad_norm": 1.2317860126495361, "learning_rate": 7.439103630995894e-06, "loss": 0.5825, "step": 5541 }, { "epoch": 0.36, "grad_norm": 1.1907908916473389, "learning_rate": 7.4381910755565745e-06, "loss": 0.5786, "step": 5542 }, { "epoch": 0.36, "grad_norm": 1.2221354246139526, "learning_rate": 7.437278413550215e-06, "loss": 0.5608, "step": 5543 }, { "epoch": 0.36, "grad_norm": 1.2044223546981812, "learning_rate": 7.436365645016702e-06, "loss": 0.557, "step": 5544 }, { "epoch": 0.36, "grad_norm": 1.1769828796386719, "learning_rate": 7.435452769995935e-06, "loss": 0.5409, "step": 5545 }, { "epoch": 0.36, "grad_norm": 1.2123057842254639, "learning_rate": 7.4345397885278096e-06, "loss": 0.5505, "step": 5546 }, { "epoch": 0.36, "grad_norm": 1.2981032133102417, "learning_rate": 7.43362670065223e-06, "loss": 0.5391, "step": 5547 }, { "epoch": 0.36, "grad_norm": 1.1755000352859497, "learning_rate": 7.432713506409108e-06, "loss": 0.6047, "step": 5548 }, { "epoch": 0.36, "grad_norm": 1.2730683088302612, "learning_rate": 7.431800205838355e-06, "loss": 0.5495, "step": 5549 }, { "epoch": 0.36, "grad_norm": 1.1846522092819214, "learning_rate": 7.430886798979888e-06, "loss": 0.5355, "step": 5550 }, { "epoch": 0.36, "grad_norm": 1.2876492738723755, "learning_rate": 7.429973285873632e-06, "loss": 0.5323, "step": 5551 }, { "epoch": 0.36, "grad_norm": 1.2307277917861938, "learning_rate": 7.429059666559512e-06, "loss": 0.5794, "step": 5552 }, { "epoch": 0.36, "grad_norm": 1.1699929237365723, "learning_rate": 7.428145941077463e-06, "loss": 0.535, "step": 5553 }, { "epoch": 0.36, "grad_norm": 1.1672996282577515, "learning_rate": 7.4272321094674175e-06, "loss": 0.5786, "step": 5554 }, { "epoch": 0.36, "grad_norm": 1.1606366634368896, "learning_rate": 7.426318171769319e-06, "loss": 0.531, "step": 5555 }, { "epoch": 0.36, "grad_norm": 1.1518011093139648, "learning_rate": 7.425404128023114e-06, "loss": 0.5862, "step": 5556 }, { "epoch": 0.36, "grad_norm": 1.2012429237365723, "learning_rate": 7.424489978268752e-06, "loss": 0.5932, "step": 5557 }, { "epoch": 0.36, "grad_norm": 1.0506792068481445, "learning_rate": 7.423575722546188e-06, "loss": 0.5098, "step": 5558 }, { "epoch": 0.36, "grad_norm": 1.1039893627166748, "learning_rate": 7.422661360895382e-06, "loss": 0.4895, "step": 5559 }, { "epoch": 0.36, "grad_norm": 1.1986591815948486, "learning_rate": 7.421746893356299e-06, "loss": 0.557, "step": 5560 }, { "epoch": 0.36, "grad_norm": 1.0963324308395386, "learning_rate": 7.420832319968907e-06, "loss": 0.5524, "step": 5561 }, { "epoch": 0.36, "grad_norm": 1.1472525596618652, "learning_rate": 7.41991764077318e-06, "loss": 0.5694, "step": 5562 }, { "epoch": 0.36, "grad_norm": 1.139106035232544, "learning_rate": 7.419002855809096e-06, "loss": 0.5206, "step": 5563 }, { "epoch": 0.36, "grad_norm": 1.2335044145584106, "learning_rate": 7.4180879651166394e-06, "loss": 0.5582, "step": 5564 }, { "epoch": 0.36, "grad_norm": 1.3115383386611938, "learning_rate": 7.417172968735797e-06, "loss": 0.5616, "step": 5565 }, { "epoch": 0.36, "grad_norm": 1.251376748085022, "learning_rate": 7.41625786670656e-06, "loss": 0.5804, "step": 5566 }, { "epoch": 0.36, "grad_norm": 1.1432888507843018, "learning_rate": 7.415342659068925e-06, "loss": 0.5192, "step": 5567 }, { "epoch": 0.36, "grad_norm": 1.1637476682662964, "learning_rate": 7.414427345862895e-06, "loss": 0.5511, "step": 5568 }, { "epoch": 0.36, "grad_norm": 1.2915873527526855, "learning_rate": 7.413511927128474e-06, "loss": 0.5673, "step": 5569 }, { "epoch": 0.36, "grad_norm": 1.4992048740386963, "learning_rate": 7.412596402905674e-06, "loss": 0.5648, "step": 5570 }, { "epoch": 0.36, "grad_norm": 1.1463955640792847, "learning_rate": 7.41168077323451e-06, "loss": 0.5787, "step": 5571 }, { "epoch": 0.36, "grad_norm": 1.1783190965652466, "learning_rate": 7.410765038155001e-06, "loss": 0.5262, "step": 5572 }, { "epoch": 0.36, "grad_norm": 1.2065173387527466, "learning_rate": 7.409849197707173e-06, "loss": 0.6066, "step": 5573 }, { "epoch": 0.36, "grad_norm": 1.174583911895752, "learning_rate": 7.408933251931054e-06, "loss": 0.5214, "step": 5574 }, { "epoch": 0.36, "grad_norm": 1.1895729303359985, "learning_rate": 7.408017200866677e-06, "loss": 0.4966, "step": 5575 }, { "epoch": 0.36, "grad_norm": 1.1042587757110596, "learning_rate": 7.4071010445540804e-06, "loss": 0.5277, "step": 5576 }, { "epoch": 0.36, "grad_norm": 1.1662585735321045, "learning_rate": 7.406184783033309e-06, "loss": 0.5655, "step": 5577 }, { "epoch": 0.36, "grad_norm": 1.1539708375930786, "learning_rate": 7.405268416344408e-06, "loss": 0.5912, "step": 5578 }, { "epoch": 0.36, "grad_norm": 1.2504650354385376, "learning_rate": 7.4043519445274304e-06, "loss": 0.6063, "step": 5579 }, { "epoch": 0.36, "grad_norm": 1.1796860694885254, "learning_rate": 7.403435367622434e-06, "loss": 0.5721, "step": 5580 }, { "epoch": 0.36, "grad_norm": 1.1812734603881836, "learning_rate": 7.402518685669475e-06, "loss": 0.5035, "step": 5581 }, { "epoch": 0.36, "grad_norm": 1.226014494895935, "learning_rate": 7.4016018987086255e-06, "loss": 0.6, "step": 5582 }, { "epoch": 0.36, "grad_norm": 1.2375158071517944, "learning_rate": 7.400685006779953e-06, "loss": 0.5599, "step": 5583 }, { "epoch": 0.36, "grad_norm": 1.1323561668395996, "learning_rate": 7.399768009923533e-06, "loss": 0.5623, "step": 5584 }, { "epoch": 0.36, "grad_norm": 1.2645033597946167, "learning_rate": 7.398850908179445e-06, "loss": 0.5323, "step": 5585 }, { "epoch": 0.36, "grad_norm": 1.1311620473861694, "learning_rate": 7.3979337015877715e-06, "loss": 0.5597, "step": 5586 }, { "epoch": 0.36, "grad_norm": 1.1367073059082031, "learning_rate": 7.397016390188603e-06, "loss": 0.5405, "step": 5587 }, { "epoch": 0.36, "grad_norm": 1.2368121147155762, "learning_rate": 7.396098974022032e-06, "loss": 0.5821, "step": 5588 }, { "epoch": 0.36, "grad_norm": 1.157475471496582, "learning_rate": 7.395181453128158e-06, "loss": 0.5259, "step": 5589 }, { "epoch": 0.36, "grad_norm": 1.070258378982544, "learning_rate": 7.394263827547083e-06, "loss": 0.5459, "step": 5590 }, { "epoch": 0.36, "grad_norm": 1.3208073377609253, "learning_rate": 7.393346097318912e-06, "loss": 0.6152, "step": 5591 }, { "epoch": 0.36, "grad_norm": 1.1627819538116455, "learning_rate": 7.392428262483758e-06, "loss": 0.5401, "step": 5592 }, { "epoch": 0.36, "grad_norm": 1.225100040435791, "learning_rate": 7.391510323081738e-06, "loss": 0.5462, "step": 5593 }, { "epoch": 0.36, "grad_norm": 1.1290287971496582, "learning_rate": 7.390592279152973e-06, "loss": 0.5464, "step": 5594 }, { "epoch": 0.36, "grad_norm": 1.1168327331542969, "learning_rate": 7.389674130737585e-06, "loss": 0.5065, "step": 5595 }, { "epoch": 0.36, "grad_norm": 1.196143388748169, "learning_rate": 7.388755877875706e-06, "loss": 0.5335, "step": 5596 }, { "epoch": 0.36, "grad_norm": 1.1534392833709717, "learning_rate": 7.387837520607473e-06, "loss": 0.497, "step": 5597 }, { "epoch": 0.36, "grad_norm": 1.2175308465957642, "learning_rate": 7.386919058973021e-06, "loss": 0.5368, "step": 5598 }, { "epoch": 0.36, "grad_norm": 1.2478947639465332, "learning_rate": 7.3860004930124955e-06, "loss": 0.5335, "step": 5599 }, { "epoch": 0.36, "grad_norm": 1.1848423480987549, "learning_rate": 7.385081822766045e-06, "loss": 0.5979, "step": 5600 }, { "epoch": 0.36, "grad_norm": 1.1898576021194458, "learning_rate": 7.3841630482738205e-06, "loss": 0.5334, "step": 5601 }, { "epoch": 0.36, "grad_norm": 1.1835832595825195, "learning_rate": 7.38324416957598e-06, "loss": 0.5376, "step": 5602 }, { "epoch": 0.36, "grad_norm": 1.3220858573913574, "learning_rate": 7.382325186712688e-06, "loss": 0.5846, "step": 5603 }, { "epoch": 0.36, "grad_norm": 1.1190091371536255, "learning_rate": 7.381406099724107e-06, "loss": 0.5507, "step": 5604 }, { "epoch": 0.36, "grad_norm": 1.2299021482467651, "learning_rate": 7.380486908650408e-06, "loss": 0.5971, "step": 5605 }, { "epoch": 0.36, "grad_norm": 1.5388082265853882, "learning_rate": 7.3795676135317704e-06, "loss": 0.5616, "step": 5606 }, { "epoch": 0.36, "grad_norm": 1.2384772300720215, "learning_rate": 7.378648214408371e-06, "loss": 0.5388, "step": 5607 }, { "epoch": 0.36, "grad_norm": 1.2573626041412354, "learning_rate": 7.3777287113203955e-06, "loss": 0.583, "step": 5608 }, { "epoch": 0.36, "grad_norm": 1.1216109991073608, "learning_rate": 7.376809104308032e-06, "loss": 0.5298, "step": 5609 }, { "epoch": 0.36, "grad_norm": 1.2055078744888306, "learning_rate": 7.375889393411475e-06, "loss": 0.5229, "step": 5610 }, { "epoch": 0.36, "grad_norm": 1.14286208152771, "learning_rate": 7.374969578670923e-06, "loss": 0.5341, "step": 5611 }, { "epoch": 0.36, "grad_norm": 1.1720985174179077, "learning_rate": 7.374049660126579e-06, "loss": 0.5934, "step": 5612 }, { "epoch": 0.36, "grad_norm": 1.222639799118042, "learning_rate": 7.373129637818648e-06, "loss": 0.5617, "step": 5613 }, { "epoch": 0.36, "grad_norm": 1.138548493385315, "learning_rate": 7.372209511787342e-06, "loss": 0.5295, "step": 5614 }, { "epoch": 0.36, "grad_norm": 1.0622655153274536, "learning_rate": 7.371289282072882e-06, "loss": 0.5275, "step": 5615 }, { "epoch": 0.36, "grad_norm": 1.2317789793014526, "learning_rate": 7.370368948715483e-06, "loss": 0.5139, "step": 5616 }, { "epoch": 0.36, "grad_norm": 1.1171611547470093, "learning_rate": 7.369448511755373e-06, "loss": 0.5244, "step": 5617 }, { "epoch": 0.36, "grad_norm": 1.1850714683532715, "learning_rate": 7.368527971232784e-06, "loss": 0.5305, "step": 5618 }, { "epoch": 0.36, "grad_norm": 1.2703522443771362, "learning_rate": 7.367607327187945e-06, "loss": 0.5248, "step": 5619 }, { "epoch": 0.36, "grad_norm": 1.2539831399917603, "learning_rate": 7.3666865796611006e-06, "loss": 0.5396, "step": 5620 }, { "epoch": 0.36, "grad_norm": 1.1591225862503052, "learning_rate": 7.365765728692492e-06, "loss": 0.584, "step": 5621 }, { "epoch": 0.36, "grad_norm": 1.2097697257995605, "learning_rate": 7.364844774322366e-06, "loss": 0.5321, "step": 5622 }, { "epoch": 0.36, "grad_norm": 1.1422377824783325, "learning_rate": 7.363923716590976e-06, "loss": 0.5112, "step": 5623 }, { "epoch": 0.36, "grad_norm": 1.3630234003067017, "learning_rate": 7.3630025555385805e-06, "loss": 0.5244, "step": 5624 }, { "epoch": 0.36, "grad_norm": 1.374645709991455, "learning_rate": 7.36208129120544e-06, "loss": 0.5424, "step": 5625 }, { "epoch": 0.36, "grad_norm": 1.160399079322815, "learning_rate": 7.36115992363182e-06, "loss": 0.5999, "step": 5626 }, { "epoch": 0.36, "grad_norm": 1.1022703647613525, "learning_rate": 7.3602384528579914e-06, "loss": 0.5763, "step": 5627 }, { "epoch": 0.36, "grad_norm": 1.2983217239379883, "learning_rate": 7.3593168789242294e-06, "loss": 0.5716, "step": 5628 }, { "epoch": 0.36, "grad_norm": 1.101702332496643, "learning_rate": 7.358395201870815e-06, "loss": 0.5315, "step": 5629 }, { "epoch": 0.36, "grad_norm": 1.1513746976852417, "learning_rate": 7.35747342173803e-06, "loss": 0.5851, "step": 5630 }, { "epoch": 0.36, "grad_norm": 1.1289657354354858, "learning_rate": 7.356551538566164e-06, "loss": 0.4998, "step": 5631 }, { "epoch": 0.36, "grad_norm": 1.1158496141433716, "learning_rate": 7.355629552395511e-06, "loss": 0.5161, "step": 5632 }, { "epoch": 0.36, "grad_norm": 1.1691607236862183, "learning_rate": 7.354707463266367e-06, "loss": 0.5824, "step": 5633 }, { "epoch": 0.36, "grad_norm": 1.192583680152893, "learning_rate": 7.353785271219036e-06, "loss": 0.5555, "step": 5634 }, { "epoch": 0.36, "grad_norm": 1.1023120880126953, "learning_rate": 7.352862976293823e-06, "loss": 0.5973, "step": 5635 }, { "epoch": 0.36, "grad_norm": 1.2444056272506714, "learning_rate": 7.351940578531039e-06, "loss": 0.5369, "step": 5636 }, { "epoch": 0.36, "grad_norm": 1.1512718200683594, "learning_rate": 7.351018077971002e-06, "loss": 0.6064, "step": 5637 }, { "epoch": 0.36, "grad_norm": 1.2902830839157104, "learning_rate": 7.35009547465403e-06, "loss": 0.537, "step": 5638 }, { "epoch": 0.36, "grad_norm": 1.1611862182617188, "learning_rate": 7.3491727686204484e-06, "loss": 0.5102, "step": 5639 }, { "epoch": 0.36, "grad_norm": 1.1686222553253174, "learning_rate": 7.348249959910585e-06, "loss": 0.5455, "step": 5640 }, { "epoch": 0.36, "grad_norm": 1.1612993478775024, "learning_rate": 7.347327048564775e-06, "loss": 0.5848, "step": 5641 }, { "epoch": 0.36, "grad_norm": 1.1217381954193115, "learning_rate": 7.346404034623356e-06, "loss": 0.5748, "step": 5642 }, { "epoch": 0.36, "grad_norm": 1.1214344501495361, "learning_rate": 7.345480918126669e-06, "loss": 0.5294, "step": 5643 }, { "epoch": 0.36, "grad_norm": 1.2123593091964722, "learning_rate": 7.344557699115064e-06, "loss": 0.5095, "step": 5644 }, { "epoch": 0.36, "grad_norm": 1.0677759647369385, "learning_rate": 7.343634377628892e-06, "loss": 0.5244, "step": 5645 }, { "epoch": 0.36, "grad_norm": 1.1563727855682373, "learning_rate": 7.342710953708506e-06, "loss": 0.533, "step": 5646 }, { "epoch": 0.36, "grad_norm": 1.3058327436447144, "learning_rate": 7.341787427394269e-06, "loss": 0.5561, "step": 5647 }, { "epoch": 0.36, "grad_norm": 1.158752679824829, "learning_rate": 7.340863798726546e-06, "loss": 0.5178, "step": 5648 }, { "epoch": 0.36, "grad_norm": 1.222059726715088, "learning_rate": 7.339940067745705e-06, "loss": 0.5644, "step": 5649 }, { "epoch": 0.36, "grad_norm": 1.109535574913025, "learning_rate": 7.3390162344921225e-06, "loss": 0.5113, "step": 5650 }, { "epoch": 0.36, "grad_norm": 1.2074917554855347, "learning_rate": 7.338092299006173e-06, "loss": 0.5502, "step": 5651 }, { "epoch": 0.36, "grad_norm": 1.158144235610962, "learning_rate": 7.337168261328243e-06, "loss": 0.5216, "step": 5652 }, { "epoch": 0.36, "grad_norm": 1.1615883111953735, "learning_rate": 7.336244121498718e-06, "loss": 0.582, "step": 5653 }, { "epoch": 0.36, "grad_norm": 1.1547499895095825, "learning_rate": 7.335319879557989e-06, "loss": 0.5599, "step": 5654 }, { "epoch": 0.37, "grad_norm": 1.1954056024551392, "learning_rate": 7.334395535546453e-06, "loss": 0.5175, "step": 5655 }, { "epoch": 0.37, "grad_norm": 1.1268482208251953, "learning_rate": 7.333471089504514e-06, "loss": 0.5696, "step": 5656 }, { "epoch": 0.37, "grad_norm": 1.2950479984283447, "learning_rate": 7.332546541472571e-06, "loss": 0.5041, "step": 5657 }, { "epoch": 0.37, "grad_norm": 1.198935627937317, "learning_rate": 7.331621891491038e-06, "loss": 0.6081, "step": 5658 }, { "epoch": 0.37, "grad_norm": 1.1267343759536743, "learning_rate": 7.330697139600328e-06, "loss": 0.5619, "step": 5659 }, { "epoch": 0.37, "grad_norm": 1.229082703590393, "learning_rate": 7.329772285840858e-06, "loss": 0.539, "step": 5660 }, { "epoch": 0.37, "grad_norm": 1.3499679565429688, "learning_rate": 7.328847330253052e-06, "loss": 0.5494, "step": 5661 }, { "epoch": 0.37, "grad_norm": 1.207524299621582, "learning_rate": 7.32792227287734e-06, "loss": 0.57, "step": 5662 }, { "epoch": 0.37, "grad_norm": 1.3282941579818726, "learning_rate": 7.326997113754151e-06, "loss": 0.5564, "step": 5663 }, { "epoch": 0.37, "grad_norm": 1.0776125192642212, "learning_rate": 7.326071852923921e-06, "loss": 0.5274, "step": 5664 }, { "epoch": 0.37, "grad_norm": 1.6086294651031494, "learning_rate": 7.325146490427092e-06, "loss": 0.4933, "step": 5665 }, { "epoch": 0.37, "grad_norm": 1.0743650197982788, "learning_rate": 7.324221026304109e-06, "loss": 0.5963, "step": 5666 }, { "epoch": 0.37, "grad_norm": 1.2255381345748901, "learning_rate": 7.323295460595422e-06, "loss": 0.5439, "step": 5667 }, { "epoch": 0.37, "grad_norm": 1.2322518825531006, "learning_rate": 7.322369793341484e-06, "loss": 0.5364, "step": 5668 }, { "epoch": 0.37, "grad_norm": 1.2304028272628784, "learning_rate": 7.321444024582753e-06, "loss": 0.518, "step": 5669 }, { "epoch": 0.37, "grad_norm": 1.206195592880249, "learning_rate": 7.320518154359695e-06, "loss": 0.5381, "step": 5670 }, { "epoch": 0.37, "grad_norm": 1.1182693243026733, "learning_rate": 7.319592182712775e-06, "loss": 0.5736, "step": 5671 }, { "epoch": 0.37, "grad_norm": 1.3144731521606445, "learning_rate": 7.318666109682464e-06, "loss": 0.5802, "step": 5672 }, { "epoch": 0.37, "grad_norm": 1.048600435256958, "learning_rate": 7.3177399353092415e-06, "loss": 0.5121, "step": 5673 }, { "epoch": 0.37, "grad_norm": 1.149122953414917, "learning_rate": 7.3168136596335845e-06, "loss": 0.5492, "step": 5674 }, { "epoch": 0.37, "grad_norm": 1.1580393314361572, "learning_rate": 7.315887282695981e-06, "loss": 0.5873, "step": 5675 }, { "epoch": 0.37, "grad_norm": 1.2105460166931152, "learning_rate": 7.314960804536919e-06, "loss": 0.5281, "step": 5676 }, { "epoch": 0.37, "grad_norm": 1.2614856958389282, "learning_rate": 7.314034225196893e-06, "loss": 0.5825, "step": 5677 }, { "epoch": 0.37, "grad_norm": 1.2115774154663086, "learning_rate": 7.313107544716402e-06, "loss": 0.5853, "step": 5678 }, { "epoch": 0.37, "grad_norm": 1.1962165832519531, "learning_rate": 7.312180763135948e-06, "loss": 0.5077, "step": 5679 }, { "epoch": 0.37, "grad_norm": 1.2476286888122559, "learning_rate": 7.311253880496036e-06, "loss": 0.5751, "step": 5680 }, { "epoch": 0.37, "grad_norm": 1.337890386581421, "learning_rate": 7.310326896837183e-06, "loss": 0.5441, "step": 5681 }, { "epoch": 0.37, "grad_norm": 1.1709275245666504, "learning_rate": 7.309399812199901e-06, "loss": 0.5347, "step": 5682 }, { "epoch": 0.37, "grad_norm": 1.0793637037277222, "learning_rate": 7.3084726266247105e-06, "loss": 0.4593, "step": 5683 }, { "epoch": 0.37, "grad_norm": 1.1673537492752075, "learning_rate": 7.30754534015214e-06, "loss": 0.5382, "step": 5684 }, { "epoch": 0.37, "grad_norm": 1.2252163887023926, "learning_rate": 7.306617952822714e-06, "loss": 0.5388, "step": 5685 }, { "epoch": 0.37, "grad_norm": 1.15753972530365, "learning_rate": 7.30569046467697e-06, "loss": 0.5464, "step": 5686 }, { "epoch": 0.37, "grad_norm": 1.1960277557373047, "learning_rate": 7.3047628757554425e-06, "loss": 0.5449, "step": 5687 }, { "epoch": 0.37, "grad_norm": 1.2605589628219604, "learning_rate": 7.3038351860986774e-06, "loss": 0.5171, "step": 5688 }, { "epoch": 0.37, "grad_norm": 1.4550318717956543, "learning_rate": 7.302907395747221e-06, "loss": 0.534, "step": 5689 }, { "epoch": 0.37, "grad_norm": 1.1738815307617188, "learning_rate": 7.301979504741622e-06, "loss": 0.5471, "step": 5690 }, { "epoch": 0.37, "grad_norm": 1.1394492387771606, "learning_rate": 7.30105151312244e-06, "loss": 0.5407, "step": 5691 }, { "epoch": 0.37, "grad_norm": 1.3473451137542725, "learning_rate": 7.3001234209302315e-06, "loss": 0.5586, "step": 5692 }, { "epoch": 0.37, "grad_norm": 1.2701396942138672, "learning_rate": 7.2991952282055634e-06, "loss": 0.5469, "step": 5693 }, { "epoch": 0.37, "grad_norm": 1.1532648801803589, "learning_rate": 7.298266934989005e-06, "loss": 0.5203, "step": 5694 }, { "epoch": 0.37, "grad_norm": 1.2608929872512817, "learning_rate": 7.297338541321126e-06, "loss": 0.5531, "step": 5695 }, { "epoch": 0.37, "grad_norm": 1.0573583841323853, "learning_rate": 7.296410047242508e-06, "loss": 0.527, "step": 5696 }, { "epoch": 0.37, "grad_norm": 1.1702858209609985, "learning_rate": 7.295481452793732e-06, "loss": 0.5087, "step": 5697 }, { "epoch": 0.37, "grad_norm": 1.3234416246414185, "learning_rate": 7.294552758015383e-06, "loss": 0.5837, "step": 5698 }, { "epoch": 0.37, "grad_norm": 1.1041815280914307, "learning_rate": 7.293623962948053e-06, "loss": 0.5183, "step": 5699 }, { "epoch": 0.37, "grad_norm": 1.2596098184585571, "learning_rate": 7.2926950676323385e-06, "loss": 0.5382, "step": 5700 }, { "epoch": 0.37, "grad_norm": 1.1496812105178833, "learning_rate": 7.291766072108837e-06, "loss": 0.5703, "step": 5701 }, { "epoch": 0.37, "grad_norm": 1.1896954774856567, "learning_rate": 7.290836976418153e-06, "loss": 0.5567, "step": 5702 }, { "epoch": 0.37, "grad_norm": 1.2294483184814453, "learning_rate": 7.289907780600896e-06, "loss": 0.5124, "step": 5703 }, { "epoch": 0.37, "grad_norm": 1.2061004638671875, "learning_rate": 7.288978484697678e-06, "loss": 0.559, "step": 5704 }, { "epoch": 0.37, "grad_norm": 1.1861255168914795, "learning_rate": 7.288049088749115e-06, "loss": 0.5354, "step": 5705 }, { "epoch": 0.37, "grad_norm": 1.1386526823043823, "learning_rate": 7.287119592795831e-06, "loss": 0.5271, "step": 5706 }, { "epoch": 0.37, "grad_norm": 1.3417874574661255, "learning_rate": 7.28618999687845e-06, "loss": 0.5345, "step": 5707 }, { "epoch": 0.37, "grad_norm": 1.1757420301437378, "learning_rate": 7.285260301037603e-06, "loss": 0.5729, "step": 5708 }, { "epoch": 0.37, "grad_norm": 1.2620203495025635, "learning_rate": 7.284330505313924e-06, "loss": 0.5929, "step": 5709 }, { "epoch": 0.37, "grad_norm": 1.1357353925704956, "learning_rate": 7.28340060974805e-06, "loss": 0.5163, "step": 5710 }, { "epoch": 0.37, "grad_norm": 1.092590570449829, "learning_rate": 7.28247061438063e-06, "loss": 0.495, "step": 5711 }, { "epoch": 0.37, "grad_norm": 1.2303190231323242, "learning_rate": 7.2815405192523055e-06, "loss": 0.5386, "step": 5712 }, { "epoch": 0.37, "grad_norm": 1.0872400999069214, "learning_rate": 7.280610324403733e-06, "loss": 0.5144, "step": 5713 }, { "epoch": 0.37, "grad_norm": 1.3676601648330688, "learning_rate": 7.279680029875568e-06, "loss": 0.5801, "step": 5714 }, { "epoch": 0.37, "grad_norm": 1.3029961585998535, "learning_rate": 7.2787496357084686e-06, "loss": 0.5276, "step": 5715 }, { "epoch": 0.37, "grad_norm": 1.0184909105300903, "learning_rate": 7.277819141943103e-06, "loss": 0.5248, "step": 5716 }, { "epoch": 0.37, "grad_norm": 1.1077089309692383, "learning_rate": 7.27688854862014e-06, "loss": 0.5096, "step": 5717 }, { "epoch": 0.37, "grad_norm": 1.3203158378601074, "learning_rate": 7.275957855780252e-06, "loss": 0.5587, "step": 5718 }, { "epoch": 0.37, "grad_norm": 1.3474527597427368, "learning_rate": 7.2750270634641176e-06, "loss": 0.5416, "step": 5719 }, { "epoch": 0.37, "grad_norm": 1.2597583532333374, "learning_rate": 7.274096171712422e-06, "loss": 0.5587, "step": 5720 }, { "epoch": 0.37, "grad_norm": 1.2922543287277222, "learning_rate": 7.273165180565849e-06, "loss": 0.5664, "step": 5721 }, { "epoch": 0.37, "grad_norm": 1.1388320922851562, "learning_rate": 7.272234090065089e-06, "loss": 0.4925, "step": 5722 }, { "epoch": 0.37, "grad_norm": 1.2498856782913208, "learning_rate": 7.271302900250843e-06, "loss": 0.5477, "step": 5723 }, { "epoch": 0.37, "grad_norm": 1.320000171661377, "learning_rate": 7.270371611163805e-06, "loss": 0.5851, "step": 5724 }, { "epoch": 0.37, "grad_norm": 1.333517074584961, "learning_rate": 7.2694402228446805e-06, "loss": 0.5594, "step": 5725 }, { "epoch": 0.37, "grad_norm": 1.414383888244629, "learning_rate": 7.268508735334181e-06, "loss": 0.5962, "step": 5726 }, { "epoch": 0.37, "grad_norm": 10.26375961303711, "learning_rate": 7.267577148673016e-06, "loss": 0.5506, "step": 5727 }, { "epoch": 0.37, "grad_norm": 1.1039445400238037, "learning_rate": 7.266645462901907e-06, "loss": 0.5299, "step": 5728 }, { "epoch": 0.37, "grad_norm": 1.1186233758926392, "learning_rate": 7.26571367806157e-06, "loss": 0.5246, "step": 5729 }, { "epoch": 0.37, "grad_norm": 1.2163915634155273, "learning_rate": 7.264781794192736e-06, "loss": 0.5176, "step": 5730 }, { "epoch": 0.37, "grad_norm": 1.3596240282058716, "learning_rate": 7.26384981133613e-06, "loss": 0.5394, "step": 5731 }, { "epoch": 0.37, "grad_norm": 1.293495774269104, "learning_rate": 7.262917729532491e-06, "loss": 0.5823, "step": 5732 }, { "epoch": 0.37, "grad_norm": 1.148306965827942, "learning_rate": 7.261985548822558e-06, "loss": 0.5315, "step": 5733 }, { "epoch": 0.37, "grad_norm": 1.2074851989746094, "learning_rate": 7.2610532692470715e-06, "loss": 0.5559, "step": 5734 }, { "epoch": 0.37, "grad_norm": 1.2365167140960693, "learning_rate": 7.26012089084678e-06, "loss": 0.5924, "step": 5735 }, { "epoch": 0.37, "grad_norm": 1.2703722715377808, "learning_rate": 7.259188413662436e-06, "loss": 0.5482, "step": 5736 }, { "epoch": 0.37, "grad_norm": 1.1968717575073242, "learning_rate": 7.258255837734794e-06, "loss": 0.5388, "step": 5737 }, { "epoch": 0.37, "grad_norm": 1.1622527837753296, "learning_rate": 7.257323163104617e-06, "loss": 0.5661, "step": 5738 }, { "epoch": 0.37, "grad_norm": 1.0767347812652588, "learning_rate": 7.256390389812667e-06, "loss": 0.48, "step": 5739 }, { "epoch": 0.37, "grad_norm": 1.1728205680847168, "learning_rate": 7.255457517899715e-06, "loss": 0.5492, "step": 5740 }, { "epoch": 0.37, "grad_norm": 1.1159992218017578, "learning_rate": 7.254524547406536e-06, "loss": 0.5421, "step": 5741 }, { "epoch": 0.37, "grad_norm": 1.21157968044281, "learning_rate": 7.253591478373905e-06, "loss": 0.5267, "step": 5742 }, { "epoch": 0.37, "grad_norm": 1.4513084888458252, "learning_rate": 7.2526583108426044e-06, "loss": 0.5573, "step": 5743 }, { "epoch": 0.37, "grad_norm": 1.2191734313964844, "learning_rate": 7.2517250448534214e-06, "loss": 0.5679, "step": 5744 }, { "epoch": 0.37, "grad_norm": 1.1980912685394287, "learning_rate": 7.250791680447145e-06, "loss": 0.5816, "step": 5745 }, { "epoch": 0.37, "grad_norm": 1.2061222791671753, "learning_rate": 7.249858217664571e-06, "loss": 0.5875, "step": 5746 }, { "epoch": 0.37, "grad_norm": 1.1717287302017212, "learning_rate": 7.248924656546501e-06, "loss": 0.5594, "step": 5747 }, { "epoch": 0.37, "grad_norm": 1.1122463941574097, "learning_rate": 7.247990997133736e-06, "loss": 0.5265, "step": 5748 }, { "epoch": 0.37, "grad_norm": 1.191472053527832, "learning_rate": 7.247057239467084e-06, "loss": 0.5835, "step": 5749 }, { "epoch": 0.37, "grad_norm": 1.2899342775344849, "learning_rate": 7.246123383587358e-06, "loss": 0.5318, "step": 5750 }, { "epoch": 0.37, "grad_norm": 1.162307858467102, "learning_rate": 7.245189429535373e-06, "loss": 0.5363, "step": 5751 }, { "epoch": 0.37, "grad_norm": 1.2972877025604248, "learning_rate": 7.244255377351951e-06, "loss": 0.5687, "step": 5752 }, { "epoch": 0.37, "grad_norm": 1.167670726776123, "learning_rate": 7.2433212270779175e-06, "loss": 0.5481, "step": 5753 }, { "epoch": 0.37, "grad_norm": 1.1505582332611084, "learning_rate": 7.2423869787541e-06, "loss": 0.5181, "step": 5754 }, { "epoch": 0.37, "grad_norm": 1.098608136177063, "learning_rate": 7.2414526324213355e-06, "loss": 0.5634, "step": 5755 }, { "epoch": 0.37, "grad_norm": 1.2612473964691162, "learning_rate": 7.2405181881204564e-06, "loss": 0.503, "step": 5756 }, { "epoch": 0.37, "grad_norm": 1.2708951234817505, "learning_rate": 7.2395836458923085e-06, "loss": 0.5827, "step": 5757 }, { "epoch": 0.37, "grad_norm": 1.3373842239379883, "learning_rate": 7.238649005777739e-06, "loss": 0.5871, "step": 5758 }, { "epoch": 0.37, "grad_norm": 1.1804757118225098, "learning_rate": 7.237714267817596e-06, "loss": 0.5731, "step": 5759 }, { "epoch": 0.37, "grad_norm": 1.1370619535446167, "learning_rate": 7.236779432052736e-06, "loss": 0.5859, "step": 5760 }, { "epoch": 0.37, "grad_norm": 1.429229974746704, "learning_rate": 7.235844498524019e-06, "loss": 0.6236, "step": 5761 }, { "epoch": 0.37, "grad_norm": 1.1168715953826904, "learning_rate": 7.234909467272306e-06, "loss": 0.5248, "step": 5762 }, { "epoch": 0.37, "grad_norm": 1.1434231996536255, "learning_rate": 7.233974338338467e-06, "loss": 0.5587, "step": 5763 }, { "epoch": 0.37, "grad_norm": 1.1623176336288452, "learning_rate": 7.233039111763375e-06, "loss": 0.5218, "step": 5764 }, { "epoch": 0.37, "grad_norm": 1.1033375263214111, "learning_rate": 7.232103787587902e-06, "loss": 0.5416, "step": 5765 }, { "epoch": 0.37, "grad_norm": 1.2541650533676147, "learning_rate": 7.2311683658529315e-06, "loss": 0.4975, "step": 5766 }, { "epoch": 0.37, "grad_norm": 1.2249621152877808, "learning_rate": 7.230232846599351e-06, "loss": 0.5483, "step": 5767 }, { "epoch": 0.37, "grad_norm": 1.2077752351760864, "learning_rate": 7.229297229868044e-06, "loss": 0.5439, "step": 5768 }, { "epoch": 0.37, "grad_norm": 1.1820321083068848, "learning_rate": 7.228361515699909e-06, "loss": 0.5568, "step": 5769 }, { "epoch": 0.37, "grad_norm": 1.247342586517334, "learning_rate": 7.22742570413584e-06, "loss": 0.5297, "step": 5770 }, { "epoch": 0.37, "grad_norm": 1.188145637512207, "learning_rate": 7.22648979521674e-06, "loss": 0.5116, "step": 5771 }, { "epoch": 0.37, "grad_norm": 1.249135136604309, "learning_rate": 7.225553788983516e-06, "loss": 0.5761, "step": 5772 }, { "epoch": 0.37, "grad_norm": 1.2102488279342651, "learning_rate": 7.2246176854770785e-06, "loss": 0.5604, "step": 5773 }, { "epoch": 0.37, "grad_norm": 1.1564561128616333, "learning_rate": 7.223681484738341e-06, "loss": 0.596, "step": 5774 }, { "epoch": 0.37, "grad_norm": 1.104111671447754, "learning_rate": 7.222745186808223e-06, "loss": 0.5849, "step": 5775 }, { "epoch": 0.37, "grad_norm": 1.216116189956665, "learning_rate": 7.221808791727648e-06, "loss": 0.5443, "step": 5776 }, { "epoch": 0.37, "grad_norm": 1.092238187789917, "learning_rate": 7.220872299537543e-06, "loss": 0.5465, "step": 5777 }, { "epoch": 0.37, "grad_norm": 1.2184934616088867, "learning_rate": 7.219935710278839e-06, "loss": 0.5464, "step": 5778 }, { "epoch": 0.37, "grad_norm": 1.1472091674804688, "learning_rate": 7.218999023992475e-06, "loss": 0.5295, "step": 5779 }, { "epoch": 0.37, "grad_norm": 1.1155821084976196, "learning_rate": 7.218062240719386e-06, "loss": 0.6005, "step": 5780 }, { "epoch": 0.37, "grad_norm": 1.1193146705627441, "learning_rate": 7.217125360500519e-06, "loss": 0.5204, "step": 5781 }, { "epoch": 0.37, "grad_norm": 1.0940665006637573, "learning_rate": 7.216188383376824e-06, "loss": 0.5195, "step": 5782 }, { "epoch": 0.37, "grad_norm": 1.1847543716430664, "learning_rate": 7.215251309389253e-06, "loss": 0.551, "step": 5783 }, { "epoch": 0.37, "grad_norm": 1.2563034296035767, "learning_rate": 7.214314138578761e-06, "loss": 0.567, "step": 5784 }, { "epoch": 0.37, "grad_norm": 1.1970839500427246, "learning_rate": 7.213376870986313e-06, "loss": 0.5684, "step": 5785 }, { "epoch": 0.37, "grad_norm": 1.1986041069030762, "learning_rate": 7.212439506652871e-06, "loss": 0.5379, "step": 5786 }, { "epoch": 0.37, "grad_norm": 1.1797752380371094, "learning_rate": 7.211502045619406e-06, "loss": 0.5323, "step": 5787 }, { "epoch": 0.37, "grad_norm": 1.2906514406204224, "learning_rate": 7.210564487926894e-06, "loss": 0.5374, "step": 5788 }, { "epoch": 0.37, "grad_norm": 1.1161552667617798, "learning_rate": 7.20962683361631e-06, "loss": 0.5707, "step": 5789 }, { "epoch": 0.37, "grad_norm": 1.1634231805801392, "learning_rate": 7.208689082728639e-06, "loss": 0.5019, "step": 5790 }, { "epoch": 0.37, "grad_norm": 1.1522608995437622, "learning_rate": 7.2077512353048676e-06, "loss": 0.4995, "step": 5791 }, { "epoch": 0.37, "grad_norm": 1.2857542037963867, "learning_rate": 7.206813291385984e-06, "loss": 0.5265, "step": 5792 }, { "epoch": 0.37, "grad_norm": 1.1719450950622559, "learning_rate": 7.205875251012985e-06, "loss": 0.5468, "step": 5793 }, { "epoch": 0.37, "grad_norm": 1.1843669414520264, "learning_rate": 7.204937114226871e-06, "loss": 0.5872, "step": 5794 }, { "epoch": 0.37, "grad_norm": 1.1611640453338623, "learning_rate": 7.203998881068645e-06, "loss": 0.5382, "step": 5795 }, { "epoch": 0.37, "grad_norm": 1.2038507461547852, "learning_rate": 7.203060551579314e-06, "loss": 0.4495, "step": 5796 }, { "epoch": 0.37, "grad_norm": 1.2402204275131226, "learning_rate": 7.2021221257998885e-06, "loss": 0.5223, "step": 5797 }, { "epoch": 0.37, "grad_norm": 1.2340953350067139, "learning_rate": 7.2011836037713875e-06, "loss": 0.5349, "step": 5798 }, { "epoch": 0.37, "grad_norm": 1.1538810729980469, "learning_rate": 7.200244985534832e-06, "loss": 0.537, "step": 5799 }, { "epoch": 0.37, "grad_norm": 1.107735276222229, "learning_rate": 7.199306271131244e-06, "loss": 0.5288, "step": 5800 }, { "epoch": 0.37, "grad_norm": 1.2437244653701782, "learning_rate": 7.198367460601651e-06, "loss": 0.534, "step": 5801 }, { "epoch": 0.37, "grad_norm": 1.2758253812789917, "learning_rate": 7.197428553987091e-06, "loss": 0.5673, "step": 5802 }, { "epoch": 0.37, "grad_norm": 1.1082179546356201, "learning_rate": 7.1964895513285975e-06, "loss": 0.5316, "step": 5803 }, { "epoch": 0.37, "grad_norm": 1.2114299535751343, "learning_rate": 7.195550452667212e-06, "loss": 0.5861, "step": 5804 }, { "epoch": 0.37, "grad_norm": 1.36197030544281, "learning_rate": 7.194611258043981e-06, "loss": 0.5553, "step": 5805 }, { "epoch": 0.37, "grad_norm": 1.1015406847000122, "learning_rate": 7.1936719674999535e-06, "loss": 0.535, "step": 5806 }, { "epoch": 0.37, "grad_norm": 1.2088611125946045, "learning_rate": 7.192732581076185e-06, "loss": 0.4914, "step": 5807 }, { "epoch": 0.37, "grad_norm": 1.298840880393982, "learning_rate": 7.191793098813733e-06, "loss": 0.5776, "step": 5808 }, { "epoch": 0.37, "grad_norm": 1.2928647994995117, "learning_rate": 7.190853520753659e-06, "loss": 0.5994, "step": 5809 }, { "epoch": 0.38, "grad_norm": 1.1830137968063354, "learning_rate": 7.18991384693703e-06, "loss": 0.533, "step": 5810 }, { "epoch": 0.38, "grad_norm": 1.3062474727630615, "learning_rate": 7.188974077404917e-06, "loss": 0.5746, "step": 5811 }, { "epoch": 0.38, "grad_norm": 1.2881027460098267, "learning_rate": 7.188034212198395e-06, "loss": 0.5631, "step": 5812 }, { "epoch": 0.38, "grad_norm": 1.1555759906768799, "learning_rate": 7.187094251358542e-06, "loss": 0.6035, "step": 5813 }, { "epoch": 0.38, "grad_norm": 1.0975843667984009, "learning_rate": 7.1861541949264435e-06, "loss": 0.5248, "step": 5814 }, { "epoch": 0.38, "grad_norm": 1.3384778499603271, "learning_rate": 7.185214042943184e-06, "loss": 0.5614, "step": 5815 }, { "epoch": 0.38, "grad_norm": 1.2005048990249634, "learning_rate": 7.1842737954498565e-06, "loss": 0.5336, "step": 5816 }, { "epoch": 0.38, "grad_norm": 1.1413596868515015, "learning_rate": 7.183333452487559e-06, "loss": 0.5009, "step": 5817 }, { "epoch": 0.38, "grad_norm": 1.1296144723892212, "learning_rate": 7.182393014097386e-06, "loss": 0.5372, "step": 5818 }, { "epoch": 0.38, "grad_norm": 1.2148109674453735, "learning_rate": 7.181452480320449e-06, "loss": 0.5435, "step": 5819 }, { "epoch": 0.38, "grad_norm": 1.1768232583999634, "learning_rate": 7.180511851197849e-06, "loss": 0.536, "step": 5820 }, { "epoch": 0.38, "grad_norm": 1.1124508380889893, "learning_rate": 7.179571126770704e-06, "loss": 0.588, "step": 5821 }, { "epoch": 0.38, "grad_norm": 1.1499580144882202, "learning_rate": 7.178630307080127e-06, "loss": 0.555, "step": 5822 }, { "epoch": 0.38, "grad_norm": 1.2339468002319336, "learning_rate": 7.177689392167239e-06, "loss": 0.5275, "step": 5823 }, { "epoch": 0.38, "grad_norm": 1.2197598218917847, "learning_rate": 7.17674838207317e-06, "loss": 0.6185, "step": 5824 }, { "epoch": 0.38, "grad_norm": 1.207306146621704, "learning_rate": 7.1758072768390426e-06, "loss": 0.5787, "step": 5825 }, { "epoch": 0.38, "grad_norm": 1.2308076620101929, "learning_rate": 7.1748660765059945e-06, "loss": 0.5527, "step": 5826 }, { "epoch": 0.38, "grad_norm": 1.2077268362045288, "learning_rate": 7.173924781115159e-06, "loss": 0.5828, "step": 5827 }, { "epoch": 0.38, "grad_norm": 1.1802500486373901, "learning_rate": 7.1729833907076815e-06, "loss": 0.5444, "step": 5828 }, { "epoch": 0.38, "grad_norm": 1.1997421979904175, "learning_rate": 7.172041905324707e-06, "loss": 0.5679, "step": 5829 }, { "epoch": 0.38, "grad_norm": 1.2595628499984741, "learning_rate": 7.171100325007383e-06, "loss": 0.5973, "step": 5830 }, { "epoch": 0.38, "grad_norm": 1.217757225036621, "learning_rate": 7.170158649796866e-06, "loss": 0.5633, "step": 5831 }, { "epoch": 0.38, "grad_norm": 1.1624360084533691, "learning_rate": 7.1692168797343156e-06, "loss": 0.5066, "step": 5832 }, { "epoch": 0.38, "grad_norm": 1.0261154174804688, "learning_rate": 7.168275014860889e-06, "loss": 0.5385, "step": 5833 }, { "epoch": 0.38, "grad_norm": 1.2280561923980713, "learning_rate": 7.167333055217757e-06, "loss": 0.5909, "step": 5834 }, { "epoch": 0.38, "grad_norm": 1.1064763069152832, "learning_rate": 7.1663910008460894e-06, "loss": 0.5622, "step": 5835 }, { "epoch": 0.38, "grad_norm": 1.1697779893875122, "learning_rate": 7.165448851787059e-06, "loss": 0.5252, "step": 5836 }, { "epoch": 0.38, "grad_norm": 1.2711048126220703, "learning_rate": 7.164506608081847e-06, "loss": 0.5478, "step": 5837 }, { "epoch": 0.38, "grad_norm": 1.1970158815383911, "learning_rate": 7.163564269771637e-06, "loss": 0.5629, "step": 5838 }, { "epoch": 0.38, "grad_norm": 1.251187801361084, "learning_rate": 7.162621836897613e-06, "loss": 0.5293, "step": 5839 }, { "epoch": 0.38, "grad_norm": 1.109452247619629, "learning_rate": 7.16167930950097e-06, "loss": 0.5311, "step": 5840 }, { "epoch": 0.38, "grad_norm": 1.1509573459625244, "learning_rate": 7.1607366876229e-06, "loss": 0.5635, "step": 5841 }, { "epoch": 0.38, "grad_norm": 1.341746211051941, "learning_rate": 7.159793971304605e-06, "loss": 0.5599, "step": 5842 }, { "epoch": 0.38, "grad_norm": 1.2173142433166504, "learning_rate": 7.158851160587288e-06, "loss": 0.5209, "step": 5843 }, { "epoch": 0.38, "grad_norm": 1.085029125213623, "learning_rate": 7.157908255512156e-06, "loss": 0.5317, "step": 5844 }, { "epoch": 0.38, "grad_norm": 1.1904122829437256, "learning_rate": 7.1569652561204206e-06, "loss": 0.5524, "step": 5845 }, { "epoch": 0.38, "grad_norm": 1.2105557918548584, "learning_rate": 7.156022162453301e-06, "loss": 0.5389, "step": 5846 }, { "epoch": 0.38, "grad_norm": 1.0971155166625977, "learning_rate": 7.155078974552014e-06, "loss": 0.5003, "step": 5847 }, { "epoch": 0.38, "grad_norm": 1.2037512063980103, "learning_rate": 7.154135692457785e-06, "loss": 0.5239, "step": 5848 }, { "epoch": 0.38, "grad_norm": 1.1158124208450317, "learning_rate": 7.153192316211845e-06, "loss": 0.5348, "step": 5849 }, { "epoch": 0.38, "grad_norm": 1.2083996534347534, "learning_rate": 7.152248845855421e-06, "loss": 0.5532, "step": 5850 }, { "epoch": 0.38, "grad_norm": 1.1784063577651978, "learning_rate": 7.1513052814297545e-06, "loss": 0.5333, "step": 5851 }, { "epoch": 0.38, "grad_norm": 1.193930983543396, "learning_rate": 7.1503616229760844e-06, "loss": 0.5606, "step": 5852 }, { "epoch": 0.38, "grad_norm": 1.232283115386963, "learning_rate": 7.1494178705356555e-06, "loss": 0.5422, "step": 5853 }, { "epoch": 0.38, "grad_norm": 1.1658791303634644, "learning_rate": 7.1484740241497165e-06, "loss": 0.5604, "step": 5854 }, { "epoch": 0.38, "grad_norm": 1.2025309801101685, "learning_rate": 7.147530083859523e-06, "loss": 0.5227, "step": 5855 }, { "epoch": 0.38, "grad_norm": 1.1393861770629883, "learning_rate": 7.146586049706328e-06, "loss": 0.5745, "step": 5856 }, { "epoch": 0.38, "grad_norm": 1.1124721765518188, "learning_rate": 7.145641921731397e-06, "loss": 0.5554, "step": 5857 }, { "epoch": 0.38, "grad_norm": 1.1088850498199463, "learning_rate": 7.144697699975992e-06, "loss": 0.5698, "step": 5858 }, { "epoch": 0.38, "grad_norm": 1.1969877481460571, "learning_rate": 7.1437533844813845e-06, "loss": 0.5571, "step": 5859 }, { "epoch": 0.38, "grad_norm": 1.1342276334762573, "learning_rate": 7.142808975288846e-06, "loss": 0.5476, "step": 5860 }, { "epoch": 0.38, "grad_norm": 1.1030689477920532, "learning_rate": 7.141864472439659e-06, "loss": 0.5283, "step": 5861 }, { "epoch": 0.38, "grad_norm": 1.1249347925186157, "learning_rate": 7.140919875975098e-06, "loss": 0.5351, "step": 5862 }, { "epoch": 0.38, "grad_norm": 1.2070239782333374, "learning_rate": 7.139975185936456e-06, "loss": 0.5258, "step": 5863 }, { "epoch": 0.38, "grad_norm": 1.1466392278671265, "learning_rate": 7.139030402365019e-06, "loss": 0.5523, "step": 5864 }, { "epoch": 0.38, "grad_norm": 1.280074119567871, "learning_rate": 7.138085525302082e-06, "loss": 0.5234, "step": 5865 }, { "epoch": 0.38, "grad_norm": 1.3668347597122192, "learning_rate": 7.137140554788943e-06, "loss": 0.5573, "step": 5866 }, { "epoch": 0.38, "grad_norm": 1.2635148763656616, "learning_rate": 7.136195490866904e-06, "loss": 0.5484, "step": 5867 }, { "epoch": 0.38, "grad_norm": 1.2932184934616089, "learning_rate": 7.135250333577272e-06, "loss": 0.5639, "step": 5868 }, { "epoch": 0.38, "grad_norm": 1.1789681911468506, "learning_rate": 7.134305082961356e-06, "loss": 0.5631, "step": 5869 }, { "epoch": 0.38, "grad_norm": 1.1316964626312256, "learning_rate": 7.133359739060471e-06, "loss": 0.5619, "step": 5870 }, { "epoch": 0.38, "grad_norm": 1.387231469154358, "learning_rate": 7.132414301915937e-06, "loss": 0.5955, "step": 5871 }, { "epoch": 0.38, "grad_norm": 1.2790650129318237, "learning_rate": 7.131468771569076e-06, "loss": 0.5634, "step": 5872 }, { "epoch": 0.38, "grad_norm": 1.1060516834259033, "learning_rate": 7.1305231480612145e-06, "loss": 0.5085, "step": 5873 }, { "epoch": 0.38, "grad_norm": 1.1414618492126465, "learning_rate": 7.129577431433682e-06, "loss": 0.5164, "step": 5874 }, { "epoch": 0.38, "grad_norm": 1.2361524105072021, "learning_rate": 7.128631621727814e-06, "loss": 0.6084, "step": 5875 }, { "epoch": 0.38, "grad_norm": 1.18135404586792, "learning_rate": 7.1276857189849515e-06, "loss": 0.5653, "step": 5876 }, { "epoch": 0.38, "grad_norm": 1.11202871799469, "learning_rate": 7.126739723246433e-06, "loss": 0.5175, "step": 5877 }, { "epoch": 0.38, "grad_norm": 1.196758508682251, "learning_rate": 7.125793634553611e-06, "loss": 0.606, "step": 5878 }, { "epoch": 0.38, "grad_norm": 1.1840870380401611, "learning_rate": 7.124847452947832e-06, "loss": 0.5297, "step": 5879 }, { "epoch": 0.38, "grad_norm": 1.2410426139831543, "learning_rate": 7.1239011784704535e-06, "loss": 0.5312, "step": 5880 }, { "epoch": 0.38, "grad_norm": 1.1737289428710938, "learning_rate": 7.122954811162834e-06, "loss": 0.5719, "step": 5881 }, { "epoch": 0.38, "grad_norm": 1.2408894300460815, "learning_rate": 7.122008351066339e-06, "loss": 0.5321, "step": 5882 }, { "epoch": 0.38, "grad_norm": 1.1179189682006836, "learning_rate": 7.121061798222331e-06, "loss": 0.5904, "step": 5883 }, { "epoch": 0.38, "grad_norm": 1.2610728740692139, "learning_rate": 7.120115152672186e-06, "loss": 0.5414, "step": 5884 }, { "epoch": 0.38, "grad_norm": 1.1212934255599976, "learning_rate": 7.119168414457276e-06, "loss": 0.5191, "step": 5885 }, { "epoch": 0.38, "grad_norm": 1.0503615140914917, "learning_rate": 7.118221583618983e-06, "loss": 0.5169, "step": 5886 }, { "epoch": 0.38, "grad_norm": 1.1654914617538452, "learning_rate": 7.117274660198691e-06, "loss": 0.5198, "step": 5887 }, { "epoch": 0.38, "grad_norm": 1.2221527099609375, "learning_rate": 7.116327644237785e-06, "loss": 0.5585, "step": 5888 }, { "epoch": 0.38, "grad_norm": 1.2678426504135132, "learning_rate": 7.1153805357776575e-06, "loss": 0.5272, "step": 5889 }, { "epoch": 0.38, "grad_norm": 1.1792014837265015, "learning_rate": 7.114433334859705e-06, "loss": 0.5934, "step": 5890 }, { "epoch": 0.38, "grad_norm": 1.247225284576416, "learning_rate": 7.113486041525326e-06, "loss": 0.6247, "step": 5891 }, { "epoch": 0.38, "grad_norm": 1.193377137184143, "learning_rate": 7.112538655815926e-06, "loss": 0.5539, "step": 5892 }, { "epoch": 0.38, "grad_norm": 1.157038688659668, "learning_rate": 7.111591177772912e-06, "loss": 0.5593, "step": 5893 }, { "epoch": 0.38, "grad_norm": 1.164798617362976, "learning_rate": 7.110643607437695e-06, "loss": 0.5719, "step": 5894 }, { "epoch": 0.38, "grad_norm": 1.1992065906524658, "learning_rate": 7.109695944851691e-06, "loss": 0.5385, "step": 5895 }, { "epoch": 0.38, "grad_norm": 1.2037427425384521, "learning_rate": 7.108748190056322e-06, "loss": 0.556, "step": 5896 }, { "epoch": 0.38, "grad_norm": 1.160654902458191, "learning_rate": 7.1078003430930085e-06, "loss": 0.556, "step": 5897 }, { "epoch": 0.38, "grad_norm": 1.1172174215316772, "learning_rate": 7.106852404003181e-06, "loss": 0.5617, "step": 5898 }, { "epoch": 0.38, "grad_norm": 1.258556842803955, "learning_rate": 7.1059043728282705e-06, "loss": 0.5841, "step": 5899 }, { "epoch": 0.38, "grad_norm": 1.283933401107788, "learning_rate": 7.1049562496097135e-06, "loss": 0.5592, "step": 5900 }, { "epoch": 0.38, "grad_norm": 1.1655707359313965, "learning_rate": 7.1040080343889484e-06, "loss": 0.5778, "step": 5901 }, { "epoch": 0.38, "grad_norm": 1.4522958993911743, "learning_rate": 7.103059727207422e-06, "loss": 0.6279, "step": 5902 }, { "epoch": 0.38, "grad_norm": 1.101605772972107, "learning_rate": 7.10211132810658e-06, "loss": 0.5333, "step": 5903 }, { "epoch": 0.38, "grad_norm": 1.1310769319534302, "learning_rate": 7.101162837127875e-06, "loss": 0.5454, "step": 5904 }, { "epoch": 0.38, "grad_norm": 1.2647957801818848, "learning_rate": 7.100214254312765e-06, "loss": 0.5513, "step": 5905 }, { "epoch": 0.38, "grad_norm": 1.252029299736023, "learning_rate": 7.0992655797027076e-06, "loss": 0.5262, "step": 5906 }, { "epoch": 0.38, "grad_norm": 1.1877245903015137, "learning_rate": 7.0983168133391674e-06, "loss": 0.5609, "step": 5907 }, { "epoch": 0.38, "grad_norm": 1.1753987073898315, "learning_rate": 7.097367955263614e-06, "loss": 0.5526, "step": 5908 }, { "epoch": 0.38, "grad_norm": 1.1298258304595947, "learning_rate": 7.096419005517518e-06, "loss": 0.5253, "step": 5909 }, { "epoch": 0.38, "grad_norm": 1.1976087093353271, "learning_rate": 7.0954699641423566e-06, "loss": 0.5209, "step": 5910 }, { "epoch": 0.38, "grad_norm": 1.1869145631790161, "learning_rate": 7.09452083117961e-06, "loss": 0.5543, "step": 5911 }, { "epoch": 0.38, "grad_norm": 1.1522679328918457, "learning_rate": 7.09357160667076e-06, "loss": 0.5772, "step": 5912 }, { "epoch": 0.38, "grad_norm": 1.2266483306884766, "learning_rate": 7.092622290657298e-06, "loss": 0.5675, "step": 5913 }, { "epoch": 0.38, "grad_norm": 1.1983931064605713, "learning_rate": 7.091672883180715e-06, "loss": 0.5396, "step": 5914 }, { "epoch": 0.38, "grad_norm": 1.150707721710205, "learning_rate": 7.090723384282507e-06, "loss": 0.543, "step": 5915 }, { "epoch": 0.38, "grad_norm": 1.5032914876937866, "learning_rate": 7.089773794004172e-06, "loss": 0.5996, "step": 5916 }, { "epoch": 0.38, "grad_norm": 1.1587097644805908, "learning_rate": 7.088824112387218e-06, "loss": 0.595, "step": 5917 }, { "epoch": 0.38, "grad_norm": 1.180344820022583, "learning_rate": 7.087874339473151e-06, "loss": 0.5404, "step": 5918 }, { "epoch": 0.38, "grad_norm": 1.0839515924453735, "learning_rate": 7.086924475303482e-06, "loss": 0.5052, "step": 5919 }, { "epoch": 0.38, "grad_norm": 1.2977229356765747, "learning_rate": 7.08597451991973e-06, "loss": 0.5863, "step": 5920 }, { "epoch": 0.38, "grad_norm": 1.1270989179611206, "learning_rate": 7.085024473363414e-06, "loss": 0.5622, "step": 5921 }, { "epoch": 0.38, "grad_norm": 1.1888834238052368, "learning_rate": 7.084074335676056e-06, "loss": 0.5938, "step": 5922 }, { "epoch": 0.38, "grad_norm": 1.32233464717865, "learning_rate": 7.083124106899187e-06, "loss": 0.5144, "step": 5923 }, { "epoch": 0.38, "grad_norm": 1.16090989112854, "learning_rate": 7.082173787074338e-06, "loss": 0.5611, "step": 5924 }, { "epoch": 0.38, "grad_norm": 1.2689201831817627, "learning_rate": 7.081223376243045e-06, "loss": 0.5734, "step": 5925 }, { "epoch": 0.38, "grad_norm": 1.1375502347946167, "learning_rate": 7.080272874446847e-06, "loss": 0.5553, "step": 5926 }, { "epoch": 0.38, "grad_norm": 1.1275227069854736, "learning_rate": 7.079322281727288e-06, "loss": 0.5731, "step": 5927 }, { "epoch": 0.38, "grad_norm": 1.2197185754776, "learning_rate": 7.078371598125919e-06, "loss": 0.5209, "step": 5928 }, { "epoch": 0.38, "grad_norm": 1.206825613975525, "learning_rate": 7.077420823684287e-06, "loss": 0.6441, "step": 5929 }, { "epoch": 0.38, "grad_norm": 1.1241000890731812, "learning_rate": 7.076469958443952e-06, "loss": 0.4916, "step": 5930 }, { "epoch": 0.38, "grad_norm": 1.240924596786499, "learning_rate": 7.075519002446474e-06, "loss": 0.5654, "step": 5931 }, { "epoch": 0.38, "grad_norm": 1.7727659940719604, "learning_rate": 7.074567955733413e-06, "loss": 0.518, "step": 5932 }, { "epoch": 0.38, "grad_norm": 1.2272207736968994, "learning_rate": 7.073616818346339e-06, "loss": 0.5366, "step": 5933 }, { "epoch": 0.38, "grad_norm": 1.3265480995178223, "learning_rate": 7.072665590326826e-06, "loss": 0.5699, "step": 5934 }, { "epoch": 0.38, "grad_norm": 1.2067286968231201, "learning_rate": 7.071714271716445e-06, "loss": 0.5326, "step": 5935 }, { "epoch": 0.38, "grad_norm": 1.2139737606048584, "learning_rate": 7.0707628625567805e-06, "loss": 0.5554, "step": 5936 }, { "epoch": 0.38, "grad_norm": 1.2506765127182007, "learning_rate": 7.069811362889414e-06, "loss": 0.5776, "step": 5937 }, { "epoch": 0.38, "grad_norm": 1.1063852310180664, "learning_rate": 7.0688597727559316e-06, "loss": 0.5378, "step": 5938 }, { "epoch": 0.38, "grad_norm": 1.1718217134475708, "learning_rate": 7.067908092197926e-06, "loss": 0.4687, "step": 5939 }, { "epoch": 0.38, "grad_norm": 1.274718165397644, "learning_rate": 7.0669563212569946e-06, "loss": 0.5689, "step": 5940 }, { "epoch": 0.38, "grad_norm": 1.25933837890625, "learning_rate": 7.066004459974735e-06, "loss": 0.5565, "step": 5941 }, { "epoch": 0.38, "grad_norm": 1.3410227298736572, "learning_rate": 7.065052508392749e-06, "loss": 0.5937, "step": 5942 }, { "epoch": 0.38, "grad_norm": 1.205487847328186, "learning_rate": 7.064100466552648e-06, "loss": 0.5314, "step": 5943 }, { "epoch": 0.38, "grad_norm": 1.1802926063537598, "learning_rate": 7.06314833449604e-06, "loss": 0.4926, "step": 5944 }, { "epoch": 0.38, "grad_norm": 1.2016409635543823, "learning_rate": 7.062196112264541e-06, "loss": 0.5408, "step": 5945 }, { "epoch": 0.38, "grad_norm": 1.1880288124084473, "learning_rate": 7.0612437998997705e-06, "loss": 0.5677, "step": 5946 }, { "epoch": 0.38, "grad_norm": 1.2736247777938843, "learning_rate": 7.0602913974433514e-06, "loss": 0.5999, "step": 5947 }, { "epoch": 0.38, "grad_norm": 1.1042466163635254, "learning_rate": 7.0593389049369125e-06, "loss": 0.4835, "step": 5948 }, { "epoch": 0.38, "grad_norm": 1.466592788696289, "learning_rate": 7.058386322422082e-06, "loss": 0.4988, "step": 5949 }, { "epoch": 0.38, "grad_norm": 1.084027647972107, "learning_rate": 7.057433649940496e-06, "loss": 0.5515, "step": 5950 }, { "epoch": 0.38, "grad_norm": 1.3538851737976074, "learning_rate": 7.056480887533793e-06, "loss": 0.5479, "step": 5951 }, { "epoch": 0.38, "grad_norm": 1.1293922662734985, "learning_rate": 7.0555280352436175e-06, "loss": 0.502, "step": 5952 }, { "epoch": 0.38, "grad_norm": 1.2255100011825562, "learning_rate": 7.054575093111614e-06, "loss": 0.5779, "step": 5953 }, { "epoch": 0.38, "grad_norm": 1.356698751449585, "learning_rate": 7.053622061179435e-06, "loss": 0.5794, "step": 5954 }, { "epoch": 0.38, "grad_norm": 1.2134710550308228, "learning_rate": 7.052668939488734e-06, "loss": 0.5775, "step": 5955 }, { "epoch": 0.38, "grad_norm": 1.1153883934020996, "learning_rate": 7.051715728081168e-06, "loss": 0.535, "step": 5956 }, { "epoch": 0.38, "grad_norm": 1.1871329545974731, "learning_rate": 7.050762426998403e-06, "loss": 0.5163, "step": 5957 }, { "epoch": 0.38, "grad_norm": 1.295093059539795, "learning_rate": 7.049809036282102e-06, "loss": 0.5401, "step": 5958 }, { "epoch": 0.38, "grad_norm": 1.1358146667480469, "learning_rate": 7.0488555559739365e-06, "loss": 0.5412, "step": 5959 }, { "epoch": 0.38, "grad_norm": 1.1760401725769043, "learning_rate": 7.047901986115582e-06, "loss": 0.5452, "step": 5960 }, { "epoch": 0.38, "grad_norm": 1.2483502626419067, "learning_rate": 7.046948326748714e-06, "loss": 0.534, "step": 5961 }, { "epoch": 0.38, "grad_norm": 1.309890627861023, "learning_rate": 7.045994577915018e-06, "loss": 0.5746, "step": 5962 }, { "epoch": 0.38, "grad_norm": 1.2698537111282349, "learning_rate": 7.045040739656175e-06, "loss": 0.5463, "step": 5963 }, { "epoch": 0.38, "grad_norm": 1.1822984218597412, "learning_rate": 7.0440868120138795e-06, "loss": 0.5083, "step": 5964 }, { "epoch": 0.39, "grad_norm": 1.1974228620529175, "learning_rate": 7.043132795029822e-06, "loss": 0.5515, "step": 5965 }, { "epoch": 0.39, "grad_norm": 1.152414083480835, "learning_rate": 7.042178688745702e-06, "loss": 0.511, "step": 5966 }, { "epoch": 0.39, "grad_norm": 1.1611335277557373, "learning_rate": 7.04122449320322e-06, "loss": 0.5663, "step": 5967 }, { "epoch": 0.39, "grad_norm": 1.1212968826293945, "learning_rate": 7.040270208444082e-06, "loss": 0.5397, "step": 5968 }, { "epoch": 0.39, "grad_norm": 1.3138253688812256, "learning_rate": 7.039315834509999e-06, "loss": 0.581, "step": 5969 }, { "epoch": 0.39, "grad_norm": 1.174297571182251, "learning_rate": 7.038361371442679e-06, "loss": 0.4905, "step": 5970 }, { "epoch": 0.39, "grad_norm": 1.1507647037506104, "learning_rate": 7.037406819283845e-06, "loss": 0.6123, "step": 5971 }, { "epoch": 0.39, "grad_norm": 1.1887505054473877, "learning_rate": 7.036452178075215e-06, "loss": 0.5265, "step": 5972 }, { "epoch": 0.39, "grad_norm": 1.1592198610305786, "learning_rate": 7.035497447858514e-06, "loss": 0.5484, "step": 5973 }, { "epoch": 0.39, "grad_norm": 1.605101227760315, "learning_rate": 7.03454262867547e-06, "loss": 0.5919, "step": 5974 }, { "epoch": 0.39, "grad_norm": 1.2032926082611084, "learning_rate": 7.03358772056782e-06, "loss": 0.5641, "step": 5975 }, { "epoch": 0.39, "grad_norm": 1.269277572631836, "learning_rate": 7.032632723577295e-06, "loss": 0.5878, "step": 5976 }, { "epoch": 0.39, "grad_norm": 1.1404855251312256, "learning_rate": 7.031677637745637e-06, "loss": 0.5018, "step": 5977 }, { "epoch": 0.39, "grad_norm": 1.1833534240722656, "learning_rate": 7.030722463114594e-06, "loss": 0.5935, "step": 5978 }, { "epoch": 0.39, "grad_norm": 1.200840711593628, "learning_rate": 7.02976719972591e-06, "loss": 0.5683, "step": 5979 }, { "epoch": 0.39, "grad_norm": 1.1470863819122314, "learning_rate": 7.028811847621338e-06, "loss": 0.5312, "step": 5980 }, { "epoch": 0.39, "grad_norm": 1.0573832988739014, "learning_rate": 7.0278564068426366e-06, "loss": 0.5974, "step": 5981 }, { "epoch": 0.39, "grad_norm": 1.0351994037628174, "learning_rate": 7.026900877431562e-06, "loss": 0.4825, "step": 5982 }, { "epoch": 0.39, "grad_norm": 1.0246942043304443, "learning_rate": 7.025945259429879e-06, "loss": 0.4986, "step": 5983 }, { "epoch": 0.39, "grad_norm": 1.053442358970642, "learning_rate": 7.024989552879357e-06, "loss": 0.515, "step": 5984 }, { "epoch": 0.39, "grad_norm": 1.1222892999649048, "learning_rate": 7.024033757821766e-06, "loss": 0.5251, "step": 5985 }, { "epoch": 0.39, "grad_norm": 1.0536178350448608, "learning_rate": 7.023077874298881e-06, "loss": 0.5162, "step": 5986 }, { "epoch": 0.39, "grad_norm": 1.1603038311004639, "learning_rate": 7.0221219023524836e-06, "loss": 0.5369, "step": 5987 }, { "epoch": 0.39, "grad_norm": 1.147620439529419, "learning_rate": 7.021165842024352e-06, "loss": 0.524, "step": 5988 }, { "epoch": 0.39, "grad_norm": 1.1081664562225342, "learning_rate": 7.020209693356278e-06, "loss": 0.5096, "step": 5989 }, { "epoch": 0.39, "grad_norm": 1.1644911766052246, "learning_rate": 7.019253456390051e-06, "loss": 0.5424, "step": 5990 }, { "epoch": 0.39, "grad_norm": 1.0900899171829224, "learning_rate": 7.018297131167464e-06, "loss": 0.488, "step": 5991 }, { "epoch": 0.39, "grad_norm": 1.16664457321167, "learning_rate": 7.017340717730317e-06, "loss": 0.5592, "step": 5992 }, { "epoch": 0.39, "grad_norm": 1.1533406972885132, "learning_rate": 7.016384216120412e-06, "loss": 0.5987, "step": 5993 }, { "epoch": 0.39, "grad_norm": 1.1711434125900269, "learning_rate": 7.015427626379554e-06, "loss": 0.5813, "step": 5994 }, { "epoch": 0.39, "grad_norm": 1.251961588859558, "learning_rate": 7.014470948549555e-06, "loss": 0.5646, "step": 5995 }, { "epoch": 0.39, "grad_norm": 1.2751303911209106, "learning_rate": 7.01351418267223e-06, "loss": 0.6039, "step": 5996 }, { "epoch": 0.39, "grad_norm": 1.2532269954681396, "learning_rate": 7.012557328789393e-06, "loss": 0.5739, "step": 5997 }, { "epoch": 0.39, "grad_norm": 1.1852216720581055, "learning_rate": 7.011600386942868e-06, "loss": 0.5976, "step": 5998 }, { "epoch": 0.39, "grad_norm": 1.0930472612380981, "learning_rate": 7.010643357174483e-06, "loss": 0.5224, "step": 5999 }, { "epoch": 0.39, "grad_norm": 1.0554022789001465, "learning_rate": 7.00968623952606e-06, "loss": 0.5044, "step": 6000 }, { "epoch": 0.39, "grad_norm": 1.199090838432312, "learning_rate": 7.008729034039439e-06, "loss": 0.5513, "step": 6001 }, { "epoch": 0.39, "grad_norm": 1.2251099348068237, "learning_rate": 7.007771740756454e-06, "loss": 0.5547, "step": 6002 }, { "epoch": 0.39, "grad_norm": 1.2981244325637817, "learning_rate": 7.006814359718945e-06, "loss": 0.4832, "step": 6003 }, { "epoch": 0.39, "grad_norm": 1.189541220664978, "learning_rate": 7.005856890968761e-06, "loss": 0.5985, "step": 6004 }, { "epoch": 0.39, "grad_norm": 1.1450172662734985, "learning_rate": 7.004899334547746e-06, "loss": 0.5804, "step": 6005 }, { "epoch": 0.39, "grad_norm": 1.2970777750015259, "learning_rate": 7.003941690497753e-06, "loss": 0.6065, "step": 6006 }, { "epoch": 0.39, "grad_norm": 1.172196865081787, "learning_rate": 7.00298395886064e-06, "loss": 0.5769, "step": 6007 }, { "epoch": 0.39, "grad_norm": 1.1164379119873047, "learning_rate": 7.002026139678264e-06, "loss": 0.5303, "step": 6008 }, { "epoch": 0.39, "grad_norm": 1.2283685207366943, "learning_rate": 7.001068232992494e-06, "loss": 0.6183, "step": 6009 }, { "epoch": 0.39, "grad_norm": 1.2405163049697876, "learning_rate": 7.000110238845192e-06, "loss": 0.558, "step": 6010 }, { "epoch": 0.39, "grad_norm": 1.196915864944458, "learning_rate": 6.999152157278233e-06, "loss": 0.4917, "step": 6011 }, { "epoch": 0.39, "grad_norm": 1.1056181192398071, "learning_rate": 6.9981939883334896e-06, "loss": 0.5014, "step": 6012 }, { "epoch": 0.39, "grad_norm": 1.2112104892730713, "learning_rate": 6.997235732052844e-06, "loss": 0.6033, "step": 6013 }, { "epoch": 0.39, "grad_norm": 1.258182168006897, "learning_rate": 6.996277388478176e-06, "loss": 0.5128, "step": 6014 }, { "epoch": 0.39, "grad_norm": 1.2485671043395996, "learning_rate": 6.995318957651373e-06, "loss": 0.5709, "step": 6015 }, { "epoch": 0.39, "grad_norm": 1.1564486026763916, "learning_rate": 6.99436043961433e-06, "loss": 0.5261, "step": 6016 }, { "epoch": 0.39, "grad_norm": 1.0823822021484375, "learning_rate": 6.993401834408935e-06, "loss": 0.4785, "step": 6017 }, { "epoch": 0.39, "grad_norm": 1.2703471183776855, "learning_rate": 6.992443142077089e-06, "loss": 0.5411, "step": 6018 }, { "epoch": 0.39, "grad_norm": 1.2116377353668213, "learning_rate": 6.991484362660695e-06, "loss": 0.5361, "step": 6019 }, { "epoch": 0.39, "grad_norm": 1.264754056930542, "learning_rate": 6.990525496201657e-06, "loss": 0.5467, "step": 6020 }, { "epoch": 0.39, "grad_norm": 1.2340534925460815, "learning_rate": 6.989566542741884e-06, "loss": 0.5557, "step": 6021 }, { "epoch": 0.39, "grad_norm": 1.168150544166565, "learning_rate": 6.988607502323293e-06, "loss": 0.5498, "step": 6022 }, { "epoch": 0.39, "grad_norm": 1.3923534154891968, "learning_rate": 6.9876483749877964e-06, "loss": 0.5245, "step": 6023 }, { "epoch": 0.39, "grad_norm": 1.1085995435714722, "learning_rate": 6.986689160777318e-06, "loss": 0.5161, "step": 6024 }, { "epoch": 0.39, "grad_norm": 1.2665342092514038, "learning_rate": 6.985729859733783e-06, "loss": 0.5413, "step": 6025 }, { "epoch": 0.39, "grad_norm": 1.0906121730804443, "learning_rate": 6.9847704718991184e-06, "loss": 0.526, "step": 6026 }, { "epoch": 0.39, "grad_norm": 1.441587209701538, "learning_rate": 6.983810997315257e-06, "loss": 0.5393, "step": 6027 }, { "epoch": 0.39, "grad_norm": 1.3206347227096558, "learning_rate": 6.982851436024136e-06, "loss": 0.602, "step": 6028 }, { "epoch": 0.39, "grad_norm": 1.3821207284927368, "learning_rate": 6.981891788067694e-06, "loss": 0.5969, "step": 6029 }, { "epoch": 0.39, "grad_norm": 1.3472919464111328, "learning_rate": 6.980932053487875e-06, "loss": 0.5451, "step": 6030 }, { "epoch": 0.39, "grad_norm": 1.2270338535308838, "learning_rate": 6.979972232326629e-06, "loss": 0.5768, "step": 6031 }, { "epoch": 0.39, "grad_norm": 1.1484627723693848, "learning_rate": 6.979012324625902e-06, "loss": 0.5391, "step": 6032 }, { "epoch": 0.39, "grad_norm": 1.2016282081604004, "learning_rate": 6.978052330427654e-06, "loss": 0.5128, "step": 6033 }, { "epoch": 0.39, "grad_norm": 1.1935162544250488, "learning_rate": 6.977092249773842e-06, "loss": 0.5393, "step": 6034 }, { "epoch": 0.39, "grad_norm": 1.1383541822433472, "learning_rate": 6.976132082706428e-06, "loss": 0.5201, "step": 6035 }, { "epoch": 0.39, "grad_norm": 1.2448954582214355, "learning_rate": 6.975171829267379e-06, "loss": 0.5626, "step": 6036 }, { "epoch": 0.39, "grad_norm": 1.1115562915802002, "learning_rate": 6.9742114894986675e-06, "loss": 0.5489, "step": 6037 }, { "epoch": 0.39, "grad_norm": 1.2389562129974365, "learning_rate": 6.973251063442264e-06, "loss": 0.5542, "step": 6038 }, { "epoch": 0.39, "grad_norm": 1.2927027940750122, "learning_rate": 6.972290551140146e-06, "loss": 0.487, "step": 6039 }, { "epoch": 0.39, "grad_norm": 1.1720937490463257, "learning_rate": 6.9713299526343e-06, "loss": 0.5479, "step": 6040 }, { "epoch": 0.39, "grad_norm": 1.262337327003479, "learning_rate": 6.970369267966705e-06, "loss": 0.5392, "step": 6041 }, { "epoch": 0.39, "grad_norm": 1.3031853437423706, "learning_rate": 6.969408497179353e-06, "loss": 0.5617, "step": 6042 }, { "epoch": 0.39, "grad_norm": 1.096333384513855, "learning_rate": 6.968447640314238e-06, "loss": 0.5278, "step": 6043 }, { "epoch": 0.39, "grad_norm": 1.1268466711044312, "learning_rate": 6.967486697413355e-06, "loss": 0.586, "step": 6044 }, { "epoch": 0.39, "grad_norm": 1.208379864692688, "learning_rate": 6.966525668518704e-06, "loss": 0.5285, "step": 6045 }, { "epoch": 0.39, "grad_norm": 1.1348187923431396, "learning_rate": 6.96556455367229e-06, "loss": 0.5423, "step": 6046 }, { "epoch": 0.39, "grad_norm": 1.1430238485336304, "learning_rate": 6.964603352916122e-06, "loss": 0.5354, "step": 6047 }, { "epoch": 0.39, "grad_norm": 1.269011378288269, "learning_rate": 6.963642066292207e-06, "loss": 0.5243, "step": 6048 }, { "epoch": 0.39, "grad_norm": 1.192659854888916, "learning_rate": 6.962680693842567e-06, "loss": 0.5588, "step": 6049 }, { "epoch": 0.39, "grad_norm": 1.2398600578308105, "learning_rate": 6.961719235609217e-06, "loss": 0.5447, "step": 6050 }, { "epoch": 0.39, "grad_norm": 1.0740631818771362, "learning_rate": 6.960757691634179e-06, "loss": 0.4877, "step": 6051 }, { "epoch": 0.39, "grad_norm": 1.1430654525756836, "learning_rate": 6.9597960619594815e-06, "loss": 0.5428, "step": 6052 }, { "epoch": 0.39, "grad_norm": 1.207662582397461, "learning_rate": 6.958834346627156e-06, "loss": 0.4884, "step": 6053 }, { "epoch": 0.39, "grad_norm": 1.1131601333618164, "learning_rate": 6.957872545679233e-06, "loss": 0.5123, "step": 6054 }, { "epoch": 0.39, "grad_norm": 1.2335745096206665, "learning_rate": 6.956910659157753e-06, "loss": 0.5294, "step": 6055 }, { "epoch": 0.39, "grad_norm": 1.0423855781555176, "learning_rate": 6.9559486871047575e-06, "loss": 0.499, "step": 6056 }, { "epoch": 0.39, "grad_norm": 1.2738733291625977, "learning_rate": 6.954986629562292e-06, "loss": 0.5359, "step": 6057 }, { "epoch": 0.39, "grad_norm": 1.3515545129776, "learning_rate": 6.954024486572404e-06, "loss": 0.5865, "step": 6058 }, { "epoch": 0.39, "grad_norm": 1.0996156930923462, "learning_rate": 6.953062258177146e-06, "loss": 0.5516, "step": 6059 }, { "epoch": 0.39, "grad_norm": 1.1815065145492554, "learning_rate": 6.952099944418578e-06, "loss": 0.5808, "step": 6060 }, { "epoch": 0.39, "grad_norm": 1.2514369487762451, "learning_rate": 6.951137545338757e-06, "loss": 0.5422, "step": 6061 }, { "epoch": 0.39, "grad_norm": 1.1689319610595703, "learning_rate": 6.950175060979747e-06, "loss": 0.5349, "step": 6062 }, { "epoch": 0.39, "grad_norm": 1.1700750589370728, "learning_rate": 6.949212491383617e-06, "loss": 0.4982, "step": 6063 }, { "epoch": 0.39, "grad_norm": 1.2882245779037476, "learning_rate": 6.9482498365924375e-06, "loss": 0.5904, "step": 6064 }, { "epoch": 0.39, "grad_norm": 1.1796550750732422, "learning_rate": 6.947287096648285e-06, "loss": 0.5411, "step": 6065 }, { "epoch": 0.39, "grad_norm": 1.1070138216018677, "learning_rate": 6.946324271593238e-06, "loss": 0.5192, "step": 6066 }, { "epoch": 0.39, "grad_norm": 1.3117196559906006, "learning_rate": 6.945361361469379e-06, "loss": 0.5495, "step": 6067 }, { "epoch": 0.39, "grad_norm": 1.5770291090011597, "learning_rate": 6.944398366318792e-06, "loss": 0.5362, "step": 6068 }, { "epoch": 0.39, "grad_norm": 1.1701055765151978, "learning_rate": 6.9434352861835704e-06, "loss": 0.5567, "step": 6069 }, { "epoch": 0.39, "grad_norm": 1.1229650974273682, "learning_rate": 6.9424721211058054e-06, "loss": 0.5618, "step": 6070 }, { "epoch": 0.39, "grad_norm": 1.2073525190353394, "learning_rate": 6.941508871127597e-06, "loss": 0.5998, "step": 6071 }, { "epoch": 0.39, "grad_norm": 1.2510483264923096, "learning_rate": 6.940545536291045e-06, "loss": 0.5548, "step": 6072 }, { "epoch": 0.39, "grad_norm": 1.157378077507019, "learning_rate": 6.939582116638252e-06, "loss": 0.5524, "step": 6073 }, { "epoch": 0.39, "grad_norm": 1.3090991973876953, "learning_rate": 6.938618612211332e-06, "loss": 0.624, "step": 6074 }, { "epoch": 0.39, "grad_norm": 1.0714911222457886, "learning_rate": 6.937655023052393e-06, "loss": 0.5227, "step": 6075 }, { "epoch": 0.39, "grad_norm": 1.1929303407669067, "learning_rate": 6.936691349203551e-06, "loss": 0.5158, "step": 6076 }, { "epoch": 0.39, "grad_norm": 2.745897054672241, "learning_rate": 6.935727590706928e-06, "loss": 0.5436, "step": 6077 }, { "epoch": 0.39, "grad_norm": 1.4403263330459595, "learning_rate": 6.934763747604647e-06, "loss": 0.5361, "step": 6078 }, { "epoch": 0.39, "grad_norm": 1.1832119226455688, "learning_rate": 6.933799819938833e-06, "loss": 0.5263, "step": 6079 }, { "epoch": 0.39, "grad_norm": 1.0671403408050537, "learning_rate": 6.932835807751618e-06, "loss": 0.4892, "step": 6080 }, { "epoch": 0.39, "grad_norm": 1.1626826524734497, "learning_rate": 6.931871711085139e-06, "loss": 0.5491, "step": 6081 }, { "epoch": 0.39, "grad_norm": 1.0832364559173584, "learning_rate": 6.930907529981529e-06, "loss": 0.466, "step": 6082 }, { "epoch": 0.39, "grad_norm": 1.1642686128616333, "learning_rate": 6.929943264482932e-06, "loss": 0.547, "step": 6083 }, { "epoch": 0.39, "grad_norm": 1.396848440170288, "learning_rate": 6.928978914631498e-06, "loss": 0.5286, "step": 6084 }, { "epoch": 0.39, "grad_norm": 1.2612476348876953, "learning_rate": 6.928014480469369e-06, "loss": 0.5276, "step": 6085 }, { "epoch": 0.39, "grad_norm": 1.2223546504974365, "learning_rate": 6.927049962038703e-06, "loss": 0.5474, "step": 6086 }, { "epoch": 0.39, "grad_norm": 1.095672845840454, "learning_rate": 6.926085359381656e-06, "loss": 0.5237, "step": 6087 }, { "epoch": 0.39, "grad_norm": 1.1244395971298218, "learning_rate": 6.9251206725403854e-06, "loss": 0.5318, "step": 6088 }, { "epoch": 0.39, "grad_norm": 1.1659538745880127, "learning_rate": 6.924155901557057e-06, "loss": 0.5594, "step": 6089 }, { "epoch": 0.39, "grad_norm": 1.1133043766021729, "learning_rate": 6.92319104647384e-06, "loss": 0.5487, "step": 6090 }, { "epoch": 0.39, "grad_norm": 1.2728968858718872, "learning_rate": 6.922226107332903e-06, "loss": 0.4921, "step": 6091 }, { "epoch": 0.39, "grad_norm": 1.2778778076171875, "learning_rate": 6.9212610841764226e-06, "loss": 0.583, "step": 6092 }, { "epoch": 0.39, "grad_norm": 1.0908726453781128, "learning_rate": 6.920295977046578e-06, "loss": 0.5188, "step": 6093 }, { "epoch": 0.39, "grad_norm": 1.134155035018921, "learning_rate": 6.919330785985549e-06, "loss": 0.4969, "step": 6094 }, { "epoch": 0.39, "grad_norm": 1.1925615072250366, "learning_rate": 6.918365511035527e-06, "loss": 0.5583, "step": 6095 }, { "epoch": 0.39, "grad_norm": 1.232566237449646, "learning_rate": 6.917400152238694e-06, "loss": 0.5724, "step": 6096 }, { "epoch": 0.39, "grad_norm": 1.1023310422897339, "learning_rate": 6.916434709637248e-06, "loss": 0.4673, "step": 6097 }, { "epoch": 0.39, "grad_norm": 1.2096987962722778, "learning_rate": 6.9154691832733865e-06, "loss": 0.5035, "step": 6098 }, { "epoch": 0.39, "grad_norm": 1.1559388637542725, "learning_rate": 6.914503573189308e-06, "loss": 0.6072, "step": 6099 }, { "epoch": 0.39, "grad_norm": 1.2453652620315552, "learning_rate": 6.913537879427219e-06, "loss": 0.5435, "step": 6100 }, { "epoch": 0.39, "grad_norm": 1.1782134771347046, "learning_rate": 6.9125721020293255e-06, "loss": 0.5326, "step": 6101 }, { "epoch": 0.39, "grad_norm": 1.064038872718811, "learning_rate": 6.9116062410378405e-06, "loss": 0.5099, "step": 6102 }, { "epoch": 0.39, "grad_norm": 1.149479866027832, "learning_rate": 6.910640296494977e-06, "loss": 0.5626, "step": 6103 }, { "epoch": 0.39, "grad_norm": 1.1718989610671997, "learning_rate": 6.909674268442957e-06, "loss": 0.5025, "step": 6104 }, { "epoch": 0.39, "grad_norm": 1.0925127267837524, "learning_rate": 6.908708156924002e-06, "loss": 0.5455, "step": 6105 }, { "epoch": 0.39, "grad_norm": 1.1734354496002197, "learning_rate": 6.907741961980337e-06, "loss": 0.5152, "step": 6106 }, { "epoch": 0.39, "grad_norm": 1.2430869340896606, "learning_rate": 6.9067756836541945e-06, "loss": 0.5996, "step": 6107 }, { "epoch": 0.39, "grad_norm": 1.2869148254394531, "learning_rate": 6.905809321987805e-06, "loss": 0.5311, "step": 6108 }, { "epoch": 0.39, "grad_norm": 1.2331085205078125, "learning_rate": 6.904842877023407e-06, "loss": 0.5217, "step": 6109 }, { "epoch": 0.39, "grad_norm": 1.1892468929290771, "learning_rate": 6.9038763488032425e-06, "loss": 0.5937, "step": 6110 }, { "epoch": 0.39, "grad_norm": 1.1445409059524536, "learning_rate": 6.902909737369554e-06, "loss": 0.5489, "step": 6111 }, { "epoch": 0.39, "grad_norm": 1.2602051496505737, "learning_rate": 6.9019430427645895e-06, "loss": 0.5592, "step": 6112 }, { "epoch": 0.39, "grad_norm": 1.3477544784545898, "learning_rate": 6.9009762650306036e-06, "loss": 0.5709, "step": 6113 }, { "epoch": 0.39, "grad_norm": 1.1873548030853271, "learning_rate": 6.900009404209849e-06, "loss": 0.5576, "step": 6114 }, { "epoch": 0.39, "grad_norm": 1.1707823276519775, "learning_rate": 6.899042460344585e-06, "loss": 0.5591, "step": 6115 }, { "epoch": 0.39, "grad_norm": 1.1535279750823975, "learning_rate": 6.898075433477076e-06, "loss": 0.5118, "step": 6116 }, { "epoch": 0.39, "grad_norm": 1.1759034395217896, "learning_rate": 6.897108323649585e-06, "loss": 0.5271, "step": 6117 }, { "epoch": 0.39, "grad_norm": 1.1745489835739136, "learning_rate": 6.8961411309043845e-06, "loss": 0.525, "step": 6118 }, { "epoch": 0.39, "grad_norm": 1.2129008769989014, "learning_rate": 6.895173855283748e-06, "loss": 0.5046, "step": 6119 }, { "epoch": 0.4, "grad_norm": 1.2151168584823608, "learning_rate": 6.8942064968299515e-06, "loss": 0.5846, "step": 6120 }, { "epoch": 0.4, "grad_norm": 1.1068388223648071, "learning_rate": 6.893239055585275e-06, "loss": 0.5215, "step": 6121 }, { "epoch": 0.4, "grad_norm": 1.1389178037643433, "learning_rate": 6.892271531592006e-06, "loss": 0.5153, "step": 6122 }, { "epoch": 0.4, "grad_norm": 1.140947699546814, "learning_rate": 6.8913039248924295e-06, "loss": 0.5645, "step": 6123 }, { "epoch": 0.4, "grad_norm": 1.2187559604644775, "learning_rate": 6.8903362355288376e-06, "loss": 0.539, "step": 6124 }, { "epoch": 0.4, "grad_norm": 1.1514949798583984, "learning_rate": 6.889368463543527e-06, "loss": 0.5604, "step": 6125 }, { "epoch": 0.4, "grad_norm": 1.1651664972305298, "learning_rate": 6.8884006089787945e-06, "loss": 0.5498, "step": 6126 }, { "epoch": 0.4, "grad_norm": 1.248345136642456, "learning_rate": 6.887432671876943e-06, "loss": 0.5887, "step": 6127 }, { "epoch": 0.4, "grad_norm": 1.2693934440612793, "learning_rate": 6.886464652280282e-06, "loss": 0.56, "step": 6128 }, { "epoch": 0.4, "grad_norm": 1.0906604528427124, "learning_rate": 6.885496550231115e-06, "loss": 0.5539, "step": 6129 }, { "epoch": 0.4, "grad_norm": 1.2612546682357788, "learning_rate": 6.884528365771759e-06, "loss": 0.5755, "step": 6130 }, { "epoch": 0.4, "grad_norm": 1.1863057613372803, "learning_rate": 6.883560098944532e-06, "loss": 0.565, "step": 6131 }, { "epoch": 0.4, "grad_norm": 1.118100881576538, "learning_rate": 6.882591749791752e-06, "loss": 0.5182, "step": 6132 }, { "epoch": 0.4, "grad_norm": 1.1138108968734741, "learning_rate": 6.881623318355742e-06, "loss": 0.5546, "step": 6133 }, { "epoch": 0.4, "grad_norm": 1.2449263334274292, "learning_rate": 6.880654804678833e-06, "loss": 0.5655, "step": 6134 }, { "epoch": 0.4, "grad_norm": 1.2226442098617554, "learning_rate": 6.879686208803354e-06, "loss": 0.5507, "step": 6135 }, { "epoch": 0.4, "grad_norm": 1.8439936637878418, "learning_rate": 6.878717530771642e-06, "loss": 0.5691, "step": 6136 }, { "epoch": 0.4, "grad_norm": 1.1231306791305542, "learning_rate": 6.877748770626033e-06, "loss": 0.5753, "step": 6137 }, { "epoch": 0.4, "grad_norm": 1.1639727354049683, "learning_rate": 6.8767799284088696e-06, "loss": 0.5611, "step": 6138 }, { "epoch": 0.4, "grad_norm": 1.2662460803985596, "learning_rate": 6.875811004162498e-06, "loss": 0.5491, "step": 6139 }, { "epoch": 0.4, "grad_norm": 1.2104285955429077, "learning_rate": 6.874841997929267e-06, "loss": 0.5615, "step": 6140 }, { "epoch": 0.4, "grad_norm": 1.2495653629302979, "learning_rate": 6.8738729097515285e-06, "loss": 0.5432, "step": 6141 }, { "epoch": 0.4, "grad_norm": 1.0710490942001343, "learning_rate": 6.872903739671641e-06, "loss": 0.5432, "step": 6142 }, { "epoch": 0.4, "grad_norm": 1.2847353219985962, "learning_rate": 6.871934487731962e-06, "loss": 0.5762, "step": 6143 }, { "epoch": 0.4, "grad_norm": 1.155266523361206, "learning_rate": 6.870965153974858e-06, "loss": 0.5528, "step": 6144 }, { "epoch": 0.4, "grad_norm": 1.2420604228973389, "learning_rate": 6.869995738442693e-06, "loss": 0.5111, "step": 6145 }, { "epoch": 0.4, "grad_norm": 1.087788701057434, "learning_rate": 6.869026241177838e-06, "loss": 0.5275, "step": 6146 }, { "epoch": 0.4, "grad_norm": 1.0885372161865234, "learning_rate": 6.868056662222671e-06, "loss": 0.5213, "step": 6147 }, { "epoch": 0.4, "grad_norm": 1.131984829902649, "learning_rate": 6.867087001619564e-06, "loss": 0.5104, "step": 6148 }, { "epoch": 0.4, "grad_norm": 1.0742156505584717, "learning_rate": 6.8661172594109035e-06, "loss": 0.5366, "step": 6149 }, { "epoch": 0.4, "grad_norm": 1.316005825996399, "learning_rate": 6.865147435639071e-06, "loss": 0.5583, "step": 6150 }, { "epoch": 0.4, "grad_norm": 1.1539866924285889, "learning_rate": 6.8641775303464575e-06, "loss": 0.5652, "step": 6151 }, { "epoch": 0.4, "grad_norm": 1.4322410821914673, "learning_rate": 6.863207543575452e-06, "loss": 0.5756, "step": 6152 }, { "epoch": 0.4, "grad_norm": 1.1806581020355225, "learning_rate": 6.862237475368453e-06, "loss": 0.5329, "step": 6153 }, { "epoch": 0.4, "grad_norm": 1.1805520057678223, "learning_rate": 6.861267325767859e-06, "loss": 0.5187, "step": 6154 }, { "epoch": 0.4, "grad_norm": 1.232495665550232, "learning_rate": 6.8602970948160705e-06, "loss": 0.529, "step": 6155 }, { "epoch": 0.4, "grad_norm": 1.2062058448791504, "learning_rate": 6.859326782555497e-06, "loss": 0.5395, "step": 6156 }, { "epoch": 0.4, "grad_norm": 1.1710022687911987, "learning_rate": 6.858356389028548e-06, "loss": 0.5568, "step": 6157 }, { "epoch": 0.4, "grad_norm": 1.163751482963562, "learning_rate": 6.857385914277633e-06, "loss": 0.5256, "step": 6158 }, { "epoch": 0.4, "grad_norm": 1.1841154098510742, "learning_rate": 6.8564153583451745e-06, "loss": 0.5906, "step": 6159 }, { "epoch": 0.4, "grad_norm": 1.1245373487472534, "learning_rate": 6.855444721273589e-06, "loss": 0.587, "step": 6160 }, { "epoch": 0.4, "grad_norm": 1.2807302474975586, "learning_rate": 6.8544740031053015e-06, "loss": 0.5934, "step": 6161 }, { "epoch": 0.4, "grad_norm": 1.1213295459747314, "learning_rate": 6.853503203882741e-06, "loss": 0.5493, "step": 6162 }, { "epoch": 0.4, "grad_norm": 1.1774574518203735, "learning_rate": 6.852532323648337e-06, "loss": 0.5212, "step": 6163 }, { "epoch": 0.4, "grad_norm": 1.1440532207489014, "learning_rate": 6.851561362444525e-06, "loss": 0.514, "step": 6164 }, { "epoch": 0.4, "grad_norm": 1.1892151832580566, "learning_rate": 6.8505903203137416e-06, "loss": 0.5474, "step": 6165 }, { "epoch": 0.4, "grad_norm": 1.1239874362945557, "learning_rate": 6.849619197298431e-06, "loss": 0.5192, "step": 6166 }, { "epoch": 0.4, "grad_norm": 1.1852420568466187, "learning_rate": 6.848647993441038e-06, "loss": 0.563, "step": 6167 }, { "epoch": 0.4, "grad_norm": 1.1981594562530518, "learning_rate": 6.84767670878401e-06, "loss": 0.5751, "step": 6168 }, { "epoch": 0.4, "grad_norm": 1.3802472352981567, "learning_rate": 6.846705343369801e-06, "loss": 0.5774, "step": 6169 }, { "epoch": 0.4, "grad_norm": 1.1217526197433472, "learning_rate": 6.845733897240865e-06, "loss": 0.5307, "step": 6170 }, { "epoch": 0.4, "grad_norm": 1.2574317455291748, "learning_rate": 6.8447623704396615e-06, "loss": 0.5696, "step": 6171 }, { "epoch": 0.4, "grad_norm": 1.1008095741271973, "learning_rate": 6.843790763008657e-06, "loss": 0.5112, "step": 6172 }, { "epoch": 0.4, "grad_norm": 1.1752705574035645, "learning_rate": 6.842819074990312e-06, "loss": 0.5581, "step": 6173 }, { "epoch": 0.4, "grad_norm": 1.1924093961715698, "learning_rate": 6.841847306427102e-06, "loss": 0.5926, "step": 6174 }, { "epoch": 0.4, "grad_norm": 1.2240839004516602, "learning_rate": 6.840875457361499e-06, "loss": 0.5391, "step": 6175 }, { "epoch": 0.4, "grad_norm": 1.1463924646377563, "learning_rate": 6.839903527835977e-06, "loss": 0.5435, "step": 6176 }, { "epoch": 0.4, "grad_norm": 1.211368203163147, "learning_rate": 6.838931517893019e-06, "loss": 0.5454, "step": 6177 }, { "epoch": 0.4, "grad_norm": 1.1521978378295898, "learning_rate": 6.8379594275751115e-06, "loss": 0.5461, "step": 6178 }, { "epoch": 0.4, "grad_norm": 1.3102827072143555, "learning_rate": 6.836987256924737e-06, "loss": 0.6103, "step": 6179 }, { "epoch": 0.4, "grad_norm": 1.2204264402389526, "learning_rate": 6.836015005984389e-06, "loss": 0.5888, "step": 6180 }, { "epoch": 0.4, "grad_norm": 1.1435116529464722, "learning_rate": 6.8350426747965635e-06, "loss": 0.4908, "step": 6181 }, { "epoch": 0.4, "grad_norm": 1.0425684452056885, "learning_rate": 6.834070263403756e-06, "loss": 0.5051, "step": 6182 }, { "epoch": 0.4, "grad_norm": 1.094900131225586, "learning_rate": 6.833097771848471e-06, "loss": 0.5079, "step": 6183 }, { "epoch": 0.4, "grad_norm": 1.3161786794662476, "learning_rate": 6.83212520017321e-06, "loss": 0.5553, "step": 6184 }, { "epoch": 0.4, "grad_norm": 1.1906652450561523, "learning_rate": 6.831152548420483e-06, "loss": 0.5623, "step": 6185 }, { "epoch": 0.4, "grad_norm": 1.2371349334716797, "learning_rate": 6.830179816632805e-06, "loss": 0.5625, "step": 6186 }, { "epoch": 0.4, "grad_norm": 1.2581223249435425, "learning_rate": 6.829207004852687e-06, "loss": 0.5599, "step": 6187 }, { "epoch": 0.4, "grad_norm": 1.1344915628433228, "learning_rate": 6.82823411312265e-06, "loss": 0.4949, "step": 6188 }, { "epoch": 0.4, "grad_norm": 1.170858383178711, "learning_rate": 6.827261141485219e-06, "loss": 0.5558, "step": 6189 }, { "epoch": 0.4, "grad_norm": 1.15266752243042, "learning_rate": 6.826288089982916e-06, "loss": 0.5559, "step": 6190 }, { "epoch": 0.4, "grad_norm": 1.2077409029006958, "learning_rate": 6.825314958658273e-06, "loss": 0.5029, "step": 6191 }, { "epoch": 0.4, "grad_norm": 1.1027870178222656, "learning_rate": 6.824341747553823e-06, "loss": 0.4937, "step": 6192 }, { "epoch": 0.4, "grad_norm": 1.1922012567520142, "learning_rate": 6.823368456712102e-06, "loss": 0.5076, "step": 6193 }, { "epoch": 0.4, "grad_norm": 1.2134554386138916, "learning_rate": 6.82239508617565e-06, "loss": 0.6046, "step": 6194 }, { "epoch": 0.4, "grad_norm": 1.202255129814148, "learning_rate": 6.82142163598701e-06, "loss": 0.5764, "step": 6195 }, { "epoch": 0.4, "grad_norm": 1.1616559028625488, "learning_rate": 6.8204481061887306e-06, "loss": 0.5092, "step": 6196 }, { "epoch": 0.4, "grad_norm": 1.2050328254699707, "learning_rate": 6.81947449682336e-06, "loss": 0.5953, "step": 6197 }, { "epoch": 0.4, "grad_norm": 1.1330598592758179, "learning_rate": 6.818500807933455e-06, "loss": 0.524, "step": 6198 }, { "epoch": 0.4, "grad_norm": 1.1093913316726685, "learning_rate": 6.817527039561571e-06, "loss": 0.5162, "step": 6199 }, { "epoch": 0.4, "grad_norm": 1.1696937084197998, "learning_rate": 6.816553191750268e-06, "loss": 0.608, "step": 6200 }, { "epoch": 0.4, "grad_norm": 1.1299281120300293, "learning_rate": 6.8155792645421136e-06, "loss": 0.5634, "step": 6201 }, { "epoch": 0.4, "grad_norm": 1.190109372138977, "learning_rate": 6.814605257979673e-06, "loss": 0.522, "step": 6202 }, { "epoch": 0.4, "grad_norm": 1.1673444509506226, "learning_rate": 6.8136311721055186e-06, "loss": 0.5222, "step": 6203 }, { "epoch": 0.4, "grad_norm": 1.1818861961364746, "learning_rate": 6.8126570069622245e-06, "loss": 0.5594, "step": 6204 }, { "epoch": 0.4, "grad_norm": 1.2391098737716675, "learning_rate": 6.8116827625923686e-06, "loss": 0.5829, "step": 6205 }, { "epoch": 0.4, "grad_norm": 1.2831860780715942, "learning_rate": 6.810708439038535e-06, "loss": 0.5375, "step": 6206 }, { "epoch": 0.4, "grad_norm": 1.2531908750534058, "learning_rate": 6.809734036343307e-06, "loss": 0.6123, "step": 6207 }, { "epoch": 0.4, "grad_norm": 1.1168426275253296, "learning_rate": 6.8087595545492725e-06, "loss": 0.5274, "step": 6208 }, { "epoch": 0.4, "grad_norm": 1.1025714874267578, "learning_rate": 6.8077849936990245e-06, "loss": 0.5402, "step": 6209 }, { "epoch": 0.4, "grad_norm": 1.261942744255066, "learning_rate": 6.80681035383516e-06, "loss": 0.5579, "step": 6210 }, { "epoch": 0.4, "grad_norm": 1.2200559377670288, "learning_rate": 6.805835635000275e-06, "loss": 0.5462, "step": 6211 }, { "epoch": 0.4, "grad_norm": 1.1094104051589966, "learning_rate": 6.804860837236973e-06, "loss": 0.5262, "step": 6212 }, { "epoch": 0.4, "grad_norm": 1.2699966430664062, "learning_rate": 6.803885960587863e-06, "loss": 0.5357, "step": 6213 }, { "epoch": 0.4, "grad_norm": 1.1818995475769043, "learning_rate": 6.802911005095549e-06, "loss": 0.4696, "step": 6214 }, { "epoch": 0.4, "grad_norm": 1.289813756942749, "learning_rate": 6.8019359708026474e-06, "loss": 0.5609, "step": 6215 }, { "epoch": 0.4, "grad_norm": 1.0978182554244995, "learning_rate": 6.800960857751775e-06, "loss": 0.5145, "step": 6216 }, { "epoch": 0.4, "grad_norm": 1.040073037147522, "learning_rate": 6.799985665985549e-06, "loss": 0.5556, "step": 6217 }, { "epoch": 0.4, "grad_norm": 1.107276439666748, "learning_rate": 6.799010395546592e-06, "loss": 0.5162, "step": 6218 }, { "epoch": 0.4, "grad_norm": 1.0610854625701904, "learning_rate": 6.798035046477535e-06, "loss": 0.4818, "step": 6219 }, { "epoch": 0.4, "grad_norm": 1.1494131088256836, "learning_rate": 6.797059618821004e-06, "loss": 0.5195, "step": 6220 }, { "epoch": 0.4, "grad_norm": 1.0823564529418945, "learning_rate": 6.796084112619633e-06, "loss": 0.542, "step": 6221 }, { "epoch": 0.4, "grad_norm": 1.1609615087509155, "learning_rate": 6.79510852791606e-06, "loss": 0.5377, "step": 6222 }, { "epoch": 0.4, "grad_norm": 1.1961146593093872, "learning_rate": 6.794132864752925e-06, "loss": 0.5849, "step": 6223 }, { "epoch": 0.4, "grad_norm": 1.155741810798645, "learning_rate": 6.793157123172871e-06, "loss": 0.5488, "step": 6224 }, { "epoch": 0.4, "grad_norm": 1.2411519289016724, "learning_rate": 6.792181303218544e-06, "loss": 0.5704, "step": 6225 }, { "epoch": 0.4, "grad_norm": 1.131258487701416, "learning_rate": 6.7912054049325985e-06, "loss": 0.5744, "step": 6226 }, { "epoch": 0.4, "grad_norm": 1.1420243978500366, "learning_rate": 6.790229428357685e-06, "loss": 0.5443, "step": 6227 }, { "epoch": 0.4, "grad_norm": 1.2745170593261719, "learning_rate": 6.789253373536462e-06, "loss": 0.5391, "step": 6228 }, { "epoch": 0.4, "grad_norm": 1.1093865633010864, "learning_rate": 6.7882772405115904e-06, "loss": 0.5137, "step": 6229 }, { "epoch": 0.4, "grad_norm": 1.1834949254989624, "learning_rate": 6.787301029325735e-06, "loss": 0.5275, "step": 6230 }, { "epoch": 0.4, "grad_norm": 1.1257179975509644, "learning_rate": 6.786324740021563e-06, "loss": 0.5006, "step": 6231 }, { "epoch": 0.4, "grad_norm": 1.1611995697021484, "learning_rate": 6.785348372641743e-06, "loss": 0.5505, "step": 6232 }, { "epoch": 0.4, "grad_norm": 1.2069058418273926, "learning_rate": 6.784371927228956e-06, "loss": 0.6129, "step": 6233 }, { "epoch": 0.4, "grad_norm": 1.1423993110656738, "learning_rate": 6.783395403825872e-06, "loss": 0.5528, "step": 6234 }, { "epoch": 0.4, "grad_norm": 1.1629153490066528, "learning_rate": 6.782418802475178e-06, "loss": 0.5678, "step": 6235 }, { "epoch": 0.4, "grad_norm": 1.2157329320907593, "learning_rate": 6.781442123219557e-06, "loss": 0.553, "step": 6236 }, { "epoch": 0.4, "grad_norm": 1.146023154258728, "learning_rate": 6.780465366101696e-06, "loss": 0.5571, "step": 6237 }, { "epoch": 0.4, "grad_norm": 1.2512625455856323, "learning_rate": 6.779488531164289e-06, "loss": 0.5079, "step": 6238 }, { "epoch": 0.4, "grad_norm": 1.1497924327850342, "learning_rate": 6.778511618450028e-06, "loss": 0.5486, "step": 6239 }, { "epoch": 0.4, "grad_norm": 1.039147973060608, "learning_rate": 6.777534628001614e-06, "loss": 0.4849, "step": 6240 }, { "epoch": 0.4, "grad_norm": 1.0899019241333008, "learning_rate": 6.776557559861746e-06, "loss": 0.5307, "step": 6241 }, { "epoch": 0.4, "grad_norm": 1.0712246894836426, "learning_rate": 6.7755804140731306e-06, "loss": 0.5041, "step": 6242 }, { "epoch": 0.4, "grad_norm": 1.1592512130737305, "learning_rate": 6.774603190678478e-06, "loss": 0.5441, "step": 6243 }, { "epoch": 0.4, "grad_norm": 1.0418107509613037, "learning_rate": 6.773625889720497e-06, "loss": 0.5264, "step": 6244 }, { "epoch": 0.4, "grad_norm": 1.2064048051834106, "learning_rate": 6.772648511241906e-06, "loss": 0.5744, "step": 6245 }, { "epoch": 0.4, "grad_norm": 1.165002465248108, "learning_rate": 6.771671055285422e-06, "loss": 0.5637, "step": 6246 }, { "epoch": 0.4, "grad_norm": 1.1611536741256714, "learning_rate": 6.770693521893765e-06, "loss": 0.5979, "step": 6247 }, { "epoch": 0.4, "grad_norm": 1.028760552406311, "learning_rate": 6.769715911109664e-06, "loss": 0.4768, "step": 6248 }, { "epoch": 0.4, "grad_norm": 1.14340078830719, "learning_rate": 6.7687382229758455e-06, "loss": 0.5199, "step": 6249 }, { "epoch": 0.4, "grad_norm": 1.2347105741500854, "learning_rate": 6.767760457535044e-06, "loss": 0.565, "step": 6250 }, { "epoch": 0.4, "grad_norm": 1.2570358514785767, "learning_rate": 6.766782614829994e-06, "loss": 0.5542, "step": 6251 }, { "epoch": 0.4, "grad_norm": 1.1060149669647217, "learning_rate": 6.765804694903433e-06, "loss": 0.5393, "step": 6252 }, { "epoch": 0.4, "grad_norm": 1.1714591979980469, "learning_rate": 6.764826697798105e-06, "loss": 0.5469, "step": 6253 }, { "epoch": 0.4, "grad_norm": 1.088959813117981, "learning_rate": 6.763848623556756e-06, "loss": 0.5145, "step": 6254 }, { "epoch": 0.4, "grad_norm": 1.1937607526779175, "learning_rate": 6.762870472222134e-06, "loss": 0.5789, "step": 6255 }, { "epoch": 0.4, "grad_norm": 1.1750608682632446, "learning_rate": 6.761892243836992e-06, "loss": 0.5031, "step": 6256 }, { "epoch": 0.4, "grad_norm": 1.0782783031463623, "learning_rate": 6.7609139384440844e-06, "loss": 0.5736, "step": 6257 }, { "epoch": 0.4, "grad_norm": 1.2857240438461304, "learning_rate": 6.759935556086174e-06, "loss": 0.5419, "step": 6258 }, { "epoch": 0.4, "grad_norm": 1.1297870874404907, "learning_rate": 6.758957096806019e-06, "loss": 0.56, "step": 6259 }, { "epoch": 0.4, "grad_norm": 1.8294684886932373, "learning_rate": 6.75797856064639e-06, "loss": 0.5835, "step": 6260 }, { "epoch": 0.4, "grad_norm": 1.2184600830078125, "learning_rate": 6.756999947650052e-06, "loss": 0.5768, "step": 6261 }, { "epoch": 0.4, "grad_norm": 1.2808016538619995, "learning_rate": 6.75602125785978e-06, "loss": 0.5516, "step": 6262 }, { "epoch": 0.4, "grad_norm": 1.091097354888916, "learning_rate": 6.755042491318349e-06, "loss": 0.5509, "step": 6263 }, { "epoch": 0.4, "grad_norm": 1.1493154764175415, "learning_rate": 6.754063648068538e-06, "loss": 0.5685, "step": 6264 }, { "epoch": 0.4, "grad_norm": 1.2483724355697632, "learning_rate": 6.753084728153132e-06, "loss": 0.5304, "step": 6265 }, { "epoch": 0.4, "grad_norm": 1.1655519008636475, "learning_rate": 6.752105731614915e-06, "loss": 0.5658, "step": 6266 }, { "epoch": 0.4, "grad_norm": 1.0982307195663452, "learning_rate": 6.751126658496678e-06, "loss": 0.56, "step": 6267 }, { "epoch": 0.4, "grad_norm": 1.1585532426834106, "learning_rate": 6.750147508841211e-06, "loss": 0.5899, "step": 6268 }, { "epoch": 0.4, "grad_norm": 1.0906898975372314, "learning_rate": 6.7491682826913115e-06, "loss": 0.5371, "step": 6269 }, { "epoch": 0.4, "grad_norm": 1.1108118295669556, "learning_rate": 6.748188980089781e-06, "loss": 0.5334, "step": 6270 }, { "epoch": 0.4, "grad_norm": 1.2162574529647827, "learning_rate": 6.747209601079421e-06, "loss": 0.5475, "step": 6271 }, { "epoch": 0.4, "grad_norm": 1.3383821249008179, "learning_rate": 6.746230145703035e-06, "loss": 0.5236, "step": 6272 }, { "epoch": 0.4, "grad_norm": 1.0856972932815552, "learning_rate": 6.745250614003436e-06, "loss": 0.5189, "step": 6273 }, { "epoch": 0.4, "grad_norm": 1.206095576286316, "learning_rate": 6.744271006023435e-06, "loss": 0.5592, "step": 6274 }, { "epoch": 0.41, "grad_norm": 1.1160759925842285, "learning_rate": 6.743291321805849e-06, "loss": 0.5254, "step": 6275 }, { "epoch": 0.41, "grad_norm": 1.2378844022750854, "learning_rate": 6.7423115613934965e-06, "loss": 0.543, "step": 6276 }, { "epoch": 0.41, "grad_norm": 1.1193021535873413, "learning_rate": 6.741331724829202e-06, "loss": 0.5152, "step": 6277 }, { "epoch": 0.41, "grad_norm": 1.185930848121643, "learning_rate": 6.740351812155789e-06, "loss": 0.5529, "step": 6278 }, { "epoch": 0.41, "grad_norm": 1.2685974836349487, "learning_rate": 6.739371823416089e-06, "loss": 0.5267, "step": 6279 }, { "epoch": 0.41, "grad_norm": 1.1869227886199951, "learning_rate": 6.738391758652936e-06, "loss": 0.5429, "step": 6280 }, { "epoch": 0.41, "grad_norm": 1.2361222505569458, "learning_rate": 6.737411617909162e-06, "loss": 0.5554, "step": 6281 }, { "epoch": 0.41, "grad_norm": 1.2386929988861084, "learning_rate": 6.736431401227609e-06, "loss": 0.5128, "step": 6282 }, { "epoch": 0.41, "grad_norm": 1.1905664205551147, "learning_rate": 6.735451108651121e-06, "loss": 0.5808, "step": 6283 }, { "epoch": 0.41, "grad_norm": 1.1070398092269897, "learning_rate": 6.734470740222541e-06, "loss": 0.5605, "step": 6284 }, { "epoch": 0.41, "grad_norm": 1.4892581701278687, "learning_rate": 6.733490295984722e-06, "loss": 0.5106, "step": 6285 }, { "epoch": 0.41, "grad_norm": 1.1670253276824951, "learning_rate": 6.732509775980512e-06, "loss": 0.5071, "step": 6286 }, { "epoch": 0.41, "grad_norm": 1.518441915512085, "learning_rate": 6.731529180252772e-06, "loss": 0.589, "step": 6287 }, { "epoch": 0.41, "grad_norm": 1.2517870664596558, "learning_rate": 6.730548508844357e-06, "loss": 0.6019, "step": 6288 }, { "epoch": 0.41, "grad_norm": 1.3757600784301758, "learning_rate": 6.729567761798132e-06, "loss": 0.5228, "step": 6289 }, { "epoch": 0.41, "grad_norm": 1.1749088764190674, "learning_rate": 6.728586939156962e-06, "loss": 0.5263, "step": 6290 }, { "epoch": 0.41, "grad_norm": 1.1735305786132812, "learning_rate": 6.727606040963718e-06, "loss": 0.5354, "step": 6291 }, { "epoch": 0.41, "grad_norm": 1.238019585609436, "learning_rate": 6.726625067261272e-06, "loss": 0.5163, "step": 6292 }, { "epoch": 0.41, "grad_norm": 1.249703288078308, "learning_rate": 6.725644018092497e-06, "loss": 0.5643, "step": 6293 }, { "epoch": 0.41, "grad_norm": 1.3504984378814697, "learning_rate": 6.724662893500275e-06, "loss": 0.5279, "step": 6294 }, { "epoch": 0.41, "grad_norm": 1.111467957496643, "learning_rate": 6.723681693527488e-06, "loss": 0.5145, "step": 6295 }, { "epoch": 0.41, "grad_norm": 1.2363653182983398, "learning_rate": 6.7227004182170205e-06, "loss": 0.5144, "step": 6296 }, { "epoch": 0.41, "grad_norm": 1.2501658201217651, "learning_rate": 6.721719067611763e-06, "loss": 0.5455, "step": 6297 }, { "epoch": 0.41, "grad_norm": 1.1406536102294922, "learning_rate": 6.720737641754607e-06, "loss": 0.553, "step": 6298 }, { "epoch": 0.41, "grad_norm": 1.0240488052368164, "learning_rate": 6.719756140688447e-06, "loss": 0.5267, "step": 6299 }, { "epoch": 0.41, "grad_norm": 1.1931664943695068, "learning_rate": 6.718774564456184e-06, "loss": 0.5849, "step": 6300 }, { "epoch": 0.41, "grad_norm": 1.2188876867294312, "learning_rate": 6.71779291310072e-06, "loss": 0.5249, "step": 6301 }, { "epoch": 0.41, "grad_norm": 1.2505007982254028, "learning_rate": 6.71681118666496e-06, "loss": 0.5456, "step": 6302 }, { "epoch": 0.41, "grad_norm": 1.3142039775848389, "learning_rate": 6.715829385191811e-06, "loss": 0.541, "step": 6303 }, { "epoch": 0.41, "grad_norm": 1.1935938596725464, "learning_rate": 6.714847508724188e-06, "loss": 0.5473, "step": 6304 }, { "epoch": 0.41, "grad_norm": 1.1774812936782837, "learning_rate": 6.7138655573050035e-06, "loss": 0.5307, "step": 6305 }, { "epoch": 0.41, "grad_norm": 1.1764358282089233, "learning_rate": 6.712883530977178e-06, "loss": 0.5621, "step": 6306 }, { "epoch": 0.41, "grad_norm": 1.3084146976470947, "learning_rate": 6.711901429783633e-06, "loss": 0.5776, "step": 6307 }, { "epoch": 0.41, "grad_norm": 1.2235435247421265, "learning_rate": 6.710919253767291e-06, "loss": 0.552, "step": 6308 }, { "epoch": 0.41, "grad_norm": 1.1598966121673584, "learning_rate": 6.709937002971086e-06, "loss": 0.5315, "step": 6309 }, { "epoch": 0.41, "grad_norm": 1.1727098226547241, "learning_rate": 6.708954677437944e-06, "loss": 0.5148, "step": 6310 }, { "epoch": 0.41, "grad_norm": 1.1582798957824707, "learning_rate": 6.707972277210804e-06, "loss": 0.5165, "step": 6311 }, { "epoch": 0.41, "grad_norm": 1.1683510541915894, "learning_rate": 6.706989802332601e-06, "loss": 0.5258, "step": 6312 }, { "epoch": 0.41, "grad_norm": 1.1206493377685547, "learning_rate": 6.7060072528462785e-06, "loss": 0.5267, "step": 6313 }, { "epoch": 0.41, "grad_norm": 1.1552114486694336, "learning_rate": 6.705024628794779e-06, "loss": 0.5208, "step": 6314 }, { "epoch": 0.41, "grad_norm": 1.1576974391937256, "learning_rate": 6.7040419302210535e-06, "loss": 0.5192, "step": 6315 }, { "epoch": 0.41, "grad_norm": 1.5723485946655273, "learning_rate": 6.7030591571680516e-06, "loss": 0.5471, "step": 6316 }, { "epoch": 0.41, "grad_norm": 1.2151051759719849, "learning_rate": 6.702076309678727e-06, "loss": 0.5618, "step": 6317 }, { "epoch": 0.41, "grad_norm": 1.2464088201522827, "learning_rate": 6.701093387796039e-06, "loss": 0.5491, "step": 6318 }, { "epoch": 0.41, "grad_norm": 1.2205731868743896, "learning_rate": 6.700110391562949e-06, "loss": 0.4725, "step": 6319 }, { "epoch": 0.41, "grad_norm": 1.084608554840088, "learning_rate": 6.699127321022419e-06, "loss": 0.5044, "step": 6320 }, { "epoch": 0.41, "grad_norm": 1.1655094623565674, "learning_rate": 6.698144176217417e-06, "loss": 0.5489, "step": 6321 }, { "epoch": 0.41, "grad_norm": 1.2295266389846802, "learning_rate": 6.697160957190915e-06, "loss": 0.5494, "step": 6322 }, { "epoch": 0.41, "grad_norm": 1.107872724533081, "learning_rate": 6.696177663985886e-06, "loss": 0.5856, "step": 6323 }, { "epoch": 0.41, "grad_norm": 1.1364741325378418, "learning_rate": 6.695194296645307e-06, "loss": 0.5861, "step": 6324 }, { "epoch": 0.41, "grad_norm": 1.1216959953308105, "learning_rate": 6.69421085521216e-06, "loss": 0.5743, "step": 6325 }, { "epoch": 0.41, "grad_norm": 1.1436476707458496, "learning_rate": 6.6932273397294265e-06, "loss": 0.552, "step": 6326 }, { "epoch": 0.41, "grad_norm": 1.2598135471343994, "learning_rate": 6.692243750240097e-06, "loss": 0.5611, "step": 6327 }, { "epoch": 0.41, "grad_norm": 1.2209001779556274, "learning_rate": 6.691260086787157e-06, "loss": 0.5747, "step": 6328 }, { "epoch": 0.41, "grad_norm": 1.1475441455841064, "learning_rate": 6.6902763494136034e-06, "loss": 0.5668, "step": 6329 }, { "epoch": 0.41, "grad_norm": 1.1532559394836426, "learning_rate": 6.689292538162431e-06, "loss": 0.5625, "step": 6330 }, { "epoch": 0.41, "grad_norm": 1.1181906461715698, "learning_rate": 6.6883086530766395e-06, "loss": 0.5123, "step": 6331 }, { "epoch": 0.41, "grad_norm": 1.2904777526855469, "learning_rate": 6.6873246941992335e-06, "loss": 0.583, "step": 6332 }, { "epoch": 0.41, "grad_norm": 1.293689250946045, "learning_rate": 6.686340661573218e-06, "loss": 0.5624, "step": 6333 }, { "epoch": 0.41, "grad_norm": 1.1955546140670776, "learning_rate": 6.685356555241605e-06, "loss": 0.5758, "step": 6334 }, { "epoch": 0.41, "grad_norm": 1.2879810333251953, "learning_rate": 6.684372375247402e-06, "loss": 0.5418, "step": 6335 }, { "epoch": 0.41, "grad_norm": 1.1633409261703491, "learning_rate": 6.6833881216336304e-06, "loss": 0.5357, "step": 6336 }, { "epoch": 0.41, "grad_norm": 1.3358625173568726, "learning_rate": 6.682403794443306e-06, "loss": 0.5881, "step": 6337 }, { "epoch": 0.41, "grad_norm": 1.2776726484298706, "learning_rate": 6.6814193937194525e-06, "loss": 0.5254, "step": 6338 }, { "epoch": 0.41, "grad_norm": 1.2305216789245605, "learning_rate": 6.6804349195050965e-06, "loss": 0.5572, "step": 6339 }, { "epoch": 0.41, "grad_norm": 1.1701120138168335, "learning_rate": 6.679450371843264e-06, "loss": 0.5934, "step": 6340 }, { "epoch": 0.41, "grad_norm": 1.1822928190231323, "learning_rate": 6.67846575077699e-06, "loss": 0.5415, "step": 6341 }, { "epoch": 0.41, "grad_norm": 1.26362144947052, "learning_rate": 6.677481056349309e-06, "loss": 0.5535, "step": 6342 }, { "epoch": 0.41, "grad_norm": 1.233300805091858, "learning_rate": 6.676496288603258e-06, "loss": 0.5374, "step": 6343 }, { "epoch": 0.41, "grad_norm": 1.2222321033477783, "learning_rate": 6.675511447581879e-06, "loss": 0.5816, "step": 6344 }, { "epoch": 0.41, "grad_norm": 1.1807105541229248, "learning_rate": 6.674526533328221e-06, "loss": 0.5431, "step": 6345 }, { "epoch": 0.41, "grad_norm": 1.25046706199646, "learning_rate": 6.673541545885325e-06, "loss": 0.5063, "step": 6346 }, { "epoch": 0.41, "grad_norm": 1.2792917490005493, "learning_rate": 6.672556485296246e-06, "loss": 0.5558, "step": 6347 }, { "epoch": 0.41, "grad_norm": 1.165184497833252, "learning_rate": 6.67157135160404e-06, "loss": 0.5458, "step": 6348 }, { "epoch": 0.41, "grad_norm": 1.1126819849014282, "learning_rate": 6.670586144851762e-06, "loss": 0.5637, "step": 6349 }, { "epoch": 0.41, "grad_norm": 1.638604998588562, "learning_rate": 6.669600865082473e-06, "loss": 0.6107, "step": 6350 }, { "epoch": 0.41, "grad_norm": 1.0703682899475098, "learning_rate": 6.66861551233924e-06, "loss": 0.5446, "step": 6351 }, { "epoch": 0.41, "grad_norm": 1.2124139070510864, "learning_rate": 6.667630086665126e-06, "loss": 0.5331, "step": 6352 }, { "epoch": 0.41, "grad_norm": 1.197564721107483, "learning_rate": 6.666644588103205e-06, "loss": 0.526, "step": 6353 }, { "epoch": 0.41, "grad_norm": 1.4475113153457642, "learning_rate": 6.665659016696548e-06, "loss": 0.5486, "step": 6354 }, { "epoch": 0.41, "grad_norm": 1.3187721967697144, "learning_rate": 6.664673372488233e-06, "loss": 0.5733, "step": 6355 }, { "epoch": 0.41, "grad_norm": 1.348671793937683, "learning_rate": 6.66368765552134e-06, "loss": 0.5812, "step": 6356 }, { "epoch": 0.41, "grad_norm": 1.123686671257019, "learning_rate": 6.662701865838952e-06, "loss": 0.5298, "step": 6357 }, { "epoch": 0.41, "grad_norm": 1.0882108211517334, "learning_rate": 6.6617160034841545e-06, "loss": 0.4946, "step": 6358 }, { "epoch": 0.41, "grad_norm": 1.1499640941619873, "learning_rate": 6.660730068500039e-06, "loss": 0.5262, "step": 6359 }, { "epoch": 0.41, "grad_norm": 1.2246735095977783, "learning_rate": 6.659744060929696e-06, "loss": 0.5545, "step": 6360 }, { "epoch": 0.41, "grad_norm": 1.1271274089813232, "learning_rate": 6.658757980816221e-06, "loss": 0.5589, "step": 6361 }, { "epoch": 0.41, "grad_norm": 1.2432665824890137, "learning_rate": 6.657771828202717e-06, "loss": 0.5299, "step": 6362 }, { "epoch": 0.41, "grad_norm": 1.110887885093689, "learning_rate": 6.656785603132283e-06, "loss": 0.5564, "step": 6363 }, { "epoch": 0.41, "grad_norm": 1.1711663007736206, "learning_rate": 6.655799305648023e-06, "loss": 0.5292, "step": 6364 }, { "epoch": 0.41, "grad_norm": 1.2233372926712036, "learning_rate": 6.65481293579305e-06, "loss": 0.5772, "step": 6365 }, { "epoch": 0.41, "grad_norm": 1.2320258617401123, "learning_rate": 6.653826493610471e-06, "loss": 0.5864, "step": 6366 }, { "epoch": 0.41, "grad_norm": 1.1259779930114746, "learning_rate": 6.652839979143404e-06, "loss": 0.5209, "step": 6367 }, { "epoch": 0.41, "grad_norm": 1.2303825616836548, "learning_rate": 6.651853392434966e-06, "loss": 0.5272, "step": 6368 }, { "epoch": 0.41, "grad_norm": 1.3601125478744507, "learning_rate": 6.650866733528276e-06, "loss": 0.5546, "step": 6369 }, { "epoch": 0.41, "grad_norm": 1.2516448497772217, "learning_rate": 6.6498800024664625e-06, "loss": 0.5684, "step": 6370 }, { "epoch": 0.41, "grad_norm": 1.0329171419143677, "learning_rate": 6.648893199292651e-06, "loss": 0.5493, "step": 6371 }, { "epoch": 0.41, "grad_norm": 1.3541892766952515, "learning_rate": 6.647906324049971e-06, "loss": 0.523, "step": 6372 }, { "epoch": 0.41, "grad_norm": 1.1467006206512451, "learning_rate": 6.646919376781556e-06, "loss": 0.5196, "step": 6373 }, { "epoch": 0.41, "grad_norm": 1.2188483476638794, "learning_rate": 6.6459323575305464e-06, "loss": 0.5639, "step": 6374 }, { "epoch": 0.41, "grad_norm": 1.1822631359100342, "learning_rate": 6.64494526634008e-06, "loss": 0.5329, "step": 6375 }, { "epoch": 0.41, "grad_norm": 1.2340325117111206, "learning_rate": 6.6439581032533e-06, "loss": 0.5537, "step": 6376 }, { "epoch": 0.41, "grad_norm": 1.0713247060775757, "learning_rate": 6.642970868313351e-06, "loss": 0.5531, "step": 6377 }, { "epoch": 0.41, "grad_norm": 1.1737505197525024, "learning_rate": 6.641983561563385e-06, "loss": 0.5569, "step": 6378 }, { "epoch": 0.41, "grad_norm": 1.2545557022094727, "learning_rate": 6.640996183046555e-06, "loss": 0.59, "step": 6379 }, { "epoch": 0.41, "grad_norm": 1.2891991138458252, "learning_rate": 6.6400087328060135e-06, "loss": 0.5377, "step": 6380 }, { "epoch": 0.41, "grad_norm": 1.208181381225586, "learning_rate": 6.639021210884924e-06, "loss": 0.4957, "step": 6381 }, { "epoch": 0.41, "grad_norm": 1.2390388250350952, "learning_rate": 6.638033617326445e-06, "loss": 0.5658, "step": 6382 }, { "epoch": 0.41, "grad_norm": 1.1624573469161987, "learning_rate": 6.637045952173744e-06, "loss": 0.5407, "step": 6383 }, { "epoch": 0.41, "grad_norm": 1.1962090730667114, "learning_rate": 6.636058215469986e-06, "loss": 0.5288, "step": 6384 }, { "epoch": 0.41, "grad_norm": 1.143343210220337, "learning_rate": 6.635070407258345e-06, "loss": 0.5476, "step": 6385 }, { "epoch": 0.41, "grad_norm": 1.311577320098877, "learning_rate": 6.634082527581995e-06, "loss": 0.5974, "step": 6386 }, { "epoch": 0.41, "grad_norm": 1.2217905521392822, "learning_rate": 6.633094576484115e-06, "loss": 0.5356, "step": 6387 }, { "epoch": 0.41, "grad_norm": 1.1972016096115112, "learning_rate": 6.632106554007882e-06, "loss": 0.5361, "step": 6388 }, { "epoch": 0.41, "grad_norm": 1.1220377683639526, "learning_rate": 6.631118460196485e-06, "loss": 0.4817, "step": 6389 }, { "epoch": 0.41, "grad_norm": 1.0776128768920898, "learning_rate": 6.630130295093108e-06, "loss": 0.5087, "step": 6390 }, { "epoch": 0.41, "grad_norm": 1.200892686843872, "learning_rate": 6.629142058740941e-06, "loss": 0.5118, "step": 6391 }, { "epoch": 0.41, "grad_norm": 1.2050586938858032, "learning_rate": 6.628153751183178e-06, "loss": 0.5443, "step": 6392 }, { "epoch": 0.41, "grad_norm": 1.274290919303894, "learning_rate": 6.6271653724630145e-06, "loss": 0.5415, "step": 6393 }, { "epoch": 0.41, "grad_norm": 1.1958763599395752, "learning_rate": 6.62617692262365e-06, "loss": 0.5765, "step": 6394 }, { "epoch": 0.41, "grad_norm": 1.1876440048217773, "learning_rate": 6.625188401708291e-06, "loss": 0.5799, "step": 6395 }, { "epoch": 0.41, "grad_norm": 1.0882560014724731, "learning_rate": 6.624199809760138e-06, "loss": 0.5556, "step": 6396 }, { "epoch": 0.41, "grad_norm": 1.0499353408813477, "learning_rate": 6.623211146822403e-06, "loss": 0.5067, "step": 6397 }, { "epoch": 0.41, "grad_norm": 1.2110189199447632, "learning_rate": 6.622222412938295e-06, "loss": 0.5436, "step": 6398 }, { "epoch": 0.41, "grad_norm": 1.148988962173462, "learning_rate": 6.6212336081510295e-06, "loss": 0.4957, "step": 6399 }, { "epoch": 0.41, "grad_norm": 1.2767508029937744, "learning_rate": 6.6202447325038285e-06, "loss": 0.5395, "step": 6400 }, { "epoch": 0.41, "grad_norm": 1.2075483798980713, "learning_rate": 6.6192557860399094e-06, "loss": 0.5587, "step": 6401 }, { "epoch": 0.41, "grad_norm": 1.2212862968444824, "learning_rate": 6.618266768802498e-06, "loss": 0.5923, "step": 6402 }, { "epoch": 0.41, "grad_norm": 1.4270480871200562, "learning_rate": 6.617277680834823e-06, "loss": 0.6008, "step": 6403 }, { "epoch": 0.41, "grad_norm": 1.3828870058059692, "learning_rate": 6.61628852218011e-06, "loss": 0.4882, "step": 6404 }, { "epoch": 0.41, "grad_norm": 1.2359862327575684, "learning_rate": 6.615299292881597e-06, "loss": 0.5188, "step": 6405 }, { "epoch": 0.41, "grad_norm": 1.403937578201294, "learning_rate": 6.61430999298252e-06, "loss": 0.5631, "step": 6406 }, { "epoch": 0.41, "grad_norm": 1.1503788232803345, "learning_rate": 6.613320622526118e-06, "loss": 0.5517, "step": 6407 }, { "epoch": 0.41, "grad_norm": 1.1785142421722412, "learning_rate": 6.612331181555634e-06, "loss": 0.5121, "step": 6408 }, { "epoch": 0.41, "grad_norm": 1.2979580163955688, "learning_rate": 6.611341670114314e-06, "loss": 0.5491, "step": 6409 }, { "epoch": 0.41, "grad_norm": 1.1751387119293213, "learning_rate": 6.610352088245407e-06, "loss": 0.5869, "step": 6410 }, { "epoch": 0.41, "grad_norm": 1.210850477218628, "learning_rate": 6.6093624359921636e-06, "loss": 0.5344, "step": 6411 }, { "epoch": 0.41, "grad_norm": 1.1658287048339844, "learning_rate": 6.608372713397843e-06, "loss": 0.5102, "step": 6412 }, { "epoch": 0.41, "grad_norm": 1.2365059852600098, "learning_rate": 6.6073829205056985e-06, "loss": 0.5221, "step": 6413 }, { "epoch": 0.41, "grad_norm": 1.2755231857299805, "learning_rate": 6.606393057358994e-06, "loss": 0.5853, "step": 6414 }, { "epoch": 0.41, "grad_norm": 1.0804882049560547, "learning_rate": 6.605403124000994e-06, "loss": 0.5073, "step": 6415 }, { "epoch": 0.41, "grad_norm": 1.173353910446167, "learning_rate": 6.604413120474965e-06, "loss": 0.5196, "step": 6416 }, { "epoch": 0.41, "grad_norm": 1.1453913450241089, "learning_rate": 6.603423046824178e-06, "loss": 0.517, "step": 6417 }, { "epoch": 0.41, "grad_norm": 1.1636801958084106, "learning_rate": 6.6024329030919074e-06, "loss": 0.5261, "step": 6418 }, { "epoch": 0.41, "grad_norm": 1.192845344543457, "learning_rate": 6.601442689321428e-06, "loss": 0.489, "step": 6419 }, { "epoch": 0.41, "grad_norm": 1.183387279510498, "learning_rate": 6.60045240555602e-06, "loss": 0.5717, "step": 6420 }, { "epoch": 0.41, "grad_norm": 1.3027430772781372, "learning_rate": 6.599462051838968e-06, "loss": 0.537, "step": 6421 }, { "epoch": 0.41, "grad_norm": 1.1672474145889282, "learning_rate": 6.598471628213556e-06, "loss": 0.5655, "step": 6422 }, { "epoch": 0.41, "grad_norm": 1.152176856994629, "learning_rate": 6.597481134723074e-06, "loss": 0.5203, "step": 6423 }, { "epoch": 0.41, "grad_norm": 1.0902094841003418, "learning_rate": 6.596490571410811e-06, "loss": 0.5254, "step": 6424 }, { "epoch": 0.41, "grad_norm": 1.1992172002792358, "learning_rate": 6.595499938320066e-06, "loss": 0.5245, "step": 6425 }, { "epoch": 0.41, "grad_norm": 1.1577690839767456, "learning_rate": 6.594509235494134e-06, "loss": 0.5242, "step": 6426 }, { "epoch": 0.41, "grad_norm": 1.3002238273620605, "learning_rate": 6.593518462976317e-06, "loss": 0.5904, "step": 6427 }, { "epoch": 0.41, "grad_norm": 1.129783034324646, "learning_rate": 6.592527620809921e-06, "loss": 0.4988, "step": 6428 }, { "epoch": 0.41, "grad_norm": 1.280651330947876, "learning_rate": 6.59153670903825e-06, "loss": 0.5446, "step": 6429 }, { "epoch": 0.42, "grad_norm": 1.0633476972579956, "learning_rate": 6.590545727704616e-06, "loss": 0.5612, "step": 6430 }, { "epoch": 0.42, "grad_norm": 1.1770249605178833, "learning_rate": 6.589554676852332e-06, "loss": 0.5631, "step": 6431 }, { "epoch": 0.42, "grad_norm": 1.2820936441421509, "learning_rate": 6.588563556524714e-06, "loss": 0.5586, "step": 6432 }, { "epoch": 0.42, "grad_norm": 1.2717100381851196, "learning_rate": 6.587572366765083e-06, "loss": 0.6051, "step": 6433 }, { "epoch": 0.42, "grad_norm": 1.21515691280365, "learning_rate": 6.586581107616756e-06, "loss": 0.5548, "step": 6434 }, { "epoch": 0.42, "grad_norm": 0.9419683814048767, "learning_rate": 6.585589779123065e-06, "loss": 0.4401, "step": 6435 }, { "epoch": 0.42, "grad_norm": 1.187894582748413, "learning_rate": 6.584598381327336e-06, "loss": 0.6066, "step": 6436 }, { "epoch": 0.42, "grad_norm": 1.173710584640503, "learning_rate": 6.5836069142729e-06, "loss": 0.5479, "step": 6437 }, { "epoch": 0.42, "grad_norm": 1.189429521560669, "learning_rate": 6.582615378003091e-06, "loss": 0.5485, "step": 6438 }, { "epoch": 0.42, "grad_norm": 1.2479647397994995, "learning_rate": 6.581623772561247e-06, "loss": 0.5421, "step": 6439 }, { "epoch": 0.42, "grad_norm": 1.2070115804672241, "learning_rate": 6.580632097990707e-06, "loss": 0.5364, "step": 6440 }, { "epoch": 0.42, "grad_norm": 1.1166249513626099, "learning_rate": 6.579640354334819e-06, "loss": 0.5369, "step": 6441 }, { "epoch": 0.42, "grad_norm": 1.1291404962539673, "learning_rate": 6.578648541636925e-06, "loss": 0.5363, "step": 6442 }, { "epoch": 0.42, "grad_norm": 1.2352145910263062, "learning_rate": 6.577656659940376e-06, "loss": 0.5414, "step": 6443 }, { "epoch": 0.42, "grad_norm": 1.2825480699539185, "learning_rate": 6.576664709288525e-06, "loss": 0.5397, "step": 6444 }, { "epoch": 0.42, "grad_norm": 1.1860463619232178, "learning_rate": 6.575672689724728e-06, "loss": 0.6158, "step": 6445 }, { "epoch": 0.42, "grad_norm": 1.2131847143173218, "learning_rate": 6.574680601292342e-06, "loss": 0.572, "step": 6446 }, { "epoch": 0.42, "grad_norm": 1.208343505859375, "learning_rate": 6.57368844403473e-06, "loss": 0.5465, "step": 6447 }, { "epoch": 0.42, "grad_norm": 1.181206226348877, "learning_rate": 6.572696217995257e-06, "loss": 0.52, "step": 6448 }, { "epoch": 0.42, "grad_norm": 1.0753259658813477, "learning_rate": 6.571703923217289e-06, "loss": 0.5029, "step": 6449 }, { "epoch": 0.42, "grad_norm": 1.0422494411468506, "learning_rate": 6.570711559744199e-06, "loss": 0.5101, "step": 6450 }, { "epoch": 0.42, "grad_norm": 1.1800720691680908, "learning_rate": 6.569719127619357e-06, "loss": 0.5498, "step": 6451 }, { "epoch": 0.42, "grad_norm": 1.1795026063919067, "learning_rate": 6.568726626886144e-06, "loss": 0.5679, "step": 6452 }, { "epoch": 0.42, "grad_norm": 1.165018916130066, "learning_rate": 6.567734057587937e-06, "loss": 0.5629, "step": 6453 }, { "epoch": 0.42, "grad_norm": 1.2435675859451294, "learning_rate": 6.566741419768118e-06, "loss": 0.5021, "step": 6454 }, { "epoch": 0.42, "grad_norm": 2.1553165912628174, "learning_rate": 6.565748713470075e-06, "loss": 0.5662, "step": 6455 }, { "epoch": 0.42, "grad_norm": 1.0514428615570068, "learning_rate": 6.564755938737195e-06, "loss": 0.5535, "step": 6456 }, { "epoch": 0.42, "grad_norm": 1.1670109033584595, "learning_rate": 6.563763095612869e-06, "loss": 0.5739, "step": 6457 }, { "epoch": 0.42, "grad_norm": 1.2330244779586792, "learning_rate": 6.562770184140494e-06, "loss": 0.565, "step": 6458 }, { "epoch": 0.42, "grad_norm": 1.1336445808410645, "learning_rate": 6.561777204363466e-06, "loss": 0.5299, "step": 6459 }, { "epoch": 0.42, "grad_norm": 1.1555898189544678, "learning_rate": 6.560784156325187e-06, "loss": 0.5599, "step": 6460 }, { "epoch": 0.42, "grad_norm": 1.2084554433822632, "learning_rate": 6.559791040069057e-06, "loss": 0.5551, "step": 6461 }, { "epoch": 0.42, "grad_norm": 1.2111666202545166, "learning_rate": 6.558797855638487e-06, "loss": 0.6052, "step": 6462 }, { "epoch": 0.42, "grad_norm": 1.2582403421401978, "learning_rate": 6.557804603076883e-06, "loss": 0.5155, "step": 6463 }, { "epoch": 0.42, "grad_norm": 1.1079152822494507, "learning_rate": 6.556811282427659e-06, "loss": 0.5559, "step": 6464 }, { "epoch": 0.42, "grad_norm": 1.1545823812484741, "learning_rate": 6.555817893734232e-06, "loss": 0.5713, "step": 6465 }, { "epoch": 0.42, "grad_norm": 1.2171316146850586, "learning_rate": 6.554824437040017e-06, "loss": 0.5218, "step": 6466 }, { "epoch": 0.42, "grad_norm": 1.1139135360717773, "learning_rate": 6.5538309123884385e-06, "loss": 0.5046, "step": 6467 }, { "epoch": 0.42, "grad_norm": 1.1448094844818115, "learning_rate": 6.55283731982292e-06, "loss": 0.5304, "step": 6468 }, { "epoch": 0.42, "grad_norm": 1.1950427293777466, "learning_rate": 6.551843659386888e-06, "loss": 0.5353, "step": 6469 }, { "epoch": 0.42, "grad_norm": 1.276706576347351, "learning_rate": 6.550849931123775e-06, "loss": 0.5024, "step": 6470 }, { "epoch": 0.42, "grad_norm": 1.0956354141235352, "learning_rate": 6.549856135077011e-06, "loss": 0.4826, "step": 6471 }, { "epoch": 0.42, "grad_norm": 1.2321946620941162, "learning_rate": 6.548862271290035e-06, "loss": 0.5834, "step": 6472 }, { "epoch": 0.42, "grad_norm": 1.055593490600586, "learning_rate": 6.547868339806284e-06, "loss": 0.5242, "step": 6473 }, { "epoch": 0.42, "grad_norm": 1.134716510772705, "learning_rate": 6.546874340669202e-06, "loss": 0.5261, "step": 6474 }, { "epoch": 0.42, "grad_norm": 1.1700377464294434, "learning_rate": 6.545880273922234e-06, "loss": 0.5504, "step": 6475 }, { "epoch": 0.42, "grad_norm": 1.1808403730392456, "learning_rate": 6.544886139608827e-06, "loss": 0.5294, "step": 6476 }, { "epoch": 0.42, "grad_norm": 1.0746443271636963, "learning_rate": 6.543891937772434e-06, "loss": 0.5594, "step": 6477 }, { "epoch": 0.42, "grad_norm": 1.2142786979675293, "learning_rate": 6.542897668456506e-06, "loss": 0.5256, "step": 6478 }, { "epoch": 0.42, "grad_norm": 1.18659245967865, "learning_rate": 6.541903331704502e-06, "loss": 0.547, "step": 6479 }, { "epoch": 0.42, "grad_norm": 1.1429678201675415, "learning_rate": 6.540908927559882e-06, "loss": 0.5344, "step": 6480 }, { "epoch": 0.42, "grad_norm": 1.2833826541900635, "learning_rate": 6.539914456066109e-06, "loss": 0.5765, "step": 6481 }, { "epoch": 0.42, "grad_norm": 1.3264048099517822, "learning_rate": 6.538919917266647e-06, "loss": 0.5073, "step": 6482 }, { "epoch": 0.42, "grad_norm": 1.1774094104766846, "learning_rate": 6.5379253112049664e-06, "loss": 0.553, "step": 6483 }, { "epoch": 0.42, "grad_norm": 1.1860023736953735, "learning_rate": 6.536930637924538e-06, "loss": 0.5529, "step": 6484 }, { "epoch": 0.42, "grad_norm": 1.3718410730361938, "learning_rate": 6.535935897468838e-06, "loss": 0.5658, "step": 6485 }, { "epoch": 0.42, "grad_norm": 1.175766944885254, "learning_rate": 6.534941089881341e-06, "loss": 0.5601, "step": 6486 }, { "epoch": 0.42, "grad_norm": 1.0821012258529663, "learning_rate": 6.53394621520553e-06, "loss": 0.4917, "step": 6487 }, { "epoch": 0.42, "grad_norm": 1.1084589958190918, "learning_rate": 6.532951273484888e-06, "loss": 0.4922, "step": 6488 }, { "epoch": 0.42, "grad_norm": 1.1819207668304443, "learning_rate": 6.5319562647629e-06, "loss": 0.5649, "step": 6489 }, { "epoch": 0.42, "grad_norm": 1.2153732776641846, "learning_rate": 6.530961189083056e-06, "loss": 0.5532, "step": 6490 }, { "epoch": 0.42, "grad_norm": 1.1139824390411377, "learning_rate": 6.529966046488849e-06, "loss": 0.5301, "step": 6491 }, { "epoch": 0.42, "grad_norm": 1.1032339334487915, "learning_rate": 6.528970837023773e-06, "loss": 0.4688, "step": 6492 }, { "epoch": 0.42, "grad_norm": 1.0799466371536255, "learning_rate": 6.527975560731327e-06, "loss": 0.5044, "step": 6493 }, { "epoch": 0.42, "grad_norm": 1.1912298202514648, "learning_rate": 6.526980217655012e-06, "loss": 0.5634, "step": 6494 }, { "epoch": 0.42, "grad_norm": 1.2126996517181396, "learning_rate": 6.52598480783833e-06, "loss": 0.5662, "step": 6495 }, { "epoch": 0.42, "grad_norm": 1.2616199254989624, "learning_rate": 6.5249893313247894e-06, "loss": 0.5436, "step": 6496 }, { "epoch": 0.42, "grad_norm": 1.3193703889846802, "learning_rate": 6.523993788157901e-06, "loss": 0.5703, "step": 6497 }, { "epoch": 0.42, "grad_norm": 1.2202540636062622, "learning_rate": 6.522998178381175e-06, "loss": 0.5957, "step": 6498 }, { "epoch": 0.42, "grad_norm": 1.113661527633667, "learning_rate": 6.522002502038127e-06, "loss": 0.5491, "step": 6499 }, { "epoch": 0.42, "grad_norm": 1.175451636314392, "learning_rate": 6.521006759172279e-06, "loss": 0.5376, "step": 6500 }, { "epoch": 0.42, "grad_norm": 1.4606192111968994, "learning_rate": 6.520010949827148e-06, "loss": 0.5159, "step": 6501 }, { "epoch": 0.42, "grad_norm": 1.211768388748169, "learning_rate": 6.51901507404626e-06, "loss": 0.5534, "step": 6502 }, { "epoch": 0.42, "grad_norm": 1.8369158506393433, "learning_rate": 6.518019131873144e-06, "loss": 0.5756, "step": 6503 }, { "epoch": 0.42, "grad_norm": 1.1689181327819824, "learning_rate": 6.517023123351326e-06, "loss": 0.5453, "step": 6504 }, { "epoch": 0.42, "grad_norm": 1.1528558731079102, "learning_rate": 6.516027048524341e-06, "loss": 0.5251, "step": 6505 }, { "epoch": 0.42, "grad_norm": 1.337033987045288, "learning_rate": 6.515030907435728e-06, "loss": 0.5298, "step": 6506 }, { "epoch": 0.42, "grad_norm": 1.4124033451080322, "learning_rate": 6.514034700129021e-06, "loss": 0.5432, "step": 6507 }, { "epoch": 0.42, "grad_norm": 1.1849852800369263, "learning_rate": 6.513038426647763e-06, "loss": 0.5639, "step": 6508 }, { "epoch": 0.42, "grad_norm": 1.1980726718902588, "learning_rate": 6.5120420870355e-06, "loss": 0.5441, "step": 6509 }, { "epoch": 0.42, "grad_norm": 1.1802884340286255, "learning_rate": 6.511045681335778e-06, "loss": 0.5414, "step": 6510 }, { "epoch": 0.42, "grad_norm": 1.2287400960922241, "learning_rate": 6.5100492095921485e-06, "loss": 0.5467, "step": 6511 }, { "epoch": 0.42, "grad_norm": 1.689786434173584, "learning_rate": 6.509052671848164e-06, "loss": 0.5087, "step": 6512 }, { "epoch": 0.42, "grad_norm": 1.2666577100753784, "learning_rate": 6.508056068147379e-06, "loss": 0.5348, "step": 6513 }, { "epoch": 0.42, "grad_norm": 1.1315951347351074, "learning_rate": 6.507059398533357e-06, "loss": 0.5734, "step": 6514 }, { "epoch": 0.42, "grad_norm": 1.2382230758666992, "learning_rate": 6.506062663049655e-06, "loss": 0.5525, "step": 6515 }, { "epoch": 0.42, "grad_norm": 1.1634482145309448, "learning_rate": 6.505065861739839e-06, "loss": 0.5127, "step": 6516 }, { "epoch": 0.42, "grad_norm": 1.1060154438018799, "learning_rate": 6.504068994647479e-06, "loss": 0.48, "step": 6517 }, { "epoch": 0.42, "grad_norm": 1.0217286348342896, "learning_rate": 6.503072061816142e-06, "loss": 0.5645, "step": 6518 }, { "epoch": 0.42, "grad_norm": 1.0949198007583618, "learning_rate": 6.5020750632894056e-06, "loss": 0.5242, "step": 6519 }, { "epoch": 0.42, "grad_norm": 1.2277565002441406, "learning_rate": 6.501077999110842e-06, "loss": 0.5496, "step": 6520 }, { "epoch": 0.42, "grad_norm": 1.1370766162872314, "learning_rate": 6.500080869324032e-06, "loss": 0.5026, "step": 6521 }, { "epoch": 0.42, "grad_norm": 1.269655466079712, "learning_rate": 6.499083673972558e-06, "loss": 0.5353, "step": 6522 }, { "epoch": 0.42, "grad_norm": 1.2114415168762207, "learning_rate": 6.498086413100004e-06, "loss": 0.5262, "step": 6523 }, { "epoch": 0.42, "grad_norm": 1.1960961818695068, "learning_rate": 6.497089086749958e-06, "loss": 0.5308, "step": 6524 }, { "epoch": 0.42, "grad_norm": 1.4339969158172607, "learning_rate": 6.49609169496601e-06, "loss": 0.5746, "step": 6525 }, { "epoch": 0.42, "grad_norm": 1.2573586702346802, "learning_rate": 6.495094237791756e-06, "loss": 0.4932, "step": 6526 }, { "epoch": 0.42, "grad_norm": 1.1164155006408691, "learning_rate": 6.494096715270788e-06, "loss": 0.5155, "step": 6527 }, { "epoch": 0.42, "grad_norm": 1.1142258644104004, "learning_rate": 6.493099127446707e-06, "loss": 0.5105, "step": 6528 }, { "epoch": 0.42, "grad_norm": 1.1865112781524658, "learning_rate": 6.492101474363117e-06, "loss": 0.5524, "step": 6529 }, { "epoch": 0.42, "grad_norm": 1.2832131385803223, "learning_rate": 6.4911037560636216e-06, "loss": 0.5334, "step": 6530 }, { "epoch": 0.42, "grad_norm": 1.1166702508926392, "learning_rate": 6.490105972591827e-06, "loss": 0.507, "step": 6531 }, { "epoch": 0.42, "grad_norm": 1.2459646463394165, "learning_rate": 6.4891081239913455e-06, "loss": 0.5433, "step": 6532 }, { "epoch": 0.42, "grad_norm": 1.1458576917648315, "learning_rate": 6.488110210305789e-06, "loss": 0.5052, "step": 6533 }, { "epoch": 0.42, "grad_norm": 1.1862666606903076, "learning_rate": 6.4871122315787735e-06, "loss": 0.5307, "step": 6534 }, { "epoch": 0.42, "grad_norm": 1.2394171953201294, "learning_rate": 6.486114187853922e-06, "loss": 0.5251, "step": 6535 }, { "epoch": 0.42, "grad_norm": 1.2370086908340454, "learning_rate": 6.4851160791748525e-06, "loss": 0.5743, "step": 6536 }, { "epoch": 0.42, "grad_norm": 1.1964085102081299, "learning_rate": 6.48411790558519e-06, "loss": 0.5562, "step": 6537 }, { "epoch": 0.42, "grad_norm": 1.0586286783218384, "learning_rate": 6.483119667128564e-06, "loss": 0.54, "step": 6538 }, { "epoch": 0.42, "grad_norm": 1.3031513690948486, "learning_rate": 6.482121363848603e-06, "loss": 0.5441, "step": 6539 }, { "epoch": 0.42, "grad_norm": 1.130773901939392, "learning_rate": 6.481122995788941e-06, "loss": 0.5531, "step": 6540 }, { "epoch": 0.42, "grad_norm": 1.242902398109436, "learning_rate": 6.4801245629932155e-06, "loss": 0.5497, "step": 6541 }, { "epoch": 0.42, "grad_norm": 1.111303448677063, "learning_rate": 6.479126065505063e-06, "loss": 0.5398, "step": 6542 }, { "epoch": 0.42, "grad_norm": 1.2797574996948242, "learning_rate": 6.478127503368126e-06, "loss": 0.5741, "step": 6543 }, { "epoch": 0.42, "grad_norm": 1.219530463218689, "learning_rate": 6.477128876626051e-06, "loss": 0.5045, "step": 6544 }, { "epoch": 0.42, "grad_norm": 1.2118405103683472, "learning_rate": 6.476130185322483e-06, "loss": 0.5204, "step": 6545 }, { "epoch": 0.42, "grad_norm": 1.248372197151184, "learning_rate": 6.475131429501073e-06, "loss": 0.5262, "step": 6546 }, { "epoch": 0.42, "grad_norm": 1.2261348962783813, "learning_rate": 6.4741326092054745e-06, "loss": 0.5771, "step": 6547 }, { "epoch": 0.42, "grad_norm": 1.1319769620895386, "learning_rate": 6.473133724479342e-06, "loss": 0.5627, "step": 6548 }, { "epoch": 0.42, "grad_norm": 1.22377347946167, "learning_rate": 6.472134775366336e-06, "loss": 0.5145, "step": 6549 }, { "epoch": 0.42, "grad_norm": 1.179980754852295, "learning_rate": 6.471135761910117e-06, "loss": 0.5829, "step": 6550 }, { "epoch": 0.42, "grad_norm": 1.1869877576828003, "learning_rate": 6.470136684154349e-06, "loss": 0.5715, "step": 6551 }, { "epoch": 0.42, "grad_norm": 1.1822117567062378, "learning_rate": 6.469137542142699e-06, "loss": 0.5625, "step": 6552 }, { "epoch": 0.42, "grad_norm": 1.231103539466858, "learning_rate": 6.468138335918839e-06, "loss": 0.5562, "step": 6553 }, { "epoch": 0.42, "grad_norm": 1.1618614196777344, "learning_rate": 6.4671390655264395e-06, "loss": 0.5613, "step": 6554 }, { "epoch": 0.42, "grad_norm": 1.2346030473709106, "learning_rate": 6.466139731009176e-06, "loss": 0.4817, "step": 6555 }, { "epoch": 0.42, "grad_norm": 1.1012208461761475, "learning_rate": 6.465140332410728e-06, "loss": 0.552, "step": 6556 }, { "epoch": 0.42, "grad_norm": 1.0732829570770264, "learning_rate": 6.464140869774777e-06, "loss": 0.4967, "step": 6557 }, { "epoch": 0.42, "grad_norm": 1.3104441165924072, "learning_rate": 6.463141343145005e-06, "loss": 0.6052, "step": 6558 }, { "epoch": 0.42, "grad_norm": 1.2500042915344238, "learning_rate": 6.462141752565101e-06, "loss": 0.5745, "step": 6559 }, { "epoch": 0.42, "grad_norm": 1.2150158882141113, "learning_rate": 6.461142098078752e-06, "loss": 0.5412, "step": 6560 }, { "epoch": 0.42, "grad_norm": 1.1056628227233887, "learning_rate": 6.4601423797296535e-06, "loss": 0.5246, "step": 6561 }, { "epoch": 0.42, "grad_norm": 1.0751217603683472, "learning_rate": 6.459142597561497e-06, "loss": 0.5285, "step": 6562 }, { "epoch": 0.42, "grad_norm": 1.1502169370651245, "learning_rate": 6.458142751617984e-06, "loss": 0.512, "step": 6563 }, { "epoch": 0.42, "grad_norm": 1.155112385749817, "learning_rate": 6.4571428419428115e-06, "loss": 0.5104, "step": 6564 }, { "epoch": 0.42, "grad_norm": 1.163106918334961, "learning_rate": 6.456142868579686e-06, "loss": 0.5399, "step": 6565 }, { "epoch": 0.42, "grad_norm": 1.1381722688674927, "learning_rate": 6.455142831572313e-06, "loss": 0.5231, "step": 6566 }, { "epoch": 0.42, "grad_norm": 1.1919084787368774, "learning_rate": 6.4541427309644e-06, "loss": 0.5481, "step": 6567 }, { "epoch": 0.42, "grad_norm": 1.157789707183838, "learning_rate": 6.4531425667996615e-06, "loss": 0.5702, "step": 6568 }, { "epoch": 0.42, "grad_norm": 1.0459330081939697, "learning_rate": 6.452142339121808e-06, "loss": 0.499, "step": 6569 }, { "epoch": 0.42, "grad_norm": 1.0671554803848267, "learning_rate": 6.451142047974562e-06, "loss": 0.5137, "step": 6570 }, { "epoch": 0.42, "grad_norm": 1.0138496160507202, "learning_rate": 6.45014169340164e-06, "loss": 0.5751, "step": 6571 }, { "epoch": 0.42, "grad_norm": 1.1641526222229004, "learning_rate": 6.4491412754467655e-06, "loss": 0.5513, "step": 6572 }, { "epoch": 0.42, "grad_norm": 1.1168676614761353, "learning_rate": 6.448140794153666e-06, "loss": 0.502, "step": 6573 }, { "epoch": 0.42, "grad_norm": 1.1834667921066284, "learning_rate": 6.447140249566067e-06, "loss": 0.505, "step": 6574 }, { "epoch": 0.42, "grad_norm": 1.1445894241333008, "learning_rate": 6.446139641727702e-06, "loss": 0.5557, "step": 6575 }, { "epoch": 0.42, "grad_norm": 1.1656997203826904, "learning_rate": 6.445138970682305e-06, "loss": 0.5562, "step": 6576 }, { "epoch": 0.42, "grad_norm": 1.160735011100769, "learning_rate": 6.444138236473612e-06, "loss": 0.5478, "step": 6577 }, { "epoch": 0.42, "grad_norm": 1.1095209121704102, "learning_rate": 6.443137439145362e-06, "loss": 0.496, "step": 6578 }, { "epoch": 0.42, "grad_norm": 1.2735775709152222, "learning_rate": 6.442136578741298e-06, "loss": 0.6046, "step": 6579 }, { "epoch": 0.42, "grad_norm": 1.0551080703735352, "learning_rate": 6.441135655305165e-06, "loss": 0.6099, "step": 6580 }, { "epoch": 0.42, "grad_norm": 1.2548493146896362, "learning_rate": 6.440134668880711e-06, "loss": 0.5633, "step": 6581 }, { "epoch": 0.42, "grad_norm": 1.2313859462738037, "learning_rate": 6.439133619511686e-06, "loss": 0.569, "step": 6582 }, { "epoch": 0.42, "grad_norm": 1.1420798301696777, "learning_rate": 6.438132507241843e-06, "loss": 0.5224, "step": 6583 }, { "epoch": 0.42, "grad_norm": 1.3149768114089966, "learning_rate": 6.4371313321149385e-06, "loss": 0.558, "step": 6584 }, { "epoch": 0.43, "grad_norm": 1.0299975872039795, "learning_rate": 6.436130094174733e-06, "loss": 0.5198, "step": 6585 }, { "epoch": 0.43, "grad_norm": 1.2031192779541016, "learning_rate": 6.435128793464984e-06, "loss": 0.5439, "step": 6586 }, { "epoch": 0.43, "grad_norm": 1.1147325038909912, "learning_rate": 6.4341274300294595e-06, "loss": 0.5372, "step": 6587 }, { "epoch": 0.43, "grad_norm": 1.3437724113464355, "learning_rate": 6.433126003911925e-06, "loss": 0.5596, "step": 6588 }, { "epoch": 0.43, "grad_norm": 1.1707764863967896, "learning_rate": 6.43212451515615e-06, "loss": 0.5622, "step": 6589 }, { "epoch": 0.43, "grad_norm": 1.1318869590759277, "learning_rate": 6.431122963805907e-06, "loss": 0.5474, "step": 6590 }, { "epoch": 0.43, "grad_norm": 1.1793087720870972, "learning_rate": 6.430121349904973e-06, "loss": 0.5118, "step": 6591 }, { "epoch": 0.43, "grad_norm": 1.1493185758590698, "learning_rate": 6.429119673497124e-06, "loss": 0.5426, "step": 6592 }, { "epoch": 0.43, "grad_norm": 1.234368085861206, "learning_rate": 6.42811793462614e-06, "loss": 0.5579, "step": 6593 }, { "epoch": 0.43, "grad_norm": 1.1758615970611572, "learning_rate": 6.427116133335808e-06, "loss": 0.5588, "step": 6594 }, { "epoch": 0.43, "grad_norm": 1.5578436851501465, "learning_rate": 6.42611426966991e-06, "loss": 0.5234, "step": 6595 }, { "epoch": 0.43, "grad_norm": 1.1059601306915283, "learning_rate": 6.425112343672238e-06, "loss": 0.5604, "step": 6596 }, { "epoch": 0.43, "grad_norm": 1.1529700756072998, "learning_rate": 6.424110355386581e-06, "loss": 0.5572, "step": 6597 }, { "epoch": 0.43, "grad_norm": 1.1757985353469849, "learning_rate": 6.423108304856736e-06, "loss": 0.5398, "step": 6598 }, { "epoch": 0.43, "grad_norm": 1.1941335201263428, "learning_rate": 6.422106192126496e-06, "loss": 0.5811, "step": 6599 }, { "epoch": 0.43, "grad_norm": 1.1892967224121094, "learning_rate": 6.421104017239667e-06, "loss": 0.5831, "step": 6600 }, { "epoch": 0.43, "grad_norm": 1.100899338722229, "learning_rate": 6.420101780240046e-06, "loss": 0.5015, "step": 6601 }, { "epoch": 0.43, "grad_norm": 1.1722382307052612, "learning_rate": 6.419099481171439e-06, "loss": 0.5016, "step": 6602 }, { "epoch": 0.43, "grad_norm": 1.0539590120315552, "learning_rate": 6.418097120077658e-06, "loss": 0.4795, "step": 6603 }, { "epoch": 0.43, "grad_norm": 1.1696726083755493, "learning_rate": 6.417094697002507e-06, "loss": 0.561, "step": 6604 }, { "epoch": 0.43, "grad_norm": 1.2152032852172852, "learning_rate": 6.416092211989805e-06, "loss": 0.5195, "step": 6605 }, { "epoch": 0.43, "grad_norm": 1.2245122194290161, "learning_rate": 6.415089665083367e-06, "loss": 0.5501, "step": 6606 }, { "epoch": 0.43, "grad_norm": 1.1275116205215454, "learning_rate": 6.414087056327009e-06, "loss": 0.564, "step": 6607 }, { "epoch": 0.43, "grad_norm": 1.1263766288757324, "learning_rate": 6.413084385764555e-06, "loss": 0.5, "step": 6608 }, { "epoch": 0.43, "grad_norm": 1.2606641054153442, "learning_rate": 6.412081653439828e-06, "loss": 0.5548, "step": 6609 }, { "epoch": 0.43, "grad_norm": 1.1410746574401855, "learning_rate": 6.411078859396655e-06, "loss": 0.4962, "step": 6610 }, { "epoch": 0.43, "grad_norm": 1.1974711418151855, "learning_rate": 6.410076003678866e-06, "loss": 0.5712, "step": 6611 }, { "epoch": 0.43, "grad_norm": 1.2712841033935547, "learning_rate": 6.409073086330293e-06, "loss": 0.5843, "step": 6612 }, { "epoch": 0.43, "grad_norm": 1.0647729635238647, "learning_rate": 6.4080701073947705e-06, "loss": 0.5635, "step": 6613 }, { "epoch": 0.43, "grad_norm": 1.1496176719665527, "learning_rate": 6.407067066916136e-06, "loss": 0.5393, "step": 6614 }, { "epoch": 0.43, "grad_norm": 1.3654991388320923, "learning_rate": 6.406063964938232e-06, "loss": 0.6114, "step": 6615 }, { "epoch": 0.43, "grad_norm": 1.1909685134887695, "learning_rate": 6.405060801504899e-06, "loss": 0.5493, "step": 6616 }, { "epoch": 0.43, "grad_norm": 1.1185274124145508, "learning_rate": 6.404057576659983e-06, "loss": 0.5618, "step": 6617 }, { "epoch": 0.43, "grad_norm": 1.1115777492523193, "learning_rate": 6.403054290447332e-06, "loss": 0.5408, "step": 6618 }, { "epoch": 0.43, "grad_norm": 1.2093309164047241, "learning_rate": 6.4020509429107995e-06, "loss": 0.5324, "step": 6619 }, { "epoch": 0.43, "grad_norm": 1.2419217824935913, "learning_rate": 6.401047534094237e-06, "loss": 0.5497, "step": 6620 }, { "epoch": 0.43, "grad_norm": 1.2501477003097534, "learning_rate": 6.4000440640415015e-06, "loss": 0.5924, "step": 6621 }, { "epoch": 0.43, "grad_norm": 1.076384425163269, "learning_rate": 6.399040532796451e-06, "loss": 0.5296, "step": 6622 }, { "epoch": 0.43, "grad_norm": 1.2681604623794556, "learning_rate": 6.398036940402951e-06, "loss": 0.5405, "step": 6623 }, { "epoch": 0.43, "grad_norm": 1.1009299755096436, "learning_rate": 6.397033286904861e-06, "loss": 0.5445, "step": 6624 }, { "epoch": 0.43, "grad_norm": 1.187114953994751, "learning_rate": 6.396029572346051e-06, "loss": 0.5989, "step": 6625 }, { "epoch": 0.43, "grad_norm": 1.0613240003585815, "learning_rate": 6.3950257967703914e-06, "loss": 0.5634, "step": 6626 }, { "epoch": 0.43, "grad_norm": 1.226152777671814, "learning_rate": 6.3940219602217525e-06, "loss": 0.5588, "step": 6627 }, { "epoch": 0.43, "grad_norm": 1.2421042919158936, "learning_rate": 6.393018062744009e-06, "loss": 0.5696, "step": 6628 }, { "epoch": 0.43, "grad_norm": 1.1002297401428223, "learning_rate": 6.392014104381042e-06, "loss": 0.5207, "step": 6629 }, { "epoch": 0.43, "grad_norm": 1.3195234537124634, "learning_rate": 6.3910100851767295e-06, "loss": 0.561, "step": 6630 }, { "epoch": 0.43, "grad_norm": 1.1133586168289185, "learning_rate": 6.390006005174955e-06, "loss": 0.5008, "step": 6631 }, { "epoch": 0.43, "grad_norm": 1.145371913909912, "learning_rate": 6.389001864419604e-06, "loss": 0.553, "step": 6632 }, { "epoch": 0.43, "grad_norm": 1.1086817979812622, "learning_rate": 6.3879976629545645e-06, "loss": 0.5321, "step": 6633 }, { "epoch": 0.43, "grad_norm": 1.2953245639801025, "learning_rate": 6.386993400823729e-06, "loss": 0.5694, "step": 6634 }, { "epoch": 0.43, "grad_norm": 1.151080846786499, "learning_rate": 6.385989078070991e-06, "loss": 0.5448, "step": 6635 }, { "epoch": 0.43, "grad_norm": 1.1887531280517578, "learning_rate": 6.384984694740246e-06, "loss": 0.4978, "step": 6636 }, { "epoch": 0.43, "grad_norm": 1.1070908308029175, "learning_rate": 6.383980250875392e-06, "loss": 0.5197, "step": 6637 }, { "epoch": 0.43, "grad_norm": 1.114690899848938, "learning_rate": 6.3829757465203345e-06, "loss": 0.5171, "step": 6638 }, { "epoch": 0.43, "grad_norm": 1.117717981338501, "learning_rate": 6.381971181718973e-06, "loss": 0.5211, "step": 6639 }, { "epoch": 0.43, "grad_norm": 1.2272666692733765, "learning_rate": 6.380966556515217e-06, "loss": 0.5768, "step": 6640 }, { "epoch": 0.43, "grad_norm": 1.1199657917022705, "learning_rate": 6.379961870952976e-06, "loss": 0.5569, "step": 6641 }, { "epoch": 0.43, "grad_norm": 1.0424433946609497, "learning_rate": 6.378957125076163e-06, "loss": 0.5159, "step": 6642 }, { "epoch": 0.43, "grad_norm": 1.1643624305725098, "learning_rate": 6.377952318928691e-06, "loss": 0.5365, "step": 6643 }, { "epoch": 0.43, "grad_norm": 1.2140549421310425, "learning_rate": 6.376947452554478e-06, "loss": 0.5557, "step": 6644 }, { "epoch": 0.43, "grad_norm": 1.1828885078430176, "learning_rate": 6.375942525997445e-06, "loss": 0.5268, "step": 6645 }, { "epoch": 0.43, "grad_norm": 1.122763752937317, "learning_rate": 6.374937539301514e-06, "loss": 0.5557, "step": 6646 }, { "epoch": 0.43, "grad_norm": 1.1576948165893555, "learning_rate": 6.373932492510611e-06, "loss": 0.5478, "step": 6647 }, { "epoch": 0.43, "grad_norm": 0.9814996719360352, "learning_rate": 6.3729273856686615e-06, "loss": 0.4865, "step": 6648 }, { "epoch": 0.43, "grad_norm": 1.197359323501587, "learning_rate": 6.371922218819599e-06, "loss": 0.5006, "step": 6649 }, { "epoch": 0.43, "grad_norm": 1.131338357925415, "learning_rate": 6.370916992007357e-06, "loss": 0.518, "step": 6650 }, { "epoch": 0.43, "grad_norm": 1.0859702825546265, "learning_rate": 6.36991170527587e-06, "loss": 0.5379, "step": 6651 }, { "epoch": 0.43, "grad_norm": 1.0943994522094727, "learning_rate": 6.368906358669078e-06, "loss": 0.5204, "step": 6652 }, { "epoch": 0.43, "grad_norm": 1.2140947580337524, "learning_rate": 6.367900952230919e-06, "loss": 0.5585, "step": 6653 }, { "epoch": 0.43, "grad_norm": 1.4062848091125488, "learning_rate": 6.366895486005338e-06, "loss": 0.5703, "step": 6654 }, { "epoch": 0.43, "grad_norm": 1.1390233039855957, "learning_rate": 6.365889960036285e-06, "loss": 0.5625, "step": 6655 }, { "epoch": 0.43, "grad_norm": 1.1600004434585571, "learning_rate": 6.364884374367702e-06, "loss": 0.5411, "step": 6656 }, { "epoch": 0.43, "grad_norm": 1.1759625673294067, "learning_rate": 6.363878729043548e-06, "loss": 0.5441, "step": 6657 }, { "epoch": 0.43, "grad_norm": 1.125388741493225, "learning_rate": 6.3628730241077715e-06, "loss": 0.5287, "step": 6658 }, { "epoch": 0.43, "grad_norm": 1.198327898979187, "learning_rate": 6.3618672596043325e-06, "loss": 0.5791, "step": 6659 }, { "epoch": 0.43, "grad_norm": 1.2838727235794067, "learning_rate": 6.360861435577188e-06, "loss": 0.5412, "step": 6660 }, { "epoch": 0.43, "grad_norm": 1.2813241481781006, "learning_rate": 6.359855552070303e-06, "loss": 0.5537, "step": 6661 }, { "epoch": 0.43, "grad_norm": 1.200461745262146, "learning_rate": 6.358849609127638e-06, "loss": 0.5187, "step": 6662 }, { "epoch": 0.43, "grad_norm": 1.2151081562042236, "learning_rate": 6.357843606793164e-06, "loss": 0.5337, "step": 6663 }, { "epoch": 0.43, "grad_norm": 1.1022887229919434, "learning_rate": 6.35683754511085e-06, "loss": 0.5058, "step": 6664 }, { "epoch": 0.43, "grad_norm": 1.1020140647888184, "learning_rate": 6.355831424124665e-06, "loss": 0.4847, "step": 6665 }, { "epoch": 0.43, "grad_norm": 1.1624722480773926, "learning_rate": 6.354825243878588e-06, "loss": 0.5654, "step": 6666 }, { "epoch": 0.43, "grad_norm": 1.2767335176467896, "learning_rate": 6.353819004416596e-06, "loss": 0.5232, "step": 6667 }, { "epoch": 0.43, "grad_norm": 1.2118127346038818, "learning_rate": 6.352812705782667e-06, "loss": 0.6069, "step": 6668 }, { "epoch": 0.43, "grad_norm": 1.1560550928115845, "learning_rate": 6.351806348020785e-06, "loss": 0.5043, "step": 6669 }, { "epoch": 0.43, "grad_norm": 1.1382254362106323, "learning_rate": 6.350799931174936e-06, "loss": 0.5145, "step": 6670 }, { "epoch": 0.43, "grad_norm": 1.0721601247787476, "learning_rate": 6.349793455289106e-06, "loss": 0.5208, "step": 6671 }, { "epoch": 0.43, "grad_norm": 1.288313865661621, "learning_rate": 6.348786920407286e-06, "loss": 0.5699, "step": 6672 }, { "epoch": 0.43, "grad_norm": 1.0897806882858276, "learning_rate": 6.347780326573471e-06, "loss": 0.5043, "step": 6673 }, { "epoch": 0.43, "grad_norm": 1.0872310400009155, "learning_rate": 6.3467736738316555e-06, "loss": 0.5339, "step": 6674 }, { "epoch": 0.43, "grad_norm": 1.3498934507369995, "learning_rate": 6.3457669622258364e-06, "loss": 0.5908, "step": 6675 }, { "epoch": 0.43, "grad_norm": 1.2046737670898438, "learning_rate": 6.344760191800017e-06, "loss": 0.556, "step": 6676 }, { "epoch": 0.43, "grad_norm": 1.225901484489441, "learning_rate": 6.343753362598198e-06, "loss": 0.5608, "step": 6677 }, { "epoch": 0.43, "grad_norm": 1.1428412199020386, "learning_rate": 6.3427464746643865e-06, "loss": 0.5315, "step": 6678 }, { "epoch": 0.43, "grad_norm": 1.1263453960418701, "learning_rate": 6.341739528042592e-06, "loss": 0.5455, "step": 6679 }, { "epoch": 0.43, "grad_norm": 1.1435719728469849, "learning_rate": 6.3407325227768225e-06, "loss": 0.5402, "step": 6680 }, { "epoch": 0.43, "grad_norm": 1.0904382467269897, "learning_rate": 6.339725458911094e-06, "loss": 0.5473, "step": 6681 }, { "epoch": 0.43, "grad_norm": 1.3319333791732788, "learning_rate": 6.3387183364894245e-06, "loss": 0.5325, "step": 6682 }, { "epoch": 0.43, "grad_norm": 1.164131999015808, "learning_rate": 6.337711155555828e-06, "loss": 0.5893, "step": 6683 }, { "epoch": 0.43, "grad_norm": 1.1563197374343872, "learning_rate": 6.336703916154329e-06, "loss": 0.5071, "step": 6684 }, { "epoch": 0.43, "grad_norm": 1.182946801185608, "learning_rate": 6.335696618328951e-06, "loss": 0.5482, "step": 6685 }, { "epoch": 0.43, "grad_norm": 1.20772385597229, "learning_rate": 6.334689262123719e-06, "loss": 0.5159, "step": 6686 }, { "epoch": 0.43, "grad_norm": 1.1021828651428223, "learning_rate": 6.333681847582662e-06, "loss": 0.5329, "step": 6687 }, { "epoch": 0.43, "grad_norm": 1.2859179973602295, "learning_rate": 6.3326743747498155e-06, "loss": 0.5562, "step": 6688 }, { "epoch": 0.43, "grad_norm": 1.1011086702346802, "learning_rate": 6.3316668436692085e-06, "loss": 0.5519, "step": 6689 }, { "epoch": 0.43, "grad_norm": 1.1424318552017212, "learning_rate": 6.330659254384879e-06, "loss": 0.5731, "step": 6690 }, { "epoch": 0.43, "grad_norm": 1.1106187105178833, "learning_rate": 6.329651606940869e-06, "loss": 0.5071, "step": 6691 }, { "epoch": 0.43, "grad_norm": 1.2973883152008057, "learning_rate": 6.328643901381215e-06, "loss": 0.5387, "step": 6692 }, { "epoch": 0.43, "grad_norm": 1.2485250234603882, "learning_rate": 6.327636137749965e-06, "loss": 0.5751, "step": 6693 }, { "epoch": 0.43, "grad_norm": 1.1536225080490112, "learning_rate": 6.326628316091166e-06, "loss": 0.5177, "step": 6694 }, { "epoch": 0.43, "grad_norm": 1.2200766801834106, "learning_rate": 6.325620436448865e-06, "loss": 0.5287, "step": 6695 }, { "epoch": 0.43, "grad_norm": 1.0932575464248657, "learning_rate": 6.324612498867115e-06, "loss": 0.5631, "step": 6696 }, { "epoch": 0.43, "grad_norm": 1.1316252946853638, "learning_rate": 6.3236045033899696e-06, "loss": 0.5345, "step": 6697 }, { "epoch": 0.43, "grad_norm": 1.2658323049545288, "learning_rate": 6.322596450061487e-06, "loss": 0.5137, "step": 6698 }, { "epoch": 0.43, "grad_norm": 1.087519884109497, "learning_rate": 6.321588338925726e-06, "loss": 0.4928, "step": 6699 }, { "epoch": 0.43, "grad_norm": 1.1668543815612793, "learning_rate": 6.3205801700267476e-06, "loss": 0.5532, "step": 6700 }, { "epoch": 0.43, "grad_norm": 1.1104485988616943, "learning_rate": 6.319571943408619e-06, "loss": 0.5192, "step": 6701 }, { "epoch": 0.43, "grad_norm": 1.1283878087997437, "learning_rate": 6.3185636591154035e-06, "loss": 0.5177, "step": 6702 }, { "epoch": 0.43, "grad_norm": 1.2585766315460205, "learning_rate": 6.317555317191172e-06, "loss": 0.5643, "step": 6703 }, { "epoch": 0.43, "grad_norm": 1.3158694505691528, "learning_rate": 6.316546917679998e-06, "loss": 0.579, "step": 6704 }, { "epoch": 0.43, "grad_norm": 1.3501734733581543, "learning_rate": 6.315538460625955e-06, "loss": 0.5636, "step": 6705 }, { "epoch": 0.43, "grad_norm": 1.4122841358184814, "learning_rate": 6.31452994607312e-06, "loss": 0.5922, "step": 6706 }, { "epoch": 0.43, "grad_norm": 1.2335443496704102, "learning_rate": 6.313521374065572e-06, "loss": 0.6017, "step": 6707 }, { "epoch": 0.43, "grad_norm": 1.1170846223831177, "learning_rate": 6.3125127446473955e-06, "loss": 0.5332, "step": 6708 }, { "epoch": 0.43, "grad_norm": 1.1544042825698853, "learning_rate": 6.311504057862672e-06, "loss": 0.5706, "step": 6709 }, { "epoch": 0.43, "grad_norm": 1.3449640274047852, "learning_rate": 6.310495313755489e-06, "loss": 0.5371, "step": 6710 }, { "epoch": 0.43, "grad_norm": 1.2692160606384277, "learning_rate": 6.309486512369938e-06, "loss": 0.549, "step": 6711 }, { "epoch": 0.43, "grad_norm": 1.1409558057785034, "learning_rate": 6.3084776537501095e-06, "loss": 0.5741, "step": 6712 }, { "epoch": 0.43, "grad_norm": 1.2066971063613892, "learning_rate": 6.307468737940099e-06, "loss": 0.4835, "step": 6713 }, { "epoch": 0.43, "grad_norm": 1.1529502868652344, "learning_rate": 6.306459764984003e-06, "loss": 0.5241, "step": 6714 }, { "epoch": 0.43, "grad_norm": 1.234434723854065, "learning_rate": 6.305450734925922e-06, "loss": 0.5796, "step": 6715 }, { "epoch": 0.43, "grad_norm": 1.0866888761520386, "learning_rate": 6.304441647809956e-06, "loss": 0.5073, "step": 6716 }, { "epoch": 0.43, "grad_norm": 1.2113020420074463, "learning_rate": 6.303432503680213e-06, "loss": 0.5636, "step": 6717 }, { "epoch": 0.43, "grad_norm": 1.1149721145629883, "learning_rate": 6.302423302580797e-06, "loss": 0.5382, "step": 6718 }, { "epoch": 0.43, "grad_norm": 1.1074836254119873, "learning_rate": 6.301414044555818e-06, "loss": 0.4709, "step": 6719 }, { "epoch": 0.43, "grad_norm": 1.126065731048584, "learning_rate": 6.300404729649391e-06, "loss": 0.5467, "step": 6720 }, { "epoch": 0.43, "grad_norm": 1.1229344606399536, "learning_rate": 6.299395357905627e-06, "loss": 0.537, "step": 6721 }, { "epoch": 0.43, "grad_norm": 1.1473721265792847, "learning_rate": 6.298385929368645e-06, "loss": 0.5422, "step": 6722 }, { "epoch": 0.43, "grad_norm": 1.211525797843933, "learning_rate": 6.297376444082563e-06, "loss": 0.5855, "step": 6723 }, { "epoch": 0.43, "grad_norm": 1.2363842725753784, "learning_rate": 6.296366902091504e-06, "loss": 0.5227, "step": 6724 }, { "epoch": 0.43, "grad_norm": 1.0950682163238525, "learning_rate": 6.295357303439592e-06, "loss": 0.5451, "step": 6725 }, { "epoch": 0.43, "grad_norm": 1.1286815404891968, "learning_rate": 6.294347648170957e-06, "loss": 0.5093, "step": 6726 }, { "epoch": 0.43, "grad_norm": 1.1287585496902466, "learning_rate": 6.293337936329722e-06, "loss": 0.5101, "step": 6727 }, { "epoch": 0.43, "grad_norm": 1.1168146133422852, "learning_rate": 6.292328167960025e-06, "loss": 0.5595, "step": 6728 }, { "epoch": 0.43, "grad_norm": 1.2156448364257812, "learning_rate": 6.291318343105997e-06, "loss": 0.5229, "step": 6729 }, { "epoch": 0.43, "grad_norm": 1.1712074279785156, "learning_rate": 6.290308461811777e-06, "loss": 0.5621, "step": 6730 }, { "epoch": 0.43, "grad_norm": 1.2120726108551025, "learning_rate": 6.289298524121502e-06, "loss": 0.54, "step": 6731 }, { "epoch": 0.43, "grad_norm": 1.1196033954620361, "learning_rate": 6.288288530079316e-06, "loss": 0.5548, "step": 6732 }, { "epoch": 0.43, "grad_norm": 1.177720069885254, "learning_rate": 6.287278479729362e-06, "loss": 0.5856, "step": 6733 }, { "epoch": 0.43, "grad_norm": 1.088247537612915, "learning_rate": 6.286268373115786e-06, "loss": 0.5062, "step": 6734 }, { "epoch": 0.43, "grad_norm": 1.4289277791976929, "learning_rate": 6.285258210282738e-06, "loss": 0.4869, "step": 6735 }, { "epoch": 0.43, "grad_norm": 1.2705706357955933, "learning_rate": 6.284247991274372e-06, "loss": 0.573, "step": 6736 }, { "epoch": 0.43, "grad_norm": 1.3010585308074951, "learning_rate": 6.283237716134838e-06, "loss": 0.5147, "step": 6737 }, { "epoch": 0.43, "grad_norm": 1.188440203666687, "learning_rate": 6.282227384908294e-06, "loss": 0.5237, "step": 6738 }, { "epoch": 0.43, "grad_norm": 1.1716055870056152, "learning_rate": 6.281216997638898e-06, "loss": 0.5273, "step": 6739 }, { "epoch": 0.44, "grad_norm": 1.1704139709472656, "learning_rate": 6.280206554370816e-06, "loss": 0.5446, "step": 6740 }, { "epoch": 0.44, "grad_norm": 1.2911522388458252, "learning_rate": 6.279196055148206e-06, "loss": 0.5282, "step": 6741 }, { "epoch": 0.44, "grad_norm": 1.2759385108947754, "learning_rate": 6.278185500015236e-06, "loss": 0.5546, "step": 6742 }, { "epoch": 0.44, "grad_norm": 1.2613354921340942, "learning_rate": 6.277174889016078e-06, "loss": 0.5201, "step": 6743 }, { "epoch": 0.44, "grad_norm": 1.2125481367111206, "learning_rate": 6.276164222194899e-06, "loss": 0.5547, "step": 6744 }, { "epoch": 0.44, "grad_norm": 1.0845580101013184, "learning_rate": 6.275153499595874e-06, "loss": 0.5131, "step": 6745 }, { "epoch": 0.44, "grad_norm": 1.0915933847427368, "learning_rate": 6.274142721263181e-06, "loss": 0.5634, "step": 6746 }, { "epoch": 0.44, "grad_norm": 1.2797696590423584, "learning_rate": 6.273131887240996e-06, "loss": 0.5564, "step": 6747 }, { "epoch": 0.44, "grad_norm": 1.2373501062393188, "learning_rate": 6.2721209975735034e-06, "loss": 0.5267, "step": 6748 }, { "epoch": 0.44, "grad_norm": 1.1703773736953735, "learning_rate": 6.271110052304881e-06, "loss": 0.537, "step": 6749 }, { "epoch": 0.44, "grad_norm": 1.1855086088180542, "learning_rate": 6.270099051479319e-06, "loss": 0.5002, "step": 6750 }, { "epoch": 0.44, "grad_norm": 1.1892950534820557, "learning_rate": 6.269087995141004e-06, "loss": 0.5882, "step": 6751 }, { "epoch": 0.44, "grad_norm": 1.1575284004211426, "learning_rate": 6.268076883334127e-06, "loss": 0.5044, "step": 6752 }, { "epoch": 0.44, "grad_norm": 1.1110472679138184, "learning_rate": 6.2670657161028826e-06, "loss": 0.5249, "step": 6753 }, { "epoch": 0.44, "grad_norm": 1.2118079662322998, "learning_rate": 6.266054493491462e-06, "loss": 0.5373, "step": 6754 }, { "epoch": 0.44, "grad_norm": 1.1539233922958374, "learning_rate": 6.265043215544069e-06, "loss": 0.556, "step": 6755 }, { "epoch": 0.44, "grad_norm": 1.1706339120864868, "learning_rate": 6.2640318823048996e-06, "loss": 0.5556, "step": 6756 }, { "epoch": 0.44, "grad_norm": 1.216418981552124, "learning_rate": 6.263020493818157e-06, "loss": 0.5772, "step": 6757 }, { "epoch": 0.44, "grad_norm": 1.2249383926391602, "learning_rate": 6.2620090501280484e-06, "loss": 0.5462, "step": 6758 }, { "epoch": 0.44, "grad_norm": 1.1233980655670166, "learning_rate": 6.260997551278779e-06, "loss": 0.526, "step": 6759 }, { "epoch": 0.44, "grad_norm": 1.2602605819702148, "learning_rate": 6.25998599731456e-06, "loss": 0.4969, "step": 6760 }, { "epoch": 0.44, "grad_norm": 1.2117416858673096, "learning_rate": 6.258974388279606e-06, "loss": 0.5662, "step": 6761 }, { "epoch": 0.44, "grad_norm": 1.0165600776672363, "learning_rate": 6.257962724218127e-06, "loss": 0.5277, "step": 6762 }, { "epoch": 0.44, "grad_norm": 1.2343249320983887, "learning_rate": 6.256951005174345e-06, "loss": 0.5572, "step": 6763 }, { "epoch": 0.44, "grad_norm": 1.4334464073181152, "learning_rate": 6.2559392311924774e-06, "loss": 0.5818, "step": 6764 }, { "epoch": 0.44, "grad_norm": 1.0872509479522705, "learning_rate": 6.254927402316745e-06, "loss": 0.5095, "step": 6765 }, { "epoch": 0.44, "grad_norm": 1.1506636142730713, "learning_rate": 6.253915518591374e-06, "loss": 0.5367, "step": 6766 }, { "epoch": 0.44, "grad_norm": 1.0973119735717773, "learning_rate": 6.252903580060592e-06, "loss": 0.5301, "step": 6767 }, { "epoch": 0.44, "grad_norm": 1.1042602062225342, "learning_rate": 6.251891586768626e-06, "loss": 0.5336, "step": 6768 }, { "epoch": 0.44, "grad_norm": 1.2398840188980103, "learning_rate": 6.250879538759709e-06, "loss": 0.5466, "step": 6769 }, { "epoch": 0.44, "grad_norm": 1.1141756772994995, "learning_rate": 6.2498674360780745e-06, "loss": 0.5053, "step": 6770 }, { "epoch": 0.44, "grad_norm": 1.1904479265213013, "learning_rate": 6.248855278767959e-06, "loss": 0.5599, "step": 6771 }, { "epoch": 0.44, "grad_norm": 1.0705400705337524, "learning_rate": 6.247843066873601e-06, "loss": 0.5208, "step": 6772 }, { "epoch": 0.44, "grad_norm": 1.3041726350784302, "learning_rate": 6.246830800439243e-06, "loss": 0.5341, "step": 6773 }, { "epoch": 0.44, "grad_norm": 1.1913373470306396, "learning_rate": 6.245818479509127e-06, "loss": 0.5245, "step": 6774 }, { "epoch": 0.44, "grad_norm": 1.1065378189086914, "learning_rate": 6.2448061041275e-06, "loss": 0.6013, "step": 6775 }, { "epoch": 0.44, "grad_norm": 1.2110543251037598, "learning_rate": 6.24379367433861e-06, "loss": 0.5574, "step": 6776 }, { "epoch": 0.44, "grad_norm": 1.1865408420562744, "learning_rate": 6.242781190186707e-06, "loss": 0.5351, "step": 6777 }, { "epoch": 0.44, "grad_norm": 1.417027235031128, "learning_rate": 6.2417686517160435e-06, "loss": 0.5171, "step": 6778 }, { "epoch": 0.44, "grad_norm": 1.0883121490478516, "learning_rate": 6.2407560589708786e-06, "loss": 0.5504, "step": 6779 }, { "epoch": 0.44, "grad_norm": 1.2571884393692017, "learning_rate": 6.239743411995465e-06, "loss": 0.5336, "step": 6780 }, { "epoch": 0.44, "grad_norm": 1.1501826047897339, "learning_rate": 6.2387307108340665e-06, "loss": 0.5011, "step": 6781 }, { "epoch": 0.44, "grad_norm": 1.2476816177368164, "learning_rate": 6.237717955530944e-06, "loss": 0.5948, "step": 6782 }, { "epoch": 0.44, "grad_norm": 1.2635692358016968, "learning_rate": 6.236705146130364e-06, "loss": 0.5548, "step": 6783 }, { "epoch": 0.44, "grad_norm": 1.2386068105697632, "learning_rate": 6.235692282676593e-06, "loss": 0.5535, "step": 6784 }, { "epoch": 0.44, "grad_norm": 1.0706104040145874, "learning_rate": 6.234679365213899e-06, "loss": 0.5336, "step": 6785 }, { "epoch": 0.44, "grad_norm": 1.109586238861084, "learning_rate": 6.233666393786557e-06, "loss": 0.5182, "step": 6786 }, { "epoch": 0.44, "grad_norm": 1.186035394668579, "learning_rate": 6.232653368438839e-06, "loss": 0.5159, "step": 6787 }, { "epoch": 0.44, "grad_norm": 1.0908302068710327, "learning_rate": 6.231640289215023e-06, "loss": 0.5323, "step": 6788 }, { "epoch": 0.44, "grad_norm": 1.178027868270874, "learning_rate": 6.230627156159387e-06, "loss": 0.5473, "step": 6789 }, { "epoch": 0.44, "grad_norm": 1.1483092308044434, "learning_rate": 6.229613969316214e-06, "loss": 0.5472, "step": 6790 }, { "epoch": 0.44, "grad_norm": 1.2437912225723267, "learning_rate": 6.228600728729786e-06, "loss": 0.5762, "step": 6791 }, { "epoch": 0.44, "grad_norm": 1.5257097482681274, "learning_rate": 6.22758743444439e-06, "loss": 0.6081, "step": 6792 }, { "epoch": 0.44, "grad_norm": 1.3115607500076294, "learning_rate": 6.226574086504316e-06, "loss": 0.5486, "step": 6793 }, { "epoch": 0.44, "grad_norm": 1.240601897239685, "learning_rate": 6.225560684953851e-06, "loss": 0.5643, "step": 6794 }, { "epoch": 0.44, "grad_norm": 1.1718342304229736, "learning_rate": 6.2245472298372925e-06, "loss": 0.53, "step": 6795 }, { "epoch": 0.44, "grad_norm": 1.1871646642684937, "learning_rate": 6.223533721198932e-06, "loss": 0.554, "step": 6796 }, { "epoch": 0.44, "grad_norm": 1.1051684617996216, "learning_rate": 6.222520159083071e-06, "loss": 0.5671, "step": 6797 }, { "epoch": 0.44, "grad_norm": 1.1356797218322754, "learning_rate": 6.221506543534007e-06, "loss": 0.4964, "step": 6798 }, { "epoch": 0.44, "grad_norm": 1.195529818534851, "learning_rate": 6.220492874596044e-06, "loss": 0.5685, "step": 6799 }, { "epoch": 0.44, "grad_norm": 1.156920313835144, "learning_rate": 6.219479152313485e-06, "loss": 0.4973, "step": 6800 }, { "epoch": 0.44, "grad_norm": 1.166253924369812, "learning_rate": 6.21846537673064e-06, "loss": 0.5797, "step": 6801 }, { "epoch": 0.44, "grad_norm": 1.222861647605896, "learning_rate": 6.217451547891817e-06, "loss": 0.5714, "step": 6802 }, { "epoch": 0.44, "grad_norm": 1.1042920351028442, "learning_rate": 6.216437665841328e-06, "loss": 0.5525, "step": 6803 }, { "epoch": 0.44, "grad_norm": 1.2037585973739624, "learning_rate": 6.215423730623487e-06, "loss": 0.5576, "step": 6804 }, { "epoch": 0.44, "grad_norm": 1.1108492612838745, "learning_rate": 6.21440974228261e-06, "loss": 0.5447, "step": 6805 }, { "epoch": 0.44, "grad_norm": 1.0848970413208008, "learning_rate": 6.213395700863016e-06, "loss": 0.5874, "step": 6806 }, { "epoch": 0.44, "grad_norm": 1.1022357940673828, "learning_rate": 6.212381606409027e-06, "loss": 0.5669, "step": 6807 }, { "epoch": 0.44, "grad_norm": 1.3528956174850464, "learning_rate": 6.211367458964966e-06, "loss": 0.6122, "step": 6808 }, { "epoch": 0.44, "grad_norm": 1.1100287437438965, "learning_rate": 6.2103532585751594e-06, "loss": 0.4932, "step": 6809 }, { "epoch": 0.44, "grad_norm": 1.1413315534591675, "learning_rate": 6.209339005283934e-06, "loss": 0.5462, "step": 6810 }, { "epoch": 0.44, "grad_norm": 1.2167271375656128, "learning_rate": 6.208324699135621e-06, "loss": 0.5179, "step": 6811 }, { "epoch": 0.44, "grad_norm": 1.1019314527511597, "learning_rate": 6.207310340174553e-06, "loss": 0.496, "step": 6812 }, { "epoch": 0.44, "grad_norm": 1.195087194442749, "learning_rate": 6.206295928445064e-06, "loss": 0.549, "step": 6813 }, { "epoch": 0.44, "grad_norm": 1.1585335731506348, "learning_rate": 6.2052814639914935e-06, "loss": 0.5592, "step": 6814 }, { "epoch": 0.44, "grad_norm": 1.2866971492767334, "learning_rate": 6.204266946858179e-06, "loss": 0.5819, "step": 6815 }, { "epoch": 0.44, "grad_norm": 1.2037031650543213, "learning_rate": 6.2032523770894635e-06, "loss": 0.5588, "step": 6816 }, { "epoch": 0.44, "grad_norm": 1.1417639255523682, "learning_rate": 6.202237754729692e-06, "loss": 0.5912, "step": 6817 }, { "epoch": 0.44, "grad_norm": 1.2404885292053223, "learning_rate": 6.201223079823209e-06, "loss": 0.5529, "step": 6818 }, { "epoch": 0.44, "grad_norm": 1.1275578737258911, "learning_rate": 6.2002083524143645e-06, "loss": 0.5289, "step": 6819 }, { "epoch": 0.44, "grad_norm": 1.2184343338012695, "learning_rate": 6.199193572547511e-06, "loss": 0.5677, "step": 6820 }, { "epoch": 0.44, "grad_norm": 1.2344970703125, "learning_rate": 6.198178740266998e-06, "loss": 0.4999, "step": 6821 }, { "epoch": 0.44, "grad_norm": 1.1696161031723022, "learning_rate": 6.197163855617184e-06, "loss": 0.5489, "step": 6822 }, { "epoch": 0.44, "grad_norm": 1.2463723421096802, "learning_rate": 6.196148918642428e-06, "loss": 0.5595, "step": 6823 }, { "epoch": 0.44, "grad_norm": 1.1972641944885254, "learning_rate": 6.195133929387087e-06, "loss": 0.5238, "step": 6824 }, { "epoch": 0.44, "grad_norm": 1.1501179933547974, "learning_rate": 6.194118887895528e-06, "loss": 0.5698, "step": 6825 }, { "epoch": 0.44, "grad_norm": 1.2294681072235107, "learning_rate": 6.193103794212111e-06, "loss": 0.5626, "step": 6826 }, { "epoch": 0.44, "grad_norm": 1.0841090679168701, "learning_rate": 6.192088648381206e-06, "loss": 0.5401, "step": 6827 }, { "epoch": 0.44, "grad_norm": 1.2403156757354736, "learning_rate": 6.1910734504471825e-06, "loss": 0.4964, "step": 6828 }, { "epoch": 0.44, "grad_norm": 1.2633928060531616, "learning_rate": 6.1900582004544105e-06, "loss": 0.5308, "step": 6829 }, { "epoch": 0.44, "grad_norm": 1.166534185409546, "learning_rate": 6.189042898447266e-06, "loss": 0.5397, "step": 6830 }, { "epoch": 0.44, "grad_norm": 1.1920108795166016, "learning_rate": 6.188027544470124e-06, "loss": 0.546, "step": 6831 }, { "epoch": 0.44, "grad_norm": 1.2044172286987305, "learning_rate": 6.187012138567362e-06, "loss": 0.5787, "step": 6832 }, { "epoch": 0.44, "grad_norm": 1.1026018857955933, "learning_rate": 6.185996680783363e-06, "loss": 0.5059, "step": 6833 }, { "epoch": 0.44, "grad_norm": 1.0948491096496582, "learning_rate": 6.18498117116251e-06, "loss": 0.5103, "step": 6834 }, { "epoch": 0.44, "grad_norm": 1.1223775148391724, "learning_rate": 6.183965609749185e-06, "loss": 0.5519, "step": 6835 }, { "epoch": 0.44, "grad_norm": 1.1391513347625732, "learning_rate": 6.182949996587779e-06, "loss": 0.5718, "step": 6836 }, { "epoch": 0.44, "grad_norm": 1.1534706354141235, "learning_rate": 6.181934331722681e-06, "loss": 0.5166, "step": 6837 }, { "epoch": 0.44, "grad_norm": 1.1824513673782349, "learning_rate": 6.180918615198283e-06, "loss": 0.5497, "step": 6838 }, { "epoch": 0.44, "grad_norm": 1.1367617845535278, "learning_rate": 6.179902847058977e-06, "loss": 0.5342, "step": 6839 }, { "epoch": 0.44, "grad_norm": 1.2167878150939941, "learning_rate": 6.178887027349164e-06, "loss": 0.5541, "step": 6840 }, { "epoch": 0.44, "grad_norm": 1.19105863571167, "learning_rate": 6.1778711561132396e-06, "loss": 0.5278, "step": 6841 }, { "epoch": 0.44, "grad_norm": 1.141863226890564, "learning_rate": 6.176855233395607e-06, "loss": 0.5348, "step": 6842 }, { "epoch": 0.44, "grad_norm": 1.137318730354309, "learning_rate": 6.175839259240668e-06, "loss": 0.503, "step": 6843 }, { "epoch": 0.44, "grad_norm": 1.1024043560028076, "learning_rate": 6.1748232336928285e-06, "loss": 0.525, "step": 6844 }, { "epoch": 0.44, "grad_norm": 1.126178503036499, "learning_rate": 6.173807156796497e-06, "loss": 0.5585, "step": 6845 }, { "epoch": 0.44, "grad_norm": 1.1939383745193481, "learning_rate": 6.172791028596082e-06, "loss": 0.5729, "step": 6846 }, { "epoch": 0.44, "grad_norm": 1.205169916152954, "learning_rate": 6.1717748491359976e-06, "loss": 0.5576, "step": 6847 }, { "epoch": 0.44, "grad_norm": 1.208457350730896, "learning_rate": 6.170758618460657e-06, "loss": 0.5529, "step": 6848 }, { "epoch": 0.44, "grad_norm": 1.3138360977172852, "learning_rate": 6.169742336614479e-06, "loss": 0.5625, "step": 6849 }, { "epoch": 0.44, "grad_norm": 1.2466132640838623, "learning_rate": 6.168726003641882e-06, "loss": 0.5336, "step": 6850 }, { "epoch": 0.44, "grad_norm": 1.2396925687789917, "learning_rate": 6.167709619587286e-06, "loss": 0.539, "step": 6851 }, { "epoch": 0.44, "grad_norm": 1.3708839416503906, "learning_rate": 6.166693184495114e-06, "loss": 0.5635, "step": 6852 }, { "epoch": 0.44, "grad_norm": 1.1831982135772705, "learning_rate": 6.165676698409794e-06, "loss": 0.472, "step": 6853 }, { "epoch": 0.44, "grad_norm": 1.1797980070114136, "learning_rate": 6.1646601613757525e-06, "loss": 0.4831, "step": 6854 }, { "epoch": 0.44, "grad_norm": 1.1902151107788086, "learning_rate": 6.1636435734374215e-06, "loss": 0.5399, "step": 6855 }, { "epoch": 0.44, "grad_norm": 1.1658587455749512, "learning_rate": 6.162626934639231e-06, "loss": 0.4901, "step": 6856 }, { "epoch": 0.44, "grad_norm": 1.1310430765151978, "learning_rate": 6.161610245025616e-06, "loss": 0.5127, "step": 6857 }, { "epoch": 0.44, "grad_norm": 1.1293658018112183, "learning_rate": 6.160593504641017e-06, "loss": 0.5456, "step": 6858 }, { "epoch": 0.44, "grad_norm": 1.5209153890609741, "learning_rate": 6.159576713529868e-06, "loss": 0.6026, "step": 6859 }, { "epoch": 0.44, "grad_norm": 1.2808387279510498, "learning_rate": 6.1585598717366125e-06, "loss": 0.5401, "step": 6860 }, { "epoch": 0.44, "grad_norm": 1.3432295322418213, "learning_rate": 6.157542979305696e-06, "loss": 0.4911, "step": 6861 }, { "epoch": 0.44, "grad_norm": 1.5326800346374512, "learning_rate": 6.156526036281561e-06, "loss": 0.5263, "step": 6862 }, { "epoch": 0.44, "grad_norm": 1.1695150136947632, "learning_rate": 6.155509042708656e-06, "loss": 0.5185, "step": 6863 }, { "epoch": 0.44, "grad_norm": 1.1976499557495117, "learning_rate": 6.154491998631434e-06, "loss": 0.5785, "step": 6864 }, { "epoch": 0.44, "grad_norm": 1.211490273475647, "learning_rate": 6.153474904094344e-06, "loss": 0.5237, "step": 6865 }, { "epoch": 0.44, "grad_norm": 1.2049427032470703, "learning_rate": 6.152457759141843e-06, "loss": 0.5813, "step": 6866 }, { "epoch": 0.44, "grad_norm": 1.1751891374588013, "learning_rate": 6.151440563818386e-06, "loss": 0.5014, "step": 6867 }, { "epoch": 0.44, "grad_norm": 1.1731411218643188, "learning_rate": 6.150423318168432e-06, "loss": 0.5369, "step": 6868 }, { "epoch": 0.44, "grad_norm": 1.219452142715454, "learning_rate": 6.149406022236445e-06, "loss": 0.5335, "step": 6869 }, { "epoch": 0.44, "grad_norm": 1.0543498992919922, "learning_rate": 6.1483886760668846e-06, "loss": 0.5829, "step": 6870 }, { "epoch": 0.44, "grad_norm": 1.2493737936019897, "learning_rate": 6.147371279704218e-06, "loss": 0.5427, "step": 6871 }, { "epoch": 0.44, "grad_norm": 1.1457195281982422, "learning_rate": 6.146353833192914e-06, "loss": 0.5258, "step": 6872 }, { "epoch": 0.44, "grad_norm": 1.165400505065918, "learning_rate": 6.145336336577441e-06, "loss": 0.5221, "step": 6873 }, { "epoch": 0.44, "grad_norm": 1.2207006216049194, "learning_rate": 6.1443187899022706e-06, "loss": 0.5296, "step": 6874 }, { "epoch": 0.44, "grad_norm": 1.1941081285476685, "learning_rate": 6.14330119321188e-06, "loss": 0.572, "step": 6875 }, { "epoch": 0.44, "grad_norm": 1.2958039045333862, "learning_rate": 6.142283546550743e-06, "loss": 0.5955, "step": 6876 }, { "epoch": 0.44, "grad_norm": 1.111495852470398, "learning_rate": 6.141265849963341e-06, "loss": 0.5425, "step": 6877 }, { "epoch": 0.44, "grad_norm": 1.1281205415725708, "learning_rate": 6.140248103494152e-06, "loss": 0.497, "step": 6878 }, { "epoch": 0.44, "grad_norm": 1.1478666067123413, "learning_rate": 6.139230307187659e-06, "loss": 0.5312, "step": 6879 }, { "epoch": 0.44, "grad_norm": 1.0958137512207031, "learning_rate": 6.13821246108835e-06, "loss": 0.5459, "step": 6880 }, { "epoch": 0.44, "grad_norm": 1.0599042177200317, "learning_rate": 6.1371945652407115e-06, "loss": 0.4989, "step": 6881 }, { "epoch": 0.44, "grad_norm": 1.2077945470809937, "learning_rate": 6.136176619689231e-06, "loss": 0.5728, "step": 6882 }, { "epoch": 0.44, "grad_norm": 1.2440731525421143, "learning_rate": 6.1351586244784025e-06, "loss": 0.5412, "step": 6883 }, { "epoch": 0.44, "grad_norm": 1.0919592380523682, "learning_rate": 6.1341405796527196e-06, "loss": 0.5152, "step": 6884 }, { "epoch": 0.44, "grad_norm": 1.1449111700057983, "learning_rate": 6.133122485256677e-06, "loss": 0.5674, "step": 6885 }, { "epoch": 0.44, "grad_norm": 1.276466965675354, "learning_rate": 6.132104341334776e-06, "loss": 0.5193, "step": 6886 }, { "epoch": 0.44, "grad_norm": 1.1739921569824219, "learning_rate": 6.131086147931515e-06, "loss": 0.5011, "step": 6887 }, { "epoch": 0.44, "grad_norm": 1.3628050088882446, "learning_rate": 6.130067905091395e-06, "loss": 0.5606, "step": 6888 }, { "epoch": 0.44, "grad_norm": 1.1291062831878662, "learning_rate": 6.129049612858923e-06, "loss": 0.5328, "step": 6889 }, { "epoch": 0.44, "grad_norm": 1.2382169961929321, "learning_rate": 6.128031271278605e-06, "loss": 0.5578, "step": 6890 }, { "epoch": 0.44, "grad_norm": 1.2108242511749268, "learning_rate": 6.127012880394952e-06, "loss": 0.5276, "step": 6891 }, { "epoch": 0.44, "grad_norm": 1.178237795829773, "learning_rate": 6.125994440252473e-06, "loss": 0.5189, "step": 6892 }, { "epoch": 0.44, "grad_norm": 1.1552395820617676, "learning_rate": 6.1249759508956815e-06, "loss": 0.5263, "step": 6893 }, { "epoch": 0.45, "grad_norm": 1.2913343906402588, "learning_rate": 6.123957412369095e-06, "loss": 0.5304, "step": 6894 }, { "epoch": 0.45, "grad_norm": 1.1654647588729858, "learning_rate": 6.122938824717228e-06, "loss": 0.545, "step": 6895 }, { "epoch": 0.45, "grad_norm": 1.2292076349258423, "learning_rate": 6.121920187984603e-06, "loss": 0.6127, "step": 6896 }, { "epoch": 0.45, "grad_norm": 1.1466739177703857, "learning_rate": 6.120901502215742e-06, "loss": 0.5667, "step": 6897 }, { "epoch": 0.45, "grad_norm": 1.2755959033966064, "learning_rate": 6.119882767455168e-06, "loss": 0.5606, "step": 6898 }, { "epoch": 0.45, "grad_norm": 1.1985609531402588, "learning_rate": 6.118863983747407e-06, "loss": 0.515, "step": 6899 }, { "epoch": 0.45, "grad_norm": 1.637126088142395, "learning_rate": 6.117845151136987e-06, "loss": 0.5073, "step": 6900 }, { "epoch": 0.45, "grad_norm": 1.3192228078842163, "learning_rate": 6.116826269668441e-06, "loss": 0.6109, "step": 6901 }, { "epoch": 0.45, "grad_norm": 1.2948119640350342, "learning_rate": 6.115807339386302e-06, "loss": 0.5335, "step": 6902 }, { "epoch": 0.45, "grad_norm": 1.2299840450286865, "learning_rate": 6.1147883603351e-06, "loss": 0.5401, "step": 6903 }, { "epoch": 0.45, "grad_norm": 1.3519829511642456, "learning_rate": 6.1137693325593746e-06, "loss": 0.5444, "step": 6904 }, { "epoch": 0.45, "grad_norm": 1.2252779006958008, "learning_rate": 6.112750256103668e-06, "loss": 0.5332, "step": 6905 }, { "epoch": 0.45, "grad_norm": 1.0424662828445435, "learning_rate": 6.111731131012516e-06, "loss": 0.5336, "step": 6906 }, { "epoch": 0.45, "grad_norm": 1.1939045190811157, "learning_rate": 6.110711957330466e-06, "loss": 0.5281, "step": 6907 }, { "epoch": 0.45, "grad_norm": 1.2745391130447388, "learning_rate": 6.109692735102062e-06, "loss": 0.5192, "step": 6908 }, { "epoch": 0.45, "grad_norm": 1.0980370044708252, "learning_rate": 6.108673464371852e-06, "loss": 0.5171, "step": 6909 }, { "epoch": 0.45, "grad_norm": 1.2864041328430176, "learning_rate": 6.1076541451843855e-06, "loss": 0.5, "step": 6910 }, { "epoch": 0.45, "grad_norm": 1.26646089553833, "learning_rate": 6.106634777584212e-06, "loss": 0.5751, "step": 6911 }, { "epoch": 0.45, "grad_norm": 1.278491735458374, "learning_rate": 6.105615361615888e-06, "loss": 0.5638, "step": 6912 }, { "epoch": 0.45, "grad_norm": 1.218826174736023, "learning_rate": 6.10459589732397e-06, "loss": 0.5228, "step": 6913 }, { "epoch": 0.45, "grad_norm": 1.2550265789031982, "learning_rate": 6.103576384753016e-06, "loss": 0.5599, "step": 6914 }, { "epoch": 0.45, "grad_norm": 1.1491137742996216, "learning_rate": 6.102556823947584e-06, "loss": 0.528, "step": 6915 }, { "epoch": 0.45, "grad_norm": 1.2091445922851562, "learning_rate": 6.101537214952239e-06, "loss": 0.5901, "step": 6916 }, { "epoch": 0.45, "grad_norm": 1.1747121810913086, "learning_rate": 6.100517557811543e-06, "loss": 0.5168, "step": 6917 }, { "epoch": 0.45, "grad_norm": 1.1626251935958862, "learning_rate": 6.099497852570064e-06, "loss": 0.588, "step": 6918 }, { "epoch": 0.45, "grad_norm": 1.25653076171875, "learning_rate": 6.098478099272372e-06, "loss": 0.5284, "step": 6919 }, { "epoch": 0.45, "grad_norm": 1.218740463256836, "learning_rate": 6.097458297963036e-06, "loss": 0.5516, "step": 6920 }, { "epoch": 0.45, "grad_norm": 1.3788201808929443, "learning_rate": 6.096438448686628e-06, "loss": 0.4975, "step": 6921 }, { "epoch": 0.45, "grad_norm": 1.236946702003479, "learning_rate": 6.095418551487726e-06, "loss": 0.551, "step": 6922 }, { "epoch": 0.45, "grad_norm": 1.105238914489746, "learning_rate": 6.094398606410905e-06, "loss": 0.5636, "step": 6923 }, { "epoch": 0.45, "grad_norm": 1.1168485879898071, "learning_rate": 6.093378613500743e-06, "loss": 0.5599, "step": 6924 }, { "epoch": 0.45, "grad_norm": 1.1674401760101318, "learning_rate": 6.092358572801824e-06, "loss": 0.5722, "step": 6925 }, { "epoch": 0.45, "grad_norm": 1.2346967458724976, "learning_rate": 6.091338484358731e-06, "loss": 0.5648, "step": 6926 }, { "epoch": 0.45, "grad_norm": 1.188775897026062, "learning_rate": 6.090318348216047e-06, "loss": 0.5517, "step": 6927 }, { "epoch": 0.45, "grad_norm": 1.3232202529907227, "learning_rate": 6.089298164418361e-06, "loss": 0.5428, "step": 6928 }, { "epoch": 0.45, "grad_norm": 1.185787558555603, "learning_rate": 6.088277933010264e-06, "loss": 0.4982, "step": 6929 }, { "epoch": 0.45, "grad_norm": 1.1954772472381592, "learning_rate": 6.0872576540363446e-06, "loss": 0.5491, "step": 6930 }, { "epoch": 0.45, "grad_norm": 1.2955033779144287, "learning_rate": 6.0862373275412005e-06, "loss": 0.5174, "step": 6931 }, { "epoch": 0.45, "grad_norm": 1.1089370250701904, "learning_rate": 6.085216953569423e-06, "loss": 0.5129, "step": 6932 }, { "epoch": 0.45, "grad_norm": 1.323983907699585, "learning_rate": 6.084196532165613e-06, "loss": 0.6589, "step": 6933 }, { "epoch": 0.45, "grad_norm": 1.1533541679382324, "learning_rate": 6.0831760633743705e-06, "loss": 0.5946, "step": 6934 }, { "epoch": 0.45, "grad_norm": 1.273072361946106, "learning_rate": 6.082155547240298e-06, "loss": 0.4824, "step": 6935 }, { "epoch": 0.45, "grad_norm": 1.1258262395858765, "learning_rate": 6.081134983807997e-06, "loss": 0.4949, "step": 6936 }, { "epoch": 0.45, "grad_norm": 1.2083665132522583, "learning_rate": 6.080114373122074e-06, "loss": 0.5934, "step": 6937 }, { "epoch": 0.45, "grad_norm": 1.2817237377166748, "learning_rate": 6.079093715227141e-06, "loss": 0.5502, "step": 6938 }, { "epoch": 0.45, "grad_norm": 1.1897437572479248, "learning_rate": 6.0780730101678044e-06, "loss": 0.5353, "step": 6939 }, { "epoch": 0.45, "grad_norm": 1.1124919652938843, "learning_rate": 6.0770522579886795e-06, "loss": 0.4996, "step": 6940 }, { "epoch": 0.45, "grad_norm": 1.2354204654693604, "learning_rate": 6.076031458734377e-06, "loss": 0.5534, "step": 6941 }, { "epoch": 0.45, "grad_norm": 1.1289875507354736, "learning_rate": 6.075010612449516e-06, "loss": 0.496, "step": 6942 }, { "epoch": 0.45, "grad_norm": 1.1271703243255615, "learning_rate": 6.073989719178717e-06, "loss": 0.5278, "step": 6943 }, { "epoch": 0.45, "grad_norm": 1.2090388536453247, "learning_rate": 6.072968778966596e-06, "loss": 0.584, "step": 6944 }, { "epoch": 0.45, "grad_norm": 1.099663496017456, "learning_rate": 6.071947791857779e-06, "loss": 0.568, "step": 6945 }, { "epoch": 0.45, "grad_norm": 1.0917304754257202, "learning_rate": 6.070926757896889e-06, "loss": 0.5018, "step": 6946 }, { "epoch": 0.45, "grad_norm": 1.1237695217132568, "learning_rate": 6.069905677128554e-06, "loss": 0.5037, "step": 6947 }, { "epoch": 0.45, "grad_norm": 1.1622207164764404, "learning_rate": 6.0688845495974015e-06, "loss": 0.5301, "step": 6948 }, { "epoch": 0.45, "grad_norm": 1.1012073755264282, "learning_rate": 6.067863375348064e-06, "loss": 0.5137, "step": 6949 }, { "epoch": 0.45, "grad_norm": 1.323278784751892, "learning_rate": 6.066842154425172e-06, "loss": 0.5572, "step": 6950 }, { "epoch": 0.45, "grad_norm": 1.1774961948394775, "learning_rate": 6.065820886873364e-06, "loss": 0.6121, "step": 6951 }, { "epoch": 0.45, "grad_norm": 1.157535433769226, "learning_rate": 6.064799572737274e-06, "loss": 0.5803, "step": 6952 }, { "epoch": 0.45, "grad_norm": 1.262294054031372, "learning_rate": 6.063778212061541e-06, "loss": 0.5943, "step": 6953 }, { "epoch": 0.45, "grad_norm": 1.2390384674072266, "learning_rate": 6.062756804890808e-06, "loss": 0.5429, "step": 6954 }, { "epoch": 0.45, "grad_norm": 1.1432892084121704, "learning_rate": 6.061735351269717e-06, "loss": 0.5118, "step": 6955 }, { "epoch": 0.45, "grad_norm": 1.257129192352295, "learning_rate": 6.060713851242912e-06, "loss": 0.5559, "step": 6956 }, { "epoch": 0.45, "grad_norm": 1.2747178077697754, "learning_rate": 6.059692304855042e-06, "loss": 0.5367, "step": 6957 }, { "epoch": 0.45, "grad_norm": 1.1086299419403076, "learning_rate": 6.058670712150756e-06, "loss": 0.5191, "step": 6958 }, { "epoch": 0.45, "grad_norm": 1.1676455736160278, "learning_rate": 6.057649073174704e-06, "loss": 0.5215, "step": 6959 }, { "epoch": 0.45, "grad_norm": 1.1888092756271362, "learning_rate": 6.056627387971541e-06, "loss": 0.5489, "step": 6960 }, { "epoch": 0.45, "grad_norm": 1.253205418586731, "learning_rate": 6.055605656585919e-06, "loss": 0.588, "step": 6961 }, { "epoch": 0.45, "grad_norm": 1.2306956052780151, "learning_rate": 6.0545838790624976e-06, "loss": 0.5715, "step": 6962 }, { "epoch": 0.45, "grad_norm": 1.2436903715133667, "learning_rate": 6.0535620554459375e-06, "loss": 0.5654, "step": 6963 }, { "epoch": 0.45, "grad_norm": 1.0412113666534424, "learning_rate": 6.052540185780896e-06, "loss": 0.4886, "step": 6964 }, { "epoch": 0.45, "grad_norm": 1.0588475465774536, "learning_rate": 6.051518270112039e-06, "loss": 0.5295, "step": 6965 }, { "epoch": 0.45, "grad_norm": 1.088417410850525, "learning_rate": 6.050496308484033e-06, "loss": 0.5488, "step": 6966 }, { "epoch": 0.45, "grad_norm": 1.2373780012130737, "learning_rate": 6.049474300941541e-06, "loss": 0.551, "step": 6967 }, { "epoch": 0.45, "grad_norm": 1.3190028667449951, "learning_rate": 6.048452247529237e-06, "loss": 0.5391, "step": 6968 }, { "epoch": 0.45, "grad_norm": 1.1470719575881958, "learning_rate": 6.0474301482917906e-06, "loss": 0.5432, "step": 6969 }, { "epoch": 0.45, "grad_norm": 1.233060598373413, "learning_rate": 6.0464080032738734e-06, "loss": 0.537, "step": 6970 }, { "epoch": 0.45, "grad_norm": 1.159338355064392, "learning_rate": 6.0453858125201625e-06, "loss": 0.5054, "step": 6971 }, { "epoch": 0.45, "grad_norm": 1.239289402961731, "learning_rate": 6.044363576075336e-06, "loss": 0.5865, "step": 6972 }, { "epoch": 0.45, "grad_norm": 1.2442741394042969, "learning_rate": 6.043341293984071e-06, "loss": 0.5507, "step": 6973 }, { "epoch": 0.45, "grad_norm": 1.138957142829895, "learning_rate": 6.042318966291048e-06, "loss": 0.5298, "step": 6974 }, { "epoch": 0.45, "grad_norm": 1.1796261072158813, "learning_rate": 6.0412965930409565e-06, "loss": 0.5668, "step": 6975 }, { "epoch": 0.45, "grad_norm": 1.196461796760559, "learning_rate": 6.040274174278474e-06, "loss": 0.5864, "step": 6976 }, { "epoch": 0.45, "grad_norm": 1.1683874130249023, "learning_rate": 6.039251710048293e-06, "loss": 0.5202, "step": 6977 }, { "epoch": 0.45, "grad_norm": 1.1920750141143799, "learning_rate": 6.0382292003951e-06, "loss": 0.5392, "step": 6978 }, { "epoch": 0.45, "grad_norm": 1.4487967491149902, "learning_rate": 6.037206645363589e-06, "loss": 0.5763, "step": 6979 }, { "epoch": 0.45, "grad_norm": 1.3056793212890625, "learning_rate": 6.036184044998451e-06, "loss": 0.5294, "step": 6980 }, { "epoch": 0.45, "grad_norm": 1.1735495328903198, "learning_rate": 6.03516139934438e-06, "loss": 0.5065, "step": 6981 }, { "epoch": 0.45, "grad_norm": 1.1782686710357666, "learning_rate": 6.034138708446078e-06, "loss": 0.5682, "step": 6982 }, { "epoch": 0.45, "grad_norm": 1.1123192310333252, "learning_rate": 6.033115972348239e-06, "loss": 0.5487, "step": 6983 }, { "epoch": 0.45, "grad_norm": 1.1841036081314087, "learning_rate": 6.032093191095567e-06, "loss": 0.5247, "step": 6984 }, { "epoch": 0.45, "grad_norm": 1.2792677879333496, "learning_rate": 6.031070364732764e-06, "loss": 0.5539, "step": 6985 }, { "epoch": 0.45, "grad_norm": 1.21280038356781, "learning_rate": 6.030047493304536e-06, "loss": 0.5351, "step": 6986 }, { "epoch": 0.45, "grad_norm": 1.2416260242462158, "learning_rate": 6.02902457685559e-06, "loss": 0.5288, "step": 6987 }, { "epoch": 0.45, "grad_norm": 1.1359235048294067, "learning_rate": 6.028001615430633e-06, "loss": 0.5371, "step": 6988 }, { "epoch": 0.45, "grad_norm": 1.2211943864822388, "learning_rate": 6.026978609074379e-06, "loss": 0.5314, "step": 6989 }, { "epoch": 0.45, "grad_norm": 1.2889938354492188, "learning_rate": 6.02595555783154e-06, "loss": 0.5943, "step": 6990 }, { "epoch": 0.45, "grad_norm": 1.0989902019500732, "learning_rate": 6.024932461746829e-06, "loss": 0.5272, "step": 6991 }, { "epoch": 0.45, "grad_norm": 1.1733282804489136, "learning_rate": 6.0239093208649655e-06, "loss": 0.5187, "step": 6992 }, { "epoch": 0.45, "grad_norm": 1.3019351959228516, "learning_rate": 6.022886135230668e-06, "loss": 0.5319, "step": 6993 }, { "epoch": 0.45, "grad_norm": 1.1201212406158447, "learning_rate": 6.021862904888655e-06, "loss": 0.5223, "step": 6994 }, { "epoch": 0.45, "grad_norm": 1.183465838432312, "learning_rate": 6.020839629883652e-06, "loss": 0.5322, "step": 6995 }, { "epoch": 0.45, "grad_norm": 1.168243408203125, "learning_rate": 6.019816310260383e-06, "loss": 0.5692, "step": 6996 }, { "epoch": 0.45, "grad_norm": 1.2622895240783691, "learning_rate": 6.018792946063573e-06, "loss": 0.5813, "step": 6997 }, { "epoch": 0.45, "grad_norm": 1.2126154899597168, "learning_rate": 6.017769537337953e-06, "loss": 0.5253, "step": 6998 }, { "epoch": 0.45, "grad_norm": 1.345534324645996, "learning_rate": 6.016746084128251e-06, "loss": 0.5383, "step": 6999 }, { "epoch": 0.45, "grad_norm": 1.0965934991836548, "learning_rate": 6.0157225864792015e-06, "loss": 0.5111, "step": 7000 }, { "epoch": 0.45, "grad_norm": 1.1183408498764038, "learning_rate": 6.014699044435539e-06, "loss": 0.5592, "step": 7001 }, { "epoch": 0.45, "grad_norm": 1.1611359119415283, "learning_rate": 6.013675458041999e-06, "loss": 0.5322, "step": 7002 }, { "epoch": 0.45, "grad_norm": 1.3658982515335083, "learning_rate": 6.0126518273433184e-06, "loss": 0.5715, "step": 7003 }, { "epoch": 0.45, "grad_norm": 1.3000390529632568, "learning_rate": 6.011628152384241e-06, "loss": 0.5959, "step": 7004 }, { "epoch": 0.45, "grad_norm": 1.2080224752426147, "learning_rate": 6.010604433209507e-06, "loss": 0.5784, "step": 7005 }, { "epoch": 0.45, "grad_norm": 1.198075771331787, "learning_rate": 6.009580669863859e-06, "loss": 0.5329, "step": 7006 }, { "epoch": 0.45, "grad_norm": 1.2077407836914062, "learning_rate": 6.0085568623920475e-06, "loss": 0.5426, "step": 7007 }, { "epoch": 0.45, "grad_norm": 1.2071714401245117, "learning_rate": 6.007533010838816e-06, "loss": 0.5107, "step": 7008 }, { "epoch": 0.45, "grad_norm": 1.1525906324386597, "learning_rate": 6.006509115248915e-06, "loss": 0.5273, "step": 7009 }, { "epoch": 0.45, "grad_norm": 1.1099690198898315, "learning_rate": 6.0054851756671e-06, "loss": 0.5519, "step": 7010 }, { "epoch": 0.45, "grad_norm": 1.0828111171722412, "learning_rate": 6.004461192138119e-06, "loss": 0.4874, "step": 7011 }, { "epoch": 0.45, "grad_norm": 1.1188043355941772, "learning_rate": 6.003437164706733e-06, "loss": 0.5457, "step": 7012 }, { "epoch": 0.45, "grad_norm": 1.0789450407028198, "learning_rate": 6.002413093417697e-06, "loss": 0.5429, "step": 7013 }, { "epoch": 0.45, "grad_norm": 1.2278413772583008, "learning_rate": 6.001388978315771e-06, "loss": 0.533, "step": 7014 }, { "epoch": 0.45, "grad_norm": 1.1431268453598022, "learning_rate": 6.000364819445715e-06, "loss": 0.4994, "step": 7015 }, { "epoch": 0.45, "grad_norm": 1.2224845886230469, "learning_rate": 5.999340616852296e-06, "loss": 0.5504, "step": 7016 }, { "epoch": 0.45, "grad_norm": 1.1040263175964355, "learning_rate": 5.998316370580276e-06, "loss": 0.5699, "step": 7017 }, { "epoch": 0.45, "grad_norm": 1.1066986322402954, "learning_rate": 5.997292080674423e-06, "loss": 0.5305, "step": 7018 }, { "epoch": 0.45, "grad_norm": 1.2383071184158325, "learning_rate": 5.996267747179506e-06, "loss": 0.5831, "step": 7019 }, { "epoch": 0.45, "grad_norm": 1.1190170049667358, "learning_rate": 5.995243370140297e-06, "loss": 0.5433, "step": 7020 }, { "epoch": 0.45, "grad_norm": 1.1585599184036255, "learning_rate": 5.994218949601567e-06, "loss": 0.4746, "step": 7021 }, { "epoch": 0.45, "grad_norm": 1.179849624633789, "learning_rate": 5.9931944856080924e-06, "loss": 0.5657, "step": 7022 }, { "epoch": 0.45, "grad_norm": 1.167737603187561, "learning_rate": 5.992169978204649e-06, "loss": 0.4895, "step": 7023 }, { "epoch": 0.45, "grad_norm": 1.1897944211959839, "learning_rate": 5.991145427436017e-06, "loss": 0.5032, "step": 7024 }, { "epoch": 0.45, "grad_norm": 1.0695557594299316, "learning_rate": 5.990120833346974e-06, "loss": 0.5378, "step": 7025 }, { "epoch": 0.45, "grad_norm": 1.1143372058868408, "learning_rate": 5.989096195982304e-06, "loss": 0.5661, "step": 7026 }, { "epoch": 0.45, "grad_norm": 1.3360273838043213, "learning_rate": 5.988071515386792e-06, "loss": 0.5117, "step": 7027 }, { "epoch": 0.45, "grad_norm": 1.081138253211975, "learning_rate": 5.9870467916052224e-06, "loss": 0.569, "step": 7028 }, { "epoch": 0.45, "grad_norm": 1.3315539360046387, "learning_rate": 5.986022024682385e-06, "loss": 0.5074, "step": 7029 }, { "epoch": 0.45, "grad_norm": 1.184887170791626, "learning_rate": 5.984997214663069e-06, "loss": 0.5517, "step": 7030 }, { "epoch": 0.45, "grad_norm": 1.286043405532837, "learning_rate": 5.9839723615920665e-06, "loss": 0.4832, "step": 7031 }, { "epoch": 0.45, "grad_norm": 1.0328733921051025, "learning_rate": 5.98294746551417e-06, "loss": 0.5148, "step": 7032 }, { "epoch": 0.45, "grad_norm": 1.236586093902588, "learning_rate": 5.981922526474174e-06, "loss": 0.4957, "step": 7033 }, { "epoch": 0.45, "grad_norm": 1.1624383926391602, "learning_rate": 5.98089754451688e-06, "loss": 0.5165, "step": 7034 }, { "epoch": 0.45, "grad_norm": 1.1053963899612427, "learning_rate": 5.979872519687084e-06, "loss": 0.5545, "step": 7035 }, { "epoch": 0.45, "grad_norm": 1.1404178142547607, "learning_rate": 5.978847452029589e-06, "loss": 0.4955, "step": 7036 }, { "epoch": 0.45, "grad_norm": 1.2257587909698486, "learning_rate": 5.977822341589198e-06, "loss": 0.5606, "step": 7037 }, { "epoch": 0.45, "grad_norm": 1.1399366855621338, "learning_rate": 5.976797188410713e-06, "loss": 0.532, "step": 7038 }, { "epoch": 0.45, "grad_norm": 1.2804100513458252, "learning_rate": 5.9757719925389455e-06, "loss": 0.5828, "step": 7039 }, { "epoch": 0.45, "grad_norm": 1.2003589868545532, "learning_rate": 5.974746754018701e-06, "loss": 0.5576, "step": 7040 }, { "epoch": 0.45, "grad_norm": 1.2109030485153198, "learning_rate": 5.97372147289479e-06, "loss": 0.5311, "step": 7041 }, { "epoch": 0.45, "grad_norm": 1.1328707933425903, "learning_rate": 5.9726961492120275e-06, "loss": 0.5283, "step": 7042 }, { "epoch": 0.45, "grad_norm": 1.200523853302002, "learning_rate": 5.971670783015223e-06, "loss": 0.5441, "step": 7043 }, { "epoch": 0.45, "grad_norm": 1.0930126905441284, "learning_rate": 5.970645374349197e-06, "loss": 0.5052, "step": 7044 }, { "epoch": 0.45, "grad_norm": 1.250327467918396, "learning_rate": 5.969619923258767e-06, "loss": 0.5469, "step": 7045 }, { "epoch": 0.45, "grad_norm": 1.1136335134506226, "learning_rate": 5.96859442978875e-06, "loss": 0.5397, "step": 7046 }, { "epoch": 0.45, "grad_norm": 1.210993766784668, "learning_rate": 5.96756889398397e-06, "loss": 0.5649, "step": 7047 }, { "epoch": 0.45, "grad_norm": 1.7970690727233887, "learning_rate": 5.966543315889251e-06, "loss": 0.5146, "step": 7048 }, { "epoch": 0.46, "grad_norm": 1.3029013872146606, "learning_rate": 5.965517695549416e-06, "loss": 0.5029, "step": 7049 }, { "epoch": 0.46, "grad_norm": 1.2794848680496216, "learning_rate": 5.964492033009295e-06, "loss": 0.5498, "step": 7050 }, { "epoch": 0.46, "grad_norm": 1.1684874296188354, "learning_rate": 5.963466328313715e-06, "loss": 0.5478, "step": 7051 }, { "epoch": 0.46, "grad_norm": 1.226733684539795, "learning_rate": 5.962440581507507e-06, "loss": 0.5277, "step": 7052 }, { "epoch": 0.46, "grad_norm": 1.4703128337860107, "learning_rate": 5.961414792635505e-06, "loss": 0.585, "step": 7053 }, { "epoch": 0.46, "grad_norm": 1.1490836143493652, "learning_rate": 5.960388961742543e-06, "loss": 0.5475, "step": 7054 }, { "epoch": 0.46, "grad_norm": 1.2602295875549316, "learning_rate": 5.959363088873457e-06, "loss": 0.5741, "step": 7055 }, { "epoch": 0.46, "grad_norm": 1.181503415107727, "learning_rate": 5.958337174073084e-06, "loss": 0.5406, "step": 7056 }, { "epoch": 0.46, "grad_norm": 1.337059497833252, "learning_rate": 5.957311217386269e-06, "loss": 0.5292, "step": 7057 }, { "epoch": 0.46, "grad_norm": 1.2233387231826782, "learning_rate": 5.956285218857848e-06, "loss": 0.5412, "step": 7058 }, { "epoch": 0.46, "grad_norm": 1.249946117401123, "learning_rate": 5.955259178532667e-06, "loss": 0.5669, "step": 7059 }, { "epoch": 0.46, "grad_norm": 1.1811271905899048, "learning_rate": 5.954233096455575e-06, "loss": 0.5007, "step": 7060 }, { "epoch": 0.46, "grad_norm": 1.1171811819076538, "learning_rate": 5.953206972671414e-06, "loss": 0.5212, "step": 7061 }, { "epoch": 0.46, "grad_norm": 1.2166285514831543, "learning_rate": 5.952180807225035e-06, "loss": 0.4953, "step": 7062 }, { "epoch": 0.46, "grad_norm": 1.2315212488174438, "learning_rate": 5.9511546001612915e-06, "loss": 0.604, "step": 7063 }, { "epoch": 0.46, "grad_norm": 1.2047616243362427, "learning_rate": 5.950128351525033e-06, "loss": 0.5768, "step": 7064 }, { "epoch": 0.46, "grad_norm": 1.2394158840179443, "learning_rate": 5.9491020613611165e-06, "loss": 0.5301, "step": 7065 }, { "epoch": 0.46, "grad_norm": 1.1826406717300415, "learning_rate": 5.948075729714398e-06, "loss": 0.5406, "step": 7066 }, { "epoch": 0.46, "grad_norm": 1.1851054430007935, "learning_rate": 5.9470493566297324e-06, "loss": 0.5606, "step": 7067 }, { "epoch": 0.46, "grad_norm": 1.3602862358093262, "learning_rate": 5.9460229421519856e-06, "loss": 0.5781, "step": 7068 }, { "epoch": 0.46, "grad_norm": 1.085331916809082, "learning_rate": 5.944996486326017e-06, "loss": 0.5367, "step": 7069 }, { "epoch": 0.46, "grad_norm": 1.2857787609100342, "learning_rate": 5.943969989196688e-06, "loss": 0.5251, "step": 7070 }, { "epoch": 0.46, "grad_norm": 1.234046459197998, "learning_rate": 5.942943450808869e-06, "loss": 0.6174, "step": 7071 }, { "epoch": 0.46, "grad_norm": 1.341030478477478, "learning_rate": 5.941916871207423e-06, "loss": 0.558, "step": 7072 }, { "epoch": 0.46, "grad_norm": 1.1679197549819946, "learning_rate": 5.940890250437222e-06, "loss": 0.5133, "step": 7073 }, { "epoch": 0.46, "grad_norm": 1.1442270278930664, "learning_rate": 5.939863588543133e-06, "loss": 0.5591, "step": 7074 }, { "epoch": 0.46, "grad_norm": 1.0891618728637695, "learning_rate": 5.9388368855700325e-06, "loss": 0.5345, "step": 7075 }, { "epoch": 0.46, "grad_norm": 1.1501885652542114, "learning_rate": 5.937810141562796e-06, "loss": 0.5211, "step": 7076 }, { "epoch": 0.46, "grad_norm": 1.152266025543213, "learning_rate": 5.9367833565662956e-06, "loss": 0.5294, "step": 7077 }, { "epoch": 0.46, "grad_norm": 1.1143659353256226, "learning_rate": 5.935756530625413e-06, "loss": 0.5758, "step": 7078 }, { "epoch": 0.46, "grad_norm": 1.1065452098846436, "learning_rate": 5.934729663785026e-06, "loss": 0.5157, "step": 7079 }, { "epoch": 0.46, "grad_norm": 1.157789707183838, "learning_rate": 5.9337027560900175e-06, "loss": 0.5548, "step": 7080 }, { "epoch": 0.46, "grad_norm": 1.1927108764648438, "learning_rate": 5.93267580758527e-06, "loss": 0.5618, "step": 7081 }, { "epoch": 0.46, "grad_norm": 1.1541258096694946, "learning_rate": 5.931648818315669e-06, "loss": 0.4822, "step": 7082 }, { "epoch": 0.46, "grad_norm": 1.151792287826538, "learning_rate": 5.930621788326103e-06, "loss": 0.5096, "step": 7083 }, { "epoch": 0.46, "grad_norm": 1.1702274084091187, "learning_rate": 5.9295947176614575e-06, "loss": 0.5197, "step": 7084 }, { "epoch": 0.46, "grad_norm": 1.184921383857727, "learning_rate": 5.928567606366626e-06, "loss": 0.5554, "step": 7085 }, { "epoch": 0.46, "grad_norm": 1.1550337076187134, "learning_rate": 5.927540454486503e-06, "loss": 0.567, "step": 7086 }, { "epoch": 0.46, "grad_norm": 1.2464123964309692, "learning_rate": 5.926513262065976e-06, "loss": 0.5399, "step": 7087 }, { "epoch": 0.46, "grad_norm": 1.1993380784988403, "learning_rate": 5.925486029149946e-06, "loss": 0.5752, "step": 7088 }, { "epoch": 0.46, "grad_norm": 1.0939702987670898, "learning_rate": 5.924458755783311e-06, "loss": 0.5046, "step": 7089 }, { "epoch": 0.46, "grad_norm": 3.7655014991760254, "learning_rate": 5.923431442010968e-06, "loss": 0.5411, "step": 7090 }, { "epoch": 0.46, "grad_norm": 1.3197910785675049, "learning_rate": 5.922404087877818e-06, "loss": 0.5602, "step": 7091 }, { "epoch": 0.46, "grad_norm": 1.3315783739089966, "learning_rate": 5.921376693428766e-06, "loss": 0.5319, "step": 7092 }, { "epoch": 0.46, "grad_norm": 1.2644562721252441, "learning_rate": 5.920349258708716e-06, "loss": 0.5625, "step": 7093 }, { "epoch": 0.46, "grad_norm": 1.1661423444747925, "learning_rate": 5.919321783762574e-06, "loss": 0.5326, "step": 7094 }, { "epoch": 0.46, "grad_norm": 1.2898504734039307, "learning_rate": 5.91829426863525e-06, "loss": 0.585, "step": 7095 }, { "epoch": 0.46, "grad_norm": 1.2319366931915283, "learning_rate": 5.917266713371651e-06, "loss": 0.4985, "step": 7096 }, { "epoch": 0.46, "grad_norm": 1.2192537784576416, "learning_rate": 5.916239118016691e-06, "loss": 0.5153, "step": 7097 }, { "epoch": 0.46, "grad_norm": 2.3393614292144775, "learning_rate": 5.915211482615284e-06, "loss": 0.5613, "step": 7098 }, { "epoch": 0.46, "grad_norm": 1.383957862854004, "learning_rate": 5.914183807212344e-06, "loss": 0.5812, "step": 7099 }, { "epoch": 0.46, "grad_norm": 1.113673210144043, "learning_rate": 5.913156091852787e-06, "loss": 0.5782, "step": 7100 }, { "epoch": 0.46, "grad_norm": 1.2006481885910034, "learning_rate": 5.912128336581536e-06, "loss": 0.5276, "step": 7101 }, { "epoch": 0.46, "grad_norm": 1.2298566102981567, "learning_rate": 5.911100541443507e-06, "loss": 0.5561, "step": 7102 }, { "epoch": 0.46, "grad_norm": 1.1644558906555176, "learning_rate": 5.910072706483624e-06, "loss": 0.5183, "step": 7103 }, { "epoch": 0.46, "grad_norm": 1.1370823383331299, "learning_rate": 5.909044831746812e-06, "loss": 0.5376, "step": 7104 }, { "epoch": 0.46, "grad_norm": 1.2222305536270142, "learning_rate": 5.908016917277995e-06, "loss": 0.5488, "step": 7105 }, { "epoch": 0.46, "grad_norm": 1.2205522060394287, "learning_rate": 5.9069889631221e-06, "loss": 0.5792, "step": 7106 }, { "epoch": 0.46, "grad_norm": 1.208503246307373, "learning_rate": 5.90596096932406e-06, "loss": 0.5348, "step": 7107 }, { "epoch": 0.46, "grad_norm": 1.0762754678726196, "learning_rate": 5.904932935928801e-06, "loss": 0.5579, "step": 7108 }, { "epoch": 0.46, "grad_norm": 1.1674227714538574, "learning_rate": 5.903904862981259e-06, "loss": 0.5462, "step": 7109 }, { "epoch": 0.46, "grad_norm": 1.2170482873916626, "learning_rate": 5.902876750526369e-06, "loss": 0.4923, "step": 7110 }, { "epoch": 0.46, "grad_norm": 1.1312155723571777, "learning_rate": 5.9018485986090636e-06, "loss": 0.4828, "step": 7111 }, { "epoch": 0.46, "grad_norm": 1.2799781560897827, "learning_rate": 5.900820407274284e-06, "loss": 0.5504, "step": 7112 }, { "epoch": 0.46, "grad_norm": 1.250157356262207, "learning_rate": 5.8997921765669685e-06, "loss": 0.4856, "step": 7113 }, { "epoch": 0.46, "grad_norm": 1.122738003730774, "learning_rate": 5.898763906532058e-06, "loss": 0.4977, "step": 7114 }, { "epoch": 0.46, "grad_norm": 1.2071527242660522, "learning_rate": 5.8977355972144956e-06, "loss": 0.5567, "step": 7115 }, { "epoch": 0.46, "grad_norm": 1.134123682975769, "learning_rate": 5.896707248659228e-06, "loss": 0.5665, "step": 7116 }, { "epoch": 0.46, "grad_norm": 1.303701400756836, "learning_rate": 5.895678860911198e-06, "loss": 0.598, "step": 7117 }, { "epoch": 0.46, "grad_norm": 1.1696988344192505, "learning_rate": 5.894650434015358e-06, "loss": 0.571, "step": 7118 }, { "epoch": 0.46, "grad_norm": 1.2698051929473877, "learning_rate": 5.893621968016654e-06, "loss": 0.5441, "step": 7119 }, { "epoch": 0.46, "grad_norm": 1.0489475727081299, "learning_rate": 5.892593462960041e-06, "loss": 0.5117, "step": 7120 }, { "epoch": 0.46, "grad_norm": 1.0914925336837769, "learning_rate": 5.891564918890469e-06, "loss": 0.507, "step": 7121 }, { "epoch": 0.46, "grad_norm": 1.1216405630111694, "learning_rate": 5.890536335852895e-06, "loss": 0.5107, "step": 7122 }, { "epoch": 0.46, "grad_norm": 1.279923439025879, "learning_rate": 5.889507713892278e-06, "loss": 0.4993, "step": 7123 }, { "epoch": 0.46, "grad_norm": 1.0482218265533447, "learning_rate": 5.88847905305357e-06, "loss": 0.5196, "step": 7124 }, { "epoch": 0.46, "grad_norm": 1.231353759765625, "learning_rate": 5.887450353381737e-06, "loss": 0.483, "step": 7125 }, { "epoch": 0.46, "grad_norm": 1.1505775451660156, "learning_rate": 5.886421614921738e-06, "loss": 0.5332, "step": 7126 }, { "epoch": 0.46, "grad_norm": 1.1914901733398438, "learning_rate": 5.885392837718538e-06, "loss": 0.5484, "step": 7127 }, { "epoch": 0.46, "grad_norm": 1.175150752067566, "learning_rate": 5.8843640218171e-06, "loss": 0.5063, "step": 7128 }, { "epoch": 0.46, "grad_norm": 1.0605860948562622, "learning_rate": 5.8833351672623924e-06, "loss": 0.5329, "step": 7129 }, { "epoch": 0.46, "grad_norm": 1.0942375659942627, "learning_rate": 5.882306274099385e-06, "loss": 0.5168, "step": 7130 }, { "epoch": 0.46, "grad_norm": 1.1730926036834717, "learning_rate": 5.881277342373046e-06, "loss": 0.5266, "step": 7131 }, { "epoch": 0.46, "grad_norm": 1.2496137619018555, "learning_rate": 5.880248372128349e-06, "loss": 0.5957, "step": 7132 }, { "epoch": 0.46, "grad_norm": 1.319053053855896, "learning_rate": 5.879219363410266e-06, "loss": 0.5426, "step": 7133 }, { "epoch": 0.46, "grad_norm": 1.0760940313339233, "learning_rate": 5.878190316263772e-06, "loss": 0.5552, "step": 7134 }, { "epoch": 0.46, "grad_norm": 1.254478096961975, "learning_rate": 5.877161230733847e-06, "loss": 0.5826, "step": 7135 }, { "epoch": 0.46, "grad_norm": 1.2857402563095093, "learning_rate": 5.876132106865467e-06, "loss": 0.5198, "step": 7136 }, { "epoch": 0.46, "grad_norm": 1.0764528512954712, "learning_rate": 5.875102944703612e-06, "loss": 0.5359, "step": 7137 }, { "epoch": 0.46, "grad_norm": 1.1369261741638184, "learning_rate": 5.874073744293265e-06, "loss": 0.5498, "step": 7138 }, { "epoch": 0.46, "grad_norm": 1.0781168937683105, "learning_rate": 5.873044505679411e-06, "loss": 0.5267, "step": 7139 }, { "epoch": 0.46, "grad_norm": 1.1060798168182373, "learning_rate": 5.872015228907034e-06, "loss": 0.5404, "step": 7140 }, { "epoch": 0.46, "grad_norm": 1.118801236152649, "learning_rate": 5.870985914021121e-06, "loss": 0.5359, "step": 7141 }, { "epoch": 0.46, "grad_norm": 1.2680854797363281, "learning_rate": 5.869956561066661e-06, "loss": 0.5378, "step": 7142 }, { "epoch": 0.46, "grad_norm": 1.186435341835022, "learning_rate": 5.8689271700886445e-06, "loss": 0.5852, "step": 7143 }, { "epoch": 0.46, "grad_norm": 1.3181873559951782, "learning_rate": 5.867897741132061e-06, "loss": 0.5568, "step": 7144 }, { "epoch": 0.46, "grad_norm": 1.1596134901046753, "learning_rate": 5.866868274241909e-06, "loss": 0.5354, "step": 7145 }, { "epoch": 0.46, "grad_norm": 1.2046458721160889, "learning_rate": 5.8658387694631815e-06, "loss": 0.5296, "step": 7146 }, { "epoch": 0.46, "grad_norm": 1.1549304723739624, "learning_rate": 5.864809226840873e-06, "loss": 0.5394, "step": 7147 }, { "epoch": 0.46, "grad_norm": 1.1547898054122925, "learning_rate": 5.863779646419987e-06, "loss": 0.5148, "step": 7148 }, { "epoch": 0.46, "grad_norm": 1.171507716178894, "learning_rate": 5.86275002824552e-06, "loss": 0.5902, "step": 7149 }, { "epoch": 0.46, "grad_norm": 1.2553632259368896, "learning_rate": 5.861720372362474e-06, "loss": 0.5048, "step": 7150 }, { "epoch": 0.46, "grad_norm": 1.2532352209091187, "learning_rate": 5.860690678815856e-06, "loss": 0.5467, "step": 7151 }, { "epoch": 0.46, "grad_norm": 1.2819942235946655, "learning_rate": 5.859660947650667e-06, "loss": 0.5602, "step": 7152 }, { "epoch": 0.46, "grad_norm": 1.3071818351745605, "learning_rate": 5.858631178911917e-06, "loss": 0.5253, "step": 7153 }, { "epoch": 0.46, "grad_norm": 1.2563824653625488, "learning_rate": 5.857601372644613e-06, "loss": 0.5539, "step": 7154 }, { "epoch": 0.46, "grad_norm": 1.1600877046585083, "learning_rate": 5.856571528893766e-06, "loss": 0.5299, "step": 7155 }, { "epoch": 0.46, "grad_norm": 1.1249686479568481, "learning_rate": 5.8555416477043884e-06, "loss": 0.497, "step": 7156 }, { "epoch": 0.46, "grad_norm": 1.2242285013198853, "learning_rate": 5.854511729121492e-06, "loss": 0.5457, "step": 7157 }, { "epoch": 0.46, "grad_norm": 1.3168256282806396, "learning_rate": 5.853481773190092e-06, "loss": 0.5881, "step": 7158 }, { "epoch": 0.46, "grad_norm": 1.297255277633667, "learning_rate": 5.852451779955206e-06, "loss": 0.5318, "step": 7159 }, { "epoch": 0.46, "grad_norm": 1.1677833795547485, "learning_rate": 5.851421749461852e-06, "loss": 0.5339, "step": 7160 }, { "epoch": 0.46, "grad_norm": 1.1599148511886597, "learning_rate": 5.85039168175505e-06, "loss": 0.5703, "step": 7161 }, { "epoch": 0.46, "grad_norm": 1.2263596057891846, "learning_rate": 5.849361576879821e-06, "loss": 0.5457, "step": 7162 }, { "epoch": 0.46, "grad_norm": 1.1148178577423096, "learning_rate": 5.848331434881191e-06, "loss": 0.5301, "step": 7163 }, { "epoch": 0.46, "grad_norm": 1.1742327213287354, "learning_rate": 5.847301255804181e-06, "loss": 0.56, "step": 7164 }, { "epoch": 0.46, "grad_norm": 1.4718878269195557, "learning_rate": 5.846271039693821e-06, "loss": 0.5407, "step": 7165 }, { "epoch": 0.46, "grad_norm": 1.1749117374420166, "learning_rate": 5.8452407865951346e-06, "loss": 0.4897, "step": 7166 }, { "epoch": 0.46, "grad_norm": 1.243117332458496, "learning_rate": 5.8442104965531556e-06, "loss": 0.5646, "step": 7167 }, { "epoch": 0.46, "grad_norm": 1.171570897102356, "learning_rate": 5.8431801696129145e-06, "loss": 0.5364, "step": 7168 }, { "epoch": 0.46, "grad_norm": 1.1962883472442627, "learning_rate": 5.842149805819442e-06, "loss": 0.5538, "step": 7169 }, { "epoch": 0.46, "grad_norm": 1.1560200452804565, "learning_rate": 5.841119405217775e-06, "loss": 0.5204, "step": 7170 }, { "epoch": 0.46, "grad_norm": 1.2260810136795044, "learning_rate": 5.840088967852949e-06, "loss": 0.5946, "step": 7171 }, { "epoch": 0.46, "grad_norm": 1.2221195697784424, "learning_rate": 5.839058493770003e-06, "loss": 0.5787, "step": 7172 }, { "epoch": 0.46, "grad_norm": 1.1695847511291504, "learning_rate": 5.838027983013973e-06, "loss": 0.5544, "step": 7173 }, { "epoch": 0.46, "grad_norm": 1.187703251838684, "learning_rate": 5.836997435629903e-06, "loss": 0.5801, "step": 7174 }, { "epoch": 0.46, "grad_norm": 1.3051904439926147, "learning_rate": 5.8359668516628344e-06, "loss": 0.5398, "step": 7175 }, { "epoch": 0.46, "grad_norm": 1.2457956075668335, "learning_rate": 5.83493623115781e-06, "loss": 0.6172, "step": 7176 }, { "epoch": 0.46, "grad_norm": 1.2083492279052734, "learning_rate": 5.8339055741598794e-06, "loss": 0.591, "step": 7177 }, { "epoch": 0.46, "grad_norm": 1.1742743253707886, "learning_rate": 5.832874880714087e-06, "loss": 0.5537, "step": 7178 }, { "epoch": 0.46, "grad_norm": 1.117916464805603, "learning_rate": 5.831844150865481e-06, "loss": 0.5163, "step": 7179 }, { "epoch": 0.46, "grad_norm": 1.131622314453125, "learning_rate": 5.830813384659115e-06, "loss": 0.5467, "step": 7180 }, { "epoch": 0.46, "grad_norm": 1.1263593435287476, "learning_rate": 5.829782582140039e-06, "loss": 0.5129, "step": 7181 }, { "epoch": 0.46, "grad_norm": 1.1918054819107056, "learning_rate": 5.828751743353307e-06, "loss": 0.5161, "step": 7182 }, { "epoch": 0.46, "grad_norm": 1.0709460973739624, "learning_rate": 5.827720868343976e-06, "loss": 0.5095, "step": 7183 }, { "epoch": 0.46, "grad_norm": 1.1781309843063354, "learning_rate": 5.826689957157099e-06, "loss": 0.554, "step": 7184 }, { "epoch": 0.46, "grad_norm": 1.103532314300537, "learning_rate": 5.8256590098377374e-06, "loss": 0.4926, "step": 7185 }, { "epoch": 0.46, "grad_norm": 1.1928142309188843, "learning_rate": 5.824628026430952e-06, "loss": 0.531, "step": 7186 }, { "epoch": 0.46, "grad_norm": 1.2411679029464722, "learning_rate": 5.823597006981803e-06, "loss": 0.5705, "step": 7187 }, { "epoch": 0.46, "grad_norm": 1.6697237491607666, "learning_rate": 5.822565951535352e-06, "loss": 0.5592, "step": 7188 }, { "epoch": 0.46, "grad_norm": 1.2810202836990356, "learning_rate": 5.821534860136667e-06, "loss": 0.5642, "step": 7189 }, { "epoch": 0.46, "grad_norm": 1.2381155490875244, "learning_rate": 5.820503732830812e-06, "loss": 0.5367, "step": 7190 }, { "epoch": 0.46, "grad_norm": 1.331784725189209, "learning_rate": 5.8194725696628565e-06, "loss": 0.5255, "step": 7191 }, { "epoch": 0.46, "grad_norm": 1.2121413946151733, "learning_rate": 5.8184413706778695e-06, "loss": 0.5359, "step": 7192 }, { "epoch": 0.46, "grad_norm": 1.096466302871704, "learning_rate": 5.817410135920921e-06, "loss": 0.5093, "step": 7193 }, { "epoch": 0.46, "grad_norm": 1.169761300086975, "learning_rate": 5.816378865437085e-06, "loss": 0.5634, "step": 7194 }, { "epoch": 0.46, "grad_norm": 1.1092385053634644, "learning_rate": 5.8153475592714345e-06, "loss": 0.4766, "step": 7195 }, { "epoch": 0.46, "grad_norm": 1.3086827993392944, "learning_rate": 5.814316217469046e-06, "loss": 0.5785, "step": 7196 }, { "epoch": 0.46, "grad_norm": 1.1940912008285522, "learning_rate": 5.813284840074996e-06, "loss": 0.545, "step": 7197 }, { "epoch": 0.46, "grad_norm": 1.3824334144592285, "learning_rate": 5.812253427134366e-06, "loss": 0.5193, "step": 7198 }, { "epoch": 0.46, "grad_norm": 1.3256245851516724, "learning_rate": 5.811221978692232e-06, "loss": 0.5503, "step": 7199 }, { "epoch": 0.46, "grad_norm": 1.1961140632629395, "learning_rate": 5.8101904947936795e-06, "loss": 0.5581, "step": 7200 }, { "epoch": 0.46, "grad_norm": 1.2900103330612183, "learning_rate": 5.809158975483791e-06, "loss": 0.523, "step": 7201 }, { "epoch": 0.46, "grad_norm": 1.1380751132965088, "learning_rate": 5.80812742080765e-06, "loss": 0.5725, "step": 7202 }, { "epoch": 0.46, "grad_norm": 1.1100465059280396, "learning_rate": 5.807095830810346e-06, "loss": 0.5047, "step": 7203 }, { "epoch": 0.47, "grad_norm": 1.2313178777694702, "learning_rate": 5.8060642055369645e-06, "loss": 0.5602, "step": 7204 }, { "epoch": 0.47, "grad_norm": 1.0723958015441895, "learning_rate": 5.8050325450325965e-06, "loss": 0.509, "step": 7205 }, { "epoch": 0.47, "grad_norm": 1.2210866212844849, "learning_rate": 5.8040008493423324e-06, "loss": 0.5339, "step": 7206 }, { "epoch": 0.47, "grad_norm": 1.2145209312438965, "learning_rate": 5.802969118511267e-06, "loss": 0.5791, "step": 7207 }, { "epoch": 0.47, "grad_norm": 1.078338384628296, "learning_rate": 5.801937352584493e-06, "loss": 0.5173, "step": 7208 }, { "epoch": 0.47, "grad_norm": 1.2401727437973022, "learning_rate": 5.800905551607106e-06, "loss": 0.5761, "step": 7209 }, { "epoch": 0.47, "grad_norm": 1.1432393789291382, "learning_rate": 5.799873715624202e-06, "loss": 0.548, "step": 7210 }, { "epoch": 0.47, "grad_norm": 1.169692873954773, "learning_rate": 5.798841844680883e-06, "loss": 0.5696, "step": 7211 }, { "epoch": 0.47, "grad_norm": 1.133901596069336, "learning_rate": 5.797809938822249e-06, "loss": 0.4742, "step": 7212 }, { "epoch": 0.47, "grad_norm": 1.2041796445846558, "learning_rate": 5.796777998093399e-06, "loss": 0.5189, "step": 7213 }, { "epoch": 0.47, "grad_norm": 1.2760969400405884, "learning_rate": 5.79574602253944e-06, "loss": 0.59, "step": 7214 }, { "epoch": 0.47, "grad_norm": 1.1437222957611084, "learning_rate": 5.794714012205474e-06, "loss": 0.5191, "step": 7215 }, { "epoch": 0.47, "grad_norm": 1.1937910318374634, "learning_rate": 5.793681967136612e-06, "loss": 0.4568, "step": 7216 }, { "epoch": 0.47, "grad_norm": 1.1231119632720947, "learning_rate": 5.792649887377956e-06, "loss": 0.5287, "step": 7217 }, { "epoch": 0.47, "grad_norm": 1.0366839170455933, "learning_rate": 5.791617772974619e-06, "loss": 0.5345, "step": 7218 }, { "epoch": 0.47, "grad_norm": 1.1477042436599731, "learning_rate": 5.790585623971712e-06, "loss": 0.5395, "step": 7219 }, { "epoch": 0.47, "grad_norm": 1.3000823259353638, "learning_rate": 5.789553440414346e-06, "loss": 0.5343, "step": 7220 }, { "epoch": 0.47, "grad_norm": 1.19790518283844, "learning_rate": 5.788521222347638e-06, "loss": 0.5487, "step": 7221 }, { "epoch": 0.47, "grad_norm": 1.2709046602249146, "learning_rate": 5.7874889698167e-06, "loss": 0.5207, "step": 7222 }, { "epoch": 0.47, "grad_norm": 1.122359037399292, "learning_rate": 5.786456682866652e-06, "loss": 0.55, "step": 7223 }, { "epoch": 0.47, "grad_norm": 1.2061569690704346, "learning_rate": 5.785424361542611e-06, "loss": 0.5082, "step": 7224 }, { "epoch": 0.47, "grad_norm": 1.1743954420089722, "learning_rate": 5.784392005889698e-06, "loss": 0.5003, "step": 7225 }, { "epoch": 0.47, "grad_norm": 1.1091798543930054, "learning_rate": 5.7833596159530325e-06, "loss": 0.5235, "step": 7226 }, { "epoch": 0.47, "grad_norm": 1.1673370599746704, "learning_rate": 5.782327191777741e-06, "loss": 0.5245, "step": 7227 }, { "epoch": 0.47, "grad_norm": 1.1445201635360718, "learning_rate": 5.781294733408946e-06, "loss": 0.561, "step": 7228 }, { "epoch": 0.47, "grad_norm": 1.3073639869689941, "learning_rate": 5.780262240891774e-06, "loss": 0.5651, "step": 7229 }, { "epoch": 0.47, "grad_norm": 1.3002525568008423, "learning_rate": 5.7792297142713534e-06, "loss": 0.5943, "step": 7230 }, { "epoch": 0.47, "grad_norm": 1.073269248008728, "learning_rate": 5.778197153592811e-06, "loss": 0.5118, "step": 7231 }, { "epoch": 0.47, "grad_norm": 1.1615735292434692, "learning_rate": 5.777164558901279e-06, "loss": 0.5635, "step": 7232 }, { "epoch": 0.47, "grad_norm": 1.164879560470581, "learning_rate": 5.776131930241891e-06, "loss": 0.4948, "step": 7233 }, { "epoch": 0.47, "grad_norm": 1.145783543586731, "learning_rate": 5.775099267659776e-06, "loss": 0.5235, "step": 7234 }, { "epoch": 0.47, "grad_norm": 1.0672495365142822, "learning_rate": 5.774066571200073e-06, "loss": 0.534, "step": 7235 }, { "epoch": 0.47, "grad_norm": 1.1901322603225708, "learning_rate": 5.773033840907919e-06, "loss": 0.5759, "step": 7236 }, { "epoch": 0.47, "grad_norm": 1.2921053171157837, "learning_rate": 5.772001076828448e-06, "loss": 0.5423, "step": 7237 }, { "epoch": 0.47, "grad_norm": 1.1040377616882324, "learning_rate": 5.770968279006803e-06, "loss": 0.5023, "step": 7238 }, { "epoch": 0.47, "grad_norm": 1.1985952854156494, "learning_rate": 5.769935447488123e-06, "loss": 0.5209, "step": 7239 }, { "epoch": 0.47, "grad_norm": 1.2019320726394653, "learning_rate": 5.76890258231755e-06, "loss": 0.4879, "step": 7240 }, { "epoch": 0.47, "grad_norm": 1.274949073791504, "learning_rate": 5.7678696835402305e-06, "loss": 0.5469, "step": 7241 }, { "epoch": 0.47, "grad_norm": 1.2625164985656738, "learning_rate": 5.766836751201308e-06, "loss": 0.5465, "step": 7242 }, { "epoch": 0.47, "grad_norm": 1.3057379722595215, "learning_rate": 5.7658037853459295e-06, "loss": 0.5331, "step": 7243 }, { "epoch": 0.47, "grad_norm": 1.1047053337097168, "learning_rate": 5.764770786019243e-06, "loss": 0.4721, "step": 7244 }, { "epoch": 0.47, "grad_norm": 1.2687146663665771, "learning_rate": 5.7637377532664e-06, "loss": 0.5448, "step": 7245 }, { "epoch": 0.47, "grad_norm": 1.1732102632522583, "learning_rate": 5.762704687132548e-06, "loss": 0.5242, "step": 7246 }, { "epoch": 0.47, "grad_norm": 1.174641728401184, "learning_rate": 5.761671587662843e-06, "loss": 0.4821, "step": 7247 }, { "epoch": 0.47, "grad_norm": 1.2183561325073242, "learning_rate": 5.7606384549024385e-06, "loss": 0.5599, "step": 7248 }, { "epoch": 0.47, "grad_norm": 1.2728984355926514, "learning_rate": 5.759605288896489e-06, "loss": 0.5984, "step": 7249 }, { "epoch": 0.47, "grad_norm": 1.2391186952590942, "learning_rate": 5.758572089690152e-06, "loss": 0.5869, "step": 7250 }, { "epoch": 0.47, "grad_norm": 1.173529863357544, "learning_rate": 5.757538857328587e-06, "loss": 0.5454, "step": 7251 }, { "epoch": 0.47, "grad_norm": 1.0659948587417603, "learning_rate": 5.756505591856952e-06, "loss": 0.4869, "step": 7252 }, { "epoch": 0.47, "grad_norm": 1.1594455242156982, "learning_rate": 5.75547229332041e-06, "loss": 0.5029, "step": 7253 }, { "epoch": 0.47, "grad_norm": 1.1899008750915527, "learning_rate": 5.7544389617641225e-06, "loss": 0.525, "step": 7254 }, { "epoch": 0.47, "grad_norm": 1.1582409143447876, "learning_rate": 5.753405597233255e-06, "loss": 0.5744, "step": 7255 }, { "epoch": 0.47, "grad_norm": 1.1564722061157227, "learning_rate": 5.752372199772973e-06, "loss": 0.5329, "step": 7256 }, { "epoch": 0.47, "grad_norm": 1.0470064878463745, "learning_rate": 5.751338769428443e-06, "loss": 0.5268, "step": 7257 }, { "epoch": 0.47, "grad_norm": 1.1892976760864258, "learning_rate": 5.750305306244834e-06, "loss": 0.5499, "step": 7258 }, { "epoch": 0.47, "grad_norm": 1.1946842670440674, "learning_rate": 5.749271810267316e-06, "loss": 0.5203, "step": 7259 }, { "epoch": 0.47, "grad_norm": 1.1050670146942139, "learning_rate": 5.74823828154106e-06, "loss": 0.5406, "step": 7260 }, { "epoch": 0.47, "grad_norm": 1.0887320041656494, "learning_rate": 5.747204720111239e-06, "loss": 0.563, "step": 7261 }, { "epoch": 0.47, "grad_norm": 1.2330957651138306, "learning_rate": 5.746171126023028e-06, "loss": 0.6313, "step": 7262 }, { "epoch": 0.47, "grad_norm": 1.2070342302322388, "learning_rate": 5.745137499321602e-06, "loss": 0.5454, "step": 7263 }, { "epoch": 0.47, "grad_norm": 1.3498406410217285, "learning_rate": 5.744103840052138e-06, "loss": 0.5322, "step": 7264 }, { "epoch": 0.47, "grad_norm": 1.1930934190750122, "learning_rate": 5.743070148259817e-06, "loss": 0.5343, "step": 7265 }, { "epoch": 0.47, "grad_norm": 1.1085346937179565, "learning_rate": 5.742036423989814e-06, "loss": 0.5052, "step": 7266 }, { "epoch": 0.47, "grad_norm": 1.10801100730896, "learning_rate": 5.741002667287315e-06, "loss": 0.5073, "step": 7267 }, { "epoch": 0.47, "grad_norm": 1.1768380403518677, "learning_rate": 5.739968878197502e-06, "loss": 0.5559, "step": 7268 }, { "epoch": 0.47, "grad_norm": 1.1326487064361572, "learning_rate": 5.738935056765556e-06, "loss": 0.5342, "step": 7269 }, { "epoch": 0.47, "grad_norm": 1.2019925117492676, "learning_rate": 5.737901203036666e-06, "loss": 0.5078, "step": 7270 }, { "epoch": 0.47, "grad_norm": 1.2463456392288208, "learning_rate": 5.736867317056019e-06, "loss": 0.5881, "step": 7271 }, { "epoch": 0.47, "grad_norm": 1.1561328172683716, "learning_rate": 5.735833398868803e-06, "loss": 0.5606, "step": 7272 }, { "epoch": 0.47, "grad_norm": 1.3595421314239502, "learning_rate": 5.734799448520206e-06, "loss": 0.6042, "step": 7273 }, { "epoch": 0.47, "grad_norm": 1.1414015293121338, "learning_rate": 5.733765466055423e-06, "loss": 0.52, "step": 7274 }, { "epoch": 0.47, "grad_norm": 1.596933364868164, "learning_rate": 5.732731451519643e-06, "loss": 0.5356, "step": 7275 }, { "epoch": 0.47, "grad_norm": 1.1679061651229858, "learning_rate": 5.731697404958062e-06, "loss": 0.4932, "step": 7276 }, { "epoch": 0.47, "grad_norm": 1.19071364402771, "learning_rate": 5.7306633264158764e-06, "loss": 0.4951, "step": 7277 }, { "epoch": 0.47, "grad_norm": 1.3100793361663818, "learning_rate": 5.72962921593828e-06, "loss": 0.5499, "step": 7278 }, { "epoch": 0.47, "grad_norm": 1.0849742889404297, "learning_rate": 5.728595073570474e-06, "loss": 0.5278, "step": 7279 }, { "epoch": 0.47, "grad_norm": 1.497631549835205, "learning_rate": 5.7275608993576586e-06, "loss": 0.5655, "step": 7280 }, { "epoch": 0.47, "grad_norm": 1.1602460145950317, "learning_rate": 5.7265266933450316e-06, "loss": 0.5163, "step": 7281 }, { "epoch": 0.47, "grad_norm": 1.228947401046753, "learning_rate": 5.725492455577798e-06, "loss": 0.588, "step": 7282 }, { "epoch": 0.47, "grad_norm": 1.1805288791656494, "learning_rate": 5.724458186101161e-06, "loss": 0.5102, "step": 7283 }, { "epoch": 0.47, "grad_norm": 1.228719711303711, "learning_rate": 5.723423884960325e-06, "loss": 0.5526, "step": 7284 }, { "epoch": 0.47, "grad_norm": 1.1085673570632935, "learning_rate": 5.722389552200498e-06, "loss": 0.5231, "step": 7285 }, { "epoch": 0.47, "grad_norm": 1.1448537111282349, "learning_rate": 5.721355187866888e-06, "loss": 0.551, "step": 7286 }, { "epoch": 0.47, "grad_norm": 1.157682180404663, "learning_rate": 5.720320792004703e-06, "loss": 0.515, "step": 7287 }, { "epoch": 0.47, "grad_norm": 1.1869282722473145, "learning_rate": 5.719286364659155e-06, "loss": 0.5341, "step": 7288 }, { "epoch": 0.47, "grad_norm": 1.4337786436080933, "learning_rate": 5.718251905875456e-06, "loss": 0.5512, "step": 7289 }, { "epoch": 0.47, "grad_norm": 1.0669047832489014, "learning_rate": 5.717217415698818e-06, "loss": 0.5176, "step": 7290 }, { "epoch": 0.47, "grad_norm": 1.3652948141098022, "learning_rate": 5.716182894174458e-06, "loss": 0.5512, "step": 7291 }, { "epoch": 0.47, "grad_norm": 1.1574840545654297, "learning_rate": 5.715148341347593e-06, "loss": 0.5388, "step": 7292 }, { "epoch": 0.47, "grad_norm": 1.157922625541687, "learning_rate": 5.714113757263437e-06, "loss": 0.5349, "step": 7293 }, { "epoch": 0.47, "grad_norm": 1.1225569248199463, "learning_rate": 5.7130791419672125e-06, "loss": 0.5377, "step": 7294 }, { "epoch": 0.47, "grad_norm": 1.1328482627868652, "learning_rate": 5.712044495504138e-06, "loss": 0.5065, "step": 7295 }, { "epoch": 0.47, "grad_norm": 1.24077308177948, "learning_rate": 5.711009817919435e-06, "loss": 0.5312, "step": 7296 }, { "epoch": 0.47, "grad_norm": 1.182208776473999, "learning_rate": 5.709975109258329e-06, "loss": 0.5858, "step": 7297 }, { "epoch": 0.47, "grad_norm": 1.1854363679885864, "learning_rate": 5.708940369566041e-06, "loss": 0.4929, "step": 7298 }, { "epoch": 0.47, "grad_norm": 1.301313042640686, "learning_rate": 5.707905598887798e-06, "loss": 0.5457, "step": 7299 }, { "epoch": 0.47, "grad_norm": 1.2636882066726685, "learning_rate": 5.706870797268831e-06, "loss": 0.5096, "step": 7300 }, { "epoch": 0.47, "grad_norm": 1.1660689115524292, "learning_rate": 5.7058359647543625e-06, "loss": 0.5458, "step": 7301 }, { "epoch": 0.47, "grad_norm": 1.2174535989761353, "learning_rate": 5.7048011013896255e-06, "loss": 0.5062, "step": 7302 }, { "epoch": 0.47, "grad_norm": 1.175108790397644, "learning_rate": 5.703766207219851e-06, "loss": 0.5738, "step": 7303 }, { "epoch": 0.47, "grad_norm": 1.3191324472427368, "learning_rate": 5.70273128229027e-06, "loss": 0.5669, "step": 7304 }, { "epoch": 0.47, "grad_norm": 1.1593308448791504, "learning_rate": 5.70169632664612e-06, "loss": 0.5275, "step": 7305 }, { "epoch": 0.47, "grad_norm": 1.334751009941101, "learning_rate": 5.700661340332633e-06, "loss": 0.5324, "step": 7306 }, { "epoch": 0.47, "grad_norm": 1.1173096895217896, "learning_rate": 5.699626323395046e-06, "loss": 0.5434, "step": 7307 }, { "epoch": 0.47, "grad_norm": 1.3712520599365234, "learning_rate": 5.698591275878599e-06, "loss": 0.5753, "step": 7308 }, { "epoch": 0.47, "grad_norm": 1.2903732061386108, "learning_rate": 5.6975561978285275e-06, "loss": 0.5569, "step": 7309 }, { "epoch": 0.47, "grad_norm": 1.1031501293182373, "learning_rate": 5.696521089290077e-06, "loss": 0.5371, "step": 7310 }, { "epoch": 0.47, "grad_norm": 1.0642465353012085, "learning_rate": 5.695485950308484e-06, "loss": 0.4964, "step": 7311 }, { "epoch": 0.47, "grad_norm": 1.1918140649795532, "learning_rate": 5.694450780928997e-06, "loss": 0.5285, "step": 7312 }, { "epoch": 0.47, "grad_norm": 1.1876883506774902, "learning_rate": 5.6934155811968565e-06, "loss": 0.5555, "step": 7313 }, { "epoch": 0.47, "grad_norm": 1.1437418460845947, "learning_rate": 5.69238035115731e-06, "loss": 0.5177, "step": 7314 }, { "epoch": 0.47, "grad_norm": 1.2060483694076538, "learning_rate": 5.691345090855605e-06, "loss": 0.551, "step": 7315 }, { "epoch": 0.47, "grad_norm": 1.1584385633468628, "learning_rate": 5.690309800336989e-06, "loss": 0.5501, "step": 7316 }, { "epoch": 0.47, "grad_norm": 1.0849734544754028, "learning_rate": 5.689274479646714e-06, "loss": 0.4725, "step": 7317 }, { "epoch": 0.47, "grad_norm": 1.185024619102478, "learning_rate": 5.68823912883003e-06, "loss": 0.5363, "step": 7318 }, { "epoch": 0.47, "grad_norm": 1.2432096004486084, "learning_rate": 5.687203747932187e-06, "loss": 0.5527, "step": 7319 }, { "epoch": 0.47, "grad_norm": 1.1961506605148315, "learning_rate": 5.686168336998444e-06, "loss": 0.5344, "step": 7320 }, { "epoch": 0.47, "grad_norm": 1.1432796716690063, "learning_rate": 5.685132896074052e-06, "loss": 0.5116, "step": 7321 }, { "epoch": 0.47, "grad_norm": 1.5492490530014038, "learning_rate": 5.684097425204268e-06, "loss": 0.559, "step": 7322 }, { "epoch": 0.47, "grad_norm": 1.18941330909729, "learning_rate": 5.683061924434351e-06, "loss": 0.554, "step": 7323 }, { "epoch": 0.47, "grad_norm": 1.227079153060913, "learning_rate": 5.682026393809561e-06, "loss": 0.5411, "step": 7324 }, { "epoch": 0.47, "grad_norm": 1.094606876373291, "learning_rate": 5.680990833375155e-06, "loss": 0.5373, "step": 7325 }, { "epoch": 0.47, "grad_norm": 1.1511332988739014, "learning_rate": 5.679955243176398e-06, "loss": 0.5084, "step": 7326 }, { "epoch": 0.47, "grad_norm": 1.2875370979309082, "learning_rate": 5.678919623258552e-06, "loss": 0.5353, "step": 7327 }, { "epoch": 0.47, "grad_norm": 1.1967942714691162, "learning_rate": 5.67788397366688e-06, "loss": 0.5325, "step": 7328 }, { "epoch": 0.47, "grad_norm": 1.225393533706665, "learning_rate": 5.676848294446648e-06, "loss": 0.5723, "step": 7329 }, { "epoch": 0.47, "grad_norm": 1.1214821338653564, "learning_rate": 5.675812585643124e-06, "loss": 0.5521, "step": 7330 }, { "epoch": 0.47, "grad_norm": 1.228227138519287, "learning_rate": 5.674776847301575e-06, "loss": 0.5391, "step": 7331 }, { "epoch": 0.47, "grad_norm": 1.2372851371765137, "learning_rate": 5.673741079467272e-06, "loss": 0.5425, "step": 7332 }, { "epoch": 0.47, "grad_norm": 1.1559804677963257, "learning_rate": 5.672705282185484e-06, "loss": 0.5065, "step": 7333 }, { "epoch": 0.47, "grad_norm": 1.1759881973266602, "learning_rate": 5.671669455501484e-06, "loss": 0.5752, "step": 7334 }, { "epoch": 0.47, "grad_norm": 1.1191248893737793, "learning_rate": 5.6706335994605445e-06, "loss": 0.4952, "step": 7335 }, { "epoch": 0.47, "grad_norm": 1.2201086282730103, "learning_rate": 5.6695977141079415e-06, "loss": 0.5785, "step": 7336 }, { "epoch": 0.47, "grad_norm": 1.1120097637176514, "learning_rate": 5.66856179948895e-06, "loss": 0.5388, "step": 7337 }, { "epoch": 0.47, "grad_norm": 1.1753730773925781, "learning_rate": 5.6675258556488465e-06, "loss": 0.6014, "step": 7338 }, { "epoch": 0.47, "grad_norm": 1.1232099533081055, "learning_rate": 5.666489882632911e-06, "loss": 0.5273, "step": 7339 }, { "epoch": 0.47, "grad_norm": 1.248248815536499, "learning_rate": 5.66545388048642e-06, "loss": 0.5605, "step": 7340 }, { "epoch": 0.47, "grad_norm": 1.0973293781280518, "learning_rate": 5.66441784925466e-06, "loss": 0.537, "step": 7341 }, { "epoch": 0.47, "grad_norm": 1.2212467193603516, "learning_rate": 5.663381788982907e-06, "loss": 0.4956, "step": 7342 }, { "epoch": 0.47, "grad_norm": 1.1800075769424438, "learning_rate": 5.662345699716449e-06, "loss": 0.5404, "step": 7343 }, { "epoch": 0.47, "grad_norm": 1.249633550643921, "learning_rate": 5.6613095815005705e-06, "loss": 0.5431, "step": 7344 }, { "epoch": 0.47, "grad_norm": 1.1967593431472778, "learning_rate": 5.660273434380554e-06, "loss": 0.5321, "step": 7345 }, { "epoch": 0.47, "grad_norm": 1.2428148984909058, "learning_rate": 5.6592372584016895e-06, "loss": 0.5768, "step": 7346 }, { "epoch": 0.47, "grad_norm": 1.2666115760803223, "learning_rate": 5.658201053609267e-06, "loss": 0.495, "step": 7347 }, { "epoch": 0.47, "grad_norm": 1.0804709196090698, "learning_rate": 5.657164820048574e-06, "loss": 0.539, "step": 7348 }, { "epoch": 0.47, "grad_norm": 1.1358387470245361, "learning_rate": 5.656128557764901e-06, "loss": 0.475, "step": 7349 }, { "epoch": 0.47, "grad_norm": 1.2198972702026367, "learning_rate": 5.655092266803544e-06, "loss": 0.5347, "step": 7350 }, { "epoch": 0.47, "grad_norm": 1.067549705505371, "learning_rate": 5.6540559472097925e-06, "loss": 0.5279, "step": 7351 }, { "epoch": 0.47, "grad_norm": 1.0637743473052979, "learning_rate": 5.6530195990289435e-06, "loss": 0.5196, "step": 7352 }, { "epoch": 0.47, "grad_norm": 1.0607545375823975, "learning_rate": 5.651983222306292e-06, "loss": 0.4626, "step": 7353 }, { "epoch": 0.47, "grad_norm": 1.220522165298462, "learning_rate": 5.650946817087137e-06, "loss": 0.5096, "step": 7354 }, { "epoch": 0.47, "grad_norm": 1.1238281726837158, "learning_rate": 5.649910383416776e-06, "loss": 0.5531, "step": 7355 }, { "epoch": 0.47, "grad_norm": 1.1417231559753418, "learning_rate": 5.648873921340509e-06, "loss": 0.5327, "step": 7356 }, { "epoch": 0.47, "grad_norm": 1.0798943042755127, "learning_rate": 5.647837430903635e-06, "loss": 0.4966, "step": 7357 }, { "epoch": 0.47, "grad_norm": 1.144775390625, "learning_rate": 5.64680091215146e-06, "loss": 0.4966, "step": 7358 }, { "epoch": 0.48, "grad_norm": 1.118808388710022, "learning_rate": 5.645764365129287e-06, "loss": 0.5516, "step": 7359 }, { "epoch": 0.48, "grad_norm": 1.151526927947998, "learning_rate": 5.644727789882417e-06, "loss": 0.5287, "step": 7360 }, { "epoch": 0.48, "grad_norm": 1.1664682626724243, "learning_rate": 5.64369118645616e-06, "loss": 0.549, "step": 7361 }, { "epoch": 0.48, "grad_norm": 1.1106600761413574, "learning_rate": 5.642654554895823e-06, "loss": 0.5048, "step": 7362 }, { "epoch": 0.48, "grad_norm": 1.2160518169403076, "learning_rate": 5.6416178952467125e-06, "loss": 0.5566, "step": 7363 }, { "epoch": 0.48, "grad_norm": 1.0560046434402466, "learning_rate": 5.640581207554139e-06, "loss": 0.5356, "step": 7364 }, { "epoch": 0.48, "grad_norm": 1.1314637660980225, "learning_rate": 5.639544491863414e-06, "loss": 0.5882, "step": 7365 }, { "epoch": 0.48, "grad_norm": 1.1412746906280518, "learning_rate": 5.638507748219849e-06, "loss": 0.5436, "step": 7366 }, { "epoch": 0.48, "grad_norm": 1.2962764501571655, "learning_rate": 5.6374709766687575e-06, "loss": 0.5881, "step": 7367 }, { "epoch": 0.48, "grad_norm": 1.3116101026535034, "learning_rate": 5.6364341772554555e-06, "loss": 0.5364, "step": 7368 }, { "epoch": 0.48, "grad_norm": 1.155655860900879, "learning_rate": 5.635397350025257e-06, "loss": 0.5507, "step": 7369 }, { "epoch": 0.48, "grad_norm": 1.1449378728866577, "learning_rate": 5.634360495023479e-06, "loss": 0.5349, "step": 7370 }, { "epoch": 0.48, "grad_norm": 1.1993134021759033, "learning_rate": 5.633323612295441e-06, "loss": 0.499, "step": 7371 }, { "epoch": 0.48, "grad_norm": 1.1548681259155273, "learning_rate": 5.632286701886462e-06, "loss": 0.5357, "step": 7372 }, { "epoch": 0.48, "grad_norm": 1.2027544975280762, "learning_rate": 5.6312497638418616e-06, "loss": 0.5525, "step": 7373 }, { "epoch": 0.48, "grad_norm": 1.284242868423462, "learning_rate": 5.630212798206964e-06, "loss": 0.5414, "step": 7374 }, { "epoch": 0.48, "grad_norm": 1.1029828786849976, "learning_rate": 5.62917580502709e-06, "loss": 0.4912, "step": 7375 }, { "epoch": 0.48, "grad_norm": 1.1275031566619873, "learning_rate": 5.628138784347565e-06, "loss": 0.4956, "step": 7376 }, { "epoch": 0.48, "grad_norm": 1.1062260866165161, "learning_rate": 5.627101736213716e-06, "loss": 0.4781, "step": 7377 }, { "epoch": 0.48, "grad_norm": 1.1965843439102173, "learning_rate": 5.6260646606708665e-06, "loss": 0.5801, "step": 7378 }, { "epoch": 0.48, "grad_norm": 1.0834746360778809, "learning_rate": 5.625027557764345e-06, "loss": 0.502, "step": 7379 }, { "epoch": 0.48, "grad_norm": 1.2031865119934082, "learning_rate": 5.623990427539484e-06, "loss": 0.5677, "step": 7380 }, { "epoch": 0.48, "grad_norm": 1.2039031982421875, "learning_rate": 5.62295327004161e-06, "loss": 0.5063, "step": 7381 }, { "epoch": 0.48, "grad_norm": 1.1435706615447998, "learning_rate": 5.621916085316056e-06, "loss": 0.5215, "step": 7382 }, { "epoch": 0.48, "grad_norm": 1.2657058238983154, "learning_rate": 5.6208788734081544e-06, "loss": 0.5343, "step": 7383 }, { "epoch": 0.48, "grad_norm": 1.0902224779129028, "learning_rate": 5.619841634363239e-06, "loss": 0.4975, "step": 7384 }, { "epoch": 0.48, "grad_norm": 1.163818120956421, "learning_rate": 5.618804368226646e-06, "loss": 0.5357, "step": 7385 }, { "epoch": 0.48, "grad_norm": 1.2409236431121826, "learning_rate": 5.6177670750437085e-06, "loss": 0.6316, "step": 7386 }, { "epoch": 0.48, "grad_norm": 1.1559845209121704, "learning_rate": 5.6167297548597665e-06, "loss": 0.5168, "step": 7387 }, { "epoch": 0.48, "grad_norm": 1.1613106727600098, "learning_rate": 5.6156924077201605e-06, "loss": 0.5253, "step": 7388 }, { "epoch": 0.48, "grad_norm": 1.0562652349472046, "learning_rate": 5.6146550336702255e-06, "loss": 0.4657, "step": 7389 }, { "epoch": 0.48, "grad_norm": 1.153406023979187, "learning_rate": 5.613617632755305e-06, "loss": 0.5364, "step": 7390 }, { "epoch": 0.48, "grad_norm": 1.1320818662643433, "learning_rate": 5.6125802050207425e-06, "loss": 0.5169, "step": 7391 }, { "epoch": 0.48, "grad_norm": 1.2749723196029663, "learning_rate": 5.611542750511878e-06, "loss": 0.5333, "step": 7392 }, { "epoch": 0.48, "grad_norm": 1.1087955236434937, "learning_rate": 5.610505269274058e-06, "loss": 0.553, "step": 7393 }, { "epoch": 0.48, "grad_norm": 1.3223940134048462, "learning_rate": 5.609467761352628e-06, "loss": 0.5796, "step": 7394 }, { "epoch": 0.48, "grad_norm": 1.0889171361923218, "learning_rate": 5.608430226792934e-06, "loss": 0.4764, "step": 7395 }, { "epoch": 0.48, "grad_norm": 1.1765429973602295, "learning_rate": 5.607392665640326e-06, "loss": 0.5047, "step": 7396 }, { "epoch": 0.48, "grad_norm": 1.290372371673584, "learning_rate": 5.606355077940151e-06, "loss": 0.5823, "step": 7397 }, { "epoch": 0.48, "grad_norm": 1.0980807542800903, "learning_rate": 5.60531746373776e-06, "loss": 0.5548, "step": 7398 }, { "epoch": 0.48, "grad_norm": 1.1882085800170898, "learning_rate": 5.604279823078505e-06, "loss": 0.5222, "step": 7399 }, { "epoch": 0.48, "grad_norm": 1.2266924381256104, "learning_rate": 5.603242156007737e-06, "loss": 0.5385, "step": 7400 }, { "epoch": 0.48, "grad_norm": 1.2400267124176025, "learning_rate": 5.60220446257081e-06, "loss": 0.5494, "step": 7401 }, { "epoch": 0.48, "grad_norm": 1.2599682807922363, "learning_rate": 5.601166742813081e-06, "loss": 0.5439, "step": 7402 }, { "epoch": 0.48, "grad_norm": 1.283560037612915, "learning_rate": 5.600128996779905e-06, "loss": 0.5742, "step": 7403 }, { "epoch": 0.48, "grad_norm": 1.2059499025344849, "learning_rate": 5.599091224516638e-06, "loss": 0.5383, "step": 7404 }, { "epoch": 0.48, "grad_norm": 1.0063068866729736, "learning_rate": 5.598053426068639e-06, "loss": 0.4785, "step": 7405 }, { "epoch": 0.48, "grad_norm": 1.2923377752304077, "learning_rate": 5.597015601481269e-06, "loss": 0.5591, "step": 7406 }, { "epoch": 0.48, "grad_norm": 1.14368736743927, "learning_rate": 5.5959777507998865e-06, "loss": 0.5165, "step": 7407 }, { "epoch": 0.48, "grad_norm": 1.2862136363983154, "learning_rate": 5.594939874069853e-06, "loss": 0.5554, "step": 7408 }, { "epoch": 0.48, "grad_norm": 1.2094368934631348, "learning_rate": 5.593901971336536e-06, "loss": 0.5183, "step": 7409 }, { "epoch": 0.48, "grad_norm": 1.3000544309616089, "learning_rate": 5.592864042645293e-06, "loss": 0.5432, "step": 7410 }, { "epoch": 0.48, "grad_norm": 1.130380630493164, "learning_rate": 5.591826088041493e-06, "loss": 0.4953, "step": 7411 }, { "epoch": 0.48, "grad_norm": 1.180986762046814, "learning_rate": 5.590788107570503e-06, "loss": 0.5155, "step": 7412 }, { "epoch": 0.48, "grad_norm": 1.115618348121643, "learning_rate": 5.5897501012776874e-06, "loss": 0.4819, "step": 7413 }, { "epoch": 0.48, "grad_norm": 1.1513025760650635, "learning_rate": 5.588712069208416e-06, "loss": 0.5053, "step": 7414 }, { "epoch": 0.48, "grad_norm": 1.2440112829208374, "learning_rate": 5.587674011408062e-06, "loss": 0.5531, "step": 7415 }, { "epoch": 0.48, "grad_norm": 1.2161036729812622, "learning_rate": 5.586635927921991e-06, "loss": 0.5022, "step": 7416 }, { "epoch": 0.48, "grad_norm": 1.2874062061309814, "learning_rate": 5.585597818795576e-06, "loss": 0.5817, "step": 7417 }, { "epoch": 0.48, "grad_norm": 1.2128945589065552, "learning_rate": 5.584559684074193e-06, "loss": 0.5211, "step": 7418 }, { "epoch": 0.48, "grad_norm": 1.1230922937393188, "learning_rate": 5.583521523803214e-06, "loss": 0.5289, "step": 7419 }, { "epoch": 0.48, "grad_norm": 1.1297441720962524, "learning_rate": 5.582483338028014e-06, "loss": 0.5185, "step": 7420 }, { "epoch": 0.48, "grad_norm": 1.095078945159912, "learning_rate": 5.5814451267939715e-06, "loss": 0.4781, "step": 7421 }, { "epoch": 0.48, "grad_norm": 1.1987203359603882, "learning_rate": 5.5804068901464615e-06, "loss": 0.5497, "step": 7422 }, { "epoch": 0.48, "grad_norm": 1.3512287139892578, "learning_rate": 5.5793686281308645e-06, "loss": 0.5331, "step": 7423 }, { "epoch": 0.48, "grad_norm": 1.2831789255142212, "learning_rate": 5.578330340792559e-06, "loss": 0.5186, "step": 7424 }, { "epoch": 0.48, "grad_norm": 1.3101708889007568, "learning_rate": 5.577292028176926e-06, "loss": 0.5632, "step": 7425 }, { "epoch": 0.48, "grad_norm": 1.091207504272461, "learning_rate": 5.57625369032935e-06, "loss": 0.5148, "step": 7426 }, { "epoch": 0.48, "grad_norm": 1.0642892122268677, "learning_rate": 5.57521532729521e-06, "loss": 0.4945, "step": 7427 }, { "epoch": 0.48, "grad_norm": 1.156800627708435, "learning_rate": 5.574176939119892e-06, "loss": 0.4528, "step": 7428 }, { "epoch": 0.48, "grad_norm": 1.1735073328018188, "learning_rate": 5.573138525848783e-06, "loss": 0.4633, "step": 7429 }, { "epoch": 0.48, "grad_norm": 1.1715320348739624, "learning_rate": 5.572100087527266e-06, "loss": 0.5216, "step": 7430 }, { "epoch": 0.48, "grad_norm": 1.2034358978271484, "learning_rate": 5.571061624200731e-06, "loss": 0.521, "step": 7431 }, { "epoch": 0.48, "grad_norm": 1.1367688179016113, "learning_rate": 5.570023135914566e-06, "loss": 0.5176, "step": 7432 }, { "epoch": 0.48, "grad_norm": 1.1572507619857788, "learning_rate": 5.56898462271416e-06, "loss": 0.5629, "step": 7433 }, { "epoch": 0.48, "grad_norm": 1.0914405584335327, "learning_rate": 5.567946084644904e-06, "loss": 0.5007, "step": 7434 }, { "epoch": 0.48, "grad_norm": 1.1138278245925903, "learning_rate": 5.56690752175219e-06, "loss": 0.5562, "step": 7435 }, { "epoch": 0.48, "grad_norm": 1.1987593173980713, "learning_rate": 5.5658689340814106e-06, "loss": 0.4586, "step": 7436 }, { "epoch": 0.48, "grad_norm": 1.1443278789520264, "learning_rate": 5.564830321677961e-06, "loss": 0.557, "step": 7437 }, { "epoch": 0.48, "grad_norm": 1.184622049331665, "learning_rate": 5.563791684587235e-06, "loss": 0.5259, "step": 7438 }, { "epoch": 0.48, "grad_norm": 1.1221176385879517, "learning_rate": 5.562753022854629e-06, "loss": 0.5467, "step": 7439 }, { "epoch": 0.48, "grad_norm": 1.139955759048462, "learning_rate": 5.56171433652554e-06, "loss": 0.4967, "step": 7440 }, { "epoch": 0.48, "grad_norm": 1.2247686386108398, "learning_rate": 5.560675625645368e-06, "loss": 0.5683, "step": 7441 }, { "epoch": 0.48, "grad_norm": 1.0546566247940063, "learning_rate": 5.559636890259509e-06, "loss": 0.4975, "step": 7442 }, { "epoch": 0.48, "grad_norm": 1.1116868257522583, "learning_rate": 5.558598130413366e-06, "loss": 0.5214, "step": 7443 }, { "epoch": 0.48, "grad_norm": 1.2825634479522705, "learning_rate": 5.55755934615234e-06, "loss": 0.5333, "step": 7444 }, { "epoch": 0.48, "grad_norm": 1.2307863235473633, "learning_rate": 5.556520537521834e-06, "loss": 0.5559, "step": 7445 }, { "epoch": 0.48, "grad_norm": 1.182038426399231, "learning_rate": 5.555481704567251e-06, "loss": 0.5368, "step": 7446 }, { "epoch": 0.48, "grad_norm": 1.2007938623428345, "learning_rate": 5.554442847333995e-06, "loss": 0.5287, "step": 7447 }, { "epoch": 0.48, "grad_norm": 1.2199424505233765, "learning_rate": 5.553403965867474e-06, "loss": 0.4779, "step": 7448 }, { "epoch": 0.48, "grad_norm": 1.2695180177688599, "learning_rate": 5.552365060213093e-06, "loss": 0.5242, "step": 7449 }, { "epoch": 0.48, "grad_norm": 1.1455302238464355, "learning_rate": 5.55132613041626e-06, "loss": 0.5801, "step": 7450 }, { "epoch": 0.48, "grad_norm": 1.0847413539886475, "learning_rate": 5.550287176522384e-06, "loss": 0.4991, "step": 7451 }, { "epoch": 0.48, "grad_norm": 1.1104075908660889, "learning_rate": 5.549248198576875e-06, "loss": 0.5003, "step": 7452 }, { "epoch": 0.48, "grad_norm": 1.2528834342956543, "learning_rate": 5.548209196625146e-06, "loss": 0.5219, "step": 7453 }, { "epoch": 0.48, "grad_norm": 1.2268739938735962, "learning_rate": 5.5471701707126054e-06, "loss": 0.4944, "step": 7454 }, { "epoch": 0.48, "grad_norm": 1.2631235122680664, "learning_rate": 5.54613112088467e-06, "loss": 0.5463, "step": 7455 }, { "epoch": 0.48, "grad_norm": 1.1618072986602783, "learning_rate": 5.545092047186752e-06, "loss": 0.5539, "step": 7456 }, { "epoch": 0.48, "grad_norm": 1.0725314617156982, "learning_rate": 5.5440529496642656e-06, "loss": 0.4673, "step": 7457 }, { "epoch": 0.48, "grad_norm": 1.2961128950119019, "learning_rate": 5.54301382836263e-06, "loss": 0.5319, "step": 7458 }, { "epoch": 0.48, "grad_norm": 1.2352230548858643, "learning_rate": 5.541974683327261e-06, "loss": 0.5117, "step": 7459 }, { "epoch": 0.48, "grad_norm": 1.1258851289749146, "learning_rate": 5.540935514603576e-06, "loss": 0.5589, "step": 7460 }, { "epoch": 0.48, "grad_norm": 1.4098036289215088, "learning_rate": 5.539896322236995e-06, "loss": 0.5415, "step": 7461 }, { "epoch": 0.48, "grad_norm": 1.2675950527191162, "learning_rate": 5.53885710627294e-06, "loss": 0.5638, "step": 7462 }, { "epoch": 0.48, "grad_norm": 1.1136020421981812, "learning_rate": 5.537817866756831e-06, "loss": 0.5174, "step": 7463 }, { "epoch": 0.48, "grad_norm": 1.0832310914993286, "learning_rate": 5.536778603734088e-06, "loss": 0.4989, "step": 7464 }, { "epoch": 0.48, "grad_norm": 1.138284683227539, "learning_rate": 5.53573931725014e-06, "loss": 0.4616, "step": 7465 }, { "epoch": 0.48, "grad_norm": 1.1945892572402954, "learning_rate": 5.5347000073504085e-06, "loss": 0.5363, "step": 7466 }, { "epoch": 0.48, "grad_norm": 1.1429699659347534, "learning_rate": 5.5336606740803185e-06, "loss": 0.5357, "step": 7467 }, { "epoch": 0.48, "grad_norm": 1.6121751070022583, "learning_rate": 5.532621317485297e-06, "loss": 0.5488, "step": 7468 }, { "epoch": 0.48, "grad_norm": 1.3109841346740723, "learning_rate": 5.531581937610772e-06, "loss": 0.539, "step": 7469 }, { "epoch": 0.48, "grad_norm": 1.255157709121704, "learning_rate": 5.530542534502174e-06, "loss": 0.5445, "step": 7470 }, { "epoch": 0.48, "grad_norm": 1.1478935480117798, "learning_rate": 5.529503108204928e-06, "loss": 0.5576, "step": 7471 }, { "epoch": 0.48, "grad_norm": 1.2389042377471924, "learning_rate": 5.528463658764468e-06, "loss": 0.5648, "step": 7472 }, { "epoch": 0.48, "grad_norm": 1.176343560218811, "learning_rate": 5.527424186226226e-06, "loss": 0.5286, "step": 7473 }, { "epoch": 0.48, "grad_norm": 1.1269633769989014, "learning_rate": 5.5263846906356325e-06, "loss": 0.5611, "step": 7474 }, { "epoch": 0.48, "grad_norm": 1.1496129035949707, "learning_rate": 5.525345172038121e-06, "loss": 0.5244, "step": 7475 }, { "epoch": 0.48, "grad_norm": 1.1998388767242432, "learning_rate": 5.524305630479131e-06, "loss": 0.4699, "step": 7476 }, { "epoch": 0.48, "grad_norm": 1.259493350982666, "learning_rate": 5.523266066004092e-06, "loss": 0.5088, "step": 7477 }, { "epoch": 0.48, "grad_norm": 1.2718796730041504, "learning_rate": 5.5222264786584436e-06, "loss": 0.5835, "step": 7478 }, { "epoch": 0.48, "grad_norm": 1.1501545906066895, "learning_rate": 5.521186868487623e-06, "loss": 0.5254, "step": 7479 }, { "epoch": 0.48, "grad_norm": 1.1886564493179321, "learning_rate": 5.52014723553707e-06, "loss": 0.5845, "step": 7480 }, { "epoch": 0.48, "grad_norm": 1.2815134525299072, "learning_rate": 5.519107579852222e-06, "loss": 0.4973, "step": 7481 }, { "epoch": 0.48, "grad_norm": 1.2012948989868164, "learning_rate": 5.518067901478523e-06, "loss": 0.5438, "step": 7482 }, { "epoch": 0.48, "grad_norm": 1.061722993850708, "learning_rate": 5.517028200461411e-06, "loss": 0.4939, "step": 7483 }, { "epoch": 0.48, "grad_norm": 1.1888699531555176, "learning_rate": 5.51598847684633e-06, "loss": 0.5578, "step": 7484 }, { "epoch": 0.48, "grad_norm": 1.0123512744903564, "learning_rate": 5.5149487306787265e-06, "loss": 0.5083, "step": 7485 }, { "epoch": 0.48, "grad_norm": 1.2197673320770264, "learning_rate": 5.5139089620040395e-06, "loss": 0.5327, "step": 7486 }, { "epoch": 0.48, "grad_norm": 1.2068941593170166, "learning_rate": 5.512869170867718e-06, "loss": 0.5635, "step": 7487 }, { "epoch": 0.48, "grad_norm": 1.2060723304748535, "learning_rate": 5.511829357315211e-06, "loss": 0.5692, "step": 7488 }, { "epoch": 0.48, "grad_norm": 1.0463556051254272, "learning_rate": 5.510789521391961e-06, "loss": 0.5206, "step": 7489 }, { "epoch": 0.48, "grad_norm": 1.1362488269805908, "learning_rate": 5.50974966314342e-06, "loss": 0.5217, "step": 7490 }, { "epoch": 0.48, "grad_norm": 1.1428771018981934, "learning_rate": 5.508709782615036e-06, "loss": 0.5113, "step": 7491 }, { "epoch": 0.48, "grad_norm": 1.186754822731018, "learning_rate": 5.5076698798522595e-06, "loss": 0.548, "step": 7492 }, { "epoch": 0.48, "grad_norm": 1.229202151298523, "learning_rate": 5.506629954900543e-06, "loss": 0.5398, "step": 7493 }, { "epoch": 0.48, "grad_norm": 1.183544397354126, "learning_rate": 5.505590007805337e-06, "loss": 0.569, "step": 7494 }, { "epoch": 0.48, "grad_norm": 1.113430380821228, "learning_rate": 5.504550038612099e-06, "loss": 0.546, "step": 7495 }, { "epoch": 0.48, "grad_norm": 1.630251407623291, "learning_rate": 5.503510047366277e-06, "loss": 0.5334, "step": 7496 }, { "epoch": 0.48, "grad_norm": 1.112476110458374, "learning_rate": 5.502470034113333e-06, "loss": 0.4889, "step": 7497 }, { "epoch": 0.48, "grad_norm": 1.185004711151123, "learning_rate": 5.501429998898718e-06, "loss": 0.5348, "step": 7498 }, { "epoch": 0.48, "grad_norm": 1.2129441499710083, "learning_rate": 5.500389941767892e-06, "loss": 0.5125, "step": 7499 }, { "epoch": 0.48, "grad_norm": 1.3168022632598877, "learning_rate": 5.499349862766313e-06, "loss": 0.5415, "step": 7500 }, { "epoch": 0.48, "grad_norm": 1.2114570140838623, "learning_rate": 5.49830976193944e-06, "loss": 0.5444, "step": 7501 }, { "epoch": 0.48, "grad_norm": 1.1127041578292847, "learning_rate": 5.497269639332732e-06, "loss": 0.5762, "step": 7502 }, { "epoch": 0.48, "grad_norm": 1.175902247428894, "learning_rate": 5.4962294949916524e-06, "loss": 0.5414, "step": 7503 }, { "epoch": 0.48, "grad_norm": 1.1630703210830688, "learning_rate": 5.495189328961661e-06, "loss": 0.4762, "step": 7504 }, { "epoch": 0.48, "grad_norm": 1.252553105354309, "learning_rate": 5.494149141288222e-06, "loss": 0.5299, "step": 7505 }, { "epoch": 0.48, "grad_norm": 1.0721111297607422, "learning_rate": 5.4931089320168e-06, "loss": 0.4753, "step": 7506 }, { "epoch": 0.48, "grad_norm": 1.13849937915802, "learning_rate": 5.492068701192856e-06, "loss": 0.5096, "step": 7507 }, { "epoch": 0.48, "grad_norm": 1.1340655088424683, "learning_rate": 5.491028448861861e-06, "loss": 0.5024, "step": 7508 }, { "epoch": 0.48, "grad_norm": 1.1492164134979248, "learning_rate": 5.489988175069279e-06, "loss": 0.5482, "step": 7509 }, { "epoch": 0.48, "grad_norm": 1.2150331735610962, "learning_rate": 5.488947879860577e-06, "loss": 0.5244, "step": 7510 }, { "epoch": 0.48, "grad_norm": 1.0831706523895264, "learning_rate": 5.487907563281226e-06, "loss": 0.4583, "step": 7511 }, { "epoch": 0.48, "grad_norm": 1.2239423990249634, "learning_rate": 5.486867225376693e-06, "loss": 0.5636, "step": 7512 }, { "epoch": 0.48, "grad_norm": 1.1606922149658203, "learning_rate": 5.48582686619245e-06, "loss": 0.5367, "step": 7513 }, { "epoch": 0.49, "grad_norm": 1.2109874486923218, "learning_rate": 5.484786485773968e-06, "loss": 0.5688, "step": 7514 }, { "epoch": 0.49, "grad_norm": 1.0643665790557861, "learning_rate": 5.48374608416672e-06, "loss": 0.5229, "step": 7515 }, { "epoch": 0.49, "grad_norm": 1.3685299158096313, "learning_rate": 5.482705661416179e-06, "loss": 0.6023, "step": 7516 }, { "epoch": 0.49, "grad_norm": 1.1903036832809448, "learning_rate": 5.481665217567819e-06, "loss": 0.5393, "step": 7517 }, { "epoch": 0.49, "grad_norm": 1.1953743696212769, "learning_rate": 5.480624752667114e-06, "loss": 0.5179, "step": 7518 }, { "epoch": 0.49, "grad_norm": 1.2427139282226562, "learning_rate": 5.4795842667595415e-06, "loss": 0.507, "step": 7519 }, { "epoch": 0.49, "grad_norm": 1.1794312000274658, "learning_rate": 5.478543759890579e-06, "loss": 0.5322, "step": 7520 }, { "epoch": 0.49, "grad_norm": 1.2656372785568237, "learning_rate": 5.477503232105702e-06, "loss": 0.5777, "step": 7521 }, { "epoch": 0.49, "grad_norm": 1.1044962406158447, "learning_rate": 5.4764626834503905e-06, "loss": 0.5206, "step": 7522 }, { "epoch": 0.49, "grad_norm": 1.1624263525009155, "learning_rate": 5.4754221139701265e-06, "loss": 0.5026, "step": 7523 }, { "epoch": 0.49, "grad_norm": 1.1239395141601562, "learning_rate": 5.474381523710387e-06, "loss": 0.5086, "step": 7524 }, { "epoch": 0.49, "grad_norm": 1.2795528173446655, "learning_rate": 5.473340912716655e-06, "loss": 0.5867, "step": 7525 }, { "epoch": 0.49, "grad_norm": 1.2199307680130005, "learning_rate": 5.472300281034414e-06, "loss": 0.5795, "step": 7526 }, { "epoch": 0.49, "grad_norm": 1.191631555557251, "learning_rate": 5.4712596287091446e-06, "loss": 0.6043, "step": 7527 }, { "epoch": 0.49, "grad_norm": 1.3435510396957397, "learning_rate": 5.470218955786334e-06, "loss": 0.5809, "step": 7528 }, { "epoch": 0.49, "grad_norm": 1.2073687314987183, "learning_rate": 5.469178262311465e-06, "loss": 0.5751, "step": 7529 }, { "epoch": 0.49, "grad_norm": 1.1455962657928467, "learning_rate": 5.468137548330026e-06, "loss": 0.474, "step": 7530 }, { "epoch": 0.49, "grad_norm": 1.3976091146469116, "learning_rate": 5.4670968138875015e-06, "loss": 0.5539, "step": 7531 }, { "epoch": 0.49, "grad_norm": 1.1524885892868042, "learning_rate": 5.466056059029381e-06, "loss": 0.5348, "step": 7532 }, { "epoch": 0.49, "grad_norm": 1.14180326461792, "learning_rate": 5.4650152838011515e-06, "loss": 0.5888, "step": 7533 }, { "epoch": 0.49, "grad_norm": 1.0816991329193115, "learning_rate": 5.463974488248305e-06, "loss": 0.5581, "step": 7534 }, { "epoch": 0.49, "grad_norm": 1.1404600143432617, "learning_rate": 5.46293367241633e-06, "loss": 0.5229, "step": 7535 }, { "epoch": 0.49, "grad_norm": 1.1724812984466553, "learning_rate": 5.461892836350718e-06, "loss": 0.5334, "step": 7536 }, { "epoch": 0.49, "grad_norm": 1.249756932258606, "learning_rate": 5.460851980096964e-06, "loss": 0.5311, "step": 7537 }, { "epoch": 0.49, "grad_norm": 1.1254549026489258, "learning_rate": 5.459811103700557e-06, "loss": 0.5246, "step": 7538 }, { "epoch": 0.49, "grad_norm": 1.1330734491348267, "learning_rate": 5.458770207206995e-06, "loss": 0.508, "step": 7539 }, { "epoch": 0.49, "grad_norm": 1.0665194988250732, "learning_rate": 5.457729290661769e-06, "loss": 0.5342, "step": 7540 }, { "epoch": 0.49, "grad_norm": 1.2327380180358887, "learning_rate": 5.456688354110377e-06, "loss": 0.5957, "step": 7541 }, { "epoch": 0.49, "grad_norm": 1.2357500791549683, "learning_rate": 5.455647397598316e-06, "loss": 0.5526, "step": 7542 }, { "epoch": 0.49, "grad_norm": 1.0849530696868896, "learning_rate": 5.454606421171082e-06, "loss": 0.5348, "step": 7543 }, { "epoch": 0.49, "grad_norm": 1.1725434064865112, "learning_rate": 5.453565424874174e-06, "loss": 0.4982, "step": 7544 }, { "epoch": 0.49, "grad_norm": 1.2111589908599854, "learning_rate": 5.452524408753091e-06, "loss": 0.5289, "step": 7545 }, { "epoch": 0.49, "grad_norm": 1.1298447847366333, "learning_rate": 5.451483372853335e-06, "loss": 0.4823, "step": 7546 }, { "epoch": 0.49, "grad_norm": 1.1097875833511353, "learning_rate": 5.450442317220406e-06, "loss": 0.5374, "step": 7547 }, { "epoch": 0.49, "grad_norm": 1.3404027223587036, "learning_rate": 5.449401241899804e-06, "loss": 0.5055, "step": 7548 }, { "epoch": 0.49, "grad_norm": 1.1503758430480957, "learning_rate": 5.448360146937034e-06, "loss": 0.5214, "step": 7549 }, { "epoch": 0.49, "grad_norm": 1.1358855962753296, "learning_rate": 5.4473190323776e-06, "loss": 0.5195, "step": 7550 }, { "epoch": 0.49, "grad_norm": 1.130581021308899, "learning_rate": 5.4462778982670025e-06, "loss": 0.4641, "step": 7551 }, { "epoch": 0.49, "grad_norm": 1.2341811656951904, "learning_rate": 5.445236744650751e-06, "loss": 0.4777, "step": 7552 }, { "epoch": 0.49, "grad_norm": 1.0807569026947021, "learning_rate": 5.44419557157435e-06, "loss": 0.5128, "step": 7553 }, { "epoch": 0.49, "grad_norm": 1.1887434720993042, "learning_rate": 5.443154379083306e-06, "loss": 0.5354, "step": 7554 }, { "epoch": 0.49, "grad_norm": 1.1608244180679321, "learning_rate": 5.442113167223129e-06, "loss": 0.5138, "step": 7555 }, { "epoch": 0.49, "grad_norm": 1.2836264371871948, "learning_rate": 5.441071936039325e-06, "loss": 0.5368, "step": 7556 }, { "epoch": 0.49, "grad_norm": 1.0790698528289795, "learning_rate": 5.440030685577404e-06, "loss": 0.5253, "step": 7557 }, { "epoch": 0.49, "grad_norm": 1.1891390085220337, "learning_rate": 5.438989415882878e-06, "loss": 0.5732, "step": 7558 }, { "epoch": 0.49, "grad_norm": 1.1566271781921387, "learning_rate": 5.437948127001257e-06, "loss": 0.5095, "step": 7559 }, { "epoch": 0.49, "grad_norm": 1.1518301963806152, "learning_rate": 5.436906818978052e-06, "loss": 0.5538, "step": 7560 }, { "epoch": 0.49, "grad_norm": 1.1578559875488281, "learning_rate": 5.435865491858781e-06, "loss": 0.5079, "step": 7561 }, { "epoch": 0.49, "grad_norm": 1.08319091796875, "learning_rate": 5.43482414568895e-06, "loss": 0.504, "step": 7562 }, { "epoch": 0.49, "grad_norm": 1.1596202850341797, "learning_rate": 5.433782780514079e-06, "loss": 0.5436, "step": 7563 }, { "epoch": 0.49, "grad_norm": 1.131249189376831, "learning_rate": 5.432741396379681e-06, "loss": 0.4984, "step": 7564 }, { "epoch": 0.49, "grad_norm": 1.325500249862671, "learning_rate": 5.431699993331274e-06, "loss": 0.5553, "step": 7565 }, { "epoch": 0.49, "grad_norm": 1.2382292747497559, "learning_rate": 5.430658571414374e-06, "loss": 0.5344, "step": 7566 }, { "epoch": 0.49, "grad_norm": 1.1526868343353271, "learning_rate": 5.4296171306745e-06, "loss": 0.5326, "step": 7567 }, { "epoch": 0.49, "grad_norm": 1.1833299398422241, "learning_rate": 5.4285756711571665e-06, "loss": 0.5407, "step": 7568 }, { "epoch": 0.49, "grad_norm": 1.1198761463165283, "learning_rate": 5.427534192907899e-06, "loss": 0.5188, "step": 7569 }, { "epoch": 0.49, "grad_norm": 1.1912684440612793, "learning_rate": 5.426492695972214e-06, "loss": 0.5443, "step": 7570 }, { "epoch": 0.49, "grad_norm": 1.0699470043182373, "learning_rate": 5.425451180395633e-06, "loss": 0.5031, "step": 7571 }, { "epoch": 0.49, "grad_norm": 1.2813829183578491, "learning_rate": 5.424409646223679e-06, "loss": 0.5706, "step": 7572 }, { "epoch": 0.49, "grad_norm": 1.2434666156768799, "learning_rate": 5.423368093501876e-06, "loss": 0.5567, "step": 7573 }, { "epoch": 0.49, "grad_norm": 1.1973146200180054, "learning_rate": 5.422326522275744e-06, "loss": 0.5461, "step": 7574 }, { "epoch": 0.49, "grad_norm": 1.0963596105575562, "learning_rate": 5.421284932590809e-06, "loss": 0.536, "step": 7575 }, { "epoch": 0.49, "grad_norm": 1.2742305994033813, "learning_rate": 5.420243324492599e-06, "loss": 0.5348, "step": 7576 }, { "epoch": 0.49, "grad_norm": 1.0914641618728638, "learning_rate": 5.419201698026635e-06, "loss": 0.5115, "step": 7577 }, { "epoch": 0.49, "grad_norm": 1.1202526092529297, "learning_rate": 5.418160053238447e-06, "loss": 0.5408, "step": 7578 }, { "epoch": 0.49, "grad_norm": 1.243452787399292, "learning_rate": 5.417118390173562e-06, "loss": 0.4975, "step": 7579 }, { "epoch": 0.49, "grad_norm": 1.171875238418579, "learning_rate": 5.416076708877509e-06, "loss": 0.4997, "step": 7580 }, { "epoch": 0.49, "grad_norm": 1.0746245384216309, "learning_rate": 5.415035009395817e-06, "loss": 0.5972, "step": 7581 }, { "epoch": 0.49, "grad_norm": 1.1271456480026245, "learning_rate": 5.413993291774015e-06, "loss": 0.5462, "step": 7582 }, { "epoch": 0.49, "grad_norm": 1.1211036443710327, "learning_rate": 5.412951556057633e-06, "loss": 0.5413, "step": 7583 }, { "epoch": 0.49, "grad_norm": 1.1568734645843506, "learning_rate": 5.411909802292206e-06, "loss": 0.5678, "step": 7584 }, { "epoch": 0.49, "grad_norm": 1.137654423713684, "learning_rate": 5.410868030523262e-06, "loss": 0.561, "step": 7585 }, { "epoch": 0.49, "grad_norm": 1.1563918590545654, "learning_rate": 5.409826240796339e-06, "loss": 0.4849, "step": 7586 }, { "epoch": 0.49, "grad_norm": 1.184324026107788, "learning_rate": 5.408784433156966e-06, "loss": 0.5599, "step": 7587 }, { "epoch": 0.49, "grad_norm": 1.3376977443695068, "learning_rate": 5.407742607650682e-06, "loss": 0.5447, "step": 7588 }, { "epoch": 0.49, "grad_norm": 1.2917433977127075, "learning_rate": 5.4067007643230195e-06, "loss": 0.5429, "step": 7589 }, { "epoch": 0.49, "grad_norm": 1.1925631761550903, "learning_rate": 5.405658903219515e-06, "loss": 0.5682, "step": 7590 }, { "epoch": 0.49, "grad_norm": 1.1811245679855347, "learning_rate": 5.404617024385709e-06, "loss": 0.6016, "step": 7591 }, { "epoch": 0.49, "grad_norm": 1.2820780277252197, "learning_rate": 5.403575127867134e-06, "loss": 0.5583, "step": 7592 }, { "epoch": 0.49, "grad_norm": 1.0932893753051758, "learning_rate": 5.402533213709333e-06, "loss": 0.485, "step": 7593 }, { "epoch": 0.49, "grad_norm": 1.102400302886963, "learning_rate": 5.401491281957845e-06, "loss": 0.495, "step": 7594 }, { "epoch": 0.49, "grad_norm": 1.1474465131759644, "learning_rate": 5.400449332658206e-06, "loss": 0.4913, "step": 7595 }, { "epoch": 0.49, "grad_norm": 1.1178895235061646, "learning_rate": 5.399407365855962e-06, "loss": 0.5598, "step": 7596 }, { "epoch": 0.49, "grad_norm": 1.1734375953674316, "learning_rate": 5.398365381596652e-06, "loss": 0.5278, "step": 7597 }, { "epoch": 0.49, "grad_norm": 1.1805366277694702, "learning_rate": 5.397323379925818e-06, "loss": 0.5037, "step": 7598 }, { "epoch": 0.49, "grad_norm": 1.092585802078247, "learning_rate": 5.396281360889006e-06, "loss": 0.497, "step": 7599 }, { "epoch": 0.49, "grad_norm": 1.4044487476348877, "learning_rate": 5.395239324531756e-06, "loss": 0.5665, "step": 7600 }, { "epoch": 0.49, "grad_norm": 1.1191613674163818, "learning_rate": 5.394197270899616e-06, "loss": 0.5481, "step": 7601 }, { "epoch": 0.49, "grad_norm": 1.1824854612350464, "learning_rate": 5.3931552000381305e-06, "loss": 0.5147, "step": 7602 }, { "epoch": 0.49, "grad_norm": 1.097609281539917, "learning_rate": 5.392113111992845e-06, "loss": 0.5191, "step": 7603 }, { "epoch": 0.49, "grad_norm": 1.1613041162490845, "learning_rate": 5.391071006809308e-06, "loss": 0.5081, "step": 7604 }, { "epoch": 0.49, "grad_norm": 1.0482887029647827, "learning_rate": 5.390028884533066e-06, "loss": 0.5372, "step": 7605 }, { "epoch": 0.49, "grad_norm": 1.4032565355300903, "learning_rate": 5.388986745209667e-06, "loss": 0.5534, "step": 7606 }, { "epoch": 0.49, "grad_norm": 1.2612310647964478, "learning_rate": 5.387944588884661e-06, "loss": 0.5855, "step": 7607 }, { "epoch": 0.49, "grad_norm": 1.0974284410476685, "learning_rate": 5.386902415603599e-06, "loss": 0.5346, "step": 7608 }, { "epoch": 0.49, "grad_norm": 1.193663477897644, "learning_rate": 5.3858602254120294e-06, "loss": 0.5808, "step": 7609 }, { "epoch": 0.49, "grad_norm": 1.201838493347168, "learning_rate": 5.384818018355504e-06, "loss": 0.5486, "step": 7610 }, { "epoch": 0.49, "grad_norm": 1.1249217987060547, "learning_rate": 5.383775794479579e-06, "loss": 0.5186, "step": 7611 }, { "epoch": 0.49, "grad_norm": 1.1868623495101929, "learning_rate": 5.382733553829802e-06, "loss": 0.5283, "step": 7612 }, { "epoch": 0.49, "grad_norm": 1.1665679216384888, "learning_rate": 5.381691296451729e-06, "loss": 0.5488, "step": 7613 }, { "epoch": 0.49, "grad_norm": 1.2856723070144653, "learning_rate": 5.380649022390915e-06, "loss": 0.5329, "step": 7614 }, { "epoch": 0.49, "grad_norm": 1.3287138938903809, "learning_rate": 5.379606731692914e-06, "loss": 0.5427, "step": 7615 }, { "epoch": 0.49, "grad_norm": 1.1857757568359375, "learning_rate": 5.378564424403281e-06, "loss": 0.5291, "step": 7616 }, { "epoch": 0.49, "grad_norm": 1.2549891471862793, "learning_rate": 5.377522100567575e-06, "loss": 0.5471, "step": 7617 }, { "epoch": 0.49, "grad_norm": 1.340527057647705, "learning_rate": 5.376479760231351e-06, "loss": 0.5621, "step": 7618 }, { "epoch": 0.49, "grad_norm": 1.316412329673767, "learning_rate": 5.375437403440169e-06, "loss": 0.5292, "step": 7619 }, { "epoch": 0.49, "grad_norm": 1.176963448524475, "learning_rate": 5.374395030239587e-06, "loss": 0.609, "step": 7620 }, { "epoch": 0.49, "grad_norm": 1.3863468170166016, "learning_rate": 5.373352640675163e-06, "loss": 0.5482, "step": 7621 }, { "epoch": 0.49, "grad_norm": 1.2824138402938843, "learning_rate": 5.372310234792459e-06, "loss": 0.5507, "step": 7622 }, { "epoch": 0.49, "grad_norm": 1.1706299781799316, "learning_rate": 5.371267812637035e-06, "loss": 0.555, "step": 7623 }, { "epoch": 0.49, "grad_norm": 1.1394027471542358, "learning_rate": 5.370225374254453e-06, "loss": 0.5074, "step": 7624 }, { "epoch": 0.49, "grad_norm": 1.1415951251983643, "learning_rate": 5.369182919690275e-06, "loss": 0.5465, "step": 7625 }, { "epoch": 0.49, "grad_norm": 1.172597885131836, "learning_rate": 5.368140448990064e-06, "loss": 0.5504, "step": 7626 }, { "epoch": 0.49, "grad_norm": 1.1498122215270996, "learning_rate": 5.367097962199385e-06, "loss": 0.5548, "step": 7627 }, { "epoch": 0.49, "grad_norm": 1.211609959602356, "learning_rate": 5.3660554593638e-06, "loss": 0.598, "step": 7628 }, { "epoch": 0.49, "grad_norm": 1.213610291481018, "learning_rate": 5.365012940528876e-06, "loss": 0.518, "step": 7629 }, { "epoch": 0.49, "grad_norm": 1.2270804643630981, "learning_rate": 5.363970405740178e-06, "loss": 0.5251, "step": 7630 }, { "epoch": 0.49, "grad_norm": 1.3449360132217407, "learning_rate": 5.3629278550432705e-06, "loss": 0.4969, "step": 7631 }, { "epoch": 0.49, "grad_norm": 1.1352112293243408, "learning_rate": 5.361885288483725e-06, "loss": 0.5004, "step": 7632 }, { "epoch": 0.49, "grad_norm": 1.1759967803955078, "learning_rate": 5.360842706107107e-06, "loss": 0.4995, "step": 7633 }, { "epoch": 0.49, "grad_norm": 1.4078569412231445, "learning_rate": 5.359800107958985e-06, "loss": 0.5086, "step": 7634 }, { "epoch": 0.49, "grad_norm": 1.131999135017395, "learning_rate": 5.358757494084928e-06, "loss": 0.5169, "step": 7635 }, { "epoch": 0.49, "grad_norm": 1.6524149179458618, "learning_rate": 5.357714864530508e-06, "loss": 0.5636, "step": 7636 }, { "epoch": 0.49, "grad_norm": 1.1380126476287842, "learning_rate": 5.356672219341293e-06, "loss": 0.5624, "step": 7637 }, { "epoch": 0.49, "grad_norm": 1.1572401523590088, "learning_rate": 5.355629558562856e-06, "loss": 0.5699, "step": 7638 }, { "epoch": 0.49, "grad_norm": 1.2533541917800903, "learning_rate": 5.3545868822407674e-06, "loss": 0.5396, "step": 7639 }, { "epoch": 0.49, "grad_norm": 1.142591118812561, "learning_rate": 5.353544190420603e-06, "loss": 0.5075, "step": 7640 }, { "epoch": 0.49, "grad_norm": 1.274195671081543, "learning_rate": 5.352501483147933e-06, "loss": 0.5556, "step": 7641 }, { "epoch": 0.49, "grad_norm": 1.1212817430496216, "learning_rate": 5.351458760468332e-06, "loss": 0.5281, "step": 7642 }, { "epoch": 0.49, "grad_norm": 1.302475094795227, "learning_rate": 5.350416022427377e-06, "loss": 0.5697, "step": 7643 }, { "epoch": 0.49, "grad_norm": 1.1607937812805176, "learning_rate": 5.349373269070641e-06, "loss": 0.5237, "step": 7644 }, { "epoch": 0.49, "grad_norm": 1.1147608757019043, "learning_rate": 5.348330500443701e-06, "loss": 0.4788, "step": 7645 }, { "epoch": 0.49, "grad_norm": 1.1872674226760864, "learning_rate": 5.347287716592133e-06, "loss": 0.558, "step": 7646 }, { "epoch": 0.49, "grad_norm": 1.1224817037582397, "learning_rate": 5.346244917561515e-06, "loss": 0.4972, "step": 7647 }, { "epoch": 0.49, "grad_norm": 1.1229914426803589, "learning_rate": 5.345202103397424e-06, "loss": 0.5783, "step": 7648 }, { "epoch": 0.49, "grad_norm": 1.2072649002075195, "learning_rate": 5.344159274145441e-06, "loss": 0.5049, "step": 7649 }, { "epoch": 0.49, "grad_norm": 1.1576842069625854, "learning_rate": 5.343116429851145e-06, "loss": 0.5441, "step": 7650 }, { "epoch": 0.49, "grad_norm": 1.0818231105804443, "learning_rate": 5.342073570560114e-06, "loss": 0.5022, "step": 7651 }, { "epoch": 0.49, "grad_norm": 1.1298221349716187, "learning_rate": 5.34103069631793e-06, "loss": 0.5482, "step": 7652 }, { "epoch": 0.49, "grad_norm": 1.2153106927871704, "learning_rate": 5.339987807170174e-06, "loss": 0.5326, "step": 7653 }, { "epoch": 0.49, "grad_norm": 1.1127243041992188, "learning_rate": 5.338944903162428e-06, "loss": 0.5417, "step": 7654 }, { "epoch": 0.49, "grad_norm": 1.064591646194458, "learning_rate": 5.337901984340275e-06, "loss": 0.5193, "step": 7655 }, { "epoch": 0.49, "grad_norm": 1.2996517419815063, "learning_rate": 5.336859050749297e-06, "loss": 0.5363, "step": 7656 }, { "epoch": 0.49, "grad_norm": 1.1412984132766724, "learning_rate": 5.335816102435079e-06, "loss": 0.4992, "step": 7657 }, { "epoch": 0.49, "grad_norm": 1.2444841861724854, "learning_rate": 5.334773139443208e-06, "loss": 0.5676, "step": 7658 }, { "epoch": 0.49, "grad_norm": 1.1608144044876099, "learning_rate": 5.3337301618192646e-06, "loss": 0.5242, "step": 7659 }, { "epoch": 0.49, "grad_norm": 1.1293950080871582, "learning_rate": 5.332687169608836e-06, "loss": 0.5617, "step": 7660 }, { "epoch": 0.49, "grad_norm": 1.0950595140457153, "learning_rate": 5.331644162857513e-06, "loss": 0.5326, "step": 7661 }, { "epoch": 0.49, "grad_norm": 1.053153395652771, "learning_rate": 5.330601141610877e-06, "loss": 0.4975, "step": 7662 }, { "epoch": 0.49, "grad_norm": 1.2448877096176147, "learning_rate": 5.329558105914518e-06, "loss": 0.5664, "step": 7663 }, { "epoch": 0.49, "grad_norm": 1.0917404890060425, "learning_rate": 5.328515055814025e-06, "loss": 0.5366, "step": 7664 }, { "epoch": 0.49, "grad_norm": 1.1886612176895142, "learning_rate": 5.327471991354986e-06, "loss": 0.5118, "step": 7665 }, { "epoch": 0.49, "grad_norm": 1.12199068069458, "learning_rate": 5.326428912582992e-06, "loss": 0.5122, "step": 7666 }, { "epoch": 0.49, "grad_norm": 1.0499036312103271, "learning_rate": 5.325385819543633e-06, "loss": 0.4599, "step": 7667 }, { "epoch": 0.49, "grad_norm": 1.1031429767608643, "learning_rate": 5.324342712282497e-06, "loss": 0.5399, "step": 7668 }, { "epoch": 0.5, "grad_norm": 1.2337169647216797, "learning_rate": 5.323299590845179e-06, "loss": 0.5691, "step": 7669 }, { "epoch": 0.5, "grad_norm": 1.1570956707000732, "learning_rate": 5.322256455277271e-06, "loss": 0.5193, "step": 7670 }, { "epoch": 0.5, "grad_norm": 1.186313509941101, "learning_rate": 5.321213305624364e-06, "loss": 0.5402, "step": 7671 }, { "epoch": 0.5, "grad_norm": 1.190500259399414, "learning_rate": 5.320170141932052e-06, "loss": 0.5417, "step": 7672 }, { "epoch": 0.5, "grad_norm": 1.1238794326782227, "learning_rate": 5.31912696424593e-06, "loss": 0.5591, "step": 7673 }, { "epoch": 0.5, "grad_norm": 1.2923046350479126, "learning_rate": 5.31808377261159e-06, "loss": 0.5386, "step": 7674 }, { "epoch": 0.5, "grad_norm": 1.1262770891189575, "learning_rate": 5.317040567074631e-06, "loss": 0.4868, "step": 7675 }, { "epoch": 0.5, "grad_norm": 1.295341968536377, "learning_rate": 5.3159973476806446e-06, "loss": 0.5673, "step": 7676 }, { "epoch": 0.5, "grad_norm": 1.1467902660369873, "learning_rate": 5.314954114475231e-06, "loss": 0.5001, "step": 7677 }, { "epoch": 0.5, "grad_norm": 1.2461841106414795, "learning_rate": 5.313910867503986e-06, "loss": 0.5369, "step": 7678 }, { "epoch": 0.5, "grad_norm": 1.3012715578079224, "learning_rate": 5.312867606812506e-06, "loss": 0.5542, "step": 7679 }, { "epoch": 0.5, "grad_norm": 1.2326487302780151, "learning_rate": 5.31182433244639e-06, "loss": 0.5575, "step": 7680 }, { "epoch": 0.5, "grad_norm": 1.182056188583374, "learning_rate": 5.310781044451238e-06, "loss": 0.5384, "step": 7681 }, { "epoch": 0.5, "grad_norm": 1.234609603881836, "learning_rate": 5.309737742872647e-06, "loss": 0.5381, "step": 7682 }, { "epoch": 0.5, "grad_norm": 1.2533973455429077, "learning_rate": 5.308694427756219e-06, "loss": 0.5717, "step": 7683 }, { "epoch": 0.5, "grad_norm": 1.148665189743042, "learning_rate": 5.307651099147555e-06, "loss": 0.5126, "step": 7684 }, { "epoch": 0.5, "grad_norm": 1.1762863397598267, "learning_rate": 5.306607757092254e-06, "loss": 0.5235, "step": 7685 }, { "epoch": 0.5, "grad_norm": 1.1402829885482788, "learning_rate": 5.305564401635919e-06, "loss": 0.5553, "step": 7686 }, { "epoch": 0.5, "grad_norm": 1.23421049118042, "learning_rate": 5.304521032824153e-06, "loss": 0.5336, "step": 7687 }, { "epoch": 0.5, "grad_norm": 1.3458373546600342, "learning_rate": 5.303477650702556e-06, "loss": 0.4609, "step": 7688 }, { "epoch": 0.5, "grad_norm": 1.0978708267211914, "learning_rate": 5.302434255316737e-06, "loss": 0.5509, "step": 7689 }, { "epoch": 0.5, "grad_norm": 1.1032464504241943, "learning_rate": 5.301390846712295e-06, "loss": 0.5265, "step": 7690 }, { "epoch": 0.5, "grad_norm": 1.10223388671875, "learning_rate": 5.300347424934837e-06, "loss": 0.5279, "step": 7691 }, { "epoch": 0.5, "grad_norm": 1.1783840656280518, "learning_rate": 5.299303990029969e-06, "loss": 0.5253, "step": 7692 }, { "epoch": 0.5, "grad_norm": 1.0937764644622803, "learning_rate": 5.298260542043295e-06, "loss": 0.5502, "step": 7693 }, { "epoch": 0.5, "grad_norm": 1.2278755903244019, "learning_rate": 5.297217081020422e-06, "loss": 0.5354, "step": 7694 }, { "epoch": 0.5, "grad_norm": 1.2351484298706055, "learning_rate": 5.296173607006958e-06, "loss": 0.5623, "step": 7695 }, { "epoch": 0.5, "grad_norm": 1.1493550539016724, "learning_rate": 5.295130120048509e-06, "loss": 0.5323, "step": 7696 }, { "epoch": 0.5, "grad_norm": 1.185829520225525, "learning_rate": 5.294086620190685e-06, "loss": 0.5686, "step": 7697 }, { "epoch": 0.5, "grad_norm": 1.3089407682418823, "learning_rate": 5.293043107479093e-06, "loss": 0.5113, "step": 7698 }, { "epoch": 0.5, "grad_norm": 1.204541802406311, "learning_rate": 5.291999581959343e-06, "loss": 0.5466, "step": 7699 }, { "epoch": 0.5, "grad_norm": 1.2277565002441406, "learning_rate": 5.290956043677043e-06, "loss": 0.5277, "step": 7700 }, { "epoch": 0.5, "grad_norm": 1.11003839969635, "learning_rate": 5.289912492677807e-06, "loss": 0.5254, "step": 7701 }, { "epoch": 0.5, "grad_norm": 1.2235997915267944, "learning_rate": 5.2888689290072435e-06, "loss": 0.5319, "step": 7702 }, { "epoch": 0.5, "grad_norm": 1.053902506828308, "learning_rate": 5.287825352710964e-06, "loss": 0.5545, "step": 7703 }, { "epoch": 0.5, "grad_norm": 1.0905793905258179, "learning_rate": 5.286781763834581e-06, "loss": 0.5015, "step": 7704 }, { "epoch": 0.5, "grad_norm": 1.214762806892395, "learning_rate": 5.285738162423708e-06, "loss": 0.5155, "step": 7705 }, { "epoch": 0.5, "grad_norm": 1.1399286985397339, "learning_rate": 5.284694548523956e-06, "loss": 0.545, "step": 7706 }, { "epoch": 0.5, "grad_norm": 1.219923973083496, "learning_rate": 5.283650922180939e-06, "loss": 0.5313, "step": 7707 }, { "epoch": 0.5, "grad_norm": 1.138938069343567, "learning_rate": 5.2826072834402745e-06, "loss": 0.5734, "step": 7708 }, { "epoch": 0.5, "grad_norm": 1.1754947900772095, "learning_rate": 5.281563632347573e-06, "loss": 0.5702, "step": 7709 }, { "epoch": 0.5, "grad_norm": 1.321535587310791, "learning_rate": 5.280519968948451e-06, "loss": 0.6035, "step": 7710 }, { "epoch": 0.5, "grad_norm": 1.113982915878296, "learning_rate": 5.279476293288527e-06, "loss": 0.5375, "step": 7711 }, { "epoch": 0.5, "grad_norm": 1.1421645879745483, "learning_rate": 5.278432605413414e-06, "loss": 0.5409, "step": 7712 }, { "epoch": 0.5, "grad_norm": 1.0949416160583496, "learning_rate": 5.277388905368729e-06, "loss": 0.4942, "step": 7713 }, { "epoch": 0.5, "grad_norm": 1.1441619396209717, "learning_rate": 5.2763451932000935e-06, "loss": 0.5011, "step": 7714 }, { "epoch": 0.5, "grad_norm": 1.1625422239303589, "learning_rate": 5.27530146895312e-06, "loss": 0.5315, "step": 7715 }, { "epoch": 0.5, "grad_norm": 1.2337862253189087, "learning_rate": 5.27425773267343e-06, "loss": 0.5289, "step": 7716 }, { "epoch": 0.5, "grad_norm": 1.2883881330490112, "learning_rate": 5.273213984406643e-06, "loss": 0.5562, "step": 7717 }, { "epoch": 0.5, "grad_norm": 1.1874619722366333, "learning_rate": 5.272170224198377e-06, "loss": 0.5302, "step": 7718 }, { "epoch": 0.5, "grad_norm": 1.2349441051483154, "learning_rate": 5.271126452094254e-06, "loss": 0.5943, "step": 7719 }, { "epoch": 0.5, "grad_norm": 1.2570602893829346, "learning_rate": 5.270082668139892e-06, "loss": 0.5214, "step": 7720 }, { "epoch": 0.5, "grad_norm": 1.2172598838806152, "learning_rate": 5.2690388723809135e-06, "loss": 0.5323, "step": 7721 }, { "epoch": 0.5, "grad_norm": 1.1189780235290527, "learning_rate": 5.267995064862941e-06, "loss": 0.503, "step": 7722 }, { "epoch": 0.5, "grad_norm": 1.271881341934204, "learning_rate": 5.266951245631595e-06, "loss": 0.5447, "step": 7723 }, { "epoch": 0.5, "grad_norm": 1.348789095878601, "learning_rate": 5.2659074147324995e-06, "loss": 0.5425, "step": 7724 }, { "epoch": 0.5, "grad_norm": 1.3360055685043335, "learning_rate": 5.264863572211275e-06, "loss": 0.5444, "step": 7725 }, { "epoch": 0.5, "grad_norm": 1.1645920276641846, "learning_rate": 5.26381971811355e-06, "loss": 0.5674, "step": 7726 }, { "epoch": 0.5, "grad_norm": 1.1861889362335205, "learning_rate": 5.262775852484942e-06, "loss": 0.5571, "step": 7727 }, { "epoch": 0.5, "grad_norm": 1.2739434242248535, "learning_rate": 5.261731975371084e-06, "loss": 0.543, "step": 7728 }, { "epoch": 0.5, "grad_norm": 1.0937628746032715, "learning_rate": 5.260688086817594e-06, "loss": 0.5187, "step": 7729 }, { "epoch": 0.5, "grad_norm": 1.2289342880249023, "learning_rate": 5.259644186870099e-06, "loss": 0.5564, "step": 7730 }, { "epoch": 0.5, "grad_norm": 1.252579927444458, "learning_rate": 5.258600275574229e-06, "loss": 0.5723, "step": 7731 }, { "epoch": 0.5, "grad_norm": 1.3132266998291016, "learning_rate": 5.257556352975607e-06, "loss": 0.5881, "step": 7732 }, { "epoch": 0.5, "grad_norm": 1.1546375751495361, "learning_rate": 5.25651241911986e-06, "loss": 0.5368, "step": 7733 }, { "epoch": 0.5, "grad_norm": 1.2283525466918945, "learning_rate": 5.25546847405262e-06, "loss": 0.5465, "step": 7734 }, { "epoch": 0.5, "grad_norm": 1.240020751953125, "learning_rate": 5.254424517819509e-06, "loss": 0.5402, "step": 7735 }, { "epoch": 0.5, "grad_norm": 1.210371732711792, "learning_rate": 5.253380550466161e-06, "loss": 0.5405, "step": 7736 }, { "epoch": 0.5, "grad_norm": 1.2980023622512817, "learning_rate": 5.252336572038203e-06, "loss": 0.5458, "step": 7737 }, { "epoch": 0.5, "grad_norm": 1.1834850311279297, "learning_rate": 5.251292582581263e-06, "loss": 0.497, "step": 7738 }, { "epoch": 0.5, "grad_norm": 1.2606830596923828, "learning_rate": 5.250248582140972e-06, "loss": 0.5698, "step": 7739 }, { "epoch": 0.5, "grad_norm": 1.1229602098464966, "learning_rate": 5.249204570762963e-06, "loss": 0.5266, "step": 7740 }, { "epoch": 0.5, "grad_norm": 1.0914559364318848, "learning_rate": 5.248160548492864e-06, "loss": 0.5323, "step": 7741 }, { "epoch": 0.5, "grad_norm": 1.2939577102661133, "learning_rate": 5.247116515376308e-06, "loss": 0.4867, "step": 7742 }, { "epoch": 0.5, "grad_norm": 1.1953822374343872, "learning_rate": 5.246072471458929e-06, "loss": 0.5835, "step": 7743 }, { "epoch": 0.5, "grad_norm": 1.2002512216567993, "learning_rate": 5.245028416786353e-06, "loss": 0.5052, "step": 7744 }, { "epoch": 0.5, "grad_norm": 1.1933867931365967, "learning_rate": 5.243984351404219e-06, "loss": 0.5109, "step": 7745 }, { "epoch": 0.5, "grad_norm": 1.1895334720611572, "learning_rate": 5.242940275358159e-06, "loss": 0.5414, "step": 7746 }, { "epoch": 0.5, "grad_norm": 1.1946401596069336, "learning_rate": 5.241896188693805e-06, "loss": 0.567, "step": 7747 }, { "epoch": 0.5, "grad_norm": 1.2158410549163818, "learning_rate": 5.240852091456794e-06, "loss": 0.5479, "step": 7748 }, { "epoch": 0.5, "grad_norm": 1.266484022140503, "learning_rate": 5.2398079836927595e-06, "loss": 0.5996, "step": 7749 }, { "epoch": 0.5, "grad_norm": 1.0654284954071045, "learning_rate": 5.238763865447336e-06, "loss": 0.5023, "step": 7750 }, { "epoch": 0.5, "grad_norm": 1.1961854696273804, "learning_rate": 5.23771973676616e-06, "loss": 0.5391, "step": 7751 }, { "epoch": 0.5, "grad_norm": 1.21120023727417, "learning_rate": 5.236675597694869e-06, "loss": 0.5289, "step": 7752 }, { "epoch": 0.5, "grad_norm": 1.1335391998291016, "learning_rate": 5.235631448279097e-06, "loss": 0.4919, "step": 7753 }, { "epoch": 0.5, "grad_norm": 1.155288815498352, "learning_rate": 5.2345872885644825e-06, "loss": 0.5264, "step": 7754 }, { "epoch": 0.5, "grad_norm": 1.1307332515716553, "learning_rate": 5.2335431185966634e-06, "loss": 0.5597, "step": 7755 }, { "epoch": 0.5, "grad_norm": 1.1686135530471802, "learning_rate": 5.232498938421276e-06, "loss": 0.5277, "step": 7756 }, { "epoch": 0.5, "grad_norm": 1.134121060371399, "learning_rate": 5.231454748083959e-06, "loss": 0.5605, "step": 7757 }, { "epoch": 0.5, "grad_norm": 1.1864211559295654, "learning_rate": 5.2304105476303545e-06, "loss": 0.5864, "step": 7758 }, { "epoch": 0.5, "grad_norm": 1.1143114566802979, "learning_rate": 5.229366337106099e-06, "loss": 0.5286, "step": 7759 }, { "epoch": 0.5, "grad_norm": 1.2025948762893677, "learning_rate": 5.2283221165568324e-06, "loss": 0.5225, "step": 7760 }, { "epoch": 0.5, "grad_norm": 1.1325104236602783, "learning_rate": 5.227277886028195e-06, "loss": 0.5476, "step": 7761 }, { "epoch": 0.5, "grad_norm": 1.2873919010162354, "learning_rate": 5.2262336455658265e-06, "loss": 0.5616, "step": 7762 }, { "epoch": 0.5, "grad_norm": 1.249937891960144, "learning_rate": 5.22518939521537e-06, "loss": 0.5688, "step": 7763 }, { "epoch": 0.5, "grad_norm": 1.1243492364883423, "learning_rate": 5.224145135022467e-06, "loss": 0.5337, "step": 7764 }, { "epoch": 0.5, "grad_norm": 1.2980576753616333, "learning_rate": 5.223100865032757e-06, "loss": 0.5608, "step": 7765 }, { "epoch": 0.5, "grad_norm": 1.0570698976516724, "learning_rate": 5.222056585291885e-06, "loss": 0.483, "step": 7766 }, { "epoch": 0.5, "grad_norm": 1.2140209674835205, "learning_rate": 5.221012295845492e-06, "loss": 0.4957, "step": 7767 }, { "epoch": 0.5, "grad_norm": 1.120991826057434, "learning_rate": 5.219967996739222e-06, "loss": 0.4982, "step": 7768 }, { "epoch": 0.5, "grad_norm": 1.0982786417007446, "learning_rate": 5.218923688018717e-06, "loss": 0.5192, "step": 7769 }, { "epoch": 0.5, "grad_norm": 1.2386342287063599, "learning_rate": 5.217879369729624e-06, "loss": 0.529, "step": 7770 }, { "epoch": 0.5, "grad_norm": 1.2070401906967163, "learning_rate": 5.216835041917586e-06, "loss": 0.5661, "step": 7771 }, { "epoch": 0.5, "grad_norm": 1.1335554122924805, "learning_rate": 5.215790704628247e-06, "loss": 0.553, "step": 7772 }, { "epoch": 0.5, "grad_norm": 1.1317439079284668, "learning_rate": 5.214746357907254e-06, "loss": 0.4953, "step": 7773 }, { "epoch": 0.5, "grad_norm": 1.3042030334472656, "learning_rate": 5.21370200180025e-06, "loss": 0.5201, "step": 7774 }, { "epoch": 0.5, "grad_norm": 1.165939450263977, "learning_rate": 5.212657636352885e-06, "loss": 0.5241, "step": 7775 }, { "epoch": 0.5, "grad_norm": 1.176911473274231, "learning_rate": 5.211613261610801e-06, "loss": 0.5541, "step": 7776 }, { "epoch": 0.5, "grad_norm": 1.1386953592300415, "learning_rate": 5.21056887761965e-06, "loss": 0.5296, "step": 7777 }, { "epoch": 0.5, "grad_norm": 1.1882644891738892, "learning_rate": 5.209524484425075e-06, "loss": 0.535, "step": 7778 }, { "epoch": 0.5, "grad_norm": 1.0543464422225952, "learning_rate": 5.208480082072724e-06, "loss": 0.5677, "step": 7779 }, { "epoch": 0.5, "grad_norm": 1.1178048849105835, "learning_rate": 5.207435670608249e-06, "loss": 0.5285, "step": 7780 }, { "epoch": 0.5, "grad_norm": 1.1248948574066162, "learning_rate": 5.206391250077295e-06, "loss": 0.477, "step": 7781 }, { "epoch": 0.5, "grad_norm": 1.0758979320526123, "learning_rate": 5.205346820525512e-06, "loss": 0.4835, "step": 7782 }, { "epoch": 0.5, "grad_norm": 1.1384761333465576, "learning_rate": 5.204302381998548e-06, "loss": 0.5663, "step": 7783 }, { "epoch": 0.5, "grad_norm": 1.1672465801239014, "learning_rate": 5.203257934542056e-06, "loss": 0.5547, "step": 7784 }, { "epoch": 0.5, "grad_norm": 1.154524803161621, "learning_rate": 5.202213478201684e-06, "loss": 0.5108, "step": 7785 }, { "epoch": 0.5, "grad_norm": 1.2913771867752075, "learning_rate": 5.20116901302308e-06, "loss": 0.5311, "step": 7786 }, { "epoch": 0.5, "grad_norm": 1.2708568572998047, "learning_rate": 5.200124539051902e-06, "loss": 0.5559, "step": 7787 }, { "epoch": 0.5, "grad_norm": 1.2654136419296265, "learning_rate": 5.199080056333793e-06, "loss": 0.53, "step": 7788 }, { "epoch": 0.5, "grad_norm": 1.166499376296997, "learning_rate": 5.198035564914408e-06, "loss": 0.528, "step": 7789 }, { "epoch": 0.5, "grad_norm": 1.2769759893417358, "learning_rate": 5.196991064839403e-06, "loss": 0.5415, "step": 7790 }, { "epoch": 0.5, "grad_norm": 1.1457736492156982, "learning_rate": 5.195946556154424e-06, "loss": 0.4837, "step": 7791 }, { "epoch": 0.5, "grad_norm": 1.0859063863754272, "learning_rate": 5.1949020389051275e-06, "loss": 0.5543, "step": 7792 }, { "epoch": 0.5, "grad_norm": 1.124924898147583, "learning_rate": 5.193857513137166e-06, "loss": 0.5342, "step": 7793 }, { "epoch": 0.5, "grad_norm": 1.189165472984314, "learning_rate": 5.192812978896191e-06, "loss": 0.5414, "step": 7794 }, { "epoch": 0.5, "grad_norm": 1.1662095785140991, "learning_rate": 5.19176843622786e-06, "loss": 0.4666, "step": 7795 }, { "epoch": 0.5, "grad_norm": 1.1843101978302002, "learning_rate": 5.1907238851778255e-06, "loss": 0.5269, "step": 7796 }, { "epoch": 0.5, "grad_norm": 1.234484314918518, "learning_rate": 5.189679325791741e-06, "loss": 0.496, "step": 7797 }, { "epoch": 0.5, "grad_norm": 1.3048216104507446, "learning_rate": 5.1886347581152615e-06, "loss": 0.5619, "step": 7798 }, { "epoch": 0.5, "grad_norm": 1.221936821937561, "learning_rate": 5.187590182194047e-06, "loss": 0.482, "step": 7799 }, { "epoch": 0.5, "grad_norm": 1.3959534168243408, "learning_rate": 5.186545598073747e-06, "loss": 0.543, "step": 7800 }, { "epoch": 0.5, "grad_norm": 1.2225773334503174, "learning_rate": 5.18550100580002e-06, "loss": 0.5143, "step": 7801 }, { "epoch": 0.5, "grad_norm": 1.4092061519622803, "learning_rate": 5.1844564054185234e-06, "loss": 0.568, "step": 7802 }, { "epoch": 0.5, "grad_norm": 1.291857123374939, "learning_rate": 5.183411796974913e-06, "loss": 0.5697, "step": 7803 }, { "epoch": 0.5, "grad_norm": 1.1450297832489014, "learning_rate": 5.182367180514846e-06, "loss": 0.535, "step": 7804 }, { "epoch": 0.5, "grad_norm": 1.2216933965682983, "learning_rate": 5.181322556083981e-06, "loss": 0.5545, "step": 7805 }, { "epoch": 0.5, "grad_norm": 1.2386013269424438, "learning_rate": 5.180277923727975e-06, "loss": 0.5372, "step": 7806 }, { "epoch": 0.5, "grad_norm": 1.3210726976394653, "learning_rate": 5.1792332834924845e-06, "loss": 0.5833, "step": 7807 }, { "epoch": 0.5, "grad_norm": 1.3135019540786743, "learning_rate": 5.17818863542317e-06, "loss": 0.5211, "step": 7808 }, { "epoch": 0.5, "grad_norm": 1.1427934169769287, "learning_rate": 5.1771439795656905e-06, "loss": 0.5445, "step": 7809 }, { "epoch": 0.5, "grad_norm": 1.0780410766601562, "learning_rate": 5.176099315965706e-06, "loss": 0.5023, "step": 7810 }, { "epoch": 0.5, "grad_norm": 1.1940311193466187, "learning_rate": 5.175054644668872e-06, "loss": 0.5839, "step": 7811 }, { "epoch": 0.5, "grad_norm": 1.12567937374115, "learning_rate": 5.174009965720852e-06, "loss": 0.5199, "step": 7812 }, { "epoch": 0.5, "grad_norm": 1.16250479221344, "learning_rate": 5.172965279167307e-06, "loss": 0.5763, "step": 7813 }, { "epoch": 0.5, "grad_norm": 1.1807907819747925, "learning_rate": 5.171920585053894e-06, "loss": 0.5562, "step": 7814 }, { "epoch": 0.5, "grad_norm": 1.173722743988037, "learning_rate": 5.1708758834262776e-06, "loss": 0.5177, "step": 7815 }, { "epoch": 0.5, "grad_norm": 1.257441520690918, "learning_rate": 5.169831174330116e-06, "loss": 0.4993, "step": 7816 }, { "epoch": 0.5, "grad_norm": 1.4687978029251099, "learning_rate": 5.168786457811071e-06, "loss": 0.5248, "step": 7817 }, { "epoch": 0.5, "grad_norm": 1.2120842933654785, "learning_rate": 5.167741733914808e-06, "loss": 0.5136, "step": 7818 }, { "epoch": 0.5, "grad_norm": 1.1543493270874023, "learning_rate": 5.166697002686984e-06, "loss": 0.5363, "step": 7819 }, { "epoch": 0.5, "grad_norm": 1.2409253120422363, "learning_rate": 5.165652264173266e-06, "loss": 0.5432, "step": 7820 }, { "epoch": 0.5, "grad_norm": 1.2128525972366333, "learning_rate": 5.1646075184193135e-06, "loss": 0.5309, "step": 7821 }, { "epoch": 0.5, "grad_norm": 1.2755398750305176, "learning_rate": 5.163562765470792e-06, "loss": 0.5241, "step": 7822 }, { "epoch": 0.5, "grad_norm": 1.3728739023208618, "learning_rate": 5.162518005373364e-06, "loss": 0.5227, "step": 7823 }, { "epoch": 0.51, "grad_norm": 1.103628396987915, "learning_rate": 5.1614732381726915e-06, "loss": 0.4991, "step": 7824 }, { "epoch": 0.51, "grad_norm": 1.2723239660263062, "learning_rate": 5.160428463914443e-06, "loss": 0.5448, "step": 7825 }, { "epoch": 0.51, "grad_norm": 1.1597821712493896, "learning_rate": 5.159383682644279e-06, "loss": 0.5093, "step": 7826 }, { "epoch": 0.51, "grad_norm": 1.0708715915679932, "learning_rate": 5.158338894407865e-06, "loss": 0.5739, "step": 7827 }, { "epoch": 0.51, "grad_norm": 1.1129884719848633, "learning_rate": 5.157294099250868e-06, "loss": 0.5676, "step": 7828 }, { "epoch": 0.51, "grad_norm": 1.2071037292480469, "learning_rate": 5.156249297218951e-06, "loss": 0.5224, "step": 7829 }, { "epoch": 0.51, "grad_norm": 1.1629117727279663, "learning_rate": 5.155204488357779e-06, "loss": 0.5708, "step": 7830 }, { "epoch": 0.51, "grad_norm": 1.1334370374679565, "learning_rate": 5.1541596727130204e-06, "loss": 0.4792, "step": 7831 }, { "epoch": 0.51, "grad_norm": 1.1510213613510132, "learning_rate": 5.15311485033034e-06, "loss": 0.544, "step": 7832 }, { "epoch": 0.51, "grad_norm": 1.1882411241531372, "learning_rate": 5.152070021255404e-06, "loss": 0.5765, "step": 7833 }, { "epoch": 0.51, "grad_norm": 1.1113368272781372, "learning_rate": 5.151025185533881e-06, "loss": 0.4882, "step": 7834 }, { "epoch": 0.51, "grad_norm": 1.26118004322052, "learning_rate": 5.149980343211436e-06, "loss": 0.5498, "step": 7835 }, { "epoch": 0.51, "grad_norm": 1.100162386894226, "learning_rate": 5.148935494333736e-06, "loss": 0.505, "step": 7836 }, { "epoch": 0.51, "grad_norm": 1.2305821180343628, "learning_rate": 5.147890638946453e-06, "loss": 0.5539, "step": 7837 }, { "epoch": 0.51, "grad_norm": 1.064319133758545, "learning_rate": 5.14684577709525e-06, "loss": 0.4779, "step": 7838 }, { "epoch": 0.51, "grad_norm": 1.286368489265442, "learning_rate": 5.1458009088257955e-06, "loss": 0.5087, "step": 7839 }, { "epoch": 0.51, "grad_norm": 1.0821539163589478, "learning_rate": 5.144756034183762e-06, "loss": 0.4431, "step": 7840 }, { "epoch": 0.51, "grad_norm": 1.2042863368988037, "learning_rate": 5.143711153214814e-06, "loss": 0.5041, "step": 7841 }, { "epoch": 0.51, "grad_norm": 1.3060150146484375, "learning_rate": 5.142666265964622e-06, "loss": 0.5022, "step": 7842 }, { "epoch": 0.51, "grad_norm": 1.137740135192871, "learning_rate": 5.1416213724788574e-06, "loss": 0.5229, "step": 7843 }, { "epoch": 0.51, "grad_norm": 1.1925411224365234, "learning_rate": 5.140576472803186e-06, "loss": 0.5287, "step": 7844 }, { "epoch": 0.51, "grad_norm": 1.2892006635665894, "learning_rate": 5.13953156698328e-06, "loss": 0.5909, "step": 7845 }, { "epoch": 0.51, "grad_norm": 1.154076337814331, "learning_rate": 5.138486655064812e-06, "loss": 0.4942, "step": 7846 }, { "epoch": 0.51, "grad_norm": 1.0556875467300415, "learning_rate": 5.137441737093446e-06, "loss": 0.5242, "step": 7847 }, { "epoch": 0.51, "grad_norm": 1.6634103059768677, "learning_rate": 5.1363968131148575e-06, "loss": 0.5735, "step": 7848 }, { "epoch": 0.51, "grad_norm": 1.186650276184082, "learning_rate": 5.1353518831747175e-06, "loss": 0.514, "step": 7849 }, { "epoch": 0.51, "grad_norm": 1.1429258584976196, "learning_rate": 5.134306947318694e-06, "loss": 0.5009, "step": 7850 }, { "epoch": 0.51, "grad_norm": 1.1367371082305908, "learning_rate": 5.133262005592462e-06, "loss": 0.5683, "step": 7851 }, { "epoch": 0.51, "grad_norm": 1.2578537464141846, "learning_rate": 5.13221705804169e-06, "loss": 0.5373, "step": 7852 }, { "epoch": 0.51, "grad_norm": 1.232250452041626, "learning_rate": 5.131172104712052e-06, "loss": 0.521, "step": 7853 }, { "epoch": 0.51, "grad_norm": 1.1463603973388672, "learning_rate": 5.130127145649221e-06, "loss": 0.4833, "step": 7854 }, { "epoch": 0.51, "grad_norm": 1.157271385192871, "learning_rate": 5.129082180898867e-06, "loss": 0.5663, "step": 7855 }, { "epoch": 0.51, "grad_norm": 1.2051196098327637, "learning_rate": 5.128037210506664e-06, "loss": 0.5321, "step": 7856 }, { "epoch": 0.51, "grad_norm": 1.2073968648910522, "learning_rate": 5.126992234518284e-06, "loss": 0.5095, "step": 7857 }, { "epoch": 0.51, "grad_norm": 1.0920884609222412, "learning_rate": 5.1259472529794015e-06, "loss": 0.489, "step": 7858 }, { "epoch": 0.51, "grad_norm": 1.195332646369934, "learning_rate": 5.124902265935691e-06, "loss": 0.5743, "step": 7859 }, { "epoch": 0.51, "grad_norm": 1.18009614944458, "learning_rate": 5.123857273432824e-06, "loss": 0.5445, "step": 7860 }, { "epoch": 0.51, "grad_norm": 1.269842267036438, "learning_rate": 5.122812275516474e-06, "loss": 0.5572, "step": 7861 }, { "epoch": 0.51, "grad_norm": 1.2459334135055542, "learning_rate": 5.121767272232319e-06, "loss": 0.5588, "step": 7862 }, { "epoch": 0.51, "grad_norm": 1.2870631217956543, "learning_rate": 5.1207222636260276e-06, "loss": 0.5758, "step": 7863 }, { "epoch": 0.51, "grad_norm": 1.1602832078933716, "learning_rate": 5.11967724974328e-06, "loss": 0.5471, "step": 7864 }, { "epoch": 0.51, "grad_norm": 1.1567630767822266, "learning_rate": 5.118632230629747e-06, "loss": 0.5209, "step": 7865 }, { "epoch": 0.51, "grad_norm": 1.1551090478897095, "learning_rate": 5.117587206331105e-06, "loss": 0.4942, "step": 7866 }, { "epoch": 0.51, "grad_norm": 1.1896710395812988, "learning_rate": 5.116542176893032e-06, "loss": 0.5532, "step": 7867 }, { "epoch": 0.51, "grad_norm": 1.0926998853683472, "learning_rate": 5.115497142361197e-06, "loss": 0.5197, "step": 7868 }, { "epoch": 0.51, "grad_norm": 1.0834715366363525, "learning_rate": 5.114452102781283e-06, "loss": 0.4788, "step": 7869 }, { "epoch": 0.51, "grad_norm": 1.163498878479004, "learning_rate": 5.113407058198962e-06, "loss": 0.5219, "step": 7870 }, { "epoch": 0.51, "grad_norm": 1.1895365715026855, "learning_rate": 5.112362008659911e-06, "loss": 0.5508, "step": 7871 }, { "epoch": 0.51, "grad_norm": 1.1338411569595337, "learning_rate": 5.1113169542098075e-06, "loss": 0.5667, "step": 7872 }, { "epoch": 0.51, "grad_norm": 1.1780433654785156, "learning_rate": 5.110271894894326e-06, "loss": 0.4994, "step": 7873 }, { "epoch": 0.51, "grad_norm": 1.0839805603027344, "learning_rate": 5.109226830759144e-06, "loss": 0.4764, "step": 7874 }, { "epoch": 0.51, "grad_norm": 1.0894241333007812, "learning_rate": 5.108181761849941e-06, "loss": 0.4991, "step": 7875 }, { "epoch": 0.51, "grad_norm": 1.2261892557144165, "learning_rate": 5.107136688212391e-06, "loss": 0.5129, "step": 7876 }, { "epoch": 0.51, "grad_norm": 1.2282081842422485, "learning_rate": 5.1060916098921735e-06, "loss": 0.5245, "step": 7877 }, { "epoch": 0.51, "grad_norm": 1.1588937044143677, "learning_rate": 5.105046526934965e-06, "loss": 0.542, "step": 7878 }, { "epoch": 0.51, "grad_norm": 1.2854772806167603, "learning_rate": 5.104001439386443e-06, "loss": 0.5837, "step": 7879 }, { "epoch": 0.51, "grad_norm": 1.0840107202529907, "learning_rate": 5.102956347292287e-06, "loss": 0.5273, "step": 7880 }, { "epoch": 0.51, "grad_norm": 1.2339156866073608, "learning_rate": 5.101911250698175e-06, "loss": 0.5844, "step": 7881 }, { "epoch": 0.51, "grad_norm": 1.1613614559173584, "learning_rate": 5.100866149649785e-06, "loss": 0.557, "step": 7882 }, { "epoch": 0.51, "grad_norm": 1.0431406497955322, "learning_rate": 5.099821044192795e-06, "loss": 0.5664, "step": 7883 }, { "epoch": 0.51, "grad_norm": 1.1988943815231323, "learning_rate": 5.098775934372887e-06, "loss": 0.5467, "step": 7884 }, { "epoch": 0.51, "grad_norm": 1.1770998239517212, "learning_rate": 5.097730820235736e-06, "loss": 0.5097, "step": 7885 }, { "epoch": 0.51, "grad_norm": 1.4913898706436157, "learning_rate": 5.096685701827022e-06, "loss": 0.5468, "step": 7886 }, { "epoch": 0.51, "grad_norm": 1.202262282371521, "learning_rate": 5.095640579192428e-06, "loss": 0.5506, "step": 7887 }, { "epoch": 0.51, "grad_norm": 1.208024024963379, "learning_rate": 5.094595452377629e-06, "loss": 0.5453, "step": 7888 }, { "epoch": 0.51, "grad_norm": 1.1671324968338013, "learning_rate": 5.093550321428308e-06, "loss": 0.5859, "step": 7889 }, { "epoch": 0.51, "grad_norm": 1.242081642150879, "learning_rate": 5.092505186390143e-06, "loss": 0.5798, "step": 7890 }, { "epoch": 0.51, "grad_norm": 1.1110761165618896, "learning_rate": 5.0914600473088146e-06, "loss": 0.5398, "step": 7891 }, { "epoch": 0.51, "grad_norm": 1.0810966491699219, "learning_rate": 5.0904149042300045e-06, "loss": 0.54, "step": 7892 }, { "epoch": 0.51, "grad_norm": 1.3513864278793335, "learning_rate": 5.089369757199393e-06, "loss": 0.5325, "step": 7893 }, { "epoch": 0.51, "grad_norm": 1.2545437812805176, "learning_rate": 5.088324606262659e-06, "loss": 0.5529, "step": 7894 }, { "epoch": 0.51, "grad_norm": 1.1411640644073486, "learning_rate": 5.087279451465484e-06, "loss": 0.4856, "step": 7895 }, { "epoch": 0.51, "grad_norm": 1.1132619380950928, "learning_rate": 5.08623429285355e-06, "loss": 0.5401, "step": 7896 }, { "epoch": 0.51, "grad_norm": 1.161073923110962, "learning_rate": 5.085189130472536e-06, "loss": 0.5585, "step": 7897 }, { "epoch": 0.51, "grad_norm": 1.1601324081420898, "learning_rate": 5.084143964368128e-06, "loss": 0.551, "step": 7898 }, { "epoch": 0.51, "grad_norm": 1.2291392087936401, "learning_rate": 5.083098794586001e-06, "loss": 0.5367, "step": 7899 }, { "epoch": 0.51, "grad_norm": 1.1445565223693848, "learning_rate": 5.082053621171842e-06, "loss": 0.4917, "step": 7900 }, { "epoch": 0.51, "grad_norm": 1.1422920227050781, "learning_rate": 5.081008444171332e-06, "loss": 0.561, "step": 7901 }, { "epoch": 0.51, "grad_norm": 1.1560416221618652, "learning_rate": 5.079963263630149e-06, "loss": 0.5247, "step": 7902 }, { "epoch": 0.51, "grad_norm": 1.3020292520523071, "learning_rate": 5.07891807959398e-06, "loss": 0.4934, "step": 7903 }, { "epoch": 0.51, "grad_norm": 1.0961267948150635, "learning_rate": 5.077872892108505e-06, "loss": 0.5018, "step": 7904 }, { "epoch": 0.51, "grad_norm": 1.2887076139450073, "learning_rate": 5.076827701219406e-06, "loss": 0.5392, "step": 7905 }, { "epoch": 0.51, "grad_norm": 1.1616584062576294, "learning_rate": 5.075782506972366e-06, "loss": 0.6347, "step": 7906 }, { "epoch": 0.51, "grad_norm": 1.2132620811462402, "learning_rate": 5.0747373094130705e-06, "loss": 0.5217, "step": 7907 }, { "epoch": 0.51, "grad_norm": 1.1793595552444458, "learning_rate": 5.073692108587198e-06, "loss": 0.5428, "step": 7908 }, { "epoch": 0.51, "grad_norm": 1.1537350416183472, "learning_rate": 5.072646904540436e-06, "loss": 0.5157, "step": 7909 }, { "epoch": 0.51, "grad_norm": 1.1472373008728027, "learning_rate": 5.071601697318462e-06, "loss": 0.5175, "step": 7910 }, { "epoch": 0.51, "grad_norm": 1.2022571563720703, "learning_rate": 5.070556486966965e-06, "loss": 0.5483, "step": 7911 }, { "epoch": 0.51, "grad_norm": 1.0334526300430298, "learning_rate": 5.069511273531625e-06, "loss": 0.5185, "step": 7912 }, { "epoch": 0.51, "grad_norm": 1.1595790386199951, "learning_rate": 5.068466057058127e-06, "loss": 0.4933, "step": 7913 }, { "epoch": 0.51, "grad_norm": 1.1270925998687744, "learning_rate": 5.067420837592154e-06, "loss": 0.5268, "step": 7914 }, { "epoch": 0.51, "grad_norm": 1.1271237134933472, "learning_rate": 5.066375615179389e-06, "loss": 0.5272, "step": 7915 }, { "epoch": 0.51, "grad_norm": 1.3078213930130005, "learning_rate": 5.065330389865519e-06, "loss": 0.5376, "step": 7916 }, { "epoch": 0.51, "grad_norm": 1.1692254543304443, "learning_rate": 5.064285161696225e-06, "loss": 0.5416, "step": 7917 }, { "epoch": 0.51, "grad_norm": 1.1321972608566284, "learning_rate": 5.063239930717193e-06, "loss": 0.5269, "step": 7918 }, { "epoch": 0.51, "grad_norm": 1.1825004816055298, "learning_rate": 5.062194696974107e-06, "loss": 0.5662, "step": 7919 }, { "epoch": 0.51, "grad_norm": 1.2589465379714966, "learning_rate": 5.0611494605126506e-06, "loss": 0.5213, "step": 7920 }, { "epoch": 0.51, "grad_norm": 1.198716163635254, "learning_rate": 5.060104221378509e-06, "loss": 0.5741, "step": 7921 }, { "epoch": 0.51, "grad_norm": 1.157873511314392, "learning_rate": 5.059058979617368e-06, "loss": 0.5464, "step": 7922 }, { "epoch": 0.51, "grad_norm": 1.305326223373413, "learning_rate": 5.0580137352749105e-06, "loss": 0.5652, "step": 7923 }, { "epoch": 0.51, "grad_norm": 1.1633793115615845, "learning_rate": 5.0569684883968215e-06, "loss": 0.494, "step": 7924 }, { "epoch": 0.51, "grad_norm": 1.087225317955017, "learning_rate": 5.055923239028788e-06, "loss": 0.4496, "step": 7925 }, { "epoch": 0.51, "grad_norm": 1.4168436527252197, "learning_rate": 5.054877987216494e-06, "loss": 0.5482, "step": 7926 }, { "epoch": 0.51, "grad_norm": 1.1483304500579834, "learning_rate": 5.053832733005625e-06, "loss": 0.5045, "step": 7927 }, { "epoch": 0.51, "grad_norm": 1.0946357250213623, "learning_rate": 5.052787476441866e-06, "loss": 0.494, "step": 7928 }, { "epoch": 0.51, "grad_norm": 1.2851709127426147, "learning_rate": 5.0517422175709015e-06, "loss": 0.5526, "step": 7929 }, { "epoch": 0.51, "grad_norm": 1.3485356569290161, "learning_rate": 5.050696956438419e-06, "loss": 0.5206, "step": 7930 }, { "epoch": 0.51, "grad_norm": 1.2161924839019775, "learning_rate": 5.049651693090104e-06, "loss": 0.5367, "step": 7931 }, { "epoch": 0.51, "grad_norm": 1.204067349433899, "learning_rate": 5.0486064275716405e-06, "loss": 0.5448, "step": 7932 }, { "epoch": 0.51, "grad_norm": 1.1050083637237549, "learning_rate": 5.047561159928716e-06, "loss": 0.5084, "step": 7933 }, { "epoch": 0.51, "grad_norm": 1.1470496654510498, "learning_rate": 5.046515890207015e-06, "loss": 0.508, "step": 7934 }, { "epoch": 0.51, "grad_norm": 1.3372623920440674, "learning_rate": 5.045470618452226e-06, "loss": 0.576, "step": 7935 }, { "epoch": 0.51, "grad_norm": 1.2595409154891968, "learning_rate": 5.044425344710033e-06, "loss": 0.5374, "step": 7936 }, { "epoch": 0.51, "grad_norm": 1.0863436460494995, "learning_rate": 5.043380069026123e-06, "loss": 0.515, "step": 7937 }, { "epoch": 0.51, "grad_norm": 1.371933102607727, "learning_rate": 5.042334791446181e-06, "loss": 0.5561, "step": 7938 }, { "epoch": 0.51, "grad_norm": 1.279881238937378, "learning_rate": 5.041289512015896e-06, "loss": 0.5975, "step": 7939 }, { "epoch": 0.51, "grad_norm": 1.1908352375030518, "learning_rate": 5.040244230780952e-06, "loss": 0.5586, "step": 7940 }, { "epoch": 0.51, "grad_norm": 1.2724689245224, "learning_rate": 5.039198947787036e-06, "loss": 0.5254, "step": 7941 }, { "epoch": 0.51, "grad_norm": 1.1035637855529785, "learning_rate": 5.038153663079837e-06, "loss": 0.4858, "step": 7942 }, { "epoch": 0.51, "grad_norm": 1.0615663528442383, "learning_rate": 5.0371083767050375e-06, "loss": 0.5031, "step": 7943 }, { "epoch": 0.51, "grad_norm": 1.166831135749817, "learning_rate": 5.036063088708327e-06, "loss": 0.5577, "step": 7944 }, { "epoch": 0.51, "grad_norm": 1.190388560295105, "learning_rate": 5.035017799135393e-06, "loss": 0.5757, "step": 7945 }, { "epoch": 0.51, "grad_norm": 1.2623449563980103, "learning_rate": 5.0339725080319215e-06, "loss": 0.5689, "step": 7946 }, { "epoch": 0.51, "grad_norm": 1.2201136350631714, "learning_rate": 5.032927215443598e-06, "loss": 0.5056, "step": 7947 }, { "epoch": 0.51, "grad_norm": 1.2564033269882202, "learning_rate": 5.0318819214161126e-06, "loss": 0.5549, "step": 7948 }, { "epoch": 0.51, "grad_norm": 1.1667226552963257, "learning_rate": 5.030836625995149e-06, "loss": 0.5343, "step": 7949 }, { "epoch": 0.51, "grad_norm": 1.156650185585022, "learning_rate": 5.0297913292263965e-06, "loss": 0.5017, "step": 7950 }, { "epoch": 0.51, "grad_norm": 1.1575719118118286, "learning_rate": 5.028746031155544e-06, "loss": 0.5139, "step": 7951 }, { "epoch": 0.51, "grad_norm": 1.1279442310333252, "learning_rate": 5.027700731828273e-06, "loss": 0.5544, "step": 7952 }, { "epoch": 0.51, "grad_norm": 1.1632758378982544, "learning_rate": 5.026655431290277e-06, "loss": 0.5314, "step": 7953 }, { "epoch": 0.51, "grad_norm": 1.1817189455032349, "learning_rate": 5.025610129587241e-06, "loss": 0.5628, "step": 7954 }, { "epoch": 0.51, "grad_norm": 1.2064439058303833, "learning_rate": 5.024564826764851e-06, "loss": 0.5237, "step": 7955 }, { "epoch": 0.51, "grad_norm": 1.1610326766967773, "learning_rate": 5.023519522868796e-06, "loss": 0.5471, "step": 7956 }, { "epoch": 0.51, "grad_norm": 1.1361291408538818, "learning_rate": 5.022474217944764e-06, "loss": 0.5497, "step": 7957 }, { "epoch": 0.51, "grad_norm": 1.1660046577453613, "learning_rate": 5.0214289120384415e-06, "loss": 0.5953, "step": 7958 }, { "epoch": 0.51, "grad_norm": 1.2393817901611328, "learning_rate": 5.020383605195517e-06, "loss": 0.5126, "step": 7959 }, { "epoch": 0.51, "grad_norm": 1.3413686752319336, "learning_rate": 5.019338297461678e-06, "loss": 0.5298, "step": 7960 }, { "epoch": 0.51, "grad_norm": 1.2176527976989746, "learning_rate": 5.01829298888261e-06, "loss": 0.5837, "step": 7961 }, { "epoch": 0.51, "grad_norm": 1.1715296506881714, "learning_rate": 5.017247679504004e-06, "loss": 0.5706, "step": 7962 }, { "epoch": 0.51, "grad_norm": 1.3995699882507324, "learning_rate": 5.016202369371547e-06, "loss": 0.5324, "step": 7963 }, { "epoch": 0.51, "grad_norm": 1.1447882652282715, "learning_rate": 5.015157058530925e-06, "loss": 0.4922, "step": 7964 }, { "epoch": 0.51, "grad_norm": 1.2180103063583374, "learning_rate": 5.014111747027827e-06, "loss": 0.5369, "step": 7965 }, { "epoch": 0.51, "grad_norm": 1.1702316999435425, "learning_rate": 5.0130664349079426e-06, "loss": 0.5526, "step": 7966 }, { "epoch": 0.51, "grad_norm": 1.2097395658493042, "learning_rate": 5.012021122216957e-06, "loss": 0.5417, "step": 7967 }, { "epoch": 0.51, "grad_norm": 1.179652214050293, "learning_rate": 5.010975809000558e-06, "loss": 0.5671, "step": 7968 }, { "epoch": 0.51, "grad_norm": 1.1894699335098267, "learning_rate": 5.009930495304437e-06, "loss": 0.5108, "step": 7969 }, { "epoch": 0.51, "grad_norm": 1.2579896450042725, "learning_rate": 5.008885181174278e-06, "loss": 0.5708, "step": 7970 }, { "epoch": 0.51, "grad_norm": 1.1646790504455566, "learning_rate": 5.0078398666557705e-06, "loss": 0.5332, "step": 7971 }, { "epoch": 0.51, "grad_norm": 1.185076355934143, "learning_rate": 5.006794551794604e-06, "loss": 0.4804, "step": 7972 }, { "epoch": 0.51, "grad_norm": 1.1764159202575684, "learning_rate": 5.0057492366364644e-06, "loss": 0.5334, "step": 7973 }, { "epoch": 0.51, "grad_norm": 1.1013439893722534, "learning_rate": 5.00470392122704e-06, "loss": 0.5405, "step": 7974 }, { "epoch": 0.51, "grad_norm": 1.163038969039917, "learning_rate": 5.003658605612021e-06, "loss": 0.5505, "step": 7975 }, { "epoch": 0.51, "grad_norm": 1.1950868368148804, "learning_rate": 5.002613289837092e-06, "loss": 0.5704, "step": 7976 }, { "epoch": 0.51, "grad_norm": 1.1458388566970825, "learning_rate": 5.001567973947943e-06, "loss": 0.5734, "step": 7977 }, { "epoch": 0.51, "grad_norm": 1.0864287614822388, "learning_rate": 5.000522657990263e-06, "loss": 0.4848, "step": 7978 }, { "epoch": 0.52, "grad_norm": 1.1924909353256226, "learning_rate": 4.9994773420097395e-06, "loss": 0.5635, "step": 7979 }, { "epoch": 0.52, "grad_norm": 1.0951449871063232, "learning_rate": 4.998432026052059e-06, "loss": 0.5394, "step": 7980 }, { "epoch": 0.52, "grad_norm": 1.1498454809188843, "learning_rate": 4.99738671016291e-06, "loss": 0.5331, "step": 7981 }, { "epoch": 0.52, "grad_norm": 1.252240777015686, "learning_rate": 4.996341394387981e-06, "loss": 0.4939, "step": 7982 }, { "epoch": 0.52, "grad_norm": 1.325456976890564, "learning_rate": 4.9952960787729606e-06, "loss": 0.5841, "step": 7983 }, { "epoch": 0.52, "grad_norm": 1.030390739440918, "learning_rate": 4.994250763363536e-06, "loss": 0.4716, "step": 7984 }, { "epoch": 0.52, "grad_norm": 1.3147600889205933, "learning_rate": 4.993205448205399e-06, "loss": 0.4593, "step": 7985 }, { "epoch": 0.52, "grad_norm": 1.274001121520996, "learning_rate": 4.992160133344231e-06, "loss": 0.5735, "step": 7986 }, { "epoch": 0.52, "grad_norm": 1.1115336418151855, "learning_rate": 4.991114818825724e-06, "loss": 0.5521, "step": 7987 }, { "epoch": 0.52, "grad_norm": 1.2188843488693237, "learning_rate": 4.990069504695565e-06, "loss": 0.5576, "step": 7988 }, { "epoch": 0.52, "grad_norm": 1.095350980758667, "learning_rate": 4.989024190999443e-06, "loss": 0.5365, "step": 7989 }, { "epoch": 0.52, "grad_norm": 1.1856187582015991, "learning_rate": 4.987978877783044e-06, "loss": 0.5151, "step": 7990 }, { "epoch": 0.52, "grad_norm": 1.1109943389892578, "learning_rate": 4.98693356509206e-06, "loss": 0.5145, "step": 7991 }, { "epoch": 0.52, "grad_norm": 1.1336740255355835, "learning_rate": 4.985888252972174e-06, "loss": 0.5055, "step": 7992 }, { "epoch": 0.52, "grad_norm": 1.1583138704299927, "learning_rate": 4.984842941469077e-06, "loss": 0.5788, "step": 7993 }, { "epoch": 0.52, "grad_norm": 1.1263951063156128, "learning_rate": 4.983797630628455e-06, "loss": 0.5008, "step": 7994 }, { "epoch": 0.52, "grad_norm": 1.2601590156555176, "learning_rate": 4.982752320495997e-06, "loss": 0.55, "step": 7995 }, { "epoch": 0.52, "grad_norm": 1.2680455446243286, "learning_rate": 4.981707011117391e-06, "loss": 0.5401, "step": 7996 }, { "epoch": 0.52, "grad_norm": 1.167764663696289, "learning_rate": 4.980661702538326e-06, "loss": 0.5212, "step": 7997 }, { "epoch": 0.52, "grad_norm": 1.1393646001815796, "learning_rate": 4.979616394804485e-06, "loss": 0.5325, "step": 7998 }, { "epoch": 0.52, "grad_norm": 1.2648205757141113, "learning_rate": 4.978571087961559e-06, "loss": 0.5397, "step": 7999 }, { "epoch": 0.52, "grad_norm": 1.1836178302764893, "learning_rate": 4.977525782055238e-06, "loss": 0.5422, "step": 8000 }, { "epoch": 0.52, "grad_norm": 1.0607830286026, "learning_rate": 4.9764804771312045e-06, "loss": 0.5205, "step": 8001 }, { "epoch": 0.52, "grad_norm": 1.3224231004714966, "learning_rate": 4.975435173235151e-06, "loss": 0.5857, "step": 8002 }, { "epoch": 0.52, "grad_norm": 1.162575602531433, "learning_rate": 4.974389870412761e-06, "loss": 0.5885, "step": 8003 }, { "epoch": 0.52, "grad_norm": 1.1551181077957153, "learning_rate": 4.973344568709724e-06, "loss": 0.5228, "step": 8004 }, { "epoch": 0.52, "grad_norm": 1.146946668624878, "learning_rate": 4.9722992681717284e-06, "loss": 0.508, "step": 8005 }, { "epoch": 0.52, "grad_norm": 1.1346758604049683, "learning_rate": 4.971253968844458e-06, "loss": 0.5099, "step": 8006 }, { "epoch": 0.52, "grad_norm": 1.2154797315597534, "learning_rate": 4.9702086707736034e-06, "loss": 0.5602, "step": 8007 }, { "epoch": 0.52, "grad_norm": 1.1127598285675049, "learning_rate": 4.969163374004852e-06, "loss": 0.551, "step": 8008 }, { "epoch": 0.52, "grad_norm": 1.1615647077560425, "learning_rate": 4.96811807858389e-06, "loss": 0.5459, "step": 8009 }, { "epoch": 0.52, "grad_norm": 1.009620189666748, "learning_rate": 4.967072784556403e-06, "loss": 0.5063, "step": 8010 }, { "epoch": 0.52, "grad_norm": 1.179188847541809, "learning_rate": 4.96602749196808e-06, "loss": 0.5508, "step": 8011 }, { "epoch": 0.52, "grad_norm": 1.1886231899261475, "learning_rate": 4.964982200864608e-06, "loss": 0.4917, "step": 8012 }, { "epoch": 0.52, "grad_norm": 1.1808737516403198, "learning_rate": 4.963936911291673e-06, "loss": 0.4936, "step": 8013 }, { "epoch": 0.52, "grad_norm": 1.2200852632522583, "learning_rate": 4.962891623294965e-06, "loss": 0.5186, "step": 8014 }, { "epoch": 0.52, "grad_norm": 1.1227803230285645, "learning_rate": 4.961846336920167e-06, "loss": 0.5163, "step": 8015 }, { "epoch": 0.52, "grad_norm": 1.154115915298462, "learning_rate": 4.960801052212966e-06, "loss": 0.51, "step": 8016 }, { "epoch": 0.52, "grad_norm": 1.1490776538848877, "learning_rate": 4.95975576921905e-06, "loss": 0.5178, "step": 8017 }, { "epoch": 0.52, "grad_norm": 1.1263593435287476, "learning_rate": 4.958710487984106e-06, "loss": 0.5424, "step": 8018 }, { "epoch": 0.52, "grad_norm": 1.1812257766723633, "learning_rate": 4.95766520855382e-06, "loss": 0.5376, "step": 8019 }, { "epoch": 0.52, "grad_norm": 1.2001490592956543, "learning_rate": 4.95661993097388e-06, "loss": 0.5269, "step": 8020 }, { "epoch": 0.52, "grad_norm": 1.202764630317688, "learning_rate": 4.9555746552899696e-06, "loss": 0.4985, "step": 8021 }, { "epoch": 0.52, "grad_norm": 1.157419204711914, "learning_rate": 4.954529381547776e-06, "loss": 0.5511, "step": 8022 }, { "epoch": 0.52, "grad_norm": 1.162078619003296, "learning_rate": 4.953484109792985e-06, "loss": 0.5062, "step": 8023 }, { "epoch": 0.52, "grad_norm": 1.238012671470642, "learning_rate": 4.952438840071285e-06, "loss": 0.5672, "step": 8024 }, { "epoch": 0.52, "grad_norm": 1.1640820503234863, "learning_rate": 4.95139357242836e-06, "loss": 0.5246, "step": 8025 }, { "epoch": 0.52, "grad_norm": 1.1879322528839111, "learning_rate": 4.950348306909898e-06, "loss": 0.5152, "step": 8026 }, { "epoch": 0.52, "grad_norm": 1.1495215892791748, "learning_rate": 4.949303043561582e-06, "loss": 0.5308, "step": 8027 }, { "epoch": 0.52, "grad_norm": 1.1156377792358398, "learning_rate": 4.948257782429099e-06, "loss": 0.5197, "step": 8028 }, { "epoch": 0.52, "grad_norm": 1.179383635520935, "learning_rate": 4.947212523558136e-06, "loss": 0.6083, "step": 8029 }, { "epoch": 0.52, "grad_norm": 1.1388704776763916, "learning_rate": 4.946167266994376e-06, "loss": 0.4771, "step": 8030 }, { "epoch": 0.52, "grad_norm": 1.2416695356369019, "learning_rate": 4.945122012783506e-06, "loss": 0.5828, "step": 8031 }, { "epoch": 0.52, "grad_norm": 1.2354776859283447, "learning_rate": 4.944076760971214e-06, "loss": 0.558, "step": 8032 }, { "epoch": 0.52, "grad_norm": 1.124061942100525, "learning_rate": 4.943031511603179e-06, "loss": 0.4991, "step": 8033 }, { "epoch": 0.52, "grad_norm": 1.2004597187042236, "learning_rate": 4.941986264725091e-06, "loss": 0.5292, "step": 8034 }, { "epoch": 0.52, "grad_norm": 1.1752758026123047, "learning_rate": 4.940941020382633e-06, "loss": 0.5491, "step": 8035 }, { "epoch": 0.52, "grad_norm": 1.1729357242584229, "learning_rate": 4.9398957786214914e-06, "loss": 0.4517, "step": 8036 }, { "epoch": 0.52, "grad_norm": 1.2196366786956787, "learning_rate": 4.93885053948735e-06, "loss": 0.5266, "step": 8037 }, { "epoch": 0.52, "grad_norm": 1.2172889709472656, "learning_rate": 4.937805303025895e-06, "loss": 0.545, "step": 8038 }, { "epoch": 0.52, "grad_norm": 1.0790815353393555, "learning_rate": 4.936760069282809e-06, "loss": 0.5169, "step": 8039 }, { "epoch": 0.52, "grad_norm": 1.24812912940979, "learning_rate": 4.935714838303776e-06, "loss": 0.5004, "step": 8040 }, { "epoch": 0.52, "grad_norm": 1.2482950687408447, "learning_rate": 4.934669610134482e-06, "loss": 0.5632, "step": 8041 }, { "epoch": 0.52, "grad_norm": 1.2721889019012451, "learning_rate": 4.9336243848206115e-06, "loss": 0.5209, "step": 8042 }, { "epoch": 0.52, "grad_norm": 1.1807399988174438, "learning_rate": 4.932579162407849e-06, "loss": 0.4879, "step": 8043 }, { "epoch": 0.52, "grad_norm": 1.121546745300293, "learning_rate": 4.931533942941875e-06, "loss": 0.5068, "step": 8044 }, { "epoch": 0.52, "grad_norm": 1.108538031578064, "learning_rate": 4.930488726468377e-06, "loss": 0.4989, "step": 8045 }, { "epoch": 0.52, "grad_norm": 1.0460768938064575, "learning_rate": 4.9294435130330355e-06, "loss": 0.5176, "step": 8046 }, { "epoch": 0.52, "grad_norm": 1.0753644704818726, "learning_rate": 4.928398302681539e-06, "loss": 0.5142, "step": 8047 }, { "epoch": 0.52, "grad_norm": 1.1543495655059814, "learning_rate": 4.9273530954595664e-06, "loss": 0.5759, "step": 8048 }, { "epoch": 0.52, "grad_norm": 1.2370078563690186, "learning_rate": 4.926307891412803e-06, "loss": 0.5246, "step": 8049 }, { "epoch": 0.52, "grad_norm": 1.2264692783355713, "learning_rate": 4.925262690586931e-06, "loss": 0.5904, "step": 8050 }, { "epoch": 0.52, "grad_norm": 1.3982232809066772, "learning_rate": 4.9242174930276345e-06, "loss": 0.5609, "step": 8051 }, { "epoch": 0.52, "grad_norm": 1.262374997138977, "learning_rate": 4.923172298780595e-06, "loss": 0.5642, "step": 8052 }, { "epoch": 0.52, "grad_norm": 1.0942126512527466, "learning_rate": 4.922127107891497e-06, "loss": 0.5108, "step": 8053 }, { "epoch": 0.52, "grad_norm": 1.2476791143417358, "learning_rate": 4.921081920406021e-06, "loss": 0.5191, "step": 8054 }, { "epoch": 0.52, "grad_norm": 1.111867070198059, "learning_rate": 4.9200367363698525e-06, "loss": 0.5314, "step": 8055 }, { "epoch": 0.52, "grad_norm": 1.1553738117218018, "learning_rate": 4.918991555828672e-06, "loss": 0.5619, "step": 8056 }, { "epoch": 0.52, "grad_norm": 1.5162550210952759, "learning_rate": 4.91794637882816e-06, "loss": 0.5522, "step": 8057 }, { "epoch": 0.52, "grad_norm": 1.2708295583724976, "learning_rate": 4.9169012054139995e-06, "loss": 0.5562, "step": 8058 }, { "epoch": 0.52, "grad_norm": 1.2818256616592407, "learning_rate": 4.915856035631874e-06, "loss": 0.5234, "step": 8059 }, { "epoch": 0.52, "grad_norm": 1.1319606304168701, "learning_rate": 4.914810869527464e-06, "loss": 0.5002, "step": 8060 }, { "epoch": 0.52, "grad_norm": 1.1670423746109009, "learning_rate": 4.9137657071464525e-06, "loss": 0.5199, "step": 8061 }, { "epoch": 0.52, "grad_norm": 1.0960522890090942, "learning_rate": 4.9127205485345184e-06, "loss": 0.5262, "step": 8062 }, { "epoch": 0.52, "grad_norm": 1.1441442966461182, "learning_rate": 4.911675393737343e-06, "loss": 0.5259, "step": 8063 }, { "epoch": 0.52, "grad_norm": 1.1718419790267944, "learning_rate": 4.910630242800609e-06, "loss": 0.5769, "step": 8064 }, { "epoch": 0.52, "grad_norm": 1.1298104524612427, "learning_rate": 4.9095850957699955e-06, "loss": 0.5721, "step": 8065 }, { "epoch": 0.52, "grad_norm": 1.2560489177703857, "learning_rate": 4.9085399526911846e-06, "loss": 0.5462, "step": 8066 }, { "epoch": 0.52, "grad_norm": 1.1005854606628418, "learning_rate": 4.907494813609858e-06, "loss": 0.5378, "step": 8067 }, { "epoch": 0.52, "grad_norm": 1.2769521474838257, "learning_rate": 4.906449678571694e-06, "loss": 0.528, "step": 8068 }, { "epoch": 0.52, "grad_norm": 1.3173587322235107, "learning_rate": 4.905404547622372e-06, "loss": 0.5333, "step": 8069 }, { "epoch": 0.52, "grad_norm": 1.1700730323791504, "learning_rate": 4.904359420807574e-06, "loss": 0.5243, "step": 8070 }, { "epoch": 0.52, "grad_norm": 1.1872156858444214, "learning_rate": 4.903314298172978e-06, "loss": 0.6073, "step": 8071 }, { "epoch": 0.52, "grad_norm": 1.0911544561386108, "learning_rate": 4.902269179764265e-06, "loss": 0.512, "step": 8072 }, { "epoch": 0.52, "grad_norm": 1.168587327003479, "learning_rate": 4.901224065627116e-06, "loss": 0.5145, "step": 8073 }, { "epoch": 0.52, "grad_norm": 1.174683690071106, "learning_rate": 4.900178955807206e-06, "loss": 0.5248, "step": 8074 }, { "epoch": 0.52, "grad_norm": 1.1555927991867065, "learning_rate": 4.899133850350217e-06, "loss": 0.5518, "step": 8075 }, { "epoch": 0.52, "grad_norm": 1.1114355325698853, "learning_rate": 4.898088749301826e-06, "loss": 0.5672, "step": 8076 }, { "epoch": 0.52, "grad_norm": 1.1170520782470703, "learning_rate": 4.897043652707714e-06, "loss": 0.5609, "step": 8077 }, { "epoch": 0.52, "grad_norm": 1.1763283014297485, "learning_rate": 4.895998560613558e-06, "loss": 0.5764, "step": 8078 }, { "epoch": 0.52, "grad_norm": 1.0455819368362427, "learning_rate": 4.8949534730650376e-06, "loss": 0.4961, "step": 8079 }, { "epoch": 0.52, "grad_norm": 1.3496382236480713, "learning_rate": 4.893908390107829e-06, "loss": 0.5712, "step": 8080 }, { "epoch": 0.52, "grad_norm": 1.1703314781188965, "learning_rate": 4.892863311787611e-06, "loss": 0.5257, "step": 8081 }, { "epoch": 0.52, "grad_norm": 1.1455031633377075, "learning_rate": 4.89181823815006e-06, "loss": 0.5317, "step": 8082 }, { "epoch": 0.52, "grad_norm": 1.1117074489593506, "learning_rate": 4.890773169240856e-06, "loss": 0.5012, "step": 8083 }, { "epoch": 0.52, "grad_norm": 1.2292754650115967, "learning_rate": 4.889728105105674e-06, "loss": 0.5778, "step": 8084 }, { "epoch": 0.52, "grad_norm": 1.0543382167816162, "learning_rate": 4.888683045790194e-06, "loss": 0.5529, "step": 8085 }, { "epoch": 0.52, "grad_norm": 1.1711417436599731, "learning_rate": 4.88763799134009e-06, "loss": 0.5091, "step": 8086 }, { "epoch": 0.52, "grad_norm": 1.138021469116211, "learning_rate": 4.886592941801039e-06, "loss": 0.4955, "step": 8087 }, { "epoch": 0.52, "grad_norm": 1.1331840753555298, "learning_rate": 4.885547897218718e-06, "loss": 0.5047, "step": 8088 }, { "epoch": 0.52, "grad_norm": 1.1870094537734985, "learning_rate": 4.8845028576388035e-06, "loss": 0.5073, "step": 8089 }, { "epoch": 0.52, "grad_norm": 1.259114146232605, "learning_rate": 4.883457823106972e-06, "loss": 0.5643, "step": 8090 }, { "epoch": 0.52, "grad_norm": 1.1473909616470337, "learning_rate": 4.882412793668896e-06, "loss": 0.498, "step": 8091 }, { "epoch": 0.52, "grad_norm": 1.1350061893463135, "learning_rate": 4.881367769370255e-06, "loss": 0.5497, "step": 8092 }, { "epoch": 0.52, "grad_norm": 1.2326804399490356, "learning_rate": 4.8803227502567215e-06, "loss": 0.5104, "step": 8093 }, { "epoch": 0.52, "grad_norm": 1.1917814016342163, "learning_rate": 4.879277736373973e-06, "loss": 0.5264, "step": 8094 }, { "epoch": 0.52, "grad_norm": 1.1354056596755981, "learning_rate": 4.878232727767684e-06, "loss": 0.5037, "step": 8095 }, { "epoch": 0.52, "grad_norm": 1.074249267578125, "learning_rate": 4.877187724483527e-06, "loss": 0.5145, "step": 8096 }, { "epoch": 0.52, "grad_norm": 1.0869940519332886, "learning_rate": 4.8761427265671775e-06, "loss": 0.4908, "step": 8097 }, { "epoch": 0.52, "grad_norm": 1.1052273511886597, "learning_rate": 4.875097734064311e-06, "loss": 0.5315, "step": 8098 }, { "epoch": 0.52, "grad_norm": 1.2726739645004272, "learning_rate": 4.874052747020599e-06, "loss": 0.5758, "step": 8099 }, { "epoch": 0.52, "grad_norm": 1.096091389656067, "learning_rate": 4.873007765481717e-06, "loss": 0.5179, "step": 8100 }, { "epoch": 0.52, "grad_norm": 1.082101821899414, "learning_rate": 4.871962789493337e-06, "loss": 0.4865, "step": 8101 }, { "epoch": 0.52, "grad_norm": 1.0939668416976929, "learning_rate": 4.8709178191011355e-06, "loss": 0.5441, "step": 8102 }, { "epoch": 0.52, "grad_norm": 1.0952324867248535, "learning_rate": 4.8698728543507815e-06, "loss": 0.4398, "step": 8103 }, { "epoch": 0.52, "grad_norm": 1.4799935817718506, "learning_rate": 4.86882789528795e-06, "loss": 0.5225, "step": 8104 }, { "epoch": 0.52, "grad_norm": 1.1707372665405273, "learning_rate": 4.867782941958311e-06, "loss": 0.5454, "step": 8105 }, { "epoch": 0.52, "grad_norm": 1.1886184215545654, "learning_rate": 4.86673799440754e-06, "loss": 0.5562, "step": 8106 }, { "epoch": 0.52, "grad_norm": 1.161239743232727, "learning_rate": 4.865693052681306e-06, "loss": 0.5204, "step": 8107 }, { "epoch": 0.52, "grad_norm": 1.1338666677474976, "learning_rate": 4.864648116825286e-06, "loss": 0.5552, "step": 8108 }, { "epoch": 0.52, "grad_norm": 1.094880223274231, "learning_rate": 4.863603186885144e-06, "loss": 0.5083, "step": 8109 }, { "epoch": 0.52, "grad_norm": 1.1502455472946167, "learning_rate": 4.862558262906555e-06, "loss": 0.5393, "step": 8110 }, { "epoch": 0.52, "grad_norm": 1.2081069946289062, "learning_rate": 4.8615133449351905e-06, "loss": 0.4886, "step": 8111 }, { "epoch": 0.52, "grad_norm": 1.2072110176086426, "learning_rate": 4.860468433016719e-06, "loss": 0.5148, "step": 8112 }, { "epoch": 0.52, "grad_norm": 1.1886917352676392, "learning_rate": 4.859423527196814e-06, "loss": 0.5091, "step": 8113 }, { "epoch": 0.52, "grad_norm": 1.1402838230133057, "learning_rate": 4.858378627521145e-06, "loss": 0.5692, "step": 8114 }, { "epoch": 0.52, "grad_norm": 1.260360598564148, "learning_rate": 4.8573337340353795e-06, "loss": 0.579, "step": 8115 }, { "epoch": 0.52, "grad_norm": 1.1929582357406616, "learning_rate": 4.856288846785188e-06, "loss": 0.5556, "step": 8116 }, { "epoch": 0.52, "grad_norm": 1.218937873840332, "learning_rate": 4.85524396581624e-06, "loss": 0.5154, "step": 8117 }, { "epoch": 0.52, "grad_norm": 1.0734894275665283, "learning_rate": 4.8541990911742044e-06, "loss": 0.4932, "step": 8118 }, { "epoch": 0.52, "grad_norm": 1.320585012435913, "learning_rate": 4.853154222904751e-06, "loss": 0.534, "step": 8119 }, { "epoch": 0.52, "grad_norm": 1.2579432725906372, "learning_rate": 4.8521093610535495e-06, "loss": 0.5405, "step": 8120 }, { "epoch": 0.52, "grad_norm": 1.0989280939102173, "learning_rate": 4.851064505666265e-06, "loss": 0.4819, "step": 8121 }, { "epoch": 0.52, "grad_norm": 1.1162225008010864, "learning_rate": 4.850019656788565e-06, "loss": 0.5326, "step": 8122 }, { "epoch": 0.52, "grad_norm": 1.228069543838501, "learning_rate": 4.8489748144661205e-06, "loss": 0.5503, "step": 8123 }, { "epoch": 0.52, "grad_norm": 1.089024305343628, "learning_rate": 4.847929978744596e-06, "loss": 0.5608, "step": 8124 }, { "epoch": 0.52, "grad_norm": 1.225210189819336, "learning_rate": 4.84688514966966e-06, "loss": 0.5426, "step": 8125 }, { "epoch": 0.52, "grad_norm": 1.1106544733047485, "learning_rate": 4.845840327286982e-06, "loss": 0.5195, "step": 8126 }, { "epoch": 0.52, "grad_norm": 1.273520827293396, "learning_rate": 4.844795511642223e-06, "loss": 0.5881, "step": 8127 }, { "epoch": 0.52, "grad_norm": 1.091975212097168, "learning_rate": 4.843750702781052e-06, "loss": 0.4727, "step": 8128 }, { "epoch": 0.52, "grad_norm": 1.1829261779785156, "learning_rate": 4.842705900749134e-06, "loss": 0.5601, "step": 8129 }, { "epoch": 0.52, "grad_norm": 1.1854184865951538, "learning_rate": 4.841661105592135e-06, "loss": 0.5427, "step": 8130 }, { "epoch": 0.52, "grad_norm": 1.3168706893920898, "learning_rate": 4.840616317355724e-06, "loss": 0.5989, "step": 8131 }, { "epoch": 0.52, "grad_norm": 1.174994707107544, "learning_rate": 4.839571536085559e-06, "loss": 0.5631, "step": 8132 }, { "epoch": 0.52, "grad_norm": 1.2294015884399414, "learning_rate": 4.838526761827309e-06, "loss": 0.5507, "step": 8133 }, { "epoch": 0.53, "grad_norm": 1.1937172412872314, "learning_rate": 4.837481994626638e-06, "loss": 0.5509, "step": 8134 }, { "epoch": 0.53, "grad_norm": 1.1911507844924927, "learning_rate": 4.8364372345292086e-06, "loss": 0.5609, "step": 8135 }, { "epoch": 0.53, "grad_norm": 1.1210026741027832, "learning_rate": 4.835392481580688e-06, "loss": 0.543, "step": 8136 }, { "epoch": 0.53, "grad_norm": 1.1895703077316284, "learning_rate": 4.834347735826737e-06, "loss": 0.525, "step": 8137 }, { "epoch": 0.53, "grad_norm": 1.2053834199905396, "learning_rate": 4.833302997313017e-06, "loss": 0.5037, "step": 8138 }, { "epoch": 0.53, "grad_norm": 1.1917120218276978, "learning_rate": 4.832258266085194e-06, "loss": 0.5215, "step": 8139 }, { "epoch": 0.53, "grad_norm": 1.2326312065124512, "learning_rate": 4.83121354218893e-06, "loss": 0.5214, "step": 8140 }, { "epoch": 0.53, "grad_norm": 1.1464729309082031, "learning_rate": 4.830168825669886e-06, "loss": 0.587, "step": 8141 }, { "epoch": 0.53, "grad_norm": 1.1351741552352905, "learning_rate": 4.829124116573724e-06, "loss": 0.5159, "step": 8142 }, { "epoch": 0.53, "grad_norm": 1.068610429763794, "learning_rate": 4.8280794149461075e-06, "loss": 0.4846, "step": 8143 }, { "epoch": 0.53, "grad_norm": 1.1548054218292236, "learning_rate": 4.827034720832695e-06, "loss": 0.5241, "step": 8144 }, { "epoch": 0.53, "grad_norm": 1.0862103700637817, "learning_rate": 4.825990034279149e-06, "loss": 0.4901, "step": 8145 }, { "epoch": 0.53, "grad_norm": 1.2017786502838135, "learning_rate": 4.824945355331129e-06, "loss": 0.5797, "step": 8146 }, { "epoch": 0.53, "grad_norm": 1.1183699369430542, "learning_rate": 4.823900684034296e-06, "loss": 0.4929, "step": 8147 }, { "epoch": 0.53, "grad_norm": 1.154875636100769, "learning_rate": 4.82285602043431e-06, "loss": 0.5673, "step": 8148 }, { "epoch": 0.53, "grad_norm": 1.140005111694336, "learning_rate": 4.821811364576831e-06, "loss": 0.527, "step": 8149 }, { "epoch": 0.53, "grad_norm": 1.168646216392517, "learning_rate": 4.820766716507518e-06, "loss": 0.5004, "step": 8150 }, { "epoch": 0.53, "grad_norm": 1.1852738857269287, "learning_rate": 4.819722076272028e-06, "loss": 0.5505, "step": 8151 }, { "epoch": 0.53, "grad_norm": 1.1448134183883667, "learning_rate": 4.818677443916021e-06, "loss": 0.5607, "step": 8152 }, { "epoch": 0.53, "grad_norm": 1.2308924198150635, "learning_rate": 4.817632819485154e-06, "loss": 0.5611, "step": 8153 }, { "epoch": 0.53, "grad_norm": 1.111306071281433, "learning_rate": 4.816588203025087e-06, "loss": 0.537, "step": 8154 }, { "epoch": 0.53, "grad_norm": 1.083564043045044, "learning_rate": 4.815543594581479e-06, "loss": 0.5026, "step": 8155 }, { "epoch": 0.53, "grad_norm": 1.190176010131836, "learning_rate": 4.814498994199982e-06, "loss": 0.526, "step": 8156 }, { "epoch": 0.53, "grad_norm": 1.1270779371261597, "learning_rate": 4.813454401926255e-06, "loss": 0.5534, "step": 8157 }, { "epoch": 0.53, "grad_norm": 1.166580319404602, "learning_rate": 4.8124098178059555e-06, "loss": 0.5002, "step": 8158 }, { "epoch": 0.53, "grad_norm": 1.1510450839996338, "learning_rate": 4.811365241884738e-06, "loss": 0.5506, "step": 8159 }, { "epoch": 0.53, "grad_norm": 1.2200316190719604, "learning_rate": 4.81032067420826e-06, "loss": 0.5113, "step": 8160 }, { "epoch": 0.53, "grad_norm": 1.0803406238555908, "learning_rate": 4.809276114822177e-06, "loss": 0.5238, "step": 8161 }, { "epoch": 0.53, "grad_norm": 1.2969518899917603, "learning_rate": 4.808231563772142e-06, "loss": 0.5158, "step": 8162 }, { "epoch": 0.53, "grad_norm": 1.1710660457611084, "learning_rate": 4.8071870211038095e-06, "loss": 0.5318, "step": 8163 }, { "epoch": 0.53, "grad_norm": 1.1712462902069092, "learning_rate": 4.806142486862836e-06, "loss": 0.5438, "step": 8164 }, { "epoch": 0.53, "grad_norm": 1.2252744436264038, "learning_rate": 4.805097961094873e-06, "loss": 0.5584, "step": 8165 }, { "epoch": 0.53, "grad_norm": 1.2238998413085938, "learning_rate": 4.804053443845576e-06, "loss": 0.5305, "step": 8166 }, { "epoch": 0.53, "grad_norm": 1.130437970161438, "learning_rate": 4.8030089351605995e-06, "loss": 0.493, "step": 8167 }, { "epoch": 0.53, "grad_norm": 1.1708548069000244, "learning_rate": 4.8019644350855925e-06, "loss": 0.558, "step": 8168 }, { "epoch": 0.53, "grad_norm": 1.2749650478363037, "learning_rate": 4.8009199436662084e-06, "loss": 0.5142, "step": 8169 }, { "epoch": 0.53, "grad_norm": 1.2005970478057861, "learning_rate": 4.7998754609481e-06, "loss": 0.5268, "step": 8170 }, { "epoch": 0.53, "grad_norm": 1.1499732732772827, "learning_rate": 4.798830986976919e-06, "loss": 0.5533, "step": 8171 }, { "epoch": 0.53, "grad_norm": 1.1517150402069092, "learning_rate": 4.797786521798319e-06, "loss": 0.5546, "step": 8172 }, { "epoch": 0.53, "grad_norm": 1.1873040199279785, "learning_rate": 4.796742065457946e-06, "loss": 0.5123, "step": 8173 }, { "epoch": 0.53, "grad_norm": 1.2783246040344238, "learning_rate": 4.795697618001453e-06, "loss": 0.5275, "step": 8174 }, { "epoch": 0.53, "grad_norm": 1.1560896635055542, "learning_rate": 4.79465317947449e-06, "loss": 0.516, "step": 8175 }, { "epoch": 0.53, "grad_norm": 1.1652567386627197, "learning_rate": 4.7936087499227065e-06, "loss": 0.5257, "step": 8176 }, { "epoch": 0.53, "grad_norm": 1.1058623790740967, "learning_rate": 4.792564329391752e-06, "loss": 0.4866, "step": 8177 }, { "epoch": 0.53, "grad_norm": 1.1316611766815186, "learning_rate": 4.791519917927277e-06, "loss": 0.5091, "step": 8178 }, { "epoch": 0.53, "grad_norm": 1.0980768203735352, "learning_rate": 4.790475515574927e-06, "loss": 0.5546, "step": 8179 }, { "epoch": 0.53, "grad_norm": 1.2328983545303345, "learning_rate": 4.789431122380353e-06, "loss": 0.5147, "step": 8180 }, { "epoch": 0.53, "grad_norm": 1.2340418100357056, "learning_rate": 4.7883867383891995e-06, "loss": 0.5421, "step": 8181 }, { "epoch": 0.53, "grad_norm": 1.0533370971679688, "learning_rate": 4.787342363647116e-06, "loss": 0.5113, "step": 8182 }, { "epoch": 0.53, "grad_norm": 1.04391348361969, "learning_rate": 4.786297998199751e-06, "loss": 0.5131, "step": 8183 }, { "epoch": 0.53, "grad_norm": 1.2403700351715088, "learning_rate": 4.785253642092748e-06, "loss": 0.5552, "step": 8184 }, { "epoch": 0.53, "grad_norm": 1.124801516532898, "learning_rate": 4.7842092953717545e-06, "loss": 0.5455, "step": 8185 }, { "epoch": 0.53, "grad_norm": 1.118610143661499, "learning_rate": 4.783164958082415e-06, "loss": 0.4893, "step": 8186 }, { "epoch": 0.53, "grad_norm": 1.0890580415725708, "learning_rate": 4.782120630270377e-06, "loss": 0.531, "step": 8187 }, { "epoch": 0.53, "grad_norm": 1.25242280960083, "learning_rate": 4.7810763119812834e-06, "loss": 0.5915, "step": 8188 }, { "epoch": 0.53, "grad_norm": 1.2167284488677979, "learning_rate": 4.7800320032607795e-06, "loss": 0.5348, "step": 8189 }, { "epoch": 0.53, "grad_norm": 1.1976382732391357, "learning_rate": 4.77898770415451e-06, "loss": 0.5643, "step": 8190 }, { "epoch": 0.53, "grad_norm": 1.125930905342102, "learning_rate": 4.777943414708116e-06, "loss": 0.5112, "step": 8191 }, { "epoch": 0.53, "grad_norm": 1.1630514860153198, "learning_rate": 4.776899134967244e-06, "loss": 0.5114, "step": 8192 }, { "epoch": 0.53, "grad_norm": 1.1124117374420166, "learning_rate": 4.7758548649775345e-06, "loss": 0.5164, "step": 8193 }, { "epoch": 0.53, "grad_norm": 1.2924450635910034, "learning_rate": 4.77481060478463e-06, "loss": 0.5156, "step": 8194 }, { "epoch": 0.53, "grad_norm": 1.1817933320999146, "learning_rate": 4.7737663544341735e-06, "loss": 0.5122, "step": 8195 }, { "epoch": 0.53, "grad_norm": 1.1722376346588135, "learning_rate": 4.772722113971808e-06, "loss": 0.5529, "step": 8196 }, { "epoch": 0.53, "grad_norm": 1.1533440351486206, "learning_rate": 4.77167788344317e-06, "loss": 0.5285, "step": 8197 }, { "epoch": 0.53, "grad_norm": 1.1611878871917725, "learning_rate": 4.770633662893903e-06, "loss": 0.5418, "step": 8198 }, { "epoch": 0.53, "grad_norm": 1.1369613409042358, "learning_rate": 4.769589452369646e-06, "loss": 0.4853, "step": 8199 }, { "epoch": 0.53, "grad_norm": 1.2472726106643677, "learning_rate": 4.76854525191604e-06, "loss": 0.5197, "step": 8200 }, { "epoch": 0.53, "grad_norm": 1.1809794902801514, "learning_rate": 4.767501061578724e-06, "loss": 0.5918, "step": 8201 }, { "epoch": 0.53, "grad_norm": 1.0621980428695679, "learning_rate": 4.76645688140334e-06, "loss": 0.5007, "step": 8202 }, { "epoch": 0.53, "grad_norm": 1.1870152950286865, "learning_rate": 4.76541271143552e-06, "loss": 0.5352, "step": 8203 }, { "epoch": 0.53, "grad_norm": 1.1578559875488281, "learning_rate": 4.764368551720905e-06, "loss": 0.5403, "step": 8204 }, { "epoch": 0.53, "grad_norm": 1.2727464437484741, "learning_rate": 4.763324402305133e-06, "loss": 0.4801, "step": 8205 }, { "epoch": 0.53, "grad_norm": 1.2291686534881592, "learning_rate": 4.76228026323384e-06, "loss": 0.5297, "step": 8206 }, { "epoch": 0.53, "grad_norm": 1.1000216007232666, "learning_rate": 4.761236134552664e-06, "loss": 0.5005, "step": 8207 }, { "epoch": 0.53, "grad_norm": 1.290738582611084, "learning_rate": 4.760192016307242e-06, "loss": 0.5777, "step": 8208 }, { "epoch": 0.53, "grad_norm": 1.1356478929519653, "learning_rate": 4.759147908543208e-06, "loss": 0.5588, "step": 8209 }, { "epoch": 0.53, "grad_norm": 1.1060914993286133, "learning_rate": 4.758103811306195e-06, "loss": 0.5028, "step": 8210 }, { "epoch": 0.53, "grad_norm": 1.1444059610366821, "learning_rate": 4.757059724641842e-06, "loss": 0.5374, "step": 8211 }, { "epoch": 0.53, "grad_norm": 1.1986050605773926, "learning_rate": 4.756015648595781e-06, "loss": 0.5286, "step": 8212 }, { "epoch": 0.53, "grad_norm": 1.2689591646194458, "learning_rate": 4.7549715832136464e-06, "loss": 0.5592, "step": 8213 }, { "epoch": 0.53, "grad_norm": 1.2563039064407349, "learning_rate": 4.753927528541075e-06, "loss": 0.5596, "step": 8214 }, { "epoch": 0.53, "grad_norm": 1.151469111442566, "learning_rate": 4.752883484623693e-06, "loss": 0.551, "step": 8215 }, { "epoch": 0.53, "grad_norm": 1.3193284273147583, "learning_rate": 4.751839451507137e-06, "loss": 0.5831, "step": 8216 }, { "epoch": 0.53, "grad_norm": 1.3513391017913818, "learning_rate": 4.750795429237038e-06, "loss": 0.5156, "step": 8217 }, { "epoch": 0.53, "grad_norm": 1.1298017501831055, "learning_rate": 4.749751417859028e-06, "loss": 0.5026, "step": 8218 }, { "epoch": 0.53, "grad_norm": 1.201616883277893, "learning_rate": 4.74870741741874e-06, "loss": 0.539, "step": 8219 }, { "epoch": 0.53, "grad_norm": 1.174088478088379, "learning_rate": 4.7476634279618e-06, "loss": 0.4932, "step": 8220 }, { "epoch": 0.53, "grad_norm": 1.2032957077026367, "learning_rate": 4.746619449533841e-06, "loss": 0.5279, "step": 8221 }, { "epoch": 0.53, "grad_norm": 1.1041362285614014, "learning_rate": 4.7455754821804914e-06, "loss": 0.5724, "step": 8222 }, { "epoch": 0.53, "grad_norm": 1.1698580980300903, "learning_rate": 4.744531525947382e-06, "loss": 0.5211, "step": 8223 }, { "epoch": 0.53, "grad_norm": 1.226369857788086, "learning_rate": 4.74348758088014e-06, "loss": 0.4979, "step": 8224 }, { "epoch": 0.53, "grad_norm": 1.1493768692016602, "learning_rate": 4.742443647024396e-06, "loss": 0.5237, "step": 8225 }, { "epoch": 0.53, "grad_norm": 1.1992640495300293, "learning_rate": 4.741399724425773e-06, "loss": 0.5751, "step": 8226 }, { "epoch": 0.53, "grad_norm": 1.1859867572784424, "learning_rate": 4.740355813129902e-06, "loss": 0.5017, "step": 8227 }, { "epoch": 0.53, "grad_norm": 1.2125965356826782, "learning_rate": 4.7393119131824085e-06, "loss": 0.5253, "step": 8228 }, { "epoch": 0.53, "grad_norm": 1.1595895290374756, "learning_rate": 4.738268024628918e-06, "loss": 0.5534, "step": 8229 }, { "epoch": 0.53, "grad_norm": 1.184397578239441, "learning_rate": 4.7372241475150585e-06, "loss": 0.5013, "step": 8230 }, { "epoch": 0.53, "grad_norm": 1.284034252166748, "learning_rate": 4.7361802818864534e-06, "loss": 0.5387, "step": 8231 }, { "epoch": 0.53, "grad_norm": 1.2238459587097168, "learning_rate": 4.735136427788726e-06, "loss": 0.5375, "step": 8232 }, { "epoch": 0.53, "grad_norm": 1.2300299406051636, "learning_rate": 4.734092585267502e-06, "loss": 0.5752, "step": 8233 }, { "epoch": 0.53, "grad_norm": 1.2259025573730469, "learning_rate": 4.733048754368407e-06, "loss": 0.4982, "step": 8234 }, { "epoch": 0.53, "grad_norm": 1.079509973526001, "learning_rate": 4.73200493513706e-06, "loss": 0.5213, "step": 8235 }, { "epoch": 0.53, "grad_norm": 1.2868763208389282, "learning_rate": 4.7309611276190864e-06, "loss": 0.5453, "step": 8236 }, { "epoch": 0.53, "grad_norm": 1.479746699333191, "learning_rate": 4.729917331860109e-06, "loss": 0.5552, "step": 8237 }, { "epoch": 0.53, "grad_norm": 0.9878431558609009, "learning_rate": 4.728873547905747e-06, "loss": 0.4808, "step": 8238 }, { "epoch": 0.53, "grad_norm": 1.3224519491195679, "learning_rate": 4.727829775801624e-06, "loss": 0.4934, "step": 8239 }, { "epoch": 0.53, "grad_norm": 1.0883026123046875, "learning_rate": 4.726786015593358e-06, "loss": 0.5208, "step": 8240 }, { "epoch": 0.53, "grad_norm": 1.1957404613494873, "learning_rate": 4.72574226732657e-06, "loss": 0.5283, "step": 8241 }, { "epoch": 0.53, "grad_norm": 1.0652785301208496, "learning_rate": 4.7246985310468804e-06, "loss": 0.5121, "step": 8242 }, { "epoch": 0.53, "grad_norm": 1.3836363554000854, "learning_rate": 4.723654806799909e-06, "loss": 0.5196, "step": 8243 }, { "epoch": 0.53, "grad_norm": 1.1898486614227295, "learning_rate": 4.722611094631272e-06, "loss": 0.5561, "step": 8244 }, { "epoch": 0.53, "grad_norm": 1.2318646907806396, "learning_rate": 4.721567394586588e-06, "loss": 0.5038, "step": 8245 }, { "epoch": 0.53, "grad_norm": 1.1523964405059814, "learning_rate": 4.720523706711475e-06, "loss": 0.4888, "step": 8246 }, { "epoch": 0.53, "grad_norm": 1.3389625549316406, "learning_rate": 4.719480031051549e-06, "loss": 0.5958, "step": 8247 }, { "epoch": 0.53, "grad_norm": 1.190407633781433, "learning_rate": 4.718436367652428e-06, "loss": 0.5662, "step": 8248 }, { "epoch": 0.53, "grad_norm": 1.3185005187988281, "learning_rate": 4.717392716559729e-06, "loss": 0.512, "step": 8249 }, { "epoch": 0.53, "grad_norm": 1.160508632659912, "learning_rate": 4.716349077819062e-06, "loss": 0.5431, "step": 8250 }, { "epoch": 0.53, "grad_norm": 1.0407297611236572, "learning_rate": 4.715305451476046e-06, "loss": 0.4718, "step": 8251 }, { "epoch": 0.53, "grad_norm": 1.1080106496810913, "learning_rate": 4.7142618375762935e-06, "loss": 0.5013, "step": 8252 }, { "epoch": 0.53, "grad_norm": 1.18071711063385, "learning_rate": 4.713218236165419e-06, "loss": 0.5505, "step": 8253 }, { "epoch": 0.53, "grad_norm": 1.222029209136963, "learning_rate": 4.712174647289036e-06, "loss": 0.5751, "step": 8254 }, { "epoch": 0.53, "grad_norm": 1.1915888786315918, "learning_rate": 4.711131070992759e-06, "loss": 0.5191, "step": 8255 }, { "epoch": 0.53, "grad_norm": 1.2419116497039795, "learning_rate": 4.710087507322195e-06, "loss": 0.5241, "step": 8256 }, { "epoch": 0.53, "grad_norm": 1.183193325996399, "learning_rate": 4.7090439563229575e-06, "loss": 0.4783, "step": 8257 }, { "epoch": 0.53, "grad_norm": 1.2076468467712402, "learning_rate": 4.7080004180406585e-06, "loss": 0.5233, "step": 8258 }, { "epoch": 0.53, "grad_norm": 1.190962314605713, "learning_rate": 4.706956892520908e-06, "loss": 0.5638, "step": 8259 }, { "epoch": 0.53, "grad_norm": 1.1383670568466187, "learning_rate": 4.705913379809318e-06, "loss": 0.5744, "step": 8260 }, { "epoch": 0.53, "grad_norm": 1.253123164176941, "learning_rate": 4.704869879951493e-06, "loss": 0.548, "step": 8261 }, { "epoch": 0.53, "grad_norm": 1.1627936363220215, "learning_rate": 4.703826392993044e-06, "loss": 0.5161, "step": 8262 }, { "epoch": 0.53, "grad_norm": 1.224907636642456, "learning_rate": 4.7027829189795785e-06, "loss": 0.4902, "step": 8263 }, { "epoch": 0.53, "grad_norm": 1.1197348833084106, "learning_rate": 4.7017394579567065e-06, "loss": 0.4872, "step": 8264 }, { "epoch": 0.53, "grad_norm": 1.3378435373306274, "learning_rate": 4.700696009970031e-06, "loss": 0.5541, "step": 8265 }, { "epoch": 0.53, "grad_norm": 1.1515387296676636, "learning_rate": 4.699652575065165e-06, "loss": 0.5182, "step": 8266 }, { "epoch": 0.53, "grad_norm": 1.3208354711532593, "learning_rate": 4.698609153287707e-06, "loss": 0.5763, "step": 8267 }, { "epoch": 0.53, "grad_norm": 1.1577057838439941, "learning_rate": 4.697565744683265e-06, "loss": 0.4981, "step": 8268 }, { "epoch": 0.53, "grad_norm": 1.3654180765151978, "learning_rate": 4.6965223492974444e-06, "loss": 0.5526, "step": 8269 }, { "epoch": 0.53, "grad_norm": 1.1825308799743652, "learning_rate": 4.695478967175849e-06, "loss": 0.4992, "step": 8270 }, { "epoch": 0.53, "grad_norm": 1.139505386352539, "learning_rate": 4.694435598364082e-06, "loss": 0.5274, "step": 8271 }, { "epoch": 0.53, "grad_norm": 1.1712504625320435, "learning_rate": 4.6933922429077485e-06, "loss": 0.5606, "step": 8272 }, { "epoch": 0.53, "grad_norm": 1.1755715608596802, "learning_rate": 4.692348900852448e-06, "loss": 0.57, "step": 8273 }, { "epoch": 0.53, "grad_norm": 1.1107659339904785, "learning_rate": 4.691305572243783e-06, "loss": 0.4852, "step": 8274 }, { "epoch": 0.53, "grad_norm": 1.1149054765701294, "learning_rate": 4.6902622571273535e-06, "loss": 0.4997, "step": 8275 }, { "epoch": 0.53, "grad_norm": 1.2685092687606812, "learning_rate": 4.689218955548764e-06, "loss": 0.5039, "step": 8276 }, { "epoch": 0.53, "grad_norm": 1.1694689989089966, "learning_rate": 4.688175667553611e-06, "loss": 0.5519, "step": 8277 }, { "epoch": 0.53, "grad_norm": 1.215511441230774, "learning_rate": 4.6871323931874955e-06, "loss": 0.5231, "step": 8278 }, { "epoch": 0.53, "grad_norm": 1.2799137830734253, "learning_rate": 4.686089132496016e-06, "loss": 0.5561, "step": 8279 }, { "epoch": 0.53, "grad_norm": 1.1892434358596802, "learning_rate": 4.6850458855247695e-06, "loss": 0.4914, "step": 8280 }, { "epoch": 0.53, "grad_norm": 1.1434156894683838, "learning_rate": 4.684002652319356e-06, "loss": 0.4902, "step": 8281 }, { "epoch": 0.53, "grad_norm": 1.2245309352874756, "learning_rate": 4.682959432925371e-06, "loss": 0.5157, "step": 8282 }, { "epoch": 0.53, "grad_norm": 1.1964606046676636, "learning_rate": 4.68191622738841e-06, "loss": 0.4905, "step": 8283 }, { "epoch": 0.53, "grad_norm": 1.1741527318954468, "learning_rate": 4.680873035754072e-06, "loss": 0.5325, "step": 8284 }, { "epoch": 0.53, "grad_norm": 1.255850076675415, "learning_rate": 4.67982985806795e-06, "loss": 0.5196, "step": 8285 }, { "epoch": 0.53, "grad_norm": 1.2920328378677368, "learning_rate": 4.678786694375637e-06, "loss": 0.5342, "step": 8286 }, { "epoch": 0.53, "grad_norm": 1.1665908098220825, "learning_rate": 4.6777435447227305e-06, "loss": 0.5424, "step": 8287 }, { "epoch": 0.53, "grad_norm": 3.055925130844116, "learning_rate": 4.67670040915482e-06, "loss": 0.5151, "step": 8288 }, { "epoch": 0.54, "grad_norm": 1.1017305850982666, "learning_rate": 4.675657287717503e-06, "loss": 0.4584, "step": 8289 }, { "epoch": 0.54, "grad_norm": 1.30667245388031, "learning_rate": 4.67461418045637e-06, "loss": 0.5606, "step": 8290 }, { "epoch": 0.54, "grad_norm": 1.0303785800933838, "learning_rate": 4.673571087417009e-06, "loss": 0.5151, "step": 8291 }, { "epoch": 0.54, "grad_norm": 1.2266768217086792, "learning_rate": 4.6725280086450144e-06, "loss": 0.5473, "step": 8292 }, { "epoch": 0.54, "grad_norm": 1.179612636566162, "learning_rate": 4.671484944185976e-06, "loss": 0.5697, "step": 8293 }, { "epoch": 0.54, "grad_norm": 1.1859338283538818, "learning_rate": 4.6704418940854825e-06, "loss": 0.5233, "step": 8294 }, { "epoch": 0.54, "grad_norm": 1.2371785640716553, "learning_rate": 4.669398858389123e-06, "loss": 0.54, "step": 8295 }, { "epoch": 0.54, "grad_norm": 1.1314115524291992, "learning_rate": 4.668355837142489e-06, "loss": 0.5714, "step": 8296 }, { "epoch": 0.54, "grad_norm": 1.4805244207382202, "learning_rate": 4.667312830391164e-06, "loss": 0.5147, "step": 8297 }, { "epoch": 0.54, "grad_norm": 1.2190511226654053, "learning_rate": 4.666269838180737e-06, "loss": 0.5693, "step": 8298 }, { "epoch": 0.54, "grad_norm": 1.243947148323059, "learning_rate": 4.665226860556793e-06, "loss": 0.5798, "step": 8299 }, { "epoch": 0.54, "grad_norm": 1.142733097076416, "learning_rate": 4.664183897564921e-06, "loss": 0.4994, "step": 8300 }, { "epoch": 0.54, "grad_norm": 1.2227497100830078, "learning_rate": 4.663140949250705e-06, "loss": 0.5957, "step": 8301 }, { "epoch": 0.54, "grad_norm": 1.149147391319275, "learning_rate": 4.662098015659728e-06, "loss": 0.5641, "step": 8302 }, { "epoch": 0.54, "grad_norm": 1.1996893882751465, "learning_rate": 4.661055096837575e-06, "loss": 0.5141, "step": 8303 }, { "epoch": 0.54, "grad_norm": 1.1364271640777588, "learning_rate": 4.6600121928298285e-06, "loss": 0.5395, "step": 8304 }, { "epoch": 0.54, "grad_norm": 1.17295241355896, "learning_rate": 4.6589693036820715e-06, "loss": 0.5051, "step": 8305 }, { "epoch": 0.54, "grad_norm": 1.1902812719345093, "learning_rate": 4.657926429439887e-06, "loss": 0.5358, "step": 8306 }, { "epoch": 0.54, "grad_norm": 1.1823246479034424, "learning_rate": 4.656883570148858e-06, "loss": 0.5551, "step": 8307 }, { "epoch": 0.54, "grad_norm": 1.2203583717346191, "learning_rate": 4.65584072585456e-06, "loss": 0.5397, "step": 8308 }, { "epoch": 0.54, "grad_norm": 1.1477009057998657, "learning_rate": 4.6547978966025766e-06, "loss": 0.4733, "step": 8309 }, { "epoch": 0.54, "grad_norm": 1.1368786096572876, "learning_rate": 4.653755082438487e-06, "loss": 0.4928, "step": 8310 }, { "epoch": 0.54, "grad_norm": 1.2346371412277222, "learning_rate": 4.652712283407868e-06, "loss": 0.5479, "step": 8311 }, { "epoch": 0.54, "grad_norm": 1.1506986618041992, "learning_rate": 4.6516694995563e-06, "loss": 0.5713, "step": 8312 }, { "epoch": 0.54, "grad_norm": 1.170474648475647, "learning_rate": 4.650626730929362e-06, "loss": 0.5288, "step": 8313 }, { "epoch": 0.54, "grad_norm": 1.1259889602661133, "learning_rate": 4.649583977572626e-06, "loss": 0.4803, "step": 8314 }, { "epoch": 0.54, "grad_norm": 1.1947838068008423, "learning_rate": 4.648541239531669e-06, "loss": 0.508, "step": 8315 }, { "epoch": 0.54, "grad_norm": 1.2833681106567383, "learning_rate": 4.647498516852068e-06, "loss": 0.5324, "step": 8316 }, { "epoch": 0.54, "grad_norm": 1.2459712028503418, "learning_rate": 4.646455809579398e-06, "loss": 0.5593, "step": 8317 }, { "epoch": 0.54, "grad_norm": 1.11014986038208, "learning_rate": 4.645413117759232e-06, "loss": 0.5005, "step": 8318 }, { "epoch": 0.54, "grad_norm": 1.1412367820739746, "learning_rate": 4.644370441437147e-06, "loss": 0.5681, "step": 8319 }, { "epoch": 0.54, "grad_norm": 1.1769644021987915, "learning_rate": 4.643327780658709e-06, "loss": 0.497, "step": 8320 }, { "epoch": 0.54, "grad_norm": 1.0603816509246826, "learning_rate": 4.642285135469494e-06, "loss": 0.4961, "step": 8321 }, { "epoch": 0.54, "grad_norm": 1.1607720851898193, "learning_rate": 4.641242505915073e-06, "loss": 0.5341, "step": 8322 }, { "epoch": 0.54, "grad_norm": 1.0670520067214966, "learning_rate": 4.640199892041017e-06, "loss": 0.4376, "step": 8323 }, { "epoch": 0.54, "grad_norm": 1.2789846658706665, "learning_rate": 4.639157293892894e-06, "loss": 0.5624, "step": 8324 }, { "epoch": 0.54, "grad_norm": 1.100825548171997, "learning_rate": 4.638114711516277e-06, "loss": 0.5222, "step": 8325 }, { "epoch": 0.54, "grad_norm": 1.1561042070388794, "learning_rate": 4.63707214495673e-06, "loss": 0.5537, "step": 8326 }, { "epoch": 0.54, "grad_norm": 1.1622505187988281, "learning_rate": 4.6360295942598245e-06, "loss": 0.5338, "step": 8327 }, { "epoch": 0.54, "grad_norm": 1.1897518634796143, "learning_rate": 4.634987059471127e-06, "loss": 0.5574, "step": 8328 }, { "epoch": 0.54, "grad_norm": 1.2327921390533447, "learning_rate": 4.633944540636201e-06, "loss": 0.5034, "step": 8329 }, { "epoch": 0.54, "grad_norm": 1.1386817693710327, "learning_rate": 4.632902037800616e-06, "loss": 0.5108, "step": 8330 }, { "epoch": 0.54, "grad_norm": 1.1135191917419434, "learning_rate": 4.6318595510099365e-06, "loss": 0.5718, "step": 8331 }, { "epoch": 0.54, "grad_norm": 1.0730805397033691, "learning_rate": 4.630817080309726e-06, "loss": 0.5147, "step": 8332 }, { "epoch": 0.54, "grad_norm": 1.0904302597045898, "learning_rate": 4.629774625745548e-06, "loss": 0.5188, "step": 8333 }, { "epoch": 0.54, "grad_norm": 1.0880231857299805, "learning_rate": 4.628732187362966e-06, "loss": 0.4403, "step": 8334 }, { "epoch": 0.54, "grad_norm": 1.172791600227356, "learning_rate": 4.6276897652075415e-06, "loss": 0.5274, "step": 8335 }, { "epoch": 0.54, "grad_norm": 1.28619384765625, "learning_rate": 4.626647359324837e-06, "loss": 0.5121, "step": 8336 }, { "epoch": 0.54, "grad_norm": 1.2342822551727295, "learning_rate": 4.625604969760415e-06, "loss": 0.5553, "step": 8337 }, { "epoch": 0.54, "grad_norm": 1.1665419340133667, "learning_rate": 4.624562596559832e-06, "loss": 0.5712, "step": 8338 }, { "epoch": 0.54, "grad_norm": 1.2103588581085205, "learning_rate": 4.6235202397686495e-06, "loss": 0.5504, "step": 8339 }, { "epoch": 0.54, "grad_norm": 1.250511884689331, "learning_rate": 4.622477899432426e-06, "loss": 0.5114, "step": 8340 }, { "epoch": 0.54, "grad_norm": 1.192018747329712, "learning_rate": 4.62143557559672e-06, "loss": 0.5774, "step": 8341 }, { "epoch": 0.54, "grad_norm": 1.1374760866165161, "learning_rate": 4.620393268307087e-06, "loss": 0.4861, "step": 8342 }, { "epoch": 0.54, "grad_norm": 1.1821938753128052, "learning_rate": 4.6193509776090875e-06, "loss": 0.5378, "step": 8343 }, { "epoch": 0.54, "grad_norm": 1.1108036041259766, "learning_rate": 4.618308703548273e-06, "loss": 0.5474, "step": 8344 }, { "epoch": 0.54, "grad_norm": 1.1902910470962524, "learning_rate": 4.6172664461702e-06, "loss": 0.5063, "step": 8345 }, { "epoch": 0.54, "grad_norm": 1.136123776435852, "learning_rate": 4.6162242055204225e-06, "loss": 0.5596, "step": 8346 }, { "epoch": 0.54, "grad_norm": 1.1379966735839844, "learning_rate": 4.6151819816444956e-06, "loss": 0.5021, "step": 8347 }, { "epoch": 0.54, "grad_norm": 1.2264130115509033, "learning_rate": 4.614139774587973e-06, "loss": 0.5004, "step": 8348 }, { "epoch": 0.54, "grad_norm": 1.1679620742797852, "learning_rate": 4.6130975843964034e-06, "loss": 0.5241, "step": 8349 }, { "epoch": 0.54, "grad_norm": 1.105900526046753, "learning_rate": 4.612055411115341e-06, "loss": 0.5473, "step": 8350 }, { "epoch": 0.54, "grad_norm": 1.0860881805419922, "learning_rate": 4.611013254790335e-06, "loss": 0.525, "step": 8351 }, { "epoch": 0.54, "grad_norm": 1.2787575721740723, "learning_rate": 4.609971115466936e-06, "loss": 0.5599, "step": 8352 }, { "epoch": 0.54, "grad_norm": 1.3727887868881226, "learning_rate": 4.608928993190693e-06, "loss": 0.5827, "step": 8353 }, { "epoch": 0.54, "grad_norm": 1.1710681915283203, "learning_rate": 4.607886888007157e-06, "loss": 0.5314, "step": 8354 }, { "epoch": 0.54, "grad_norm": 1.1498843431472778, "learning_rate": 4.606844799961871e-06, "loss": 0.5265, "step": 8355 }, { "epoch": 0.54, "grad_norm": 1.2130279541015625, "learning_rate": 4.605802729100385e-06, "loss": 0.5097, "step": 8356 }, { "epoch": 0.54, "grad_norm": 1.1696064472198486, "learning_rate": 4.604760675468245e-06, "loss": 0.505, "step": 8357 }, { "epoch": 0.54, "grad_norm": 1.088016390800476, "learning_rate": 4.603718639110995e-06, "loss": 0.5467, "step": 8358 }, { "epoch": 0.54, "grad_norm": 1.136351227760315, "learning_rate": 4.602676620074183e-06, "loss": 0.5226, "step": 8359 }, { "epoch": 0.54, "grad_norm": 1.1577098369598389, "learning_rate": 4.601634618403351e-06, "loss": 0.5975, "step": 8360 }, { "epoch": 0.54, "grad_norm": 1.4005248546600342, "learning_rate": 4.60059263414404e-06, "loss": 0.541, "step": 8361 }, { "epoch": 0.54, "grad_norm": 1.1017706394195557, "learning_rate": 4.599550667341795e-06, "loss": 0.4841, "step": 8362 }, { "epoch": 0.54, "grad_norm": 1.1912086009979248, "learning_rate": 4.5985087180421574e-06, "loss": 0.5501, "step": 8363 }, { "epoch": 0.54, "grad_norm": 1.1854922771453857, "learning_rate": 4.597466786290668e-06, "loss": 0.5136, "step": 8364 }, { "epoch": 0.54, "grad_norm": 1.0154883861541748, "learning_rate": 4.596424872132867e-06, "loss": 0.4731, "step": 8365 }, { "epoch": 0.54, "grad_norm": 1.2279188632965088, "learning_rate": 4.595382975614294e-06, "loss": 0.5547, "step": 8366 }, { "epoch": 0.54, "grad_norm": 1.221805214881897, "learning_rate": 4.594341096780486e-06, "loss": 0.4994, "step": 8367 }, { "epoch": 0.54, "grad_norm": 1.1112209558486938, "learning_rate": 4.593299235676982e-06, "loss": 0.5248, "step": 8368 }, { "epoch": 0.54, "grad_norm": 1.1692521572113037, "learning_rate": 4.5922573923493194e-06, "loss": 0.5443, "step": 8369 }, { "epoch": 0.54, "grad_norm": 1.2039172649383545, "learning_rate": 4.591215566843035e-06, "loss": 0.5355, "step": 8370 }, { "epoch": 0.54, "grad_norm": 1.2191836833953857, "learning_rate": 4.590173759203663e-06, "loss": 0.533, "step": 8371 }, { "epoch": 0.54, "grad_norm": 1.1876764297485352, "learning_rate": 4.58913196947674e-06, "loss": 0.5398, "step": 8372 }, { "epoch": 0.54, "grad_norm": 1.1723453998565674, "learning_rate": 4.588090197707796e-06, "loss": 0.5239, "step": 8373 }, { "epoch": 0.54, "grad_norm": 1.1167707443237305, "learning_rate": 4.587048443942369e-06, "loss": 0.4637, "step": 8374 }, { "epoch": 0.54, "grad_norm": 1.1074647903442383, "learning_rate": 4.5860067082259875e-06, "loss": 0.5628, "step": 8375 }, { "epoch": 0.54, "grad_norm": 1.1111037731170654, "learning_rate": 4.584964990604185e-06, "loss": 0.5131, "step": 8376 }, { "epoch": 0.54, "grad_norm": 1.1233246326446533, "learning_rate": 4.583923291122491e-06, "loss": 0.4847, "step": 8377 }, { "epoch": 0.54, "grad_norm": 1.1792055368423462, "learning_rate": 4.5828816098264385e-06, "loss": 0.5033, "step": 8378 }, { "epoch": 0.54, "grad_norm": 1.2452949285507202, "learning_rate": 4.581839946761554e-06, "loss": 0.5658, "step": 8379 }, { "epoch": 0.54, "grad_norm": 1.1913222074508667, "learning_rate": 4.580798301973366e-06, "loss": 0.5571, "step": 8380 }, { "epoch": 0.54, "grad_norm": 1.0833463668823242, "learning_rate": 4.579756675507403e-06, "loss": 0.5423, "step": 8381 }, { "epoch": 0.54, "grad_norm": 1.1185925006866455, "learning_rate": 4.578715067409191e-06, "loss": 0.5241, "step": 8382 }, { "epoch": 0.54, "grad_norm": 1.31003999710083, "learning_rate": 4.577673477724257e-06, "loss": 0.5044, "step": 8383 }, { "epoch": 0.54, "grad_norm": 1.243599772453308, "learning_rate": 4.576631906498127e-06, "loss": 0.5298, "step": 8384 }, { "epoch": 0.54, "grad_norm": 1.3553879261016846, "learning_rate": 4.575590353776322e-06, "loss": 0.5501, "step": 8385 }, { "epoch": 0.54, "grad_norm": 1.1702347993850708, "learning_rate": 4.574548819604368e-06, "loss": 0.5388, "step": 8386 }, { "epoch": 0.54, "grad_norm": 1.1408571004867554, "learning_rate": 4.5735073040277875e-06, "loss": 0.5491, "step": 8387 }, { "epoch": 0.54, "grad_norm": 1.0882397890090942, "learning_rate": 4.572465807092102e-06, "loss": 0.5632, "step": 8388 }, { "epoch": 0.54, "grad_norm": 1.1221855878829956, "learning_rate": 4.571424328842835e-06, "loss": 0.544, "step": 8389 }, { "epoch": 0.54, "grad_norm": 1.167973279953003, "learning_rate": 4.570382869325503e-06, "loss": 0.5379, "step": 8390 }, { "epoch": 0.54, "grad_norm": 1.18844473361969, "learning_rate": 4.569341428585627e-06, "loss": 0.54, "step": 8391 }, { "epoch": 0.54, "grad_norm": 1.1708290576934814, "learning_rate": 4.568300006668728e-06, "loss": 0.536, "step": 8392 }, { "epoch": 0.54, "grad_norm": 1.0592957735061646, "learning_rate": 4.56725860362032e-06, "loss": 0.5017, "step": 8393 }, { "epoch": 0.54, "grad_norm": 1.1929655075073242, "learning_rate": 4.566217219485922e-06, "loss": 0.5179, "step": 8394 }, { "epoch": 0.54, "grad_norm": 1.1676950454711914, "learning_rate": 4.565175854311052e-06, "loss": 0.5623, "step": 8395 }, { "epoch": 0.54, "grad_norm": 1.2041987180709839, "learning_rate": 4.564134508141223e-06, "loss": 0.5426, "step": 8396 }, { "epoch": 0.54, "grad_norm": 1.088073492050171, "learning_rate": 4.5630931810219484e-06, "loss": 0.4746, "step": 8397 }, { "epoch": 0.54, "grad_norm": 1.3902627229690552, "learning_rate": 4.562051872998745e-06, "loss": 0.5221, "step": 8398 }, { "epoch": 0.54, "grad_norm": 1.171704888343811, "learning_rate": 4.561010584117123e-06, "loss": 0.6068, "step": 8399 }, { "epoch": 0.54, "grad_norm": 1.1624778509140015, "learning_rate": 4.559969314422596e-06, "loss": 0.5614, "step": 8400 }, { "epoch": 0.54, "grad_norm": 1.1548948287963867, "learning_rate": 4.558928063960678e-06, "loss": 0.5126, "step": 8401 }, { "epoch": 0.54, "grad_norm": 1.2050766944885254, "learning_rate": 4.557886832776874e-06, "loss": 0.522, "step": 8402 }, { "epoch": 0.54, "grad_norm": 1.2451058626174927, "learning_rate": 4.556845620916695e-06, "loss": 0.568, "step": 8403 }, { "epoch": 0.54, "grad_norm": 1.0589234828948975, "learning_rate": 4.555804428425651e-06, "loss": 0.5043, "step": 8404 }, { "epoch": 0.54, "grad_norm": 1.1293812990188599, "learning_rate": 4.55476325534925e-06, "loss": 0.4875, "step": 8405 }, { "epoch": 0.54, "grad_norm": 1.259395718574524, "learning_rate": 4.553722101732998e-06, "loss": 0.5384, "step": 8406 }, { "epoch": 0.54, "grad_norm": 1.1776481866836548, "learning_rate": 4.552680967622403e-06, "loss": 0.5264, "step": 8407 }, { "epoch": 0.54, "grad_norm": 1.4396710395812988, "learning_rate": 4.551639853062968e-06, "loss": 0.6204, "step": 8408 }, { "epoch": 0.54, "grad_norm": 1.3132779598236084, "learning_rate": 4.550598758100197e-06, "loss": 0.5763, "step": 8409 }, { "epoch": 0.54, "grad_norm": 1.2011566162109375, "learning_rate": 4.549557682779595e-06, "loss": 0.5576, "step": 8410 }, { "epoch": 0.54, "grad_norm": 1.1547526121139526, "learning_rate": 4.5485166271466655e-06, "loss": 0.515, "step": 8411 }, { "epoch": 0.54, "grad_norm": 1.0349594354629517, "learning_rate": 4.547475591246909e-06, "loss": 0.5078, "step": 8412 }, { "epoch": 0.54, "grad_norm": 1.1944376230239868, "learning_rate": 4.546434575125828e-06, "loss": 0.5525, "step": 8413 }, { "epoch": 0.54, "grad_norm": 1.1412009000778198, "learning_rate": 4.54539357882892e-06, "loss": 0.5294, "step": 8414 }, { "epoch": 0.54, "grad_norm": 1.215726375579834, "learning_rate": 4.544352602401686e-06, "loss": 0.5328, "step": 8415 }, { "epoch": 0.54, "grad_norm": 1.3297280073165894, "learning_rate": 4.543311645889624e-06, "loss": 0.5514, "step": 8416 }, { "epoch": 0.54, "grad_norm": 1.2255325317382812, "learning_rate": 4.542270709338233e-06, "loss": 0.5418, "step": 8417 }, { "epoch": 0.54, "grad_norm": 1.1049333810806274, "learning_rate": 4.541229792793007e-06, "loss": 0.4968, "step": 8418 }, { "epoch": 0.54, "grad_norm": 1.2192237377166748, "learning_rate": 4.540188896299444e-06, "loss": 0.5347, "step": 8419 }, { "epoch": 0.54, "grad_norm": 1.3223457336425781, "learning_rate": 4.539148019903037e-06, "loss": 0.5247, "step": 8420 }, { "epoch": 0.54, "grad_norm": 1.9544628858566284, "learning_rate": 4.5381071636492826e-06, "loss": 0.5552, "step": 8421 }, { "epoch": 0.54, "grad_norm": 1.0721946954727173, "learning_rate": 4.5370663275836714e-06, "loss": 0.5335, "step": 8422 }, { "epoch": 0.54, "grad_norm": 1.340624451637268, "learning_rate": 4.536025511751696e-06, "loss": 0.5503, "step": 8423 }, { "epoch": 0.54, "grad_norm": 1.2359447479248047, "learning_rate": 4.5349847161988484e-06, "loss": 0.5478, "step": 8424 }, { "epoch": 0.54, "grad_norm": 1.1679178476333618, "learning_rate": 4.533943940970621e-06, "loss": 0.5152, "step": 8425 }, { "epoch": 0.54, "grad_norm": 1.1479105949401855, "learning_rate": 4.532903186112501e-06, "loss": 0.5111, "step": 8426 }, { "epoch": 0.54, "grad_norm": 1.2086552381515503, "learning_rate": 4.531862451669976e-06, "loss": 0.5472, "step": 8427 }, { "epoch": 0.54, "grad_norm": 1.148798942565918, "learning_rate": 4.5308217376885356e-06, "loss": 0.5473, "step": 8428 }, { "epoch": 0.54, "grad_norm": 1.2595235109329224, "learning_rate": 4.529781044213667e-06, "loss": 0.5251, "step": 8429 }, { "epoch": 0.54, "grad_norm": 1.138476014137268, "learning_rate": 4.528740371290858e-06, "loss": 0.4982, "step": 8430 }, { "epoch": 0.54, "grad_norm": 1.1542259454727173, "learning_rate": 4.527699718965589e-06, "loss": 0.4841, "step": 8431 }, { "epoch": 0.54, "grad_norm": 1.1115303039550781, "learning_rate": 4.526659087283347e-06, "loss": 0.531, "step": 8432 }, { "epoch": 0.54, "grad_norm": 1.361060380935669, "learning_rate": 4.525618476289614e-06, "loss": 0.5935, "step": 8433 }, { "epoch": 0.54, "grad_norm": 1.0599514245986938, "learning_rate": 4.524577886029875e-06, "loss": 0.5295, "step": 8434 }, { "epoch": 0.54, "grad_norm": 1.0971136093139648, "learning_rate": 4.523537316549609e-06, "loss": 0.5514, "step": 8435 }, { "epoch": 0.54, "grad_norm": 1.4212485551834106, "learning_rate": 4.5224967678943e-06, "loss": 0.5681, "step": 8436 }, { "epoch": 0.54, "grad_norm": 1.1089556217193604, "learning_rate": 4.5214562401094234e-06, "loss": 0.5096, "step": 8437 }, { "epoch": 0.54, "grad_norm": 1.3987956047058105, "learning_rate": 4.520415733240459e-06, "loss": 0.5417, "step": 8438 }, { "epoch": 0.54, "grad_norm": 1.2730149030685425, "learning_rate": 4.519375247332887e-06, "loss": 0.5453, "step": 8439 }, { "epoch": 0.54, "grad_norm": 1.227290391921997, "learning_rate": 4.518334782432183e-06, "loss": 0.5272, "step": 8440 }, { "epoch": 0.54, "grad_norm": 1.238078236579895, "learning_rate": 4.517294338583822e-06, "loss": 0.5328, "step": 8441 }, { "epoch": 0.54, "grad_norm": 1.1083098649978638, "learning_rate": 4.516253915833282e-06, "loss": 0.5304, "step": 8442 }, { "epoch": 0.54, "grad_norm": 1.0641571283340454, "learning_rate": 4.515213514226033e-06, "loss": 0.5164, "step": 8443 }, { "epoch": 0.55, "grad_norm": 1.0262107849121094, "learning_rate": 4.514173133807552e-06, "loss": 0.5258, "step": 8444 }, { "epoch": 0.55, "grad_norm": 1.3172509670257568, "learning_rate": 4.513132774623308e-06, "loss": 0.5581, "step": 8445 }, { "epoch": 0.55, "grad_norm": 1.1986644268035889, "learning_rate": 4.512092436718776e-06, "loss": 0.5071, "step": 8446 }, { "epoch": 0.55, "grad_norm": 1.267018437385559, "learning_rate": 4.511052120139424e-06, "loss": 0.5405, "step": 8447 }, { "epoch": 0.55, "grad_norm": 1.103203296661377, "learning_rate": 4.510011824930724e-06, "loss": 0.5287, "step": 8448 }, { "epoch": 0.55, "grad_norm": 1.1964004039764404, "learning_rate": 4.508971551138142e-06, "loss": 0.5398, "step": 8449 }, { "epoch": 0.55, "grad_norm": 1.086954951286316, "learning_rate": 4.507931298807145e-06, "loss": 0.4807, "step": 8450 }, { "epoch": 0.55, "grad_norm": 1.08817458152771, "learning_rate": 4.506891067983203e-06, "loss": 0.5033, "step": 8451 }, { "epoch": 0.55, "grad_norm": 1.189679503440857, "learning_rate": 4.5058508587117785e-06, "loss": 0.5449, "step": 8452 }, { "epoch": 0.55, "grad_norm": 1.190191388130188, "learning_rate": 4.504810671038339e-06, "loss": 0.5233, "step": 8453 }, { "epoch": 0.55, "grad_norm": 1.245374083518982, "learning_rate": 4.50377050500835e-06, "loss": 0.5561, "step": 8454 }, { "epoch": 0.55, "grad_norm": 1.2271279096603394, "learning_rate": 4.502730360667269e-06, "loss": 0.5752, "step": 8455 }, { "epoch": 0.55, "grad_norm": 1.0972859859466553, "learning_rate": 4.501690238060561e-06, "loss": 0.4756, "step": 8456 }, { "epoch": 0.55, "grad_norm": 1.3469269275665283, "learning_rate": 4.500650137233688e-06, "loss": 0.577, "step": 8457 }, { "epoch": 0.55, "grad_norm": 1.099396824836731, "learning_rate": 4.499610058232108e-06, "loss": 0.5583, "step": 8458 }, { "epoch": 0.55, "grad_norm": 1.141141414642334, "learning_rate": 4.4985700011012835e-06, "loss": 0.5434, "step": 8459 }, { "epoch": 0.55, "grad_norm": 1.1398698091506958, "learning_rate": 4.49752996588667e-06, "loss": 0.5424, "step": 8460 }, { "epoch": 0.55, "grad_norm": 1.1539851427078247, "learning_rate": 4.496489952633724e-06, "loss": 0.5616, "step": 8461 }, { "epoch": 0.55, "grad_norm": 1.0786105394363403, "learning_rate": 4.4954499613879035e-06, "loss": 0.513, "step": 8462 }, { "epoch": 0.55, "grad_norm": 1.0480812788009644, "learning_rate": 4.494409992194664e-06, "loss": 0.482, "step": 8463 }, { "epoch": 0.55, "grad_norm": 1.311811089515686, "learning_rate": 4.493370045099458e-06, "loss": 0.5714, "step": 8464 }, { "epoch": 0.55, "grad_norm": 1.2003262042999268, "learning_rate": 4.492330120147741e-06, "loss": 0.5245, "step": 8465 }, { "epoch": 0.55, "grad_norm": 1.3155838251113892, "learning_rate": 4.4912902173849655e-06, "loss": 0.5414, "step": 8466 }, { "epoch": 0.55, "grad_norm": 1.1633474826812744, "learning_rate": 4.490250336856581e-06, "loss": 0.5034, "step": 8467 }, { "epoch": 0.55, "grad_norm": 1.2593234777450562, "learning_rate": 4.48921047860804e-06, "loss": 0.5486, "step": 8468 }, { "epoch": 0.55, "grad_norm": 1.2646344900131226, "learning_rate": 4.48817064268479e-06, "loss": 0.5381, "step": 8469 }, { "epoch": 0.55, "grad_norm": 1.039194941520691, "learning_rate": 4.487130829132282e-06, "loss": 0.5008, "step": 8470 }, { "epoch": 0.55, "grad_norm": 1.1013985872268677, "learning_rate": 4.486091037995961e-06, "loss": 0.5274, "step": 8471 }, { "epoch": 0.55, "grad_norm": 1.2337288856506348, "learning_rate": 4.485051269321275e-06, "loss": 0.496, "step": 8472 }, { "epoch": 0.55, "grad_norm": 1.170639157295227, "learning_rate": 4.4840115231536706e-06, "loss": 0.5227, "step": 8473 }, { "epoch": 0.55, "grad_norm": 1.236730933189392, "learning_rate": 4.48297179953859e-06, "loss": 0.5652, "step": 8474 }, { "epoch": 0.55, "grad_norm": 1.1788033246994019, "learning_rate": 4.481932098521479e-06, "loss": 0.5117, "step": 8475 }, { "epoch": 0.55, "grad_norm": 1.1324412822723389, "learning_rate": 4.480892420147778e-06, "loss": 0.4734, "step": 8476 }, { "epoch": 0.55, "grad_norm": 1.2933310270309448, "learning_rate": 4.4798527644629326e-06, "loss": 0.5343, "step": 8477 }, { "epoch": 0.55, "grad_norm": 1.1241790056228638, "learning_rate": 4.478813131512379e-06, "loss": 0.5469, "step": 8478 }, { "epoch": 0.55, "grad_norm": 1.1280893087387085, "learning_rate": 4.477773521341558e-06, "loss": 0.5108, "step": 8479 }, { "epoch": 0.55, "grad_norm": 1.1556227207183838, "learning_rate": 4.47673393399591e-06, "loss": 0.5313, "step": 8480 }, { "epoch": 0.55, "grad_norm": 1.0691947937011719, "learning_rate": 4.475694369520871e-06, "loss": 0.5097, "step": 8481 }, { "epoch": 0.55, "grad_norm": 1.2212680578231812, "learning_rate": 4.474654827961878e-06, "loss": 0.5278, "step": 8482 }, { "epoch": 0.55, "grad_norm": 1.107496738433838, "learning_rate": 4.47361530936437e-06, "loss": 0.4753, "step": 8483 }, { "epoch": 0.55, "grad_norm": 1.0895366668701172, "learning_rate": 4.472575813773777e-06, "loss": 0.5047, "step": 8484 }, { "epoch": 0.55, "grad_norm": 1.238882064819336, "learning_rate": 4.4715363412355335e-06, "loss": 0.5302, "step": 8485 }, { "epoch": 0.55, "grad_norm": 1.1236438751220703, "learning_rate": 4.470496891795073e-06, "loss": 0.5218, "step": 8486 }, { "epoch": 0.55, "grad_norm": 1.2206449508666992, "learning_rate": 4.469457465497828e-06, "loss": 0.5736, "step": 8487 }, { "epoch": 0.55, "grad_norm": 1.0349236726760864, "learning_rate": 4.4684180623892285e-06, "loss": 0.5053, "step": 8488 }, { "epoch": 0.55, "grad_norm": 1.3112399578094482, "learning_rate": 4.467378682514705e-06, "loss": 0.5339, "step": 8489 }, { "epoch": 0.55, "grad_norm": 1.1722979545593262, "learning_rate": 4.466339325919684e-06, "loss": 0.5564, "step": 8490 }, { "epoch": 0.55, "grad_norm": 1.1299420595169067, "learning_rate": 4.465299992649593e-06, "loss": 0.4939, "step": 8491 }, { "epoch": 0.55, "grad_norm": 1.252631664276123, "learning_rate": 4.4642606827498606e-06, "loss": 0.5329, "step": 8492 }, { "epoch": 0.55, "grad_norm": 1.151511788368225, "learning_rate": 4.463221396265912e-06, "loss": 0.4705, "step": 8493 }, { "epoch": 0.55, "grad_norm": 1.2597663402557373, "learning_rate": 4.46218213324317e-06, "loss": 0.5457, "step": 8494 }, { "epoch": 0.55, "grad_norm": 1.3414058685302734, "learning_rate": 4.461142893727063e-06, "loss": 0.5433, "step": 8495 }, { "epoch": 0.55, "grad_norm": 1.1947911977767944, "learning_rate": 4.460103677763006e-06, "loss": 0.5535, "step": 8496 }, { "epoch": 0.55, "grad_norm": 1.1794248819351196, "learning_rate": 4.459064485396426e-06, "loss": 0.5071, "step": 8497 }, { "epoch": 0.55, "grad_norm": 1.2104437351226807, "learning_rate": 4.45802531667274e-06, "loss": 0.5402, "step": 8498 }, { "epoch": 0.55, "grad_norm": 1.175803542137146, "learning_rate": 4.456986171637371e-06, "loss": 0.5153, "step": 8499 }, { "epoch": 0.55, "grad_norm": 1.158034086227417, "learning_rate": 4.4559470503357336e-06, "loss": 0.5189, "step": 8500 }, { "epoch": 0.55, "grad_norm": 1.1457231044769287, "learning_rate": 4.45490795281325e-06, "loss": 0.5185, "step": 8501 }, { "epoch": 0.55, "grad_norm": 1.250380039215088, "learning_rate": 4.453868879115332e-06, "loss": 0.5394, "step": 8502 }, { "epoch": 0.55, "grad_norm": 1.2454006671905518, "learning_rate": 4.452829829287395e-06, "loss": 0.5966, "step": 8503 }, { "epoch": 0.55, "grad_norm": 1.0959453582763672, "learning_rate": 4.451790803374855e-06, "loss": 0.5444, "step": 8504 }, { "epoch": 0.55, "grad_norm": 1.4827972650527954, "learning_rate": 4.450751801423125e-06, "loss": 0.4676, "step": 8505 }, { "epoch": 0.55, "grad_norm": 1.115105152130127, "learning_rate": 4.449712823477617e-06, "loss": 0.5137, "step": 8506 }, { "epoch": 0.55, "grad_norm": 1.2486501932144165, "learning_rate": 4.448673869583742e-06, "loss": 0.5237, "step": 8507 }, { "epoch": 0.55, "grad_norm": 1.1425057649612427, "learning_rate": 4.447634939786909e-06, "loss": 0.5263, "step": 8508 }, { "epoch": 0.55, "grad_norm": 1.1270464658737183, "learning_rate": 4.446596034132527e-06, "loss": 0.5018, "step": 8509 }, { "epoch": 0.55, "grad_norm": 1.1542056798934937, "learning_rate": 4.445557152666006e-06, "loss": 0.5321, "step": 8510 }, { "epoch": 0.55, "grad_norm": 1.1431976556777954, "learning_rate": 4.44451829543275e-06, "loss": 0.5182, "step": 8511 }, { "epoch": 0.55, "grad_norm": 1.111943006515503, "learning_rate": 4.443479462478166e-06, "loss": 0.5006, "step": 8512 }, { "epoch": 0.55, "grad_norm": 1.131866693496704, "learning_rate": 4.4424406538476614e-06, "loss": 0.5004, "step": 8513 }, { "epoch": 0.55, "grad_norm": 1.5740610361099243, "learning_rate": 4.441401869586635e-06, "loss": 0.5496, "step": 8514 }, { "epoch": 0.55, "grad_norm": 1.2232881784439087, "learning_rate": 4.440363109740493e-06, "loss": 0.5019, "step": 8515 }, { "epoch": 0.55, "grad_norm": 1.2432276010513306, "learning_rate": 4.439324374354634e-06, "loss": 0.4961, "step": 8516 }, { "epoch": 0.55, "grad_norm": 1.1469382047653198, "learning_rate": 4.43828566347446e-06, "loss": 0.5409, "step": 8517 }, { "epoch": 0.55, "grad_norm": 1.1823166608810425, "learning_rate": 4.437246977145372e-06, "loss": 0.4677, "step": 8518 }, { "epoch": 0.55, "grad_norm": 1.2593129873275757, "learning_rate": 4.436208315412767e-06, "loss": 0.5118, "step": 8519 }, { "epoch": 0.55, "grad_norm": 1.161795973777771, "learning_rate": 4.435169678322041e-06, "loss": 0.5077, "step": 8520 }, { "epoch": 0.55, "grad_norm": 1.2768282890319824, "learning_rate": 4.43413106591859e-06, "loss": 0.5823, "step": 8521 }, { "epoch": 0.55, "grad_norm": 1.106371521949768, "learning_rate": 4.4330924782478105e-06, "loss": 0.5052, "step": 8522 }, { "epoch": 0.55, "grad_norm": 1.2986583709716797, "learning_rate": 4.432053915355097e-06, "loss": 0.543, "step": 8523 }, { "epoch": 0.55, "grad_norm": 1.2192072868347168, "learning_rate": 4.431015377285843e-06, "loss": 0.4955, "step": 8524 }, { "epoch": 0.55, "grad_norm": 1.0793415307998657, "learning_rate": 4.429976864085436e-06, "loss": 0.4718, "step": 8525 }, { "epoch": 0.55, "grad_norm": 1.0969924926757812, "learning_rate": 4.428938375799271e-06, "loss": 0.5055, "step": 8526 }, { "epoch": 0.55, "grad_norm": 1.1468724012374878, "learning_rate": 4.427899912472735e-06, "loss": 0.5295, "step": 8527 }, { "epoch": 0.55, "grad_norm": 1.1994199752807617, "learning_rate": 4.426861474151219e-06, "loss": 0.5664, "step": 8528 }, { "epoch": 0.55, "grad_norm": 1.2319304943084717, "learning_rate": 4.425823060880108e-06, "loss": 0.5659, "step": 8529 }, { "epoch": 0.55, "grad_norm": 1.350762128829956, "learning_rate": 4.424784672704793e-06, "loss": 0.5509, "step": 8530 }, { "epoch": 0.55, "grad_norm": 1.152132272720337, "learning_rate": 4.423746309670653e-06, "loss": 0.5588, "step": 8531 }, { "epoch": 0.55, "grad_norm": 1.09466552734375, "learning_rate": 4.422707971823075e-06, "loss": 0.4761, "step": 8532 }, { "epoch": 0.55, "grad_norm": 1.1608469486236572, "learning_rate": 4.421669659207442e-06, "loss": 0.5279, "step": 8533 }, { "epoch": 0.55, "grad_norm": 1.2098952531814575, "learning_rate": 4.420631371869136e-06, "loss": 0.5359, "step": 8534 }, { "epoch": 0.55, "grad_norm": 1.1505208015441895, "learning_rate": 4.4195931098535385e-06, "loss": 0.5084, "step": 8535 }, { "epoch": 0.55, "grad_norm": 1.1381556987762451, "learning_rate": 4.418554873206031e-06, "loss": 0.5897, "step": 8536 }, { "epoch": 0.55, "grad_norm": 1.1035133600234985, "learning_rate": 4.417516661971987e-06, "loss": 0.5235, "step": 8537 }, { "epoch": 0.55, "grad_norm": 1.0684577226638794, "learning_rate": 4.416478476196788e-06, "loss": 0.5512, "step": 8538 }, { "epoch": 0.55, "grad_norm": 1.2218233346939087, "learning_rate": 4.415440315925808e-06, "loss": 0.5258, "step": 8539 }, { "epoch": 0.55, "grad_norm": 1.2079412937164307, "learning_rate": 4.414402181204424e-06, "loss": 0.4986, "step": 8540 }, { "epoch": 0.55, "grad_norm": 1.1247212886810303, "learning_rate": 4.41336407207801e-06, "loss": 0.5059, "step": 8541 }, { "epoch": 0.55, "grad_norm": 1.107690691947937, "learning_rate": 4.412325988591941e-06, "loss": 0.4987, "step": 8542 }, { "epoch": 0.55, "grad_norm": 1.1262763738632202, "learning_rate": 4.411287930791585e-06, "loss": 0.4919, "step": 8543 }, { "epoch": 0.55, "grad_norm": 1.2099804878234863, "learning_rate": 4.410249898722314e-06, "loss": 0.5619, "step": 8544 }, { "epoch": 0.55, "grad_norm": 1.139809012413025, "learning_rate": 4.409211892429499e-06, "loss": 0.5381, "step": 8545 }, { "epoch": 0.55, "grad_norm": 1.1890949010849, "learning_rate": 4.408173911958507e-06, "loss": 0.544, "step": 8546 }, { "epoch": 0.55, "grad_norm": 1.2090427875518799, "learning_rate": 4.4071359573547065e-06, "loss": 0.5143, "step": 8547 }, { "epoch": 0.55, "grad_norm": 1.1798779964447021, "learning_rate": 4.406098028663467e-06, "loss": 0.5645, "step": 8548 }, { "epoch": 0.55, "grad_norm": 1.2627142667770386, "learning_rate": 4.405060125930148e-06, "loss": 0.5122, "step": 8549 }, { "epoch": 0.55, "grad_norm": 1.1546744108200073, "learning_rate": 4.404022249200115e-06, "loss": 0.5152, "step": 8550 }, { "epoch": 0.55, "grad_norm": 1.1307005882263184, "learning_rate": 4.402984398518732e-06, "loss": 0.4772, "step": 8551 }, { "epoch": 0.55, "grad_norm": 1.0744391679763794, "learning_rate": 4.401946573931362e-06, "loss": 0.5008, "step": 8552 }, { "epoch": 0.55, "grad_norm": 1.1491992473602295, "learning_rate": 4.4009087754833626e-06, "loss": 0.5237, "step": 8553 }, { "epoch": 0.55, "grad_norm": 1.1376428604125977, "learning_rate": 4.399871003220097e-06, "loss": 0.5403, "step": 8554 }, { "epoch": 0.55, "grad_norm": 1.086959958076477, "learning_rate": 4.39883325718692e-06, "loss": 0.4926, "step": 8555 }, { "epoch": 0.55, "grad_norm": 1.2632845640182495, "learning_rate": 4.397795537429191e-06, "loss": 0.5773, "step": 8556 }, { "epoch": 0.55, "grad_norm": 1.1780413389205933, "learning_rate": 4.396757843992266e-06, "loss": 0.5369, "step": 8557 }, { "epoch": 0.55, "grad_norm": 1.1472883224487305, "learning_rate": 4.395720176921497e-06, "loss": 0.4915, "step": 8558 }, { "epoch": 0.55, "grad_norm": 1.2019221782684326, "learning_rate": 4.394682536262242e-06, "loss": 0.5791, "step": 8559 }, { "epoch": 0.55, "grad_norm": 1.1297639608383179, "learning_rate": 4.39364492205985e-06, "loss": 0.5121, "step": 8560 }, { "epoch": 0.55, "grad_norm": 1.1685681343078613, "learning_rate": 4.3926073343596746e-06, "loss": 0.4915, "step": 8561 }, { "epoch": 0.55, "grad_norm": 1.2218201160430908, "learning_rate": 4.3915697732070665e-06, "loss": 0.5362, "step": 8562 }, { "epoch": 0.55, "grad_norm": 1.354779601097107, "learning_rate": 4.390532238647373e-06, "loss": 0.5457, "step": 8563 }, { "epoch": 0.55, "grad_norm": 1.1261500120162964, "learning_rate": 4.389494730725943e-06, "loss": 0.521, "step": 8564 }, { "epoch": 0.55, "grad_norm": 1.1967377662658691, "learning_rate": 4.388457249488124e-06, "loss": 0.527, "step": 8565 }, { "epoch": 0.55, "grad_norm": 1.0526056289672852, "learning_rate": 4.387419794979261e-06, "loss": 0.483, "step": 8566 }, { "epoch": 0.55, "grad_norm": 1.0695915222167969, "learning_rate": 4.386382367244696e-06, "loss": 0.4977, "step": 8567 }, { "epoch": 0.55, "grad_norm": 1.2179945707321167, "learning_rate": 4.385344966329776e-06, "loss": 0.5375, "step": 8568 }, { "epoch": 0.55, "grad_norm": 1.3022977113723755, "learning_rate": 4.384307592279842e-06, "loss": 0.5436, "step": 8569 }, { "epoch": 0.55, "grad_norm": 1.2224550247192383, "learning_rate": 4.3832702451402335e-06, "loss": 0.553, "step": 8570 }, { "epoch": 0.55, "grad_norm": 1.195275902748108, "learning_rate": 4.382232924956294e-06, "loss": 0.4893, "step": 8571 }, { "epoch": 0.55, "grad_norm": 1.2359614372253418, "learning_rate": 4.381195631773358e-06, "loss": 0.5195, "step": 8572 }, { "epoch": 0.55, "grad_norm": 1.2644037008285522, "learning_rate": 4.380158365636763e-06, "loss": 0.5593, "step": 8573 }, { "epoch": 0.55, "grad_norm": 1.2440910339355469, "learning_rate": 4.379121126591847e-06, "loss": 0.564, "step": 8574 }, { "epoch": 0.55, "grad_norm": 1.2932639122009277, "learning_rate": 4.378083914683945e-06, "loss": 0.546, "step": 8575 }, { "epoch": 0.55, "grad_norm": 1.2394517660140991, "learning_rate": 4.377046729958391e-06, "loss": 0.5245, "step": 8576 }, { "epoch": 0.55, "grad_norm": 1.1606131792068481, "learning_rate": 4.376009572460519e-06, "loss": 0.5006, "step": 8577 }, { "epoch": 0.55, "grad_norm": 1.260922908782959, "learning_rate": 4.374972442235656e-06, "loss": 0.4732, "step": 8578 }, { "epoch": 0.55, "grad_norm": 1.1926002502441406, "learning_rate": 4.373935339329135e-06, "loss": 0.5617, "step": 8579 }, { "epoch": 0.55, "grad_norm": 1.2874349355697632, "learning_rate": 4.372898263786286e-06, "loss": 0.5355, "step": 8580 }, { "epoch": 0.55, "grad_norm": 1.2603859901428223, "learning_rate": 4.371861215652435e-06, "loss": 0.5391, "step": 8581 }, { "epoch": 0.55, "grad_norm": 1.0734531879425049, "learning_rate": 4.37082419497291e-06, "loss": 0.5129, "step": 8582 }, { "epoch": 0.55, "grad_norm": 1.1836694478988647, "learning_rate": 4.3697872017930385e-06, "loss": 0.5033, "step": 8583 }, { "epoch": 0.55, "grad_norm": 1.2007904052734375, "learning_rate": 4.368750236158139e-06, "loss": 0.5485, "step": 8584 }, { "epoch": 0.55, "grad_norm": 1.095102310180664, "learning_rate": 4.367713298113539e-06, "loss": 0.5045, "step": 8585 }, { "epoch": 0.55, "grad_norm": 1.1529299020767212, "learning_rate": 4.36667638770456e-06, "loss": 0.5557, "step": 8586 }, { "epoch": 0.55, "grad_norm": 1.2472258806228638, "learning_rate": 4.365639504976522e-06, "loss": 0.5635, "step": 8587 }, { "epoch": 0.55, "grad_norm": 1.110693097114563, "learning_rate": 4.364602649974744e-06, "loss": 0.5121, "step": 8588 }, { "epoch": 0.55, "grad_norm": 1.0938405990600586, "learning_rate": 4.363565822744547e-06, "loss": 0.5323, "step": 8589 }, { "epoch": 0.55, "grad_norm": 1.1920406818389893, "learning_rate": 4.362529023331244e-06, "loss": 0.544, "step": 8590 }, { "epoch": 0.55, "grad_norm": 1.1609477996826172, "learning_rate": 4.3614922517801525e-06, "loss": 0.5144, "step": 8591 }, { "epoch": 0.55, "grad_norm": 1.186978816986084, "learning_rate": 4.3604555081365874e-06, "loss": 0.5561, "step": 8592 }, { "epoch": 0.55, "grad_norm": 1.1442515850067139, "learning_rate": 4.359418792445862e-06, "loss": 0.5542, "step": 8593 }, { "epoch": 0.55, "grad_norm": 1.1830612421035767, "learning_rate": 4.358382104753288e-06, "loss": 0.5253, "step": 8594 }, { "epoch": 0.55, "grad_norm": 1.2429934740066528, "learning_rate": 4.357345445104179e-06, "loss": 0.5368, "step": 8595 }, { "epoch": 0.55, "grad_norm": 1.2498847246170044, "learning_rate": 4.3563088135438415e-06, "loss": 0.5384, "step": 8596 }, { "epoch": 0.55, "grad_norm": 1.1858317852020264, "learning_rate": 4.355272210117584e-06, "loss": 0.5089, "step": 8597 }, { "epoch": 0.55, "grad_norm": 1.0960577726364136, "learning_rate": 4.354235634870715e-06, "loss": 0.4976, "step": 8598 }, { "epoch": 0.56, "grad_norm": 1.0980515480041504, "learning_rate": 4.353199087848541e-06, "loss": 0.5022, "step": 8599 }, { "epoch": 0.56, "grad_norm": 1.1153497695922852, "learning_rate": 4.3521625690963674e-06, "loss": 0.4938, "step": 8600 }, { "epoch": 0.56, "grad_norm": 1.1843897104263306, "learning_rate": 4.351126078659494e-06, "loss": 0.5729, "step": 8601 }, { "epoch": 0.56, "grad_norm": 1.1083760261535645, "learning_rate": 4.350089616583226e-06, "loss": 0.523, "step": 8602 }, { "epoch": 0.56, "grad_norm": 1.2159172296524048, "learning_rate": 4.349053182912864e-06, "loss": 0.5531, "step": 8603 }, { "epoch": 0.56, "grad_norm": 1.1430468559265137, "learning_rate": 4.34801677769371e-06, "loss": 0.5124, "step": 8604 }, { "epoch": 0.56, "grad_norm": 1.0533201694488525, "learning_rate": 4.346980400971058e-06, "loss": 0.5205, "step": 8605 }, { "epoch": 0.56, "grad_norm": 1.212537407875061, "learning_rate": 4.345944052790209e-06, "loss": 0.5378, "step": 8606 }, { "epoch": 0.56, "grad_norm": 1.1919857263565063, "learning_rate": 4.344907733196458e-06, "loss": 0.5321, "step": 8607 }, { "epoch": 0.56, "grad_norm": 1.1127787828445435, "learning_rate": 4.3438714422351e-06, "loss": 0.5011, "step": 8608 }, { "epoch": 0.56, "grad_norm": 1.2003905773162842, "learning_rate": 4.342835179951428e-06, "loss": 0.518, "step": 8609 }, { "epoch": 0.56, "grad_norm": 1.229015827178955, "learning_rate": 4.341798946390734e-06, "loss": 0.5296, "step": 8610 }, { "epoch": 0.56, "grad_norm": 1.655888557434082, "learning_rate": 4.34076274159831e-06, "loss": 0.5101, "step": 8611 }, { "epoch": 0.56, "grad_norm": 1.1854472160339355, "learning_rate": 4.339726565619448e-06, "loss": 0.5322, "step": 8612 }, { "epoch": 0.56, "grad_norm": 1.2222310304641724, "learning_rate": 4.338690418499433e-06, "loss": 0.5218, "step": 8613 }, { "epoch": 0.56, "grad_norm": 1.0479143857955933, "learning_rate": 4.3376543002835526e-06, "loss": 0.4758, "step": 8614 }, { "epoch": 0.56, "grad_norm": 1.253853678703308, "learning_rate": 4.336618211017094e-06, "loss": 0.5327, "step": 8615 }, { "epoch": 0.56, "grad_norm": 1.1480847597122192, "learning_rate": 4.335582150745342e-06, "loss": 0.5251, "step": 8616 }, { "epoch": 0.56, "grad_norm": 1.1318682432174683, "learning_rate": 4.3345461195135805e-06, "loss": 0.5341, "step": 8617 }, { "epoch": 0.56, "grad_norm": 1.2022451162338257, "learning_rate": 4.3335101173670925e-06, "loss": 0.5332, "step": 8618 }, { "epoch": 0.56, "grad_norm": 1.2108032703399658, "learning_rate": 4.332474144351156e-06, "loss": 0.5401, "step": 8619 }, { "epoch": 0.56, "grad_norm": 1.0901739597320557, "learning_rate": 4.3314382005110525e-06, "loss": 0.5371, "step": 8620 }, { "epoch": 0.56, "grad_norm": 1.205195426940918, "learning_rate": 4.330402285892059e-06, "loss": 0.5695, "step": 8621 }, { "epoch": 0.56, "grad_norm": 1.247206687927246, "learning_rate": 4.3293664005394555e-06, "loss": 0.5515, "step": 8622 }, { "epoch": 0.56, "grad_norm": 1.118229866027832, "learning_rate": 4.3283305444985166e-06, "loss": 0.5015, "step": 8623 }, { "epoch": 0.56, "grad_norm": 1.1305110454559326, "learning_rate": 4.327294717814518e-06, "loss": 0.4962, "step": 8624 }, { "epoch": 0.56, "grad_norm": 1.2871427536010742, "learning_rate": 4.32625892053273e-06, "loss": 0.5381, "step": 8625 }, { "epoch": 0.56, "grad_norm": 1.1518239974975586, "learning_rate": 4.325223152698426e-06, "loss": 0.477, "step": 8626 }, { "epoch": 0.56, "grad_norm": 1.250191330909729, "learning_rate": 4.3241874143568765e-06, "loss": 0.5152, "step": 8627 }, { "epoch": 0.56, "grad_norm": 1.1009513139724731, "learning_rate": 4.323151705553352e-06, "loss": 0.5416, "step": 8628 }, { "epoch": 0.56, "grad_norm": 1.1699094772338867, "learning_rate": 4.322116026333121e-06, "loss": 0.4886, "step": 8629 }, { "epoch": 0.56, "grad_norm": 1.0766059160232544, "learning_rate": 4.32108037674145e-06, "loss": 0.514, "step": 8630 }, { "epoch": 0.56, "grad_norm": 1.0977146625518799, "learning_rate": 4.3200447568236035e-06, "loss": 0.517, "step": 8631 }, { "epoch": 0.56, "grad_norm": 1.2384313344955444, "learning_rate": 4.319009166624846e-06, "loss": 0.5895, "step": 8632 }, { "epoch": 0.56, "grad_norm": 1.0957661867141724, "learning_rate": 4.31797360619044e-06, "loss": 0.482, "step": 8633 }, { "epoch": 0.56, "grad_norm": 1.2873685359954834, "learning_rate": 4.316938075565649e-06, "loss": 0.5361, "step": 8634 }, { "epoch": 0.56, "grad_norm": 1.4050251245498657, "learning_rate": 4.315902574795732e-06, "loss": 0.5524, "step": 8635 }, { "epoch": 0.56, "grad_norm": 1.1709250211715698, "learning_rate": 4.314867103925951e-06, "loss": 0.5186, "step": 8636 }, { "epoch": 0.56, "grad_norm": 1.2587692737579346, "learning_rate": 4.313831663001559e-06, "loss": 0.5564, "step": 8637 }, { "epoch": 0.56, "grad_norm": 1.1209526062011719, "learning_rate": 4.312796252067814e-06, "loss": 0.5408, "step": 8638 }, { "epoch": 0.56, "grad_norm": 1.2096246480941772, "learning_rate": 4.311760871169972e-06, "loss": 0.5491, "step": 8639 }, { "epoch": 0.56, "grad_norm": 1.2443019151687622, "learning_rate": 4.310725520353287e-06, "loss": 0.552, "step": 8640 }, { "epoch": 0.56, "grad_norm": 1.2166650295257568, "learning_rate": 4.309690199663011e-06, "loss": 0.5498, "step": 8641 }, { "epoch": 0.56, "grad_norm": 1.168073058128357, "learning_rate": 4.308654909144398e-06, "loss": 0.5304, "step": 8642 }, { "epoch": 0.56, "grad_norm": 1.204573392868042, "learning_rate": 4.307619648842692e-06, "loss": 0.5146, "step": 8643 }, { "epoch": 0.56, "grad_norm": 1.133185863494873, "learning_rate": 4.306584418803145e-06, "loss": 0.5263, "step": 8644 }, { "epoch": 0.56, "grad_norm": 1.1751258373260498, "learning_rate": 4.305549219071005e-06, "loss": 0.5425, "step": 8645 }, { "epoch": 0.56, "grad_norm": 1.1571329832077026, "learning_rate": 4.304514049691517e-06, "loss": 0.5141, "step": 8646 }, { "epoch": 0.56, "grad_norm": 1.1698247194290161, "learning_rate": 4.303478910709927e-06, "loss": 0.5585, "step": 8647 }, { "epoch": 0.56, "grad_norm": 1.225886583328247, "learning_rate": 4.302443802171474e-06, "loss": 0.5773, "step": 8648 }, { "epoch": 0.56, "grad_norm": 1.1888483762741089, "learning_rate": 4.3014087241214034e-06, "loss": 0.5384, "step": 8649 }, { "epoch": 0.56, "grad_norm": 1.1818519830703735, "learning_rate": 4.3003736766049545e-06, "loss": 0.5324, "step": 8650 }, { "epoch": 0.56, "grad_norm": 1.1831004619598389, "learning_rate": 4.299338659667369e-06, "loss": 0.5442, "step": 8651 }, { "epoch": 0.56, "grad_norm": 1.1118723154067993, "learning_rate": 4.298303673353881e-06, "loss": 0.4774, "step": 8652 }, { "epoch": 0.56, "grad_norm": 1.1409094333648682, "learning_rate": 4.297268717709731e-06, "loss": 0.516, "step": 8653 }, { "epoch": 0.56, "grad_norm": 1.234617829322815, "learning_rate": 4.296233792780151e-06, "loss": 0.479, "step": 8654 }, { "epoch": 0.56, "grad_norm": 1.2083244323730469, "learning_rate": 4.295198898610376e-06, "loss": 0.5412, "step": 8655 }, { "epoch": 0.56, "grad_norm": 1.2614160776138306, "learning_rate": 4.294164035245639e-06, "loss": 0.5234, "step": 8656 }, { "epoch": 0.56, "grad_norm": 1.097342610359192, "learning_rate": 4.293129202731171e-06, "loss": 0.5214, "step": 8657 }, { "epoch": 0.56, "grad_norm": 1.2606779336929321, "learning_rate": 4.292094401112201e-06, "loss": 0.4564, "step": 8658 }, { "epoch": 0.56, "grad_norm": 1.1484721899032593, "learning_rate": 4.29105963043396e-06, "loss": 0.5511, "step": 8659 }, { "epoch": 0.56, "grad_norm": 1.1238991022109985, "learning_rate": 4.290024890741674e-06, "loss": 0.5165, "step": 8660 }, { "epoch": 0.56, "grad_norm": 1.1249855756759644, "learning_rate": 4.2889901820805665e-06, "loss": 0.5013, "step": 8661 }, { "epoch": 0.56, "grad_norm": 1.2115570306777954, "learning_rate": 4.287955504495863e-06, "loss": 0.5497, "step": 8662 }, { "epoch": 0.56, "grad_norm": 1.318038821220398, "learning_rate": 4.286920858032788e-06, "loss": 0.5254, "step": 8663 }, { "epoch": 0.56, "grad_norm": 1.172632098197937, "learning_rate": 4.2858862427365635e-06, "loss": 0.522, "step": 8664 }, { "epoch": 0.56, "grad_norm": 1.2012218236923218, "learning_rate": 4.284851658652409e-06, "loss": 0.5155, "step": 8665 }, { "epoch": 0.56, "grad_norm": 1.3578380346298218, "learning_rate": 4.283817105825543e-06, "loss": 0.5276, "step": 8666 }, { "epoch": 0.56, "grad_norm": 1.170157551765442, "learning_rate": 4.282782584301183e-06, "loss": 0.5366, "step": 8667 }, { "epoch": 0.56, "grad_norm": 1.2544236183166504, "learning_rate": 4.281748094124545e-06, "loss": 0.533, "step": 8668 }, { "epoch": 0.56, "grad_norm": 1.1956238746643066, "learning_rate": 4.280713635340846e-06, "loss": 0.5067, "step": 8669 }, { "epoch": 0.56, "grad_norm": 1.1518948078155518, "learning_rate": 4.279679207995298e-06, "loss": 0.4898, "step": 8670 }, { "epoch": 0.56, "grad_norm": 1.1785300970077515, "learning_rate": 4.278644812133115e-06, "loss": 0.5491, "step": 8671 }, { "epoch": 0.56, "grad_norm": 1.2576686143875122, "learning_rate": 4.277610447799504e-06, "loss": 0.5892, "step": 8672 }, { "epoch": 0.56, "grad_norm": 1.3072315454483032, "learning_rate": 4.2765761150396764e-06, "loss": 0.5737, "step": 8673 }, { "epoch": 0.56, "grad_norm": 1.1573066711425781, "learning_rate": 4.27554181389884e-06, "loss": 0.4771, "step": 8674 }, { "epoch": 0.56, "grad_norm": 1.176507592201233, "learning_rate": 4.2745075444222025e-06, "loss": 0.5839, "step": 8675 }, { "epoch": 0.56, "grad_norm": 1.1955366134643555, "learning_rate": 4.273473306654968e-06, "loss": 0.5479, "step": 8676 }, { "epoch": 0.56, "grad_norm": 1.306440830230713, "learning_rate": 4.272439100642344e-06, "loss": 0.5489, "step": 8677 }, { "epoch": 0.56, "grad_norm": 1.0823121070861816, "learning_rate": 4.271404926429527e-06, "loss": 0.5676, "step": 8678 }, { "epoch": 0.56, "grad_norm": 1.1063352823257446, "learning_rate": 4.2703707840617205e-06, "loss": 0.5176, "step": 8679 }, { "epoch": 0.56, "grad_norm": 1.153518795967102, "learning_rate": 4.269336673584124e-06, "loss": 0.5196, "step": 8680 }, { "epoch": 0.56, "grad_norm": 1.1429346799850464, "learning_rate": 4.268302595041938e-06, "loss": 0.5541, "step": 8681 }, { "epoch": 0.56, "grad_norm": 1.1741693019866943, "learning_rate": 4.267268548480356e-06, "loss": 0.5164, "step": 8682 }, { "epoch": 0.56, "grad_norm": 1.169180154800415, "learning_rate": 4.2662345339445795e-06, "loss": 0.5274, "step": 8683 }, { "epoch": 0.56, "grad_norm": 1.1938097476959229, "learning_rate": 4.2652005514797944e-06, "loss": 0.5294, "step": 8684 }, { "epoch": 0.56, "grad_norm": 1.1717605590820312, "learning_rate": 4.264166601131199e-06, "loss": 0.5176, "step": 8685 }, { "epoch": 0.56, "grad_norm": 1.1869924068450928, "learning_rate": 4.2631326829439814e-06, "loss": 0.5513, "step": 8686 }, { "epoch": 0.56, "grad_norm": 1.1182397603988647, "learning_rate": 4.262098796963334e-06, "loss": 0.5409, "step": 8687 }, { "epoch": 0.56, "grad_norm": 1.1335474252700806, "learning_rate": 4.261064943234446e-06, "loss": 0.5335, "step": 8688 }, { "epoch": 0.56, "grad_norm": 1.2694332599639893, "learning_rate": 4.2600311218025015e-06, "loss": 0.5171, "step": 8689 }, { "epoch": 0.56, "grad_norm": 1.2960875034332275, "learning_rate": 4.258997332712686e-06, "loss": 0.5554, "step": 8690 }, { "epoch": 0.56, "grad_norm": 1.255955696105957, "learning_rate": 4.257963576010187e-06, "loss": 0.5058, "step": 8691 }, { "epoch": 0.56, "grad_norm": 1.1542004346847534, "learning_rate": 4.256929851740185e-06, "loss": 0.4843, "step": 8692 }, { "epoch": 0.56, "grad_norm": 1.207047700881958, "learning_rate": 4.255896159947862e-06, "loss": 0.5612, "step": 8693 }, { "epoch": 0.56, "grad_norm": 1.231585144996643, "learning_rate": 4.2548625006784e-06, "loss": 0.6142, "step": 8694 }, { "epoch": 0.56, "grad_norm": 1.1546627283096313, "learning_rate": 4.253828873976974e-06, "loss": 0.5306, "step": 8695 }, { "epoch": 0.56, "grad_norm": 1.2320239543914795, "learning_rate": 4.252795279888762e-06, "loss": 0.5194, "step": 8696 }, { "epoch": 0.56, "grad_norm": 1.3080006837844849, "learning_rate": 4.251761718458942e-06, "loss": 0.5132, "step": 8697 }, { "epoch": 0.56, "grad_norm": 1.2592412233352661, "learning_rate": 4.2507281897326855e-06, "loss": 0.5616, "step": 8698 }, { "epoch": 0.56, "grad_norm": 1.2193944454193115, "learning_rate": 4.2496946937551666e-06, "loss": 0.5288, "step": 8699 }, { "epoch": 0.56, "grad_norm": 1.1658354997634888, "learning_rate": 4.248661230571558e-06, "loss": 0.4778, "step": 8700 }, { "epoch": 0.56, "grad_norm": 1.1707239151000977, "learning_rate": 4.2476278002270275e-06, "loss": 0.5152, "step": 8701 }, { "epoch": 0.56, "grad_norm": 1.3646031618118286, "learning_rate": 4.246594402766746e-06, "loss": 0.5699, "step": 8702 }, { "epoch": 0.56, "grad_norm": 1.2466542720794678, "learning_rate": 4.245561038235878e-06, "loss": 0.5426, "step": 8703 }, { "epoch": 0.56, "grad_norm": 1.2590322494506836, "learning_rate": 4.244527706679591e-06, "loss": 0.5573, "step": 8704 }, { "epoch": 0.56, "grad_norm": 1.2172809839248657, "learning_rate": 4.243494408143049e-06, "loss": 0.5921, "step": 8705 }, { "epoch": 0.56, "grad_norm": 1.2287487983703613, "learning_rate": 4.242461142671415e-06, "loss": 0.554, "step": 8706 }, { "epoch": 0.56, "grad_norm": 1.3419278860092163, "learning_rate": 4.24142791030985e-06, "loss": 0.4834, "step": 8707 }, { "epoch": 0.56, "grad_norm": 1.224153995513916, "learning_rate": 4.240394711103513e-06, "loss": 0.5174, "step": 8708 }, { "epoch": 0.56, "grad_norm": 1.2260994911193848, "learning_rate": 4.239361545097563e-06, "loss": 0.5554, "step": 8709 }, { "epoch": 0.56, "grad_norm": 1.187200665473938, "learning_rate": 4.238328412337158e-06, "loss": 0.5332, "step": 8710 }, { "epoch": 0.56, "grad_norm": 1.265734076499939, "learning_rate": 4.237295312867452e-06, "loss": 0.5331, "step": 8711 }, { "epoch": 0.56, "grad_norm": 1.119012713432312, "learning_rate": 4.2362622467336034e-06, "loss": 0.5199, "step": 8712 }, { "epoch": 0.56, "grad_norm": 1.145397663116455, "learning_rate": 4.235229213980759e-06, "loss": 0.5381, "step": 8713 }, { "epoch": 0.56, "grad_norm": 1.1973185539245605, "learning_rate": 4.234196214654072e-06, "loss": 0.5406, "step": 8714 }, { "epoch": 0.56, "grad_norm": 1.0878669023513794, "learning_rate": 4.233163248798693e-06, "loss": 0.5099, "step": 8715 }, { "epoch": 0.56, "grad_norm": 1.0892246961593628, "learning_rate": 4.23213031645977e-06, "loss": 0.4748, "step": 8716 }, { "epoch": 0.56, "grad_norm": 1.0926846265792847, "learning_rate": 4.23109741768245e-06, "loss": 0.5457, "step": 8717 }, { "epoch": 0.56, "grad_norm": 1.324525237083435, "learning_rate": 4.23006455251188e-06, "loss": 0.5478, "step": 8718 }, { "epoch": 0.56, "grad_norm": 1.1703797578811646, "learning_rate": 4.2290317209932e-06, "loss": 0.5638, "step": 8719 }, { "epoch": 0.56, "grad_norm": 1.193540096282959, "learning_rate": 4.227998923171553e-06, "loss": 0.5429, "step": 8720 }, { "epoch": 0.56, "grad_norm": 1.2051092386245728, "learning_rate": 4.226966159092083e-06, "loss": 0.5492, "step": 8721 }, { "epoch": 0.56, "grad_norm": 1.222412347793579, "learning_rate": 4.225933428799927e-06, "loss": 0.5346, "step": 8722 }, { "epoch": 0.56, "grad_norm": 1.1349737644195557, "learning_rate": 4.224900732340223e-06, "loss": 0.5639, "step": 8723 }, { "epoch": 0.56, "grad_norm": 1.1060073375701904, "learning_rate": 4.223868069758112e-06, "loss": 0.5167, "step": 8724 }, { "epoch": 0.56, "grad_norm": 1.167260766029358, "learning_rate": 4.222835441098722e-06, "loss": 0.5569, "step": 8725 }, { "epoch": 0.56, "grad_norm": 1.082657814025879, "learning_rate": 4.22180284640719e-06, "loss": 0.4975, "step": 8726 }, { "epoch": 0.56, "grad_norm": 1.1596431732177734, "learning_rate": 4.220770285728648e-06, "loss": 0.5233, "step": 8727 }, { "epoch": 0.56, "grad_norm": 1.2113510370254517, "learning_rate": 4.219737759108226e-06, "loss": 0.5674, "step": 8728 }, { "epoch": 0.56, "grad_norm": 1.1469523906707764, "learning_rate": 4.218705266591056e-06, "loss": 0.561, "step": 8729 }, { "epoch": 0.56, "grad_norm": 1.0897706747055054, "learning_rate": 4.2176728082222605e-06, "loss": 0.4932, "step": 8730 }, { "epoch": 0.56, "grad_norm": 1.0974359512329102, "learning_rate": 4.216640384046968e-06, "loss": 0.5016, "step": 8731 }, { "epoch": 0.56, "grad_norm": 1.111791729927063, "learning_rate": 4.215607994110304e-06, "loss": 0.5306, "step": 8732 }, { "epoch": 0.56, "grad_norm": 1.136636734008789, "learning_rate": 4.2145756384573895e-06, "loss": 0.5086, "step": 8733 }, { "epoch": 0.56, "grad_norm": 1.1989824771881104, "learning_rate": 4.213543317133349e-06, "loss": 0.5634, "step": 8734 }, { "epoch": 0.56, "grad_norm": 1.0894105434417725, "learning_rate": 4.2125110301833025e-06, "loss": 0.5009, "step": 8735 }, { "epoch": 0.56, "grad_norm": 1.2370774745941162, "learning_rate": 4.211478777652364e-06, "loss": 0.5508, "step": 8736 }, { "epoch": 0.56, "grad_norm": 1.2395347356796265, "learning_rate": 4.210446559585655e-06, "loss": 0.5625, "step": 8737 }, { "epoch": 0.56, "grad_norm": 1.2889294624328613, "learning_rate": 4.2094143760282896e-06, "loss": 0.5684, "step": 8738 }, { "epoch": 0.56, "grad_norm": 1.2765460014343262, "learning_rate": 4.2083822270253824e-06, "loss": 0.5065, "step": 8739 }, { "epoch": 0.56, "grad_norm": 1.2024942636489868, "learning_rate": 4.207350112622046e-06, "loss": 0.5243, "step": 8740 }, { "epoch": 0.56, "grad_norm": 1.1582800149917603, "learning_rate": 4.206318032863391e-06, "loss": 0.5353, "step": 8741 }, { "epoch": 0.56, "grad_norm": 1.1543314456939697, "learning_rate": 4.2052859877945265e-06, "loss": 0.5252, "step": 8742 }, { "epoch": 0.56, "grad_norm": 1.2390519380569458, "learning_rate": 4.2042539774605606e-06, "loss": 0.5283, "step": 8743 }, { "epoch": 0.56, "grad_norm": 1.1245794296264648, "learning_rate": 4.203222001906602e-06, "loss": 0.4938, "step": 8744 }, { "epoch": 0.56, "grad_norm": 1.138095736503601, "learning_rate": 4.202190061177752e-06, "loss": 0.4552, "step": 8745 }, { "epoch": 0.56, "grad_norm": 1.1233950853347778, "learning_rate": 4.201158155319118e-06, "loss": 0.535, "step": 8746 }, { "epoch": 0.56, "grad_norm": 1.2389516830444336, "learning_rate": 4.200126284375799e-06, "loss": 0.5879, "step": 8747 }, { "epoch": 0.56, "grad_norm": 1.0720360279083252, "learning_rate": 4.199094448392896e-06, "loss": 0.4456, "step": 8748 }, { "epoch": 0.56, "grad_norm": 1.139595866203308, "learning_rate": 4.19806264741551e-06, "loss": 0.5017, "step": 8749 }, { "epoch": 0.56, "grad_norm": 1.2627555131912231, "learning_rate": 4.197030881488734e-06, "loss": 0.5282, "step": 8750 }, { "epoch": 0.56, "grad_norm": 1.0927118062973022, "learning_rate": 4.1959991506576675e-06, "loss": 0.5342, "step": 8751 }, { "epoch": 0.56, "grad_norm": 1.2681368589401245, "learning_rate": 4.194967454967404e-06, "loss": 0.5562, "step": 8752 }, { "epoch": 0.57, "grad_norm": 1.1092218160629272, "learning_rate": 4.193935794463038e-06, "loss": 0.5231, "step": 8753 }, { "epoch": 0.57, "grad_norm": 1.1739355325698853, "learning_rate": 4.192904169189657e-06, "loss": 0.5296, "step": 8754 }, { "epoch": 0.57, "grad_norm": 1.2564270496368408, "learning_rate": 4.191872579192351e-06, "loss": 0.5557, "step": 8755 }, { "epoch": 0.57, "grad_norm": 1.1622661352157593, "learning_rate": 4.190841024516211e-06, "loss": 0.5561, "step": 8756 }, { "epoch": 0.57, "grad_norm": 1.3217723369598389, "learning_rate": 4.189809505206321e-06, "loss": 0.4798, "step": 8757 }, { "epoch": 0.57, "grad_norm": 1.29007089138031, "learning_rate": 4.188778021307768e-06, "loss": 0.566, "step": 8758 }, { "epoch": 0.57, "grad_norm": 1.3505369424819946, "learning_rate": 4.187746572865637e-06, "loss": 0.5363, "step": 8759 }, { "epoch": 0.57, "grad_norm": 1.240246057510376, "learning_rate": 4.186715159925005e-06, "loss": 0.4931, "step": 8760 }, { "epoch": 0.57, "grad_norm": 1.2832337617874146, "learning_rate": 4.185683782530955e-06, "loss": 0.517, "step": 8761 }, { "epoch": 0.57, "grad_norm": 1.161797285079956, "learning_rate": 4.184652440728567e-06, "loss": 0.4832, "step": 8762 }, { "epoch": 0.57, "grad_norm": 1.2433927059173584, "learning_rate": 4.183621134562916e-06, "loss": 0.494, "step": 8763 }, { "epoch": 0.57, "grad_norm": 1.1893651485443115, "learning_rate": 4.18258986407908e-06, "loss": 0.5457, "step": 8764 }, { "epoch": 0.57, "grad_norm": 1.1815669536590576, "learning_rate": 4.181558629322133e-06, "loss": 0.5257, "step": 8765 }, { "epoch": 0.57, "grad_norm": 1.3293416500091553, "learning_rate": 4.180527430337145e-06, "loss": 0.5375, "step": 8766 }, { "epoch": 0.57, "grad_norm": 1.280155062675476, "learning_rate": 4.179496267169189e-06, "loss": 0.5644, "step": 8767 }, { "epoch": 0.57, "grad_norm": 1.176673173904419, "learning_rate": 4.178465139863334e-06, "loss": 0.5256, "step": 8768 }, { "epoch": 0.57, "grad_norm": 1.2069891691207886, "learning_rate": 4.177434048464648e-06, "loss": 0.5081, "step": 8769 }, { "epoch": 0.57, "grad_norm": 1.2275086641311646, "learning_rate": 4.176402993018198e-06, "loss": 0.4765, "step": 8770 }, { "epoch": 0.57, "grad_norm": 1.2166497707366943, "learning_rate": 4.17537197356905e-06, "loss": 0.523, "step": 8771 }, { "epoch": 0.57, "grad_norm": 1.1349215507507324, "learning_rate": 4.174340990162264e-06, "loss": 0.5354, "step": 8772 }, { "epoch": 0.57, "grad_norm": 1.1747136116027832, "learning_rate": 4.173310042842902e-06, "loss": 0.5653, "step": 8773 }, { "epoch": 0.57, "grad_norm": 1.1141473054885864, "learning_rate": 4.172279131656027e-06, "loss": 0.4984, "step": 8774 }, { "epoch": 0.57, "grad_norm": 1.1001518964767456, "learning_rate": 4.171248256646693e-06, "loss": 0.5338, "step": 8775 }, { "epoch": 0.57, "grad_norm": 1.2324070930480957, "learning_rate": 4.170217417859963e-06, "loss": 0.4952, "step": 8776 }, { "epoch": 0.57, "grad_norm": 1.0849918127059937, "learning_rate": 4.169186615340887e-06, "loss": 0.5265, "step": 8777 }, { "epoch": 0.57, "grad_norm": 1.1087048053741455, "learning_rate": 4.16815584913452e-06, "loss": 0.5129, "step": 8778 }, { "epoch": 0.57, "grad_norm": 1.204813003540039, "learning_rate": 4.167125119285915e-06, "loss": 0.5449, "step": 8779 }, { "epoch": 0.57, "grad_norm": 1.1186420917510986, "learning_rate": 4.166094425840122e-06, "loss": 0.532, "step": 8780 }, { "epoch": 0.57, "grad_norm": 1.1636980772018433, "learning_rate": 4.1650637688421905e-06, "loss": 0.5135, "step": 8781 }, { "epoch": 0.57, "grad_norm": 1.1383905410766602, "learning_rate": 4.164033148337169e-06, "loss": 0.5492, "step": 8782 }, { "epoch": 0.57, "grad_norm": 1.2517054080963135, "learning_rate": 4.163002564370099e-06, "loss": 0.5522, "step": 8783 }, { "epoch": 0.57, "grad_norm": 1.2794060707092285, "learning_rate": 4.161972016986029e-06, "loss": 0.524, "step": 8784 }, { "epoch": 0.57, "grad_norm": 1.1002395153045654, "learning_rate": 4.160941506229999e-06, "loss": 0.526, "step": 8785 }, { "epoch": 0.57, "grad_norm": 1.2713172435760498, "learning_rate": 4.1599110321470525e-06, "loss": 0.5138, "step": 8786 }, { "epoch": 0.57, "grad_norm": 1.07826566696167, "learning_rate": 4.158880594782226e-06, "loss": 0.5564, "step": 8787 }, { "epoch": 0.57, "grad_norm": 1.1038435697555542, "learning_rate": 4.15785019418056e-06, "loss": 0.5332, "step": 8788 }, { "epoch": 0.57, "grad_norm": 1.1052356958389282, "learning_rate": 4.156819830387088e-06, "loss": 0.525, "step": 8789 }, { "epoch": 0.57, "grad_norm": 1.1886874437332153, "learning_rate": 4.155789503446845e-06, "loss": 0.5293, "step": 8790 }, { "epoch": 0.57, "grad_norm": 1.1632248163223267, "learning_rate": 4.154759213404867e-06, "loss": 0.5483, "step": 8791 }, { "epoch": 0.57, "grad_norm": 1.1661559343338013, "learning_rate": 4.153728960306182e-06, "loss": 0.5197, "step": 8792 }, { "epoch": 0.57, "grad_norm": 1.570223093032837, "learning_rate": 4.152698744195819e-06, "loss": 0.5403, "step": 8793 }, { "epoch": 0.57, "grad_norm": 1.1676846742630005, "learning_rate": 4.1516685651188106e-06, "loss": 0.5253, "step": 8794 }, { "epoch": 0.57, "grad_norm": 1.3122797012329102, "learning_rate": 4.15063842312018e-06, "loss": 0.5711, "step": 8795 }, { "epoch": 0.57, "grad_norm": 1.054804801940918, "learning_rate": 4.149608318244952e-06, "loss": 0.4722, "step": 8796 }, { "epoch": 0.57, "grad_norm": 1.2180310487747192, "learning_rate": 4.148578250538149e-06, "loss": 0.5798, "step": 8797 }, { "epoch": 0.57, "grad_norm": 1.2574735879898071, "learning_rate": 4.147548220044795e-06, "loss": 0.519, "step": 8798 }, { "epoch": 0.57, "grad_norm": 1.195935845375061, "learning_rate": 4.146518226809908e-06, "loss": 0.5691, "step": 8799 }, { "epoch": 0.57, "grad_norm": 1.1850107908248901, "learning_rate": 4.145488270878511e-06, "loss": 0.5088, "step": 8800 }, { "epoch": 0.57, "grad_norm": 1.190210223197937, "learning_rate": 4.144458352295614e-06, "loss": 0.4993, "step": 8801 }, { "epoch": 0.57, "grad_norm": 1.0786561965942383, "learning_rate": 4.143428471106235e-06, "loss": 0.5612, "step": 8802 }, { "epoch": 0.57, "grad_norm": 1.1405727863311768, "learning_rate": 4.142398627355388e-06, "loss": 0.536, "step": 8803 }, { "epoch": 0.57, "grad_norm": 1.1700042486190796, "learning_rate": 4.141368821088084e-06, "loss": 0.4743, "step": 8804 }, { "epoch": 0.57, "grad_norm": 1.1946055889129639, "learning_rate": 4.140339052349333e-06, "loss": 0.4868, "step": 8805 }, { "epoch": 0.57, "grad_norm": 1.2349497079849243, "learning_rate": 4.139309321184146e-06, "loss": 0.576, "step": 8806 }, { "epoch": 0.57, "grad_norm": 1.239960789680481, "learning_rate": 4.1382796276375275e-06, "loss": 0.5445, "step": 8807 }, { "epoch": 0.57, "grad_norm": 1.1007165908813477, "learning_rate": 4.137249971754482e-06, "loss": 0.5246, "step": 8808 }, { "epoch": 0.57, "grad_norm": 1.100674033164978, "learning_rate": 4.136220353580015e-06, "loss": 0.5076, "step": 8809 }, { "epoch": 0.57, "grad_norm": 1.304419994354248, "learning_rate": 4.135190773159127e-06, "loss": 0.5628, "step": 8810 }, { "epoch": 0.57, "grad_norm": 1.2245348691940308, "learning_rate": 4.134161230536819e-06, "loss": 0.5551, "step": 8811 }, { "epoch": 0.57, "grad_norm": 1.2292925119400024, "learning_rate": 4.133131725758093e-06, "loss": 0.4957, "step": 8812 }, { "epoch": 0.57, "grad_norm": 1.1770659685134888, "learning_rate": 4.1321022588679396e-06, "loss": 0.5083, "step": 8813 }, { "epoch": 0.57, "grad_norm": 1.3062068223953247, "learning_rate": 4.131072829911358e-06, "loss": 0.569, "step": 8814 }, { "epoch": 0.57, "grad_norm": 1.1601253747940063, "learning_rate": 4.13004343893334e-06, "loss": 0.5157, "step": 8815 }, { "epoch": 0.57, "grad_norm": 1.2292306423187256, "learning_rate": 4.1290140859788795e-06, "loss": 0.5058, "step": 8816 }, { "epoch": 0.57, "grad_norm": 1.186496376991272, "learning_rate": 4.127984771092968e-06, "loss": 0.554, "step": 8817 }, { "epoch": 0.57, "grad_norm": 1.1470420360565186, "learning_rate": 4.126955494320591e-06, "loss": 0.505, "step": 8818 }, { "epoch": 0.57, "grad_norm": 1.17499840259552, "learning_rate": 4.125926255706736e-06, "loss": 0.5451, "step": 8819 }, { "epoch": 0.57, "grad_norm": 1.2671234607696533, "learning_rate": 4.12489705529639e-06, "loss": 0.5455, "step": 8820 }, { "epoch": 0.57, "grad_norm": 1.2266572713851929, "learning_rate": 4.123867893134535e-06, "loss": 0.5076, "step": 8821 }, { "epoch": 0.57, "grad_norm": 1.1595591306686401, "learning_rate": 4.122838769266154e-06, "loss": 0.4883, "step": 8822 }, { "epoch": 0.57, "grad_norm": 1.300221562385559, "learning_rate": 4.12180968373623e-06, "loss": 0.5054, "step": 8823 }, { "epoch": 0.57, "grad_norm": 1.339013695716858, "learning_rate": 4.120780636589737e-06, "loss": 0.5975, "step": 8824 }, { "epoch": 0.57, "grad_norm": 1.1887843608856201, "learning_rate": 4.1197516278716535e-06, "loss": 0.5692, "step": 8825 }, { "epoch": 0.57, "grad_norm": 1.138754963874817, "learning_rate": 4.118722657626955e-06, "loss": 0.512, "step": 8826 }, { "epoch": 0.57, "grad_norm": 1.172764778137207, "learning_rate": 4.117693725900616e-06, "loss": 0.5202, "step": 8827 }, { "epoch": 0.57, "grad_norm": 1.1811623573303223, "learning_rate": 4.116664832737607e-06, "loss": 0.5404, "step": 8828 }, { "epoch": 0.57, "grad_norm": 1.1825511455535889, "learning_rate": 4.115635978182902e-06, "loss": 0.4828, "step": 8829 }, { "epoch": 0.57, "grad_norm": 1.222298264503479, "learning_rate": 4.114607162281465e-06, "loss": 0.5589, "step": 8830 }, { "epoch": 0.57, "grad_norm": 1.2722822427749634, "learning_rate": 4.113578385078264e-06, "loss": 0.492, "step": 8831 }, { "epoch": 0.57, "grad_norm": 1.1864851713180542, "learning_rate": 4.112549646618264e-06, "loss": 0.5325, "step": 8832 }, { "epoch": 0.57, "grad_norm": 1.210839033126831, "learning_rate": 4.111520946946431e-06, "loss": 0.5128, "step": 8833 }, { "epoch": 0.57, "grad_norm": 1.222941517829895, "learning_rate": 4.110492286107725e-06, "loss": 0.5626, "step": 8834 }, { "epoch": 0.57, "grad_norm": 1.1704027652740479, "learning_rate": 4.109463664147106e-06, "loss": 0.5036, "step": 8835 }, { "epoch": 0.57, "grad_norm": 1.1621596813201904, "learning_rate": 4.1084350811095315e-06, "loss": 0.4812, "step": 8836 }, { "epoch": 0.57, "grad_norm": 1.2826359272003174, "learning_rate": 4.10740653703996e-06, "loss": 0.5625, "step": 8837 }, { "epoch": 0.57, "grad_norm": 1.3236355781555176, "learning_rate": 4.106378031983347e-06, "loss": 0.528, "step": 8838 }, { "epoch": 0.57, "grad_norm": 1.1765698194503784, "learning_rate": 4.105349565984643e-06, "loss": 0.5242, "step": 8839 }, { "epoch": 0.57, "grad_norm": 1.2347326278686523, "learning_rate": 4.1043211390888025e-06, "loss": 0.5389, "step": 8840 }, { "epoch": 0.57, "grad_norm": 1.0976353883743286, "learning_rate": 4.103292751340774e-06, "loss": 0.5145, "step": 8841 }, { "epoch": 0.57, "grad_norm": 1.1498279571533203, "learning_rate": 4.102264402785506e-06, "loss": 0.5273, "step": 8842 }, { "epoch": 0.57, "grad_norm": 1.247694969177246, "learning_rate": 4.101236093467944e-06, "loss": 0.5093, "step": 8843 }, { "epoch": 0.57, "grad_norm": 1.253477692604065, "learning_rate": 4.100207823433033e-06, "loss": 0.5441, "step": 8844 }, { "epoch": 0.57, "grad_norm": 1.1608327627182007, "learning_rate": 4.099179592725717e-06, "loss": 0.5408, "step": 8845 }, { "epoch": 0.57, "grad_norm": 1.149852991104126, "learning_rate": 4.098151401390936e-06, "loss": 0.5736, "step": 8846 }, { "epoch": 0.57, "grad_norm": 1.1436702013015747, "learning_rate": 4.097123249473634e-06, "loss": 0.577, "step": 8847 }, { "epoch": 0.57, "grad_norm": 1.2174750566482544, "learning_rate": 4.096095137018742e-06, "loss": 0.5396, "step": 8848 }, { "epoch": 0.57, "grad_norm": 1.2359421253204346, "learning_rate": 4.095067064071201e-06, "loss": 0.5273, "step": 8849 }, { "epoch": 0.57, "grad_norm": 1.269797682762146, "learning_rate": 4.094039030675942e-06, "loss": 0.5647, "step": 8850 }, { "epoch": 0.57, "grad_norm": 1.1954896450042725, "learning_rate": 4.0930110368779e-06, "loss": 0.5195, "step": 8851 }, { "epoch": 0.57, "grad_norm": 1.29911208152771, "learning_rate": 4.091983082722006e-06, "loss": 0.5478, "step": 8852 }, { "epoch": 0.57, "grad_norm": 1.1812036037445068, "learning_rate": 4.090955168253191e-06, "loss": 0.4995, "step": 8853 }, { "epoch": 0.57, "grad_norm": 1.1511433124542236, "learning_rate": 4.089927293516378e-06, "loss": 0.5131, "step": 8854 }, { "epoch": 0.57, "grad_norm": 1.1727484464645386, "learning_rate": 4.088899458556494e-06, "loss": 0.5617, "step": 8855 }, { "epoch": 0.57, "grad_norm": 1.1223604679107666, "learning_rate": 4.087871663418465e-06, "loss": 0.5116, "step": 8856 }, { "epoch": 0.57, "grad_norm": 1.1368203163146973, "learning_rate": 4.086843908147212e-06, "loss": 0.5056, "step": 8857 }, { "epoch": 0.57, "grad_norm": 1.1858934164047241, "learning_rate": 4.085816192787659e-06, "loss": 0.5228, "step": 8858 }, { "epoch": 0.57, "grad_norm": 2.25681734085083, "learning_rate": 4.084788517384718e-06, "loss": 0.5734, "step": 8859 }, { "epoch": 0.57, "grad_norm": 1.1071500778198242, "learning_rate": 4.08376088198331e-06, "loss": 0.4888, "step": 8860 }, { "epoch": 0.57, "grad_norm": 1.27470064163208, "learning_rate": 4.08273328662835e-06, "loss": 0.5874, "step": 8861 }, { "epoch": 0.57, "grad_norm": 1.1003060340881348, "learning_rate": 4.081705731364751e-06, "loss": 0.5649, "step": 8862 }, { "epoch": 0.57, "grad_norm": 1.263608694076538, "learning_rate": 4.080678216237426e-06, "loss": 0.5183, "step": 8863 }, { "epoch": 0.57, "grad_norm": 1.3175331354141235, "learning_rate": 4.079650741291287e-06, "loss": 0.5432, "step": 8864 }, { "epoch": 0.57, "grad_norm": 1.2446928024291992, "learning_rate": 4.078623306571236e-06, "loss": 0.5288, "step": 8865 }, { "epoch": 0.57, "grad_norm": 1.2713279724121094, "learning_rate": 4.077595912122184e-06, "loss": 0.5472, "step": 8866 }, { "epoch": 0.57, "grad_norm": 1.1106997728347778, "learning_rate": 4.076568557989034e-06, "loss": 0.5189, "step": 8867 }, { "epoch": 0.57, "grad_norm": 1.1618642807006836, "learning_rate": 4.07554124421669e-06, "loss": 0.5279, "step": 8868 }, { "epoch": 0.57, "grad_norm": 1.231833815574646, "learning_rate": 4.074513970850054e-06, "loss": 0.5016, "step": 8869 }, { "epoch": 0.57, "grad_norm": 1.2131569385528564, "learning_rate": 4.073486737934026e-06, "loss": 0.5123, "step": 8870 }, { "epoch": 0.57, "grad_norm": 1.3536715507507324, "learning_rate": 4.0724595455135e-06, "loss": 0.5609, "step": 8871 }, { "epoch": 0.57, "grad_norm": 1.4221869707107544, "learning_rate": 4.071432393633375e-06, "loss": 0.567, "step": 8872 }, { "epoch": 0.57, "grad_norm": 1.195929765701294, "learning_rate": 4.070405282338543e-06, "loss": 0.5389, "step": 8873 }, { "epoch": 0.57, "grad_norm": 1.142514944076538, "learning_rate": 4.069378211673899e-06, "loss": 0.4939, "step": 8874 }, { "epoch": 0.57, "grad_norm": 1.3064746856689453, "learning_rate": 4.068351181684333e-06, "loss": 0.5216, "step": 8875 }, { "epoch": 0.57, "grad_norm": 1.1945720911026, "learning_rate": 4.067324192414732e-06, "loss": 0.549, "step": 8876 }, { "epoch": 0.57, "grad_norm": 1.0676422119140625, "learning_rate": 4.066297243909985e-06, "loss": 0.4856, "step": 8877 }, { "epoch": 0.57, "grad_norm": 1.0946294069290161, "learning_rate": 4.065270336214976e-06, "loss": 0.5272, "step": 8878 }, { "epoch": 0.57, "grad_norm": 1.173052430152893, "learning_rate": 4.064243469374588e-06, "loss": 0.5311, "step": 8879 }, { "epoch": 0.57, "grad_norm": 1.311179280281067, "learning_rate": 4.063216643433705e-06, "loss": 0.5753, "step": 8880 }, { "epoch": 0.57, "grad_norm": 1.2175472974777222, "learning_rate": 4.0621898584372055e-06, "loss": 0.5186, "step": 8881 }, { "epoch": 0.57, "grad_norm": 1.1604211330413818, "learning_rate": 4.061163114429968e-06, "loss": 0.5178, "step": 8882 }, { "epoch": 0.57, "grad_norm": 1.3411797285079956, "learning_rate": 4.060136411456868e-06, "loss": 0.4855, "step": 8883 }, { "epoch": 0.57, "grad_norm": 1.0842405557632446, "learning_rate": 4.05910974956278e-06, "loss": 0.4706, "step": 8884 }, { "epoch": 0.57, "grad_norm": 1.2388019561767578, "learning_rate": 4.058083128792579e-06, "loss": 0.5046, "step": 8885 }, { "epoch": 0.57, "grad_norm": 1.2288185358047485, "learning_rate": 4.0570565491911325e-06, "loss": 0.5059, "step": 8886 }, { "epoch": 0.57, "grad_norm": 1.1767536401748657, "learning_rate": 4.056030010803312e-06, "loss": 0.545, "step": 8887 }, { "epoch": 0.57, "grad_norm": 1.166144847869873, "learning_rate": 4.055003513673985e-06, "loss": 0.4904, "step": 8888 }, { "epoch": 0.57, "grad_norm": 1.4368748664855957, "learning_rate": 4.053977057848016e-06, "loss": 0.5362, "step": 8889 }, { "epoch": 0.57, "grad_norm": 1.1761395931243896, "learning_rate": 4.052950643370268e-06, "loss": 0.4929, "step": 8890 }, { "epoch": 0.57, "grad_norm": 1.0827361345291138, "learning_rate": 4.0519242702856045e-06, "loss": 0.4669, "step": 8891 }, { "epoch": 0.57, "grad_norm": 1.3137198686599731, "learning_rate": 4.050897938638884e-06, "loss": 0.5226, "step": 8892 }, { "epoch": 0.57, "grad_norm": 1.2490885257720947, "learning_rate": 4.049871648474967e-06, "loss": 0.562, "step": 8893 }, { "epoch": 0.57, "grad_norm": 1.2674734592437744, "learning_rate": 4.048845399838711e-06, "loss": 0.5247, "step": 8894 }, { "epoch": 0.57, "grad_norm": 1.176784873008728, "learning_rate": 4.047819192774966e-06, "loss": 0.5014, "step": 8895 }, { "epoch": 0.57, "grad_norm": 1.2365214824676514, "learning_rate": 4.046793027328588e-06, "loss": 0.5916, "step": 8896 }, { "epoch": 0.57, "grad_norm": 1.1103661060333252, "learning_rate": 4.045766903544427e-06, "loss": 0.5195, "step": 8897 }, { "epoch": 0.57, "grad_norm": 1.2928205728530884, "learning_rate": 4.044740821467332e-06, "loss": 0.5155, "step": 8898 }, { "epoch": 0.57, "grad_norm": 1.1934940814971924, "learning_rate": 4.043714781142154e-06, "loss": 0.5523, "step": 8899 }, { "epoch": 0.57, "grad_norm": 1.2347477674484253, "learning_rate": 4.042688782613734e-06, "loss": 0.5196, "step": 8900 }, { "epoch": 0.57, "grad_norm": 1.1993699073791504, "learning_rate": 4.0416628259269165e-06, "loss": 0.5337, "step": 8901 }, { "epoch": 0.57, "grad_norm": 1.1543872356414795, "learning_rate": 4.0406369111265455e-06, "loss": 0.5307, "step": 8902 }, { "epoch": 0.57, "grad_norm": 1.2170571088790894, "learning_rate": 4.039611038257459e-06, "loss": 0.5335, "step": 8903 }, { "epoch": 0.57, "grad_norm": 1.2214752435684204, "learning_rate": 4.038585207364496e-06, "loss": 0.4956, "step": 8904 }, { "epoch": 0.57, "grad_norm": 1.0973854064941406, "learning_rate": 4.0375594184924955e-06, "loss": 0.5309, "step": 8905 }, { "epoch": 0.57, "grad_norm": 1.1471428871154785, "learning_rate": 4.036533671686288e-06, "loss": 0.5443, "step": 8906 }, { "epoch": 0.57, "grad_norm": 1.1056334972381592, "learning_rate": 4.035507966990707e-06, "loss": 0.5139, "step": 8907 }, { "epoch": 0.58, "grad_norm": 1.1749398708343506, "learning_rate": 4.034482304450585e-06, "loss": 0.4826, "step": 8908 }, { "epoch": 0.58, "grad_norm": 1.1569030284881592, "learning_rate": 4.03345668411075e-06, "loss": 0.5434, "step": 8909 }, { "epoch": 0.58, "grad_norm": 1.0857019424438477, "learning_rate": 4.032431106016031e-06, "loss": 0.4742, "step": 8910 }, { "epoch": 0.58, "grad_norm": 1.2390693426132202, "learning_rate": 4.031405570211252e-06, "loss": 0.5351, "step": 8911 }, { "epoch": 0.58, "grad_norm": 1.099611520767212, "learning_rate": 4.030380076741236e-06, "loss": 0.5179, "step": 8912 }, { "epoch": 0.58, "grad_norm": 1.2537449598312378, "learning_rate": 4.029354625650804e-06, "loss": 0.576, "step": 8913 }, { "epoch": 0.58, "grad_norm": 1.2795422077178955, "learning_rate": 4.0283292169847784e-06, "loss": 0.4863, "step": 8914 }, { "epoch": 0.58, "grad_norm": 1.1471632719039917, "learning_rate": 4.027303850787975e-06, "loss": 0.5388, "step": 8915 }, { "epoch": 0.58, "grad_norm": 1.3317536115646362, "learning_rate": 4.02627852710521e-06, "loss": 0.5412, "step": 8916 }, { "epoch": 0.58, "grad_norm": 1.1498664617538452, "learning_rate": 4.025253245981301e-06, "loss": 0.5308, "step": 8917 }, { "epoch": 0.58, "grad_norm": 1.111641526222229, "learning_rate": 4.024228007461056e-06, "loss": 0.5558, "step": 8918 }, { "epoch": 0.58, "grad_norm": 1.2479101419448853, "learning_rate": 4.023202811589288e-06, "loss": 0.5209, "step": 8919 }, { "epoch": 0.58, "grad_norm": 1.176448106765747, "learning_rate": 4.022177658410804e-06, "loss": 0.5467, "step": 8920 }, { "epoch": 0.58, "grad_norm": 1.1669362783432007, "learning_rate": 4.021152547970411e-06, "loss": 0.5168, "step": 8921 }, { "epoch": 0.58, "grad_norm": 1.1520438194274902, "learning_rate": 4.020127480312916e-06, "loss": 0.5283, "step": 8922 }, { "epoch": 0.58, "grad_norm": 1.4266914129257202, "learning_rate": 4.019102455483122e-06, "loss": 0.5349, "step": 8923 }, { "epoch": 0.58, "grad_norm": 1.2374986410140991, "learning_rate": 4.0180774735258275e-06, "loss": 0.5195, "step": 8924 }, { "epoch": 0.58, "grad_norm": 1.1539692878723145, "learning_rate": 4.017052534485833e-06, "loss": 0.5723, "step": 8925 }, { "epoch": 0.58, "grad_norm": 1.1770771741867065, "learning_rate": 4.016027638407935e-06, "loss": 0.5571, "step": 8926 }, { "epoch": 0.58, "grad_norm": 1.1853395700454712, "learning_rate": 4.015002785336933e-06, "loss": 0.4921, "step": 8927 }, { "epoch": 0.58, "grad_norm": 1.1746197938919067, "learning_rate": 4.013977975317616e-06, "loss": 0.5413, "step": 8928 }, { "epoch": 0.58, "grad_norm": 1.1435644626617432, "learning_rate": 4.012953208394778e-06, "loss": 0.511, "step": 8929 }, { "epoch": 0.58, "grad_norm": 1.1757473945617676, "learning_rate": 4.011928484613209e-06, "loss": 0.518, "step": 8930 }, { "epoch": 0.58, "grad_norm": 1.213793396949768, "learning_rate": 4.010903804017697e-06, "loss": 0.5731, "step": 8931 }, { "epoch": 0.58, "grad_norm": 1.1463642120361328, "learning_rate": 4.009879166653027e-06, "loss": 0.5147, "step": 8932 }, { "epoch": 0.58, "grad_norm": 1.1179379224777222, "learning_rate": 4.008854572563985e-06, "loss": 0.5611, "step": 8933 }, { "epoch": 0.58, "grad_norm": 1.20281183719635, "learning_rate": 4.007830021795352e-06, "loss": 0.5416, "step": 8934 }, { "epoch": 0.58, "grad_norm": 1.1269195079803467, "learning_rate": 4.006805514391908e-06, "loss": 0.5114, "step": 8935 }, { "epoch": 0.58, "grad_norm": 1.1682034730911255, "learning_rate": 4.0057810503984345e-06, "loss": 0.5398, "step": 8936 }, { "epoch": 0.58, "grad_norm": 1.3824551105499268, "learning_rate": 4.004756629859705e-06, "loss": 0.525, "step": 8937 }, { "epoch": 0.58, "grad_norm": 1.1632391214370728, "learning_rate": 4.0037322528204945e-06, "loss": 0.5385, "step": 8938 }, { "epoch": 0.58, "grad_norm": 1.1657930612564087, "learning_rate": 4.002707919325578e-06, "loss": 0.5677, "step": 8939 }, { "epoch": 0.58, "grad_norm": 1.2242351770401, "learning_rate": 4.001683629419725e-06, "loss": 0.549, "step": 8940 }, { "epoch": 0.58, "grad_norm": 1.213042140007019, "learning_rate": 4.000659383147707e-06, "loss": 0.5085, "step": 8941 }, { "epoch": 0.58, "grad_norm": 1.300681471824646, "learning_rate": 3.999635180554286e-06, "loss": 0.5472, "step": 8942 }, { "epoch": 0.58, "grad_norm": 1.2205314636230469, "learning_rate": 3.998611021684231e-06, "loss": 0.574, "step": 8943 }, { "epoch": 0.58, "grad_norm": 1.1244839429855347, "learning_rate": 3.997586906582304e-06, "loss": 0.5108, "step": 8944 }, { "epoch": 0.58, "grad_norm": 1.1788907051086426, "learning_rate": 3.996562835293268e-06, "loss": 0.521, "step": 8945 }, { "epoch": 0.58, "grad_norm": 1.2297673225402832, "learning_rate": 3.995538807861883e-06, "loss": 0.5478, "step": 8946 }, { "epoch": 0.58, "grad_norm": 1.2306662797927856, "learning_rate": 3.9945148243329035e-06, "loss": 0.5753, "step": 8947 }, { "epoch": 0.58, "grad_norm": 1.1170098781585693, "learning_rate": 3.9934908847510865e-06, "loss": 0.5057, "step": 8948 }, { "epoch": 0.58, "grad_norm": 1.2101565599441528, "learning_rate": 3.992466989161186e-06, "loss": 0.5645, "step": 8949 }, { "epoch": 0.58, "grad_norm": 1.2550594806671143, "learning_rate": 3.991443137607954e-06, "loss": 0.5418, "step": 8950 }, { "epoch": 0.58, "grad_norm": 1.1658728122711182, "learning_rate": 3.990419330136141e-06, "loss": 0.5437, "step": 8951 }, { "epoch": 0.58, "grad_norm": 1.0693955421447754, "learning_rate": 3.989395566790496e-06, "loss": 0.5152, "step": 8952 }, { "epoch": 0.58, "grad_norm": 1.1854866743087769, "learning_rate": 3.988371847615761e-06, "loss": 0.5205, "step": 8953 }, { "epoch": 0.58, "grad_norm": 1.263390064239502, "learning_rate": 3.987348172656682e-06, "loss": 0.5316, "step": 8954 }, { "epoch": 0.58, "grad_norm": 1.256057620048523, "learning_rate": 3.986324541958003e-06, "loss": 0.5381, "step": 8955 }, { "epoch": 0.58, "grad_norm": 1.1844590902328491, "learning_rate": 3.985300955564462e-06, "loss": 0.5547, "step": 8956 }, { "epoch": 0.58, "grad_norm": 1.1481678485870361, "learning_rate": 3.984277413520799e-06, "loss": 0.5496, "step": 8957 }, { "epoch": 0.58, "grad_norm": 1.2401777505874634, "learning_rate": 3.983253915871751e-06, "loss": 0.5811, "step": 8958 }, { "epoch": 0.58, "grad_norm": 1.1509339809417725, "learning_rate": 3.98223046266205e-06, "loss": 0.5061, "step": 8959 }, { "epoch": 0.58, "grad_norm": 1.248789668083191, "learning_rate": 3.981207053936429e-06, "loss": 0.5371, "step": 8960 }, { "epoch": 0.58, "grad_norm": 1.0901544094085693, "learning_rate": 3.980183689739619e-06, "loss": 0.517, "step": 8961 }, { "epoch": 0.58, "grad_norm": 1.086224913597107, "learning_rate": 3.979160370116349e-06, "loss": 0.4965, "step": 8962 }, { "epoch": 0.58, "grad_norm": 1.1913901567459106, "learning_rate": 3.9781370951113455e-06, "loss": 0.5335, "step": 8963 }, { "epoch": 0.58, "grad_norm": 1.1951441764831543, "learning_rate": 3.9771138647693345e-06, "loss": 0.5223, "step": 8964 }, { "epoch": 0.58, "grad_norm": 1.1677216291427612, "learning_rate": 3.976090679135036e-06, "loss": 0.4771, "step": 8965 }, { "epoch": 0.58, "grad_norm": 1.1658042669296265, "learning_rate": 3.975067538253172e-06, "loss": 0.5517, "step": 8966 }, { "epoch": 0.58, "grad_norm": 1.1689953804016113, "learning_rate": 3.974044442168462e-06, "loss": 0.5536, "step": 8967 }, { "epoch": 0.58, "grad_norm": 1.1416555643081665, "learning_rate": 3.973021390925621e-06, "loss": 0.4641, "step": 8968 }, { "epoch": 0.58, "grad_norm": 1.2239794731140137, "learning_rate": 3.971998384569368e-06, "loss": 0.5284, "step": 8969 }, { "epoch": 0.58, "grad_norm": 1.232844591140747, "learning_rate": 3.970975423144413e-06, "loss": 0.5563, "step": 8970 }, { "epoch": 0.58, "grad_norm": 1.1914308071136475, "learning_rate": 3.969952506695466e-06, "loss": 0.5016, "step": 8971 }, { "epoch": 0.58, "grad_norm": 1.1159436702728271, "learning_rate": 3.9689296352672375e-06, "loss": 0.5084, "step": 8972 }, { "epoch": 0.58, "grad_norm": 1.1520260572433472, "learning_rate": 3.9679068089044345e-06, "loss": 0.627, "step": 8973 }, { "epoch": 0.58, "grad_norm": 1.2455863952636719, "learning_rate": 3.966884027651763e-06, "loss": 0.5333, "step": 8974 }, { "epoch": 0.58, "grad_norm": 1.1231054067611694, "learning_rate": 3.965861291553924e-06, "loss": 0.5292, "step": 8975 }, { "epoch": 0.58, "grad_norm": 1.1527100801467896, "learning_rate": 3.96483860065562e-06, "loss": 0.5101, "step": 8976 }, { "epoch": 0.58, "grad_norm": 1.1705667972564697, "learning_rate": 3.9638159550015505e-06, "loss": 0.5185, "step": 8977 }, { "epoch": 0.58, "grad_norm": 1.2268270254135132, "learning_rate": 3.962793354636413e-06, "loss": 0.5195, "step": 8978 }, { "epoch": 0.58, "grad_norm": 1.1526076793670654, "learning_rate": 3.9617707996049005e-06, "loss": 0.4819, "step": 8979 }, { "epoch": 0.58, "grad_norm": 1.1685311794281006, "learning_rate": 3.960748289951708e-06, "loss": 0.5096, "step": 8980 }, { "epoch": 0.58, "grad_norm": 1.1078077554702759, "learning_rate": 3.959725825721526e-06, "loss": 0.5194, "step": 8981 }, { "epoch": 0.58, "grad_norm": 1.1478627920150757, "learning_rate": 3.958703406959045e-06, "loss": 0.4776, "step": 8982 }, { "epoch": 0.58, "grad_norm": 1.2574471235275269, "learning_rate": 3.9576810337089525e-06, "loss": 0.5477, "step": 8983 }, { "epoch": 0.58, "grad_norm": 1.1860371828079224, "learning_rate": 3.9566587060159315e-06, "loss": 0.5294, "step": 8984 }, { "epoch": 0.58, "grad_norm": 1.2033798694610596, "learning_rate": 3.955636423924666e-06, "loss": 0.5134, "step": 8985 }, { "epoch": 0.58, "grad_norm": 1.1160531044006348, "learning_rate": 3.954614187479838e-06, "loss": 0.5517, "step": 8986 }, { "epoch": 0.58, "grad_norm": 1.1321885585784912, "learning_rate": 3.953591996726129e-06, "loss": 0.5492, "step": 8987 }, { "epoch": 0.58, "grad_norm": 1.218845248222351, "learning_rate": 3.952569851708212e-06, "loss": 0.5245, "step": 8988 }, { "epoch": 0.58, "grad_norm": 1.20283842086792, "learning_rate": 3.951547752470765e-06, "loss": 0.515, "step": 8989 }, { "epoch": 0.58, "grad_norm": 1.1771548986434937, "learning_rate": 3.9505256990584595e-06, "loss": 0.5529, "step": 8990 }, { "epoch": 0.58, "grad_norm": 1.3936543464660645, "learning_rate": 3.949503691515969e-06, "loss": 0.526, "step": 8991 }, { "epoch": 0.58, "grad_norm": 1.19160795211792, "learning_rate": 3.948481729887961e-06, "loss": 0.5675, "step": 8992 }, { "epoch": 0.58, "grad_norm": 1.1112213134765625, "learning_rate": 3.947459814219107e-06, "loss": 0.5083, "step": 8993 }, { "epoch": 0.58, "grad_norm": 1.230401635169983, "learning_rate": 3.946437944554066e-06, "loss": 0.5282, "step": 8994 }, { "epoch": 0.58, "grad_norm": 1.1249723434448242, "learning_rate": 3.945416120937503e-06, "loss": 0.5078, "step": 8995 }, { "epoch": 0.58, "grad_norm": 1.094234824180603, "learning_rate": 3.9443943434140824e-06, "loss": 0.5126, "step": 8996 }, { "epoch": 0.58, "grad_norm": 1.0966465473175049, "learning_rate": 3.943372612028461e-06, "loss": 0.5049, "step": 8997 }, { "epoch": 0.58, "grad_norm": 1.3163789510726929, "learning_rate": 3.942350926825296e-06, "loss": 0.5195, "step": 8998 }, { "epoch": 0.58, "grad_norm": 1.2506566047668457, "learning_rate": 3.941329287849246e-06, "loss": 0.4947, "step": 8999 }, { "epoch": 0.58, "grad_norm": 1.2443788051605225, "learning_rate": 3.940307695144959e-06, "loss": 0.5665, "step": 9000 }, { "epoch": 0.58, "grad_norm": 1.1729567050933838, "learning_rate": 3.939286148757089e-06, "loss": 0.4922, "step": 9001 }, { "epoch": 0.58, "grad_norm": 1.2220412492752075, "learning_rate": 3.938264648730284e-06, "loss": 0.5088, "step": 9002 }, { "epoch": 0.58, "grad_norm": 1.1808840036392212, "learning_rate": 3.937243195109193e-06, "loss": 0.5188, "step": 9003 }, { "epoch": 0.58, "grad_norm": 1.144310712814331, "learning_rate": 3.936221787938459e-06, "loss": 0.5436, "step": 9004 }, { "epoch": 0.58, "grad_norm": 1.0982176065444946, "learning_rate": 3.9352004272627284e-06, "loss": 0.5261, "step": 9005 }, { "epoch": 0.58, "grad_norm": 1.160109043121338, "learning_rate": 3.9341791131266385e-06, "loss": 0.5519, "step": 9006 }, { "epoch": 0.58, "grad_norm": 1.199986219406128, "learning_rate": 3.933157845574829e-06, "loss": 0.5562, "step": 9007 }, { "epoch": 0.58, "grad_norm": 1.186305046081543, "learning_rate": 3.932136624651937e-06, "loss": 0.5113, "step": 9008 }, { "epoch": 0.58, "grad_norm": 1.1498140096664429, "learning_rate": 3.931115450402599e-06, "loss": 0.5307, "step": 9009 }, { "epoch": 0.58, "grad_norm": 1.2722845077514648, "learning_rate": 3.930094322871446e-06, "loss": 0.5526, "step": 9010 }, { "epoch": 0.58, "grad_norm": 1.3100162744522095, "learning_rate": 3.929073242103114e-06, "loss": 0.5328, "step": 9011 }, { "epoch": 0.58, "grad_norm": 1.2365177869796753, "learning_rate": 3.928052208142224e-06, "loss": 0.5021, "step": 9012 }, { "epoch": 0.58, "grad_norm": 1.1521400213241577, "learning_rate": 3.927031221033406e-06, "loss": 0.5267, "step": 9013 }, { "epoch": 0.58, "grad_norm": 1.1303932666778564, "learning_rate": 3.926010280821285e-06, "loss": 0.5125, "step": 9014 }, { "epoch": 0.58, "grad_norm": 1.1998990774154663, "learning_rate": 3.9249893875504844e-06, "loss": 0.5296, "step": 9015 }, { "epoch": 0.58, "grad_norm": 1.2591196298599243, "learning_rate": 3.923968541265625e-06, "loss": 0.5237, "step": 9016 }, { "epoch": 0.58, "grad_norm": 1.0935388803482056, "learning_rate": 3.922947742011323e-06, "loss": 0.5428, "step": 9017 }, { "epoch": 0.58, "grad_norm": 1.3116540908813477, "learning_rate": 3.921926989832197e-06, "loss": 0.53, "step": 9018 }, { "epoch": 0.58, "grad_norm": 1.2042971849441528, "learning_rate": 3.92090628477286e-06, "loss": 0.4936, "step": 9019 }, { "epoch": 0.58, "grad_norm": 1.2038999795913696, "learning_rate": 3.9198856268779265e-06, "loss": 0.5194, "step": 9020 }, { "epoch": 0.58, "grad_norm": 1.2079726457595825, "learning_rate": 3.918865016192005e-06, "loss": 0.5288, "step": 9021 }, { "epoch": 0.58, "grad_norm": 1.2360308170318604, "learning_rate": 3.917844452759704e-06, "loss": 0.5176, "step": 9022 }, { "epoch": 0.58, "grad_norm": 1.1667181253433228, "learning_rate": 3.91682393662563e-06, "loss": 0.5782, "step": 9023 }, { "epoch": 0.58, "grad_norm": 1.1611512899398804, "learning_rate": 3.915803467834387e-06, "loss": 0.4708, "step": 9024 }, { "epoch": 0.58, "grad_norm": 1.234495997428894, "learning_rate": 3.914783046430579e-06, "loss": 0.5172, "step": 9025 }, { "epoch": 0.58, "grad_norm": 1.2926026582717896, "learning_rate": 3.913762672458802e-06, "loss": 0.5679, "step": 9026 }, { "epoch": 0.58, "grad_norm": 1.1334775686264038, "learning_rate": 3.912742345963656e-06, "loss": 0.5542, "step": 9027 }, { "epoch": 0.58, "grad_norm": 1.1929746866226196, "learning_rate": 3.911722066989738e-06, "loss": 0.5204, "step": 9028 }, { "epoch": 0.58, "grad_norm": 1.2127455472946167, "learning_rate": 3.91070183558164e-06, "loss": 0.5159, "step": 9029 }, { "epoch": 0.58, "grad_norm": 1.142868161201477, "learning_rate": 3.909681651783956e-06, "loss": 0.5123, "step": 9030 }, { "epoch": 0.58, "grad_norm": 1.1902512311935425, "learning_rate": 3.908661515641271e-06, "loss": 0.517, "step": 9031 }, { "epoch": 0.58, "grad_norm": 1.1284414529800415, "learning_rate": 3.907641427198177e-06, "loss": 0.5238, "step": 9032 }, { "epoch": 0.58, "grad_norm": 1.1870837211608887, "learning_rate": 3.906621386499257e-06, "loss": 0.5859, "step": 9033 }, { "epoch": 0.58, "grad_norm": 1.1711547374725342, "learning_rate": 3.905601393589098e-06, "loss": 0.568, "step": 9034 }, { "epoch": 0.58, "grad_norm": 1.058821201324463, "learning_rate": 3.9045814485122765e-06, "loss": 0.4862, "step": 9035 }, { "epoch": 0.58, "grad_norm": 1.234835147857666, "learning_rate": 3.903561551313373e-06, "loss": 0.5455, "step": 9036 }, { "epoch": 0.58, "grad_norm": 1.2042256593704224, "learning_rate": 3.9025417020369656e-06, "loss": 0.5274, "step": 9037 }, { "epoch": 0.58, "grad_norm": 1.1040310859680176, "learning_rate": 3.901521900727629e-06, "loss": 0.4436, "step": 9038 }, { "epoch": 0.58, "grad_norm": 1.176737666130066, "learning_rate": 3.900502147429936e-06, "loss": 0.5454, "step": 9039 }, { "epoch": 0.58, "grad_norm": 1.1953365802764893, "learning_rate": 3.899482442188459e-06, "loss": 0.5023, "step": 9040 }, { "epoch": 0.58, "grad_norm": 1.1588914394378662, "learning_rate": 3.898462785047763e-06, "loss": 0.5447, "step": 9041 }, { "epoch": 0.58, "grad_norm": 1.2139381170272827, "learning_rate": 3.897443176052418e-06, "loss": 0.5146, "step": 9042 }, { "epoch": 0.58, "grad_norm": 1.1592410802841187, "learning_rate": 3.896423615246986e-06, "loss": 0.5516, "step": 9043 }, { "epoch": 0.58, "grad_norm": 1.3232749700546265, "learning_rate": 3.89540410267603e-06, "loss": 0.5275, "step": 9044 }, { "epoch": 0.58, "grad_norm": 1.1284033060073853, "learning_rate": 3.894384638384112e-06, "loss": 0.5368, "step": 9045 }, { "epoch": 0.58, "grad_norm": 1.1477510929107666, "learning_rate": 3.89336522241579e-06, "loss": 0.5411, "step": 9046 }, { "epoch": 0.58, "grad_norm": 1.166311502456665, "learning_rate": 3.892345854815618e-06, "loss": 0.5131, "step": 9047 }, { "epoch": 0.58, "grad_norm": 1.244269847869873, "learning_rate": 3.89132653562815e-06, "loss": 0.5657, "step": 9048 }, { "epoch": 0.58, "grad_norm": 1.1431567668914795, "learning_rate": 3.890307264897939e-06, "loss": 0.4862, "step": 9049 }, { "epoch": 0.58, "grad_norm": 1.180936574935913, "learning_rate": 3.8892880426695344e-06, "loss": 0.5083, "step": 9050 }, { "epoch": 0.58, "grad_norm": 1.312362551689148, "learning_rate": 3.8882688689874835e-06, "loss": 0.5725, "step": 9051 }, { "epoch": 0.58, "grad_norm": 1.236854910850525, "learning_rate": 3.887249743896335e-06, "loss": 0.5273, "step": 9052 }, { "epoch": 0.58, "grad_norm": 1.1845051050186157, "learning_rate": 3.886230667440626e-06, "loss": 0.5776, "step": 9053 }, { "epoch": 0.58, "grad_norm": 1.16437828540802, "learning_rate": 3.885211639664902e-06, "loss": 0.5399, "step": 9054 }, { "epoch": 0.58, "grad_norm": 1.1054052114486694, "learning_rate": 3.8841926606137e-06, "loss": 0.5217, "step": 9055 }, { "epoch": 0.58, "grad_norm": 1.2437909841537476, "learning_rate": 3.883173730331559e-06, "loss": 0.5206, "step": 9056 }, { "epoch": 0.58, "grad_norm": 1.185922384262085, "learning_rate": 3.882154848863013e-06, "loss": 0.526, "step": 9057 }, { "epoch": 0.58, "grad_norm": 1.0646158456802368, "learning_rate": 3.881136016252596e-06, "loss": 0.4954, "step": 9058 }, { "epoch": 0.58, "grad_norm": 1.224616289138794, "learning_rate": 3.880117232544835e-06, "loss": 0.512, "step": 9059 }, { "epoch": 0.58, "grad_norm": 1.205997347831726, "learning_rate": 3.879098497784259e-06, "loss": 0.4626, "step": 9060 }, { "epoch": 0.58, "grad_norm": 1.2634035348892212, "learning_rate": 3.878079812015398e-06, "loss": 0.5506, "step": 9061 }, { "epoch": 0.58, "grad_norm": 1.1846222877502441, "learning_rate": 3.877061175282773e-06, "loss": 0.5349, "step": 9062 }, { "epoch": 0.59, "grad_norm": 1.1882901191711426, "learning_rate": 3.876042587630907e-06, "loss": 0.5169, "step": 9063 }, { "epoch": 0.59, "grad_norm": 1.1941146850585938, "learning_rate": 3.87502404910432e-06, "loss": 0.5243, "step": 9064 }, { "epoch": 0.59, "grad_norm": 1.1304936408996582, "learning_rate": 3.874005559747529e-06, "loss": 0.5316, "step": 9065 }, { "epoch": 0.59, "grad_norm": 1.4897401332855225, "learning_rate": 3.87298711960505e-06, "loss": 0.5003, "step": 9066 }, { "epoch": 0.59, "grad_norm": 1.2599304914474487, "learning_rate": 3.8719687287213955e-06, "loss": 0.5697, "step": 9067 }, { "epoch": 0.59, "grad_norm": 1.157306432723999, "learning_rate": 3.870950387141078e-06, "loss": 0.5178, "step": 9068 }, { "epoch": 0.59, "grad_norm": 1.2001138925552368, "learning_rate": 3.869932094908606e-06, "loss": 0.5294, "step": 9069 }, { "epoch": 0.59, "grad_norm": 1.258463978767395, "learning_rate": 3.868913852068488e-06, "loss": 0.5328, "step": 9070 }, { "epoch": 0.59, "grad_norm": 1.2275828123092651, "learning_rate": 3.867895658665225e-06, "loss": 0.4972, "step": 9071 }, { "epoch": 0.59, "grad_norm": 1.14400315284729, "learning_rate": 3.866877514743324e-06, "loss": 0.4913, "step": 9072 }, { "epoch": 0.59, "grad_norm": 1.256781816482544, "learning_rate": 3.865859420347281e-06, "loss": 0.5307, "step": 9073 }, { "epoch": 0.59, "grad_norm": 1.180929183959961, "learning_rate": 3.864841375521598e-06, "loss": 0.5342, "step": 9074 }, { "epoch": 0.59, "grad_norm": 1.1532405614852905, "learning_rate": 3.86382338031077e-06, "loss": 0.5487, "step": 9075 }, { "epoch": 0.59, "grad_norm": 1.104636311531067, "learning_rate": 3.862805434759291e-06, "loss": 0.5021, "step": 9076 }, { "epoch": 0.59, "grad_norm": 1.251476764678955, "learning_rate": 3.861787538911652e-06, "loss": 0.5244, "step": 9077 }, { "epoch": 0.59, "grad_norm": 1.2713569402694702, "learning_rate": 3.860769692812342e-06, "loss": 0.4752, "step": 9078 }, { "epoch": 0.59, "grad_norm": 1.2513014078140259, "learning_rate": 3.85975189650585e-06, "loss": 0.4843, "step": 9079 }, { "epoch": 0.59, "grad_norm": 1.1143701076507568, "learning_rate": 3.85873415003666e-06, "loss": 0.5009, "step": 9080 }, { "epoch": 0.59, "grad_norm": 1.1119381189346313, "learning_rate": 3.857716453449259e-06, "loss": 0.4919, "step": 9081 }, { "epoch": 0.59, "grad_norm": 1.1720834970474243, "learning_rate": 3.856698806788123e-06, "loss": 0.5575, "step": 9082 }, { "epoch": 0.59, "grad_norm": 1.1875941753387451, "learning_rate": 3.85568121009773e-06, "loss": 0.5764, "step": 9083 }, { "epoch": 0.59, "grad_norm": 1.1768330335617065, "learning_rate": 3.854663663422561e-06, "loss": 0.5353, "step": 9084 }, { "epoch": 0.59, "grad_norm": 1.2220970392227173, "learning_rate": 3.8536461668070875e-06, "loss": 0.5098, "step": 9085 }, { "epoch": 0.59, "grad_norm": 1.13896906375885, "learning_rate": 3.852628720295782e-06, "loss": 0.5023, "step": 9086 }, { "epoch": 0.59, "grad_norm": 1.1931724548339844, "learning_rate": 3.851611323933118e-06, "loss": 0.5288, "step": 9087 }, { "epoch": 0.59, "grad_norm": 1.1923431158065796, "learning_rate": 3.8505939777635575e-06, "loss": 0.5166, "step": 9088 }, { "epoch": 0.59, "grad_norm": 1.1807032823562622, "learning_rate": 3.8495766818315695e-06, "loss": 0.5865, "step": 9089 }, { "epoch": 0.59, "grad_norm": 1.2581188678741455, "learning_rate": 3.848559436181615e-06, "loss": 0.4949, "step": 9090 }, { "epoch": 0.59, "grad_norm": 1.1613526344299316, "learning_rate": 3.847542240858158e-06, "loss": 0.5284, "step": 9091 }, { "epoch": 0.59, "grad_norm": 1.2494820356369019, "learning_rate": 3.846525095905656e-06, "loss": 0.5571, "step": 9092 }, { "epoch": 0.59, "grad_norm": 1.1959179639816284, "learning_rate": 3.845508001368569e-06, "loss": 0.4697, "step": 9093 }, { "epoch": 0.59, "grad_norm": 1.1393346786499023, "learning_rate": 3.844490957291345e-06, "loss": 0.4964, "step": 9094 }, { "epoch": 0.59, "grad_norm": 1.27742600440979, "learning_rate": 3.84347396371844e-06, "loss": 0.5438, "step": 9095 }, { "epoch": 0.59, "grad_norm": 1.1883883476257324, "learning_rate": 3.842457020694306e-06, "loss": 0.5265, "step": 9096 }, { "epoch": 0.59, "grad_norm": 1.219115972518921, "learning_rate": 3.8414401282633875e-06, "loss": 0.5577, "step": 9097 }, { "epoch": 0.59, "grad_norm": 1.1599771976470947, "learning_rate": 3.840423286470133e-06, "loss": 0.5129, "step": 9098 }, { "epoch": 0.59, "grad_norm": 1.153414011001587, "learning_rate": 3.839406495358986e-06, "loss": 0.5411, "step": 9099 }, { "epoch": 0.59, "grad_norm": 1.1299232244491577, "learning_rate": 3.838389754974385e-06, "loss": 0.518, "step": 9100 }, { "epoch": 0.59, "grad_norm": 1.147864580154419, "learning_rate": 3.837373065360771e-06, "loss": 0.5128, "step": 9101 }, { "epoch": 0.59, "grad_norm": 1.0939881801605225, "learning_rate": 3.836356426562579e-06, "loss": 0.4483, "step": 9102 }, { "epoch": 0.59, "grad_norm": 1.3355712890625, "learning_rate": 3.835339838624248e-06, "loss": 0.4683, "step": 9103 }, { "epoch": 0.59, "grad_norm": 1.1105846166610718, "learning_rate": 3.834323301590206e-06, "loss": 0.5465, "step": 9104 }, { "epoch": 0.59, "grad_norm": 1.217968225479126, "learning_rate": 3.8333068155048884e-06, "loss": 0.5562, "step": 9105 }, { "epoch": 0.59, "grad_norm": 1.2644904851913452, "learning_rate": 3.832290380412717e-06, "loss": 0.5182, "step": 9106 }, { "epoch": 0.59, "grad_norm": 1.074646234512329, "learning_rate": 3.831273996358121e-06, "loss": 0.5156, "step": 9107 }, { "epoch": 0.59, "grad_norm": 1.1442344188690186, "learning_rate": 3.830257663385522e-06, "loss": 0.5115, "step": 9108 }, { "epoch": 0.59, "grad_norm": 1.2251458168029785, "learning_rate": 3.829241381539344e-06, "loss": 0.5353, "step": 9109 }, { "epoch": 0.59, "grad_norm": 1.2571355104446411, "learning_rate": 3.828225150864003e-06, "loss": 0.5411, "step": 9110 }, { "epoch": 0.59, "grad_norm": 1.1252151727676392, "learning_rate": 3.8272089714039195e-06, "loss": 0.5354, "step": 9111 }, { "epoch": 0.59, "grad_norm": 1.127555012702942, "learning_rate": 3.826192843203505e-06, "loss": 0.5513, "step": 9112 }, { "epoch": 0.59, "grad_norm": 1.2537930011749268, "learning_rate": 3.825176766307172e-06, "loss": 0.5685, "step": 9113 }, { "epoch": 0.59, "grad_norm": 1.2303802967071533, "learning_rate": 3.824160740759334e-06, "loss": 0.559, "step": 9114 }, { "epoch": 0.59, "grad_norm": 1.3739218711853027, "learning_rate": 3.823144766604394e-06, "loss": 0.5056, "step": 9115 }, { "epoch": 0.59, "grad_norm": 1.2720179557800293, "learning_rate": 3.822128843886761e-06, "loss": 0.5337, "step": 9116 }, { "epoch": 0.59, "grad_norm": 1.2821989059448242, "learning_rate": 3.821112972650837e-06, "loss": 0.4822, "step": 9117 }, { "epoch": 0.59, "grad_norm": 1.2888764142990112, "learning_rate": 3.820097152941024e-06, "loss": 0.5451, "step": 9118 }, { "epoch": 0.59, "grad_norm": 1.231554627418518, "learning_rate": 3.819081384801719e-06, "loss": 0.5358, "step": 9119 }, { "epoch": 0.59, "grad_norm": 1.099134922027588, "learning_rate": 3.818065668277321e-06, "loss": 0.4804, "step": 9120 }, { "epoch": 0.59, "grad_norm": 1.2349704504013062, "learning_rate": 3.8170500034122216e-06, "loss": 0.5074, "step": 9121 }, { "epoch": 0.59, "grad_norm": 1.2880027294158936, "learning_rate": 3.816034390250817e-06, "loss": 0.5213, "step": 9122 }, { "epoch": 0.59, "grad_norm": 1.331965446472168, "learning_rate": 3.815018828837494e-06, "loss": 0.5015, "step": 9123 }, { "epoch": 0.59, "grad_norm": 1.1916760206222534, "learning_rate": 3.8140033192166385e-06, "loss": 0.5093, "step": 9124 }, { "epoch": 0.59, "grad_norm": 1.3811752796173096, "learning_rate": 3.8129878614326387e-06, "loss": 0.5616, "step": 9125 }, { "epoch": 0.59, "grad_norm": 1.305083990097046, "learning_rate": 3.8119724555298778e-06, "loss": 0.5748, "step": 9126 }, { "epoch": 0.59, "grad_norm": 1.2244009971618652, "learning_rate": 3.810957101552735e-06, "loss": 0.53, "step": 9127 }, { "epoch": 0.59, "grad_norm": 1.3172482252120972, "learning_rate": 3.809941799545591e-06, "loss": 0.5141, "step": 9128 }, { "epoch": 0.59, "grad_norm": 1.2282137870788574, "learning_rate": 3.8089265495528196e-06, "loss": 0.5121, "step": 9129 }, { "epoch": 0.59, "grad_norm": 1.2384048700332642, "learning_rate": 3.8079113516187955e-06, "loss": 0.5128, "step": 9130 }, { "epoch": 0.59, "grad_norm": 1.082183837890625, "learning_rate": 3.80689620578789e-06, "loss": 0.4772, "step": 9131 }, { "epoch": 0.59, "grad_norm": 1.3056256771087646, "learning_rate": 3.805881112104474e-06, "loss": 0.543, "step": 9132 }, { "epoch": 0.59, "grad_norm": 1.164989709854126, "learning_rate": 3.8048660706129128e-06, "loss": 0.5291, "step": 9133 }, { "epoch": 0.59, "grad_norm": 1.1900944709777832, "learning_rate": 3.8038510813575746e-06, "loss": 0.5413, "step": 9134 }, { "epoch": 0.59, "grad_norm": 1.1525485515594482, "learning_rate": 3.802836144382818e-06, "loss": 0.4847, "step": 9135 }, { "epoch": 0.59, "grad_norm": 1.1504552364349365, "learning_rate": 3.801821259733004e-06, "loss": 0.4665, "step": 9136 }, { "epoch": 0.59, "grad_norm": 1.1780368089675903, "learning_rate": 3.8008064274524916e-06, "loss": 0.5134, "step": 9137 }, { "epoch": 0.59, "grad_norm": 1.1432214975357056, "learning_rate": 3.799791647585636e-06, "loss": 0.5475, "step": 9138 }, { "epoch": 0.59, "grad_norm": 1.4145675897598267, "learning_rate": 3.7987769201767915e-06, "loss": 0.5571, "step": 9139 }, { "epoch": 0.59, "grad_norm": 1.1480680704116821, "learning_rate": 3.7977622452703107e-06, "loss": 0.5584, "step": 9140 }, { "epoch": 0.59, "grad_norm": 1.1717708110809326, "learning_rate": 3.7967476229105377e-06, "loss": 0.5752, "step": 9141 }, { "epoch": 0.59, "grad_norm": 1.2512096166610718, "learning_rate": 3.7957330531418224e-06, "loss": 0.5605, "step": 9142 }, { "epoch": 0.59, "grad_norm": 1.051074504852295, "learning_rate": 3.7947185360085078e-06, "loss": 0.4761, "step": 9143 }, { "epoch": 0.59, "grad_norm": 1.1549196243286133, "learning_rate": 3.793704071554936e-06, "loss": 0.4942, "step": 9144 }, { "epoch": 0.59, "grad_norm": 1.2646645307540894, "learning_rate": 3.7926896598254476e-06, "loss": 0.5579, "step": 9145 }, { "epoch": 0.59, "grad_norm": 1.191338062286377, "learning_rate": 3.7916753008643813e-06, "loss": 0.5453, "step": 9146 }, { "epoch": 0.59, "grad_norm": 1.2140086889266968, "learning_rate": 3.790660994716068e-06, "loss": 0.5397, "step": 9147 }, { "epoch": 0.59, "grad_norm": 1.1504212617874146, "learning_rate": 3.7896467414248422e-06, "loss": 0.4968, "step": 9148 }, { "epoch": 0.59, "grad_norm": 1.212848424911499, "learning_rate": 3.7886325410350344e-06, "loss": 0.5565, "step": 9149 }, { "epoch": 0.59, "grad_norm": 1.2174420356750488, "learning_rate": 3.7876183935909733e-06, "loss": 0.5123, "step": 9150 }, { "epoch": 0.59, "grad_norm": 1.149583339691162, "learning_rate": 3.7866042991369838e-06, "loss": 0.5335, "step": 9151 }, { "epoch": 0.59, "grad_norm": 1.267270565032959, "learning_rate": 3.7855902577173924e-06, "loss": 0.5002, "step": 9152 }, { "epoch": 0.59, "grad_norm": 1.348824381828308, "learning_rate": 3.7845762693765154e-06, "loss": 0.5592, "step": 9153 }, { "epoch": 0.59, "grad_norm": 1.2157717943191528, "learning_rate": 3.7835623341586734e-06, "loss": 0.5157, "step": 9154 }, { "epoch": 0.59, "grad_norm": 1.3483785390853882, "learning_rate": 3.782548452108184e-06, "loss": 0.5666, "step": 9155 }, { "epoch": 0.59, "grad_norm": 1.116371750831604, "learning_rate": 3.781534623269361e-06, "loss": 0.515, "step": 9156 }, { "epoch": 0.59, "grad_norm": 1.168823003768921, "learning_rate": 3.7805208476865164e-06, "loss": 0.4977, "step": 9157 }, { "epoch": 0.59, "grad_norm": 1.1887367963790894, "learning_rate": 3.7795071254039584e-06, "loss": 0.5267, "step": 9158 }, { "epoch": 0.59, "grad_norm": 1.2371721267700195, "learning_rate": 3.7784934564659946e-06, "loss": 0.5757, "step": 9159 }, { "epoch": 0.59, "grad_norm": 1.2701683044433594, "learning_rate": 3.7774798409169305e-06, "loss": 0.5256, "step": 9160 }, { "epoch": 0.59, "grad_norm": 1.1107094287872314, "learning_rate": 3.776466278801069e-06, "loss": 0.4864, "step": 9161 }, { "epoch": 0.59, "grad_norm": 1.2403011322021484, "learning_rate": 3.7754527701627096e-06, "loss": 0.5157, "step": 9162 }, { "epoch": 0.59, "grad_norm": 1.2997888326644897, "learning_rate": 3.7744393150461504e-06, "loss": 0.5055, "step": 9163 }, { "epoch": 0.59, "grad_norm": 1.1352407932281494, "learning_rate": 3.7734259134956863e-06, "loss": 0.4998, "step": 9164 }, { "epoch": 0.59, "grad_norm": 1.303761601448059, "learning_rate": 3.7724125655556115e-06, "loss": 0.4942, "step": 9165 }, { "epoch": 0.59, "grad_norm": 1.2749836444854736, "learning_rate": 3.7713992712702154e-06, "loss": 0.539, "step": 9166 }, { "epoch": 0.59, "grad_norm": 1.17074716091156, "learning_rate": 3.7703860306837875e-06, "loss": 0.516, "step": 9167 }, { "epoch": 0.59, "grad_norm": 1.2007627487182617, "learning_rate": 3.7693728438406134e-06, "loss": 0.5182, "step": 9168 }, { "epoch": 0.59, "grad_norm": 1.2301915884017944, "learning_rate": 3.7683597107849784e-06, "loss": 0.4605, "step": 9169 }, { "epoch": 0.59, "grad_norm": 1.286789894104004, "learning_rate": 3.767346631561163e-06, "loss": 0.4727, "step": 9170 }, { "epoch": 0.59, "grad_norm": 1.371688723564148, "learning_rate": 3.7663336062134447e-06, "loss": 0.5407, "step": 9171 }, { "epoch": 0.59, "grad_norm": 1.2681673765182495, "learning_rate": 3.7653206347861015e-06, "loss": 0.5514, "step": 9172 }, { "epoch": 0.59, "grad_norm": 1.1147767305374146, "learning_rate": 3.7643077173234082e-06, "loss": 0.5556, "step": 9173 }, { "epoch": 0.59, "grad_norm": 1.142094373703003, "learning_rate": 3.7632948538696363e-06, "loss": 0.5062, "step": 9174 }, { "epoch": 0.59, "grad_norm": 1.0693858861923218, "learning_rate": 3.7622820444690577e-06, "loss": 0.508, "step": 9175 }, { "epoch": 0.59, "grad_norm": 1.1522988080978394, "learning_rate": 3.761269289165935e-06, "loss": 0.5153, "step": 9176 }, { "epoch": 0.59, "grad_norm": 1.0795787572860718, "learning_rate": 3.7602565880045366e-06, "loss": 0.4931, "step": 9177 }, { "epoch": 0.59, "grad_norm": 1.212175726890564, "learning_rate": 3.7592439410291235e-06, "loss": 0.5029, "step": 9178 }, { "epoch": 0.59, "grad_norm": 1.219828724861145, "learning_rate": 3.7582313482839573e-06, "loss": 0.5368, "step": 9179 }, { "epoch": 0.59, "grad_norm": 1.3097535371780396, "learning_rate": 3.7572188098132945e-06, "loss": 0.5468, "step": 9180 }, { "epoch": 0.59, "grad_norm": 1.2318549156188965, "learning_rate": 3.756206325661393e-06, "loss": 0.5014, "step": 9181 }, { "epoch": 0.59, "grad_norm": 1.341562032699585, "learning_rate": 3.755193895872502e-06, "loss": 0.5528, "step": 9182 }, { "epoch": 0.59, "grad_norm": 1.2575503587722778, "learning_rate": 3.7541815204908745e-06, "loss": 0.541, "step": 9183 }, { "epoch": 0.59, "grad_norm": 1.2412577867507935, "learning_rate": 3.753169199560758e-06, "loss": 0.5038, "step": 9184 }, { "epoch": 0.59, "grad_norm": 1.20724618434906, "learning_rate": 3.752156933126399e-06, "loss": 0.5375, "step": 9185 }, { "epoch": 0.59, "grad_norm": 1.1337226629257202, "learning_rate": 3.751144721232041e-06, "loss": 0.5318, "step": 9186 }, { "epoch": 0.59, "grad_norm": 1.127453327178955, "learning_rate": 3.7501325639219276e-06, "loss": 0.5181, "step": 9187 }, { "epoch": 0.59, "grad_norm": 1.1178491115570068, "learning_rate": 3.7491204612402933e-06, "loss": 0.4693, "step": 9188 }, { "epoch": 0.59, "grad_norm": 1.2078657150268555, "learning_rate": 3.7481084132313756e-06, "loss": 0.5942, "step": 9189 }, { "epoch": 0.59, "grad_norm": 1.2224608659744263, "learning_rate": 3.7470964199394094e-06, "loss": 0.5503, "step": 9190 }, { "epoch": 0.59, "grad_norm": 1.0727583169937134, "learning_rate": 3.746084481408626e-06, "loss": 0.4761, "step": 9191 }, { "epoch": 0.59, "grad_norm": 1.284812569618225, "learning_rate": 3.745072597683255e-06, "loss": 0.5423, "step": 9192 }, { "epoch": 0.59, "grad_norm": 1.2026569843292236, "learning_rate": 3.7440607688075255e-06, "loss": 0.5645, "step": 9193 }, { "epoch": 0.59, "grad_norm": 1.2157964706420898, "learning_rate": 3.7430489948256564e-06, "loss": 0.5642, "step": 9194 }, { "epoch": 0.59, "grad_norm": 1.1908586025238037, "learning_rate": 3.7420372757818734e-06, "loss": 0.4976, "step": 9195 }, { "epoch": 0.59, "grad_norm": 1.20139741897583, "learning_rate": 3.7410256117203957e-06, "loss": 0.5479, "step": 9196 }, { "epoch": 0.59, "grad_norm": 1.270260214805603, "learning_rate": 3.7400140026854398e-06, "loss": 0.5343, "step": 9197 }, { "epoch": 0.59, "grad_norm": 1.168863296508789, "learning_rate": 3.7390024487212224e-06, "loss": 0.5304, "step": 9198 }, { "epoch": 0.59, "grad_norm": 1.2676347494125366, "learning_rate": 3.7379909498719545e-06, "loss": 0.5155, "step": 9199 }, { "epoch": 0.59, "grad_norm": 1.1521695852279663, "learning_rate": 3.736979506181845e-06, "loss": 0.5618, "step": 9200 }, { "epoch": 0.59, "grad_norm": 1.1429277658462524, "learning_rate": 3.7359681176951025e-06, "loss": 0.4872, "step": 9201 }, { "epoch": 0.59, "grad_norm": 1.2330639362335205, "learning_rate": 3.7349567844559326e-06, "loss": 0.563, "step": 9202 }, { "epoch": 0.59, "grad_norm": 1.1687779426574707, "learning_rate": 3.7339455065085383e-06, "loss": 0.5389, "step": 9203 }, { "epoch": 0.59, "grad_norm": 1.2040373086929321, "learning_rate": 3.7329342838971204e-06, "loss": 0.5115, "step": 9204 }, { "epoch": 0.59, "grad_norm": 1.1555424928665161, "learning_rate": 3.7319231166658744e-06, "loss": 0.5071, "step": 9205 }, { "epoch": 0.59, "grad_norm": 1.1385905742645264, "learning_rate": 3.730912004858997e-06, "loss": 0.4916, "step": 9206 }, { "epoch": 0.59, "grad_norm": 1.1381865739822388, "learning_rate": 3.7299009485206827e-06, "loss": 0.4924, "step": 9207 }, { "epoch": 0.59, "grad_norm": 1.3196821212768555, "learning_rate": 3.72888994769512e-06, "loss": 0.5436, "step": 9208 }, { "epoch": 0.59, "grad_norm": 1.1953388452529907, "learning_rate": 3.7278790024264986e-06, "loss": 0.5172, "step": 9209 }, { "epoch": 0.59, "grad_norm": 1.1490521430969238, "learning_rate": 3.7268681127590044e-06, "loss": 0.52, "step": 9210 }, { "epoch": 0.59, "grad_norm": 1.3965526819229126, "learning_rate": 3.7258572787368196e-06, "loss": 0.4296, "step": 9211 }, { "epoch": 0.59, "grad_norm": 1.1391725540161133, "learning_rate": 3.7248465004041266e-06, "loss": 0.5525, "step": 9212 }, { "epoch": 0.59, "grad_norm": 1.0930020809173584, "learning_rate": 3.7238357778051026e-06, "loss": 0.4892, "step": 9213 }, { "epoch": 0.59, "grad_norm": 1.088758111000061, "learning_rate": 3.7228251109839236e-06, "loss": 0.5012, "step": 9214 }, { "epoch": 0.59, "grad_norm": 1.2305634021759033, "learning_rate": 3.7218144999847637e-06, "loss": 0.5461, "step": 9215 }, { "epoch": 0.59, "grad_norm": 1.1432815790176392, "learning_rate": 3.720803944851796e-06, "loss": 0.5144, "step": 9216 }, { "epoch": 0.59, "grad_norm": 1.1172525882720947, "learning_rate": 3.7197934456291873e-06, "loss": 0.5419, "step": 9217 }, { "epoch": 0.6, "grad_norm": 1.2492032051086426, "learning_rate": 3.7187830023611027e-06, "loss": 0.4966, "step": 9218 }, { "epoch": 0.6, "grad_norm": 1.0553056001663208, "learning_rate": 3.7177726150917083e-06, "loss": 0.4773, "step": 9219 }, { "epoch": 0.6, "grad_norm": 1.2113569974899292, "learning_rate": 3.716762283865164e-06, "loss": 0.4978, "step": 9220 }, { "epoch": 0.6, "grad_norm": 1.2741892337799072, "learning_rate": 3.7157520087256295e-06, "loss": 0.5001, "step": 9221 }, { "epoch": 0.6, "grad_norm": 1.099071979522705, "learning_rate": 3.7147417897172633e-06, "loss": 0.5308, "step": 9222 }, { "epoch": 0.6, "grad_norm": 1.1511496305465698, "learning_rate": 3.7137316268842154e-06, "loss": 0.4855, "step": 9223 }, { "epoch": 0.6, "grad_norm": 1.5545164346694946, "learning_rate": 3.7127215202706395e-06, "loss": 0.5842, "step": 9224 }, { "epoch": 0.6, "grad_norm": 1.1690475940704346, "learning_rate": 3.7117114699206845e-06, "loss": 0.547, "step": 9225 }, { "epoch": 0.6, "grad_norm": 1.1993122100830078, "learning_rate": 3.710701475878498e-06, "loss": 0.61, "step": 9226 }, { "epoch": 0.6, "grad_norm": 1.2199805974960327, "learning_rate": 3.7096915381882237e-06, "loss": 0.4924, "step": 9227 }, { "epoch": 0.6, "grad_norm": 1.3194713592529297, "learning_rate": 3.7086816568940044e-06, "loss": 0.5846, "step": 9228 }, { "epoch": 0.6, "grad_norm": 1.0930395126342773, "learning_rate": 3.707671832039977e-06, "loss": 0.5007, "step": 9229 }, { "epoch": 0.6, "grad_norm": 1.2205302715301514, "learning_rate": 3.706662063670279e-06, "loss": 0.5126, "step": 9230 }, { "epoch": 0.6, "grad_norm": 1.1946536302566528, "learning_rate": 3.7056523518290454e-06, "loss": 0.4865, "step": 9231 }, { "epoch": 0.6, "grad_norm": 1.1746658086776733, "learning_rate": 3.7046426965604075e-06, "loss": 0.519, "step": 9232 }, { "epoch": 0.6, "grad_norm": 1.1217776536941528, "learning_rate": 3.7036330979084967e-06, "loss": 0.4843, "step": 9233 }, { "epoch": 0.6, "grad_norm": 1.287819266319275, "learning_rate": 3.7026235559174395e-06, "loss": 0.5956, "step": 9234 }, { "epoch": 0.6, "grad_norm": 1.223112940788269, "learning_rate": 3.7016140706313575e-06, "loss": 0.5801, "step": 9235 }, { "epoch": 0.6, "grad_norm": 1.3956680297851562, "learning_rate": 3.7006046420943746e-06, "loss": 0.5712, "step": 9236 }, { "epoch": 0.6, "grad_norm": 1.3130359649658203, "learning_rate": 3.6995952703506103e-06, "loss": 0.5701, "step": 9237 }, { "epoch": 0.6, "grad_norm": 1.1269010305404663, "learning_rate": 3.6985859554441816e-06, "loss": 0.5071, "step": 9238 }, { "epoch": 0.6, "grad_norm": 1.2628096342086792, "learning_rate": 3.697576697419204e-06, "loss": 0.5568, "step": 9239 }, { "epoch": 0.6, "grad_norm": 1.3528825044631958, "learning_rate": 3.6965674963197894e-06, "loss": 0.5263, "step": 9240 }, { "epoch": 0.6, "grad_norm": 1.177518606185913, "learning_rate": 3.695558352190045e-06, "loss": 0.5347, "step": 9241 }, { "epoch": 0.6, "grad_norm": 1.150586724281311, "learning_rate": 3.69454926507408e-06, "loss": 0.5096, "step": 9242 }, { "epoch": 0.6, "grad_norm": 1.179667592048645, "learning_rate": 3.693540235015998e-06, "loss": 0.5021, "step": 9243 }, { "epoch": 0.6, "grad_norm": 1.2361280918121338, "learning_rate": 3.6925312620599017e-06, "loss": 0.4957, "step": 9244 }, { "epoch": 0.6, "grad_norm": 1.2473702430725098, "learning_rate": 3.6915223462498926e-06, "loss": 0.5246, "step": 9245 }, { "epoch": 0.6, "grad_norm": 1.1780840158462524, "learning_rate": 3.690513487630064e-06, "loss": 0.5591, "step": 9246 }, { "epoch": 0.6, "grad_norm": 1.1870684623718262, "learning_rate": 3.689504686244513e-06, "loss": 0.4858, "step": 9247 }, { "epoch": 0.6, "grad_norm": 1.1580840349197388, "learning_rate": 3.68849594213733e-06, "loss": 0.4894, "step": 9248 }, { "epoch": 0.6, "grad_norm": 1.1586838960647583, "learning_rate": 3.6874872553526057e-06, "loss": 0.5022, "step": 9249 }, { "epoch": 0.6, "grad_norm": 1.1654398441314697, "learning_rate": 3.6864786259344286e-06, "loss": 0.4795, "step": 9250 }, { "epoch": 0.6, "grad_norm": 1.1766729354858398, "learning_rate": 3.6854700539268817e-06, "loss": 0.5263, "step": 9251 }, { "epoch": 0.6, "grad_norm": 1.2409040927886963, "learning_rate": 3.6844615393740463e-06, "loss": 0.4927, "step": 9252 }, { "epoch": 0.6, "grad_norm": 1.190063714981079, "learning_rate": 3.6834530823200025e-06, "loss": 0.5322, "step": 9253 }, { "epoch": 0.6, "grad_norm": 1.187366008758545, "learning_rate": 3.682444682808829e-06, "loss": 0.4955, "step": 9254 }, { "epoch": 0.6, "grad_norm": 1.135026454925537, "learning_rate": 3.681436340884598e-06, "loss": 0.5557, "step": 9255 }, { "epoch": 0.6, "grad_norm": 1.067103624343872, "learning_rate": 3.6804280565913832e-06, "loss": 0.4836, "step": 9256 }, { "epoch": 0.6, "grad_norm": 1.091761827468872, "learning_rate": 3.6794198299732537e-06, "loss": 0.4977, "step": 9257 }, { "epoch": 0.6, "grad_norm": 1.1953035593032837, "learning_rate": 3.6784116610742755e-06, "loss": 0.5622, "step": 9258 }, { "epoch": 0.6, "grad_norm": 1.29705011844635, "learning_rate": 3.6774035499385153e-06, "loss": 0.4679, "step": 9259 }, { "epoch": 0.6, "grad_norm": 1.2086774110794067, "learning_rate": 3.6763954966100317e-06, "loss": 0.5081, "step": 9260 }, { "epoch": 0.6, "grad_norm": 1.2294776439666748, "learning_rate": 3.6753875011328866e-06, "loss": 0.5382, "step": 9261 }, { "epoch": 0.6, "grad_norm": 1.1376067399978638, "learning_rate": 3.674379563551136e-06, "loss": 0.5239, "step": 9262 }, { "epoch": 0.6, "grad_norm": 1.2244676351547241, "learning_rate": 3.673371683908837e-06, "loss": 0.539, "step": 9263 }, { "epoch": 0.6, "grad_norm": 1.2153143882751465, "learning_rate": 3.6723638622500367e-06, "loss": 0.522, "step": 9264 }, { "epoch": 0.6, "grad_norm": 1.173142671585083, "learning_rate": 3.6713560986187863e-06, "loss": 0.5089, "step": 9265 }, { "epoch": 0.6, "grad_norm": 1.1460940837860107, "learning_rate": 3.6703483930591334e-06, "loss": 0.4742, "step": 9266 }, { "epoch": 0.6, "grad_norm": 1.1769869327545166, "learning_rate": 3.669340745615121e-06, "loss": 0.5244, "step": 9267 }, { "epoch": 0.6, "grad_norm": 1.1620100736618042, "learning_rate": 3.6683331563307923e-06, "loss": 0.4727, "step": 9268 }, { "epoch": 0.6, "grad_norm": 1.202326774597168, "learning_rate": 3.667325625250187e-06, "loss": 0.5353, "step": 9269 }, { "epoch": 0.6, "grad_norm": 1.1836442947387695, "learning_rate": 3.6663181524173384e-06, "loss": 0.5305, "step": 9270 }, { "epoch": 0.6, "grad_norm": 1.2461953163146973, "learning_rate": 3.6653107378762824e-06, "loss": 0.557, "step": 9271 }, { "epoch": 0.6, "grad_norm": 1.1533799171447754, "learning_rate": 3.6643033816710505e-06, "loss": 0.5092, "step": 9272 }, { "epoch": 0.6, "grad_norm": 1.3109676837921143, "learning_rate": 3.663296083845672e-06, "loss": 0.5643, "step": 9273 }, { "epoch": 0.6, "grad_norm": 1.1839653253555298, "learning_rate": 3.662288844444173e-06, "loss": 0.5012, "step": 9274 }, { "epoch": 0.6, "grad_norm": 1.18006432056427, "learning_rate": 3.6612816635105784e-06, "loss": 0.565, "step": 9275 }, { "epoch": 0.6, "grad_norm": 1.1305428743362427, "learning_rate": 3.6602745410889073e-06, "loss": 0.4764, "step": 9276 }, { "epoch": 0.6, "grad_norm": 1.3980164527893066, "learning_rate": 3.6592674772231783e-06, "loss": 0.4962, "step": 9277 }, { "epoch": 0.6, "grad_norm": 1.130678653717041, "learning_rate": 3.65826047195741e-06, "loss": 0.5419, "step": 9278 }, { "epoch": 0.6, "grad_norm": 1.1439803838729858, "learning_rate": 3.6572535253356143e-06, "loss": 0.4923, "step": 9279 }, { "epoch": 0.6, "grad_norm": 1.1622581481933594, "learning_rate": 3.6562466374018023e-06, "loss": 0.5032, "step": 9280 }, { "epoch": 0.6, "grad_norm": 1.1773390769958496, "learning_rate": 3.655239808199985e-06, "loss": 0.545, "step": 9281 }, { "epoch": 0.6, "grad_norm": 1.2753522396087646, "learning_rate": 3.654233037774165e-06, "loss": 0.4707, "step": 9282 }, { "epoch": 0.6, "grad_norm": 1.2459763288497925, "learning_rate": 3.653226326168346e-06, "loss": 0.5138, "step": 9283 }, { "epoch": 0.6, "grad_norm": 1.0735303163528442, "learning_rate": 3.652219673426529e-06, "loss": 0.5094, "step": 9284 }, { "epoch": 0.6, "grad_norm": 1.2475911378860474, "learning_rate": 3.651213079592714e-06, "loss": 0.523, "step": 9285 }, { "epoch": 0.6, "grad_norm": 1.1567742824554443, "learning_rate": 3.6502065447108968e-06, "loss": 0.5305, "step": 9286 }, { "epoch": 0.6, "grad_norm": 1.3604061603546143, "learning_rate": 3.6492000688250672e-06, "loss": 0.5699, "step": 9287 }, { "epoch": 0.6, "grad_norm": 1.1315457820892334, "learning_rate": 3.648193651979217e-06, "loss": 0.506, "step": 9288 }, { "epoch": 0.6, "grad_norm": 1.130625605583191, "learning_rate": 3.647187294217335e-06, "loss": 0.5351, "step": 9289 }, { "epoch": 0.6, "grad_norm": 1.360121250152588, "learning_rate": 3.646180995583406e-06, "loss": 0.5506, "step": 9290 }, { "epoch": 0.6, "grad_norm": 1.0803453922271729, "learning_rate": 3.645174756121412e-06, "loss": 0.4613, "step": 9291 }, { "epoch": 0.6, "grad_norm": 1.2453138828277588, "learning_rate": 3.644168575875337e-06, "loss": 0.5689, "step": 9292 }, { "epoch": 0.6, "grad_norm": 1.2348543405532837, "learning_rate": 3.643162454889153e-06, "loss": 0.6005, "step": 9293 }, { "epoch": 0.6, "grad_norm": 1.5949281454086304, "learning_rate": 3.6421563932068375e-06, "loss": 0.5584, "step": 9294 }, { "epoch": 0.6, "grad_norm": 1.1756510734558105, "learning_rate": 3.641150390872363e-06, "loss": 0.4586, "step": 9295 }, { "epoch": 0.6, "grad_norm": 1.1195822954177856, "learning_rate": 3.6401444479296988e-06, "loss": 0.4797, "step": 9296 }, { "epoch": 0.6, "grad_norm": 1.0879900455474854, "learning_rate": 3.6391385644228127e-06, "loss": 0.494, "step": 9297 }, { "epoch": 0.6, "grad_norm": 1.1235555410385132, "learning_rate": 3.63813274039567e-06, "loss": 0.539, "step": 9298 }, { "epoch": 0.6, "grad_norm": 1.5120121240615845, "learning_rate": 3.63712697589223e-06, "loss": 0.5436, "step": 9299 }, { "epoch": 0.6, "grad_norm": 1.0532453060150146, "learning_rate": 3.6361212709564536e-06, "loss": 0.4886, "step": 9300 }, { "epoch": 0.6, "grad_norm": 1.1630723476409912, "learning_rate": 3.635115625632298e-06, "loss": 0.5303, "step": 9301 }, { "epoch": 0.6, "grad_norm": 1.2452261447906494, "learning_rate": 3.6341100399637174e-06, "loss": 0.5196, "step": 9302 }, { "epoch": 0.6, "grad_norm": 1.208842158317566, "learning_rate": 3.633104513994662e-06, "loss": 0.5049, "step": 9303 }, { "epoch": 0.6, "grad_norm": 1.191823959350586, "learning_rate": 3.632099047769083e-06, "loss": 0.5045, "step": 9304 }, { "epoch": 0.6, "grad_norm": 1.1977651119232178, "learning_rate": 3.631093641330924e-06, "loss": 0.5097, "step": 9305 }, { "epoch": 0.6, "grad_norm": 1.2059215307235718, "learning_rate": 3.6300882947241313e-06, "loss": 0.4758, "step": 9306 }, { "epoch": 0.6, "grad_norm": 1.246045708656311, "learning_rate": 3.629083007992644e-06, "loss": 0.536, "step": 9307 }, { "epoch": 0.6, "grad_norm": 1.252798080444336, "learning_rate": 3.628077781180401e-06, "loss": 0.4939, "step": 9308 }, { "epoch": 0.6, "grad_norm": 1.1288905143737793, "learning_rate": 3.6270726143313385e-06, "loss": 0.583, "step": 9309 }, { "epoch": 0.6, "grad_norm": 1.185258150100708, "learning_rate": 3.6260675074893926e-06, "loss": 0.5188, "step": 9310 }, { "epoch": 0.6, "grad_norm": 1.1509898900985718, "learning_rate": 3.6250624606984884e-06, "loss": 0.5212, "step": 9311 }, { "epoch": 0.6, "grad_norm": 1.3003602027893066, "learning_rate": 3.624057474002557e-06, "loss": 0.5014, "step": 9312 }, { "epoch": 0.6, "grad_norm": 1.2031961679458618, "learning_rate": 3.6230525474455237e-06, "loss": 0.522, "step": 9313 }, { "epoch": 0.6, "grad_norm": 1.079709768295288, "learning_rate": 3.6220476810713103e-06, "loss": 0.5213, "step": 9314 }, { "epoch": 0.6, "grad_norm": 1.2813912630081177, "learning_rate": 3.621042874923838e-06, "loss": 0.5439, "step": 9315 }, { "epoch": 0.6, "grad_norm": 1.2022206783294678, "learning_rate": 3.6200381290470254e-06, "loss": 0.5232, "step": 9316 }, { "epoch": 0.6, "grad_norm": 1.1726548671722412, "learning_rate": 3.6190334434847848e-06, "loss": 0.4802, "step": 9317 }, { "epoch": 0.6, "grad_norm": 1.1493865251541138, "learning_rate": 3.6180288182810287e-06, "loss": 0.5326, "step": 9318 }, { "epoch": 0.6, "grad_norm": 1.1845169067382812, "learning_rate": 3.617024253479667e-06, "loss": 0.4668, "step": 9319 }, { "epoch": 0.6, "grad_norm": 1.2616539001464844, "learning_rate": 3.616019749124608e-06, "loss": 0.4962, "step": 9320 }, { "epoch": 0.6, "grad_norm": 1.2528551816940308, "learning_rate": 3.6150153052597546e-06, "loss": 0.5526, "step": 9321 }, { "epoch": 0.6, "grad_norm": 1.2729517221450806, "learning_rate": 3.614010921929011e-06, "loss": 0.545, "step": 9322 }, { "epoch": 0.6, "grad_norm": 1.184571385383606, "learning_rate": 3.613006599176272e-06, "loss": 0.4789, "step": 9323 }, { "epoch": 0.6, "grad_norm": 1.1798896789550781, "learning_rate": 3.612002337045436e-06, "loss": 0.5358, "step": 9324 }, { "epoch": 0.6, "grad_norm": 1.246895670890808, "learning_rate": 3.610998135580397e-06, "loss": 0.5436, "step": 9325 }, { "epoch": 0.6, "grad_norm": 1.1489847898483276, "learning_rate": 3.6099939948250463e-06, "loss": 0.5628, "step": 9326 }, { "epoch": 0.6, "grad_norm": 1.1537744998931885, "learning_rate": 3.608989914823271e-06, "loss": 0.5277, "step": 9327 }, { "epoch": 0.6, "grad_norm": 1.2021628618240356, "learning_rate": 3.60798589561896e-06, "loss": 0.515, "step": 9328 }, { "epoch": 0.6, "grad_norm": 1.1911619901657104, "learning_rate": 3.6069819372559924e-06, "loss": 0.4808, "step": 9329 }, { "epoch": 0.6, "grad_norm": 1.1794813871383667, "learning_rate": 3.6059780397782496e-06, "loss": 0.4878, "step": 9330 }, { "epoch": 0.6, "grad_norm": 1.1917293071746826, "learning_rate": 3.60497420322961e-06, "loss": 0.4798, "step": 9331 }, { "epoch": 0.6, "grad_norm": 1.1709643602371216, "learning_rate": 3.603970427653949e-06, "loss": 0.547, "step": 9332 }, { "epoch": 0.6, "grad_norm": 1.159401774406433, "learning_rate": 3.602966713095141e-06, "loss": 0.5356, "step": 9333 }, { "epoch": 0.6, "grad_norm": 1.097359538078308, "learning_rate": 3.601963059597052e-06, "loss": 0.4955, "step": 9334 }, { "epoch": 0.6, "grad_norm": 1.0839059352874756, "learning_rate": 3.6009594672035496e-06, "loss": 0.4453, "step": 9335 }, { "epoch": 0.6, "grad_norm": 1.2894871234893799, "learning_rate": 3.5999559359585e-06, "loss": 0.5437, "step": 9336 }, { "epoch": 0.6, "grad_norm": 1.1720727682113647, "learning_rate": 3.598952465905764e-06, "loss": 0.5052, "step": 9337 }, { "epoch": 0.6, "grad_norm": 1.1911388635635376, "learning_rate": 3.5979490570892017e-06, "loss": 0.5076, "step": 9338 }, { "epoch": 0.6, "grad_norm": 1.178555965423584, "learning_rate": 3.59694570955267e-06, "loss": 0.5158, "step": 9339 }, { "epoch": 0.6, "grad_norm": 1.1205353736877441, "learning_rate": 3.5959424233400198e-06, "loss": 0.5463, "step": 9340 }, { "epoch": 0.6, "grad_norm": 1.302932620048523, "learning_rate": 3.5949391984951032e-06, "loss": 0.542, "step": 9341 }, { "epoch": 0.6, "grad_norm": 1.128462553024292, "learning_rate": 3.59393603506177e-06, "loss": 0.5083, "step": 9342 }, { "epoch": 0.6, "grad_norm": 1.3947862386703491, "learning_rate": 3.5929329330838654e-06, "loss": 0.5464, "step": 9343 }, { "epoch": 0.6, "grad_norm": 1.125943899154663, "learning_rate": 3.5919298926052308e-06, "loss": 0.5215, "step": 9344 }, { "epoch": 0.6, "grad_norm": 1.1978533267974854, "learning_rate": 3.590926913669709e-06, "loss": 0.525, "step": 9345 }, { "epoch": 0.6, "grad_norm": 1.1019937992095947, "learning_rate": 3.589923996321135e-06, "loss": 0.528, "step": 9346 }, { "epoch": 0.6, "grad_norm": 1.2029211521148682, "learning_rate": 3.588921140603346e-06, "loss": 0.5354, "step": 9347 }, { "epoch": 0.6, "grad_norm": 1.144717812538147, "learning_rate": 3.587918346560174e-06, "loss": 0.4831, "step": 9348 }, { "epoch": 0.6, "grad_norm": 1.3110742568969727, "learning_rate": 3.586915614235447e-06, "loss": 0.5734, "step": 9349 }, { "epoch": 0.6, "grad_norm": 1.1252501010894775, "learning_rate": 3.5859129436729917e-06, "loss": 0.4801, "step": 9350 }, { "epoch": 0.6, "grad_norm": 1.1841561794281006, "learning_rate": 3.5849103349166347e-06, "loss": 0.5796, "step": 9351 }, { "epoch": 0.6, "grad_norm": 1.2184804677963257, "learning_rate": 3.583907788010196e-06, "loss": 0.5189, "step": 9352 }, { "epoch": 0.6, "grad_norm": 1.326690435409546, "learning_rate": 3.5829053029974935e-06, "loss": 0.5975, "step": 9353 }, { "epoch": 0.6, "grad_norm": 1.1385880708694458, "learning_rate": 3.5819028799223443e-06, "loss": 0.5441, "step": 9354 }, { "epoch": 0.6, "grad_norm": 1.180230975151062, "learning_rate": 3.580900518828561e-06, "loss": 0.534, "step": 9355 }, { "epoch": 0.6, "grad_norm": 1.2560336589813232, "learning_rate": 3.5798982197599552e-06, "loss": 0.5078, "step": 9356 }, { "epoch": 0.6, "grad_norm": 1.1648494005203247, "learning_rate": 3.578895982760336e-06, "loss": 0.5109, "step": 9357 }, { "epoch": 0.6, "grad_norm": 1.128820538520813, "learning_rate": 3.577893807873505e-06, "loss": 0.4862, "step": 9358 }, { "epoch": 0.6, "grad_norm": 1.1780891418457031, "learning_rate": 3.5768916951432664e-06, "loss": 0.5325, "step": 9359 }, { "epoch": 0.6, "grad_norm": 1.2039326429367065, "learning_rate": 3.57588964461342e-06, "loss": 0.5631, "step": 9360 }, { "epoch": 0.6, "grad_norm": 1.2767471075057983, "learning_rate": 3.5748876563277636e-06, "loss": 0.5456, "step": 9361 }, { "epoch": 0.6, "grad_norm": 1.1510473489761353, "learning_rate": 3.57388573033009e-06, "loss": 0.5308, "step": 9362 }, { "epoch": 0.6, "grad_norm": 1.2645195722579956, "learning_rate": 3.572883866664194e-06, "loss": 0.5456, "step": 9363 }, { "epoch": 0.6, "grad_norm": 1.2271771430969238, "learning_rate": 3.5718820653738605e-06, "loss": 0.4992, "step": 9364 }, { "epoch": 0.6, "grad_norm": 1.2633540630340576, "learning_rate": 3.5708803265028775e-06, "loss": 0.524, "step": 9365 }, { "epoch": 0.6, "grad_norm": 1.1430758237838745, "learning_rate": 3.569878650095028e-06, "loss": 0.4824, "step": 9366 }, { "epoch": 0.6, "grad_norm": 1.229264259338379, "learning_rate": 3.568877036194093e-06, "loss": 0.4862, "step": 9367 }, { "epoch": 0.6, "grad_norm": 1.152994990348816, "learning_rate": 3.56787548484385e-06, "loss": 0.4977, "step": 9368 }, { "epoch": 0.6, "grad_norm": 1.3350523710250854, "learning_rate": 3.5668739960880772e-06, "loss": 0.5398, "step": 9369 }, { "epoch": 0.6, "grad_norm": 1.2786059379577637, "learning_rate": 3.565872569970542e-06, "loss": 0.5372, "step": 9370 }, { "epoch": 0.6, "grad_norm": 1.1555718183517456, "learning_rate": 3.5648712065350172e-06, "loss": 0.5271, "step": 9371 }, { "epoch": 0.6, "grad_norm": 1.119976282119751, "learning_rate": 3.563869905825269e-06, "loss": 0.5351, "step": 9372 }, { "epoch": 0.61, "grad_norm": 1.1574140787124634, "learning_rate": 3.562868667885062e-06, "loss": 0.5097, "step": 9373 }, { "epoch": 0.61, "grad_norm": 1.2446285486221313, "learning_rate": 3.5618674927581597e-06, "loss": 0.5341, "step": 9374 }, { "epoch": 0.61, "grad_norm": 1.1917579174041748, "learning_rate": 3.5608663804883163e-06, "loss": 0.4924, "step": 9375 }, { "epoch": 0.61, "grad_norm": 1.25700044631958, "learning_rate": 3.559865331119291e-06, "loss": 0.498, "step": 9376 }, { "epoch": 0.61, "grad_norm": 1.2527744770050049, "learning_rate": 3.558864344694837e-06, "loss": 0.5221, "step": 9377 }, { "epoch": 0.61, "grad_norm": 1.1537901163101196, "learning_rate": 3.5578634212587025e-06, "loss": 0.4975, "step": 9378 }, { "epoch": 0.61, "grad_norm": 1.1216920614242554, "learning_rate": 3.556862560854639e-06, "loss": 0.4907, "step": 9379 }, { "epoch": 0.61, "grad_norm": 1.0928301811218262, "learning_rate": 3.5558617635263908e-06, "loss": 0.5339, "step": 9380 }, { "epoch": 0.61, "grad_norm": 1.1341025829315186, "learning_rate": 3.5548610293176967e-06, "loss": 0.5145, "step": 9381 }, { "epoch": 0.61, "grad_norm": 1.1192466020584106, "learning_rate": 3.553860358272299e-06, "loss": 0.4957, "step": 9382 }, { "epoch": 0.61, "grad_norm": 1.2158018350601196, "learning_rate": 3.552859750433934e-06, "loss": 0.5194, "step": 9383 }, { "epoch": 0.61, "grad_norm": 1.1290252208709717, "learning_rate": 3.5518592058463354e-06, "loss": 0.4914, "step": 9384 }, { "epoch": 0.61, "grad_norm": 1.3140506744384766, "learning_rate": 3.5508587245532344e-06, "loss": 0.5884, "step": 9385 }, { "epoch": 0.61, "grad_norm": 1.2709648609161377, "learning_rate": 3.5498583065983625e-06, "loss": 0.5047, "step": 9386 }, { "epoch": 0.61, "grad_norm": 1.2427074909210205, "learning_rate": 3.54885795202544e-06, "loss": 0.5367, "step": 9387 }, { "epoch": 0.61, "grad_norm": 1.1681017875671387, "learning_rate": 3.5478576608781924e-06, "loss": 0.4773, "step": 9388 }, { "epoch": 0.61, "grad_norm": 1.116805911064148, "learning_rate": 3.54685743320034e-06, "loss": 0.5374, "step": 9389 }, { "epoch": 0.61, "grad_norm": 1.1414966583251953, "learning_rate": 3.5458572690356013e-06, "loss": 0.508, "step": 9390 }, { "epoch": 0.61, "grad_norm": 1.1963425874710083, "learning_rate": 3.5448571684276883e-06, "loss": 0.5227, "step": 9391 }, { "epoch": 0.61, "grad_norm": 1.2937819957733154, "learning_rate": 3.543857131420315e-06, "loss": 0.5045, "step": 9392 }, { "epoch": 0.61, "grad_norm": 1.2183680534362793, "learning_rate": 3.542857158057189e-06, "loss": 0.5539, "step": 9393 }, { "epoch": 0.61, "grad_norm": 1.182892084121704, "learning_rate": 3.5418572483820168e-06, "loss": 0.4921, "step": 9394 }, { "epoch": 0.61, "grad_norm": 1.1519774198532104, "learning_rate": 3.5408574024385036e-06, "loss": 0.5747, "step": 9395 }, { "epoch": 0.61, "grad_norm": 1.1261920928955078, "learning_rate": 3.5398576202703477e-06, "loss": 0.5517, "step": 9396 }, { "epoch": 0.61, "grad_norm": 1.1926311254501343, "learning_rate": 3.5388579019212476e-06, "loss": 0.551, "step": 9397 }, { "epoch": 0.61, "grad_norm": 1.1888597011566162, "learning_rate": 3.5378582474349e-06, "loss": 0.4878, "step": 9398 }, { "epoch": 0.61, "grad_norm": 1.2085623741149902, "learning_rate": 3.5368586568549962e-06, "loss": 0.525, "step": 9399 }, { "epoch": 0.61, "grad_norm": 1.1850268840789795, "learning_rate": 3.5358591302252244e-06, "loss": 0.5148, "step": 9400 }, { "epoch": 0.61, "grad_norm": 1.1748151779174805, "learning_rate": 3.534859667589272e-06, "loss": 0.5621, "step": 9401 }, { "epoch": 0.61, "grad_norm": 1.0945603847503662, "learning_rate": 3.533860268990824e-06, "loss": 0.5224, "step": 9402 }, { "epoch": 0.61, "grad_norm": 1.253342628479004, "learning_rate": 3.532860934473561e-06, "loss": 0.5864, "step": 9403 }, { "epoch": 0.61, "grad_norm": 1.3023113012313843, "learning_rate": 3.531861664081163e-06, "loss": 0.5245, "step": 9404 }, { "epoch": 0.61, "grad_norm": 1.2297663688659668, "learning_rate": 3.5308624578573024e-06, "loss": 0.5571, "step": 9405 }, { "epoch": 0.61, "grad_norm": 1.1824787855148315, "learning_rate": 3.529863315845653e-06, "loss": 0.5422, "step": 9406 }, { "epoch": 0.61, "grad_norm": 1.1891136169433594, "learning_rate": 3.5288642380898845e-06, "loss": 0.5325, "step": 9407 }, { "epoch": 0.61, "grad_norm": 1.2769536972045898, "learning_rate": 3.527865224633665e-06, "loss": 0.4817, "step": 9408 }, { "epoch": 0.61, "grad_norm": 1.1083319187164307, "learning_rate": 3.5268662755206583e-06, "loss": 0.5041, "step": 9409 }, { "epoch": 0.61, "grad_norm": 1.1230576038360596, "learning_rate": 3.5258673907945284e-06, "loss": 0.4816, "step": 9410 }, { "epoch": 0.61, "grad_norm": 1.2228131294250488, "learning_rate": 3.5248685704989295e-06, "loss": 0.5275, "step": 9411 }, { "epoch": 0.61, "grad_norm": 1.130631923675537, "learning_rate": 3.5238698146775186e-06, "loss": 0.5144, "step": 9412 }, { "epoch": 0.61, "grad_norm": 1.2715824842453003, "learning_rate": 3.5228711233739504e-06, "loss": 0.566, "step": 9413 }, { "epoch": 0.61, "grad_norm": 1.2345445156097412, "learning_rate": 3.521872496631874e-06, "loss": 0.5698, "step": 9414 }, { "epoch": 0.61, "grad_norm": 1.475579857826233, "learning_rate": 3.5208739344949393e-06, "loss": 0.5599, "step": 9415 }, { "epoch": 0.61, "grad_norm": 1.2626360654830933, "learning_rate": 3.5198754370067865e-06, "loss": 0.4938, "step": 9416 }, { "epoch": 0.61, "grad_norm": 1.5329941511154175, "learning_rate": 3.5188770042110598e-06, "loss": 0.5717, "step": 9417 }, { "epoch": 0.61, "grad_norm": 1.1383901834487915, "learning_rate": 3.5178786361513985e-06, "loss": 0.5337, "step": 9418 }, { "epoch": 0.61, "grad_norm": 1.2073267698287964, "learning_rate": 3.516880332871437e-06, "loss": 0.5547, "step": 9419 }, { "epoch": 0.61, "grad_norm": 1.0384632349014282, "learning_rate": 3.5158820944148104e-06, "loss": 0.4499, "step": 9420 }, { "epoch": 0.61, "grad_norm": 1.9114257097244263, "learning_rate": 3.514883920825151e-06, "loss": 0.5012, "step": 9421 }, { "epoch": 0.61, "grad_norm": 1.4242827892303467, "learning_rate": 3.51388581214608e-06, "loss": 0.5017, "step": 9422 }, { "epoch": 0.61, "grad_norm": 1.044473648071289, "learning_rate": 3.512887768421227e-06, "loss": 0.5167, "step": 9423 }, { "epoch": 0.61, "grad_norm": 1.0955244302749634, "learning_rate": 3.511889789694213e-06, "loss": 0.5271, "step": 9424 }, { "epoch": 0.61, "grad_norm": 1.1802810430526733, "learning_rate": 3.510891876008656e-06, "loss": 0.491, "step": 9425 }, { "epoch": 0.61, "grad_norm": 1.1588243246078491, "learning_rate": 3.509894027408174e-06, "loss": 0.5399, "step": 9426 }, { "epoch": 0.61, "grad_norm": 1.274219274520874, "learning_rate": 3.508896243936382e-06, "loss": 0.5637, "step": 9427 }, { "epoch": 0.61, "grad_norm": 1.247883677482605, "learning_rate": 3.507898525636885e-06, "loss": 0.5353, "step": 9428 }, { "epoch": 0.61, "grad_norm": 1.1027414798736572, "learning_rate": 3.506900872553294e-06, "loss": 0.4702, "step": 9429 }, { "epoch": 0.61, "grad_norm": 1.3387846946716309, "learning_rate": 3.5059032847292134e-06, "loss": 0.5069, "step": 9430 }, { "epoch": 0.61, "grad_norm": 1.2538973093032837, "learning_rate": 3.504905762208246e-06, "loss": 0.5048, "step": 9431 }, { "epoch": 0.61, "grad_norm": 1.189831256866455, "learning_rate": 3.5039083050339906e-06, "loss": 0.551, "step": 9432 }, { "epoch": 0.61, "grad_norm": 1.135807752609253, "learning_rate": 3.5029109132500438e-06, "loss": 0.5437, "step": 9433 }, { "epoch": 0.61, "grad_norm": 1.134039044380188, "learning_rate": 3.5019135868999977e-06, "loss": 0.5108, "step": 9434 }, { "epoch": 0.61, "grad_norm": 1.212791085243225, "learning_rate": 3.500916326027443e-06, "loss": 0.5044, "step": 9435 }, { "epoch": 0.61, "grad_norm": 1.0418614149093628, "learning_rate": 3.499919130675968e-06, "loss": 0.5389, "step": 9436 }, { "epoch": 0.61, "grad_norm": 1.128648042678833, "learning_rate": 3.4989220008891587e-06, "loss": 0.5248, "step": 9437 }, { "epoch": 0.61, "grad_norm": 1.1531949043273926, "learning_rate": 3.497924936710595e-06, "loss": 0.513, "step": 9438 }, { "epoch": 0.61, "grad_norm": 1.551060676574707, "learning_rate": 3.4969279381838585e-06, "loss": 0.5394, "step": 9439 }, { "epoch": 0.61, "grad_norm": 1.3323161602020264, "learning_rate": 3.495931005352522e-06, "loss": 0.5288, "step": 9440 }, { "epoch": 0.61, "grad_norm": 1.1545414924621582, "learning_rate": 3.494934138260162e-06, "loss": 0.5361, "step": 9441 }, { "epoch": 0.61, "grad_norm": 1.1415914297103882, "learning_rate": 3.4939373369503464e-06, "loss": 0.5092, "step": 9442 }, { "epoch": 0.61, "grad_norm": 1.2219913005828857, "learning_rate": 3.4929406014666447e-06, "loss": 0.5057, "step": 9443 }, { "epoch": 0.61, "grad_norm": 1.2311819791793823, "learning_rate": 3.4919439318526206e-06, "loss": 0.4956, "step": 9444 }, { "epoch": 0.61, "grad_norm": 1.1673457622528076, "learning_rate": 3.4909473281518375e-06, "loss": 0.505, "step": 9445 }, { "epoch": 0.61, "grad_norm": 1.2363885641098022, "learning_rate": 3.489950790407853e-06, "loss": 0.5323, "step": 9446 }, { "epoch": 0.61, "grad_norm": 1.2586756944656372, "learning_rate": 3.4889543186642225e-06, "loss": 0.4757, "step": 9447 }, { "epoch": 0.61, "grad_norm": 1.2764877080917358, "learning_rate": 3.4879579129645013e-06, "loss": 0.573, "step": 9448 }, { "epoch": 0.61, "grad_norm": 1.1424756050109863, "learning_rate": 3.486961573352238e-06, "loss": 0.4599, "step": 9449 }, { "epoch": 0.61, "grad_norm": 1.0953632593154907, "learning_rate": 3.4859652998709796e-06, "loss": 0.508, "step": 9450 }, { "epoch": 0.61, "grad_norm": 1.1378364562988281, "learning_rate": 3.484969092564275e-06, "loss": 0.5627, "step": 9451 }, { "epoch": 0.61, "grad_norm": 1.1795520782470703, "learning_rate": 3.4839729514756592e-06, "loss": 0.5028, "step": 9452 }, { "epoch": 0.61, "grad_norm": 1.2269278764724731, "learning_rate": 3.4829768766486755e-06, "loss": 0.5098, "step": 9453 }, { "epoch": 0.61, "grad_norm": 1.3319813013076782, "learning_rate": 3.4819808681268584e-06, "loss": 0.4885, "step": 9454 }, { "epoch": 0.61, "grad_norm": 1.3507459163665771, "learning_rate": 3.4809849259537405e-06, "loss": 0.5369, "step": 9455 }, { "epoch": 0.61, "grad_norm": 1.3416298627853394, "learning_rate": 3.479989050172855e-06, "loss": 0.5074, "step": 9456 }, { "epoch": 0.61, "grad_norm": 1.1711241006851196, "learning_rate": 3.4789932408277237e-06, "loss": 0.4957, "step": 9457 }, { "epoch": 0.61, "grad_norm": 1.2415666580200195, "learning_rate": 3.4779974979618734e-06, "loss": 0.5202, "step": 9458 }, { "epoch": 0.61, "grad_norm": 1.2799187898635864, "learning_rate": 3.4770018216188267e-06, "loss": 0.5114, "step": 9459 }, { "epoch": 0.61, "grad_norm": 1.1899590492248535, "learning_rate": 3.4760062118421003e-06, "loss": 0.5584, "step": 9460 }, { "epoch": 0.61, "grad_norm": 1.1617865562438965, "learning_rate": 3.4750106686752105e-06, "loss": 0.5067, "step": 9461 }, { "epoch": 0.61, "grad_norm": 1.180497169494629, "learning_rate": 3.474015192161673e-06, "loss": 0.5599, "step": 9462 }, { "epoch": 0.61, "grad_norm": 1.0938152074813843, "learning_rate": 3.4730197823449906e-06, "loss": 0.5389, "step": 9463 }, { "epoch": 0.61, "grad_norm": 1.2434102296829224, "learning_rate": 3.472024439268674e-06, "loss": 0.566, "step": 9464 }, { "epoch": 0.61, "grad_norm": 1.232748031616211, "learning_rate": 3.4710291629762283e-06, "loss": 0.5007, "step": 9465 }, { "epoch": 0.61, "grad_norm": 1.1676214933395386, "learning_rate": 3.4700339535111514e-06, "loss": 0.5246, "step": 9466 }, { "epoch": 0.61, "grad_norm": 1.2075468301773071, "learning_rate": 3.4690388109169446e-06, "loss": 0.5281, "step": 9467 }, { "epoch": 0.61, "grad_norm": 1.1649430990219116, "learning_rate": 3.4680437352371028e-06, "loss": 0.5395, "step": 9468 }, { "epoch": 0.61, "grad_norm": 1.342873454093933, "learning_rate": 3.467048726515115e-06, "loss": 0.4715, "step": 9469 }, { "epoch": 0.61, "grad_norm": 1.2815423011779785, "learning_rate": 3.466053784794472e-06, "loss": 0.4996, "step": 9470 }, { "epoch": 0.61, "grad_norm": 1.2006728649139404, "learning_rate": 3.4650589101186603e-06, "loss": 0.528, "step": 9471 }, { "epoch": 0.61, "grad_norm": 1.1199681758880615, "learning_rate": 3.4640641025311638e-06, "loss": 0.5561, "step": 9472 }, { "epoch": 0.61, "grad_norm": 1.208969235420227, "learning_rate": 3.4630693620754617e-06, "loss": 0.5008, "step": 9473 }, { "epoch": 0.61, "grad_norm": 1.237673044204712, "learning_rate": 3.4620746887950356e-06, "loss": 0.552, "step": 9474 }, { "epoch": 0.61, "grad_norm": 1.2413334846496582, "learning_rate": 3.4610800827333545e-06, "loss": 0.5196, "step": 9475 }, { "epoch": 0.61, "grad_norm": 1.1032640933990479, "learning_rate": 3.460085543933893e-06, "loss": 0.4847, "step": 9476 }, { "epoch": 0.61, "grad_norm": 1.2399708032608032, "learning_rate": 3.459091072440118e-06, "loss": 0.5694, "step": 9477 }, { "epoch": 0.61, "grad_norm": 1.2416751384735107, "learning_rate": 3.4580966682954986e-06, "loss": 0.5758, "step": 9478 }, { "epoch": 0.61, "grad_norm": 1.2346547842025757, "learning_rate": 3.4571023315434953e-06, "loss": 0.5003, "step": 9479 }, { "epoch": 0.61, "grad_norm": 1.318312168121338, "learning_rate": 3.456108062227569e-06, "loss": 0.5285, "step": 9480 }, { "epoch": 0.61, "grad_norm": 1.2426494359970093, "learning_rate": 3.4551138603911743e-06, "loss": 0.5393, "step": 9481 }, { "epoch": 0.61, "grad_norm": 1.1783256530761719, "learning_rate": 3.454119726077767e-06, "loss": 0.5568, "step": 9482 }, { "epoch": 0.61, "grad_norm": 1.2169746160507202, "learning_rate": 3.453125659330798e-06, "loss": 0.5457, "step": 9483 }, { "epoch": 0.61, "grad_norm": 1.1296836137771606, "learning_rate": 3.4521316601937173e-06, "loss": 0.4973, "step": 9484 }, { "epoch": 0.61, "grad_norm": 1.1156591176986694, "learning_rate": 3.451137728709967e-06, "loss": 0.5166, "step": 9485 }, { "epoch": 0.61, "grad_norm": 1.4998652935028076, "learning_rate": 3.450143864922991e-06, "loss": 0.5465, "step": 9486 }, { "epoch": 0.61, "grad_norm": 1.2054004669189453, "learning_rate": 3.449150068876227e-06, "loss": 0.59, "step": 9487 }, { "epoch": 0.61, "grad_norm": 1.1709057092666626, "learning_rate": 3.4481563406131137e-06, "loss": 0.5505, "step": 9488 }, { "epoch": 0.61, "grad_norm": 1.102744221687317, "learning_rate": 3.4471626801770815e-06, "loss": 0.4958, "step": 9489 }, { "epoch": 0.61, "grad_norm": 1.1652512550354004, "learning_rate": 3.4461690876115615e-06, "loss": 0.5384, "step": 9490 }, { "epoch": 0.61, "grad_norm": 1.4411821365356445, "learning_rate": 3.4451755629599824e-06, "loss": 0.5249, "step": 9491 }, { "epoch": 0.61, "grad_norm": 1.1517976522445679, "learning_rate": 3.44418210626577e-06, "loss": 0.5669, "step": 9492 }, { "epoch": 0.61, "grad_norm": 1.3085769414901733, "learning_rate": 3.4431887175723422e-06, "loss": 0.5126, "step": 9493 }, { "epoch": 0.61, "grad_norm": 1.2315599918365479, "learning_rate": 3.4421953969231186e-06, "loss": 0.5204, "step": 9494 }, { "epoch": 0.61, "grad_norm": 1.2098829746246338, "learning_rate": 3.4412021443615153e-06, "loss": 0.5182, "step": 9495 }, { "epoch": 0.61, "grad_norm": 1.121785283088684, "learning_rate": 3.4402089599309435e-06, "loss": 0.4844, "step": 9496 }, { "epoch": 0.61, "grad_norm": 1.1160600185394287, "learning_rate": 3.4392158436748146e-06, "loss": 0.5012, "step": 9497 }, { "epoch": 0.61, "grad_norm": 1.311890721321106, "learning_rate": 3.438222795636536e-06, "loss": 0.5162, "step": 9498 }, { "epoch": 0.61, "grad_norm": 1.1446176767349243, "learning_rate": 3.4372298158595074e-06, "loss": 0.592, "step": 9499 }, { "epoch": 0.61, "grad_norm": 1.8313170671463013, "learning_rate": 3.436236904387132e-06, "loss": 0.5198, "step": 9500 }, { "epoch": 0.61, "grad_norm": 1.115390658378601, "learning_rate": 3.435244061262806e-06, "loss": 0.5022, "step": 9501 }, { "epoch": 0.61, "grad_norm": 1.2667275667190552, "learning_rate": 3.434251286529926e-06, "loss": 0.5393, "step": 9502 }, { "epoch": 0.61, "grad_norm": 1.3099581003189087, "learning_rate": 3.433258580231884e-06, "loss": 0.5221, "step": 9503 }, { "epoch": 0.61, "grad_norm": 1.1979676485061646, "learning_rate": 3.432265942412066e-06, "loss": 0.5472, "step": 9504 }, { "epoch": 0.61, "grad_norm": 1.0720009803771973, "learning_rate": 3.431273373113858e-06, "loss": 0.4667, "step": 9505 }, { "epoch": 0.61, "grad_norm": 1.2770906686782837, "learning_rate": 3.4302808723806436e-06, "loss": 0.5152, "step": 9506 }, { "epoch": 0.61, "grad_norm": 1.2062479257583618, "learning_rate": 3.4292884402558026e-06, "loss": 0.5654, "step": 9507 }, { "epoch": 0.61, "grad_norm": 1.2435892820358276, "learning_rate": 3.428296076782711e-06, "loss": 0.5575, "step": 9508 }, { "epoch": 0.61, "grad_norm": 1.16208815574646, "learning_rate": 3.4273037820047457e-06, "loss": 0.5114, "step": 9509 }, { "epoch": 0.61, "grad_norm": 1.2612518072128296, "learning_rate": 3.4263115559652713e-06, "loss": 0.5347, "step": 9510 }, { "epoch": 0.61, "grad_norm": 1.420950174331665, "learning_rate": 3.4253193987076595e-06, "loss": 0.5153, "step": 9511 }, { "epoch": 0.61, "grad_norm": 1.1298222541809082, "learning_rate": 3.424327310275274e-06, "loss": 0.5121, "step": 9512 }, { "epoch": 0.61, "grad_norm": 1.1336562633514404, "learning_rate": 3.4233352907114757e-06, "loss": 0.5118, "step": 9513 }, { "epoch": 0.61, "grad_norm": 1.1943199634552002, "learning_rate": 3.422343340059625e-06, "loss": 0.5228, "step": 9514 }, { "epoch": 0.61, "grad_norm": 1.1933718919754028, "learning_rate": 3.421351458363078e-06, "loss": 0.4808, "step": 9515 }, { "epoch": 0.61, "grad_norm": 1.1913065910339355, "learning_rate": 3.420359645665184e-06, "loss": 0.5348, "step": 9516 }, { "epoch": 0.61, "grad_norm": 1.1446536779403687, "learning_rate": 3.419367902009294e-06, "loss": 0.5295, "step": 9517 }, { "epoch": 0.61, "grad_norm": 1.185625672340393, "learning_rate": 3.418376227438755e-06, "loss": 0.4999, "step": 9518 }, { "epoch": 0.61, "grad_norm": 1.1384090185165405, "learning_rate": 3.417384621996911e-06, "loss": 0.5498, "step": 9519 }, { "epoch": 0.61, "grad_norm": 1.2329734563827515, "learning_rate": 3.416393085727101e-06, "loss": 0.4862, "step": 9520 }, { "epoch": 0.61, "grad_norm": 1.1932227611541748, "learning_rate": 3.4154016186726662e-06, "loss": 0.5476, "step": 9521 }, { "epoch": 0.61, "grad_norm": 1.2180910110473633, "learning_rate": 3.414410220876936e-06, "loss": 0.5228, "step": 9522 }, { "epoch": 0.61, "grad_norm": 1.3138275146484375, "learning_rate": 3.4134188923832444e-06, "loss": 0.5218, "step": 9523 }, { "epoch": 0.61, "grad_norm": 1.3048298358917236, "learning_rate": 3.4124276332349194e-06, "loss": 0.5386, "step": 9524 }, { "epoch": 0.61, "grad_norm": 1.3010225296020508, "learning_rate": 3.4114364434752865e-06, "loss": 0.5316, "step": 9525 }, { "epoch": 0.61, "grad_norm": 1.2343848943710327, "learning_rate": 3.41044532314767e-06, "loss": 0.5455, "step": 9526 }, { "epoch": 0.61, "grad_norm": 1.2033867835998535, "learning_rate": 3.409454272295386e-06, "loss": 0.5236, "step": 9527 }, { "epoch": 0.62, "grad_norm": 1.2721750736236572, "learning_rate": 3.4084632909617522e-06, "loss": 0.5758, "step": 9528 }, { "epoch": 0.62, "grad_norm": 1.251853108406067, "learning_rate": 3.407472379190081e-06, "loss": 0.5898, "step": 9529 }, { "epoch": 0.62, "grad_norm": 1.212121844291687, "learning_rate": 3.406481537023684e-06, "loss": 0.531, "step": 9530 }, { "epoch": 0.62, "grad_norm": 1.3295425176620483, "learning_rate": 3.4054907645058678e-06, "loss": 0.5399, "step": 9531 }, { "epoch": 0.62, "grad_norm": 1.1888195276260376, "learning_rate": 3.4045000616799352e-06, "loss": 0.4713, "step": 9532 }, { "epoch": 0.62, "grad_norm": 1.1809415817260742, "learning_rate": 3.40350942858919e-06, "loss": 0.5325, "step": 9533 }, { "epoch": 0.62, "grad_norm": 1.0929728746414185, "learning_rate": 3.4025188652769283e-06, "loss": 0.5367, "step": 9534 }, { "epoch": 0.62, "grad_norm": 1.2358278036117554, "learning_rate": 3.4015283717864456e-06, "loss": 0.531, "step": 9535 }, { "epoch": 0.62, "grad_norm": 1.1599441766738892, "learning_rate": 3.4005379481610327e-06, "loss": 0.5356, "step": 9536 }, { "epoch": 0.62, "grad_norm": 1.2190371751785278, "learning_rate": 3.39954759444398e-06, "loss": 0.549, "step": 9537 }, { "epoch": 0.62, "grad_norm": 1.1949673891067505, "learning_rate": 3.398557310678572e-06, "loss": 0.5448, "step": 9538 }, { "epoch": 0.62, "grad_norm": 1.2123953104019165, "learning_rate": 3.397567096908094e-06, "loss": 0.5658, "step": 9539 }, { "epoch": 0.62, "grad_norm": 1.1334903240203857, "learning_rate": 3.3965769531758232e-06, "loss": 0.4987, "step": 9540 }, { "epoch": 0.62, "grad_norm": 1.2016311883926392, "learning_rate": 3.3955868795250356e-06, "loss": 0.478, "step": 9541 }, { "epoch": 0.62, "grad_norm": 1.2486268281936646, "learning_rate": 3.3945968759990066e-06, "loss": 0.5473, "step": 9542 }, { "epoch": 0.62, "grad_norm": 1.086887001991272, "learning_rate": 3.3936069426410066e-06, "loss": 0.5126, "step": 9543 }, { "epoch": 0.62, "grad_norm": 1.21282160282135, "learning_rate": 3.392617079494304e-06, "loss": 0.5354, "step": 9544 }, { "epoch": 0.62, "grad_norm": 1.1246203184127808, "learning_rate": 3.39162728660216e-06, "loss": 0.5055, "step": 9545 }, { "epoch": 0.62, "grad_norm": 1.252878189086914, "learning_rate": 3.3906375640078373e-06, "loss": 0.5026, "step": 9546 }, { "epoch": 0.62, "grad_norm": 1.1731195449829102, "learning_rate": 3.3896479117545945e-06, "loss": 0.5244, "step": 9547 }, { "epoch": 0.62, "grad_norm": 1.1956110000610352, "learning_rate": 3.3886583298856866e-06, "loss": 0.5185, "step": 9548 }, { "epoch": 0.62, "grad_norm": 1.2439804077148438, "learning_rate": 3.387668818444366e-06, "loss": 0.4891, "step": 9549 }, { "epoch": 0.62, "grad_norm": 1.1874914169311523, "learning_rate": 3.386679377473884e-06, "loss": 0.5213, "step": 9550 }, { "epoch": 0.62, "grad_norm": 1.19956374168396, "learning_rate": 3.3856900070174814e-06, "loss": 0.516, "step": 9551 }, { "epoch": 0.62, "grad_norm": 1.3156594038009644, "learning_rate": 3.384700707118404e-06, "loss": 0.4447, "step": 9552 }, { "epoch": 0.62, "grad_norm": 1.1903775930404663, "learning_rate": 3.383711477819891e-06, "loss": 0.5082, "step": 9553 }, { "epoch": 0.62, "grad_norm": 1.1614446640014648, "learning_rate": 3.382722319165179e-06, "loss": 0.5442, "step": 9554 }, { "epoch": 0.62, "grad_norm": 1.090293049812317, "learning_rate": 3.381733231197502e-06, "loss": 0.5163, "step": 9555 }, { "epoch": 0.62, "grad_norm": 1.3130813837051392, "learning_rate": 3.3807442139600922e-06, "loss": 0.5132, "step": 9556 }, { "epoch": 0.62, "grad_norm": 1.199583888053894, "learning_rate": 3.379755267496173e-06, "loss": 0.5614, "step": 9557 }, { "epoch": 0.62, "grad_norm": 1.2489339113235474, "learning_rate": 3.378766391848971e-06, "loss": 0.5337, "step": 9558 }, { "epoch": 0.62, "grad_norm": 1.127930998802185, "learning_rate": 3.377777587061707e-06, "loss": 0.5044, "step": 9559 }, { "epoch": 0.62, "grad_norm": 1.1619298458099365, "learning_rate": 3.3767888531775992e-06, "loss": 0.5135, "step": 9560 }, { "epoch": 0.62, "grad_norm": 1.2064975500106812, "learning_rate": 3.3758001902398628e-06, "loss": 0.5301, "step": 9561 }, { "epoch": 0.62, "grad_norm": 1.2538871765136719, "learning_rate": 3.3748115982917116e-06, "loss": 0.5298, "step": 9562 }, { "epoch": 0.62, "grad_norm": 1.1790542602539062, "learning_rate": 3.3738230773763502e-06, "loss": 0.5274, "step": 9563 }, { "epoch": 0.62, "grad_norm": 1.2272841930389404, "learning_rate": 3.372834627536987e-06, "loss": 0.5195, "step": 9564 }, { "epoch": 0.62, "grad_norm": 1.1525938510894775, "learning_rate": 3.3718462488168236e-06, "loss": 0.5367, "step": 9565 }, { "epoch": 0.62, "grad_norm": 1.0973485708236694, "learning_rate": 3.3708579412590604e-06, "loss": 0.4966, "step": 9566 }, { "epoch": 0.62, "grad_norm": 1.35061514377594, "learning_rate": 3.369869704906893e-06, "loss": 0.5432, "step": 9567 }, { "epoch": 0.62, "grad_norm": 1.136736512184143, "learning_rate": 3.3688815398035167e-06, "loss": 0.5571, "step": 9568 }, { "epoch": 0.62, "grad_norm": 1.178284764289856, "learning_rate": 3.3678934459921185e-06, "loss": 0.5139, "step": 9569 }, { "epoch": 0.62, "grad_norm": 1.2230448722839355, "learning_rate": 3.3669054235158873e-06, "loss": 0.5097, "step": 9570 }, { "epoch": 0.62, "grad_norm": 1.1542787551879883, "learning_rate": 3.3659174724180054e-06, "loss": 0.5213, "step": 9571 }, { "epoch": 0.62, "grad_norm": 1.1048150062561035, "learning_rate": 3.364929592741656e-06, "loss": 0.544, "step": 9572 }, { "epoch": 0.62, "grad_norm": 1.3516652584075928, "learning_rate": 3.363941784530016e-06, "loss": 0.5412, "step": 9573 }, { "epoch": 0.62, "grad_norm": 1.1886056661605835, "learning_rate": 3.362954047826259e-06, "loss": 0.5104, "step": 9574 }, { "epoch": 0.62, "grad_norm": 1.0965871810913086, "learning_rate": 3.3619663826735568e-06, "loss": 0.5112, "step": 9575 }, { "epoch": 0.62, "grad_norm": 1.2503682374954224, "learning_rate": 3.3609787891150767e-06, "loss": 0.5335, "step": 9576 }, { "epoch": 0.62, "grad_norm": 1.1949342489242554, "learning_rate": 3.3599912671939873e-06, "loss": 0.5491, "step": 9577 }, { "epoch": 0.62, "grad_norm": 1.132298231124878, "learning_rate": 3.3590038169534468e-06, "loss": 0.5353, "step": 9578 }, { "epoch": 0.62, "grad_norm": 1.2851887941360474, "learning_rate": 3.3580164384366154e-06, "loss": 0.5765, "step": 9579 }, { "epoch": 0.62, "grad_norm": 1.2302805185317993, "learning_rate": 3.3570291316866503e-06, "loss": 0.5222, "step": 9580 }, { "epoch": 0.62, "grad_norm": 1.1906070709228516, "learning_rate": 3.3560418967467024e-06, "loss": 0.5644, "step": 9581 }, { "epoch": 0.62, "grad_norm": 1.3309986591339111, "learning_rate": 3.355054733659922e-06, "loss": 0.5419, "step": 9582 }, { "epoch": 0.62, "grad_norm": 1.1726810932159424, "learning_rate": 3.354067642469454e-06, "loss": 0.4561, "step": 9583 }, { "epoch": 0.62, "grad_norm": 1.2193174362182617, "learning_rate": 3.3530806232184437e-06, "loss": 0.5678, "step": 9584 }, { "epoch": 0.62, "grad_norm": 1.2678221464157104, "learning_rate": 3.352093675950031e-06, "loss": 0.5148, "step": 9585 }, { "epoch": 0.62, "grad_norm": 1.2046706676483154, "learning_rate": 3.351106800707352e-06, "loss": 0.5281, "step": 9586 }, { "epoch": 0.62, "grad_norm": 1.153538465499878, "learning_rate": 3.350119997533539e-06, "loss": 0.5304, "step": 9587 }, { "epoch": 0.62, "grad_norm": 1.2341378927230835, "learning_rate": 3.3491332664717245e-06, "loss": 0.5078, "step": 9588 }, { "epoch": 0.62, "grad_norm": 1.2184783220291138, "learning_rate": 3.348146607565036e-06, "loss": 0.5219, "step": 9589 }, { "epoch": 0.62, "grad_norm": 1.1969839334487915, "learning_rate": 3.347160020856597e-06, "loss": 0.542, "step": 9590 }, { "epoch": 0.62, "grad_norm": 1.1189557313919067, "learning_rate": 3.346173506389531e-06, "loss": 0.4883, "step": 9591 }, { "epoch": 0.62, "grad_norm": 1.1095390319824219, "learning_rate": 3.345187064206953e-06, "loss": 0.5031, "step": 9592 }, { "epoch": 0.62, "grad_norm": 1.2296597957611084, "learning_rate": 3.3442006943519776e-06, "loss": 0.5304, "step": 9593 }, { "epoch": 0.62, "grad_norm": 1.2478487491607666, "learning_rate": 3.343214396867719e-06, "loss": 0.5668, "step": 9594 }, { "epoch": 0.62, "grad_norm": 1.259276032447815, "learning_rate": 3.342228171797284e-06, "loss": 0.4943, "step": 9595 }, { "epoch": 0.62, "grad_norm": 1.225138783454895, "learning_rate": 3.341242019183778e-06, "loss": 0.4586, "step": 9596 }, { "epoch": 0.62, "grad_norm": 1.196152687072754, "learning_rate": 3.3402559390703067e-06, "loss": 0.5641, "step": 9597 }, { "epoch": 0.62, "grad_norm": 1.276203989982605, "learning_rate": 3.339269931499963e-06, "loss": 0.5171, "step": 9598 }, { "epoch": 0.62, "grad_norm": 1.2427704334259033, "learning_rate": 3.338283996515847e-06, "loss": 0.5246, "step": 9599 }, { "epoch": 0.62, "grad_norm": 1.2178997993469238, "learning_rate": 3.3372981341610498e-06, "loss": 0.5267, "step": 9600 }, { "epoch": 0.62, "grad_norm": 1.2019740343093872, "learning_rate": 3.3363123444786617e-06, "loss": 0.5463, "step": 9601 }, { "epoch": 0.62, "grad_norm": 1.0779589414596558, "learning_rate": 3.3353266275117678e-06, "loss": 0.4981, "step": 9602 }, { "epoch": 0.62, "grad_norm": 1.1746402978897095, "learning_rate": 3.3343409833034547e-06, "loss": 0.514, "step": 9603 }, { "epoch": 0.62, "grad_norm": 1.3487330675125122, "learning_rate": 3.3333554118967977e-06, "loss": 0.4785, "step": 9604 }, { "epoch": 0.62, "grad_norm": 1.1339340209960938, "learning_rate": 3.3323699133348754e-06, "loss": 0.5146, "step": 9605 }, { "epoch": 0.62, "grad_norm": 1.2740534543991089, "learning_rate": 3.3313844876607617e-06, "loss": 0.5324, "step": 9606 }, { "epoch": 0.62, "grad_norm": 1.2054158449172974, "learning_rate": 3.3303991349175268e-06, "loss": 0.596, "step": 9607 }, { "epoch": 0.62, "grad_norm": 1.2722057104110718, "learning_rate": 3.3294138551482386e-06, "loss": 0.5224, "step": 9608 }, { "epoch": 0.62, "grad_norm": 1.168372392654419, "learning_rate": 3.328428648395963e-06, "loss": 0.5106, "step": 9609 }, { "epoch": 0.62, "grad_norm": 1.201810359954834, "learning_rate": 3.3274435147037554e-06, "loss": 0.5259, "step": 9610 }, { "epoch": 0.62, "grad_norm": 1.1183110475540161, "learning_rate": 3.3264584541146765e-06, "loss": 0.5227, "step": 9611 }, { "epoch": 0.62, "grad_norm": 1.2082499265670776, "learning_rate": 3.3254734666717813e-06, "loss": 0.5207, "step": 9612 }, { "epoch": 0.62, "grad_norm": 1.0539277791976929, "learning_rate": 3.32448855241812e-06, "loss": 0.4684, "step": 9613 }, { "epoch": 0.62, "grad_norm": 1.216713309288025, "learning_rate": 3.3235037113967422e-06, "loss": 0.5481, "step": 9614 }, { "epoch": 0.62, "grad_norm": 1.1913586854934692, "learning_rate": 3.3225189436506934e-06, "loss": 0.5157, "step": 9615 }, { "epoch": 0.62, "grad_norm": 1.1930553913116455, "learning_rate": 3.3215342492230114e-06, "loss": 0.5176, "step": 9616 }, { "epoch": 0.62, "grad_norm": 1.2135088443756104, "learning_rate": 3.320549628156737e-06, "loss": 0.5483, "step": 9617 }, { "epoch": 0.62, "grad_norm": 1.2142928838729858, "learning_rate": 3.3195650804949047e-06, "loss": 0.5084, "step": 9618 }, { "epoch": 0.62, "grad_norm": 1.2040987014770508, "learning_rate": 3.318580606280549e-06, "loss": 0.5289, "step": 9619 }, { "epoch": 0.62, "grad_norm": 1.2437082529067993, "learning_rate": 3.3175962055566958e-06, "loss": 0.4832, "step": 9620 }, { "epoch": 0.62, "grad_norm": 1.2001901865005493, "learning_rate": 3.316611878366372e-06, "loss": 0.5479, "step": 9621 }, { "epoch": 0.62, "grad_norm": 1.1732221841812134, "learning_rate": 3.3156276247525993e-06, "loss": 0.5284, "step": 9622 }, { "epoch": 0.62, "grad_norm": 1.1349424123764038, "learning_rate": 3.3146434447583973e-06, "loss": 0.5484, "step": 9623 }, { "epoch": 0.62, "grad_norm": 1.2740565538406372, "learning_rate": 3.3136593384267833e-06, "loss": 0.5146, "step": 9624 }, { "epoch": 0.62, "grad_norm": 1.1917706727981567, "learning_rate": 3.3126753058007677e-06, "loss": 0.5217, "step": 9625 }, { "epoch": 0.62, "grad_norm": 1.266653299331665, "learning_rate": 3.3116913469233614e-06, "loss": 0.5202, "step": 9626 }, { "epoch": 0.62, "grad_norm": 1.1052002906799316, "learning_rate": 3.3107074618375714e-06, "loss": 0.4666, "step": 9627 }, { "epoch": 0.62, "grad_norm": 1.1308175325393677, "learning_rate": 3.309723650586398e-06, "loss": 0.5274, "step": 9628 }, { "epoch": 0.62, "grad_norm": 1.153550148010254, "learning_rate": 3.3087399132128437e-06, "loss": 0.5169, "step": 9629 }, { "epoch": 0.62, "grad_norm": 1.0947015285491943, "learning_rate": 3.307756249759905e-06, "loss": 0.4795, "step": 9630 }, { "epoch": 0.62, "grad_norm": 1.2223997116088867, "learning_rate": 3.306772660270573e-06, "loss": 0.5348, "step": 9631 }, { "epoch": 0.62, "grad_norm": 1.2067984342575073, "learning_rate": 3.3057891447878408e-06, "loss": 0.5679, "step": 9632 }, { "epoch": 0.62, "grad_norm": 1.1155304908752441, "learning_rate": 3.3048057033546943e-06, "loss": 0.5445, "step": 9633 }, { "epoch": 0.62, "grad_norm": 1.1905598640441895, "learning_rate": 3.303822336014116e-06, "loss": 0.55, "step": 9634 }, { "epoch": 0.62, "grad_norm": 1.140062928199768, "learning_rate": 3.3028390428090866e-06, "loss": 0.524, "step": 9635 }, { "epoch": 0.62, "grad_norm": 1.740443468093872, "learning_rate": 3.3018558237825844e-06, "loss": 0.5126, "step": 9636 }, { "epoch": 0.62, "grad_norm": 1.1947863101959229, "learning_rate": 3.3008726789775826e-06, "loss": 0.4802, "step": 9637 }, { "epoch": 0.62, "grad_norm": 1.1694791316986084, "learning_rate": 3.2998896084370545e-06, "loss": 0.5701, "step": 9638 }, { "epoch": 0.62, "grad_norm": 1.0632362365722656, "learning_rate": 3.2989066122039627e-06, "loss": 0.511, "step": 9639 }, { "epoch": 0.62, "grad_norm": 1.1372458934783936, "learning_rate": 3.297923690321274e-06, "loss": 0.5176, "step": 9640 }, { "epoch": 0.62, "grad_norm": 1.2170053720474243, "learning_rate": 3.2969408428319505e-06, "loss": 0.5913, "step": 9641 }, { "epoch": 0.62, "grad_norm": 1.1837025880813599, "learning_rate": 3.2959580697789473e-06, "loss": 0.5475, "step": 9642 }, { "epoch": 0.62, "grad_norm": 1.248677134513855, "learning_rate": 3.294975371205221e-06, "loss": 0.485, "step": 9643 }, { "epoch": 0.62, "grad_norm": 1.1954275369644165, "learning_rate": 3.293992747153725e-06, "loss": 0.5302, "step": 9644 }, { "epoch": 0.62, "grad_norm": 1.180058479309082, "learning_rate": 3.293010197667401e-06, "loss": 0.4603, "step": 9645 }, { "epoch": 0.62, "grad_norm": 1.127079963684082, "learning_rate": 3.2920277227891984e-06, "loss": 0.476, "step": 9646 }, { "epoch": 0.62, "grad_norm": 1.1259764432907104, "learning_rate": 3.291045322562057e-06, "loss": 0.4883, "step": 9647 }, { "epoch": 0.62, "grad_norm": 1.1358776092529297, "learning_rate": 3.2900629970289156e-06, "loss": 0.5513, "step": 9648 }, { "epoch": 0.62, "grad_norm": 1.1900572776794434, "learning_rate": 3.2890807462327077e-06, "loss": 0.5125, "step": 9649 }, { "epoch": 0.62, "grad_norm": 1.2559740543365479, "learning_rate": 3.28809857021637e-06, "loss": 0.5635, "step": 9650 }, { "epoch": 0.62, "grad_norm": 1.3026857376098633, "learning_rate": 3.287116469022824e-06, "loss": 0.5562, "step": 9651 }, { "epoch": 0.62, "grad_norm": 1.3248107433319092, "learning_rate": 3.286134442694998e-06, "loss": 0.5644, "step": 9652 }, { "epoch": 0.62, "grad_norm": 1.1766971349716187, "learning_rate": 3.285152491275814e-06, "loss": 0.5406, "step": 9653 }, { "epoch": 0.62, "grad_norm": 1.1978944540023804, "learning_rate": 3.284170614808189e-06, "loss": 0.5056, "step": 9654 }, { "epoch": 0.62, "grad_norm": 1.2516674995422363, "learning_rate": 3.283188813335041e-06, "loss": 0.5128, "step": 9655 }, { "epoch": 0.62, "grad_norm": 1.4688458442687988, "learning_rate": 3.2822070868992815e-06, "loss": 0.5418, "step": 9656 }, { "epoch": 0.62, "grad_norm": 1.251700520515442, "learning_rate": 3.281225435543817e-06, "loss": 0.5244, "step": 9657 }, { "epoch": 0.62, "grad_norm": 1.2723156213760376, "learning_rate": 3.280243859311554e-06, "loss": 0.5497, "step": 9658 }, { "epoch": 0.62, "grad_norm": 1.1592762470245361, "learning_rate": 3.279262358245394e-06, "loss": 0.4739, "step": 9659 }, { "epoch": 0.62, "grad_norm": 1.2024062871932983, "learning_rate": 3.2782809323882383e-06, "loss": 0.5239, "step": 9660 }, { "epoch": 0.62, "grad_norm": 1.2049216032028198, "learning_rate": 3.27729958178298e-06, "loss": 0.5453, "step": 9661 }, { "epoch": 0.62, "grad_norm": 1.1305502653121948, "learning_rate": 3.2763183064725148e-06, "loss": 0.5683, "step": 9662 }, { "epoch": 0.62, "grad_norm": 1.1560590267181396, "learning_rate": 3.275337106499727e-06, "loss": 0.5205, "step": 9663 }, { "epoch": 0.62, "grad_norm": 1.154872179031372, "learning_rate": 3.2743559819075046e-06, "loss": 0.5365, "step": 9664 }, { "epoch": 0.62, "grad_norm": 1.1480717658996582, "learning_rate": 3.27337493273873e-06, "loss": 0.5253, "step": 9665 }, { "epoch": 0.62, "grad_norm": 1.2196283340454102, "learning_rate": 3.272393959036283e-06, "loss": 0.5606, "step": 9666 }, { "epoch": 0.62, "grad_norm": 1.2061926126480103, "learning_rate": 3.2714130608430384e-06, "loss": 0.5373, "step": 9667 }, { "epoch": 0.62, "grad_norm": 1.206421136856079, "learning_rate": 3.27043223820187e-06, "loss": 0.5478, "step": 9668 }, { "epoch": 0.62, "grad_norm": 1.078357458114624, "learning_rate": 3.2694514911556454e-06, "loss": 0.4716, "step": 9669 }, { "epoch": 0.62, "grad_norm": 1.5958784818649292, "learning_rate": 3.2684708197472303e-06, "loss": 0.5597, "step": 9670 }, { "epoch": 0.62, "grad_norm": 1.12003493309021, "learning_rate": 3.2674902240194896e-06, "loss": 0.531, "step": 9671 }, { "epoch": 0.62, "grad_norm": 1.2669225931167603, "learning_rate": 3.2665097040152805e-06, "loss": 0.5184, "step": 9672 }, { "epoch": 0.62, "grad_norm": 1.2486933469772339, "learning_rate": 3.265529259777461e-06, "loss": 0.5063, "step": 9673 }, { "epoch": 0.62, "grad_norm": 1.3578588962554932, "learning_rate": 3.264548891348881e-06, "loss": 0.5267, "step": 9674 }, { "epoch": 0.62, "grad_norm": 1.3773008584976196, "learning_rate": 3.263568598772393e-06, "loss": 0.551, "step": 9675 }, { "epoch": 0.62, "grad_norm": 1.1624245643615723, "learning_rate": 3.26258838209084e-06, "loss": 0.5382, "step": 9676 }, { "epoch": 0.62, "grad_norm": 1.2778066396713257, "learning_rate": 3.261608241347066e-06, "loss": 0.5382, "step": 9677 }, { "epoch": 0.62, "grad_norm": 1.1168466806411743, "learning_rate": 3.2606281765839114e-06, "loss": 0.5335, "step": 9678 }, { "epoch": 0.62, "grad_norm": 1.2331753969192505, "learning_rate": 3.259648187844212e-06, "loss": 0.5544, "step": 9679 }, { "epoch": 0.62, "grad_norm": 1.1560536623001099, "learning_rate": 3.258668275170801e-06, "loss": 0.4949, "step": 9680 }, { "epoch": 0.62, "grad_norm": 1.2114278078079224, "learning_rate": 3.2576884386065056e-06, "loss": 0.5392, "step": 9681 }, { "epoch": 0.62, "grad_norm": 1.0702683925628662, "learning_rate": 3.2567086781941527e-06, "loss": 0.5214, "step": 9682 }, { "epoch": 0.63, "grad_norm": 1.2723252773284912, "learning_rate": 3.2557289939765655e-06, "loss": 0.513, "step": 9683 }, { "epoch": 0.63, "grad_norm": 1.2420933246612549, "learning_rate": 3.254749385996565e-06, "loss": 0.4806, "step": 9684 }, { "epoch": 0.63, "grad_norm": 1.238140344619751, "learning_rate": 3.2537698542969674e-06, "loss": 0.545, "step": 9685 }, { "epoch": 0.63, "grad_norm": 1.2211740016937256, "learning_rate": 3.252790398920582e-06, "loss": 0.5133, "step": 9686 }, { "epoch": 0.63, "grad_norm": 1.265100359916687, "learning_rate": 3.2518110199102205e-06, "loss": 0.4751, "step": 9687 }, { "epoch": 0.63, "grad_norm": 1.25055992603302, "learning_rate": 3.250831717308689e-06, "loss": 0.5034, "step": 9688 }, { "epoch": 0.63, "grad_norm": 1.1664260625839233, "learning_rate": 3.2498524911587904e-06, "loss": 0.5294, "step": 9689 }, { "epoch": 0.63, "grad_norm": 1.1911448240280151, "learning_rate": 3.2488733415033236e-06, "loss": 0.5299, "step": 9690 }, { "epoch": 0.63, "grad_norm": 1.1846842765808105, "learning_rate": 3.2478942683850872e-06, "loss": 0.5215, "step": 9691 }, { "epoch": 0.63, "grad_norm": 1.1807851791381836, "learning_rate": 3.24691527184687e-06, "loss": 0.5361, "step": 9692 }, { "epoch": 0.63, "grad_norm": 1.1722649335861206, "learning_rate": 3.245936351931463e-06, "loss": 0.5275, "step": 9693 }, { "epoch": 0.63, "grad_norm": 1.2330206632614136, "learning_rate": 3.2449575086816526e-06, "loss": 0.526, "step": 9694 }, { "epoch": 0.63, "grad_norm": 1.1189731359481812, "learning_rate": 3.243978742140221e-06, "loss": 0.485, "step": 9695 }, { "epoch": 0.63, "grad_norm": 1.2334944009780884, "learning_rate": 3.243000052349948e-06, "loss": 0.535, "step": 9696 }, { "epoch": 0.63, "grad_norm": 1.2606420516967773, "learning_rate": 3.242021439353613e-06, "loss": 0.5048, "step": 9697 }, { "epoch": 0.63, "grad_norm": 1.1201117038726807, "learning_rate": 3.241042903193982e-06, "loss": 0.4845, "step": 9698 }, { "epoch": 0.63, "grad_norm": 1.0998730659484863, "learning_rate": 3.240064443913828e-06, "loss": 0.5028, "step": 9699 }, { "epoch": 0.63, "grad_norm": 1.1749649047851562, "learning_rate": 3.239086061555916e-06, "loss": 0.5051, "step": 9700 }, { "epoch": 0.63, "grad_norm": 1.223637580871582, "learning_rate": 3.238107756163009e-06, "loss": 0.5574, "step": 9701 }, { "epoch": 0.63, "grad_norm": 1.1844981908798218, "learning_rate": 3.2371295277778667e-06, "loss": 0.5185, "step": 9702 }, { "epoch": 0.63, "grad_norm": 1.3274203538894653, "learning_rate": 3.2361513764432462e-06, "loss": 0.5421, "step": 9703 }, { "epoch": 0.63, "grad_norm": 1.256047248840332, "learning_rate": 3.2351733022018965e-06, "loss": 0.552, "step": 9704 }, { "epoch": 0.63, "grad_norm": 1.3315365314483643, "learning_rate": 3.234195305096568e-06, "loss": 0.5353, "step": 9705 }, { "epoch": 0.63, "grad_norm": 1.1375981569290161, "learning_rate": 3.2332173851700076e-06, "loss": 0.494, "step": 9706 }, { "epoch": 0.63, "grad_norm": 1.1562775373458862, "learning_rate": 3.2322395424649565e-06, "loss": 0.5014, "step": 9707 }, { "epoch": 0.63, "grad_norm": 1.2752375602722168, "learning_rate": 3.231261777024154e-06, "loss": 0.4873, "step": 9708 }, { "epoch": 0.63, "grad_norm": 1.1111737489700317, "learning_rate": 3.230284088890338e-06, "loss": 0.4638, "step": 9709 }, { "epoch": 0.63, "grad_norm": 1.1734496355056763, "learning_rate": 3.2293064781062367e-06, "loss": 0.5239, "step": 9710 }, { "epoch": 0.63, "grad_norm": 1.299607753753662, "learning_rate": 3.2283289447145803e-06, "loss": 0.567, "step": 9711 }, { "epoch": 0.63, "grad_norm": 1.1787073612213135, "learning_rate": 3.227351488758095e-06, "loss": 0.4821, "step": 9712 }, { "epoch": 0.63, "grad_norm": 1.2611638307571411, "learning_rate": 3.2263741102795035e-06, "loss": 0.5338, "step": 9713 }, { "epoch": 0.63, "grad_norm": 1.1342040300369263, "learning_rate": 3.225396809321524e-06, "loss": 0.4795, "step": 9714 }, { "epoch": 0.63, "grad_norm": 1.3058254718780518, "learning_rate": 3.2244195859268702e-06, "loss": 0.5265, "step": 9715 }, { "epoch": 0.63, "grad_norm": 1.154382586479187, "learning_rate": 3.2234424401382554e-06, "loss": 0.5366, "step": 9716 }, { "epoch": 0.63, "grad_norm": 1.1468383073806763, "learning_rate": 3.222465371998388e-06, "loss": 0.5196, "step": 9717 }, { "epoch": 0.63, "grad_norm": 1.1555936336517334, "learning_rate": 3.221488381549973e-06, "loss": 0.5367, "step": 9718 }, { "epoch": 0.63, "grad_norm": 1.0935767889022827, "learning_rate": 3.2205114688357124e-06, "loss": 0.4795, "step": 9719 }, { "epoch": 0.63, "grad_norm": 2.2796120643615723, "learning_rate": 3.219534633898305e-06, "loss": 0.5536, "step": 9720 }, { "epoch": 0.63, "grad_norm": 1.16299569606781, "learning_rate": 3.218557876780445e-06, "loss": 0.5417, "step": 9721 }, { "epoch": 0.63, "grad_norm": 1.217312216758728, "learning_rate": 3.217581197524824e-06, "loss": 0.5315, "step": 9722 }, { "epoch": 0.63, "grad_norm": 1.204205870628357, "learning_rate": 3.2166045961741288e-06, "loss": 0.4865, "step": 9723 }, { "epoch": 0.63, "grad_norm": 1.154810905456543, "learning_rate": 3.2156280727710464e-06, "loss": 0.5277, "step": 9724 }, { "epoch": 0.63, "grad_norm": 1.3201019763946533, "learning_rate": 3.2146516273582567e-06, "loss": 0.5072, "step": 9725 }, { "epoch": 0.63, "grad_norm": 1.3017840385437012, "learning_rate": 3.2136752599784395e-06, "loss": 0.5776, "step": 9726 }, { "epoch": 0.63, "grad_norm": 1.117271900177002, "learning_rate": 3.2126989706742675e-06, "loss": 0.5363, "step": 9727 }, { "epoch": 0.63, "grad_norm": 1.2206932306289673, "learning_rate": 3.2117227594884117e-06, "loss": 0.5228, "step": 9728 }, { "epoch": 0.63, "grad_norm": 1.2289568185806274, "learning_rate": 3.210746626463539e-06, "loss": 0.5222, "step": 9729 }, { "epoch": 0.63, "grad_norm": 1.2038495540618896, "learning_rate": 3.209770571642316e-06, "loss": 0.4836, "step": 9730 }, { "epoch": 0.63, "grad_norm": 1.2610880136489868, "learning_rate": 3.2087945950674027e-06, "loss": 0.5809, "step": 9731 }, { "epoch": 0.63, "grad_norm": 1.2122392654418945, "learning_rate": 3.2078186967814574e-06, "loss": 0.5101, "step": 9732 }, { "epoch": 0.63, "grad_norm": 1.3306342363357544, "learning_rate": 3.206842876827132e-06, "loss": 0.5017, "step": 9733 }, { "epoch": 0.63, "grad_norm": 1.1348531246185303, "learning_rate": 3.205867135247077e-06, "loss": 0.492, "step": 9734 }, { "epoch": 0.63, "grad_norm": 1.1958062648773193, "learning_rate": 3.204891472083941e-06, "loss": 0.5063, "step": 9735 }, { "epoch": 0.63, "grad_norm": 1.1873098611831665, "learning_rate": 3.203915887380368e-06, "loss": 0.5343, "step": 9736 }, { "epoch": 0.63, "grad_norm": 1.3335248231887817, "learning_rate": 3.202940381178997e-06, "loss": 0.5436, "step": 9737 }, { "epoch": 0.63, "grad_norm": 1.4776290655136108, "learning_rate": 3.201964953522467e-06, "loss": 0.5444, "step": 9738 }, { "epoch": 0.63, "grad_norm": 1.1089879274368286, "learning_rate": 3.200989604453408e-06, "loss": 0.5402, "step": 9739 }, { "epoch": 0.63, "grad_norm": 1.2103699445724487, "learning_rate": 3.200014334014453e-06, "loss": 0.5363, "step": 9740 }, { "epoch": 0.63, "grad_norm": 1.17074716091156, "learning_rate": 3.199039142248226e-06, "loss": 0.5344, "step": 9741 }, { "epoch": 0.63, "grad_norm": 1.25416898727417, "learning_rate": 3.1980640291973525e-06, "loss": 0.4837, "step": 9742 }, { "epoch": 0.63, "grad_norm": 1.1838361024856567, "learning_rate": 3.1970889949044507e-06, "loss": 0.4608, "step": 9743 }, { "epoch": 0.63, "grad_norm": 1.186677098274231, "learning_rate": 3.19611403941214e-06, "loss": 0.5223, "step": 9744 }, { "epoch": 0.63, "grad_norm": 2.767326831817627, "learning_rate": 3.195139162763028e-06, "loss": 0.5669, "step": 9745 }, { "epoch": 0.63, "grad_norm": 1.1915004253387451, "learning_rate": 3.194164364999727e-06, "loss": 0.5443, "step": 9746 }, { "epoch": 0.63, "grad_norm": 1.1193510293960571, "learning_rate": 3.1931896461648416e-06, "loss": 0.4805, "step": 9747 }, { "epoch": 0.63, "grad_norm": 1.2340421676635742, "learning_rate": 3.192215006300976e-06, "loss": 0.5393, "step": 9748 }, { "epoch": 0.63, "grad_norm": 1.1847290992736816, "learning_rate": 3.1912404454507284e-06, "loss": 0.5639, "step": 9749 }, { "epoch": 0.63, "grad_norm": 1.3750182390213013, "learning_rate": 3.190265963656696e-06, "loss": 0.5013, "step": 9750 }, { "epoch": 0.63, "grad_norm": 1.2833508253097534, "learning_rate": 3.189291560961467e-06, "loss": 0.5644, "step": 9751 }, { "epoch": 0.63, "grad_norm": 1.176666021347046, "learning_rate": 3.1883172374076323e-06, "loss": 0.5457, "step": 9752 }, { "epoch": 0.63, "grad_norm": 1.2316608428955078, "learning_rate": 3.1873429930377764e-06, "loss": 0.5406, "step": 9753 }, { "epoch": 0.63, "grad_norm": 1.3226114511489868, "learning_rate": 3.1863688278944827e-06, "loss": 0.4923, "step": 9754 }, { "epoch": 0.63, "grad_norm": 1.2973138093948364, "learning_rate": 3.1853947420203286e-06, "loss": 0.4991, "step": 9755 }, { "epoch": 0.63, "grad_norm": 1.19977867603302, "learning_rate": 3.184420735457888e-06, "loss": 0.5084, "step": 9756 }, { "epoch": 0.63, "grad_norm": 1.0945936441421509, "learning_rate": 3.1834468082497327e-06, "loss": 0.4951, "step": 9757 }, { "epoch": 0.63, "grad_norm": 1.1785494089126587, "learning_rate": 3.182472960438431e-06, "loss": 0.5126, "step": 9758 }, { "epoch": 0.63, "grad_norm": 1.1444225311279297, "learning_rate": 3.1814991920665463e-06, "loss": 0.4785, "step": 9759 }, { "epoch": 0.63, "grad_norm": 1.2278330326080322, "learning_rate": 3.180525503176641e-06, "loss": 0.5006, "step": 9760 }, { "epoch": 0.63, "grad_norm": 1.188951849937439, "learning_rate": 3.179551893811272e-06, "loss": 0.5433, "step": 9761 }, { "epoch": 0.63, "grad_norm": 1.1289687156677246, "learning_rate": 3.178578364012991e-06, "loss": 0.4735, "step": 9762 }, { "epoch": 0.63, "grad_norm": 1.0824034214019775, "learning_rate": 3.1776049138243513e-06, "loss": 0.4841, "step": 9763 }, { "epoch": 0.63, "grad_norm": 1.348737359046936, "learning_rate": 3.1766315432879e-06, "loss": 0.5462, "step": 9764 }, { "epoch": 0.63, "grad_norm": 1.2809118032455444, "learning_rate": 3.175658252446178e-06, "loss": 0.5363, "step": 9765 }, { "epoch": 0.63, "grad_norm": 1.2244186401367188, "learning_rate": 3.1746850413417275e-06, "loss": 0.5601, "step": 9766 }, { "epoch": 0.63, "grad_norm": 1.106781244277954, "learning_rate": 3.1737119100170855e-06, "loss": 0.5054, "step": 9767 }, { "epoch": 0.63, "grad_norm": 1.129624843597412, "learning_rate": 3.1727388585147824e-06, "loss": 0.4796, "step": 9768 }, { "epoch": 0.63, "grad_norm": 1.2891401052474976, "learning_rate": 3.1717658868773508e-06, "loss": 0.4967, "step": 9769 }, { "epoch": 0.63, "grad_norm": 1.1747123003005981, "learning_rate": 3.170792995147314e-06, "loss": 0.546, "step": 9770 }, { "epoch": 0.63, "grad_norm": 1.1725796461105347, "learning_rate": 3.169820183367197e-06, "loss": 0.5138, "step": 9771 }, { "epoch": 0.63, "grad_norm": 1.151467204093933, "learning_rate": 3.168847451579517e-06, "loss": 0.4749, "step": 9772 }, { "epoch": 0.63, "grad_norm": 1.1468619108200073, "learning_rate": 3.167874799826792e-06, "loss": 0.4932, "step": 9773 }, { "epoch": 0.63, "grad_norm": 1.1371731758117676, "learning_rate": 3.1669022281515314e-06, "loss": 0.5485, "step": 9774 }, { "epoch": 0.63, "grad_norm": 1.1373982429504395, "learning_rate": 3.165929736596245e-06, "loss": 0.5219, "step": 9775 }, { "epoch": 0.63, "grad_norm": 1.384767770767212, "learning_rate": 3.1649573252034377e-06, "loss": 0.5356, "step": 9776 }, { "epoch": 0.63, "grad_norm": 1.2105618715286255, "learning_rate": 3.163984994015611e-06, "loss": 0.534, "step": 9777 }, { "epoch": 0.63, "grad_norm": 1.3153164386749268, "learning_rate": 3.1630127430752633e-06, "loss": 0.569, "step": 9778 }, { "epoch": 0.63, "grad_norm": 1.1454544067382812, "learning_rate": 3.1620405724248914e-06, "loss": 0.5482, "step": 9779 }, { "epoch": 0.63, "grad_norm": 1.2084919214248657, "learning_rate": 3.1610684821069814e-06, "loss": 0.4775, "step": 9780 }, { "epoch": 0.63, "grad_norm": 1.1215413808822632, "learning_rate": 3.160096472164024e-06, "loss": 0.4778, "step": 9781 }, { "epoch": 0.63, "grad_norm": 1.2260183095932007, "learning_rate": 3.159124542638503e-06, "loss": 0.5485, "step": 9782 }, { "epoch": 0.63, "grad_norm": 1.2087383270263672, "learning_rate": 3.1581526935728983e-06, "loss": 0.6007, "step": 9783 }, { "epoch": 0.63, "grad_norm": 1.2247912883758545, "learning_rate": 3.1571809250096873e-06, "loss": 0.5508, "step": 9784 }, { "epoch": 0.63, "grad_norm": 1.1439985036849976, "learning_rate": 3.156209236991346e-06, "loss": 0.5412, "step": 9785 }, { "epoch": 0.63, "grad_norm": 1.2986921072006226, "learning_rate": 3.1552376295603397e-06, "loss": 0.5142, "step": 9786 }, { "epoch": 0.63, "grad_norm": 1.0551507472991943, "learning_rate": 3.154266102759137e-06, "loss": 0.4679, "step": 9787 }, { "epoch": 0.63, "grad_norm": 1.1747915744781494, "learning_rate": 3.153294656630201e-06, "loss": 0.4731, "step": 9788 }, { "epoch": 0.63, "grad_norm": 1.1085786819458008, "learning_rate": 3.1523232912159906e-06, "loss": 0.4966, "step": 9789 }, { "epoch": 0.63, "grad_norm": 1.1583694219589233, "learning_rate": 3.1513520065589627e-06, "loss": 0.5242, "step": 9790 }, { "epoch": 0.63, "grad_norm": 1.3042473793029785, "learning_rate": 3.1503808027015703e-06, "loss": 0.5709, "step": 9791 }, { "epoch": 0.63, "grad_norm": 1.2959132194519043, "learning_rate": 3.1494096796862592e-06, "loss": 0.5197, "step": 9792 }, { "epoch": 0.63, "grad_norm": 1.160338044166565, "learning_rate": 3.148438637555477e-06, "loss": 0.5437, "step": 9793 }, { "epoch": 0.63, "grad_norm": 1.2376635074615479, "learning_rate": 3.1474676763516644e-06, "loss": 0.5727, "step": 9794 }, { "epoch": 0.63, "grad_norm": 1.0398855209350586, "learning_rate": 3.14649679611726e-06, "loss": 0.5169, "step": 9795 }, { "epoch": 0.63, "grad_norm": 1.1259249448776245, "learning_rate": 3.145525996894698e-06, "loss": 0.5346, "step": 9796 }, { "epoch": 0.63, "grad_norm": 1.1461046934127808, "learning_rate": 3.1445552787264135e-06, "loss": 0.5811, "step": 9797 }, { "epoch": 0.63, "grad_norm": 1.162334680557251, "learning_rate": 3.1435846416548276e-06, "loss": 0.5088, "step": 9798 }, { "epoch": 0.63, "grad_norm": 1.2992936372756958, "learning_rate": 3.1426140857223674e-06, "loss": 0.538, "step": 9799 }, { "epoch": 0.63, "grad_norm": 1.0948148965835571, "learning_rate": 3.141643610971454e-06, "loss": 0.5296, "step": 9800 }, { "epoch": 0.63, "grad_norm": 1.0822416543960571, "learning_rate": 3.140673217444503e-06, "loss": 0.4998, "step": 9801 }, { "epoch": 0.63, "grad_norm": 1.138380527496338, "learning_rate": 3.139702905183931e-06, "loss": 0.4977, "step": 9802 }, { "epoch": 0.63, "grad_norm": 1.2618274688720703, "learning_rate": 3.138732674232143e-06, "loss": 0.5362, "step": 9803 }, { "epoch": 0.63, "grad_norm": 1.272132158279419, "learning_rate": 3.1377625246315485e-06, "loss": 0.5267, "step": 9804 }, { "epoch": 0.63, "grad_norm": 1.1157293319702148, "learning_rate": 3.136792456424549e-06, "loss": 0.5124, "step": 9805 }, { "epoch": 0.63, "grad_norm": 1.2608771324157715, "learning_rate": 3.1358224696535437e-06, "loss": 0.5205, "step": 9806 }, { "epoch": 0.63, "grad_norm": 1.163992166519165, "learning_rate": 3.13485256436093e-06, "loss": 0.5297, "step": 9807 }, { "epoch": 0.63, "grad_norm": 1.2072664499282837, "learning_rate": 3.133882740589098e-06, "loss": 0.5001, "step": 9808 }, { "epoch": 0.63, "grad_norm": 1.1410857439041138, "learning_rate": 3.1329129983804364e-06, "loss": 0.4972, "step": 9809 }, { "epoch": 0.63, "grad_norm": 1.1023494005203247, "learning_rate": 3.1319433377773306e-06, "loss": 0.5015, "step": 9810 }, { "epoch": 0.63, "grad_norm": 1.1164278984069824, "learning_rate": 3.1309737588221624e-06, "loss": 0.4744, "step": 9811 }, { "epoch": 0.63, "grad_norm": 1.1849371194839478, "learning_rate": 3.1300042615573085e-06, "loss": 0.5254, "step": 9812 }, { "epoch": 0.63, "grad_norm": 1.2193048000335693, "learning_rate": 3.129034846025143e-06, "loss": 0.553, "step": 9813 }, { "epoch": 0.63, "grad_norm": 1.4307425022125244, "learning_rate": 3.1280655122680385e-06, "loss": 0.5573, "step": 9814 }, { "epoch": 0.63, "grad_norm": 1.2501044273376465, "learning_rate": 3.1270962603283605e-06, "loss": 0.5114, "step": 9815 }, { "epoch": 0.63, "grad_norm": 1.185727596282959, "learning_rate": 3.126127090248473e-06, "loss": 0.5003, "step": 9816 }, { "epoch": 0.63, "grad_norm": 1.0919562578201294, "learning_rate": 3.125158002070735e-06, "loss": 0.4881, "step": 9817 }, { "epoch": 0.63, "grad_norm": 1.1920244693756104, "learning_rate": 3.124188995837503e-06, "loss": 0.5357, "step": 9818 }, { "epoch": 0.63, "grad_norm": 1.1577223539352417, "learning_rate": 3.123220071591131e-06, "loss": 0.5338, "step": 9819 }, { "epoch": 0.63, "grad_norm": 1.1575649976730347, "learning_rate": 3.1222512293739694e-06, "loss": 0.5559, "step": 9820 }, { "epoch": 0.63, "grad_norm": 1.199253797531128, "learning_rate": 3.12128246922836e-06, "loss": 0.5075, "step": 9821 }, { "epoch": 0.63, "grad_norm": 1.140735387802124, "learning_rate": 3.1203137911966465e-06, "loss": 0.5291, "step": 9822 }, { "epoch": 0.63, "grad_norm": 1.3294315338134766, "learning_rate": 3.1193451953211674e-06, "loss": 0.4595, "step": 9823 }, { "epoch": 0.63, "grad_norm": 1.2515957355499268, "learning_rate": 3.1183766816442584e-06, "loss": 0.5265, "step": 9824 }, { "epoch": 0.63, "grad_norm": 1.2225579023361206, "learning_rate": 3.117408250208249e-06, "loss": 0.5164, "step": 9825 }, { "epoch": 0.63, "grad_norm": 1.0859146118164062, "learning_rate": 3.1164399010554702e-06, "loss": 0.4687, "step": 9826 }, { "epoch": 0.63, "grad_norm": 1.1730949878692627, "learning_rate": 3.1154716342282422e-06, "loss": 0.5058, "step": 9827 }, { "epoch": 0.63, "grad_norm": 1.1762183904647827, "learning_rate": 3.1145034497688862e-06, "loss": 0.4782, "step": 9828 }, { "epoch": 0.63, "grad_norm": 1.1512104272842407, "learning_rate": 3.1135353477197204e-06, "loss": 0.512, "step": 9829 }, { "epoch": 0.63, "grad_norm": 1.1317089796066284, "learning_rate": 3.112567328123057e-06, "loss": 0.4616, "step": 9830 }, { "epoch": 0.63, "grad_norm": 1.1238971948623657, "learning_rate": 3.1115993910212063e-06, "loss": 0.5279, "step": 9831 }, { "epoch": 0.63, "grad_norm": 1.1897003650665283, "learning_rate": 3.1106315364564753e-06, "loss": 0.5021, "step": 9832 }, { "epoch": 0.63, "grad_norm": 1.2301685810089111, "learning_rate": 3.109663764471164e-06, "loss": 0.5175, "step": 9833 }, { "epoch": 0.63, "grad_norm": 1.2756043672561646, "learning_rate": 3.108696075107572e-06, "loss": 0.5218, "step": 9834 }, { "epoch": 0.63, "grad_norm": 1.1574009656906128, "learning_rate": 3.1077284684079957e-06, "loss": 0.503, "step": 9835 }, { "epoch": 0.63, "grad_norm": 4.084864616394043, "learning_rate": 3.106760944414725e-06, "loss": 0.5748, "step": 9836 }, { "epoch": 0.63, "grad_norm": 1.2552472352981567, "learning_rate": 3.1057935031700493e-06, "loss": 0.5085, "step": 9837 }, { "epoch": 0.64, "grad_norm": 1.1467740535736084, "learning_rate": 3.1048261447162543e-06, "loss": 0.5057, "step": 9838 }, { "epoch": 0.64, "grad_norm": 1.2291561365127563, "learning_rate": 3.103858869095617e-06, "loss": 0.4797, "step": 9839 }, { "epoch": 0.64, "grad_norm": 1.2105517387390137, "learning_rate": 3.1028916763504165e-06, "loss": 0.4959, "step": 9840 }, { "epoch": 0.64, "grad_norm": 1.2693705558776855, "learning_rate": 3.101924566522926e-06, "loss": 0.4728, "step": 9841 }, { "epoch": 0.64, "grad_norm": 1.1923779249191284, "learning_rate": 3.1009575396554157e-06, "loss": 0.5251, "step": 9842 }, { "epoch": 0.64, "grad_norm": 1.207335352897644, "learning_rate": 3.0999905957901533e-06, "loss": 0.5853, "step": 9843 }, { "epoch": 0.64, "grad_norm": 1.1948013305664062, "learning_rate": 3.0990237349693985e-06, "loss": 0.576, "step": 9844 }, { "epoch": 0.64, "grad_norm": 1.1443854570388794, "learning_rate": 3.0980569572354113e-06, "loss": 0.5105, "step": 9845 }, { "epoch": 0.64, "grad_norm": 1.270867109298706, "learning_rate": 3.097090262630448e-06, "loss": 0.5129, "step": 9846 }, { "epoch": 0.64, "grad_norm": 1.0646363496780396, "learning_rate": 3.096123651196759e-06, "loss": 0.4885, "step": 9847 }, { "epoch": 0.64, "grad_norm": 1.3050508499145508, "learning_rate": 3.095157122976593e-06, "loss": 0.5519, "step": 9848 }, { "epoch": 0.64, "grad_norm": 1.1827208995819092, "learning_rate": 3.094190678012198e-06, "loss": 0.5193, "step": 9849 }, { "epoch": 0.64, "grad_norm": 1.1264139413833618, "learning_rate": 3.0932243163458075e-06, "loss": 0.5179, "step": 9850 }, { "epoch": 0.64, "grad_norm": 1.1744203567504883, "learning_rate": 3.0922580380196644e-06, "loss": 0.5405, "step": 9851 }, { "epoch": 0.64, "grad_norm": 1.2117109298706055, "learning_rate": 3.0912918430759993e-06, "loss": 0.4966, "step": 9852 }, { "epoch": 0.64, "grad_norm": 1.3077630996704102, "learning_rate": 3.0903257315570446e-06, "loss": 0.5623, "step": 9853 }, { "epoch": 0.64, "grad_norm": 1.2619656324386597, "learning_rate": 3.089359703505024e-06, "loss": 0.5427, "step": 9854 }, { "epoch": 0.64, "grad_norm": 1.2086957693099976, "learning_rate": 3.088393758962162e-06, "loss": 0.5031, "step": 9855 }, { "epoch": 0.64, "grad_norm": 1.2495222091674805, "learning_rate": 3.087427897970676e-06, "loss": 0.5311, "step": 9856 }, { "epoch": 0.64, "grad_norm": 1.233452558517456, "learning_rate": 3.0864621205727817e-06, "loss": 0.511, "step": 9857 }, { "epoch": 0.64, "grad_norm": 1.1496374607086182, "learning_rate": 3.085496426810693e-06, "loss": 0.509, "step": 9858 }, { "epoch": 0.64, "grad_norm": 1.1693964004516602, "learning_rate": 3.0845308167266143e-06, "loss": 0.5047, "step": 9859 }, { "epoch": 0.64, "grad_norm": 1.2455472946166992, "learning_rate": 3.083565290362752e-06, "loss": 0.5102, "step": 9860 }, { "epoch": 0.64, "grad_norm": 1.3354600667953491, "learning_rate": 3.082599847761307e-06, "loss": 0.5159, "step": 9861 }, { "epoch": 0.64, "grad_norm": 1.3071345090866089, "learning_rate": 3.0816344889644766e-06, "loss": 0.54, "step": 9862 }, { "epoch": 0.64, "grad_norm": 1.3615418672561646, "learning_rate": 3.080669214014451e-06, "loss": 0.4926, "step": 9863 }, { "epoch": 0.64, "grad_norm": 1.0578422546386719, "learning_rate": 3.079704022953423e-06, "loss": 0.5171, "step": 9864 }, { "epoch": 0.64, "grad_norm": 1.151629090309143, "learning_rate": 3.078738915823577e-06, "loss": 0.4931, "step": 9865 }, { "epoch": 0.64, "grad_norm": 1.2476861476898193, "learning_rate": 3.077773892667097e-06, "loss": 0.5507, "step": 9866 }, { "epoch": 0.64, "grad_norm": 1.1655609607696533, "learning_rate": 3.0768089535261626e-06, "loss": 0.5112, "step": 9867 }, { "epoch": 0.64, "grad_norm": 1.1962114572525024, "learning_rate": 3.075844098442944e-06, "loss": 0.5117, "step": 9868 }, { "epoch": 0.64, "grad_norm": 1.1938186883926392, "learning_rate": 3.0748793274596162e-06, "loss": 0.5702, "step": 9869 }, { "epoch": 0.64, "grad_norm": 1.2470684051513672, "learning_rate": 3.0739146406183464e-06, "loss": 0.5482, "step": 9870 }, { "epoch": 0.64, "grad_norm": 1.196703553199768, "learning_rate": 3.0729500379612977e-06, "loss": 0.5066, "step": 9871 }, { "epoch": 0.64, "grad_norm": 1.2072234153747559, "learning_rate": 3.0719855195306306e-06, "loss": 0.5122, "step": 9872 }, { "epoch": 0.64, "grad_norm": 1.1353331804275513, "learning_rate": 3.071021085368505e-06, "loss": 0.4855, "step": 9873 }, { "epoch": 0.64, "grad_norm": 1.301966905593872, "learning_rate": 3.0700567355170686e-06, "loss": 0.5158, "step": 9874 }, { "epoch": 0.64, "grad_norm": 1.1175442934036255, "learning_rate": 3.0690924700184725e-06, "loss": 0.5085, "step": 9875 }, { "epoch": 0.64, "grad_norm": 1.4397066831588745, "learning_rate": 3.0681282889148634e-06, "loss": 0.5648, "step": 9876 }, { "epoch": 0.64, "grad_norm": 1.211255669593811, "learning_rate": 3.067164192248382e-06, "loss": 0.5216, "step": 9877 }, { "epoch": 0.64, "grad_norm": 1.2800939083099365, "learning_rate": 3.0662001800611674e-06, "loss": 0.5616, "step": 9878 }, { "epoch": 0.64, "grad_norm": 1.1769007444381714, "learning_rate": 3.065236252395356e-06, "loss": 0.485, "step": 9879 }, { "epoch": 0.64, "grad_norm": 1.079795479774475, "learning_rate": 3.064272409293073e-06, "loss": 0.4845, "step": 9880 }, { "epoch": 0.64, "grad_norm": 1.0914491415023804, "learning_rate": 3.06330865079645e-06, "loss": 0.5058, "step": 9881 }, { "epoch": 0.64, "grad_norm": 1.1225653886795044, "learning_rate": 3.0623449769476088e-06, "loss": 0.5115, "step": 9882 }, { "epoch": 0.64, "grad_norm": 1.4019120931625366, "learning_rate": 3.061381387788669e-06, "loss": 0.5077, "step": 9883 }, { "epoch": 0.64, "grad_norm": 1.5781329870224, "learning_rate": 3.0604178833617493e-06, "loss": 0.5292, "step": 9884 }, { "epoch": 0.64, "grad_norm": 1.2746349573135376, "learning_rate": 3.0594544637089575e-06, "loss": 0.4808, "step": 9885 }, { "epoch": 0.64, "grad_norm": 1.1630059480667114, "learning_rate": 3.058491128872405e-06, "loss": 0.4925, "step": 9886 }, { "epoch": 0.64, "grad_norm": 1.644229531288147, "learning_rate": 3.0575278788941954e-06, "loss": 0.5551, "step": 9887 }, { "epoch": 0.64, "grad_norm": 1.2860167026519775, "learning_rate": 3.0565647138164312e-06, "loss": 0.5173, "step": 9888 }, { "epoch": 0.64, "grad_norm": 1.1839072704315186, "learning_rate": 3.055601633681209e-06, "loss": 0.4551, "step": 9889 }, { "epoch": 0.64, "grad_norm": 1.1977438926696777, "learning_rate": 3.0546386385306248e-06, "loss": 0.5175, "step": 9890 }, { "epoch": 0.64, "grad_norm": 1.2600128650665283, "learning_rate": 3.053675728406764e-06, "loss": 0.5455, "step": 9891 }, { "epoch": 0.64, "grad_norm": 1.2801055908203125, "learning_rate": 3.052712903351717e-06, "loss": 0.5489, "step": 9892 }, { "epoch": 0.64, "grad_norm": 1.167515754699707, "learning_rate": 3.0517501634075638e-06, "loss": 0.5214, "step": 9893 }, { "epoch": 0.64, "grad_norm": 1.3306870460510254, "learning_rate": 3.0507875086163843e-06, "loss": 0.5075, "step": 9894 }, { "epoch": 0.64, "grad_norm": 1.247683048248291, "learning_rate": 3.0498249390202538e-06, "loss": 0.5199, "step": 9895 }, { "epoch": 0.64, "grad_norm": 1.255465030670166, "learning_rate": 3.0488624546612465e-06, "loss": 0.5198, "step": 9896 }, { "epoch": 0.64, "grad_norm": 1.2271382808685303, "learning_rate": 3.047900055581424e-06, "loss": 0.5231, "step": 9897 }, { "epoch": 0.64, "grad_norm": 1.3050732612609863, "learning_rate": 3.0469377418228552e-06, "loss": 0.5136, "step": 9898 }, { "epoch": 0.64, "grad_norm": 1.2570228576660156, "learning_rate": 3.0459755134275976e-06, "loss": 0.4896, "step": 9899 }, { "epoch": 0.64, "grad_norm": 1.4197394847869873, "learning_rate": 3.0450133704377107e-06, "loss": 0.5208, "step": 9900 }, { "epoch": 0.64, "grad_norm": 1.1497488021850586, "learning_rate": 3.0440513128952433e-06, "loss": 0.532, "step": 9901 }, { "epoch": 0.64, "grad_norm": 1.1208058595657349, "learning_rate": 3.043089340842248e-06, "loss": 0.5076, "step": 9902 }, { "epoch": 0.64, "grad_norm": 1.0531507730484009, "learning_rate": 3.042127454320768e-06, "loss": 0.4695, "step": 9903 }, { "epoch": 0.64, "grad_norm": 1.1867417097091675, "learning_rate": 3.0411656533728457e-06, "loss": 0.5167, "step": 9904 }, { "epoch": 0.64, "grad_norm": 1.2555146217346191, "learning_rate": 3.0402039380405197e-06, "loss": 0.5229, "step": 9905 }, { "epoch": 0.64, "grad_norm": 1.2646371126174927, "learning_rate": 3.039242308365822e-06, "loss": 0.5609, "step": 9906 }, { "epoch": 0.64, "grad_norm": 1.1745424270629883, "learning_rate": 3.038280764390784e-06, "loss": 0.4967, "step": 9907 }, { "epoch": 0.64, "grad_norm": 1.1121524572372437, "learning_rate": 3.0373193061574343e-06, "loss": 0.5236, "step": 9908 }, { "epoch": 0.64, "grad_norm": 1.1821202039718628, "learning_rate": 3.036357933707793e-06, "loss": 0.541, "step": 9909 }, { "epoch": 0.64, "grad_norm": 1.1184316873550415, "learning_rate": 3.0353966470838804e-06, "loss": 0.4419, "step": 9910 }, { "epoch": 0.64, "grad_norm": 1.1837159395217896, "learning_rate": 3.0344354463277104e-06, "loss": 0.5268, "step": 9911 }, { "epoch": 0.64, "grad_norm": 1.1382614374160767, "learning_rate": 3.0334743314812964e-06, "loss": 0.5092, "step": 9912 }, { "epoch": 0.64, "grad_norm": 1.0845088958740234, "learning_rate": 3.0325133025866457e-06, "loss": 0.475, "step": 9913 }, { "epoch": 0.64, "grad_norm": 1.2415399551391602, "learning_rate": 3.031552359685764e-06, "loss": 0.5501, "step": 9914 }, { "epoch": 0.64, "grad_norm": 1.3950583934783936, "learning_rate": 3.030591502820648e-06, "loss": 0.5369, "step": 9915 }, { "epoch": 0.64, "grad_norm": 1.4126789569854736, "learning_rate": 3.0296307320332963e-06, "loss": 0.5108, "step": 9916 }, { "epoch": 0.64, "grad_norm": 1.2342182397842407, "learning_rate": 3.0286700473657016e-06, "loss": 0.5426, "step": 9917 }, { "epoch": 0.64, "grad_norm": 1.2108334302902222, "learning_rate": 3.0277094488598534e-06, "loss": 0.6082, "step": 9918 }, { "epoch": 0.64, "grad_norm": 1.1668236255645752, "learning_rate": 3.0267489365577367e-06, "loss": 0.4715, "step": 9919 }, { "epoch": 0.64, "grad_norm": 1.345016360282898, "learning_rate": 3.025788510501335e-06, "loss": 0.5161, "step": 9920 }, { "epoch": 0.64, "grad_norm": 1.27273690700531, "learning_rate": 3.024828170732621e-06, "loss": 0.5632, "step": 9921 }, { "epoch": 0.64, "grad_norm": 1.1262849569320679, "learning_rate": 3.0238679172935727e-06, "loss": 0.5073, "step": 9922 }, { "epoch": 0.64, "grad_norm": 1.1813478469848633, "learning_rate": 3.022907750226159e-06, "loss": 0.5283, "step": 9923 }, { "epoch": 0.64, "grad_norm": 1.2992722988128662, "learning_rate": 3.021947669572346e-06, "loss": 0.5394, "step": 9924 }, { "epoch": 0.64, "grad_norm": 1.2478874921798706, "learning_rate": 3.0209876753740974e-06, "loss": 0.5186, "step": 9925 }, { "epoch": 0.64, "grad_norm": 1.3200656175613403, "learning_rate": 3.0200277676733737e-06, "loss": 0.5549, "step": 9926 }, { "epoch": 0.64, "grad_norm": 1.314435362815857, "learning_rate": 3.019067946512126e-06, "loss": 0.5107, "step": 9927 }, { "epoch": 0.64, "grad_norm": 1.211614727973938, "learning_rate": 3.018108211932307e-06, "loss": 0.5305, "step": 9928 }, { "epoch": 0.64, "grad_norm": 1.0937734842300415, "learning_rate": 3.0171485639758645e-06, "loss": 0.5087, "step": 9929 }, { "epoch": 0.64, "grad_norm": 1.0961326360702515, "learning_rate": 3.016189002684743e-06, "loss": 0.4798, "step": 9930 }, { "epoch": 0.64, "grad_norm": 1.196498155593872, "learning_rate": 3.015229528100884e-06, "loss": 0.5196, "step": 9931 }, { "epoch": 0.64, "grad_norm": 1.197351336479187, "learning_rate": 3.0142701402662193e-06, "loss": 0.5378, "step": 9932 }, { "epoch": 0.64, "grad_norm": 1.145651936531067, "learning_rate": 3.013310839222684e-06, "loss": 0.4931, "step": 9933 }, { "epoch": 0.64, "grad_norm": 1.3052971363067627, "learning_rate": 3.0123516250122052e-06, "loss": 0.5464, "step": 9934 }, { "epoch": 0.64, "grad_norm": 1.0450787544250488, "learning_rate": 3.0113924976767093e-06, "loss": 0.4917, "step": 9935 }, { "epoch": 0.64, "grad_norm": 1.062796950340271, "learning_rate": 3.010433457258116e-06, "loss": 0.5106, "step": 9936 }, { "epoch": 0.64, "grad_norm": 1.207369089126587, "learning_rate": 3.0094745037983464e-06, "loss": 0.534, "step": 9937 }, { "epoch": 0.64, "grad_norm": 1.1733953952789307, "learning_rate": 3.008515637339308e-06, "loss": 0.4921, "step": 9938 }, { "epoch": 0.64, "grad_norm": 1.2733510732650757, "learning_rate": 3.0075568579229126e-06, "loss": 0.5452, "step": 9939 }, { "epoch": 0.64, "grad_norm": 1.2237452268600464, "learning_rate": 3.0065981655910665e-06, "loss": 0.5327, "step": 9940 }, { "epoch": 0.64, "grad_norm": 1.2323386669158936, "learning_rate": 3.0056395603856715e-06, "loss": 0.5366, "step": 9941 }, { "epoch": 0.64, "grad_norm": 1.2373225688934326, "learning_rate": 3.004681042348627e-06, "loss": 0.586, "step": 9942 }, { "epoch": 0.64, "grad_norm": 1.2264245748519897, "learning_rate": 3.0037226115218266e-06, "loss": 0.5308, "step": 9943 }, { "epoch": 0.64, "grad_norm": 1.2726280689239502, "learning_rate": 3.0027642679471585e-06, "loss": 0.5105, "step": 9944 }, { "epoch": 0.64, "grad_norm": 1.2638375759124756, "learning_rate": 3.0018060116665117e-06, "loss": 0.5105, "step": 9945 }, { "epoch": 0.64, "grad_norm": 1.2653162479400635, "learning_rate": 3.0008478427217693e-06, "loss": 0.5074, "step": 9946 }, { "epoch": 0.64, "grad_norm": 1.296169400215149, "learning_rate": 2.9998897611548095e-06, "loss": 0.5474, "step": 9947 }, { "epoch": 0.64, "grad_norm": 1.156346082687378, "learning_rate": 2.998931767007508e-06, "loss": 0.495, "step": 9948 }, { "epoch": 0.64, "grad_norm": 1.3752986192703247, "learning_rate": 2.997973860321737e-06, "loss": 0.5504, "step": 9949 }, { "epoch": 0.64, "grad_norm": 1.2440800666809082, "learning_rate": 2.997016041139362e-06, "loss": 0.5344, "step": 9950 }, { "epoch": 0.64, "grad_norm": 1.0819151401519775, "learning_rate": 2.9960583095022476e-06, "loss": 0.5403, "step": 9951 }, { "epoch": 0.64, "grad_norm": 1.1476374864578247, "learning_rate": 2.9951006654522564e-06, "loss": 0.495, "step": 9952 }, { "epoch": 0.64, "grad_norm": 1.2036144733428955, "learning_rate": 2.99414310903124e-06, "loss": 0.5194, "step": 9953 }, { "epoch": 0.64, "grad_norm": 1.08040189743042, "learning_rate": 2.9931856402810544e-06, "loss": 0.5227, "step": 9954 }, { "epoch": 0.64, "grad_norm": 1.194991111755371, "learning_rate": 2.992228259243547e-06, "loss": 0.4837, "step": 9955 }, { "epoch": 0.64, "grad_norm": 1.1176128387451172, "learning_rate": 2.9912709659605634e-06, "loss": 0.481, "step": 9956 }, { "epoch": 0.64, "grad_norm": 1.23220956325531, "learning_rate": 2.9903137604739407e-06, "loss": 0.5265, "step": 9957 }, { "epoch": 0.64, "grad_norm": 1.2226778268814087, "learning_rate": 2.9893566428255193e-06, "loss": 0.5421, "step": 9958 }, { "epoch": 0.64, "grad_norm": 1.3058280944824219, "learning_rate": 2.9883996130571313e-06, "loss": 0.5509, "step": 9959 }, { "epoch": 0.64, "grad_norm": 1.1694542169570923, "learning_rate": 2.9874426712106066e-06, "loss": 0.4958, "step": 9960 }, { "epoch": 0.64, "grad_norm": 1.1986160278320312, "learning_rate": 2.9864858173277723e-06, "loss": 0.5251, "step": 9961 }, { "epoch": 0.64, "grad_norm": 1.3099945783615112, "learning_rate": 2.9855290514504454e-06, "loss": 0.4993, "step": 9962 }, { "epoch": 0.64, "grad_norm": 1.231925368309021, "learning_rate": 2.984572373620447e-06, "loss": 0.5045, "step": 9963 }, { "epoch": 0.64, "grad_norm": 1.0753499269485474, "learning_rate": 2.98361578387959e-06, "loss": 0.4807, "step": 9964 }, { "epoch": 0.64, "grad_norm": 1.2016479969024658, "learning_rate": 2.9826592822696844e-06, "loss": 0.5074, "step": 9965 }, { "epoch": 0.64, "grad_norm": 1.1388916969299316, "learning_rate": 2.981702868832537e-06, "loss": 0.513, "step": 9966 }, { "epoch": 0.64, "grad_norm": 1.1447745561599731, "learning_rate": 2.9807465436099515e-06, "loss": 0.485, "step": 9967 }, { "epoch": 0.64, "grad_norm": 1.2805017232894897, "learning_rate": 2.9797903066437235e-06, "loss": 0.5062, "step": 9968 }, { "epoch": 0.64, "grad_norm": 1.2336047887802124, "learning_rate": 2.9788341579756484e-06, "loss": 0.5577, "step": 9969 }, { "epoch": 0.64, "grad_norm": 1.1439040899276733, "learning_rate": 2.9778780976475185e-06, "loss": 0.5196, "step": 9970 }, { "epoch": 0.64, "grad_norm": 1.162661075592041, "learning_rate": 2.9769221257011187e-06, "loss": 0.4879, "step": 9971 }, { "epoch": 0.64, "grad_norm": 1.2048845291137695, "learning_rate": 2.9759662421782358e-06, "loss": 0.5243, "step": 9972 }, { "epoch": 0.64, "grad_norm": 1.1278225183486938, "learning_rate": 2.9750104471206444e-06, "loss": 0.5303, "step": 9973 }, { "epoch": 0.64, "grad_norm": 1.267418384552002, "learning_rate": 2.9740547405701215e-06, "loss": 0.5397, "step": 9974 }, { "epoch": 0.64, "grad_norm": 1.2519338130950928, "learning_rate": 2.973099122568439e-06, "loss": 0.5438, "step": 9975 }, { "epoch": 0.64, "grad_norm": 1.2309983968734741, "learning_rate": 2.972143593157365e-06, "loss": 0.5507, "step": 9976 }, { "epoch": 0.64, "grad_norm": 1.1876673698425293, "learning_rate": 2.9711881523786617e-06, "loss": 0.55, "step": 9977 }, { "epoch": 0.64, "grad_norm": 1.1601346731185913, "learning_rate": 2.970232800274092e-06, "loss": 0.5191, "step": 9978 }, { "epoch": 0.64, "grad_norm": 1.1900866031646729, "learning_rate": 2.969277536885408e-06, "loss": 0.4883, "step": 9979 }, { "epoch": 0.64, "grad_norm": 1.2687489986419678, "learning_rate": 2.968322362254363e-06, "loss": 0.5259, "step": 9980 }, { "epoch": 0.64, "grad_norm": 1.1901977062225342, "learning_rate": 2.9673672764227068e-06, "loss": 0.5604, "step": 9981 }, { "epoch": 0.64, "grad_norm": 1.076831340789795, "learning_rate": 2.966412279432182e-06, "loss": 0.507, "step": 9982 }, { "epoch": 0.64, "grad_norm": 1.2366451025009155, "learning_rate": 2.96545737132453e-06, "loss": 0.495, "step": 9983 }, { "epoch": 0.64, "grad_norm": 1.1722636222839355, "learning_rate": 2.964502552141489e-06, "loss": 0.4955, "step": 9984 }, { "epoch": 0.64, "grad_norm": 1.1944695711135864, "learning_rate": 2.9635478219247874e-06, "loss": 0.4984, "step": 9985 }, { "epoch": 0.64, "grad_norm": 1.17558753490448, "learning_rate": 2.9625931807161567e-06, "loss": 0.5514, "step": 9986 }, { "epoch": 0.64, "grad_norm": 1.1596986055374146, "learning_rate": 2.961638628557322e-06, "loss": 0.5339, "step": 9987 }, { "epoch": 0.64, "grad_norm": 1.2257189750671387, "learning_rate": 2.960684165490003e-06, "loss": 0.5354, "step": 9988 }, { "epoch": 0.64, "grad_norm": 1.224433183670044, "learning_rate": 2.9597297915559187e-06, "loss": 0.5066, "step": 9989 }, { "epoch": 0.64, "grad_norm": 1.4785863161087036, "learning_rate": 2.9587755067967817e-06, "loss": 0.5336, "step": 9990 }, { "epoch": 0.64, "grad_norm": 1.1758462190628052, "learning_rate": 2.9578213112543e-06, "loss": 0.5304, "step": 9991 }, { "epoch": 0.64, "grad_norm": 1.1608796119689941, "learning_rate": 2.956867204970179e-06, "loss": 0.503, "step": 9992 }, { "epoch": 0.65, "grad_norm": 1.2247631549835205, "learning_rate": 2.955913187986122e-06, "loss": 0.5543, "step": 9993 }, { "epoch": 0.65, "grad_norm": 1.1707106828689575, "learning_rate": 2.954959260343826e-06, "loss": 0.5353, "step": 9994 }, { "epoch": 0.65, "grad_norm": 1.293500304222107, "learning_rate": 2.954005422084984e-06, "loss": 0.4803, "step": 9995 }, { "epoch": 0.65, "grad_norm": 1.1506611108779907, "learning_rate": 2.9530516732512872e-06, "loss": 0.4872, "step": 9996 }, { "epoch": 0.65, "grad_norm": 1.2343165874481201, "learning_rate": 2.9520980138844193e-06, "loss": 0.5646, "step": 9997 }, { "epoch": 0.65, "grad_norm": 1.3465341329574585, "learning_rate": 2.951144444026065e-06, "loss": 0.5301, "step": 9998 }, { "epoch": 0.65, "grad_norm": 1.19338059425354, "learning_rate": 2.9501909637178995e-06, "loss": 0.4956, "step": 9999 }, { "epoch": 0.65, "grad_norm": 1.1949912309646606, "learning_rate": 2.9492375730015987e-06, "loss": 0.4951, "step": 10000 }, { "epoch": 0.65, "grad_norm": 1.11189866065979, "learning_rate": 2.948284271918832e-06, "loss": 0.5059, "step": 10001 }, { "epoch": 0.65, "grad_norm": 1.2147868871688843, "learning_rate": 2.947331060511268e-06, "loss": 0.5133, "step": 10002 }, { "epoch": 0.65, "grad_norm": 1.3182841539382935, "learning_rate": 2.946377938820567e-06, "loss": 0.5297, "step": 10003 }, { "epoch": 0.65, "grad_norm": 1.088316559791565, "learning_rate": 2.9454249068883873e-06, "loss": 0.5128, "step": 10004 }, { "epoch": 0.65, "grad_norm": 1.3106809854507446, "learning_rate": 2.9444719647563834e-06, "loss": 0.5292, "step": 10005 }, { "epoch": 0.65, "grad_norm": 1.127815842628479, "learning_rate": 2.9435191124662067e-06, "loss": 0.4913, "step": 10006 }, { "epoch": 0.65, "grad_norm": 1.3165414333343506, "learning_rate": 2.942566350059504e-06, "loss": 0.4911, "step": 10007 }, { "epoch": 0.65, "grad_norm": 1.4636422395706177, "learning_rate": 2.9416136775779203e-06, "loss": 0.4904, "step": 10008 }, { "epoch": 0.65, "grad_norm": 1.1880114078521729, "learning_rate": 2.9406610950630896e-06, "loss": 0.5318, "step": 10009 }, { "epoch": 0.65, "grad_norm": 1.1501678228378296, "learning_rate": 2.939708602556649e-06, "loss": 0.4976, "step": 10010 }, { "epoch": 0.65, "grad_norm": 1.1762661933898926, "learning_rate": 2.93875620010023e-06, "loss": 0.5184, "step": 10011 }, { "epoch": 0.65, "grad_norm": 1.2176860570907593, "learning_rate": 2.93780388773546e-06, "loss": 0.5183, "step": 10012 }, { "epoch": 0.65, "grad_norm": 1.1686304807662964, "learning_rate": 2.9368516655039627e-06, "loss": 0.5172, "step": 10013 }, { "epoch": 0.65, "grad_norm": 1.1311930418014526, "learning_rate": 2.9358995334473545e-06, "loss": 0.527, "step": 10014 }, { "epoch": 0.65, "grad_norm": 1.2840937376022339, "learning_rate": 2.934947491607252e-06, "loss": 0.5556, "step": 10015 }, { "epoch": 0.65, "grad_norm": 1.0932358503341675, "learning_rate": 2.933995540025267e-06, "loss": 0.5004, "step": 10016 }, { "epoch": 0.65, "grad_norm": 1.1720720529556274, "learning_rate": 2.9330436787430062e-06, "loss": 0.5104, "step": 10017 }, { "epoch": 0.65, "grad_norm": 1.1508432626724243, "learning_rate": 2.932091907802074e-06, "loss": 0.4935, "step": 10018 }, { "epoch": 0.65, "grad_norm": 1.1325076818466187, "learning_rate": 2.931140227244071e-06, "loss": 0.557, "step": 10019 }, { "epoch": 0.65, "grad_norm": 1.0912835597991943, "learning_rate": 2.9301886371105887e-06, "loss": 0.4942, "step": 10020 }, { "epoch": 0.65, "grad_norm": 1.1137803792953491, "learning_rate": 2.929237137443221e-06, "loss": 0.5041, "step": 10021 }, { "epoch": 0.65, "grad_norm": 1.2209272384643555, "learning_rate": 2.928285728283555e-06, "loss": 0.5589, "step": 10022 }, { "epoch": 0.65, "grad_norm": 1.370172381401062, "learning_rate": 2.9273344096731753e-06, "loss": 0.5191, "step": 10023 }, { "epoch": 0.65, "grad_norm": 1.18450129032135, "learning_rate": 2.9263831816536608e-06, "loss": 0.5232, "step": 10024 }, { "epoch": 0.65, "grad_norm": 1.1392407417297363, "learning_rate": 2.9254320442665894e-06, "loss": 0.5388, "step": 10025 }, { "epoch": 0.65, "grad_norm": 1.2391586303710938, "learning_rate": 2.9244809975535294e-06, "loss": 0.5314, "step": 10026 }, { "epoch": 0.65, "grad_norm": 1.2667897939682007, "learning_rate": 2.9235300415560497e-06, "loss": 0.5022, "step": 10027 }, { "epoch": 0.65, "grad_norm": 1.2163307666778564, "learning_rate": 2.922579176315714e-06, "loss": 0.5113, "step": 10028 }, { "epoch": 0.65, "grad_norm": 1.3320804834365845, "learning_rate": 2.921628401874083e-06, "loss": 0.5199, "step": 10029 }, { "epoch": 0.65, "grad_norm": 1.2160487174987793, "learning_rate": 2.920677718272713e-06, "loss": 0.5463, "step": 10030 }, { "epoch": 0.65, "grad_norm": 1.2402135133743286, "learning_rate": 2.9197271255531568e-06, "loss": 0.5498, "step": 10031 }, { "epoch": 0.65, "grad_norm": 1.3328137397766113, "learning_rate": 2.918776623756958e-06, "loss": 0.5509, "step": 10032 }, { "epoch": 0.65, "grad_norm": 1.211824655532837, "learning_rate": 2.917826212925664e-06, "loss": 0.507, "step": 10033 }, { "epoch": 0.65, "grad_norm": 1.1658884286880493, "learning_rate": 2.9168758931008144e-06, "loss": 0.5359, "step": 10034 }, { "epoch": 0.65, "grad_norm": 1.2503758668899536, "learning_rate": 2.915925664323944e-06, "loss": 0.5205, "step": 10035 }, { "epoch": 0.65, "grad_norm": 1.1367225646972656, "learning_rate": 2.914975526636587e-06, "loss": 0.5079, "step": 10036 }, { "epoch": 0.65, "grad_norm": 1.2489629983901978, "learning_rate": 2.9140254800802713e-06, "loss": 0.5251, "step": 10037 }, { "epoch": 0.65, "grad_norm": 1.1520317792892456, "learning_rate": 2.9130755246965193e-06, "loss": 0.4811, "step": 10038 }, { "epoch": 0.65, "grad_norm": 1.1519970893859863, "learning_rate": 2.9121256605268506e-06, "loss": 0.5221, "step": 10039 }, { "epoch": 0.65, "grad_norm": 1.2000998258590698, "learning_rate": 2.9111758876127827e-06, "loss": 0.529, "step": 10040 }, { "epoch": 0.65, "grad_norm": 1.349739670753479, "learning_rate": 2.9102262059958276e-06, "loss": 0.5428, "step": 10041 }, { "epoch": 0.65, "grad_norm": 1.2432405948638916, "learning_rate": 2.9092766157174935e-06, "loss": 0.5367, "step": 10042 }, { "epoch": 0.65, "grad_norm": 1.3126740455627441, "learning_rate": 2.908327116819286e-06, "loss": 0.5453, "step": 10043 }, { "epoch": 0.65, "grad_norm": 1.2103431224822998, "learning_rate": 2.9073777093427026e-06, "loss": 0.4932, "step": 10044 }, { "epoch": 0.65, "grad_norm": 1.2457637786865234, "learning_rate": 2.9064283933292394e-06, "loss": 0.5116, "step": 10045 }, { "epoch": 0.65, "grad_norm": 1.2291682958602905, "learning_rate": 2.9054791688203897e-06, "loss": 0.5753, "step": 10046 }, { "epoch": 0.65, "grad_norm": 1.1940178871154785, "learning_rate": 2.904530035857642e-06, "loss": 0.5347, "step": 10047 }, { "epoch": 0.65, "grad_norm": 1.1289410591125488, "learning_rate": 2.9035809944824805e-06, "loss": 0.5421, "step": 10048 }, { "epoch": 0.65, "grad_norm": 1.2056387662887573, "learning_rate": 2.9026320447363865e-06, "loss": 0.4736, "step": 10049 }, { "epoch": 0.65, "grad_norm": 1.1873440742492676, "learning_rate": 2.901683186660832e-06, "loss": 0.5164, "step": 10050 }, { "epoch": 0.65, "grad_norm": 1.1632922887802124, "learning_rate": 2.9007344202972924e-06, "loss": 0.5096, "step": 10051 }, { "epoch": 0.65, "grad_norm": 1.1033066511154175, "learning_rate": 2.8997857456872347e-06, "loss": 0.5382, "step": 10052 }, { "epoch": 0.65, "grad_norm": 1.1943076848983765, "learning_rate": 2.8988371628721233e-06, "loss": 0.5212, "step": 10053 }, { "epoch": 0.65, "grad_norm": 1.1910229921340942, "learning_rate": 2.8978886718934198e-06, "loss": 0.5081, "step": 10054 }, { "epoch": 0.65, "grad_norm": 1.2548900842666626, "learning_rate": 2.896940272792579e-06, "loss": 0.5732, "step": 10055 }, { "epoch": 0.65, "grad_norm": 1.3217018842697144, "learning_rate": 2.895991965611052e-06, "loss": 0.5407, "step": 10056 }, { "epoch": 0.65, "grad_norm": 1.495909571647644, "learning_rate": 2.8950437503902873e-06, "loss": 0.5362, "step": 10057 }, { "epoch": 0.65, "grad_norm": 1.227149248123169, "learning_rate": 2.8940956271717303e-06, "loss": 0.5041, "step": 10058 }, { "epoch": 0.65, "grad_norm": 1.1237775087356567, "learning_rate": 2.8931475959968212e-06, "loss": 0.492, "step": 10059 }, { "epoch": 0.65, "grad_norm": 1.1498384475708008, "learning_rate": 2.892199656906993e-06, "loss": 0.533, "step": 10060 }, { "epoch": 0.65, "grad_norm": 1.1740320920944214, "learning_rate": 2.89125180994368e-06, "loss": 0.5469, "step": 10061 }, { "epoch": 0.65, "grad_norm": 1.2999159097671509, "learning_rate": 2.8903040551483096e-06, "loss": 0.5272, "step": 10062 }, { "epoch": 0.65, "grad_norm": 1.143947720527649, "learning_rate": 2.889356392562306e-06, "loss": 0.5137, "step": 10063 }, { "epoch": 0.65, "grad_norm": 1.1999051570892334, "learning_rate": 2.88840882222709e-06, "loss": 0.5058, "step": 10064 }, { "epoch": 0.65, "grad_norm": 1.2275784015655518, "learning_rate": 2.8874613441840753e-06, "loss": 0.5747, "step": 10065 }, { "epoch": 0.65, "grad_norm": 1.3697465658187866, "learning_rate": 2.886513958474675e-06, "loss": 0.5317, "step": 10066 }, { "epoch": 0.65, "grad_norm": 1.1481590270996094, "learning_rate": 2.885566665140296e-06, "loss": 0.5019, "step": 10067 }, { "epoch": 0.65, "grad_norm": 1.0733736753463745, "learning_rate": 2.884619464222345e-06, "loss": 0.4466, "step": 10068 }, { "epoch": 0.65, "grad_norm": 1.2129642963409424, "learning_rate": 2.883672355762218e-06, "loss": 0.5364, "step": 10069 }, { "epoch": 0.65, "grad_norm": 1.3794724941253662, "learning_rate": 2.8827253398013114e-06, "loss": 0.4991, "step": 10070 }, { "epoch": 0.65, "grad_norm": 1.2109919786453247, "learning_rate": 2.881778416381018e-06, "loss": 0.4974, "step": 10071 }, { "epoch": 0.65, "grad_norm": 1.3019152879714966, "learning_rate": 2.880831585542725e-06, "loss": 0.5242, "step": 10072 }, { "epoch": 0.65, "grad_norm": 1.1538296937942505, "learning_rate": 2.8798848473278174e-06, "loss": 0.5259, "step": 10073 }, { "epoch": 0.65, "grad_norm": 1.1485867500305176, "learning_rate": 2.8789382017776716e-06, "loss": 0.4891, "step": 10074 }, { "epoch": 0.65, "grad_norm": 1.1276475191116333, "learning_rate": 2.8779916489336645e-06, "loss": 0.486, "step": 10075 }, { "epoch": 0.65, "grad_norm": 1.182784914970398, "learning_rate": 2.8770451888371677e-06, "loss": 0.5036, "step": 10076 }, { "epoch": 0.65, "grad_norm": 1.1753226518630981, "learning_rate": 2.876098821529548e-06, "loss": 0.4961, "step": 10077 }, { "epoch": 0.65, "grad_norm": 1.1307306289672852, "learning_rate": 2.8751525470521712e-06, "loss": 0.4867, "step": 10078 }, { "epoch": 0.65, "grad_norm": 1.2425864934921265, "learning_rate": 2.8742063654463923e-06, "loss": 0.4983, "step": 10079 }, { "epoch": 0.65, "grad_norm": 1.1052358150482178, "learning_rate": 2.8732602767535688e-06, "loss": 0.5022, "step": 10080 }, { "epoch": 0.65, "grad_norm": 1.1680638790130615, "learning_rate": 2.872314281015052e-06, "loss": 0.4948, "step": 10081 }, { "epoch": 0.65, "grad_norm": 1.4224539995193481, "learning_rate": 2.871368378272188e-06, "loss": 0.5272, "step": 10082 }, { "epoch": 0.65, "grad_norm": 1.0833438634872437, "learning_rate": 2.87042256856632e-06, "loss": 0.5061, "step": 10083 }, { "epoch": 0.65, "grad_norm": 1.1793932914733887, "learning_rate": 2.869476851938789e-06, "loss": 0.5177, "step": 10084 }, { "epoch": 0.65, "grad_norm": 1.1542296409606934, "learning_rate": 2.8685312284309262e-06, "loss": 0.5346, "step": 10085 }, { "epoch": 0.65, "grad_norm": 1.192676067352295, "learning_rate": 2.8675856980840645e-06, "loss": 0.5028, "step": 10086 }, { "epoch": 0.65, "grad_norm": 1.2637954950332642, "learning_rate": 2.8666402609395295e-06, "loss": 0.5399, "step": 10087 }, { "epoch": 0.65, "grad_norm": 1.0835521221160889, "learning_rate": 2.865694917038645e-06, "loss": 0.5266, "step": 10088 }, { "epoch": 0.65, "grad_norm": 1.2761708498001099, "learning_rate": 2.8647496664227294e-06, "loss": 0.5509, "step": 10089 }, { "epoch": 0.65, "grad_norm": 1.1742055416107178, "learning_rate": 2.863804509133099e-06, "loss": 0.5103, "step": 10090 }, { "epoch": 0.65, "grad_norm": 1.116795539855957, "learning_rate": 2.8628594452110593e-06, "loss": 0.5, "step": 10091 }, { "epoch": 0.65, "grad_norm": 1.3823529481887817, "learning_rate": 2.86191447469792e-06, "loss": 0.5014, "step": 10092 }, { "epoch": 0.65, "grad_norm": 1.177901268005371, "learning_rate": 2.8609695976349816e-06, "loss": 0.5132, "step": 10093 }, { "epoch": 0.65, "grad_norm": 1.283996343612671, "learning_rate": 2.8600248140635446e-06, "loss": 0.4855, "step": 10094 }, { "epoch": 0.65, "grad_norm": 1.1135146617889404, "learning_rate": 2.8590801240249013e-06, "loss": 0.5168, "step": 10095 }, { "epoch": 0.65, "grad_norm": 1.2086864709854126, "learning_rate": 2.858135527560344e-06, "loss": 0.4817, "step": 10096 }, { "epoch": 0.65, "grad_norm": 1.2957144975662231, "learning_rate": 2.8571910247111544e-06, "loss": 0.5345, "step": 10097 }, { "epoch": 0.65, "grad_norm": 1.383383870124817, "learning_rate": 2.8562466155186176e-06, "loss": 0.526, "step": 10098 }, { "epoch": 0.65, "grad_norm": 1.1086870431900024, "learning_rate": 2.8553023000240094e-06, "loss": 0.4859, "step": 10099 }, { "epoch": 0.65, "grad_norm": 1.1788322925567627, "learning_rate": 2.8543580782686042e-06, "loss": 0.5156, "step": 10100 }, { "epoch": 0.65, "grad_norm": 1.1447405815124512, "learning_rate": 2.853413950293674e-06, "loss": 0.5405, "step": 10101 }, { "epoch": 0.65, "grad_norm": 1.1500707864761353, "learning_rate": 2.85246991614048e-06, "loss": 0.5069, "step": 10102 }, { "epoch": 0.65, "grad_norm": 1.1340237855911255, "learning_rate": 2.8515259758502843e-06, "loss": 0.4642, "step": 10103 }, { "epoch": 0.65, "grad_norm": 1.2893146276474, "learning_rate": 2.850582129464346e-06, "loss": 0.5524, "step": 10104 }, { "epoch": 0.65, "grad_norm": 1.219773769378662, "learning_rate": 2.849638377023917e-06, "loss": 0.5242, "step": 10105 }, { "epoch": 0.65, "grad_norm": 1.2740647792816162, "learning_rate": 2.8486947185702463e-06, "loss": 0.5383, "step": 10106 }, { "epoch": 0.65, "grad_norm": 1.1472903490066528, "learning_rate": 2.8477511541445815e-06, "loss": 0.4651, "step": 10107 }, { "epoch": 0.65, "grad_norm": 1.327591061592102, "learning_rate": 2.8468076837881587e-06, "loss": 0.5064, "step": 10108 }, { "epoch": 0.65, "grad_norm": 1.2217483520507812, "learning_rate": 2.8458643075422167e-06, "loss": 0.529, "step": 10109 }, { "epoch": 0.65, "grad_norm": 1.2448348999023438, "learning_rate": 2.8449210254479877e-06, "loss": 0.5041, "step": 10110 }, { "epoch": 0.65, "grad_norm": 1.178302526473999, "learning_rate": 2.843977837546701e-06, "loss": 0.53, "step": 10111 }, { "epoch": 0.65, "grad_norm": 1.2986791133880615, "learning_rate": 2.84303474387958e-06, "loss": 0.5412, "step": 10112 }, { "epoch": 0.65, "grad_norm": 1.5349793434143066, "learning_rate": 2.842091744487847e-06, "loss": 0.5278, "step": 10113 }, { "epoch": 0.65, "grad_norm": 1.3357315063476562, "learning_rate": 2.841148839412715e-06, "loss": 0.5828, "step": 10114 }, { "epoch": 0.65, "grad_norm": 1.1502001285552979, "learning_rate": 2.840206028695397e-06, "loss": 0.5037, "step": 10115 }, { "epoch": 0.65, "grad_norm": 1.2548472881317139, "learning_rate": 2.839263312377101e-06, "loss": 0.5534, "step": 10116 }, { "epoch": 0.65, "grad_norm": 1.1891158819198608, "learning_rate": 2.8383206904990315e-06, "loss": 0.5172, "step": 10117 }, { "epoch": 0.65, "grad_norm": 1.1643388271331787, "learning_rate": 2.8373781631023866e-06, "loss": 0.5338, "step": 10118 }, { "epoch": 0.65, "grad_norm": 1.1370935440063477, "learning_rate": 2.836435730228365e-06, "loss": 0.4926, "step": 10119 }, { "epoch": 0.65, "grad_norm": 1.1986595392227173, "learning_rate": 2.8354933919181537e-06, "loss": 0.5427, "step": 10120 }, { "epoch": 0.65, "grad_norm": 1.2011744976043701, "learning_rate": 2.8345511482129417e-06, "loss": 0.5301, "step": 10121 }, { "epoch": 0.65, "grad_norm": 1.1350995302200317, "learning_rate": 2.8336089991539122e-06, "loss": 0.5336, "step": 10122 }, { "epoch": 0.65, "grad_norm": 1.1797459125518799, "learning_rate": 2.832666944782244e-06, "loss": 0.5108, "step": 10123 }, { "epoch": 0.65, "grad_norm": 1.0808552503585815, "learning_rate": 2.8317249851391114e-06, "loss": 0.4789, "step": 10124 }, { "epoch": 0.65, "grad_norm": 1.222442626953125, "learning_rate": 2.830783120265688e-06, "loss": 0.528, "step": 10125 }, { "epoch": 0.65, "grad_norm": 1.116921067237854, "learning_rate": 2.8298413502031353e-06, "loss": 0.4676, "step": 10126 }, { "epoch": 0.65, "grad_norm": 1.2059675455093384, "learning_rate": 2.828899674992618e-06, "loss": 0.5645, "step": 10127 }, { "epoch": 0.65, "grad_norm": 1.7497749328613281, "learning_rate": 2.827958094675295e-06, "loss": 0.5282, "step": 10128 }, { "epoch": 0.65, "grad_norm": 1.2132405042648315, "learning_rate": 2.827016609292319e-06, "loss": 0.4898, "step": 10129 }, { "epoch": 0.65, "grad_norm": 1.0407253503799438, "learning_rate": 2.826075218884841e-06, "loss": 0.4514, "step": 10130 }, { "epoch": 0.65, "grad_norm": 1.1921398639678955, "learning_rate": 2.8251339234940084e-06, "loss": 0.508, "step": 10131 }, { "epoch": 0.65, "grad_norm": 1.3279180526733398, "learning_rate": 2.824192723160959e-06, "loss": 0.5224, "step": 10132 }, { "epoch": 0.65, "grad_norm": 1.2134956121444702, "learning_rate": 2.8232516179268314e-06, "loss": 0.5356, "step": 10133 }, { "epoch": 0.65, "grad_norm": 1.2525603771209717, "learning_rate": 2.82231060783276e-06, "loss": 0.4933, "step": 10134 }, { "epoch": 0.65, "grad_norm": 1.2145905494689941, "learning_rate": 2.821369692919873e-06, "loss": 0.52, "step": 10135 }, { "epoch": 0.65, "grad_norm": 1.1910960674285889, "learning_rate": 2.820428873229296e-06, "loss": 0.5484, "step": 10136 }, { "epoch": 0.65, "grad_norm": 1.255556583404541, "learning_rate": 2.819488148802152e-06, "loss": 0.5021, "step": 10137 }, { "epoch": 0.65, "grad_norm": 1.1833350658416748, "learning_rate": 2.8185475196795532e-06, "loss": 0.5194, "step": 10138 }, { "epoch": 0.65, "grad_norm": 1.1496816873550415, "learning_rate": 2.8176069859026133e-06, "loss": 0.4957, "step": 10139 }, { "epoch": 0.65, "grad_norm": 1.157043218612671, "learning_rate": 2.816666547512442e-06, "loss": 0.551, "step": 10140 }, { "epoch": 0.65, "grad_norm": 1.1910927295684814, "learning_rate": 2.8157262045501426e-06, "loss": 0.4875, "step": 10141 }, { "epoch": 0.65, "grad_norm": 1.3308682441711426, "learning_rate": 2.8147859570568176e-06, "loss": 0.5301, "step": 10142 }, { "epoch": 0.65, "grad_norm": 1.220262050628662, "learning_rate": 2.8138458050735586e-06, "loss": 0.5111, "step": 10143 }, { "epoch": 0.65, "grad_norm": 1.1339364051818848, "learning_rate": 2.8129057486414584e-06, "loss": 0.5399, "step": 10144 }, { "epoch": 0.65, "grad_norm": 1.1993926763534546, "learning_rate": 2.8119657878016053e-06, "loss": 0.5322, "step": 10145 }, { "epoch": 0.65, "grad_norm": 1.0634979009628296, "learning_rate": 2.811025922595083e-06, "loss": 0.4644, "step": 10146 }, { "epoch": 0.65, "grad_norm": 1.2615805864334106, "learning_rate": 2.8100861530629698e-06, "loss": 0.4601, "step": 10147 }, { "epoch": 0.66, "grad_norm": 1.2491531372070312, "learning_rate": 2.8091464792463422e-06, "loss": 0.5232, "step": 10148 }, { "epoch": 0.66, "grad_norm": 1.2438626289367676, "learning_rate": 2.8082069011862676e-06, "loss": 0.4613, "step": 10149 }, { "epoch": 0.66, "grad_norm": 1.1098778247833252, "learning_rate": 2.807267418923815e-06, "loss": 0.4587, "step": 10150 }, { "epoch": 0.66, "grad_norm": 1.261776089668274, "learning_rate": 2.8063280325000465e-06, "loss": 0.5462, "step": 10151 }, { "epoch": 0.66, "grad_norm": 1.3279457092285156, "learning_rate": 2.8053887419560194e-06, "loss": 0.5368, "step": 10152 }, { "epoch": 0.66, "grad_norm": 1.2369188070297241, "learning_rate": 2.8044495473327904e-06, "loss": 0.5322, "step": 10153 }, { "epoch": 0.66, "grad_norm": 1.2788746356964111, "learning_rate": 2.803510448671405e-06, "loss": 0.5395, "step": 10154 }, { "epoch": 0.66, "grad_norm": 1.297658085823059, "learning_rate": 2.8025714460129105e-06, "loss": 0.503, "step": 10155 }, { "epoch": 0.66, "grad_norm": 1.1744824647903442, "learning_rate": 2.801632539398349e-06, "loss": 0.5143, "step": 10156 }, { "epoch": 0.66, "grad_norm": 1.1284021139144897, "learning_rate": 2.8006937288687575e-06, "loss": 0.4992, "step": 10157 }, { "epoch": 0.66, "grad_norm": 1.2602651119232178, "learning_rate": 2.799755014465171e-06, "loss": 0.537, "step": 10158 }, { "epoch": 0.66, "grad_norm": 1.1601804494857788, "learning_rate": 2.798816396228613e-06, "loss": 0.5108, "step": 10159 }, { "epoch": 0.66, "grad_norm": 1.152138113975525, "learning_rate": 2.7978778742001123e-06, "loss": 0.517, "step": 10160 }, { "epoch": 0.66, "grad_norm": 1.1115788221359253, "learning_rate": 2.796939448420688e-06, "loss": 0.4705, "step": 10161 }, { "epoch": 0.66, "grad_norm": 1.2441391944885254, "learning_rate": 2.796001118931358e-06, "loss": 0.4994, "step": 10162 }, { "epoch": 0.66, "grad_norm": 1.124983787536621, "learning_rate": 2.795062885773131e-06, "loss": 0.4757, "step": 10163 }, { "epoch": 0.66, "grad_norm": 1.2500572204589844, "learning_rate": 2.794124748987016e-06, "loss": 0.5445, "step": 10164 }, { "epoch": 0.66, "grad_norm": 1.1970945596694946, "learning_rate": 2.7931867086140174e-06, "loss": 0.5023, "step": 10165 }, { "epoch": 0.66, "grad_norm": 1.2592072486877441, "learning_rate": 2.7922487646951345e-06, "loss": 0.543, "step": 10166 }, { "epoch": 0.66, "grad_norm": 1.178577184677124, "learning_rate": 2.791310917271364e-06, "loss": 0.5165, "step": 10167 }, { "epoch": 0.66, "grad_norm": 1.2015924453735352, "learning_rate": 2.790373166383692e-06, "loss": 0.5391, "step": 10168 }, { "epoch": 0.66, "grad_norm": 1.2386845350265503, "learning_rate": 2.7894355120731087e-06, "loss": 0.4752, "step": 10169 }, { "epoch": 0.66, "grad_norm": 1.1228017807006836, "learning_rate": 2.7884979543805953e-06, "loss": 0.5588, "step": 10170 }, { "epoch": 0.66, "grad_norm": 1.1300280094146729, "learning_rate": 2.7875604933471306e-06, "loss": 0.4809, "step": 10171 }, { "epoch": 0.66, "grad_norm": 1.1092349290847778, "learning_rate": 2.7866231290136907e-06, "loss": 0.484, "step": 10172 }, { "epoch": 0.66, "grad_norm": 1.2552273273468018, "learning_rate": 2.785685861421241e-06, "loss": 0.5604, "step": 10173 }, { "epoch": 0.66, "grad_norm": 1.2100415229797363, "learning_rate": 2.78474869061075e-06, "loss": 0.5658, "step": 10174 }, { "epoch": 0.66, "grad_norm": 1.1674013137817383, "learning_rate": 2.7838116166231775e-06, "loss": 0.4989, "step": 10175 }, { "epoch": 0.66, "grad_norm": 1.2384289503097534, "learning_rate": 2.7828746394994823e-06, "loss": 0.5257, "step": 10176 }, { "epoch": 0.66, "grad_norm": 1.1859030723571777, "learning_rate": 2.7819377592806164e-06, "loss": 0.554, "step": 10177 }, { "epoch": 0.66, "grad_norm": 1.2580329179763794, "learning_rate": 2.7810009760075296e-06, "loss": 0.5758, "step": 10178 }, { "epoch": 0.66, "grad_norm": 1.629270315170288, "learning_rate": 2.780064289721163e-06, "loss": 0.5194, "step": 10179 }, { "epoch": 0.66, "grad_norm": 1.2098982334136963, "learning_rate": 2.7791277004624596e-06, "loss": 0.5455, "step": 10180 }, { "epoch": 0.66, "grad_norm": 1.1505073308944702, "learning_rate": 2.7781912082723535e-06, "loss": 0.5471, "step": 10181 }, { "epoch": 0.66, "grad_norm": 1.2517732381820679, "learning_rate": 2.7772548131917775e-06, "loss": 0.5275, "step": 10182 }, { "epoch": 0.66, "grad_norm": 1.1516538858413696, "learning_rate": 2.7763185152616594e-06, "loss": 0.5055, "step": 10183 }, { "epoch": 0.66, "grad_norm": 1.3470971584320068, "learning_rate": 2.7753823145229236e-06, "loss": 0.4979, "step": 10184 }, { "epoch": 0.66, "grad_norm": 1.2385501861572266, "learning_rate": 2.774446211016485e-06, "loss": 0.5622, "step": 10185 }, { "epoch": 0.66, "grad_norm": 1.2468936443328857, "learning_rate": 2.7735102047832606e-06, "loss": 0.5209, "step": 10186 }, { "epoch": 0.66, "grad_norm": 1.298831820487976, "learning_rate": 2.772574295864161e-06, "loss": 0.5023, "step": 10187 }, { "epoch": 0.66, "grad_norm": 1.2529430389404297, "learning_rate": 2.771638484300092e-06, "loss": 0.5816, "step": 10188 }, { "epoch": 0.66, "grad_norm": 1.1564240455627441, "learning_rate": 2.7707027701319577e-06, "loss": 0.4767, "step": 10189 }, { "epoch": 0.66, "grad_norm": 1.1600490808486938, "learning_rate": 2.7697671534006516e-06, "loss": 0.5086, "step": 10190 }, { "epoch": 0.66, "grad_norm": 1.1501617431640625, "learning_rate": 2.768831634147069e-06, "loss": 0.5, "step": 10191 }, { "epoch": 0.66, "grad_norm": 1.0915882587432861, "learning_rate": 2.767896212412099e-06, "loss": 0.5034, "step": 10192 }, { "epoch": 0.66, "grad_norm": 1.1922597885131836, "learning_rate": 2.766960888236627e-06, "loss": 0.5118, "step": 10193 }, { "epoch": 0.66, "grad_norm": 1.2291004657745361, "learning_rate": 2.7660256616615332e-06, "loss": 0.5147, "step": 10194 }, { "epoch": 0.66, "grad_norm": 1.170229434967041, "learning_rate": 2.7650905327276957e-06, "loss": 0.5048, "step": 10195 }, { "epoch": 0.66, "grad_norm": 1.2538365125656128, "learning_rate": 2.7641555014759834e-06, "loss": 0.5762, "step": 10196 }, { "epoch": 0.66, "grad_norm": 1.235392689704895, "learning_rate": 2.763220567947265e-06, "loss": 0.563, "step": 10197 }, { "epoch": 0.66, "grad_norm": 1.1311622858047485, "learning_rate": 2.762285732182405e-06, "loss": 0.5257, "step": 10198 }, { "epoch": 0.66, "grad_norm": 1.1365412473678589, "learning_rate": 2.761350994222262e-06, "loss": 0.5136, "step": 10199 }, { "epoch": 0.66, "grad_norm": 1.244707703590393, "learning_rate": 2.760416354107691e-06, "loss": 0.4939, "step": 10200 }, { "epoch": 0.66, "grad_norm": 1.345991611480713, "learning_rate": 2.7594818118795465e-06, "loss": 0.521, "step": 10201 }, { "epoch": 0.66, "grad_norm": 1.2324192523956299, "learning_rate": 2.758547367578668e-06, "loss": 0.5198, "step": 10202 }, { "epoch": 0.66, "grad_norm": 1.2890782356262207, "learning_rate": 2.7576130212459006e-06, "loss": 0.5543, "step": 10203 }, { "epoch": 0.66, "grad_norm": 1.2253543138504028, "learning_rate": 2.7566787729220833e-06, "loss": 0.504, "step": 10204 }, { "epoch": 0.66, "grad_norm": 1.2875003814697266, "learning_rate": 2.7557446226480493e-06, "loss": 0.5517, "step": 10205 }, { "epoch": 0.66, "grad_norm": 1.218065619468689, "learning_rate": 2.754810570464628e-06, "loss": 0.5141, "step": 10206 }, { "epoch": 0.66, "grad_norm": 1.2368744611740112, "learning_rate": 2.7538766164126447e-06, "loss": 0.5467, "step": 10207 }, { "epoch": 0.66, "grad_norm": 1.2060163021087646, "learning_rate": 2.752942760532918e-06, "loss": 0.5298, "step": 10208 }, { "epoch": 0.66, "grad_norm": 1.2621341943740845, "learning_rate": 2.752009002866266e-06, "loss": 0.5137, "step": 10209 }, { "epoch": 0.66, "grad_norm": 1.1863021850585938, "learning_rate": 2.7510753434535e-06, "loss": 0.507, "step": 10210 }, { "epoch": 0.66, "grad_norm": 1.0925980806350708, "learning_rate": 2.7501417823354287e-06, "loss": 0.5056, "step": 10211 }, { "epoch": 0.66, "grad_norm": 1.2297664880752563, "learning_rate": 2.749208319552855e-06, "loss": 0.5303, "step": 10212 }, { "epoch": 0.66, "grad_norm": 1.1171691417694092, "learning_rate": 2.7482749551465815e-06, "loss": 0.5003, "step": 10213 }, { "epoch": 0.66, "grad_norm": 1.3110485076904297, "learning_rate": 2.7473416891573976e-06, "loss": 0.5571, "step": 10214 }, { "epoch": 0.66, "grad_norm": 1.1983764171600342, "learning_rate": 2.7464085216260967e-06, "loss": 0.51, "step": 10215 }, { "epoch": 0.66, "grad_norm": 1.2181520462036133, "learning_rate": 2.7454754525934644e-06, "loss": 0.5499, "step": 10216 }, { "epoch": 0.66, "grad_norm": 1.1761469841003418, "learning_rate": 2.7445424821002843e-06, "loss": 0.5047, "step": 10217 }, { "epoch": 0.66, "grad_norm": 1.1361509561538696, "learning_rate": 2.743609610187332e-06, "loss": 0.4837, "step": 10218 }, { "epoch": 0.66, "grad_norm": 1.2978663444519043, "learning_rate": 2.742676836895385e-06, "loss": 0.5064, "step": 10219 }, { "epoch": 0.66, "grad_norm": 1.1854838132858276, "learning_rate": 2.7417441622652075e-06, "loss": 0.504, "step": 10220 }, { "epoch": 0.66, "grad_norm": 1.290607213973999, "learning_rate": 2.7408115863375663e-06, "loss": 0.5081, "step": 10221 }, { "epoch": 0.66, "grad_norm": 1.1410850286483765, "learning_rate": 2.739879109153221e-06, "loss": 0.5061, "step": 10222 }, { "epoch": 0.66, "grad_norm": 1.5642940998077393, "learning_rate": 2.7389467307529293e-06, "loss": 0.527, "step": 10223 }, { "epoch": 0.66, "grad_norm": 1.243598222732544, "learning_rate": 2.7380144511774427e-06, "loss": 0.5813, "step": 10224 }, { "epoch": 0.66, "grad_norm": 1.0946203470230103, "learning_rate": 2.73708227046751e-06, "loss": 0.5118, "step": 10225 }, { "epoch": 0.66, "grad_norm": 1.2099405527114868, "learning_rate": 2.736150188663871e-06, "loss": 0.5105, "step": 10226 }, { "epoch": 0.66, "grad_norm": 1.1383439302444458, "learning_rate": 2.7352182058072664e-06, "loss": 0.5352, "step": 10227 }, { "epoch": 0.66, "grad_norm": 1.3893533945083618, "learning_rate": 2.734286321938431e-06, "loss": 0.5698, "step": 10228 }, { "epoch": 0.66, "grad_norm": 1.1047065258026123, "learning_rate": 2.733354537098094e-06, "loss": 0.4631, "step": 10229 }, { "epoch": 0.66, "grad_norm": 1.2974151372909546, "learning_rate": 2.7324228513269847e-06, "loss": 0.5238, "step": 10230 }, { "epoch": 0.66, "grad_norm": 1.1396963596343994, "learning_rate": 2.7314912646658205e-06, "loss": 0.5609, "step": 10231 }, { "epoch": 0.66, "grad_norm": 1.184896469116211, "learning_rate": 2.73055977715532e-06, "loss": 0.4775, "step": 10232 }, { "epoch": 0.66, "grad_norm": 1.3218414783477783, "learning_rate": 2.729628388836196e-06, "loss": 0.5313, "step": 10233 }, { "epoch": 0.66, "grad_norm": 1.201925277709961, "learning_rate": 2.728697099749158e-06, "loss": 0.5266, "step": 10234 }, { "epoch": 0.66, "grad_norm": 1.0532439947128296, "learning_rate": 2.7277659099349095e-06, "loss": 0.4893, "step": 10235 }, { "epoch": 0.66, "grad_norm": 1.2715381383895874, "learning_rate": 2.726834819434153e-06, "loss": 0.5212, "step": 10236 }, { "epoch": 0.66, "grad_norm": 1.2484052181243896, "learning_rate": 2.7259038282875795e-06, "loss": 0.5181, "step": 10237 }, { "epoch": 0.66, "grad_norm": 1.1699612140655518, "learning_rate": 2.7249729365358824e-06, "loss": 0.5228, "step": 10238 }, { "epoch": 0.66, "grad_norm": 1.362134337425232, "learning_rate": 2.7240421442197484e-06, "loss": 0.5305, "step": 10239 }, { "epoch": 0.66, "grad_norm": 1.3010237216949463, "learning_rate": 2.7231114513798608e-06, "loss": 0.5139, "step": 10240 }, { "epoch": 0.66, "grad_norm": 1.1039412021636963, "learning_rate": 2.7221808580568967e-06, "loss": 0.5039, "step": 10241 }, { "epoch": 0.66, "grad_norm": 1.5034297704696655, "learning_rate": 2.7212503642915323e-06, "loss": 0.4819, "step": 10242 }, { "epoch": 0.66, "grad_norm": 1.3030400276184082, "learning_rate": 2.7203199701244336e-06, "loss": 0.515, "step": 10243 }, { "epoch": 0.66, "grad_norm": 1.2920464277267456, "learning_rate": 2.7193896755962672e-06, "loss": 0.5572, "step": 10244 }, { "epoch": 0.66, "grad_norm": 1.2664536237716675, "learning_rate": 2.7184594807476936e-06, "loss": 0.4932, "step": 10245 }, { "epoch": 0.66, "grad_norm": 1.366476058959961, "learning_rate": 2.717529385619371e-06, "loss": 0.5285, "step": 10246 }, { "epoch": 0.66, "grad_norm": 1.1776126623153687, "learning_rate": 2.7165993902519505e-06, "loss": 0.5063, "step": 10247 }, { "epoch": 0.66, "grad_norm": 1.5391196012496948, "learning_rate": 2.7156694946860782e-06, "loss": 0.5022, "step": 10248 }, { "epoch": 0.66, "grad_norm": 1.1479942798614502, "learning_rate": 2.7147396989623987e-06, "loss": 0.5003, "step": 10249 }, { "epoch": 0.66, "grad_norm": 1.0683302879333496, "learning_rate": 2.7138100031215506e-06, "loss": 0.5192, "step": 10250 }, { "epoch": 0.66, "grad_norm": 1.158613681793213, "learning_rate": 2.7128804072041714e-06, "loss": 0.5427, "step": 10251 }, { "epoch": 0.66, "grad_norm": 1.1347179412841797, "learning_rate": 2.7119509112508865e-06, "loss": 0.505, "step": 10252 }, { "epoch": 0.66, "grad_norm": 1.131700038909912, "learning_rate": 2.711021515302323e-06, "loss": 0.5169, "step": 10253 }, { "epoch": 0.66, "grad_norm": 1.1605666875839233, "learning_rate": 2.710092219399105e-06, "loss": 0.5306, "step": 10254 }, { "epoch": 0.66, "grad_norm": 1.1941211223602295, "learning_rate": 2.7091630235818473e-06, "loss": 0.4985, "step": 10255 }, { "epoch": 0.66, "grad_norm": 1.1168001890182495, "learning_rate": 2.708233927891165e-06, "loss": 0.4953, "step": 10256 }, { "epoch": 0.66, "grad_norm": 1.1981470584869385, "learning_rate": 2.7073049323676635e-06, "loss": 0.4831, "step": 10257 }, { "epoch": 0.66, "grad_norm": 1.1236085891723633, "learning_rate": 2.7063760370519475e-06, "loss": 0.4813, "step": 10258 }, { "epoch": 0.66, "grad_norm": 1.1506669521331787, "learning_rate": 2.7054472419846183e-06, "loss": 0.4582, "step": 10259 }, { "epoch": 0.66, "grad_norm": 1.1627638339996338, "learning_rate": 2.7045185472062717e-06, "loss": 0.5202, "step": 10260 }, { "epoch": 0.66, "grad_norm": 1.182820200920105, "learning_rate": 2.703589952757495e-06, "loss": 0.5305, "step": 10261 }, { "epoch": 0.66, "grad_norm": 1.1277234554290771, "learning_rate": 2.7026614586788763e-06, "loss": 0.494, "step": 10262 }, { "epoch": 0.66, "grad_norm": 1.2846324443817139, "learning_rate": 2.7017330650109986e-06, "loss": 0.5186, "step": 10263 }, { "epoch": 0.66, "grad_norm": 1.2102266550064087, "learning_rate": 2.7008047717944386e-06, "loss": 0.5231, "step": 10264 }, { "epoch": 0.66, "grad_norm": 1.2854706048965454, "learning_rate": 2.69987657906977e-06, "loss": 0.488, "step": 10265 }, { "epoch": 0.66, "grad_norm": 1.2013988494873047, "learning_rate": 2.6989484868775634e-06, "loss": 0.5852, "step": 10266 }, { "epoch": 0.66, "grad_norm": 1.2185829877853394, "learning_rate": 2.69802049525838e-06, "loss": 0.5255, "step": 10267 }, { "epoch": 0.66, "grad_norm": 1.2540979385375977, "learning_rate": 2.6970926042527816e-06, "loss": 0.5237, "step": 10268 }, { "epoch": 0.66, "grad_norm": 1.1989973783493042, "learning_rate": 2.6961648139013242e-06, "loss": 0.5288, "step": 10269 }, { "epoch": 0.66, "grad_norm": 1.168241024017334, "learning_rate": 2.6952371242445584e-06, "loss": 0.4978, "step": 10270 }, { "epoch": 0.66, "grad_norm": 1.092568278312683, "learning_rate": 2.6943095353230335e-06, "loss": 0.5143, "step": 10271 }, { "epoch": 0.66, "grad_norm": 1.1979581117630005, "learning_rate": 2.6933820471772888e-06, "loss": 0.5399, "step": 10272 }, { "epoch": 0.66, "grad_norm": 1.29599130153656, "learning_rate": 2.692454659847863e-06, "loss": 0.5883, "step": 10273 }, { "epoch": 0.66, "grad_norm": 1.2537733316421509, "learning_rate": 2.69152737337529e-06, "loss": 0.565, "step": 10274 }, { "epoch": 0.66, "grad_norm": 1.1335535049438477, "learning_rate": 2.6906001878001e-06, "loss": 0.5351, "step": 10275 }, { "epoch": 0.66, "grad_norm": 1.2239457368850708, "learning_rate": 2.6896731031628177e-06, "loss": 0.5085, "step": 10276 }, { "epoch": 0.66, "grad_norm": 1.3850771188735962, "learning_rate": 2.6887461195039654e-06, "loss": 0.4976, "step": 10277 }, { "epoch": 0.66, "grad_norm": 1.3052458763122559, "learning_rate": 2.6878192368640547e-06, "loss": 0.5356, "step": 10278 }, { "epoch": 0.66, "grad_norm": 1.1987688541412354, "learning_rate": 2.6868924552836e-06, "loss": 0.5119, "step": 10279 }, { "epoch": 0.66, "grad_norm": 1.2770318984985352, "learning_rate": 2.6859657748031083e-06, "loss": 0.5298, "step": 10280 }, { "epoch": 0.66, "grad_norm": 1.1702067852020264, "learning_rate": 2.6850391954630815e-06, "loss": 0.5295, "step": 10281 }, { "epoch": 0.66, "grad_norm": 1.3230944871902466, "learning_rate": 2.6841127173040197e-06, "loss": 0.5453, "step": 10282 }, { "epoch": 0.66, "grad_norm": 1.1947104930877686, "learning_rate": 2.6831863403664176e-06, "loss": 0.5221, "step": 10283 }, { "epoch": 0.66, "grad_norm": 1.217030644416809, "learning_rate": 2.6822600646907614e-06, "loss": 0.5419, "step": 10284 }, { "epoch": 0.66, "grad_norm": 1.1286449432373047, "learning_rate": 2.6813338903175374e-06, "loss": 0.4745, "step": 10285 }, { "epoch": 0.66, "grad_norm": 1.1684980392456055, "learning_rate": 2.6804078172872273e-06, "loss": 0.5234, "step": 10286 }, { "epoch": 0.66, "grad_norm": 1.2629344463348389, "learning_rate": 2.679481845640306e-06, "loss": 0.5078, "step": 10287 }, { "epoch": 0.66, "grad_norm": 1.1813381910324097, "learning_rate": 2.6785559754172473e-06, "loss": 0.5328, "step": 10288 }, { "epoch": 0.66, "grad_norm": 1.3591381311416626, "learning_rate": 2.677630206658519e-06, "loss": 0.5035, "step": 10289 }, { "epoch": 0.66, "grad_norm": 1.2027896642684937, "learning_rate": 2.6767045394045804e-06, "loss": 0.5096, "step": 10290 }, { "epoch": 0.66, "grad_norm": 1.1650842428207397, "learning_rate": 2.6757789736958928e-06, "loss": 0.5362, "step": 10291 }, { "epoch": 0.66, "grad_norm": 1.2618508338928223, "learning_rate": 2.6748535095729088e-06, "loss": 0.5232, "step": 10292 }, { "epoch": 0.66, "grad_norm": 1.2880308628082275, "learning_rate": 2.6739281470760797e-06, "loss": 0.5536, "step": 10293 }, { "epoch": 0.66, "grad_norm": 2.000264883041382, "learning_rate": 2.6730028862458498e-06, "loss": 0.5256, "step": 10294 }, { "epoch": 0.66, "grad_norm": 1.3342254161834717, "learning_rate": 2.672077727122662e-06, "loss": 0.4762, "step": 10295 }, { "epoch": 0.66, "grad_norm": 1.1577717065811157, "learning_rate": 2.671152669746948e-06, "loss": 0.4962, "step": 10296 }, { "epoch": 0.66, "grad_norm": 1.2670818567276, "learning_rate": 2.6702277141591433e-06, "loss": 0.5309, "step": 10297 }, { "epoch": 0.66, "grad_norm": 1.2986444234848022, "learning_rate": 2.669302860399674e-06, "loss": 0.5601, "step": 10298 }, { "epoch": 0.66, "grad_norm": 1.1385738849639893, "learning_rate": 2.6683781085089626e-06, "loss": 0.5371, "step": 10299 }, { "epoch": 0.66, "grad_norm": 1.1082173585891724, "learning_rate": 2.667453458527429e-06, "loss": 0.4937, "step": 10300 }, { "epoch": 0.66, "grad_norm": 1.1903742551803589, "learning_rate": 2.6665289104954887e-06, "loss": 0.5322, "step": 10301 }, { "epoch": 0.66, "grad_norm": 1.2865363359451294, "learning_rate": 2.6656044644535472e-06, "loss": 0.5721, "step": 10302 }, { "epoch": 0.67, "grad_norm": 1.09512460231781, "learning_rate": 2.6646801204420127e-06, "loss": 0.5174, "step": 10303 }, { "epoch": 0.67, "grad_norm": 1.2670576572418213, "learning_rate": 2.663755878501284e-06, "loss": 0.5283, "step": 10304 }, { "epoch": 0.67, "grad_norm": 1.306948184967041, "learning_rate": 2.6628317386717584e-06, "loss": 0.4837, "step": 10305 }, { "epoch": 0.67, "grad_norm": 1.2324776649475098, "learning_rate": 2.661907700993827e-06, "loss": 0.5269, "step": 10306 }, { "epoch": 0.67, "grad_norm": 1.2219407558441162, "learning_rate": 2.66098376550788e-06, "loss": 0.4942, "step": 10307 }, { "epoch": 0.67, "grad_norm": 1.1761504411697388, "learning_rate": 2.6600599322542963e-06, "loss": 0.5092, "step": 10308 }, { "epoch": 0.67, "grad_norm": 2.322883367538452, "learning_rate": 2.6591362012734557e-06, "loss": 0.537, "step": 10309 }, { "epoch": 0.67, "grad_norm": 1.2793924808502197, "learning_rate": 2.6582125726057316e-06, "loss": 0.5199, "step": 10310 }, { "epoch": 0.67, "grad_norm": 1.2900644540786743, "learning_rate": 2.6572890462914948e-06, "loss": 0.5239, "step": 10311 }, { "epoch": 0.67, "grad_norm": 1.224980115890503, "learning_rate": 2.656365622371111e-06, "loss": 0.465, "step": 10312 }, { "epoch": 0.67, "grad_norm": 1.1085715293884277, "learning_rate": 2.6554423008849374e-06, "loss": 0.4983, "step": 10313 }, { "epoch": 0.67, "grad_norm": 1.2755182981491089, "learning_rate": 2.654519081873332e-06, "loss": 0.5047, "step": 10314 }, { "epoch": 0.67, "grad_norm": 1.2273890972137451, "learning_rate": 2.653595965376645e-06, "loss": 0.4829, "step": 10315 }, { "epoch": 0.67, "grad_norm": 1.1317061185836792, "learning_rate": 2.652672951435226e-06, "loss": 0.5131, "step": 10316 }, { "epoch": 0.67, "grad_norm": 1.2468070983886719, "learning_rate": 2.651750040089416e-06, "loss": 0.5042, "step": 10317 }, { "epoch": 0.67, "grad_norm": 1.166130781173706, "learning_rate": 2.6508272313795545e-06, "loss": 0.4562, "step": 10318 }, { "epoch": 0.67, "grad_norm": 1.3467904329299927, "learning_rate": 2.649904525345972e-06, "loss": 0.5308, "step": 10319 }, { "epoch": 0.67, "grad_norm": 1.2154120206832886, "learning_rate": 2.6489819220289993e-06, "loss": 0.5294, "step": 10320 }, { "epoch": 0.67, "grad_norm": 1.2711787223815918, "learning_rate": 2.6480594214689612e-06, "loss": 0.5784, "step": 10321 }, { "epoch": 0.67, "grad_norm": 1.2417949438095093, "learning_rate": 2.647137023706178e-06, "loss": 0.5232, "step": 10322 }, { "epoch": 0.67, "grad_norm": 1.1727265119552612, "learning_rate": 2.646214728780964e-06, "loss": 0.5185, "step": 10323 }, { "epoch": 0.67, "grad_norm": 1.2118594646453857, "learning_rate": 2.6452925367336346e-06, "loss": 0.5163, "step": 10324 }, { "epoch": 0.67, "grad_norm": 1.224421739578247, "learning_rate": 2.6443704476044902e-06, "loss": 0.539, "step": 10325 }, { "epoch": 0.67, "grad_norm": 1.1816059350967407, "learning_rate": 2.643448461433836e-06, "loss": 0.5059, "step": 10326 }, { "epoch": 0.67, "grad_norm": 1.2087829113006592, "learning_rate": 2.64252657826197e-06, "loss": 0.5062, "step": 10327 }, { "epoch": 0.67, "grad_norm": 1.4977730512619019, "learning_rate": 2.641604798129185e-06, "loss": 0.5053, "step": 10328 }, { "epoch": 0.67, "grad_norm": 1.4715392589569092, "learning_rate": 2.64068312107577e-06, "loss": 0.4856, "step": 10329 }, { "epoch": 0.67, "grad_norm": 1.1631323099136353, "learning_rate": 2.63976154714201e-06, "loss": 0.4991, "step": 10330 }, { "epoch": 0.67, "grad_norm": 1.1710777282714844, "learning_rate": 2.638840076368181e-06, "loss": 0.4987, "step": 10331 }, { "epoch": 0.67, "grad_norm": 1.1889317035675049, "learning_rate": 2.63791870879456e-06, "loss": 0.4934, "step": 10332 }, { "epoch": 0.67, "grad_norm": 1.243202805519104, "learning_rate": 2.6369974444614186e-06, "loss": 0.562, "step": 10333 }, { "epoch": 0.67, "grad_norm": 1.1876819133758545, "learning_rate": 2.636076283409023e-06, "loss": 0.4998, "step": 10334 }, { "epoch": 0.67, "grad_norm": 1.246573567390442, "learning_rate": 2.6351552256776334e-06, "loss": 0.504, "step": 10335 }, { "epoch": 0.67, "grad_norm": 1.418649435043335, "learning_rate": 2.63423427130751e-06, "loss": 0.5303, "step": 10336 }, { "epoch": 0.67, "grad_norm": 1.358864426612854, "learning_rate": 2.6333134203389e-06, "loss": 0.5338, "step": 10337 }, { "epoch": 0.67, "grad_norm": 1.2427809238433838, "learning_rate": 2.632392672812054e-06, "loss": 0.5126, "step": 10338 }, { "epoch": 0.67, "grad_norm": 1.2736036777496338, "learning_rate": 2.631472028767217e-06, "loss": 0.5086, "step": 10339 }, { "epoch": 0.67, "grad_norm": 1.5314630270004272, "learning_rate": 2.6305514882446283e-06, "loss": 0.4494, "step": 10340 }, { "epoch": 0.67, "grad_norm": 1.1917742490768433, "learning_rate": 2.629631051284518e-06, "loss": 0.5174, "step": 10341 }, { "epoch": 0.67, "grad_norm": 1.176405906677246, "learning_rate": 2.6287107179271196e-06, "loss": 0.5565, "step": 10342 }, { "epoch": 0.67, "grad_norm": 1.3889204263687134, "learning_rate": 2.6277904882126577e-06, "loss": 0.4993, "step": 10343 }, { "epoch": 0.67, "grad_norm": 1.2436463832855225, "learning_rate": 2.6268703621813528e-06, "loss": 0.4811, "step": 10344 }, { "epoch": 0.67, "grad_norm": 1.2096412181854248, "learning_rate": 2.625950339873424e-06, "loss": 0.5706, "step": 10345 }, { "epoch": 0.67, "grad_norm": 1.1714638471603394, "learning_rate": 2.6250304213290782e-06, "loss": 0.5409, "step": 10346 }, { "epoch": 0.67, "grad_norm": 1.183335781097412, "learning_rate": 2.624110606588526e-06, "loss": 0.5058, "step": 10347 }, { "epoch": 0.67, "grad_norm": 1.3056855201721191, "learning_rate": 2.623190895691968e-06, "loss": 0.5161, "step": 10348 }, { "epoch": 0.67, "grad_norm": 1.2446906566619873, "learning_rate": 2.622271288679607e-06, "loss": 0.5851, "step": 10349 }, { "epoch": 0.67, "grad_norm": 1.1241962909698486, "learning_rate": 2.621351785591631e-06, "loss": 0.4999, "step": 10350 }, { "epoch": 0.67, "grad_norm": 1.249322533607483, "learning_rate": 2.6204323864682312e-06, "loss": 0.5203, "step": 10351 }, { "epoch": 0.67, "grad_norm": 1.2087651491165161, "learning_rate": 2.6195130913495923e-06, "loss": 0.5476, "step": 10352 }, { "epoch": 0.67, "grad_norm": 1.1154342889785767, "learning_rate": 2.6185939002758953e-06, "loss": 0.4739, "step": 10353 }, { "epoch": 0.67, "grad_norm": 1.1918632984161377, "learning_rate": 2.617674813287316e-06, "loss": 0.4976, "step": 10354 }, { "epoch": 0.67, "grad_norm": 1.2436113357543945, "learning_rate": 2.6167558304240227e-06, "loss": 0.5709, "step": 10355 }, { "epoch": 0.67, "grad_norm": 1.269912838935852, "learning_rate": 2.6158369517261824e-06, "loss": 0.5069, "step": 10356 }, { "epoch": 0.67, "grad_norm": 1.3700778484344482, "learning_rate": 2.614918177233958e-06, "loss": 0.5038, "step": 10357 }, { "epoch": 0.67, "grad_norm": 1.149827241897583, "learning_rate": 2.613999506987507e-06, "loss": 0.5266, "step": 10358 }, { "epoch": 0.67, "grad_norm": 1.2183752059936523, "learning_rate": 2.613080941026983e-06, "loss": 0.4819, "step": 10359 }, { "epoch": 0.67, "grad_norm": 1.2349090576171875, "learning_rate": 2.612162479392531e-06, "loss": 0.5602, "step": 10360 }, { "epoch": 0.67, "grad_norm": 1.1926136016845703, "learning_rate": 2.6112441221242964e-06, "loss": 0.5572, "step": 10361 }, { "epoch": 0.67, "grad_norm": 1.2069941759109497, "learning_rate": 2.610325869262418e-06, "loss": 0.521, "step": 10362 }, { "epoch": 0.67, "grad_norm": 1.2785881757736206, "learning_rate": 2.6094077208470304e-06, "loss": 0.5411, "step": 10363 }, { "epoch": 0.67, "grad_norm": 1.6424602270126343, "learning_rate": 2.6084896769182633e-06, "loss": 0.5612, "step": 10364 }, { "epoch": 0.67, "grad_norm": 1.1472810506820679, "learning_rate": 2.6075717375162447e-06, "loss": 0.5113, "step": 10365 }, { "epoch": 0.67, "grad_norm": 1.1747195720672607, "learning_rate": 2.6066539026810905e-06, "loss": 0.4813, "step": 10366 }, { "epoch": 0.67, "grad_norm": 1.2486833333969116, "learning_rate": 2.6057361724529193e-06, "loss": 0.4908, "step": 10367 }, { "epoch": 0.67, "grad_norm": 1.1925777196884155, "learning_rate": 2.6048185468718436e-06, "loss": 0.5339, "step": 10368 }, { "epoch": 0.67, "grad_norm": 1.5177156925201416, "learning_rate": 2.6039010259779685e-06, "loss": 0.5312, "step": 10369 }, { "epoch": 0.67, "grad_norm": 1.3541728258132935, "learning_rate": 2.602983609811398e-06, "loss": 0.5114, "step": 10370 }, { "epoch": 0.67, "grad_norm": 1.2604633569717407, "learning_rate": 2.6020662984122314e-06, "loss": 0.5698, "step": 10371 }, { "epoch": 0.67, "grad_norm": 1.2887401580810547, "learning_rate": 2.6011490918205584e-06, "loss": 0.5508, "step": 10372 }, { "epoch": 0.67, "grad_norm": 1.2665293216705322, "learning_rate": 2.6002319900764688e-06, "loss": 0.5183, "step": 10373 }, { "epoch": 0.67, "grad_norm": 1.1731089353561401, "learning_rate": 2.599314993220048e-06, "loss": 0.5276, "step": 10374 }, { "epoch": 0.67, "grad_norm": 1.1757928133010864, "learning_rate": 2.598398101291375e-06, "loss": 0.5391, "step": 10375 }, { "epoch": 0.67, "grad_norm": 1.2412856817245483, "learning_rate": 2.5974813143305244e-06, "loss": 0.46, "step": 10376 }, { "epoch": 0.67, "grad_norm": 1.1532407999038696, "learning_rate": 2.5965646323775695e-06, "loss": 0.5217, "step": 10377 }, { "epoch": 0.67, "grad_norm": 1.3689334392547607, "learning_rate": 2.5956480554725717e-06, "loss": 0.5245, "step": 10378 }, { "epoch": 0.67, "grad_norm": 1.153821349143982, "learning_rate": 2.594731583655593e-06, "loss": 0.4948, "step": 10379 }, { "epoch": 0.67, "grad_norm": 1.1892756223678589, "learning_rate": 2.593815216966692e-06, "loss": 0.5279, "step": 10380 }, { "epoch": 0.67, "grad_norm": 1.2224178314208984, "learning_rate": 2.5928989554459195e-06, "loss": 0.5192, "step": 10381 }, { "epoch": 0.67, "grad_norm": 1.33133864402771, "learning_rate": 2.5919827991333237e-06, "loss": 0.5505, "step": 10382 }, { "epoch": 0.67, "grad_norm": 1.1970142126083374, "learning_rate": 2.591066748068949e-06, "loss": 0.4696, "step": 10383 }, { "epoch": 0.67, "grad_norm": 1.333259105682373, "learning_rate": 2.590150802292829e-06, "loss": 0.5384, "step": 10384 }, { "epoch": 0.67, "grad_norm": 1.2065304517745972, "learning_rate": 2.589234961845e-06, "loss": 0.468, "step": 10385 }, { "epoch": 0.67, "grad_norm": 1.1295305490493774, "learning_rate": 2.588319226765491e-06, "loss": 0.499, "step": 10386 }, { "epoch": 0.67, "grad_norm": 1.1362388134002686, "learning_rate": 2.5874035970943267e-06, "loss": 0.4718, "step": 10387 }, { "epoch": 0.67, "grad_norm": 1.2050156593322754, "learning_rate": 2.586488072871527e-06, "loss": 0.5447, "step": 10388 }, { "epoch": 0.67, "grad_norm": 1.191189169883728, "learning_rate": 2.5855726541371075e-06, "loss": 0.4873, "step": 10389 }, { "epoch": 0.67, "grad_norm": 1.1253564357757568, "learning_rate": 2.584657340931077e-06, "loss": 0.5243, "step": 10390 }, { "epoch": 0.67, "grad_norm": 1.4412885904312134, "learning_rate": 2.583742133293442e-06, "loss": 0.5093, "step": 10391 }, { "epoch": 0.67, "grad_norm": 1.2647958993911743, "learning_rate": 2.5828270312642044e-06, "loss": 0.503, "step": 10392 }, { "epoch": 0.67, "grad_norm": 1.2974720001220703, "learning_rate": 2.5819120348833605e-06, "loss": 0.5668, "step": 10393 }, { "epoch": 0.67, "grad_norm": 1.305061936378479, "learning_rate": 2.580997144190903e-06, "loss": 0.5574, "step": 10394 }, { "epoch": 0.67, "grad_norm": 1.1910775899887085, "learning_rate": 2.580082359226822e-06, "loss": 0.5094, "step": 10395 }, { "epoch": 0.67, "grad_norm": 1.28313148021698, "learning_rate": 2.579167680031095e-06, "loss": 0.5023, "step": 10396 }, { "epoch": 0.67, "grad_norm": 1.264346718788147, "learning_rate": 2.5782531066437026e-06, "loss": 0.5448, "step": 10397 }, { "epoch": 0.67, "grad_norm": 1.2082988023757935, "learning_rate": 2.577338639104619e-06, "loss": 0.489, "step": 10398 }, { "epoch": 0.67, "grad_norm": 1.1903480291366577, "learning_rate": 2.576424277453813e-06, "loss": 0.5086, "step": 10399 }, { "epoch": 0.67, "grad_norm": 1.2208671569824219, "learning_rate": 2.575510021731251e-06, "loss": 0.4892, "step": 10400 }, { "epoch": 0.67, "grad_norm": 1.2471634149551392, "learning_rate": 2.574595871976888e-06, "loss": 0.5718, "step": 10401 }, { "epoch": 0.67, "grad_norm": 1.2451106309890747, "learning_rate": 2.573681828230683e-06, "loss": 0.5349, "step": 10402 }, { "epoch": 0.67, "grad_norm": 1.1946724653244019, "learning_rate": 2.5727678905325846e-06, "loss": 0.5124, "step": 10403 }, { "epoch": 0.67, "grad_norm": 1.1569095849990845, "learning_rate": 2.5718540589225395e-06, "loss": 0.5094, "step": 10404 }, { "epoch": 0.67, "grad_norm": 1.1437503099441528, "learning_rate": 2.570940333440488e-06, "loss": 0.5059, "step": 10405 }, { "epoch": 0.67, "grad_norm": 1.0550081729888916, "learning_rate": 2.5700267141263706e-06, "loss": 0.4952, "step": 10406 }, { "epoch": 0.67, "grad_norm": 1.3258217573165894, "learning_rate": 2.5691132010201137e-06, "loss": 0.5404, "step": 10407 }, { "epoch": 0.67, "grad_norm": 1.209510087966919, "learning_rate": 2.568199794161647e-06, "loss": 0.5666, "step": 10408 }, { "epoch": 0.67, "grad_norm": 1.1837421655654907, "learning_rate": 2.567286493590893e-06, "loss": 0.4825, "step": 10409 }, { "epoch": 0.67, "grad_norm": 1.1902903318405151, "learning_rate": 2.56637329934777e-06, "loss": 0.5297, "step": 10410 }, { "epoch": 0.67, "grad_norm": 1.2147836685180664, "learning_rate": 2.5654602114721917e-06, "loss": 0.4781, "step": 10411 }, { "epoch": 0.67, "grad_norm": 1.2241711616516113, "learning_rate": 2.5645472300040676e-06, "loss": 0.5351, "step": 10412 }, { "epoch": 0.67, "grad_norm": 1.283610224723816, "learning_rate": 2.5636343549832987e-06, "loss": 0.492, "step": 10413 }, { "epoch": 0.67, "grad_norm": 1.1471495628356934, "learning_rate": 2.5627215864497866e-06, "loss": 0.5406, "step": 10414 }, { "epoch": 0.67, "grad_norm": 1.2025175094604492, "learning_rate": 2.561808924443426e-06, "loss": 0.5263, "step": 10415 }, { "epoch": 0.67, "grad_norm": 1.085119366645813, "learning_rate": 2.5608963690041058e-06, "loss": 0.4578, "step": 10416 }, { "epoch": 0.67, "grad_norm": 1.3046284914016724, "learning_rate": 2.5599839201717124e-06, "loss": 0.5229, "step": 10417 }, { "epoch": 0.67, "grad_norm": 1.3154469728469849, "learning_rate": 2.5590715779861292e-06, "loss": 0.5239, "step": 10418 }, { "epoch": 0.67, "grad_norm": 1.256406307220459, "learning_rate": 2.5581593424872274e-06, "loss": 0.5241, "step": 10419 }, { "epoch": 0.67, "grad_norm": 1.393028736114502, "learning_rate": 2.557247213714881e-06, "loss": 0.5058, "step": 10420 }, { "epoch": 0.67, "grad_norm": 1.224658489227295, "learning_rate": 2.556335191708956e-06, "loss": 0.5044, "step": 10421 }, { "epoch": 0.67, "grad_norm": 1.219727873802185, "learning_rate": 2.5554232765093144e-06, "loss": 0.5454, "step": 10422 }, { "epoch": 0.67, "grad_norm": 1.2584803104400635, "learning_rate": 2.554511468155816e-06, "loss": 0.5264, "step": 10423 }, { "epoch": 0.67, "grad_norm": 1.2595065832138062, "learning_rate": 2.5535997666883127e-06, "loss": 0.5417, "step": 10424 }, { "epoch": 0.67, "grad_norm": 1.2221705913543701, "learning_rate": 2.5526881721466502e-06, "loss": 0.5585, "step": 10425 }, { "epoch": 0.67, "grad_norm": 1.125206470489502, "learning_rate": 2.5517766845706728e-06, "loss": 0.5032, "step": 10426 }, { "epoch": 0.67, "grad_norm": 1.1858631372451782, "learning_rate": 2.5508653040002206e-06, "loss": 0.5065, "step": 10427 }, { "epoch": 0.67, "grad_norm": 1.2105507850646973, "learning_rate": 2.549954030475127e-06, "loss": 0.5487, "step": 10428 }, { "epoch": 0.67, "grad_norm": 1.1679346561431885, "learning_rate": 2.549042864035224e-06, "loss": 0.4857, "step": 10429 }, { "epoch": 0.67, "grad_norm": 1.2711259126663208, "learning_rate": 2.548131804720331e-06, "loss": 0.5363, "step": 10430 }, { "epoch": 0.67, "grad_norm": 1.2406567335128784, "learning_rate": 2.5472208525702715e-06, "loss": 0.5281, "step": 10431 }, { "epoch": 0.67, "grad_norm": 1.24981689453125, "learning_rate": 2.5463100076248605e-06, "loss": 0.5106, "step": 10432 }, { "epoch": 0.67, "grad_norm": 1.2988978624343872, "learning_rate": 2.5453992699239084e-06, "loss": 0.4918, "step": 10433 }, { "epoch": 0.67, "grad_norm": 1.2754614353179932, "learning_rate": 2.5444886395072223e-06, "loss": 0.5592, "step": 10434 }, { "epoch": 0.67, "grad_norm": 1.1668578386306763, "learning_rate": 2.5435781164146014e-06, "loss": 0.5386, "step": 10435 }, { "epoch": 0.67, "grad_norm": 1.2835780382156372, "learning_rate": 2.542667700685843e-06, "loss": 0.5499, "step": 10436 }, { "epoch": 0.67, "grad_norm": 1.2499724626541138, "learning_rate": 2.54175739236074e-06, "loss": 0.5482, "step": 10437 }, { "epoch": 0.67, "grad_norm": 1.3001269102096558, "learning_rate": 2.54084719147908e-06, "loss": 0.5388, "step": 10438 }, { "epoch": 0.67, "grad_norm": 1.2390021085739136, "learning_rate": 2.5399370980806436e-06, "loss": 0.4986, "step": 10439 }, { "epoch": 0.67, "grad_norm": 1.1359524726867676, "learning_rate": 2.5390271122052092e-06, "loss": 0.4839, "step": 10440 }, { "epoch": 0.67, "grad_norm": 1.19949471950531, "learning_rate": 2.5381172338925496e-06, "loss": 0.5219, "step": 10441 }, { "epoch": 0.67, "grad_norm": 1.084418535232544, "learning_rate": 2.537207463182435e-06, "loss": 0.5101, "step": 10442 }, { "epoch": 0.67, "grad_norm": 1.7277206182479858, "learning_rate": 2.53629780011463e-06, "loss": 0.5479, "step": 10443 }, { "epoch": 0.67, "grad_norm": 1.1356277465820312, "learning_rate": 2.5353882447288892e-06, "loss": 0.5001, "step": 10444 }, { "epoch": 0.67, "grad_norm": 1.1435954570770264, "learning_rate": 2.5344787970649696e-06, "loss": 0.4796, "step": 10445 }, { "epoch": 0.67, "grad_norm": 1.1937581300735474, "learning_rate": 2.533569457162621e-06, "loss": 0.519, "step": 10446 }, { "epoch": 0.67, "grad_norm": 1.2282475233078003, "learning_rate": 2.5326602250615894e-06, "loss": 0.5282, "step": 10447 }, { "epoch": 0.67, "grad_norm": 1.2815568447113037, "learning_rate": 2.5317511008016118e-06, "loss": 0.5386, "step": 10448 }, { "epoch": 0.67, "grad_norm": 1.1560183763504028, "learning_rate": 2.5308420844224257e-06, "loss": 0.5406, "step": 10449 }, { "epoch": 0.67, "grad_norm": 1.2447868585586548, "learning_rate": 2.529933175963761e-06, "loss": 0.509, "step": 10450 }, { "epoch": 0.67, "grad_norm": 1.2853782176971436, "learning_rate": 2.529024375465344e-06, "loss": 0.5035, "step": 10451 }, { "epoch": 0.67, "grad_norm": 1.1469050645828247, "learning_rate": 2.5281156829668973e-06, "loss": 0.5623, "step": 10452 }, { "epoch": 0.67, "grad_norm": 1.1896867752075195, "learning_rate": 2.5272070985081387e-06, "loss": 0.5267, "step": 10453 }, { "epoch": 0.67, "grad_norm": 1.2867302894592285, "learning_rate": 2.5262986221287754e-06, "loss": 0.5072, "step": 10454 }, { "epoch": 0.67, "grad_norm": 1.2508621215820312, "learning_rate": 2.525390253868517e-06, "loss": 0.5244, "step": 10455 }, { "epoch": 0.67, "grad_norm": 1.2912521362304688, "learning_rate": 2.524481993767066e-06, "loss": 0.5582, "step": 10456 }, { "epoch": 0.67, "grad_norm": 1.2053974866867065, "learning_rate": 2.5235738418641207e-06, "loss": 0.4944, "step": 10457 }, { "epoch": 0.68, "grad_norm": 1.2621827125549316, "learning_rate": 2.522665798199373e-06, "loss": 0.4973, "step": 10458 }, { "epoch": 0.68, "grad_norm": 1.1847871541976929, "learning_rate": 2.5217578628125138e-06, "loss": 0.5644, "step": 10459 }, { "epoch": 0.68, "grad_norm": 1.1140018701553345, "learning_rate": 2.5208500357432227e-06, "loss": 0.5021, "step": 10460 }, { "epoch": 0.68, "grad_norm": 1.2085521221160889, "learning_rate": 2.51994231703118e-06, "loss": 0.4953, "step": 10461 }, { "epoch": 0.68, "grad_norm": 1.1062875986099243, "learning_rate": 2.51903470671606e-06, "loss": 0.5032, "step": 10462 }, { "epoch": 0.68, "grad_norm": 1.2377324104309082, "learning_rate": 2.5181272048375314e-06, "loss": 0.5097, "step": 10463 }, { "epoch": 0.68, "grad_norm": 1.2001779079437256, "learning_rate": 2.5172198114352604e-06, "loss": 0.523, "step": 10464 }, { "epoch": 0.68, "grad_norm": 1.1772270202636719, "learning_rate": 2.5163125265489073e-06, "loss": 0.4834, "step": 10465 }, { "epoch": 0.68, "grad_norm": 1.1255016326904297, "learning_rate": 2.515405350218123e-06, "loss": 0.4946, "step": 10466 }, { "epoch": 0.68, "grad_norm": 1.2900842428207397, "learning_rate": 2.5144982824825616e-06, "loss": 0.5813, "step": 10467 }, { "epoch": 0.68, "grad_norm": 1.2951037883758545, "learning_rate": 2.513591323381868e-06, "loss": 0.5449, "step": 10468 }, { "epoch": 0.68, "grad_norm": 1.091072678565979, "learning_rate": 2.512684472955681e-06, "loss": 0.5201, "step": 10469 }, { "epoch": 0.68, "grad_norm": 1.2444865703582764, "learning_rate": 2.5117777312436393e-06, "loss": 0.5492, "step": 10470 }, { "epoch": 0.68, "grad_norm": 1.1802082061767578, "learning_rate": 2.5108710982853747e-06, "loss": 0.5394, "step": 10471 }, { "epoch": 0.68, "grad_norm": 1.170310139656067, "learning_rate": 2.509964574120511e-06, "loss": 0.5147, "step": 10472 }, { "epoch": 0.68, "grad_norm": 1.2543034553527832, "learning_rate": 2.509058158788671e-06, "loss": 0.4999, "step": 10473 }, { "epoch": 0.68, "grad_norm": 1.2012687921524048, "learning_rate": 2.5081518523294723e-06, "loss": 0.5278, "step": 10474 }, { "epoch": 0.68, "grad_norm": 1.466234564781189, "learning_rate": 2.5072456547825273e-06, "loss": 0.5969, "step": 10475 }, { "epoch": 0.68, "grad_norm": 1.122194528579712, "learning_rate": 2.5063395661874435e-06, "loss": 0.5125, "step": 10476 }, { "epoch": 0.68, "grad_norm": 1.2961345911026, "learning_rate": 2.505433586583825e-06, "loss": 0.5482, "step": 10477 }, { "epoch": 0.68, "grad_norm": 1.2088584899902344, "learning_rate": 2.5045277160112665e-06, "loss": 0.5279, "step": 10478 }, { "epoch": 0.68, "grad_norm": 1.322607398033142, "learning_rate": 2.503621954509363e-06, "loss": 0.5573, "step": 10479 }, { "epoch": 0.68, "grad_norm": 1.2226725816726685, "learning_rate": 2.502716302117703e-06, "loss": 0.5227, "step": 10480 }, { "epoch": 0.68, "grad_norm": 1.2761867046356201, "learning_rate": 2.5018107588758712e-06, "loss": 0.5114, "step": 10481 }, { "epoch": 0.68, "grad_norm": 1.2667057514190674, "learning_rate": 2.500905324823445e-06, "loss": 0.5335, "step": 10482 }, { "epoch": 0.68, "grad_norm": 1.2646843194961548, "learning_rate": 2.5000000000000015e-06, "loss": 0.5533, "step": 10483 }, { "epoch": 0.68, "grad_norm": 1.2062686681747437, "learning_rate": 2.4990947844451057e-06, "loss": 0.507, "step": 10484 }, { "epoch": 0.68, "grad_norm": 1.2008252143859863, "learning_rate": 2.4981896781983244e-06, "loss": 0.5572, "step": 10485 }, { "epoch": 0.68, "grad_norm": 1.1321613788604736, "learning_rate": 2.497284681299218e-06, "loss": 0.4778, "step": 10486 }, { "epoch": 0.68, "grad_norm": 1.2244065999984741, "learning_rate": 2.4963797937873406e-06, "loss": 0.4884, "step": 10487 }, { "epoch": 0.68, "grad_norm": 1.335422396659851, "learning_rate": 2.495475015702245e-06, "loss": 0.5262, "step": 10488 }, { "epoch": 0.68, "grad_norm": 1.161661148071289, "learning_rate": 2.4945703470834733e-06, "loss": 0.5359, "step": 10489 }, { "epoch": 0.68, "grad_norm": 1.1798967123031616, "learning_rate": 2.4936657879705672e-06, "loss": 0.5165, "step": 10490 }, { "epoch": 0.68, "grad_norm": 1.2227376699447632, "learning_rate": 2.492761338403063e-06, "loss": 0.5222, "step": 10491 }, { "epoch": 0.68, "grad_norm": 1.3135426044464111, "learning_rate": 2.491856998420492e-06, "loss": 0.5759, "step": 10492 }, { "epoch": 0.68, "grad_norm": 1.1282057762145996, "learning_rate": 2.4909527680623807e-06, "loss": 0.4991, "step": 10493 }, { "epoch": 0.68, "grad_norm": 1.1771925687789917, "learning_rate": 2.490048647368252e-06, "loss": 0.527, "step": 10494 }, { "epoch": 0.68, "grad_norm": 1.3316898345947266, "learning_rate": 2.4891446363776193e-06, "loss": 0.6049, "step": 10495 }, { "epoch": 0.68, "grad_norm": 1.1865867376327515, "learning_rate": 2.488240735129997e-06, "loss": 0.4993, "step": 10496 }, { "epoch": 0.68, "grad_norm": 1.2154723405838013, "learning_rate": 2.4873369436648914e-06, "loss": 0.5425, "step": 10497 }, { "epoch": 0.68, "grad_norm": 1.2097212076187134, "learning_rate": 2.486433262021805e-06, "loss": 0.5154, "step": 10498 }, { "epoch": 0.68, "grad_norm": 1.3050005435943604, "learning_rate": 2.4855296902402364e-06, "loss": 0.5492, "step": 10499 }, { "epoch": 0.68, "grad_norm": 1.2672146558761597, "learning_rate": 2.4846262283596787e-06, "loss": 0.5037, "step": 10500 }, { "epoch": 0.68, "grad_norm": 1.288074254989624, "learning_rate": 2.4837228764196176e-06, "loss": 0.53, "step": 10501 }, { "epoch": 0.68, "grad_norm": 1.3210670948028564, "learning_rate": 2.4828196344595366e-06, "loss": 0.5668, "step": 10502 }, { "epoch": 0.68, "grad_norm": 1.276442050933838, "learning_rate": 2.4819165025189155e-06, "loss": 0.4978, "step": 10503 }, { "epoch": 0.68, "grad_norm": 1.2488229274749756, "learning_rate": 2.4810134806372278e-06, "loss": 0.5395, "step": 10504 }, { "epoch": 0.68, "grad_norm": 1.2817420959472656, "learning_rate": 2.4801105688539413e-06, "loss": 0.5314, "step": 10505 }, { "epoch": 0.68, "grad_norm": 1.3393141031265259, "learning_rate": 2.479207767208522e-06, "loss": 0.5037, "step": 10506 }, { "epoch": 0.68, "grad_norm": 1.2624462842941284, "learning_rate": 2.4783050757404257e-06, "loss": 0.4653, "step": 10507 }, { "epoch": 0.68, "grad_norm": 1.153046727180481, "learning_rate": 2.477402494489109e-06, "loss": 0.501, "step": 10508 }, { "epoch": 0.68, "grad_norm": 1.2470396757125854, "learning_rate": 2.4765000234940206e-06, "loss": 0.52, "step": 10509 }, { "epoch": 0.68, "grad_norm": 1.233974814414978, "learning_rate": 2.475597662794605e-06, "loss": 0.5149, "step": 10510 }, { "epoch": 0.68, "grad_norm": 1.1890537738800049, "learning_rate": 2.474695412430303e-06, "loss": 0.5653, "step": 10511 }, { "epoch": 0.68, "grad_norm": 1.188008427619934, "learning_rate": 2.4737932724405512e-06, "loss": 0.5338, "step": 10512 }, { "epoch": 0.68, "grad_norm": 1.3857097625732422, "learning_rate": 2.4728912428647756e-06, "loss": 0.5081, "step": 10513 }, { "epoch": 0.68, "grad_norm": 1.2448362112045288, "learning_rate": 2.4719893237424038e-06, "loss": 0.4807, "step": 10514 }, { "epoch": 0.68, "grad_norm": 1.0792757272720337, "learning_rate": 2.471087515112856e-06, "loss": 0.4669, "step": 10515 }, { "epoch": 0.68, "grad_norm": 1.3196265697479248, "learning_rate": 2.4701858170155483e-06, "loss": 0.5666, "step": 10516 }, { "epoch": 0.68, "grad_norm": 1.2752339839935303, "learning_rate": 2.469284229489892e-06, "loss": 0.5304, "step": 10517 }, { "epoch": 0.68, "grad_norm": 1.129445195198059, "learning_rate": 2.4683827525752947e-06, "loss": 0.5216, "step": 10518 }, { "epoch": 0.68, "grad_norm": 1.1853899955749512, "learning_rate": 2.467481386311153e-06, "loss": 0.4789, "step": 10519 }, { "epoch": 0.68, "grad_norm": 1.3757036924362183, "learning_rate": 2.4665801307368665e-06, "loss": 0.5875, "step": 10520 }, { "epoch": 0.68, "grad_norm": 1.3369925022125244, "learning_rate": 2.4656789858918256e-06, "loss": 0.5445, "step": 10521 }, { "epoch": 0.68, "grad_norm": 1.2933341264724731, "learning_rate": 2.464777951815418e-06, "loss": 0.5237, "step": 10522 }, { "epoch": 0.68, "grad_norm": 1.222967267036438, "learning_rate": 2.463877028547027e-06, "loss": 0.5153, "step": 10523 }, { "epoch": 0.68, "grad_norm": 1.2373929023742676, "learning_rate": 2.4629762161260263e-06, "loss": 0.5377, "step": 10524 }, { "epoch": 0.68, "grad_norm": 1.2038488388061523, "learning_rate": 2.462075514591789e-06, "loss": 0.5155, "step": 10525 }, { "epoch": 0.68, "grad_norm": 1.387918472290039, "learning_rate": 2.461174923983683e-06, "loss": 0.5902, "step": 10526 }, { "epoch": 0.68, "grad_norm": 1.2883095741271973, "learning_rate": 2.460274444341073e-06, "loss": 0.5057, "step": 10527 }, { "epoch": 0.68, "grad_norm": 1.2665293216705322, "learning_rate": 2.4593740757033124e-06, "loss": 0.5365, "step": 10528 }, { "epoch": 0.68, "grad_norm": 1.1333297491073608, "learning_rate": 2.4584738181097564e-06, "loss": 0.547, "step": 10529 }, { "epoch": 0.68, "grad_norm": 1.2500183582305908, "learning_rate": 2.457573671599752e-06, "loss": 0.5436, "step": 10530 }, { "epoch": 0.68, "grad_norm": 1.2118345499038696, "learning_rate": 2.456673636212643e-06, "loss": 0.4939, "step": 10531 }, { "epoch": 0.68, "grad_norm": 1.1754648685455322, "learning_rate": 2.45577371198777e-06, "loss": 0.5275, "step": 10532 }, { "epoch": 0.68, "grad_norm": 1.2569770812988281, "learning_rate": 2.4548738989644617e-06, "loss": 0.5587, "step": 10533 }, { "epoch": 0.68, "grad_norm": 1.3138937950134277, "learning_rate": 2.4539741971820486e-06, "loss": 0.5453, "step": 10534 }, { "epoch": 0.68, "grad_norm": 1.2665064334869385, "learning_rate": 2.4530746066798543e-06, "loss": 0.5531, "step": 10535 }, { "epoch": 0.68, "grad_norm": 1.217120885848999, "learning_rate": 2.4521751274972e-06, "loss": 0.5042, "step": 10536 }, { "epoch": 0.68, "grad_norm": 1.1296143531799316, "learning_rate": 2.4512757596733954e-06, "loss": 0.5378, "step": 10537 }, { "epoch": 0.68, "grad_norm": 1.1895643472671509, "learning_rate": 2.4503765032477515e-06, "loss": 0.4776, "step": 10538 }, { "epoch": 0.68, "grad_norm": 1.1371150016784668, "learning_rate": 2.4494773582595727e-06, "loss": 0.4847, "step": 10539 }, { "epoch": 0.68, "grad_norm": 1.1738520860671997, "learning_rate": 2.4485783247481574e-06, "loss": 0.4993, "step": 10540 }, { "epoch": 0.68, "grad_norm": 1.248477578163147, "learning_rate": 2.447679402752804e-06, "loss": 0.5673, "step": 10541 }, { "epoch": 0.68, "grad_norm": 1.2020174264907837, "learning_rate": 2.4467805923127956e-06, "loss": 0.4663, "step": 10542 }, { "epoch": 0.68, "grad_norm": 1.2419538497924805, "learning_rate": 2.445881893467421e-06, "loss": 0.5741, "step": 10543 }, { "epoch": 0.68, "grad_norm": 1.184579849243164, "learning_rate": 2.444983306255959e-06, "loss": 0.5214, "step": 10544 }, { "epoch": 0.68, "grad_norm": 1.1544806957244873, "learning_rate": 2.4440848307176845e-06, "loss": 0.5146, "step": 10545 }, { "epoch": 0.68, "grad_norm": 1.1027014255523682, "learning_rate": 2.4431864668918677e-06, "loss": 0.4894, "step": 10546 }, { "epoch": 0.68, "grad_norm": 1.1560949087142944, "learning_rate": 2.4422882148177757e-06, "loss": 0.4922, "step": 10547 }, { "epoch": 0.68, "grad_norm": 1.1929134130477905, "learning_rate": 2.441390074534665e-06, "loss": 0.5487, "step": 10548 }, { "epoch": 0.68, "grad_norm": 1.1208791732788086, "learning_rate": 2.440492046081793e-06, "loss": 0.4954, "step": 10549 }, { "epoch": 0.68, "grad_norm": 1.1759328842163086, "learning_rate": 2.4395941294984095e-06, "loss": 0.4799, "step": 10550 }, { "epoch": 0.68, "grad_norm": 1.2544318437576294, "learning_rate": 2.4386963248237606e-06, "loss": 0.5375, "step": 10551 }, { "epoch": 0.68, "grad_norm": 1.2546954154968262, "learning_rate": 2.4377986320970876e-06, "loss": 0.4899, "step": 10552 }, { "epoch": 0.68, "grad_norm": 1.1975080966949463, "learning_rate": 2.4369010513576275e-06, "loss": 0.5, "step": 10553 }, { "epoch": 0.68, "grad_norm": 1.3058220148086548, "learning_rate": 2.436003582644608e-06, "loss": 0.5363, "step": 10554 }, { "epoch": 0.68, "grad_norm": 1.3220765590667725, "learning_rate": 2.435106225997256e-06, "loss": 0.5285, "step": 10555 }, { "epoch": 0.68, "grad_norm": 1.2604470252990723, "learning_rate": 2.4342089814547942e-06, "loss": 0.5195, "step": 10556 }, { "epoch": 0.68, "grad_norm": 1.2276768684387207, "learning_rate": 2.433311849056437e-06, "loss": 0.5131, "step": 10557 }, { "epoch": 0.68, "grad_norm": 1.1485904455184937, "learning_rate": 2.432414828841398e-06, "loss": 0.5225, "step": 10558 }, { "epoch": 0.68, "grad_norm": 1.2379270792007446, "learning_rate": 2.431517920848883e-06, "loss": 0.5783, "step": 10559 }, { "epoch": 0.68, "grad_norm": 1.2787312269210815, "learning_rate": 2.4306211251180924e-06, "loss": 0.5738, "step": 10560 }, { "epoch": 0.68, "grad_norm": 1.2225898504257202, "learning_rate": 2.429724441688222e-06, "loss": 0.5483, "step": 10561 }, { "epoch": 0.68, "grad_norm": 1.3587206602096558, "learning_rate": 2.4288278705984652e-06, "loss": 0.526, "step": 10562 }, { "epoch": 0.68, "grad_norm": 1.285603404045105, "learning_rate": 2.427931411888009e-06, "loss": 0.5948, "step": 10563 }, { "epoch": 0.68, "grad_norm": 1.3368574380874634, "learning_rate": 2.4270350655960345e-06, "loss": 0.5643, "step": 10564 }, { "epoch": 0.68, "grad_norm": 1.2106366157531738, "learning_rate": 2.4261388317617205e-06, "loss": 0.4858, "step": 10565 }, { "epoch": 0.68, "grad_norm": 1.185526728630066, "learning_rate": 2.425242710424236e-06, "loss": 0.5548, "step": 10566 }, { "epoch": 0.68, "grad_norm": 1.1395399570465088, "learning_rate": 2.4243467016227493e-06, "loss": 0.4951, "step": 10567 }, { "epoch": 0.68, "grad_norm": 1.2506402730941772, "learning_rate": 2.4234508053964236e-06, "loss": 0.5098, "step": 10568 }, { "epoch": 0.68, "grad_norm": 1.4189424514770508, "learning_rate": 2.4225550217844153e-06, "loss": 0.5191, "step": 10569 }, { "epoch": 0.68, "grad_norm": 1.1667925119400024, "learning_rate": 2.421659350825879e-06, "loss": 0.4913, "step": 10570 }, { "epoch": 0.68, "grad_norm": 1.2866357564926147, "learning_rate": 2.420763792559958e-06, "loss": 0.5298, "step": 10571 }, { "epoch": 0.68, "grad_norm": 1.127439022064209, "learning_rate": 2.419868347025797e-06, "loss": 0.5572, "step": 10572 }, { "epoch": 0.68, "grad_norm": 1.370529294013977, "learning_rate": 2.4189730142625347e-06, "loss": 0.5052, "step": 10573 }, { "epoch": 0.68, "grad_norm": 1.2270433902740479, "learning_rate": 2.418077794309302e-06, "loss": 0.4955, "step": 10574 }, { "epoch": 0.68, "grad_norm": 1.2217918634414673, "learning_rate": 2.417182687205228e-06, "loss": 0.5098, "step": 10575 }, { "epoch": 0.68, "grad_norm": 1.189098596572876, "learning_rate": 2.4162876929894364e-06, "loss": 0.504, "step": 10576 }, { "epoch": 0.68, "grad_norm": 1.2284318208694458, "learning_rate": 2.4153928117010422e-06, "loss": 0.5127, "step": 10577 }, { "epoch": 0.68, "grad_norm": 1.2626919746398926, "learning_rate": 2.414498043379159e-06, "loss": 0.5543, "step": 10578 }, { "epoch": 0.68, "grad_norm": 1.2702056169509888, "learning_rate": 2.413603388062897e-06, "loss": 0.4864, "step": 10579 }, { "epoch": 0.68, "grad_norm": 1.231683611869812, "learning_rate": 2.4127088457913566e-06, "loss": 0.5292, "step": 10580 }, { "epoch": 0.68, "grad_norm": 1.1159627437591553, "learning_rate": 2.4118144166036377e-06, "loss": 0.516, "step": 10581 }, { "epoch": 0.68, "grad_norm": 1.2484173774719238, "learning_rate": 2.4109201005388355e-06, "loss": 0.5064, "step": 10582 }, { "epoch": 0.68, "grad_norm": 1.148272156715393, "learning_rate": 2.4100258976360337e-06, "loss": 0.5162, "step": 10583 }, { "epoch": 0.68, "grad_norm": 1.0855919122695923, "learning_rate": 2.409131807934317e-06, "loss": 0.5178, "step": 10584 }, { "epoch": 0.68, "grad_norm": 1.1891170740127563, "learning_rate": 2.408237831472765e-06, "loss": 0.5195, "step": 10585 }, { "epoch": 0.68, "grad_norm": 1.2534193992614746, "learning_rate": 2.40734396829045e-06, "loss": 0.5242, "step": 10586 }, { "epoch": 0.68, "grad_norm": 1.2328436374664307, "learning_rate": 2.4064502184264415e-06, "loss": 0.5209, "step": 10587 }, { "epoch": 0.68, "grad_norm": 1.1383283138275146, "learning_rate": 2.4055565819198048e-06, "loss": 0.5029, "step": 10588 }, { "epoch": 0.68, "grad_norm": 1.1432915925979614, "learning_rate": 2.4046630588095937e-06, "loss": 0.4555, "step": 10589 }, { "epoch": 0.68, "grad_norm": 1.1446954011917114, "learning_rate": 2.4037696491348642e-06, "loss": 0.5113, "step": 10590 }, { "epoch": 0.68, "grad_norm": 1.3567043542861938, "learning_rate": 2.402876352934665e-06, "loss": 0.5289, "step": 10591 }, { "epoch": 0.68, "grad_norm": 1.246907114982605, "learning_rate": 2.4019831702480402e-06, "loss": 0.4636, "step": 10592 }, { "epoch": 0.68, "grad_norm": 1.3592047691345215, "learning_rate": 2.4010901011140273e-06, "loss": 0.571, "step": 10593 }, { "epoch": 0.68, "grad_norm": 1.2311517000198364, "learning_rate": 2.4001971455716634e-06, "loss": 0.4784, "step": 10594 }, { "epoch": 0.68, "grad_norm": 1.1399929523468018, "learning_rate": 2.399304303659972e-06, "loss": 0.5473, "step": 10595 }, { "epoch": 0.68, "grad_norm": 1.1963304281234741, "learning_rate": 2.3984115754179806e-06, "loss": 0.4852, "step": 10596 }, { "epoch": 0.68, "grad_norm": 1.1745054721832275, "learning_rate": 2.3975189608847065e-06, "loss": 0.4619, "step": 10597 }, { "epoch": 0.68, "grad_norm": 1.1684290170669556, "learning_rate": 2.396626460099164e-06, "loss": 0.5226, "step": 10598 }, { "epoch": 0.68, "grad_norm": 1.13935124874115, "learning_rate": 2.3957340731003624e-06, "loss": 0.5054, "step": 10599 }, { "epoch": 0.68, "grad_norm": 1.136742115020752, "learning_rate": 2.394841799927307e-06, "loss": 0.4937, "step": 10600 }, { "epoch": 0.68, "grad_norm": 1.186776876449585, "learning_rate": 2.393949640618993e-06, "loss": 0.4877, "step": 10601 }, { "epoch": 0.68, "grad_norm": 1.1881959438323975, "learning_rate": 2.3930575952144175e-06, "loss": 0.5737, "step": 10602 }, { "epoch": 0.68, "grad_norm": 1.144637107849121, "learning_rate": 2.3921656637525682e-06, "loss": 0.5009, "step": 10603 }, { "epoch": 0.68, "grad_norm": 1.1444379091262817, "learning_rate": 2.3912738462724287e-06, "loss": 0.5267, "step": 10604 }, { "epoch": 0.68, "grad_norm": 1.1973742246627808, "learning_rate": 2.390382142812979e-06, "loss": 0.5334, "step": 10605 }, { "epoch": 0.68, "grad_norm": 2.488680124282837, "learning_rate": 2.389490553413196e-06, "loss": 0.5538, "step": 10606 }, { "epoch": 0.68, "grad_norm": 1.0975606441497803, "learning_rate": 2.3885990781120423e-06, "loss": 0.4786, "step": 10607 }, { "epoch": 0.68, "grad_norm": 1.1826778650283813, "learning_rate": 2.387707716948486e-06, "loss": 0.5007, "step": 10608 }, { "epoch": 0.68, "grad_norm": 1.275640606880188, "learning_rate": 2.3868164699614854e-06, "loss": 0.5218, "step": 10609 }, { "epoch": 0.68, "grad_norm": 1.138184905052185, "learning_rate": 2.3859253371899948e-06, "loss": 0.5636, "step": 10610 }, { "epoch": 0.68, "grad_norm": 1.2850868701934814, "learning_rate": 2.3850343186729637e-06, "loss": 0.5134, "step": 10611 }, { "epoch": 0.68, "grad_norm": 1.33498215675354, "learning_rate": 2.3841434144493373e-06, "loss": 0.5238, "step": 10612 }, { "epoch": 0.69, "grad_norm": 1.0936609506607056, "learning_rate": 2.3832526245580518e-06, "loss": 0.4943, "step": 10613 }, { "epoch": 0.69, "grad_norm": 1.1854356527328491, "learning_rate": 2.382361949038042e-06, "loss": 0.5156, "step": 10614 }, { "epoch": 0.69, "grad_norm": 1.315377950668335, "learning_rate": 2.3814713879282385e-06, "loss": 0.5456, "step": 10615 }, { "epoch": 0.69, "grad_norm": 1.2003101110458374, "learning_rate": 2.3805809412675666e-06, "loss": 0.5721, "step": 10616 }, { "epoch": 0.69, "grad_norm": 1.205583930015564, "learning_rate": 2.3796906090949418e-06, "loss": 0.5195, "step": 10617 }, { "epoch": 0.69, "grad_norm": 1.140055537223816, "learning_rate": 2.3788003914492797e-06, "loss": 0.5019, "step": 10618 }, { "epoch": 0.69, "grad_norm": 1.237176537513733, "learning_rate": 2.37791028836949e-06, "loss": 0.5181, "step": 10619 }, { "epoch": 0.69, "grad_norm": 1.1681867837905884, "learning_rate": 2.3770202998944756e-06, "loss": 0.5213, "step": 10620 }, { "epoch": 0.69, "grad_norm": 1.113576054573059, "learning_rate": 2.3761304260631396e-06, "loss": 0.488, "step": 10621 }, { "epoch": 0.69, "grad_norm": 1.294341802597046, "learning_rate": 2.37524066691437e-06, "loss": 0.4936, "step": 10622 }, { "epoch": 0.69, "grad_norm": 1.1083732843399048, "learning_rate": 2.3743510224870597e-06, "loss": 0.5235, "step": 10623 }, { "epoch": 0.69, "grad_norm": 1.2729471921920776, "learning_rate": 2.3734614928200917e-06, "loss": 0.5307, "step": 10624 }, { "epoch": 0.69, "grad_norm": 1.0985417366027832, "learning_rate": 2.3725720779523477e-06, "loss": 0.5522, "step": 10625 }, { "epoch": 0.69, "grad_norm": 1.0794697999954224, "learning_rate": 2.3716827779226965e-06, "loss": 0.4886, "step": 10626 }, { "epoch": 0.69, "grad_norm": 1.2635741233825684, "learning_rate": 2.3707935927700104e-06, "loss": 0.5852, "step": 10627 }, { "epoch": 0.69, "grad_norm": 1.1607216596603394, "learning_rate": 2.369904522533153e-06, "loss": 0.5132, "step": 10628 }, { "epoch": 0.69, "grad_norm": 1.1310311555862427, "learning_rate": 2.369015567250983e-06, "loss": 0.4963, "step": 10629 }, { "epoch": 0.69, "grad_norm": 1.2702797651290894, "learning_rate": 2.3681267269623565e-06, "loss": 0.5364, "step": 10630 }, { "epoch": 0.69, "grad_norm": 1.1216752529144287, "learning_rate": 2.367238001706118e-06, "loss": 0.5067, "step": 10631 }, { "epoch": 0.69, "grad_norm": 1.141152262687683, "learning_rate": 2.3663493915211144e-06, "loss": 0.5524, "step": 10632 }, { "epoch": 0.69, "grad_norm": 1.1523175239562988, "learning_rate": 2.365460896446184e-06, "loss": 0.4684, "step": 10633 }, { "epoch": 0.69, "grad_norm": 1.166474461555481, "learning_rate": 2.3645725165201604e-06, "loss": 0.5047, "step": 10634 }, { "epoch": 0.69, "grad_norm": 1.2838314771652222, "learning_rate": 2.3636842517818746e-06, "loss": 0.5199, "step": 10635 }, { "epoch": 0.69, "grad_norm": 1.253658652305603, "learning_rate": 2.362796102270147e-06, "loss": 0.5059, "step": 10636 }, { "epoch": 0.69, "grad_norm": 1.1916464567184448, "learning_rate": 2.3619080680237972e-06, "loss": 0.5053, "step": 10637 }, { "epoch": 0.69, "grad_norm": 1.2196729183197021, "learning_rate": 2.3610201490816392e-06, "loss": 0.528, "step": 10638 }, { "epoch": 0.69, "grad_norm": 1.3595377206802368, "learning_rate": 2.3601323454824822e-06, "loss": 0.573, "step": 10639 }, { "epoch": 0.69, "grad_norm": 1.249167561531067, "learning_rate": 2.359244657265129e-06, "loss": 0.5257, "step": 10640 }, { "epoch": 0.69, "grad_norm": 1.2761234045028687, "learning_rate": 2.3583570844683808e-06, "loss": 0.5178, "step": 10641 }, { "epoch": 0.69, "grad_norm": 1.1774883270263672, "learning_rate": 2.3574696271310266e-06, "loss": 0.5088, "step": 10642 }, { "epoch": 0.69, "grad_norm": 1.1874172687530518, "learning_rate": 2.3565822852918575e-06, "loss": 0.5282, "step": 10643 }, { "epoch": 0.69, "grad_norm": 1.1311591863632202, "learning_rate": 2.3556950589896566e-06, "loss": 0.5219, "step": 10644 }, { "epoch": 0.69, "grad_norm": 1.1585667133331299, "learning_rate": 2.3548079482632023e-06, "loss": 0.5084, "step": 10645 }, { "epoch": 0.69, "grad_norm": 1.1388686895370483, "learning_rate": 2.353920953151268e-06, "loss": 0.5061, "step": 10646 }, { "epoch": 0.69, "grad_norm": 1.3547987937927246, "learning_rate": 2.353034073692624e-06, "loss": 0.5257, "step": 10647 }, { "epoch": 0.69, "grad_norm": 1.2504419088363647, "learning_rate": 2.352147309926029e-06, "loss": 0.4991, "step": 10648 }, { "epoch": 0.69, "grad_norm": 1.2839139699935913, "learning_rate": 2.3512606618902432e-06, "loss": 0.5556, "step": 10649 }, { "epoch": 0.69, "grad_norm": 1.2750598192214966, "learning_rate": 2.3503741296240207e-06, "loss": 0.4861, "step": 10650 }, { "epoch": 0.69, "grad_norm": 1.2421187162399292, "learning_rate": 2.3494877131661086e-06, "loss": 0.5322, "step": 10651 }, { "epoch": 0.69, "grad_norm": 1.2314530611038208, "learning_rate": 2.3486014125552503e-06, "loss": 0.4727, "step": 10652 }, { "epoch": 0.69, "grad_norm": 1.2801928520202637, "learning_rate": 2.3477152278301853e-06, "loss": 0.4937, "step": 10653 }, { "epoch": 0.69, "grad_norm": 1.2130991220474243, "learning_rate": 2.346829159029643e-06, "loss": 0.5327, "step": 10654 }, { "epoch": 0.69, "grad_norm": 1.2361130714416504, "learning_rate": 2.345943206192353e-06, "loss": 0.5471, "step": 10655 }, { "epoch": 0.69, "grad_norm": 1.3798744678497314, "learning_rate": 2.345057369357037e-06, "loss": 0.5545, "step": 10656 }, { "epoch": 0.69, "grad_norm": 1.3777587413787842, "learning_rate": 2.344171648562414e-06, "loss": 0.4503, "step": 10657 }, { "epoch": 0.69, "grad_norm": 1.3054732084274292, "learning_rate": 2.343286043847198e-06, "loss": 0.5149, "step": 10658 }, { "epoch": 0.69, "grad_norm": 1.1640971899032593, "learning_rate": 2.3424005552500935e-06, "loss": 0.5182, "step": 10659 }, { "epoch": 0.69, "grad_norm": 1.3125200271606445, "learning_rate": 2.341515182809803e-06, "loss": 0.5195, "step": 10660 }, { "epoch": 0.69, "grad_norm": 1.205224871635437, "learning_rate": 2.3406299265650256e-06, "loss": 0.5201, "step": 10661 }, { "epoch": 0.69, "grad_norm": 1.2897067070007324, "learning_rate": 2.339744786554453e-06, "loss": 0.4774, "step": 10662 }, { "epoch": 0.69, "grad_norm": 1.1538972854614258, "learning_rate": 2.3388597628167715e-06, "loss": 0.5037, "step": 10663 }, { "epoch": 0.69, "grad_norm": 1.352510690689087, "learning_rate": 2.3379748553906663e-06, "loss": 0.5309, "step": 10664 }, { "epoch": 0.69, "grad_norm": 1.058855414390564, "learning_rate": 2.33709006431481e-06, "loss": 0.4602, "step": 10665 }, { "epoch": 0.69, "grad_norm": 1.2132378816604614, "learning_rate": 2.3362053896278764e-06, "loss": 0.4874, "step": 10666 }, { "epoch": 0.69, "grad_norm": 1.0944327116012573, "learning_rate": 2.3353208313685328e-06, "loss": 0.4627, "step": 10667 }, { "epoch": 0.69, "grad_norm": 1.2168529033660889, "learning_rate": 2.33443638957544e-06, "loss": 0.5522, "step": 10668 }, { "epoch": 0.69, "grad_norm": 1.3809428215026855, "learning_rate": 2.333552064287256e-06, "loss": 0.5472, "step": 10669 }, { "epoch": 0.69, "grad_norm": 1.1851714849472046, "learning_rate": 2.332667855542634e-06, "loss": 0.4915, "step": 10670 }, { "epoch": 0.69, "grad_norm": 1.1161688566207886, "learning_rate": 2.331783763380216e-06, "loss": 0.5203, "step": 10671 }, { "epoch": 0.69, "grad_norm": 1.2812459468841553, "learning_rate": 2.3308997878386454e-06, "loss": 0.5774, "step": 10672 }, { "epoch": 0.69, "grad_norm": 1.1836789846420288, "learning_rate": 2.330015928956559e-06, "loss": 0.569, "step": 10673 }, { "epoch": 0.69, "grad_norm": 1.2123641967773438, "learning_rate": 2.3291321867725876e-06, "loss": 0.5193, "step": 10674 }, { "epoch": 0.69, "grad_norm": 1.2933038473129272, "learning_rate": 2.328248561325357e-06, "loss": 0.5187, "step": 10675 }, { "epoch": 0.69, "grad_norm": 1.227145791053772, "learning_rate": 2.327365052653491e-06, "loss": 0.5425, "step": 10676 }, { "epoch": 0.69, "grad_norm": 1.224636197090149, "learning_rate": 2.3264816607956007e-06, "loss": 0.5082, "step": 10677 }, { "epoch": 0.69, "grad_norm": 1.2593454122543335, "learning_rate": 2.3255983857902997e-06, "loss": 0.5765, "step": 10678 }, { "epoch": 0.69, "grad_norm": 1.2416362762451172, "learning_rate": 2.324715227676193e-06, "loss": 0.5484, "step": 10679 }, { "epoch": 0.69, "grad_norm": 1.2320419549942017, "learning_rate": 2.323832186491881e-06, "loss": 0.5152, "step": 10680 }, { "epoch": 0.69, "grad_norm": 1.177232265472412, "learning_rate": 2.3229492622759604e-06, "loss": 0.5401, "step": 10681 }, { "epoch": 0.69, "grad_norm": 1.26077401638031, "learning_rate": 2.322066455067022e-06, "loss": 0.5251, "step": 10682 }, { "epoch": 0.69, "grad_norm": 1.2199757099151611, "learning_rate": 2.3211837649036477e-06, "loss": 0.4745, "step": 10683 }, { "epoch": 0.69, "grad_norm": 1.3347562551498413, "learning_rate": 2.32030119182442e-06, "loss": 0.5583, "step": 10684 }, { "epoch": 0.69, "grad_norm": 1.1546823978424072, "learning_rate": 2.3194187358679132e-06, "loss": 0.5082, "step": 10685 }, { "epoch": 0.69, "grad_norm": 1.1951113939285278, "learning_rate": 2.3185363970726976e-06, "loss": 0.539, "step": 10686 }, { "epoch": 0.69, "grad_norm": 1.185779333114624, "learning_rate": 2.3176541754773375e-06, "loss": 0.4974, "step": 10687 }, { "epoch": 0.69, "grad_norm": 1.2516120672225952, "learning_rate": 2.316772071120395e-06, "loss": 0.5077, "step": 10688 }, { "epoch": 0.69, "grad_norm": 1.1638188362121582, "learning_rate": 2.31589008404042e-06, "loss": 0.5231, "step": 10689 }, { "epoch": 0.69, "grad_norm": 1.3086093664169312, "learning_rate": 2.3150082142759657e-06, "loss": 0.4677, "step": 10690 }, { "epoch": 0.69, "grad_norm": 1.4117718935012817, "learning_rate": 2.314126461865574e-06, "loss": 0.4889, "step": 10691 }, { "epoch": 0.69, "grad_norm": 1.0940628051757812, "learning_rate": 2.3132448268477846e-06, "loss": 0.4995, "step": 10692 }, { "epoch": 0.69, "grad_norm": 1.1958826780319214, "learning_rate": 2.3123633092611325e-06, "loss": 0.53, "step": 10693 }, { "epoch": 0.69, "grad_norm": 1.2196128368377686, "learning_rate": 2.311481909144148e-06, "loss": 0.5244, "step": 10694 }, { "epoch": 0.69, "grad_norm": 1.297071099281311, "learning_rate": 2.310600626535351e-06, "loss": 0.6046, "step": 10695 }, { "epoch": 0.69, "grad_norm": 1.1957437992095947, "learning_rate": 2.309719461473261e-06, "loss": 0.5056, "step": 10696 }, { "epoch": 0.69, "grad_norm": 1.2141649723052979, "learning_rate": 2.3088384139963934e-06, "loss": 0.518, "step": 10697 }, { "epoch": 0.69, "grad_norm": 1.225226640701294, "learning_rate": 2.3079574841432546e-06, "loss": 0.491, "step": 10698 }, { "epoch": 0.69, "grad_norm": 1.3241370916366577, "learning_rate": 2.3070766719523506e-06, "loss": 0.5452, "step": 10699 }, { "epoch": 0.69, "grad_norm": 1.2445454597473145, "learning_rate": 2.306195977462176e-06, "loss": 0.5517, "step": 10700 }, { "epoch": 0.69, "grad_norm": 1.1845972537994385, "learning_rate": 2.305315400711225e-06, "loss": 0.5162, "step": 10701 }, { "epoch": 0.69, "grad_norm": 1.1314350366592407, "learning_rate": 2.304434941737985e-06, "loss": 0.5275, "step": 10702 }, { "epoch": 0.69, "grad_norm": 1.227669358253479, "learning_rate": 2.3035546005809396e-06, "loss": 0.5564, "step": 10703 }, { "epoch": 0.69, "grad_norm": 1.1428685188293457, "learning_rate": 2.3026743772785655e-06, "loss": 0.4756, "step": 10704 }, { "epoch": 0.69, "grad_norm": 1.189528465270996, "learning_rate": 2.301794271869337e-06, "loss": 0.5096, "step": 10705 }, { "epoch": 0.69, "grad_norm": 1.3542441129684448, "learning_rate": 2.3009142843917176e-06, "loss": 0.5284, "step": 10706 }, { "epoch": 0.69, "grad_norm": 1.2642388343811035, "learning_rate": 2.300034414884171e-06, "loss": 0.5001, "step": 10707 }, { "epoch": 0.69, "grad_norm": 1.2278227806091309, "learning_rate": 2.2991546633851543e-06, "loss": 0.5438, "step": 10708 }, { "epoch": 0.69, "grad_norm": 1.1689687967300415, "learning_rate": 2.2982750299331186e-06, "loss": 0.4969, "step": 10709 }, { "epoch": 0.69, "grad_norm": 1.171159267425537, "learning_rate": 2.297395514566513e-06, "loss": 0.5088, "step": 10710 }, { "epoch": 0.69, "grad_norm": 1.2018256187438965, "learning_rate": 2.2965161173237744e-06, "loss": 0.5525, "step": 10711 }, { "epoch": 0.69, "grad_norm": 1.1814390420913696, "learning_rate": 2.295636838243342e-06, "loss": 0.5172, "step": 10712 }, { "epoch": 0.69, "grad_norm": 1.3822988271713257, "learning_rate": 2.2947576773636454e-06, "loss": 0.5109, "step": 10713 }, { "epoch": 0.69, "grad_norm": 1.1498229503631592, "learning_rate": 2.293878634723113e-06, "loss": 0.4822, "step": 10714 }, { "epoch": 0.69, "grad_norm": 1.225131630897522, "learning_rate": 2.292999710360162e-06, "loss": 0.5757, "step": 10715 }, { "epoch": 0.69, "grad_norm": 1.1895575523376465, "learning_rate": 2.292120904313209e-06, "loss": 0.5451, "step": 10716 }, { "epoch": 0.69, "grad_norm": 1.2249093055725098, "learning_rate": 2.2912422166206655e-06, "loss": 0.5394, "step": 10717 }, { "epoch": 0.69, "grad_norm": 1.318041443824768, "learning_rate": 2.2903636473209358e-06, "loss": 0.5216, "step": 10718 }, { "epoch": 0.69, "grad_norm": 1.1520522832870483, "learning_rate": 2.289485196452422e-06, "loss": 0.4925, "step": 10719 }, { "epoch": 0.69, "grad_norm": 1.9524688720703125, "learning_rate": 2.2886068640535146e-06, "loss": 0.573, "step": 10720 }, { "epoch": 0.69, "grad_norm": 1.2023138999938965, "learning_rate": 2.2877286501626067e-06, "loss": 0.5129, "step": 10721 }, { "epoch": 0.69, "grad_norm": 1.1812171936035156, "learning_rate": 2.286850554818081e-06, "loss": 0.502, "step": 10722 }, { "epoch": 0.69, "grad_norm": 1.2223806381225586, "learning_rate": 2.2859725780583196e-06, "loss": 0.4704, "step": 10723 }, { "epoch": 0.69, "grad_norm": 1.1747280359268188, "learning_rate": 2.285094719921693e-06, "loss": 0.521, "step": 10724 }, { "epoch": 0.69, "grad_norm": 1.1614034175872803, "learning_rate": 2.2842169804465713e-06, "loss": 0.4938, "step": 10725 }, { "epoch": 0.69, "grad_norm": 1.2008750438690186, "learning_rate": 2.2833393596713185e-06, "loss": 0.535, "step": 10726 }, { "epoch": 0.69, "grad_norm": 1.3031368255615234, "learning_rate": 2.282461857634293e-06, "loss": 0.5413, "step": 10727 }, { "epoch": 0.69, "grad_norm": 1.2565646171569824, "learning_rate": 2.281584474373849e-06, "loss": 0.537, "step": 10728 }, { "epoch": 0.69, "grad_norm": 1.1314092874526978, "learning_rate": 2.2807072099283356e-06, "loss": 0.5157, "step": 10729 }, { "epoch": 0.69, "grad_norm": 1.2424063682556152, "learning_rate": 2.279830064336092e-06, "loss": 0.5265, "step": 10730 }, { "epoch": 0.69, "grad_norm": 1.088639736175537, "learning_rate": 2.2789530376354584e-06, "loss": 0.5025, "step": 10731 }, { "epoch": 0.69, "grad_norm": 1.3350530862808228, "learning_rate": 2.2780761298647672e-06, "loss": 0.6074, "step": 10732 }, { "epoch": 0.69, "grad_norm": 1.153237223625183, "learning_rate": 2.277199341062346e-06, "loss": 0.4605, "step": 10733 }, { "epoch": 0.69, "grad_norm": 1.1718389987945557, "learning_rate": 2.2763226712665167e-06, "loss": 0.5161, "step": 10734 }, { "epoch": 0.69, "grad_norm": 1.0506689548492432, "learning_rate": 2.2754461205155977e-06, "loss": 0.4518, "step": 10735 }, { "epoch": 0.69, "grad_norm": 1.1626511812210083, "learning_rate": 2.2745696888478973e-06, "loss": 0.5079, "step": 10736 }, { "epoch": 0.69, "grad_norm": 1.3058580160140991, "learning_rate": 2.273693376301724e-06, "loss": 0.5773, "step": 10737 }, { "epoch": 0.69, "grad_norm": 1.1681467294692993, "learning_rate": 2.27281718291538e-06, "loss": 0.5027, "step": 10738 }, { "epoch": 0.69, "grad_norm": 1.083746314048767, "learning_rate": 2.2719411087271603e-06, "loss": 0.4819, "step": 10739 }, { "epoch": 0.69, "grad_norm": 1.2206803560256958, "learning_rate": 2.271065153775358e-06, "loss": 0.5316, "step": 10740 }, { "epoch": 0.69, "grad_norm": 1.1915415525436401, "learning_rate": 2.2701893180982553e-06, "loss": 0.5307, "step": 10741 }, { "epoch": 0.69, "grad_norm": 1.272523283958435, "learning_rate": 2.269313601734135e-06, "loss": 0.5011, "step": 10742 }, { "epoch": 0.69, "grad_norm": 1.1596053838729858, "learning_rate": 2.2684380047212714e-06, "loss": 0.4854, "step": 10743 }, { "epoch": 0.69, "grad_norm": 1.3691027164459229, "learning_rate": 2.267562527097935e-06, "loss": 0.5413, "step": 10744 }, { "epoch": 0.69, "grad_norm": 1.1633570194244385, "learning_rate": 2.2666871689023907e-06, "loss": 0.5321, "step": 10745 }, { "epoch": 0.69, "grad_norm": 1.1780651807785034, "learning_rate": 2.2658119301729004e-06, "loss": 0.5601, "step": 10746 }, { "epoch": 0.69, "grad_norm": 1.2319859266281128, "learning_rate": 2.2649368109477143e-06, "loss": 0.5259, "step": 10747 }, { "epoch": 0.69, "grad_norm": 1.1891149282455444, "learning_rate": 2.2640618112650837e-06, "loss": 0.5052, "step": 10748 }, { "epoch": 0.69, "grad_norm": 1.225418210029602, "learning_rate": 2.263186931163253e-06, "loss": 0.547, "step": 10749 }, { "epoch": 0.69, "grad_norm": 1.2071685791015625, "learning_rate": 2.2623121706804595e-06, "loss": 0.5014, "step": 10750 }, { "epoch": 0.69, "grad_norm": 1.1589275598526, "learning_rate": 2.261437529854938e-06, "loss": 0.4943, "step": 10751 }, { "epoch": 0.69, "grad_norm": 1.1956658363342285, "learning_rate": 2.260563008724919e-06, "loss": 0.4888, "step": 10752 }, { "epoch": 0.69, "grad_norm": 1.2714118957519531, "learning_rate": 2.2596886073286204e-06, "loss": 0.4898, "step": 10753 }, { "epoch": 0.69, "grad_norm": 1.3959702253341675, "learning_rate": 2.2588143257042626e-06, "loss": 0.4784, "step": 10754 }, { "epoch": 0.69, "grad_norm": 1.4252327680587769, "learning_rate": 2.257940163890059e-06, "loss": 0.5253, "step": 10755 }, { "epoch": 0.69, "grad_norm": 1.2436765432357788, "learning_rate": 2.2570661219242156e-06, "loss": 0.5595, "step": 10756 }, { "epoch": 0.69, "grad_norm": 1.3373136520385742, "learning_rate": 2.2561921998449356e-06, "loss": 0.5459, "step": 10757 }, { "epoch": 0.69, "grad_norm": 1.3578025102615356, "learning_rate": 2.255318397690417e-06, "loss": 0.5186, "step": 10758 }, { "epoch": 0.69, "grad_norm": 1.1180356740951538, "learning_rate": 2.254444715498848e-06, "loss": 0.5444, "step": 10759 }, { "epoch": 0.69, "grad_norm": 1.4388079643249512, "learning_rate": 2.253571153308417e-06, "loss": 0.5494, "step": 10760 }, { "epoch": 0.69, "grad_norm": 1.2517443895339966, "learning_rate": 2.252697711157305e-06, "loss": 0.5515, "step": 10761 }, { "epoch": 0.69, "grad_norm": 1.347144365310669, "learning_rate": 2.2518243890836877e-06, "loss": 0.4957, "step": 10762 }, { "epoch": 0.69, "grad_norm": 1.2490226030349731, "learning_rate": 2.2509511871257367e-06, "loss": 0.5571, "step": 10763 }, { "epoch": 0.69, "grad_norm": 1.3115116357803345, "learning_rate": 2.2500781053216175e-06, "loss": 0.5061, "step": 10764 }, { "epoch": 0.69, "grad_norm": 1.2176258563995361, "learning_rate": 2.249205143709488e-06, "loss": 0.5117, "step": 10765 }, { "epoch": 0.69, "grad_norm": 1.1854960918426514, "learning_rate": 2.248332302327505e-06, "loss": 0.5388, "step": 10766 }, { "epoch": 0.7, "grad_norm": 1.241650938987732, "learning_rate": 2.2474595812138173e-06, "loss": 0.5129, "step": 10767 }, { "epoch": 0.7, "grad_norm": 1.2118271589279175, "learning_rate": 2.2465869804065695e-06, "loss": 0.4977, "step": 10768 }, { "epoch": 0.7, "grad_norm": 1.0978431701660156, "learning_rate": 2.2457144999439006e-06, "loss": 0.4633, "step": 10769 }, { "epoch": 0.7, "grad_norm": 1.2957912683486938, "learning_rate": 2.244842139863947e-06, "loss": 0.5173, "step": 10770 }, { "epoch": 0.7, "grad_norm": 1.1528784036636353, "learning_rate": 2.243969900204833e-06, "loss": 0.4717, "step": 10771 }, { "epoch": 0.7, "grad_norm": 1.1912130117416382, "learning_rate": 2.2430977810046846e-06, "loss": 0.4992, "step": 10772 }, { "epoch": 0.7, "grad_norm": 1.1552022695541382, "learning_rate": 2.2422257823016187e-06, "loss": 0.5992, "step": 10773 }, { "epoch": 0.7, "grad_norm": 1.1347767114639282, "learning_rate": 2.241353904133749e-06, "loss": 0.5279, "step": 10774 }, { "epoch": 0.7, "grad_norm": 1.1792808771133423, "learning_rate": 2.2404821465391824e-06, "loss": 0.4816, "step": 10775 }, { "epoch": 0.7, "grad_norm": 1.2162814140319824, "learning_rate": 2.239610509556024e-06, "loss": 0.5009, "step": 10776 }, { "epoch": 0.7, "grad_norm": 1.2298665046691895, "learning_rate": 2.2387389932223658e-06, "loss": 0.5127, "step": 10777 }, { "epoch": 0.7, "grad_norm": 1.31264328956604, "learning_rate": 2.2378675975763025e-06, "loss": 0.5475, "step": 10778 }, { "epoch": 0.7, "grad_norm": 1.2382962703704834, "learning_rate": 2.23699632265592e-06, "loss": 0.5086, "step": 10779 }, { "epoch": 0.7, "grad_norm": 1.1948156356811523, "learning_rate": 2.2361251684992996e-06, "loss": 0.4564, "step": 10780 }, { "epoch": 0.7, "grad_norm": 1.1485354900360107, "learning_rate": 2.235254135144517e-06, "loss": 0.5397, "step": 10781 }, { "epoch": 0.7, "grad_norm": 1.189104437828064, "learning_rate": 2.2343832226296454e-06, "loss": 0.4957, "step": 10782 }, { "epoch": 0.7, "grad_norm": 1.2308462858200073, "learning_rate": 2.2335124309927453e-06, "loss": 0.5025, "step": 10783 }, { "epoch": 0.7, "grad_norm": 1.1861299276351929, "learning_rate": 2.2326417602718793e-06, "loss": 0.56, "step": 10784 }, { "epoch": 0.7, "grad_norm": 1.164529800415039, "learning_rate": 2.231771210505102e-06, "loss": 0.5088, "step": 10785 }, { "epoch": 0.7, "grad_norm": 1.1509689092636108, "learning_rate": 2.2309007817304633e-06, "loss": 0.5182, "step": 10786 }, { "epoch": 0.7, "grad_norm": 1.2709468603134155, "learning_rate": 2.230030473986009e-06, "loss": 0.5453, "step": 10787 }, { "epoch": 0.7, "grad_norm": 1.1481572389602661, "learning_rate": 2.229160287309774e-06, "loss": 0.4981, "step": 10788 }, { "epoch": 0.7, "grad_norm": 1.1665425300598145, "learning_rate": 2.228290221739794e-06, "loss": 0.5002, "step": 10789 }, { "epoch": 0.7, "grad_norm": 1.1314334869384766, "learning_rate": 2.227420277314097e-06, "loss": 0.4743, "step": 10790 }, { "epoch": 0.7, "grad_norm": 1.259694218635559, "learning_rate": 2.2265504540707065e-06, "loss": 0.5111, "step": 10791 }, { "epoch": 0.7, "grad_norm": 1.2661792039871216, "learning_rate": 2.2256807520476403e-06, "loss": 0.5602, "step": 10792 }, { "epoch": 0.7, "grad_norm": 1.1586928367614746, "learning_rate": 2.2248111712829122e-06, "loss": 0.538, "step": 10793 }, { "epoch": 0.7, "grad_norm": 1.2014936208724976, "learning_rate": 2.223941711814526e-06, "loss": 0.519, "step": 10794 }, { "epoch": 0.7, "grad_norm": 1.092617392539978, "learning_rate": 2.2230723736804855e-06, "loss": 0.4491, "step": 10795 }, { "epoch": 0.7, "grad_norm": 1.1687308549880981, "learning_rate": 2.2222031569187865e-06, "loss": 0.5107, "step": 10796 }, { "epoch": 0.7, "grad_norm": 1.1869463920593262, "learning_rate": 2.2213340615674206e-06, "loss": 0.5219, "step": 10797 }, { "epoch": 0.7, "grad_norm": 1.1498723030090332, "learning_rate": 2.2204650876643748e-06, "loss": 0.5068, "step": 10798 }, { "epoch": 0.7, "grad_norm": 1.1826670169830322, "learning_rate": 2.2195962352476296e-06, "loss": 0.5072, "step": 10799 }, { "epoch": 0.7, "grad_norm": 1.1459429264068604, "learning_rate": 2.218727504355158e-06, "loss": 0.5239, "step": 10800 }, { "epoch": 0.7, "grad_norm": 1.2954996824264526, "learning_rate": 2.2178588950249308e-06, "loss": 0.507, "step": 10801 }, { "epoch": 0.7, "grad_norm": 1.1812412738800049, "learning_rate": 2.2169904072949137e-06, "loss": 0.5312, "step": 10802 }, { "epoch": 0.7, "grad_norm": 1.225348949432373, "learning_rate": 2.2161220412030677e-06, "loss": 0.4775, "step": 10803 }, { "epoch": 0.7, "grad_norm": 1.1575307846069336, "learning_rate": 2.215253796787343e-06, "loss": 0.538, "step": 10804 }, { "epoch": 0.7, "grad_norm": 1.1824979782104492, "learning_rate": 2.2143856740856895e-06, "loss": 0.544, "step": 10805 }, { "epoch": 0.7, "grad_norm": 1.2605981826782227, "learning_rate": 2.213517673136052e-06, "loss": 0.5617, "step": 10806 }, { "epoch": 0.7, "grad_norm": 1.2893258333206177, "learning_rate": 2.2126497939763667e-06, "loss": 0.5516, "step": 10807 }, { "epoch": 0.7, "grad_norm": 1.134805679321289, "learning_rate": 2.2117820366445703e-06, "loss": 0.5023, "step": 10808 }, { "epoch": 0.7, "grad_norm": 1.1363486051559448, "learning_rate": 2.210914401178585e-06, "loss": 0.5151, "step": 10809 }, { "epoch": 0.7, "grad_norm": 1.2502857446670532, "learning_rate": 2.2100468876163354e-06, "loss": 0.5227, "step": 10810 }, { "epoch": 0.7, "grad_norm": 1.1363059282302856, "learning_rate": 2.2091794959957387e-06, "loss": 0.5173, "step": 10811 }, { "epoch": 0.7, "grad_norm": 1.1911418437957764, "learning_rate": 2.2083122263547054e-06, "loss": 0.4477, "step": 10812 }, { "epoch": 0.7, "grad_norm": 1.250519871711731, "learning_rate": 2.2074450787311437e-06, "loss": 0.5418, "step": 10813 }, { "epoch": 0.7, "grad_norm": 1.2204155921936035, "learning_rate": 2.2065780531629506e-06, "loss": 0.5333, "step": 10814 }, { "epoch": 0.7, "grad_norm": 1.277311086654663, "learning_rate": 2.205711149688024e-06, "loss": 0.5675, "step": 10815 }, { "epoch": 0.7, "grad_norm": 1.1862815618515015, "learning_rate": 2.2048443683442537e-06, "loss": 0.5365, "step": 10816 }, { "epoch": 0.7, "grad_norm": 1.1563291549682617, "learning_rate": 2.2039777091695264e-06, "loss": 0.4981, "step": 10817 }, { "epoch": 0.7, "grad_norm": 1.1118770837783813, "learning_rate": 2.203111172201718e-06, "loss": 0.5359, "step": 10818 }, { "epoch": 0.7, "grad_norm": 1.1219282150268555, "learning_rate": 2.2022447574787035e-06, "loss": 0.4935, "step": 10819 }, { "epoch": 0.7, "grad_norm": 1.2468100786209106, "learning_rate": 2.201378465038353e-06, "loss": 0.5072, "step": 10820 }, { "epoch": 0.7, "grad_norm": 1.282076120376587, "learning_rate": 2.200512294918529e-06, "loss": 0.4687, "step": 10821 }, { "epoch": 0.7, "grad_norm": 1.205830454826355, "learning_rate": 2.19964624715709e-06, "loss": 0.5131, "step": 10822 }, { "epoch": 0.7, "grad_norm": 1.2117226123809814, "learning_rate": 2.1987803217918904e-06, "loss": 0.5109, "step": 10823 }, { "epoch": 0.7, "grad_norm": 1.213850975036621, "learning_rate": 2.197914518860773e-06, "loss": 0.5486, "step": 10824 }, { "epoch": 0.7, "grad_norm": 1.1988143920898438, "learning_rate": 2.1970488384015836e-06, "loss": 0.498, "step": 10825 }, { "epoch": 0.7, "grad_norm": 1.2309675216674805, "learning_rate": 2.196183280452157e-06, "loss": 0.5144, "step": 10826 }, { "epoch": 0.7, "grad_norm": 1.215378761291504, "learning_rate": 2.195317845050326e-06, "loss": 0.457, "step": 10827 }, { "epoch": 0.7, "grad_norm": 1.217592477798462, "learning_rate": 2.194452532233917e-06, "loss": 0.5554, "step": 10828 }, { "epoch": 0.7, "grad_norm": 1.1162683963775635, "learning_rate": 2.193587342040748e-06, "loss": 0.4615, "step": 10829 }, { "epoch": 0.7, "grad_norm": 1.2973780632019043, "learning_rate": 2.192722274508635e-06, "loss": 0.5256, "step": 10830 }, { "epoch": 0.7, "grad_norm": 1.1359366178512573, "learning_rate": 2.191857329675389e-06, "loss": 0.4816, "step": 10831 }, { "epoch": 0.7, "grad_norm": 1.2925784587860107, "learning_rate": 2.190992507578814e-06, "loss": 0.5189, "step": 10832 }, { "epoch": 0.7, "grad_norm": 1.1533547639846802, "learning_rate": 2.1901278082567095e-06, "loss": 0.5584, "step": 10833 }, { "epoch": 0.7, "grad_norm": 1.2505626678466797, "learning_rate": 2.1892632317468705e-06, "loss": 0.5289, "step": 10834 }, { "epoch": 0.7, "grad_norm": 1.2684420347213745, "learning_rate": 2.1883987780870814e-06, "loss": 0.5004, "step": 10835 }, { "epoch": 0.7, "grad_norm": 1.2893638610839844, "learning_rate": 2.1875344473151284e-06, "loss": 0.5578, "step": 10836 }, { "epoch": 0.7, "grad_norm": 1.1730424165725708, "learning_rate": 2.186670239468788e-06, "loss": 0.5151, "step": 10837 }, { "epoch": 0.7, "grad_norm": 1.220078468322754, "learning_rate": 2.1858061545858334e-06, "loss": 0.5336, "step": 10838 }, { "epoch": 0.7, "grad_norm": 1.2492157220840454, "learning_rate": 2.184942192704031e-06, "loss": 0.5327, "step": 10839 }, { "epoch": 0.7, "grad_norm": 1.2166179418563843, "learning_rate": 2.184078353861144e-06, "loss": 0.487, "step": 10840 }, { "epoch": 0.7, "grad_norm": 1.2088828086853027, "learning_rate": 2.1832146380949254e-06, "loss": 0.5174, "step": 10841 }, { "epoch": 0.7, "grad_norm": 1.1738468408584595, "learning_rate": 2.1823510454431272e-06, "loss": 0.5239, "step": 10842 }, { "epoch": 0.7, "grad_norm": 1.303065538406372, "learning_rate": 2.1814875759434957e-06, "loss": 0.5211, "step": 10843 }, { "epoch": 0.7, "grad_norm": 1.1433930397033691, "learning_rate": 2.18062422963377e-06, "loss": 0.503, "step": 10844 }, { "epoch": 0.7, "grad_norm": 1.2990938425064087, "learning_rate": 2.179761006551685e-06, "loss": 0.5291, "step": 10845 }, { "epoch": 0.7, "grad_norm": 1.2294399738311768, "learning_rate": 2.178897906734972e-06, "loss": 0.5061, "step": 10846 }, { "epoch": 0.7, "grad_norm": 1.2727502584457397, "learning_rate": 2.178034930221351e-06, "loss": 0.5356, "step": 10847 }, { "epoch": 0.7, "grad_norm": 1.234281301498413, "learning_rate": 2.1771720770485425e-06, "loss": 0.5152, "step": 10848 }, { "epoch": 0.7, "grad_norm": 1.2192060947418213, "learning_rate": 2.17630934725426e-06, "loss": 0.5225, "step": 10849 }, { "epoch": 0.7, "grad_norm": 1.108641266822815, "learning_rate": 2.1754467408762104e-06, "loss": 0.5098, "step": 10850 }, { "epoch": 0.7, "grad_norm": 1.1777524948120117, "learning_rate": 2.1745842579520967e-06, "loss": 0.5201, "step": 10851 }, { "epoch": 0.7, "grad_norm": 1.1753147840499878, "learning_rate": 2.1737218985196167e-06, "loss": 0.507, "step": 10852 }, { "epoch": 0.7, "grad_norm": 1.192425012588501, "learning_rate": 2.1728596626164587e-06, "loss": 0.4939, "step": 10853 }, { "epoch": 0.7, "grad_norm": 1.2645788192749023, "learning_rate": 2.1719975502803115e-06, "loss": 0.5197, "step": 10854 }, { "epoch": 0.7, "grad_norm": 1.1636852025985718, "learning_rate": 2.1711355615488545e-06, "loss": 0.5004, "step": 10855 }, { "epoch": 0.7, "grad_norm": 1.1718840599060059, "learning_rate": 2.1702736964597632e-06, "loss": 0.5048, "step": 10856 }, { "epoch": 0.7, "grad_norm": 1.438157320022583, "learning_rate": 2.1694119550507083e-06, "loss": 0.5862, "step": 10857 }, { "epoch": 0.7, "grad_norm": 1.2998765707015991, "learning_rate": 2.1685503373593555e-06, "loss": 0.4959, "step": 10858 }, { "epoch": 0.7, "grad_norm": 1.1751189231872559, "learning_rate": 2.1676888434233605e-06, "loss": 0.4932, "step": 10859 }, { "epoch": 0.7, "grad_norm": 1.359894037246704, "learning_rate": 2.1668274732803783e-06, "loss": 0.539, "step": 10860 }, { "epoch": 0.7, "grad_norm": 1.32668137550354, "learning_rate": 2.165966226968058e-06, "loss": 0.5265, "step": 10861 }, { "epoch": 0.7, "grad_norm": 1.2102967500686646, "learning_rate": 2.1651051045240425e-06, "loss": 0.5204, "step": 10862 }, { "epoch": 0.7, "grad_norm": 1.3793827295303345, "learning_rate": 2.1642441059859677e-06, "loss": 0.4655, "step": 10863 }, { "epoch": 0.7, "grad_norm": 1.162314772605896, "learning_rate": 2.1633832313914687e-06, "loss": 0.5051, "step": 10864 }, { "epoch": 0.7, "grad_norm": 1.2739912271499634, "learning_rate": 2.1625224807781686e-06, "loss": 0.5117, "step": 10865 }, { "epoch": 0.7, "grad_norm": 1.1868658065795898, "learning_rate": 2.1616618541836904e-06, "loss": 0.5179, "step": 10866 }, { "epoch": 0.7, "grad_norm": 1.1885627508163452, "learning_rate": 2.160801351645649e-06, "loss": 0.5313, "step": 10867 }, { "epoch": 0.7, "grad_norm": 1.2177600860595703, "learning_rate": 2.159940973201656e-06, "loss": 0.523, "step": 10868 }, { "epoch": 0.7, "grad_norm": 1.193283200263977, "learning_rate": 2.1590807188893163e-06, "loss": 0.5234, "step": 10869 }, { "epoch": 0.7, "grad_norm": 1.1674481630325317, "learning_rate": 2.1582205887462276e-06, "loss": 0.5396, "step": 10870 }, { "epoch": 0.7, "grad_norm": 1.2632594108581543, "learning_rate": 2.157360582809985e-06, "loss": 0.5193, "step": 10871 }, { "epoch": 0.7, "grad_norm": 1.1883493661880493, "learning_rate": 2.156500701118177e-06, "loss": 0.5067, "step": 10872 }, { "epoch": 0.7, "grad_norm": 1.3879777193069458, "learning_rate": 2.155640943708387e-06, "loss": 0.5412, "step": 10873 }, { "epoch": 0.7, "grad_norm": 1.2693843841552734, "learning_rate": 2.154781310618193e-06, "loss": 0.5792, "step": 10874 }, { "epoch": 0.7, "grad_norm": 1.1866720914840698, "learning_rate": 2.1539218018851694e-06, "loss": 0.4829, "step": 10875 }, { "epoch": 0.7, "grad_norm": 1.1823123693466187, "learning_rate": 2.1530624175468785e-06, "loss": 0.5002, "step": 10876 }, { "epoch": 0.7, "grad_norm": 1.1479814052581787, "learning_rate": 2.152203157640884e-06, "loss": 0.5601, "step": 10877 }, { "epoch": 0.7, "grad_norm": 1.2088656425476074, "learning_rate": 2.151344022204742e-06, "loss": 0.5297, "step": 10878 }, { "epoch": 0.7, "grad_norm": 5.2625298500061035, "learning_rate": 2.150485011276004e-06, "loss": 0.5207, "step": 10879 }, { "epoch": 0.7, "grad_norm": 1.2007369995117188, "learning_rate": 2.1496261248922133e-06, "loss": 0.5948, "step": 10880 }, { "epoch": 0.7, "grad_norm": 1.262690544128418, "learning_rate": 2.1487673630909133e-06, "loss": 0.5229, "step": 10881 }, { "epoch": 0.7, "grad_norm": 1.1848589181900024, "learning_rate": 2.1479087259096333e-06, "loss": 0.5505, "step": 10882 }, { "epoch": 0.7, "grad_norm": 1.1649101972579956, "learning_rate": 2.1470502133859043e-06, "loss": 0.492, "step": 10883 }, { "epoch": 0.7, "grad_norm": 1.3423097133636475, "learning_rate": 2.14619182555725e-06, "loss": 0.4987, "step": 10884 }, { "epoch": 0.7, "grad_norm": 1.1812154054641724, "learning_rate": 2.1453335624611883e-06, "loss": 0.4959, "step": 10885 }, { "epoch": 0.7, "grad_norm": 1.1613095998764038, "learning_rate": 2.144475424135231e-06, "loss": 0.5525, "step": 10886 }, { "epoch": 0.7, "grad_norm": 1.2160414457321167, "learning_rate": 2.1436174106168877e-06, "loss": 0.5287, "step": 10887 }, { "epoch": 0.7, "grad_norm": 1.205976128578186, "learning_rate": 2.1427595219436558e-06, "loss": 0.5316, "step": 10888 }, { "epoch": 0.7, "grad_norm": 1.1922727823257446, "learning_rate": 2.1419017581530334e-06, "loss": 0.4883, "step": 10889 }, { "epoch": 0.7, "grad_norm": 1.1705286502838135, "learning_rate": 2.141044119282511e-06, "loss": 0.5547, "step": 10890 }, { "epoch": 0.7, "grad_norm": 1.1200305223464966, "learning_rate": 2.1401866053695743e-06, "loss": 0.5453, "step": 10891 }, { "epoch": 0.7, "grad_norm": 1.1951733827590942, "learning_rate": 2.139329216451704e-06, "loss": 0.4725, "step": 10892 }, { "epoch": 0.7, "grad_norm": 1.2062795162200928, "learning_rate": 2.138471952566372e-06, "loss": 0.582, "step": 10893 }, { "epoch": 0.7, "grad_norm": 1.2157622575759888, "learning_rate": 2.137614813751048e-06, "loss": 0.5195, "step": 10894 }, { "epoch": 0.7, "grad_norm": 1.1737680435180664, "learning_rate": 2.1367578000431943e-06, "loss": 0.528, "step": 10895 }, { "epoch": 0.7, "grad_norm": 1.3392056226730347, "learning_rate": 2.135900911480271e-06, "loss": 0.5446, "step": 10896 }, { "epoch": 0.7, "grad_norm": 1.2225686311721802, "learning_rate": 2.135044148099731e-06, "loss": 0.537, "step": 10897 }, { "epoch": 0.7, "grad_norm": 1.120237946510315, "learning_rate": 2.1341875099390175e-06, "loss": 0.5085, "step": 10898 }, { "epoch": 0.7, "grad_norm": 1.2763010263442993, "learning_rate": 2.133330997035574e-06, "loss": 0.5037, "step": 10899 }, { "epoch": 0.7, "grad_norm": 1.2464914321899414, "learning_rate": 2.132474609426837e-06, "loss": 0.5199, "step": 10900 }, { "epoch": 0.7, "grad_norm": 1.262642502784729, "learning_rate": 2.131618347150236e-06, "loss": 0.5459, "step": 10901 }, { "epoch": 0.7, "grad_norm": 1.198341727256775, "learning_rate": 2.1307622102431983e-06, "loss": 0.4861, "step": 10902 }, { "epoch": 0.7, "grad_norm": 1.2630963325500488, "learning_rate": 2.129906198743141e-06, "loss": 0.5435, "step": 10903 }, { "epoch": 0.7, "grad_norm": 1.2335383892059326, "learning_rate": 2.129050312687477e-06, "loss": 0.5072, "step": 10904 }, { "epoch": 0.7, "grad_norm": 1.1590174436569214, "learning_rate": 2.1281945521136184e-06, "loss": 0.4955, "step": 10905 }, { "epoch": 0.7, "grad_norm": 1.1663589477539062, "learning_rate": 2.1273389170589674e-06, "loss": 0.5056, "step": 10906 }, { "epoch": 0.7, "grad_norm": 1.3048027753829956, "learning_rate": 2.126483407560919e-06, "loss": 0.5345, "step": 10907 }, { "epoch": 0.7, "grad_norm": 1.2278403043746948, "learning_rate": 2.125628023656867e-06, "loss": 0.5331, "step": 10908 }, { "epoch": 0.7, "grad_norm": 1.2776646614074707, "learning_rate": 2.1247727653841986e-06, "loss": 0.5223, "step": 10909 }, { "epoch": 0.7, "grad_norm": 1.160939335823059, "learning_rate": 2.123917632780294e-06, "loss": 0.5237, "step": 10910 }, { "epoch": 0.7, "grad_norm": 1.196368932723999, "learning_rate": 2.1230626258825316e-06, "loss": 0.4379, "step": 10911 }, { "epoch": 0.7, "grad_norm": 1.148416519165039, "learning_rate": 2.1222077447282767e-06, "loss": 0.4966, "step": 10912 }, { "epoch": 0.7, "grad_norm": 1.2229139804840088, "learning_rate": 2.1213529893548972e-06, "loss": 0.5404, "step": 10913 }, { "epoch": 0.7, "grad_norm": 1.1308212280273438, "learning_rate": 2.1204983597997515e-06, "loss": 0.4578, "step": 10914 }, { "epoch": 0.7, "grad_norm": 1.2285618782043457, "learning_rate": 2.1196438561001924e-06, "loss": 0.5412, "step": 10915 }, { "epoch": 0.7, "grad_norm": 1.1700307130813599, "learning_rate": 2.1187894782935716e-06, "loss": 0.4925, "step": 10916 }, { "epoch": 0.7, "grad_norm": 1.1998779773712158, "learning_rate": 2.117935226417227e-06, "loss": 0.4936, "step": 10917 }, { "epoch": 0.7, "grad_norm": 1.268453598022461, "learning_rate": 2.117081100508498e-06, "loss": 0.5201, "step": 10918 }, { "epoch": 0.7, "grad_norm": 1.1871124505996704, "learning_rate": 2.1162271006047157e-06, "loss": 0.4775, "step": 10919 }, { "epoch": 0.7, "grad_norm": 1.1940839290618896, "learning_rate": 2.115373226743207e-06, "loss": 0.531, "step": 10920 }, { "epoch": 0.7, "grad_norm": 1.2219963073730469, "learning_rate": 2.1145194789612917e-06, "loss": 0.5235, "step": 10921 }, { "epoch": 0.71, "grad_norm": 1.2724153995513916, "learning_rate": 2.113665857296288e-06, "loss": 0.5363, "step": 10922 }, { "epoch": 0.71, "grad_norm": 1.331539273262024, "learning_rate": 2.1128123617854996e-06, "loss": 0.5248, "step": 10923 }, { "epoch": 0.71, "grad_norm": 1.1772598028182983, "learning_rate": 2.1119589924662346e-06, "loss": 0.5322, "step": 10924 }, { "epoch": 0.71, "grad_norm": 1.1076220273971558, "learning_rate": 2.111105749375791e-06, "loss": 0.5091, "step": 10925 }, { "epoch": 0.71, "grad_norm": 1.3051210641860962, "learning_rate": 2.1102526325514617e-06, "loss": 0.5484, "step": 10926 }, { "epoch": 0.71, "grad_norm": 1.114073395729065, "learning_rate": 2.1093996420305337e-06, "loss": 0.5311, "step": 10927 }, { "epoch": 0.71, "grad_norm": 1.120653748512268, "learning_rate": 2.1085467778502915e-06, "loss": 0.5118, "step": 10928 }, { "epoch": 0.71, "grad_norm": 1.1090667247772217, "learning_rate": 2.107694040048008e-06, "loss": 0.5145, "step": 10929 }, { "epoch": 0.71, "grad_norm": 1.3024027347564697, "learning_rate": 2.1068414286609553e-06, "loss": 0.5382, "step": 10930 }, { "epoch": 0.71, "grad_norm": 1.172569990158081, "learning_rate": 2.1059889437264e-06, "loss": 0.5228, "step": 10931 }, { "epoch": 0.71, "grad_norm": 1.1974586248397827, "learning_rate": 2.1051365852816004e-06, "loss": 0.4963, "step": 10932 }, { "epoch": 0.71, "grad_norm": 1.2637014389038086, "learning_rate": 2.1042843533638123e-06, "loss": 0.511, "step": 10933 }, { "epoch": 0.71, "grad_norm": 1.2490596771240234, "learning_rate": 2.103432248010286e-06, "loss": 0.4885, "step": 10934 }, { "epoch": 0.71, "grad_norm": 1.2696008682250977, "learning_rate": 2.1025802692582607e-06, "loss": 0.5198, "step": 10935 }, { "epoch": 0.71, "grad_norm": 1.0999102592468262, "learning_rate": 2.101728417144977e-06, "loss": 0.4978, "step": 10936 }, { "epoch": 0.71, "grad_norm": 1.1654093265533447, "learning_rate": 2.1008766917076663e-06, "loss": 0.5252, "step": 10937 }, { "epoch": 0.71, "grad_norm": 1.1301239728927612, "learning_rate": 2.1000250929835554e-06, "loss": 0.5109, "step": 10938 }, { "epoch": 0.71, "grad_norm": 1.3579978942871094, "learning_rate": 2.0991736210098655e-06, "loss": 0.523, "step": 10939 }, { "epoch": 0.71, "grad_norm": 1.1400790214538574, "learning_rate": 2.098322275823815e-06, "loss": 0.5098, "step": 10940 }, { "epoch": 0.71, "grad_norm": 1.2420450448989868, "learning_rate": 2.0974710574626085e-06, "loss": 0.4887, "step": 10941 }, { "epoch": 0.71, "grad_norm": 1.2169058322906494, "learning_rate": 2.096619965963454e-06, "loss": 0.5146, "step": 10942 }, { "epoch": 0.71, "grad_norm": 1.2945047616958618, "learning_rate": 2.09576900136355e-06, "loss": 0.526, "step": 10943 }, { "epoch": 0.71, "grad_norm": 1.180678367614746, "learning_rate": 2.0949181637000896e-06, "loss": 0.4946, "step": 10944 }, { "epoch": 0.71, "grad_norm": 1.1672152280807495, "learning_rate": 2.0940674530102618e-06, "loss": 0.4985, "step": 10945 }, { "epoch": 0.71, "grad_norm": 1.1324471235275269, "learning_rate": 2.0932168693312494e-06, "loss": 0.4736, "step": 10946 }, { "epoch": 0.71, "grad_norm": 1.286409616470337, "learning_rate": 2.0923664127002263e-06, "loss": 0.5293, "step": 10947 }, { "epoch": 0.71, "grad_norm": 1.3328651189804077, "learning_rate": 2.0915160831543656e-06, "loss": 0.5055, "step": 10948 }, { "epoch": 0.71, "grad_norm": 1.1291788816452026, "learning_rate": 2.090665880730833e-06, "loss": 0.5464, "step": 10949 }, { "epoch": 0.71, "grad_norm": 1.1137149333953857, "learning_rate": 2.0898158054667884e-06, "loss": 0.5242, "step": 10950 }, { "epoch": 0.71, "grad_norm": 1.191617488861084, "learning_rate": 2.0889658573993866e-06, "loss": 0.5184, "step": 10951 }, { "epoch": 0.71, "grad_norm": 1.2109061479568481, "learning_rate": 2.0881160365657787e-06, "loss": 0.5394, "step": 10952 }, { "epoch": 0.71, "grad_norm": 1.1618956327438354, "learning_rate": 2.087266343003104e-06, "loss": 0.4851, "step": 10953 }, { "epoch": 0.71, "grad_norm": 1.27151358127594, "learning_rate": 2.0864167767485026e-06, "loss": 0.5353, "step": 10954 }, { "epoch": 0.71, "grad_norm": 1.2809038162231445, "learning_rate": 2.085567337839107e-06, "loss": 0.5441, "step": 10955 }, { "epoch": 0.71, "grad_norm": 1.1463018655776978, "learning_rate": 2.0847180263120432e-06, "loss": 0.4602, "step": 10956 }, { "epoch": 0.71, "grad_norm": 1.1895344257354736, "learning_rate": 2.0838688422044347e-06, "loss": 0.5385, "step": 10957 }, { "epoch": 0.71, "grad_norm": 1.2578628063201904, "learning_rate": 2.083019785553394e-06, "loss": 0.5336, "step": 10958 }, { "epoch": 0.71, "grad_norm": 1.1682333946228027, "learning_rate": 2.0821708563960323e-06, "loss": 0.449, "step": 10959 }, { "epoch": 0.71, "grad_norm": 1.2711615562438965, "learning_rate": 2.0813220547694545e-06, "loss": 0.5541, "step": 10960 }, { "epoch": 0.71, "grad_norm": 1.1872347593307495, "learning_rate": 2.080473380710759e-06, "loss": 0.4919, "step": 10961 }, { "epoch": 0.71, "grad_norm": 1.540755033493042, "learning_rate": 2.0796248342570402e-06, "loss": 0.5825, "step": 10962 }, { "epoch": 0.71, "grad_norm": 1.0845186710357666, "learning_rate": 2.0787764154453864e-06, "loss": 0.5315, "step": 10963 }, { "epoch": 0.71, "grad_norm": 1.2806941270828247, "learning_rate": 2.0779281243128775e-06, "loss": 0.5429, "step": 10964 }, { "epoch": 0.71, "grad_norm": 1.2666904926300049, "learning_rate": 2.077079960896591e-06, "loss": 0.5173, "step": 10965 }, { "epoch": 0.71, "grad_norm": 1.8683745861053467, "learning_rate": 2.076231925233598e-06, "loss": 0.512, "step": 10966 }, { "epoch": 0.71, "grad_norm": 1.19956374168396, "learning_rate": 2.075384017360964e-06, "loss": 0.4998, "step": 10967 }, { "epoch": 0.71, "grad_norm": 1.123069167137146, "learning_rate": 2.0745362373157496e-06, "loss": 0.5414, "step": 10968 }, { "epoch": 0.71, "grad_norm": 1.1066646575927734, "learning_rate": 2.0736885851350096e-06, "loss": 0.4795, "step": 10969 }, { "epoch": 0.71, "grad_norm": 1.1851162910461426, "learning_rate": 2.07284106085579e-06, "loss": 0.5541, "step": 10970 }, { "epoch": 0.71, "grad_norm": 1.1392163038253784, "learning_rate": 2.0719936645151355e-06, "loss": 0.5143, "step": 10971 }, { "epoch": 0.71, "grad_norm": 1.1959270238876343, "learning_rate": 2.0711463961500838e-06, "loss": 0.5558, "step": 10972 }, { "epoch": 0.71, "grad_norm": 1.1491066217422485, "learning_rate": 2.070299255797666e-06, "loss": 0.4901, "step": 10973 }, { "epoch": 0.71, "grad_norm": 1.1820297241210938, "learning_rate": 2.0694522434949094e-06, "loss": 0.5663, "step": 10974 }, { "epoch": 0.71, "grad_norm": 1.352411150932312, "learning_rate": 2.0686053592788365e-06, "loss": 0.5708, "step": 10975 }, { "epoch": 0.71, "grad_norm": 1.1772867441177368, "learning_rate": 2.0677586031864576e-06, "loss": 0.4871, "step": 10976 }, { "epoch": 0.71, "grad_norm": 1.2434873580932617, "learning_rate": 2.066911975254785e-06, "loss": 0.5347, "step": 10977 }, { "epoch": 0.71, "grad_norm": 1.198717474937439, "learning_rate": 2.066065475520823e-06, "loss": 0.5237, "step": 10978 }, { "epoch": 0.71, "grad_norm": 1.1763274669647217, "learning_rate": 2.0652191040215693e-06, "loss": 0.4956, "step": 10979 }, { "epoch": 0.71, "grad_norm": 1.354894757270813, "learning_rate": 2.0643728607940166e-06, "loss": 0.5427, "step": 10980 }, { "epoch": 0.71, "grad_norm": 1.2066290378570557, "learning_rate": 2.063526745875154e-06, "loss": 0.5326, "step": 10981 }, { "epoch": 0.71, "grad_norm": 1.155434250831604, "learning_rate": 2.062680759301959e-06, "loss": 0.485, "step": 10982 }, { "epoch": 0.71, "grad_norm": 1.2513511180877686, "learning_rate": 2.0618349011114104e-06, "loss": 0.5531, "step": 10983 }, { "epoch": 0.71, "grad_norm": 1.2864494323730469, "learning_rate": 2.060989171340477e-06, "loss": 0.5331, "step": 10984 }, { "epoch": 0.71, "grad_norm": 1.1751604080200195, "learning_rate": 2.060143570026124e-06, "loss": 0.5552, "step": 10985 }, { "epoch": 0.71, "grad_norm": 1.1250545978546143, "learning_rate": 2.059298097205313e-06, "loss": 0.4532, "step": 10986 }, { "epoch": 0.71, "grad_norm": 1.2127904891967773, "learning_rate": 2.0584527529149934e-06, "loss": 0.54, "step": 10987 }, { "epoch": 0.71, "grad_norm": 1.2549030780792236, "learning_rate": 2.057607537192114e-06, "loss": 0.5242, "step": 10988 }, { "epoch": 0.71, "grad_norm": 1.2708556652069092, "learning_rate": 2.0567624500736185e-06, "loss": 0.5436, "step": 10989 }, { "epoch": 0.71, "grad_norm": 1.173907995223999, "learning_rate": 2.0559174915964418e-06, "loss": 0.5002, "step": 10990 }, { "epoch": 0.71, "grad_norm": 1.2347363233566284, "learning_rate": 2.0550726617975182e-06, "loss": 0.4852, "step": 10991 }, { "epoch": 0.71, "grad_norm": 1.2521859407424927, "learning_rate": 2.0542279607137684e-06, "loss": 0.4963, "step": 10992 }, { "epoch": 0.71, "grad_norm": 1.2736608982086182, "learning_rate": 2.0533833883821143e-06, "loss": 0.5087, "step": 10993 }, { "epoch": 0.71, "grad_norm": 1.2403697967529297, "learning_rate": 2.0525389448394694e-06, "loss": 0.4706, "step": 10994 }, { "epoch": 0.71, "grad_norm": 1.2677514553070068, "learning_rate": 2.0516946301227452e-06, "loss": 0.5163, "step": 10995 }, { "epoch": 0.71, "grad_norm": 1.1795587539672852, "learning_rate": 2.0508504442688397e-06, "loss": 0.4988, "step": 10996 }, { "epoch": 0.71, "grad_norm": 1.2397042512893677, "learning_rate": 2.0500063873146527e-06, "loss": 0.5458, "step": 10997 }, { "epoch": 0.71, "grad_norm": 1.2679859399795532, "learning_rate": 2.0491624592970753e-06, "loss": 0.5213, "step": 10998 }, { "epoch": 0.71, "grad_norm": 1.3030117750167847, "learning_rate": 2.0483186602529938e-06, "loss": 0.4841, "step": 10999 }, { "epoch": 0.71, "grad_norm": 1.2013490200042725, "learning_rate": 2.0474749902192896e-06, "loss": 0.5371, "step": 11000 }, { "epoch": 0.71, "grad_norm": 1.2413558959960938, "learning_rate": 2.046631449232834e-06, "loss": 0.5145, "step": 11001 }, { "epoch": 0.71, "grad_norm": 1.144160270690918, "learning_rate": 2.0457880373304988e-06, "loss": 0.501, "step": 11002 }, { "epoch": 0.71, "grad_norm": 1.1746543645858765, "learning_rate": 2.044944754549146e-06, "loss": 0.488, "step": 11003 }, { "epoch": 0.71, "grad_norm": 1.0826184749603271, "learning_rate": 2.044101600925636e-06, "loss": 0.4492, "step": 11004 }, { "epoch": 0.71, "grad_norm": 1.1617929935455322, "learning_rate": 2.0432585764968162e-06, "loss": 0.513, "step": 11005 }, { "epoch": 0.71, "grad_norm": 1.1179927587509155, "learning_rate": 2.042415681299536e-06, "loss": 0.4916, "step": 11006 }, { "epoch": 0.71, "grad_norm": 1.149549961090088, "learning_rate": 2.0415729153706355e-06, "loss": 0.4811, "step": 11007 }, { "epoch": 0.71, "grad_norm": 1.1222679615020752, "learning_rate": 2.0407302787469502e-06, "loss": 0.5195, "step": 11008 }, { "epoch": 0.71, "grad_norm": 1.1088905334472656, "learning_rate": 2.0398877714653094e-06, "loss": 0.5276, "step": 11009 }, { "epoch": 0.71, "grad_norm": 1.2908984422683716, "learning_rate": 2.039045393562539e-06, "loss": 0.5217, "step": 11010 }, { "epoch": 0.71, "grad_norm": 1.3171535730361938, "learning_rate": 2.0382031450754525e-06, "loss": 0.5081, "step": 11011 }, { "epoch": 0.71, "grad_norm": 1.1013978719711304, "learning_rate": 2.0373610260408656e-06, "loss": 0.4916, "step": 11012 }, { "epoch": 0.71, "grad_norm": 1.2404379844665527, "learning_rate": 2.0365190364955845e-06, "loss": 0.4875, "step": 11013 }, { "epoch": 0.71, "grad_norm": 1.1751364469528198, "learning_rate": 2.03567717647641e-06, "loss": 0.5106, "step": 11014 }, { "epoch": 0.71, "grad_norm": 1.1548347473144531, "learning_rate": 2.0348354460201387e-06, "loss": 0.5331, "step": 11015 }, { "epoch": 0.71, "grad_norm": 1.2443441152572632, "learning_rate": 2.033993845163561e-06, "loss": 0.5354, "step": 11016 }, { "epoch": 0.71, "grad_norm": 1.1904244422912598, "learning_rate": 2.033152373943458e-06, "loss": 0.5058, "step": 11017 }, { "epoch": 0.71, "grad_norm": 1.2359777688980103, "learning_rate": 2.032311032396611e-06, "loss": 0.5147, "step": 11018 }, { "epoch": 0.71, "grad_norm": 1.1507092714309692, "learning_rate": 2.0314698205597915e-06, "loss": 0.5226, "step": 11019 }, { "epoch": 0.71, "grad_norm": 1.179816484451294, "learning_rate": 2.030628738469768e-06, "loss": 0.5483, "step": 11020 }, { "epoch": 0.71, "grad_norm": 1.1701444387435913, "learning_rate": 2.0297877861633003e-06, "loss": 0.5133, "step": 11021 }, { "epoch": 0.71, "grad_norm": 1.1362115144729614, "learning_rate": 2.028946963677147e-06, "loss": 0.4514, "step": 11022 }, { "epoch": 0.71, "grad_norm": 1.28322434425354, "learning_rate": 2.0281062710480552e-06, "loss": 0.5405, "step": 11023 }, { "epoch": 0.71, "grad_norm": 1.2802436351776123, "learning_rate": 2.0272657083127705e-06, "loss": 0.5404, "step": 11024 }, { "epoch": 0.71, "grad_norm": 1.1815989017486572, "learning_rate": 2.0264252755080322e-06, "loss": 0.5097, "step": 11025 }, { "epoch": 0.71, "grad_norm": 1.266679286956787, "learning_rate": 2.025584972670573e-06, "loss": 0.5067, "step": 11026 }, { "epoch": 0.71, "grad_norm": 1.133595585823059, "learning_rate": 2.02474479983712e-06, "loss": 0.5258, "step": 11027 }, { "epoch": 0.71, "grad_norm": 1.2750623226165771, "learning_rate": 2.023904757044398e-06, "loss": 0.568, "step": 11028 }, { "epoch": 0.71, "grad_norm": 1.1233952045440674, "learning_rate": 2.023064844329119e-06, "loss": 0.5052, "step": 11029 }, { "epoch": 0.71, "grad_norm": 1.3163738250732422, "learning_rate": 2.0222250617279944e-06, "loss": 0.5527, "step": 11030 }, { "epoch": 0.71, "grad_norm": 1.2606791257858276, "learning_rate": 2.0213854092777304e-06, "loss": 0.5268, "step": 11031 }, { "epoch": 0.71, "grad_norm": 1.3122941255569458, "learning_rate": 2.0205458870150248e-06, "loss": 0.5431, "step": 11032 }, { "epoch": 0.71, "grad_norm": 1.2400822639465332, "learning_rate": 2.019706494976571e-06, "loss": 0.5181, "step": 11033 }, { "epoch": 0.71, "grad_norm": 1.1892271041870117, "learning_rate": 2.0188672331990594e-06, "loss": 0.5511, "step": 11034 }, { "epoch": 0.71, "grad_norm": 1.1450929641723633, "learning_rate": 2.018028101719167e-06, "loss": 0.5192, "step": 11035 }, { "epoch": 0.71, "grad_norm": 1.228196144104004, "learning_rate": 2.017189100573574e-06, "loss": 0.5074, "step": 11036 }, { "epoch": 0.71, "grad_norm": 1.173600435256958, "learning_rate": 2.0163502297989486e-06, "loss": 0.5066, "step": 11037 }, { "epoch": 0.71, "grad_norm": 1.2578397989273071, "learning_rate": 2.0155114894319572e-06, "loss": 0.465, "step": 11038 }, { "epoch": 0.71, "grad_norm": 1.3203462362289429, "learning_rate": 2.0146728795092586e-06, "loss": 0.5179, "step": 11039 }, { "epoch": 0.71, "grad_norm": 1.1711366176605225, "learning_rate": 2.0138344000675076e-06, "loss": 0.5123, "step": 11040 }, { "epoch": 0.71, "grad_norm": 1.1971938610076904, "learning_rate": 2.0129960511433487e-06, "loss": 0.4787, "step": 11041 }, { "epoch": 0.71, "grad_norm": 1.2216635942459106, "learning_rate": 2.0121578327734265e-06, "loss": 0.5443, "step": 11042 }, { "epoch": 0.71, "grad_norm": 1.1672842502593994, "learning_rate": 2.011319744994376e-06, "loss": 0.5494, "step": 11043 }, { "epoch": 0.71, "grad_norm": 1.1673471927642822, "learning_rate": 2.010481787842829e-06, "loss": 0.5473, "step": 11044 }, { "epoch": 0.71, "grad_norm": 1.112378716468811, "learning_rate": 2.0096439613554115e-06, "loss": 0.4892, "step": 11045 }, { "epoch": 0.71, "grad_norm": 1.378509759902954, "learning_rate": 2.0088062655687397e-06, "loss": 0.5323, "step": 11046 }, { "epoch": 0.71, "grad_norm": 1.1233984231948853, "learning_rate": 2.007968700519428e-06, "loss": 0.4802, "step": 11047 }, { "epoch": 0.71, "grad_norm": 1.229224681854248, "learning_rate": 2.0071312662440856e-06, "loss": 0.563, "step": 11048 }, { "epoch": 0.71, "grad_norm": 1.2280136346817017, "learning_rate": 2.0062939627793136e-06, "loss": 0.572, "step": 11049 }, { "epoch": 0.71, "grad_norm": 1.15505850315094, "learning_rate": 2.005456790161709e-06, "loss": 0.5207, "step": 11050 }, { "epoch": 0.71, "grad_norm": 1.4655141830444336, "learning_rate": 2.0046197484278633e-06, "loss": 0.5155, "step": 11051 }, { "epoch": 0.71, "grad_norm": 1.1917182207107544, "learning_rate": 2.0037828376143586e-06, "loss": 0.585, "step": 11052 }, { "epoch": 0.71, "grad_norm": 1.1703839302062988, "learning_rate": 2.002946057757776e-06, "loss": 0.5283, "step": 11053 }, { "epoch": 0.71, "grad_norm": 1.2065850496292114, "learning_rate": 2.002109408894688e-06, "loss": 0.5519, "step": 11054 }, { "epoch": 0.71, "grad_norm": 1.2685961723327637, "learning_rate": 2.0012728910616634e-06, "loss": 0.5871, "step": 11055 }, { "epoch": 0.71, "grad_norm": 1.1184974908828735, "learning_rate": 2.0004365042952645e-06, "loss": 0.5012, "step": 11056 }, { "epoch": 0.71, "grad_norm": 1.154728889465332, "learning_rate": 1.9996002486320483e-06, "loss": 0.5195, "step": 11057 }, { "epoch": 0.71, "grad_norm": 1.355319619178772, "learning_rate": 1.9987641241085622e-06, "loss": 0.5218, "step": 11058 }, { "epoch": 0.71, "grad_norm": 1.203948974609375, "learning_rate": 1.9979281307613527e-06, "loss": 0.5588, "step": 11059 }, { "epoch": 0.71, "grad_norm": 1.1683136224746704, "learning_rate": 1.99709226862696e-06, "loss": 0.5124, "step": 11060 }, { "epoch": 0.71, "grad_norm": 1.0962885618209839, "learning_rate": 1.996256537741916e-06, "loss": 0.4741, "step": 11061 }, { "epoch": 0.71, "grad_norm": 1.1588863134384155, "learning_rate": 1.995420938142749e-06, "loss": 0.5018, "step": 11062 }, { "epoch": 0.71, "grad_norm": 1.1928918361663818, "learning_rate": 1.9945854698659832e-06, "loss": 0.4962, "step": 11063 }, { "epoch": 0.71, "grad_norm": 1.2510159015655518, "learning_rate": 1.99375013294813e-06, "loss": 0.568, "step": 11064 }, { "epoch": 0.71, "grad_norm": 1.2725151777267456, "learning_rate": 1.9929149274257027e-06, "loss": 0.4985, "step": 11065 }, { "epoch": 0.71, "grad_norm": 1.2677266597747803, "learning_rate": 1.9920798533352053e-06, "loss": 0.5155, "step": 11066 }, { "epoch": 0.71, "grad_norm": 1.1547644138336182, "learning_rate": 1.991244910713137e-06, "loss": 0.5341, "step": 11067 }, { "epoch": 0.71, "grad_norm": 1.1388251781463623, "learning_rate": 1.9904100995959914e-06, "loss": 0.5728, "step": 11068 }, { "epoch": 0.71, "grad_norm": 1.2842793464660645, "learning_rate": 1.9895754200202565e-06, "loss": 0.5244, "step": 11069 }, { "epoch": 0.71, "grad_norm": 1.2299529314041138, "learning_rate": 1.988740872022412e-06, "loss": 0.5118, "step": 11070 }, { "epoch": 0.71, "grad_norm": 1.1576765775680542, "learning_rate": 1.987906455638934e-06, "loss": 0.5168, "step": 11071 }, { "epoch": 0.71, "grad_norm": 1.2075275182724, "learning_rate": 1.987072170906294e-06, "loss": 0.517, "step": 11072 }, { "epoch": 0.71, "grad_norm": 1.2537978887557983, "learning_rate": 1.9862380178609564e-06, "loss": 0.5295, "step": 11073 }, { "epoch": 0.71, "grad_norm": 1.3035134077072144, "learning_rate": 1.985403996539379e-06, "loss": 0.6062, "step": 11074 }, { "epoch": 0.71, "grad_norm": 1.1275521516799927, "learning_rate": 1.9845701069780166e-06, "loss": 0.5321, "step": 11075 }, { "epoch": 0.71, "grad_norm": 1.1958683729171753, "learning_rate": 1.983736349213313e-06, "loss": 0.4681, "step": 11076 }, { "epoch": 0.72, "grad_norm": 1.22831130027771, "learning_rate": 1.982902723281712e-06, "loss": 0.5548, "step": 11077 }, { "epoch": 0.72, "grad_norm": 1.1209369897842407, "learning_rate": 1.9820692292196493e-06, "loss": 0.5314, "step": 11078 }, { "epoch": 0.72, "grad_norm": 1.2517279386520386, "learning_rate": 1.9812358670635533e-06, "loss": 0.4766, "step": 11079 }, { "epoch": 0.72, "grad_norm": 1.1921437978744507, "learning_rate": 1.9804026368498514e-06, "loss": 0.561, "step": 11080 }, { "epoch": 0.72, "grad_norm": 1.1018606424331665, "learning_rate": 1.9795695386149573e-06, "loss": 0.4935, "step": 11081 }, { "epoch": 0.72, "grad_norm": 1.2480223178863525, "learning_rate": 1.978736572395286e-06, "loss": 0.5184, "step": 11082 }, { "epoch": 0.72, "grad_norm": 1.315132737159729, "learning_rate": 1.977903738227244e-06, "loss": 0.5797, "step": 11083 }, { "epoch": 0.72, "grad_norm": 1.2457351684570312, "learning_rate": 1.9770710361472343e-06, "loss": 0.5117, "step": 11084 }, { "epoch": 0.72, "grad_norm": 1.2302881479263306, "learning_rate": 1.976238466191649e-06, "loss": 0.5095, "step": 11085 }, { "epoch": 0.72, "grad_norm": 1.1681016683578491, "learning_rate": 1.975406028396879e-06, "loss": 0.4766, "step": 11086 }, { "epoch": 0.72, "grad_norm": 1.2110649347305298, "learning_rate": 1.974573722799307e-06, "loss": 0.5091, "step": 11087 }, { "epoch": 0.72, "grad_norm": 1.1524286270141602, "learning_rate": 1.9737415494353128e-06, "loss": 0.5241, "step": 11088 }, { "epoch": 0.72, "grad_norm": 1.2205989360809326, "learning_rate": 1.972909508341269e-06, "loss": 0.5801, "step": 11089 }, { "epoch": 0.72, "grad_norm": 1.1608245372772217, "learning_rate": 1.972077599553539e-06, "loss": 0.5051, "step": 11090 }, { "epoch": 0.72, "grad_norm": 1.2329999208450317, "learning_rate": 1.971245823108485e-06, "loss": 0.531, "step": 11091 }, { "epoch": 0.72, "grad_norm": 1.187025547027588, "learning_rate": 1.970414179042462e-06, "loss": 0.474, "step": 11092 }, { "epoch": 0.72, "grad_norm": 1.2708392143249512, "learning_rate": 1.96958266739182e-06, "loss": 0.5166, "step": 11093 }, { "epoch": 0.72, "grad_norm": 1.1324987411499023, "learning_rate": 1.9687512881928995e-06, "loss": 0.5141, "step": 11094 }, { "epoch": 0.72, "grad_norm": 1.2420819997787476, "learning_rate": 1.9679200414820392e-06, "loss": 0.5438, "step": 11095 }, { "epoch": 0.72, "grad_norm": 1.0544114112854004, "learning_rate": 1.9670889272955717e-06, "loss": 0.5062, "step": 11096 }, { "epoch": 0.72, "grad_norm": 1.211303949356079, "learning_rate": 1.9662579456698217e-06, "loss": 0.5145, "step": 11097 }, { "epoch": 0.72, "grad_norm": 1.0944623947143555, "learning_rate": 1.9654270966411115e-06, "loss": 0.5028, "step": 11098 }, { "epoch": 0.72, "grad_norm": 1.2407399415969849, "learning_rate": 1.964596380245752e-06, "loss": 0.5622, "step": 11099 }, { "epoch": 0.72, "grad_norm": 1.259400486946106, "learning_rate": 1.963765796520053e-06, "loss": 0.5394, "step": 11100 }, { "epoch": 0.72, "grad_norm": 1.2007300853729248, "learning_rate": 1.9629353455003175e-06, "loss": 0.5553, "step": 11101 }, { "epoch": 0.72, "grad_norm": 1.319653034210205, "learning_rate": 1.9621050272228425e-06, "loss": 0.5129, "step": 11102 }, { "epoch": 0.72, "grad_norm": 1.110202670097351, "learning_rate": 1.961274841723919e-06, "loss": 0.5282, "step": 11103 }, { "epoch": 0.72, "grad_norm": 1.2587828636169434, "learning_rate": 1.960444789039834e-06, "loss": 0.5267, "step": 11104 }, { "epoch": 0.72, "grad_norm": 1.1633156538009644, "learning_rate": 1.9596148692068627e-06, "loss": 0.4874, "step": 11105 }, { "epoch": 0.72, "grad_norm": 1.2223788499832153, "learning_rate": 1.958785082261282e-06, "loss": 0.5089, "step": 11106 }, { "epoch": 0.72, "grad_norm": 1.3380138874053955, "learning_rate": 1.957955428239358e-06, "loss": 0.5262, "step": 11107 }, { "epoch": 0.72, "grad_norm": 1.1236751079559326, "learning_rate": 1.9571259071773542e-06, "loss": 0.4632, "step": 11108 }, { "epoch": 0.72, "grad_norm": 1.160483717918396, "learning_rate": 1.9562965191115263e-06, "loss": 0.5155, "step": 11109 }, { "epoch": 0.72, "grad_norm": 1.1762139797210693, "learning_rate": 1.9554672640781265e-06, "loss": 0.5342, "step": 11110 }, { "epoch": 0.72, "grad_norm": 1.2062071561813354, "learning_rate": 1.9546381421133952e-06, "loss": 0.4615, "step": 11111 }, { "epoch": 0.72, "grad_norm": 1.2253352403640747, "learning_rate": 1.953809153253575e-06, "loss": 0.5171, "step": 11112 }, { "epoch": 0.72, "grad_norm": 1.2129464149475098, "learning_rate": 1.952980297534896e-06, "loss": 0.5551, "step": 11113 }, { "epoch": 0.72, "grad_norm": 1.1943037509918213, "learning_rate": 1.952151574993587e-06, "loss": 0.5426, "step": 11114 }, { "epoch": 0.72, "grad_norm": 1.152648687362671, "learning_rate": 1.9513229856658692e-06, "loss": 0.5424, "step": 11115 }, { "epoch": 0.72, "grad_norm": 1.1823196411132812, "learning_rate": 1.95049452958796e-06, "loss": 0.5112, "step": 11116 }, { "epoch": 0.72, "grad_norm": 1.2578693628311157, "learning_rate": 1.9496662067960655e-06, "loss": 0.4933, "step": 11117 }, { "epoch": 0.72, "grad_norm": 1.2766486406326294, "learning_rate": 1.948838017326391e-06, "loss": 0.5434, "step": 11118 }, { "epoch": 0.72, "grad_norm": 1.2128852605819702, "learning_rate": 1.9480099612151344e-06, "loss": 0.4779, "step": 11119 }, { "epoch": 0.72, "grad_norm": 1.329459547996521, "learning_rate": 1.9471820384984887e-06, "loss": 0.5373, "step": 11120 }, { "epoch": 0.72, "grad_norm": 1.189738154411316, "learning_rate": 1.9463542492126392e-06, "loss": 0.5166, "step": 11121 }, { "epoch": 0.72, "grad_norm": 1.2159343957901, "learning_rate": 1.9455265933937687e-06, "loss": 0.5161, "step": 11122 }, { "epoch": 0.72, "grad_norm": 1.2321968078613281, "learning_rate": 1.9446990710780485e-06, "loss": 0.5166, "step": 11123 }, { "epoch": 0.72, "grad_norm": 1.2050819396972656, "learning_rate": 1.943871682301649e-06, "loss": 0.5636, "step": 11124 }, { "epoch": 0.72, "grad_norm": 1.2997344732284546, "learning_rate": 1.9430444271007336e-06, "loss": 0.6113, "step": 11125 }, { "epoch": 0.72, "grad_norm": 1.081766963005066, "learning_rate": 1.942217305511459e-06, "loss": 0.4672, "step": 11126 }, { "epoch": 0.72, "grad_norm": 1.080610752105713, "learning_rate": 1.9413903175699787e-06, "loss": 0.5011, "step": 11127 }, { "epoch": 0.72, "grad_norm": 1.3550163507461548, "learning_rate": 1.9405634633124347e-06, "loss": 0.5447, "step": 11128 }, { "epoch": 0.72, "grad_norm": 1.4009063243865967, "learning_rate": 1.939736742774968e-06, "loss": 0.5569, "step": 11129 }, { "epoch": 0.72, "grad_norm": 1.2063405513763428, "learning_rate": 1.938910155993713e-06, "loss": 0.5153, "step": 11130 }, { "epoch": 0.72, "grad_norm": 1.2934972047805786, "learning_rate": 1.938083703004797e-06, "loss": 0.5552, "step": 11131 }, { "epoch": 0.72, "grad_norm": 1.221956491470337, "learning_rate": 1.937257383844343e-06, "loss": 0.5398, "step": 11132 }, { "epoch": 0.72, "grad_norm": 1.2183459997177124, "learning_rate": 1.936431198548468e-06, "loss": 0.5026, "step": 11133 }, { "epoch": 0.72, "grad_norm": 1.1978868246078491, "learning_rate": 1.93560514715328e-06, "loss": 0.5316, "step": 11134 }, { "epoch": 0.72, "grad_norm": 1.1325210332870483, "learning_rate": 1.9347792296948843e-06, "loss": 0.5064, "step": 11135 }, { "epoch": 0.72, "grad_norm": 1.1595054864883423, "learning_rate": 1.93395344620938e-06, "loss": 0.4944, "step": 11136 }, { "epoch": 0.72, "grad_norm": 1.3502943515777588, "learning_rate": 1.9331277967328603e-06, "loss": 0.5089, "step": 11137 }, { "epoch": 0.72, "grad_norm": 1.3046742677688599, "learning_rate": 1.932302281301412e-06, "loss": 0.583, "step": 11138 }, { "epoch": 0.72, "grad_norm": 1.2781833410263062, "learning_rate": 1.931476899951118e-06, "loss": 0.5251, "step": 11139 }, { "epoch": 0.72, "grad_norm": 1.2043787240982056, "learning_rate": 1.9306516527180497e-06, "loss": 0.4746, "step": 11140 }, { "epoch": 0.72, "grad_norm": 1.2247169017791748, "learning_rate": 1.929826539638279e-06, "loss": 0.551, "step": 11141 }, { "epoch": 0.72, "grad_norm": 1.1386337280273438, "learning_rate": 1.929001560747869e-06, "loss": 0.4821, "step": 11142 }, { "epoch": 0.72, "grad_norm": 1.1892554759979248, "learning_rate": 1.9281767160828774e-06, "loss": 0.5289, "step": 11143 }, { "epoch": 0.72, "grad_norm": 1.1793750524520874, "learning_rate": 1.9273520056793567e-06, "loss": 0.5092, "step": 11144 }, { "epoch": 0.72, "grad_norm": 1.1960022449493408, "learning_rate": 1.926527429573353e-06, "loss": 0.4992, "step": 11145 }, { "epoch": 0.72, "grad_norm": 1.1996583938598633, "learning_rate": 1.9257029878009048e-06, "loss": 0.5538, "step": 11146 }, { "epoch": 0.72, "grad_norm": 1.2309678792953491, "learning_rate": 1.9248786803980468e-06, "loss": 0.5457, "step": 11147 }, { "epoch": 0.72, "grad_norm": 1.1382395029067993, "learning_rate": 1.9240545074008077e-06, "loss": 0.4707, "step": 11148 }, { "epoch": 0.72, "grad_norm": 1.1425399780273438, "learning_rate": 1.92323046884521e-06, "loss": 0.4841, "step": 11149 }, { "epoch": 0.72, "grad_norm": 1.3135473728179932, "learning_rate": 1.922406564767271e-06, "loss": 0.4964, "step": 11150 }, { "epoch": 0.72, "grad_norm": 1.212449550628662, "learning_rate": 1.921582795203002e-06, "loss": 0.5356, "step": 11151 }, { "epoch": 0.72, "grad_norm": 1.240094542503357, "learning_rate": 1.920759160188405e-06, "loss": 0.5485, "step": 11152 }, { "epoch": 0.72, "grad_norm": 1.1618605852127075, "learning_rate": 1.9199356597594806e-06, "loss": 0.5348, "step": 11153 }, { "epoch": 0.72, "grad_norm": 1.278842568397522, "learning_rate": 1.9191122939522217e-06, "loss": 0.5257, "step": 11154 }, { "epoch": 0.72, "grad_norm": 1.2254347801208496, "learning_rate": 1.9182890628026156e-06, "loss": 0.563, "step": 11155 }, { "epoch": 0.72, "grad_norm": 1.1655457019805908, "learning_rate": 1.917465966346644e-06, "loss": 0.5029, "step": 11156 }, { "epoch": 0.72, "grad_norm": 1.247754693031311, "learning_rate": 1.916643004620283e-06, "loss": 0.4849, "step": 11157 }, { "epoch": 0.72, "grad_norm": 1.1352636814117432, "learning_rate": 1.9158201776594996e-06, "loss": 0.458, "step": 11158 }, { "epoch": 0.72, "grad_norm": 1.1856962442398071, "learning_rate": 1.914997485500259e-06, "loss": 0.5002, "step": 11159 }, { "epoch": 0.72, "grad_norm": 1.3234995603561401, "learning_rate": 1.9141749281785193e-06, "loss": 0.5769, "step": 11160 }, { "epoch": 0.72, "grad_norm": 1.1707749366760254, "learning_rate": 1.9133525057302315e-06, "loss": 0.4666, "step": 11161 }, { "epoch": 0.72, "grad_norm": 1.1112396717071533, "learning_rate": 1.9125302181913422e-06, "loss": 0.5487, "step": 11162 }, { "epoch": 0.72, "grad_norm": 1.1778883934020996, "learning_rate": 1.9117080655977933e-06, "loss": 0.5243, "step": 11163 }, { "epoch": 0.72, "grad_norm": 1.2169252634048462, "learning_rate": 1.9108860479855145e-06, "loss": 0.491, "step": 11164 }, { "epoch": 0.72, "grad_norm": 1.2505953311920166, "learning_rate": 1.9100641653904374e-06, "loss": 0.5309, "step": 11165 }, { "epoch": 0.72, "grad_norm": 1.4278935194015503, "learning_rate": 1.9092424178484825e-06, "loss": 0.5224, "step": 11166 }, { "epoch": 0.72, "grad_norm": 1.0587955713272095, "learning_rate": 1.9084208053955676e-06, "loss": 0.5295, "step": 11167 }, { "epoch": 0.72, "grad_norm": 1.1653552055358887, "learning_rate": 1.9075993280676054e-06, "loss": 0.477, "step": 11168 }, { "epoch": 0.72, "grad_norm": 1.2591655254364014, "learning_rate": 1.9067779859004953e-06, "loss": 0.5355, "step": 11169 }, { "epoch": 0.72, "grad_norm": 1.2920100688934326, "learning_rate": 1.9059567789301398e-06, "loss": 0.5521, "step": 11170 }, { "epoch": 0.72, "grad_norm": 1.1981074810028076, "learning_rate": 1.9051357071924298e-06, "loss": 0.4996, "step": 11171 }, { "epoch": 0.72, "grad_norm": 1.3336378335952759, "learning_rate": 1.904314770723254e-06, "loss": 0.5645, "step": 11172 }, { "epoch": 0.72, "grad_norm": 1.0342782735824585, "learning_rate": 1.9034939695584936e-06, "loss": 0.5011, "step": 11173 }, { "epoch": 0.72, "grad_norm": 1.1874598264694214, "learning_rate": 1.9026733037340216e-06, "loss": 0.4887, "step": 11174 }, { "epoch": 0.72, "grad_norm": 1.152208924293518, "learning_rate": 1.9018527732857079e-06, "loss": 0.4794, "step": 11175 }, { "epoch": 0.72, "grad_norm": 1.1926109790802002, "learning_rate": 1.901032378249416e-06, "loss": 0.492, "step": 11176 }, { "epoch": 0.72, "grad_norm": 1.2557812929153442, "learning_rate": 1.900212118661004e-06, "loss": 0.5214, "step": 11177 }, { "epoch": 0.72, "grad_norm": 1.176190972328186, "learning_rate": 1.8993919945563238e-06, "loss": 0.5104, "step": 11178 }, { "epoch": 0.72, "grad_norm": 1.3031985759735107, "learning_rate": 1.8985720059712187e-06, "loss": 0.5045, "step": 11179 }, { "epoch": 0.72, "grad_norm": 1.2569890022277832, "learning_rate": 1.8977521529415287e-06, "loss": 0.56, "step": 11180 }, { "epoch": 0.72, "grad_norm": 1.2061166763305664, "learning_rate": 1.8969324355030889e-06, "loss": 0.5321, "step": 11181 }, { "epoch": 0.72, "grad_norm": 1.1112916469573975, "learning_rate": 1.8961128536917278e-06, "loss": 0.4945, "step": 11182 }, { "epoch": 0.72, "grad_norm": 1.443838357925415, "learning_rate": 1.8952934075432645e-06, "loss": 0.5516, "step": 11183 }, { "epoch": 0.72, "grad_norm": 1.2553859949111938, "learning_rate": 1.8944740970935154e-06, "loss": 0.4876, "step": 11184 }, { "epoch": 0.72, "grad_norm": 1.2521671056747437, "learning_rate": 1.8936549223782912e-06, "loss": 0.5335, "step": 11185 }, { "epoch": 0.72, "grad_norm": 1.2936190366744995, "learning_rate": 1.892835883433396e-06, "loss": 0.4786, "step": 11186 }, { "epoch": 0.72, "grad_norm": 1.2519776821136475, "learning_rate": 1.8920169802946299e-06, "loss": 0.5023, "step": 11187 }, { "epoch": 0.72, "grad_norm": 1.2811857461929321, "learning_rate": 1.8911982129977801e-06, "loss": 0.5316, "step": 11188 }, { "epoch": 0.72, "grad_norm": 1.2732388973236084, "learning_rate": 1.8903795815786362e-06, "loss": 0.5356, "step": 11189 }, { "epoch": 0.72, "grad_norm": 1.1389294862747192, "learning_rate": 1.8895610860729784e-06, "loss": 0.5251, "step": 11190 }, { "epoch": 0.72, "grad_norm": 1.1615084409713745, "learning_rate": 1.8887427265165798e-06, "loss": 0.5029, "step": 11191 }, { "epoch": 0.72, "grad_norm": 1.1729563474655151, "learning_rate": 1.8879245029452114e-06, "loss": 0.5166, "step": 11192 }, { "epoch": 0.72, "grad_norm": 1.2210533618927002, "learning_rate": 1.8871064153946322e-06, "loss": 0.5117, "step": 11193 }, { "epoch": 0.72, "grad_norm": 1.1574077606201172, "learning_rate": 1.8862884639005995e-06, "loss": 0.5201, "step": 11194 }, { "epoch": 0.72, "grad_norm": 1.2779735326766968, "learning_rate": 1.8854706484988656e-06, "loss": 0.5159, "step": 11195 }, { "epoch": 0.72, "grad_norm": 1.0926142930984497, "learning_rate": 1.8846529692251737e-06, "loss": 0.5026, "step": 11196 }, { "epoch": 0.72, "grad_norm": 1.2618657350540161, "learning_rate": 1.8838354261152626e-06, "loss": 0.533, "step": 11197 }, { "epoch": 0.72, "grad_norm": 1.2104634046554565, "learning_rate": 1.8830180192048664e-06, "loss": 0.5177, "step": 11198 }, { "epoch": 0.72, "grad_norm": 1.4179211854934692, "learning_rate": 1.88220074852971e-06, "loss": 0.491, "step": 11199 }, { "epoch": 0.72, "grad_norm": 1.1298736333847046, "learning_rate": 1.8813836141255144e-06, "loss": 0.5168, "step": 11200 }, { "epoch": 0.72, "grad_norm": 1.3773114681243896, "learning_rate": 1.8805666160279945e-06, "loss": 0.5279, "step": 11201 }, { "epoch": 0.72, "grad_norm": 1.1839581727981567, "learning_rate": 1.8797497542728598e-06, "loss": 0.578, "step": 11202 }, { "epoch": 0.72, "grad_norm": 1.2218079566955566, "learning_rate": 1.8789330288958134e-06, "loss": 0.5674, "step": 11203 }, { "epoch": 0.72, "grad_norm": 1.5683709383010864, "learning_rate": 1.878116439932553e-06, "loss": 0.5464, "step": 11204 }, { "epoch": 0.72, "grad_norm": 1.2235167026519775, "learning_rate": 1.877299987418767e-06, "loss": 0.505, "step": 11205 }, { "epoch": 0.72, "grad_norm": 1.106073260307312, "learning_rate": 1.8764836713901418e-06, "loss": 0.5301, "step": 11206 }, { "epoch": 0.72, "grad_norm": 1.4372918605804443, "learning_rate": 1.875667491882357e-06, "loss": 0.5121, "step": 11207 }, { "epoch": 0.72, "grad_norm": 1.4497050046920776, "learning_rate": 1.8748514489310848e-06, "loss": 0.5222, "step": 11208 }, { "epoch": 0.72, "grad_norm": 1.215886116027832, "learning_rate": 1.8740355425719924e-06, "loss": 0.4853, "step": 11209 }, { "epoch": 0.72, "grad_norm": 1.2782942056655884, "learning_rate": 1.8732197728407436e-06, "loss": 0.4808, "step": 11210 }, { "epoch": 0.72, "grad_norm": 1.171302080154419, "learning_rate": 1.8724041397729897e-06, "loss": 0.4831, "step": 11211 }, { "epoch": 0.72, "grad_norm": 4.722898960113525, "learning_rate": 1.8715886434043811e-06, "loss": 0.486, "step": 11212 }, { "epoch": 0.72, "grad_norm": 1.2686601877212524, "learning_rate": 1.8707732837705623e-06, "loss": 0.5181, "step": 11213 }, { "epoch": 0.72, "grad_norm": 1.3037075996398926, "learning_rate": 1.869958060907169e-06, "loss": 0.5537, "step": 11214 }, { "epoch": 0.72, "grad_norm": 1.1133719682693481, "learning_rate": 1.8691429748498352e-06, "loss": 0.5202, "step": 11215 }, { "epoch": 0.72, "grad_norm": 1.1079604625701904, "learning_rate": 1.8683280256341823e-06, "loss": 0.4734, "step": 11216 }, { "epoch": 0.72, "grad_norm": 1.2304432392120361, "learning_rate": 1.8675132132958318e-06, "loss": 0.4761, "step": 11217 }, { "epoch": 0.72, "grad_norm": 1.1696350574493408, "learning_rate": 1.8666985378703966e-06, "loss": 0.5004, "step": 11218 }, { "epoch": 0.72, "grad_norm": 1.2693759202957153, "learning_rate": 1.8658839993934846e-06, "loss": 0.5533, "step": 11219 }, { "epoch": 0.72, "grad_norm": 1.1462496519088745, "learning_rate": 1.8650695979006966e-06, "loss": 0.5237, "step": 11220 }, { "epoch": 0.72, "grad_norm": 1.227560043334961, "learning_rate": 1.8642553334276303e-06, "loss": 0.5326, "step": 11221 }, { "epoch": 0.72, "grad_norm": 1.1689056158065796, "learning_rate": 1.8634412060098711e-06, "loss": 0.5046, "step": 11222 }, { "epoch": 0.72, "grad_norm": 1.283920407295227, "learning_rate": 1.8626272156830045e-06, "loss": 0.5264, "step": 11223 }, { "epoch": 0.72, "grad_norm": 1.358521819114685, "learning_rate": 1.8618133624826073e-06, "loss": 0.5473, "step": 11224 }, { "epoch": 0.72, "grad_norm": 1.201602816581726, "learning_rate": 1.860999646444252e-06, "loss": 0.468, "step": 11225 }, { "epoch": 0.72, "grad_norm": 1.0677032470703125, "learning_rate": 1.8601860676035033e-06, "loss": 0.4421, "step": 11226 }, { "epoch": 0.72, "grad_norm": 1.2512092590332031, "learning_rate": 1.8593726259959221e-06, "loss": 0.5214, "step": 11227 }, { "epoch": 0.72, "grad_norm": 1.3358819484710693, "learning_rate": 1.8585593216570591e-06, "loss": 0.5221, "step": 11228 }, { "epoch": 0.72, "grad_norm": 1.4034751653671265, "learning_rate": 1.8577461546224629e-06, "loss": 0.5553, "step": 11229 }, { "epoch": 0.72, "grad_norm": 1.271106481552124, "learning_rate": 1.8569331249276756e-06, "loss": 0.4831, "step": 11230 }, { "epoch": 0.72, "grad_norm": 1.1023207902908325, "learning_rate": 1.8561202326082317e-06, "loss": 0.4571, "step": 11231 }, { "epoch": 0.73, "grad_norm": 1.1188340187072754, "learning_rate": 1.8553074776996617e-06, "loss": 0.4993, "step": 11232 }, { "epoch": 0.73, "grad_norm": 1.261826753616333, "learning_rate": 1.8544948602374896e-06, "loss": 0.5565, "step": 11233 }, { "epoch": 0.73, "grad_norm": 1.175811529159546, "learning_rate": 1.8536823802572301e-06, "loss": 0.5046, "step": 11234 }, { "epoch": 0.73, "grad_norm": 1.1603277921676636, "learning_rate": 1.852870037794396e-06, "loss": 0.5071, "step": 11235 }, { "epoch": 0.73, "grad_norm": 1.161086916923523, "learning_rate": 1.852057832884493e-06, "loss": 0.533, "step": 11236 }, { "epoch": 0.73, "grad_norm": 1.2200944423675537, "learning_rate": 1.85124576556302e-06, "loss": 0.498, "step": 11237 }, { "epoch": 0.73, "grad_norm": 1.1825754642486572, "learning_rate": 1.850433835865471e-06, "loss": 0.5281, "step": 11238 }, { "epoch": 0.73, "grad_norm": 1.2583301067352295, "learning_rate": 1.8496220438273348e-06, "loss": 0.5009, "step": 11239 }, { "epoch": 0.73, "grad_norm": 1.187910556793213, "learning_rate": 1.8488103894840898e-06, "loss": 0.4864, "step": 11240 }, { "epoch": 0.73, "grad_norm": 1.2071564197540283, "learning_rate": 1.8479988728712118e-06, "loss": 0.4951, "step": 11241 }, { "epoch": 0.73, "grad_norm": 1.2374452352523804, "learning_rate": 1.8471874940241707e-06, "loss": 0.537, "step": 11242 }, { "epoch": 0.73, "grad_norm": 1.2679431438446045, "learning_rate": 1.84637625297843e-06, "loss": 0.4998, "step": 11243 }, { "epoch": 0.73, "grad_norm": 1.1245368719100952, "learning_rate": 1.8455651497694471e-06, "loss": 0.4942, "step": 11244 }, { "epoch": 0.73, "grad_norm": 1.1845890283584595, "learning_rate": 1.8447541844326743e-06, "loss": 0.4856, "step": 11245 }, { "epoch": 0.73, "grad_norm": 1.19780695438385, "learning_rate": 1.8439433570035542e-06, "loss": 0.5476, "step": 11246 }, { "epoch": 0.73, "grad_norm": 1.2677134275436401, "learning_rate": 1.843132667517527e-06, "loss": 0.5396, "step": 11247 }, { "epoch": 0.73, "grad_norm": 1.2280197143554688, "learning_rate": 1.8423221160100258e-06, "loss": 0.5459, "step": 11248 }, { "epoch": 0.73, "grad_norm": 1.2864534854888916, "learning_rate": 1.8415117025164785e-06, "loss": 0.5082, "step": 11249 }, { "epoch": 0.73, "grad_norm": 1.085888385772705, "learning_rate": 1.840701427072305e-06, "loss": 0.5116, "step": 11250 }, { "epoch": 0.73, "grad_norm": 1.1504377126693726, "learning_rate": 1.8398912897129235e-06, "loss": 0.4502, "step": 11251 }, { "epoch": 0.73, "grad_norm": 1.1273988485336304, "learning_rate": 1.8390812904737381e-06, "loss": 0.4988, "step": 11252 }, { "epoch": 0.73, "grad_norm": 1.0441358089447021, "learning_rate": 1.8382714293901549e-06, "loss": 0.4652, "step": 11253 }, { "epoch": 0.73, "grad_norm": 1.1486469507217407, "learning_rate": 1.8374617064975698e-06, "loss": 0.5019, "step": 11254 }, { "epoch": 0.73, "grad_norm": 1.1341774463653564, "learning_rate": 1.8366521218313743e-06, "loss": 0.5016, "step": 11255 }, { "epoch": 0.73, "grad_norm": 1.1760691404342651, "learning_rate": 1.8358426754269543e-06, "loss": 0.4883, "step": 11256 }, { "epoch": 0.73, "grad_norm": 1.2610909938812256, "learning_rate": 1.8350333673196857e-06, "loss": 0.5026, "step": 11257 }, { "epoch": 0.73, "grad_norm": 1.2167774438858032, "learning_rate": 1.8342241975449433e-06, "loss": 0.4891, "step": 11258 }, { "epoch": 0.73, "grad_norm": 1.21676504611969, "learning_rate": 1.8334151661380927e-06, "loss": 0.5306, "step": 11259 }, { "epoch": 0.73, "grad_norm": 1.209770679473877, "learning_rate": 1.8326062731344957e-06, "loss": 0.476, "step": 11260 }, { "epoch": 0.73, "grad_norm": 1.1283483505249023, "learning_rate": 1.831797518569507e-06, "loss": 0.5073, "step": 11261 }, { "epoch": 0.73, "grad_norm": 1.1879031658172607, "learning_rate": 1.8309889024784755e-06, "loss": 0.5297, "step": 11262 }, { "epoch": 0.73, "grad_norm": 1.109090805053711, "learning_rate": 1.830180424896742e-06, "loss": 0.4679, "step": 11263 }, { "epoch": 0.73, "grad_norm": 1.1974513530731201, "learning_rate": 1.829372085859643e-06, "loss": 0.531, "step": 11264 }, { "epoch": 0.73, "grad_norm": 1.2893469333648682, "learning_rate": 1.8285638854025106e-06, "loss": 0.5546, "step": 11265 }, { "epoch": 0.73, "grad_norm": 1.2437865734100342, "learning_rate": 1.8277558235606679e-06, "loss": 0.5444, "step": 11266 }, { "epoch": 0.73, "grad_norm": 1.1747186183929443, "learning_rate": 1.8269479003694357e-06, "loss": 0.5135, "step": 11267 }, { "epoch": 0.73, "grad_norm": 1.2440437078475952, "learning_rate": 1.8261401158641224e-06, "loss": 0.5366, "step": 11268 }, { "epoch": 0.73, "grad_norm": 1.2861274480819702, "learning_rate": 1.8253324700800357e-06, "loss": 0.564, "step": 11269 }, { "epoch": 0.73, "grad_norm": 1.2442041635513306, "learning_rate": 1.8245249630524763e-06, "loss": 0.494, "step": 11270 }, { "epoch": 0.73, "grad_norm": 1.2069143056869507, "learning_rate": 1.8237175948167395e-06, "loss": 0.5079, "step": 11271 }, { "epoch": 0.73, "grad_norm": 1.246904730796814, "learning_rate": 1.8229103654081097e-06, "loss": 0.5124, "step": 11272 }, { "epoch": 0.73, "grad_norm": 1.18019437789917, "learning_rate": 1.8221032748618716e-06, "loss": 0.4801, "step": 11273 }, { "epoch": 0.73, "grad_norm": 1.2210835218429565, "learning_rate": 1.8212963232132996e-06, "loss": 0.5148, "step": 11274 }, { "epoch": 0.73, "grad_norm": 1.29314124584198, "learning_rate": 1.8204895104976645e-06, "loss": 0.4961, "step": 11275 }, { "epoch": 0.73, "grad_norm": 1.1879254579544067, "learning_rate": 1.8196828367502312e-06, "loss": 0.5065, "step": 11276 }, { "epoch": 0.73, "grad_norm": 1.1800100803375244, "learning_rate": 1.818876302006254e-06, "loss": 0.4994, "step": 11277 }, { "epoch": 0.73, "grad_norm": 1.2204591035842896, "learning_rate": 1.8180699063009865e-06, "loss": 0.4671, "step": 11278 }, { "epoch": 0.73, "grad_norm": 1.307758092880249, "learning_rate": 1.8172636496696739e-06, "loss": 0.5473, "step": 11279 }, { "epoch": 0.73, "grad_norm": 1.0833392143249512, "learning_rate": 1.816457532147557e-06, "loss": 0.4763, "step": 11280 }, { "epoch": 0.73, "grad_norm": 1.2274136543273926, "learning_rate": 1.815651553769866e-06, "loss": 0.5174, "step": 11281 }, { "epoch": 0.73, "grad_norm": 1.124843955039978, "learning_rate": 1.81484571457183e-06, "loss": 0.5024, "step": 11282 }, { "epoch": 0.73, "grad_norm": 1.1796058416366577, "learning_rate": 1.81404001458867e-06, "loss": 0.5129, "step": 11283 }, { "epoch": 0.73, "grad_norm": 1.304100751876831, "learning_rate": 1.8132344538556013e-06, "loss": 0.5297, "step": 11284 }, { "epoch": 0.73, "grad_norm": 1.2689136266708374, "learning_rate": 1.812429032407832e-06, "loss": 0.5503, "step": 11285 }, { "epoch": 0.73, "grad_norm": 1.3669734001159668, "learning_rate": 1.8116237502805677e-06, "loss": 0.5259, "step": 11286 }, { "epoch": 0.73, "grad_norm": 1.2192200422286987, "learning_rate": 1.8108186075090017e-06, "loss": 0.4864, "step": 11287 }, { "epoch": 0.73, "grad_norm": 1.1539764404296875, "learning_rate": 1.8100136041283262e-06, "loss": 0.5454, "step": 11288 }, { "epoch": 0.73, "grad_norm": 1.3155062198638916, "learning_rate": 1.8092087401737257e-06, "loss": 0.5097, "step": 11289 }, { "epoch": 0.73, "grad_norm": 1.184954285621643, "learning_rate": 1.8084040156803783e-06, "loss": 0.4987, "step": 11290 }, { "epoch": 0.73, "grad_norm": 1.2393908500671387, "learning_rate": 1.8075994306834577e-06, "loss": 0.5513, "step": 11291 }, { "epoch": 0.73, "grad_norm": 1.156128168106079, "learning_rate": 1.806794985218131e-06, "loss": 0.4735, "step": 11292 }, { "epoch": 0.73, "grad_norm": 1.5159586668014526, "learning_rate": 1.8059906793195547e-06, "loss": 0.5312, "step": 11293 }, { "epoch": 0.73, "grad_norm": 1.1296513080596924, "learning_rate": 1.8051865130228858e-06, "loss": 0.4894, "step": 11294 }, { "epoch": 0.73, "grad_norm": 1.1398706436157227, "learning_rate": 1.8043824863632715e-06, "loss": 0.488, "step": 11295 }, { "epoch": 0.73, "grad_norm": 1.3081228733062744, "learning_rate": 1.8035785993758537e-06, "loss": 0.5591, "step": 11296 }, { "epoch": 0.73, "grad_norm": 1.357240915298462, "learning_rate": 1.8027748520957706e-06, "loss": 0.5734, "step": 11297 }, { "epoch": 0.73, "grad_norm": 1.1677700281143188, "learning_rate": 1.8019712445581472e-06, "loss": 0.5225, "step": 11298 }, { "epoch": 0.73, "grad_norm": 1.1451867818832397, "learning_rate": 1.8011677767981107e-06, "loss": 0.5151, "step": 11299 }, { "epoch": 0.73, "grad_norm": 1.3066582679748535, "learning_rate": 1.8003644488507772e-06, "loss": 0.5158, "step": 11300 }, { "epoch": 0.73, "grad_norm": 1.1608672142028809, "learning_rate": 1.799561260751258e-06, "loss": 0.519, "step": 11301 }, { "epoch": 0.73, "grad_norm": 1.311458706855774, "learning_rate": 1.7987582125346591e-06, "loss": 0.5353, "step": 11302 }, { "epoch": 0.73, "grad_norm": 1.3048348426818848, "learning_rate": 1.7979553042360815e-06, "loss": 0.5512, "step": 11303 }, { "epoch": 0.73, "grad_norm": 1.420695424079895, "learning_rate": 1.7971525358906139e-06, "loss": 0.5559, "step": 11304 }, { "epoch": 0.73, "grad_norm": 1.3446974754333496, "learning_rate": 1.7963499075333457e-06, "loss": 0.4832, "step": 11305 }, { "epoch": 0.73, "grad_norm": 1.223564863204956, "learning_rate": 1.7955474191993576e-06, "loss": 0.5377, "step": 11306 }, { "epoch": 0.73, "grad_norm": 1.2168971300125122, "learning_rate": 1.7947450709237245e-06, "loss": 0.4739, "step": 11307 }, { "epoch": 0.73, "grad_norm": 1.1686416864395142, "learning_rate": 1.7939428627415146e-06, "loss": 0.5457, "step": 11308 }, { "epoch": 0.73, "grad_norm": 1.2564119100570679, "learning_rate": 1.7931407946877916e-06, "loss": 0.5322, "step": 11309 }, { "epoch": 0.73, "grad_norm": 1.2380417585372925, "learning_rate": 1.7923388667976094e-06, "loss": 0.5102, "step": 11310 }, { "epoch": 0.73, "grad_norm": 1.2141138315200806, "learning_rate": 1.7915370791060194e-06, "loss": 0.5209, "step": 11311 }, { "epoch": 0.73, "grad_norm": 1.4893617630004883, "learning_rate": 1.7907354316480657e-06, "loss": 0.5329, "step": 11312 }, { "epoch": 0.73, "grad_norm": 1.2414977550506592, "learning_rate": 1.789933924458786e-06, "loss": 0.5141, "step": 11313 }, { "epoch": 0.73, "grad_norm": 1.1473976373672485, "learning_rate": 1.7891325575732126e-06, "loss": 0.5265, "step": 11314 }, { "epoch": 0.73, "grad_norm": 1.230094313621521, "learning_rate": 1.7883313310263727e-06, "loss": 0.4866, "step": 11315 }, { "epoch": 0.73, "grad_norm": 1.1755040884017944, "learning_rate": 1.7875302448532828e-06, "loss": 0.5221, "step": 11316 }, { "epoch": 0.73, "grad_norm": 1.145602822303772, "learning_rate": 1.7867292990889567e-06, "loss": 0.471, "step": 11317 }, { "epoch": 0.73, "grad_norm": 1.2156143188476562, "learning_rate": 1.7859284937684036e-06, "loss": 0.4599, "step": 11318 }, { "epoch": 0.73, "grad_norm": 1.2013427019119263, "learning_rate": 1.7851278289266227e-06, "loss": 0.4879, "step": 11319 }, { "epoch": 0.73, "grad_norm": 1.3220206499099731, "learning_rate": 1.7843273045986104e-06, "loss": 0.5372, "step": 11320 }, { "epoch": 0.73, "grad_norm": 1.156706690788269, "learning_rate": 1.7835269208193568e-06, "loss": 0.5289, "step": 11321 }, { "epoch": 0.73, "grad_norm": 1.261476993560791, "learning_rate": 1.782726677623841e-06, "loss": 0.5599, "step": 11322 }, { "epoch": 0.73, "grad_norm": 1.1886913776397705, "learning_rate": 1.7819265750470417e-06, "loss": 0.5671, "step": 11323 }, { "epoch": 0.73, "grad_norm": 1.1921685934066772, "learning_rate": 1.7811266131239286e-06, "loss": 0.5262, "step": 11324 }, { "epoch": 0.73, "grad_norm": 1.167641043663025, "learning_rate": 1.7803267918894668e-06, "loss": 0.554, "step": 11325 }, { "epoch": 0.73, "grad_norm": 1.165313720703125, "learning_rate": 1.7795271113786138e-06, "loss": 0.4929, "step": 11326 }, { "epoch": 0.73, "grad_norm": 1.2236350774765015, "learning_rate": 1.778727571626324e-06, "loss": 0.5494, "step": 11327 }, { "epoch": 0.73, "grad_norm": 1.238731026649475, "learning_rate": 1.7779281726675391e-06, "loss": 0.4726, "step": 11328 }, { "epoch": 0.73, "grad_norm": 1.2629318237304688, "learning_rate": 1.7771289145372005e-06, "loss": 0.5373, "step": 11329 }, { "epoch": 0.73, "grad_norm": 1.2198054790496826, "learning_rate": 1.776329797270242e-06, "loss": 0.525, "step": 11330 }, { "epoch": 0.73, "grad_norm": 1.1672234535217285, "learning_rate": 1.7755308209015914e-06, "loss": 0.4976, "step": 11331 }, { "epoch": 0.73, "grad_norm": 1.164543628692627, "learning_rate": 1.7747319854661688e-06, "loss": 0.4837, "step": 11332 }, { "epoch": 0.73, "grad_norm": 1.3638129234313965, "learning_rate": 1.7739332909988915e-06, "loss": 0.5154, "step": 11333 }, { "epoch": 0.73, "grad_norm": 1.137704849243164, "learning_rate": 1.7731347375346653e-06, "loss": 0.4906, "step": 11334 }, { "epoch": 0.73, "grad_norm": 1.1621369123458862, "learning_rate": 1.772336325108394e-06, "loss": 0.4877, "step": 11335 }, { "epoch": 0.73, "grad_norm": 1.2545338869094849, "learning_rate": 1.7715380537549748e-06, "loss": 0.5248, "step": 11336 }, { "epoch": 0.73, "grad_norm": 1.2256008386611938, "learning_rate": 1.7707399235092975e-06, "loss": 0.476, "step": 11337 }, { "epoch": 0.73, "grad_norm": 1.2486363649368286, "learning_rate": 1.7699419344062468e-06, "loss": 0.4958, "step": 11338 }, { "epoch": 0.73, "grad_norm": 1.1757192611694336, "learning_rate": 1.769144086480702e-06, "loss": 0.5215, "step": 11339 }, { "epoch": 0.73, "grad_norm": 1.2416365146636963, "learning_rate": 1.7683463797675316e-06, "loss": 0.5216, "step": 11340 }, { "epoch": 0.73, "grad_norm": 1.2440108060836792, "learning_rate": 1.7675488143016035e-06, "loss": 0.5306, "step": 11341 }, { "epoch": 0.73, "grad_norm": 1.6960320472717285, "learning_rate": 1.7667513901177764e-06, "loss": 0.5247, "step": 11342 }, { "epoch": 0.73, "grad_norm": 1.2689858675003052, "learning_rate": 1.7659541072509045e-06, "loss": 0.5311, "step": 11343 }, { "epoch": 0.73, "grad_norm": 1.2357232570648193, "learning_rate": 1.7651569657358365e-06, "loss": 0.5042, "step": 11344 }, { "epoch": 0.73, "grad_norm": 1.285810947418213, "learning_rate": 1.7643599656074096e-06, "loss": 0.5776, "step": 11345 }, { "epoch": 0.73, "grad_norm": 1.1110438108444214, "learning_rate": 1.7635631069004605e-06, "loss": 0.4716, "step": 11346 }, { "epoch": 0.73, "grad_norm": 1.1592333316802979, "learning_rate": 1.7627663896498177e-06, "loss": 0.4972, "step": 11347 }, { "epoch": 0.73, "grad_norm": 1.230967402458191, "learning_rate": 1.7619698138903042e-06, "loss": 0.4836, "step": 11348 }, { "epoch": 0.73, "grad_norm": 1.1873271465301514, "learning_rate": 1.7611733796567354e-06, "loss": 0.51, "step": 11349 }, { "epoch": 0.73, "grad_norm": 1.2736437320709229, "learning_rate": 1.7603770869839243e-06, "loss": 0.5245, "step": 11350 }, { "epoch": 0.73, "grad_norm": 1.2260808944702148, "learning_rate": 1.75958093590667e-06, "loss": 0.5609, "step": 11351 }, { "epoch": 0.73, "grad_norm": 1.2454365491867065, "learning_rate": 1.7587849264597728e-06, "loss": 0.5238, "step": 11352 }, { "epoch": 0.73, "grad_norm": 1.2865471839904785, "learning_rate": 1.757989058678024e-06, "loss": 0.5094, "step": 11353 }, { "epoch": 0.73, "grad_norm": 1.2760127782821655, "learning_rate": 1.7571933325962094e-06, "loss": 0.5303, "step": 11354 }, { "epoch": 0.73, "grad_norm": 1.1954082250595093, "learning_rate": 1.7563977482491074e-06, "loss": 0.4952, "step": 11355 }, { "epoch": 0.73, "grad_norm": 1.1902787685394287, "learning_rate": 1.7556023056714927e-06, "loss": 0.5193, "step": 11356 }, { "epoch": 0.73, "grad_norm": 1.3164693117141724, "learning_rate": 1.754807004898129e-06, "loss": 0.5351, "step": 11357 }, { "epoch": 0.73, "grad_norm": 1.3190968036651611, "learning_rate": 1.7540118459637784e-06, "loss": 0.4978, "step": 11358 }, { "epoch": 0.73, "grad_norm": 1.1373987197875977, "learning_rate": 1.7532168289031953e-06, "loss": 0.4993, "step": 11359 }, { "epoch": 0.73, "grad_norm": 1.2551652193069458, "learning_rate": 1.7524219537511294e-06, "loss": 0.5342, "step": 11360 }, { "epoch": 0.73, "grad_norm": 1.3009976148605347, "learning_rate": 1.7516272205423196e-06, "loss": 0.5459, "step": 11361 }, { "epoch": 0.73, "grad_norm": 1.1241246461868286, "learning_rate": 1.750832629311503e-06, "loss": 0.5511, "step": 11362 }, { "epoch": 0.73, "grad_norm": 1.4328669309616089, "learning_rate": 1.750038180093409e-06, "loss": 0.5194, "step": 11363 }, { "epoch": 0.73, "grad_norm": 1.5230988264083862, "learning_rate": 1.7492438729227611e-06, "loss": 0.5612, "step": 11364 }, { "epoch": 0.73, "grad_norm": 1.2144585847854614, "learning_rate": 1.7484497078342788e-06, "loss": 0.5139, "step": 11365 }, { "epoch": 0.73, "grad_norm": 1.2914644479751587, "learning_rate": 1.747655684862668e-06, "loss": 0.5175, "step": 11366 }, { "epoch": 0.73, "grad_norm": 1.2639317512512207, "learning_rate": 1.7468618040426366e-06, "loss": 0.5517, "step": 11367 }, { "epoch": 0.73, "grad_norm": 1.239988088607788, "learning_rate": 1.7460680654088825e-06, "loss": 0.4791, "step": 11368 }, { "epoch": 0.73, "grad_norm": 1.2556209564208984, "learning_rate": 1.7452744689961e-06, "loss": 0.5065, "step": 11369 }, { "epoch": 0.73, "grad_norm": 1.1081587076187134, "learning_rate": 1.7444810148389706e-06, "loss": 0.4898, "step": 11370 }, { "epoch": 0.73, "grad_norm": 1.406590223312378, "learning_rate": 1.7436877029721776e-06, "loss": 0.5404, "step": 11371 }, { "epoch": 0.73, "grad_norm": 1.2915624380111694, "learning_rate": 1.7428945334303932e-06, "loss": 0.5475, "step": 11372 }, { "epoch": 0.73, "grad_norm": 1.2631142139434814, "learning_rate": 1.7421015062482854e-06, "loss": 0.5106, "step": 11373 }, { "epoch": 0.73, "grad_norm": 1.2120277881622314, "learning_rate": 1.7413086214605168e-06, "loss": 0.5404, "step": 11374 }, { "epoch": 0.73, "grad_norm": 1.1930277347564697, "learning_rate": 1.7405158791017397e-06, "loss": 0.5284, "step": 11375 }, { "epoch": 0.73, "grad_norm": 1.3252907991409302, "learning_rate": 1.7397232792066033e-06, "loss": 0.5177, "step": 11376 }, { "epoch": 0.73, "grad_norm": 1.266969919204712, "learning_rate": 1.738930821809751e-06, "loss": 0.5005, "step": 11377 }, { "epoch": 0.73, "grad_norm": 1.2034345865249634, "learning_rate": 1.738138506945819e-06, "loss": 0.539, "step": 11378 }, { "epoch": 0.73, "grad_norm": 1.2629868984222412, "learning_rate": 1.737346334649437e-06, "loss": 0.5247, "step": 11379 }, { "epoch": 0.73, "grad_norm": 1.189868450164795, "learning_rate": 1.7365543049552303e-06, "loss": 0.5664, "step": 11380 }, { "epoch": 0.73, "grad_norm": 1.1859116554260254, "learning_rate": 1.735762417897814e-06, "loss": 0.4874, "step": 11381 }, { "epoch": 0.73, "grad_norm": 1.1634924411773682, "learning_rate": 1.7349706735118e-06, "loss": 0.4744, "step": 11382 }, { "epoch": 0.73, "grad_norm": 1.2357196807861328, "learning_rate": 1.734179071831794e-06, "loss": 0.5286, "step": 11383 }, { "epoch": 0.73, "grad_norm": 1.2004518508911133, "learning_rate": 1.7333876128923955e-06, "loss": 0.5268, "step": 11384 }, { "epoch": 0.73, "grad_norm": 1.1650493144989014, "learning_rate": 1.7325962967281979e-06, "loss": 0.5053, "step": 11385 }, { "epoch": 0.73, "grad_norm": 1.1399396657943726, "learning_rate": 1.7318051233737843e-06, "loss": 0.4763, "step": 11386 }, { "epoch": 0.74, "grad_norm": 1.3336424827575684, "learning_rate": 1.7310140928637366e-06, "loss": 0.5415, "step": 11387 }, { "epoch": 0.74, "grad_norm": 1.1970157623291016, "learning_rate": 1.7302232052326289e-06, "loss": 0.4858, "step": 11388 }, { "epoch": 0.74, "grad_norm": 3.934485673904419, "learning_rate": 1.7294324605150286e-06, "loss": 0.4718, "step": 11389 }, { "epoch": 0.74, "grad_norm": 1.2401342391967773, "learning_rate": 1.728641858745498e-06, "loss": 0.5333, "step": 11390 }, { "epoch": 0.74, "grad_norm": 1.23381507396698, "learning_rate": 1.7278513999585923e-06, "loss": 0.5378, "step": 11391 }, { "epoch": 0.74, "grad_norm": 1.24614417552948, "learning_rate": 1.7270610841888585e-06, "loss": 0.5115, "step": 11392 }, { "epoch": 0.74, "grad_norm": 1.2225838899612427, "learning_rate": 1.7262709114708403e-06, "loss": 0.4928, "step": 11393 }, { "epoch": 0.74, "grad_norm": 1.8588701486587524, "learning_rate": 1.7254808818390738e-06, "loss": 0.5362, "step": 11394 }, { "epoch": 0.74, "grad_norm": 1.2777378559112549, "learning_rate": 1.7246909953280904e-06, "loss": 0.5529, "step": 11395 }, { "epoch": 0.74, "grad_norm": 1.2063593864440918, "learning_rate": 1.7239012519724124e-06, "loss": 0.4936, "step": 11396 }, { "epoch": 0.74, "grad_norm": 1.3271852731704712, "learning_rate": 1.7231116518065605e-06, "loss": 0.5587, "step": 11397 }, { "epoch": 0.74, "grad_norm": 1.1840929985046387, "learning_rate": 1.722322194865042e-06, "loss": 0.5355, "step": 11398 }, { "epoch": 0.74, "grad_norm": 1.2543472051620483, "learning_rate": 1.7215328811823633e-06, "loss": 0.4505, "step": 11399 }, { "epoch": 0.74, "grad_norm": 1.1422125101089478, "learning_rate": 1.7207437107930247e-06, "loss": 0.5343, "step": 11400 }, { "epoch": 0.74, "grad_norm": 1.296647548675537, "learning_rate": 1.7199546837315172e-06, "loss": 0.5315, "step": 11401 }, { "epoch": 0.74, "grad_norm": 1.2529724836349487, "learning_rate": 1.7191658000323285e-06, "loss": 0.5345, "step": 11402 }, { "epoch": 0.74, "grad_norm": 1.20262610912323, "learning_rate": 1.7183770597299394e-06, "loss": 0.5469, "step": 11403 }, { "epoch": 0.74, "grad_norm": 1.257387638092041, "learning_rate": 1.7175884628588202e-06, "loss": 0.5518, "step": 11404 }, { "epoch": 0.74, "grad_norm": 1.1617969274520874, "learning_rate": 1.716800009453441e-06, "loss": 0.5016, "step": 11405 }, { "epoch": 0.74, "grad_norm": 1.136739730834961, "learning_rate": 1.7160116995482623e-06, "loss": 0.5433, "step": 11406 }, { "epoch": 0.74, "grad_norm": 1.111488938331604, "learning_rate": 1.71522353317774e-06, "loss": 0.4888, "step": 11407 }, { "epoch": 0.74, "grad_norm": 1.2307459115982056, "learning_rate": 1.7144355103763216e-06, "loss": 0.5842, "step": 11408 }, { "epoch": 0.74, "grad_norm": 1.435840368270874, "learning_rate": 1.7136476311784521e-06, "loss": 0.539, "step": 11409 }, { "epoch": 0.74, "grad_norm": 1.3050029277801514, "learning_rate": 1.7128598956185643e-06, "loss": 0.5209, "step": 11410 }, { "epoch": 0.74, "grad_norm": 1.1531848907470703, "learning_rate": 1.7120723037310893e-06, "loss": 0.465, "step": 11411 }, { "epoch": 0.74, "grad_norm": 1.164764404296875, "learning_rate": 1.7112848555504508e-06, "loss": 0.5426, "step": 11412 }, { "epoch": 0.74, "grad_norm": 1.1989531517028809, "learning_rate": 1.7104975511110666e-06, "loss": 0.4996, "step": 11413 }, { "epoch": 0.74, "grad_norm": 1.1717389822006226, "learning_rate": 1.7097103904473472e-06, "loss": 0.5459, "step": 11414 }, { "epoch": 0.74, "grad_norm": 1.3399447202682495, "learning_rate": 1.7089233735936988e-06, "loss": 0.5191, "step": 11415 }, { "epoch": 0.74, "grad_norm": 1.1610939502716064, "learning_rate": 1.7081365005845174e-06, "loss": 0.5016, "step": 11416 }, { "epoch": 0.74, "grad_norm": 1.2005819082260132, "learning_rate": 1.707349771454197e-06, "loss": 0.5346, "step": 11417 }, { "epoch": 0.74, "grad_norm": 1.1949063539505005, "learning_rate": 1.7065631862371224e-06, "loss": 0.4934, "step": 11418 }, { "epoch": 0.74, "grad_norm": 1.1981390714645386, "learning_rate": 1.7057767449676737e-06, "loss": 0.4952, "step": 11419 }, { "epoch": 0.74, "grad_norm": 1.1189992427825928, "learning_rate": 1.7049904476802242e-06, "loss": 0.4733, "step": 11420 }, { "epoch": 0.74, "grad_norm": 1.2994452714920044, "learning_rate": 1.7042042944091426e-06, "loss": 0.5155, "step": 11421 }, { "epoch": 0.74, "grad_norm": 1.3266462087631226, "learning_rate": 1.7034182851887865e-06, "loss": 0.5308, "step": 11422 }, { "epoch": 0.74, "grad_norm": 1.2591581344604492, "learning_rate": 1.7026324200535122e-06, "loss": 0.5417, "step": 11423 }, { "epoch": 0.74, "grad_norm": 1.3223803043365479, "learning_rate": 1.701846699037667e-06, "loss": 0.5277, "step": 11424 }, { "epoch": 0.74, "grad_norm": 1.2144314050674438, "learning_rate": 1.7010611221755934e-06, "loss": 0.5144, "step": 11425 }, { "epoch": 0.74, "grad_norm": 1.0792441368103027, "learning_rate": 1.7002756895016286e-06, "loss": 0.5092, "step": 11426 }, { "epoch": 0.74, "grad_norm": 1.1335971355438232, "learning_rate": 1.6994904010500984e-06, "loss": 0.5335, "step": 11427 }, { "epoch": 0.74, "grad_norm": 1.177647352218628, "learning_rate": 1.698705256855327e-06, "loss": 0.5068, "step": 11428 }, { "epoch": 0.74, "grad_norm": 1.2037129402160645, "learning_rate": 1.6979202569516319e-06, "loss": 0.5059, "step": 11429 }, { "epoch": 0.74, "grad_norm": 1.2343661785125732, "learning_rate": 1.6971354013733222e-06, "loss": 0.4869, "step": 11430 }, { "epoch": 0.74, "grad_norm": 1.3272428512573242, "learning_rate": 1.6963506901547028e-06, "loss": 0.5019, "step": 11431 }, { "epoch": 0.74, "grad_norm": 1.2052315473556519, "learning_rate": 1.695566123330073e-06, "loss": 0.5681, "step": 11432 }, { "epoch": 0.74, "grad_norm": 1.1885987520217896, "learning_rate": 1.6947817009337208e-06, "loss": 0.5568, "step": 11433 }, { "epoch": 0.74, "grad_norm": 1.2115823030471802, "learning_rate": 1.693997422999933e-06, "loss": 0.5602, "step": 11434 }, { "epoch": 0.74, "grad_norm": 1.1192662715911865, "learning_rate": 1.6932132895629876e-06, "loss": 0.4756, "step": 11435 }, { "epoch": 0.74, "grad_norm": 1.2533766031265259, "learning_rate": 1.6924293006571585e-06, "loss": 0.5131, "step": 11436 }, { "epoch": 0.74, "grad_norm": 1.1545507907867432, "learning_rate": 1.6916454563167107e-06, "loss": 0.493, "step": 11437 }, { "epoch": 0.74, "grad_norm": 1.3817592859268188, "learning_rate": 1.6908617565759061e-06, "loss": 0.5442, "step": 11438 }, { "epoch": 0.74, "grad_norm": 1.1782093048095703, "learning_rate": 1.6900782014689942e-06, "loss": 0.4779, "step": 11439 }, { "epoch": 0.74, "grad_norm": 1.2270939350128174, "learning_rate": 1.6892947910302248e-06, "loss": 0.5168, "step": 11440 }, { "epoch": 0.74, "grad_norm": 1.1376577615737915, "learning_rate": 1.6885115252938383e-06, "loss": 0.4723, "step": 11441 }, { "epoch": 0.74, "grad_norm": 1.1542218923568726, "learning_rate": 1.6877284042940696e-06, "loss": 0.4966, "step": 11442 }, { "epoch": 0.74, "grad_norm": 1.2874141931533813, "learning_rate": 1.6869454280651465e-06, "loss": 0.4749, "step": 11443 }, { "epoch": 0.74, "grad_norm": 1.1982280015945435, "learning_rate": 1.6861625966412926e-06, "loss": 0.5284, "step": 11444 }, { "epoch": 0.74, "grad_norm": 1.2073400020599365, "learning_rate": 1.6853799100567198e-06, "loss": 0.5028, "step": 11445 }, { "epoch": 0.74, "grad_norm": 1.2394834756851196, "learning_rate": 1.6845973683456391e-06, "loss": 0.5161, "step": 11446 }, { "epoch": 0.74, "grad_norm": 1.2244230508804321, "learning_rate": 1.683814971542254e-06, "loss": 0.5138, "step": 11447 }, { "epoch": 0.74, "grad_norm": 1.1657320261001587, "learning_rate": 1.6830327196807606e-06, "loss": 0.4913, "step": 11448 }, { "epoch": 0.74, "grad_norm": 1.1575695276260376, "learning_rate": 1.6822506127953508e-06, "loss": 0.4998, "step": 11449 }, { "epoch": 0.74, "grad_norm": 1.154029369354248, "learning_rate": 1.6814686509202048e-06, "loss": 0.4931, "step": 11450 }, { "epoch": 0.74, "grad_norm": 1.2208043336868286, "learning_rate": 1.6806868340895027e-06, "loss": 0.4867, "step": 11451 }, { "epoch": 0.74, "grad_norm": 1.25564706325531, "learning_rate": 1.6799051623374152e-06, "loss": 0.5008, "step": 11452 }, { "epoch": 0.74, "grad_norm": 1.3194745779037476, "learning_rate": 1.6791236356981066e-06, "loss": 0.5433, "step": 11453 }, { "epoch": 0.74, "grad_norm": 1.267136573791504, "learning_rate": 1.6783422542057376e-06, "loss": 0.4948, "step": 11454 }, { "epoch": 0.74, "grad_norm": 1.3455753326416016, "learning_rate": 1.6775610178944575e-06, "loss": 0.5021, "step": 11455 }, { "epoch": 0.74, "grad_norm": 1.3064905405044556, "learning_rate": 1.6767799267984124e-06, "loss": 0.5156, "step": 11456 }, { "epoch": 0.74, "grad_norm": 1.2420259714126587, "learning_rate": 1.6759989809517436e-06, "loss": 0.5268, "step": 11457 }, { "epoch": 0.74, "grad_norm": 1.2949260473251343, "learning_rate": 1.6752181803885848e-06, "loss": 0.5216, "step": 11458 }, { "epoch": 0.74, "grad_norm": 1.2051945924758911, "learning_rate": 1.6744375251430589e-06, "loss": 0.4933, "step": 11459 }, { "epoch": 0.74, "grad_norm": 1.2531176805496216, "learning_rate": 1.6736570152492892e-06, "loss": 0.5105, "step": 11460 }, { "epoch": 0.74, "grad_norm": 1.3772960901260376, "learning_rate": 1.6728766507413896e-06, "loss": 0.5079, "step": 11461 }, { "epoch": 0.74, "grad_norm": 1.1595020294189453, "learning_rate": 1.6720964316534672e-06, "loss": 0.529, "step": 11462 }, { "epoch": 0.74, "grad_norm": 1.0963643789291382, "learning_rate": 1.6713163580196252e-06, "loss": 0.5472, "step": 11463 }, { "epoch": 0.74, "grad_norm": 1.2779319286346436, "learning_rate": 1.6705364298739557e-06, "loss": 0.5418, "step": 11464 }, { "epoch": 0.74, "grad_norm": 1.1428247690200806, "learning_rate": 1.6697566472505484e-06, "loss": 0.4745, "step": 11465 }, { "epoch": 0.74, "grad_norm": 1.2369495630264282, "learning_rate": 1.6689770101834862e-06, "loss": 0.4941, "step": 11466 }, { "epoch": 0.74, "grad_norm": 1.4070088863372803, "learning_rate": 1.6681975187068444e-06, "loss": 0.5566, "step": 11467 }, { "epoch": 0.74, "grad_norm": 1.095137119293213, "learning_rate": 1.6674181728546952e-06, "loss": 0.4592, "step": 11468 }, { "epoch": 0.74, "grad_norm": 1.3657253980636597, "learning_rate": 1.6666389726610971e-06, "loss": 0.5053, "step": 11469 }, { "epoch": 0.74, "grad_norm": 1.3318382501602173, "learning_rate": 1.6658599181601099e-06, "loss": 0.4307, "step": 11470 }, { "epoch": 0.74, "grad_norm": 1.207769513130188, "learning_rate": 1.6650810093857833e-06, "loss": 0.4517, "step": 11471 }, { "epoch": 0.74, "grad_norm": 1.231296420097351, "learning_rate": 1.6643022463721615e-06, "loss": 0.4808, "step": 11472 }, { "epoch": 0.74, "grad_norm": 1.203546404838562, "learning_rate": 1.6635236291532836e-06, "loss": 0.5657, "step": 11473 }, { "epoch": 0.74, "grad_norm": 1.250759243965149, "learning_rate": 1.6627451577631782e-06, "loss": 0.5611, "step": 11474 }, { "epoch": 0.74, "grad_norm": 1.241901159286499, "learning_rate": 1.6619668322358723e-06, "loss": 0.5193, "step": 11475 }, { "epoch": 0.74, "grad_norm": 1.234857439994812, "learning_rate": 1.6611886526053833e-06, "loss": 0.525, "step": 11476 }, { "epoch": 0.74, "grad_norm": 1.3087133169174194, "learning_rate": 1.660410618905724e-06, "loss": 0.5332, "step": 11477 }, { "epoch": 0.74, "grad_norm": 1.3010807037353516, "learning_rate": 1.6596327311709003e-06, "loss": 0.4871, "step": 11478 }, { "epoch": 0.74, "grad_norm": 1.238330602645874, "learning_rate": 1.6588549894349137e-06, "loss": 0.5444, "step": 11479 }, { "epoch": 0.74, "grad_norm": 1.29227614402771, "learning_rate": 1.6580773937317536e-06, "loss": 0.5441, "step": 11480 }, { "epoch": 0.74, "grad_norm": 1.2296841144561768, "learning_rate": 1.6572999440954079e-06, "loss": 0.4936, "step": 11481 }, { "epoch": 0.74, "grad_norm": 1.303869366645813, "learning_rate": 1.6565226405598578e-06, "loss": 0.5183, "step": 11482 }, { "epoch": 0.74, "grad_norm": 1.1003013849258423, "learning_rate": 1.6557454831590764e-06, "loss": 0.4981, "step": 11483 }, { "epoch": 0.74, "grad_norm": 1.2235299348831177, "learning_rate": 1.654968471927032e-06, "loss": 0.5405, "step": 11484 }, { "epoch": 0.74, "grad_norm": 1.340958833694458, "learning_rate": 1.654191606897687e-06, "loss": 0.5522, "step": 11485 }, { "epoch": 0.74, "grad_norm": 1.1080596446990967, "learning_rate": 1.6534148881049928e-06, "loss": 0.5072, "step": 11486 }, { "epoch": 0.74, "grad_norm": 1.1615482568740845, "learning_rate": 1.6526383155828995e-06, "loss": 0.5321, "step": 11487 }, { "epoch": 0.74, "grad_norm": 1.1768910884857178, "learning_rate": 1.6518618893653494e-06, "loss": 0.4927, "step": 11488 }, { "epoch": 0.74, "grad_norm": 1.2016665935516357, "learning_rate": 1.6510856094862771e-06, "loss": 0.5712, "step": 11489 }, { "epoch": 0.74, "grad_norm": 1.226826548576355, "learning_rate": 1.650309475979613e-06, "loss": 0.5443, "step": 11490 }, { "epoch": 0.74, "grad_norm": 1.1588776111602783, "learning_rate": 1.6495334888792814e-06, "loss": 0.4888, "step": 11491 }, { "epoch": 0.74, "grad_norm": 1.1394647359848022, "learning_rate": 1.6487576482191942e-06, "loss": 0.5245, "step": 11492 }, { "epoch": 0.74, "grad_norm": 1.2752552032470703, "learning_rate": 1.6479819540332642e-06, "loss": 0.5141, "step": 11493 }, { "epoch": 0.74, "grad_norm": 1.111721396446228, "learning_rate": 1.647206406355395e-06, "loss": 0.4502, "step": 11494 }, { "epoch": 0.74, "grad_norm": 1.1991008520126343, "learning_rate": 1.6464310052194831e-06, "loss": 0.476, "step": 11495 }, { "epoch": 0.74, "grad_norm": 1.1894901990890503, "learning_rate": 1.64565575065942e-06, "loss": 0.5027, "step": 11496 }, { "epoch": 0.74, "grad_norm": 1.2545350790023804, "learning_rate": 1.6448806427090907e-06, "loss": 0.4848, "step": 11497 }, { "epoch": 0.74, "grad_norm": 1.2824723720550537, "learning_rate": 1.6441056814023714e-06, "loss": 0.5401, "step": 11498 }, { "epoch": 0.74, "grad_norm": 1.3647964000701904, "learning_rate": 1.643330866773134e-06, "loss": 0.5036, "step": 11499 }, { "epoch": 0.74, "grad_norm": 1.1348302364349365, "learning_rate": 1.6425561988552442e-06, "loss": 0.4893, "step": 11500 }, { "epoch": 0.74, "grad_norm": 1.1555103063583374, "learning_rate": 1.6417816776825601e-06, "loss": 0.5495, "step": 11501 }, { "epoch": 0.74, "grad_norm": 1.2927825450897217, "learning_rate": 1.6410073032889352e-06, "loss": 0.5443, "step": 11502 }, { "epoch": 0.74, "grad_norm": 1.1805459260940552, "learning_rate": 1.6402330757082163e-06, "loss": 0.5093, "step": 11503 }, { "epoch": 0.74, "grad_norm": 1.2457941770553589, "learning_rate": 1.6394589949742396e-06, "loss": 0.512, "step": 11504 }, { "epoch": 0.74, "grad_norm": 1.1624464988708496, "learning_rate": 1.6386850611208398e-06, "loss": 0.4598, "step": 11505 }, { "epoch": 0.74, "grad_norm": 1.1515964269638062, "learning_rate": 1.6379112741818436e-06, "loss": 0.5179, "step": 11506 }, { "epoch": 0.74, "grad_norm": 1.157157301902771, "learning_rate": 1.6371376341910717e-06, "loss": 0.4873, "step": 11507 }, { "epoch": 0.74, "grad_norm": 1.2829835414886475, "learning_rate": 1.6363641411823371e-06, "loss": 0.5326, "step": 11508 }, { "epoch": 0.74, "grad_norm": 1.1905786991119385, "learning_rate": 1.6355907951894495e-06, "loss": 0.5312, "step": 11509 }, { "epoch": 0.74, "grad_norm": 1.2274036407470703, "learning_rate": 1.6348175962462059e-06, "loss": 0.5462, "step": 11510 }, { "epoch": 0.74, "grad_norm": 1.2882113456726074, "learning_rate": 1.6340445443864035e-06, "loss": 0.4962, "step": 11511 }, { "epoch": 0.74, "grad_norm": 1.130246877670288, "learning_rate": 1.6332716396438291e-06, "loss": 0.5523, "step": 11512 }, { "epoch": 0.74, "grad_norm": 1.2076607942581177, "learning_rate": 1.6324988820522658e-06, "loss": 0.5407, "step": 11513 }, { "epoch": 0.74, "grad_norm": 1.1657577753067017, "learning_rate": 1.6317262716454896e-06, "loss": 0.521, "step": 11514 }, { "epoch": 0.74, "grad_norm": 1.1559758186340332, "learning_rate": 1.6309538084572657e-06, "loss": 0.511, "step": 11515 }, { "epoch": 0.74, "grad_norm": 1.1436439752578735, "learning_rate": 1.6301814925213588e-06, "loss": 0.4955, "step": 11516 }, { "epoch": 0.74, "grad_norm": 1.1751455068588257, "learning_rate": 1.6294093238715248e-06, "loss": 0.5339, "step": 11517 }, { "epoch": 0.74, "grad_norm": 1.197326898574829, "learning_rate": 1.6286373025415126e-06, "loss": 0.5206, "step": 11518 }, { "epoch": 0.74, "grad_norm": 1.1681644916534424, "learning_rate": 1.6278654285650657e-06, "loss": 0.5305, "step": 11519 }, { "epoch": 0.74, "grad_norm": 1.2119061946868896, "learning_rate": 1.6270937019759232e-06, "loss": 0.5689, "step": 11520 }, { "epoch": 0.74, "grad_norm": 1.1808520555496216, "learning_rate": 1.6263221228078102e-06, "loss": 0.5196, "step": 11521 }, { "epoch": 0.74, "grad_norm": 1.2892882823944092, "learning_rate": 1.625550691094453e-06, "loss": 0.5016, "step": 11522 }, { "epoch": 0.74, "grad_norm": 1.2693601846694946, "learning_rate": 1.6247794068695695e-06, "loss": 0.5148, "step": 11523 }, { "epoch": 0.74, "grad_norm": 1.2164443731307983, "learning_rate": 1.6240082701668697e-06, "loss": 0.54, "step": 11524 }, { "epoch": 0.74, "grad_norm": 1.2531070709228516, "learning_rate": 1.6232372810200582e-06, "loss": 0.5595, "step": 11525 }, { "epoch": 0.74, "grad_norm": 1.2353566884994507, "learning_rate": 1.6224664394628343e-06, "loss": 0.5239, "step": 11526 }, { "epoch": 0.74, "grad_norm": 1.235394835472107, "learning_rate": 1.6216957455288867e-06, "loss": 0.5283, "step": 11527 }, { "epoch": 0.74, "grad_norm": 1.2121460437774658, "learning_rate": 1.6209251992519021e-06, "loss": 0.4915, "step": 11528 }, { "epoch": 0.74, "grad_norm": 1.2494077682495117, "learning_rate": 1.6201548006655592e-06, "loss": 0.5118, "step": 11529 }, { "epoch": 0.74, "grad_norm": 1.221799612045288, "learning_rate": 1.6193845498035294e-06, "loss": 0.5319, "step": 11530 }, { "epoch": 0.74, "grad_norm": 1.2220603227615356, "learning_rate": 1.6186144466994791e-06, "loss": 0.5228, "step": 11531 }, { "epoch": 0.74, "grad_norm": 1.2042285203933716, "learning_rate": 1.6178444913870684e-06, "loss": 0.5066, "step": 11532 }, { "epoch": 0.74, "grad_norm": 1.1338995695114136, "learning_rate": 1.6170746838999478e-06, "loss": 0.4516, "step": 11533 }, { "epoch": 0.74, "grad_norm": 1.1752794981002808, "learning_rate": 1.6163050242717643e-06, "loss": 0.5303, "step": 11534 }, { "epoch": 0.74, "grad_norm": 1.3109251260757446, "learning_rate": 1.6155355125361582e-06, "loss": 0.5032, "step": 11535 }, { "epoch": 0.74, "grad_norm": 1.246719479560852, "learning_rate": 1.614766148726763e-06, "loss": 0.4567, "step": 11536 }, { "epoch": 0.74, "grad_norm": 1.2391653060913086, "learning_rate": 1.6139969328772053e-06, "loss": 0.5155, "step": 11537 }, { "epoch": 0.74, "grad_norm": 1.1632083654403687, "learning_rate": 1.6132278650211075e-06, "loss": 0.536, "step": 11538 }, { "epoch": 0.74, "grad_norm": 1.193347692489624, "learning_rate": 1.6124589451920796e-06, "loss": 0.5265, "step": 11539 }, { "epoch": 0.74, "grad_norm": 1.2276368141174316, "learning_rate": 1.6116901734237316e-06, "loss": 0.4699, "step": 11540 }, { "epoch": 0.74, "grad_norm": 1.286455750465393, "learning_rate": 1.6109215497496644e-06, "loss": 0.51, "step": 11541 }, { "epoch": 0.75, "grad_norm": 1.2034111022949219, "learning_rate": 1.610153074203472e-06, "loss": 0.5457, "step": 11542 }, { "epoch": 0.75, "grad_norm": 1.3099740743637085, "learning_rate": 1.6093847468187445e-06, "loss": 0.5595, "step": 11543 }, { "epoch": 0.75, "grad_norm": 1.1973412036895752, "learning_rate": 1.6086165676290605e-06, "loss": 0.5351, "step": 11544 }, { "epoch": 0.75, "grad_norm": 1.2139872312545776, "learning_rate": 1.6078485366679958e-06, "loss": 0.501, "step": 11545 }, { "epoch": 0.75, "grad_norm": 1.2494218349456787, "learning_rate": 1.6070806539691203e-06, "loss": 0.5038, "step": 11546 }, { "epoch": 0.75, "grad_norm": 1.2366352081298828, "learning_rate": 1.6063129195659965e-06, "loss": 0.5036, "step": 11547 }, { "epoch": 0.75, "grad_norm": 1.253217101097107, "learning_rate": 1.6055453334921784e-06, "loss": 0.527, "step": 11548 }, { "epoch": 0.75, "grad_norm": 1.246019721031189, "learning_rate": 1.6047778957812154e-06, "loss": 0.4899, "step": 11549 }, { "epoch": 0.75, "grad_norm": 1.241541862487793, "learning_rate": 1.6040106064666512e-06, "loss": 0.52, "step": 11550 }, { "epoch": 0.75, "grad_norm": 1.0751149654388428, "learning_rate": 1.603243465582021e-06, "loss": 0.4906, "step": 11551 }, { "epoch": 0.75, "grad_norm": 1.3358107805252075, "learning_rate": 1.6024764731608573e-06, "loss": 0.4912, "step": 11552 }, { "epoch": 0.75, "grad_norm": 1.4406183958053589, "learning_rate": 1.6017096292366792e-06, "loss": 0.5089, "step": 11553 }, { "epoch": 0.75, "grad_norm": 1.267486572265625, "learning_rate": 1.6009429338430055e-06, "loss": 0.5635, "step": 11554 }, { "epoch": 0.75, "grad_norm": 1.1913446187973022, "learning_rate": 1.6001763870133469e-06, "loss": 0.5482, "step": 11555 }, { "epoch": 0.75, "grad_norm": 1.2335284948349, "learning_rate": 1.5994099887812065e-06, "loss": 0.573, "step": 11556 }, { "epoch": 0.75, "grad_norm": 1.1248888969421387, "learning_rate": 1.5986437391800836e-06, "loss": 0.4923, "step": 11557 }, { "epoch": 0.75, "grad_norm": 1.235227108001709, "learning_rate": 1.5978776382434658e-06, "loss": 0.4885, "step": 11558 }, { "epoch": 0.75, "grad_norm": 1.3132362365722656, "learning_rate": 1.5971116860048386e-06, "loss": 0.5107, "step": 11559 }, { "epoch": 0.75, "grad_norm": 1.1694194078445435, "learning_rate": 1.5963458824976796e-06, "loss": 0.4971, "step": 11560 }, { "epoch": 0.75, "grad_norm": 1.153109073638916, "learning_rate": 1.5955802277554627e-06, "loss": 0.4873, "step": 11561 }, { "epoch": 0.75, "grad_norm": 1.2246001958847046, "learning_rate": 1.5948147218116489e-06, "loss": 0.5139, "step": 11562 }, { "epoch": 0.75, "grad_norm": 1.218641757965088, "learning_rate": 1.594049364699698e-06, "loss": 0.5199, "step": 11563 }, { "epoch": 0.75, "grad_norm": 1.8349933624267578, "learning_rate": 1.5932841564530616e-06, "loss": 0.4529, "step": 11564 }, { "epoch": 0.75, "grad_norm": 1.286433219909668, "learning_rate": 1.5925190971051857e-06, "loss": 0.5188, "step": 11565 }, { "epoch": 0.75, "grad_norm": 1.1253501176834106, "learning_rate": 1.5917541866895087e-06, "loss": 0.4842, "step": 11566 }, { "epoch": 0.75, "grad_norm": 1.1377218961715698, "learning_rate": 1.5909894252394642e-06, "loss": 0.5187, "step": 11567 }, { "epoch": 0.75, "grad_norm": 1.3038698434829712, "learning_rate": 1.590224812788475e-06, "loss": 0.5194, "step": 11568 }, { "epoch": 0.75, "grad_norm": 1.1572163105010986, "learning_rate": 1.5894603493699618e-06, "loss": 0.5518, "step": 11569 }, { "epoch": 0.75, "grad_norm": 1.2392851114273071, "learning_rate": 1.5886960350173375e-06, "loss": 0.5186, "step": 11570 }, { "epoch": 0.75, "grad_norm": 1.251461148262024, "learning_rate": 1.5879318697640084e-06, "loss": 0.4958, "step": 11571 }, { "epoch": 0.75, "grad_norm": 1.3193439245224, "learning_rate": 1.5871678536433738e-06, "loss": 0.5085, "step": 11572 }, { "epoch": 0.75, "grad_norm": 1.1087408065795898, "learning_rate": 1.5864039866888286e-06, "loss": 0.4643, "step": 11573 }, { "epoch": 0.75, "grad_norm": 1.227738618850708, "learning_rate": 1.5856402689337563e-06, "loss": 0.5413, "step": 11574 }, { "epoch": 0.75, "grad_norm": 1.1964136362075806, "learning_rate": 1.584876700411539e-06, "loss": 0.5018, "step": 11575 }, { "epoch": 0.75, "grad_norm": 1.1456822156906128, "learning_rate": 1.5841132811555498e-06, "loss": 0.4947, "step": 11576 }, { "epoch": 0.75, "grad_norm": 1.252071499824524, "learning_rate": 1.5833500111991563e-06, "loss": 0.4676, "step": 11577 }, { "epoch": 0.75, "grad_norm": 1.1831005811691284, "learning_rate": 1.5825868905757185e-06, "loss": 0.5205, "step": 11578 }, { "epoch": 0.75, "grad_norm": 1.1999648809432983, "learning_rate": 1.5818239193185918e-06, "loss": 0.53, "step": 11579 }, { "epoch": 0.75, "grad_norm": 1.1774541139602661, "learning_rate": 1.5810610974611218e-06, "loss": 0.5119, "step": 11580 }, { "epoch": 0.75, "grad_norm": 1.1988328695297241, "learning_rate": 1.5802984250366499e-06, "loss": 0.468, "step": 11581 }, { "epoch": 0.75, "grad_norm": 1.256605625152588, "learning_rate": 1.5795359020785105e-06, "loss": 0.5308, "step": 11582 }, { "epoch": 0.75, "grad_norm": 1.421644687652588, "learning_rate": 1.5787735286200323e-06, "loss": 0.5389, "step": 11583 }, { "epoch": 0.75, "grad_norm": 1.242592215538025, "learning_rate": 1.5780113046945366e-06, "loss": 0.5137, "step": 11584 }, { "epoch": 0.75, "grad_norm": 1.492953896522522, "learning_rate": 1.5772492303353393e-06, "loss": 0.5105, "step": 11585 }, { "epoch": 0.75, "grad_norm": 1.2554291486740112, "learning_rate": 1.576487305575745e-06, "loss": 0.5438, "step": 11586 }, { "epoch": 0.75, "grad_norm": 1.2073144912719727, "learning_rate": 1.5757255304490588e-06, "loss": 0.505, "step": 11587 }, { "epoch": 0.75, "grad_norm": 1.160159707069397, "learning_rate": 1.5749639049885746e-06, "loss": 0.5327, "step": 11588 }, { "epoch": 0.75, "grad_norm": 1.670549988746643, "learning_rate": 1.574202429227581e-06, "loss": 0.5662, "step": 11589 }, { "epoch": 0.75, "grad_norm": 1.2038788795471191, "learning_rate": 1.5734411031993612e-06, "loss": 0.5369, "step": 11590 }, { "epoch": 0.75, "grad_norm": 1.278647541999817, "learning_rate": 1.5726799269371912e-06, "loss": 0.507, "step": 11591 }, { "epoch": 0.75, "grad_norm": 1.37911057472229, "learning_rate": 1.5719189004743373e-06, "loss": 0.5275, "step": 11592 }, { "epoch": 0.75, "grad_norm": 1.182064175605774, "learning_rate": 1.5711580238440643e-06, "loss": 0.4776, "step": 11593 }, { "epoch": 0.75, "grad_norm": 1.353445053100586, "learning_rate": 1.570397297079627e-06, "loss": 0.4712, "step": 11594 }, { "epoch": 0.75, "grad_norm": 1.261817216873169, "learning_rate": 1.569636720214276e-06, "loss": 0.5079, "step": 11595 }, { "epoch": 0.75, "grad_norm": 1.2740206718444824, "learning_rate": 1.5688762932812528e-06, "loss": 0.5348, "step": 11596 }, { "epoch": 0.75, "grad_norm": 1.187476634979248, "learning_rate": 1.5681160163137964e-06, "loss": 0.5058, "step": 11597 }, { "epoch": 0.75, "grad_norm": 1.2624300718307495, "learning_rate": 1.5673558893451329e-06, "loss": 0.5044, "step": 11598 }, { "epoch": 0.75, "grad_norm": 1.1789990663528442, "learning_rate": 1.5665959124084867e-06, "loss": 0.4944, "step": 11599 }, { "epoch": 0.75, "grad_norm": 1.347859501838684, "learning_rate": 1.5658360855370757e-06, "loss": 0.5427, "step": 11600 }, { "epoch": 0.75, "grad_norm": 1.2567270994186401, "learning_rate": 1.5650764087641085e-06, "loss": 0.5253, "step": 11601 }, { "epoch": 0.75, "grad_norm": 1.1928044557571411, "learning_rate": 1.564316882122791e-06, "loss": 0.5102, "step": 11602 }, { "epoch": 0.75, "grad_norm": 1.119070291519165, "learning_rate": 1.5635575056463171e-06, "loss": 0.4984, "step": 11603 }, { "epoch": 0.75, "grad_norm": 1.3652595281600952, "learning_rate": 1.5627982793678782e-06, "loss": 0.5675, "step": 11604 }, { "epoch": 0.75, "grad_norm": 1.2571991682052612, "learning_rate": 1.5620392033206583e-06, "loss": 0.5097, "step": 11605 }, { "epoch": 0.75, "grad_norm": 1.2586784362792969, "learning_rate": 1.561280277537835e-06, "loss": 0.4856, "step": 11606 }, { "epoch": 0.75, "grad_norm": 1.2032058238983154, "learning_rate": 1.5605215020525783e-06, "loss": 0.4614, "step": 11607 }, { "epoch": 0.75, "grad_norm": 1.2242088317871094, "learning_rate": 1.559762876898055e-06, "loss": 0.536, "step": 11608 }, { "epoch": 0.75, "grad_norm": 1.1807951927185059, "learning_rate": 1.5590044021074185e-06, "loss": 0.5114, "step": 11609 }, { "epoch": 0.75, "grad_norm": 1.1992912292480469, "learning_rate": 1.5582460777138215e-06, "loss": 0.531, "step": 11610 }, { "epoch": 0.75, "grad_norm": 1.190155029296875, "learning_rate": 1.5574879037504093e-06, "loss": 0.4883, "step": 11611 }, { "epoch": 0.75, "grad_norm": 1.121188998222351, "learning_rate": 1.5567298802503184e-06, "loss": 0.5327, "step": 11612 }, { "epoch": 0.75, "grad_norm": 1.2135337591171265, "learning_rate": 1.5559720072466806e-06, "loss": 0.5095, "step": 11613 }, { "epoch": 0.75, "grad_norm": 1.3762423992156982, "learning_rate": 1.5552142847726227e-06, "loss": 0.5363, "step": 11614 }, { "epoch": 0.75, "grad_norm": 1.1449259519577026, "learning_rate": 1.5544567128612586e-06, "loss": 0.4827, "step": 11615 }, { "epoch": 0.75, "grad_norm": 1.1532737016677856, "learning_rate": 1.5536992915457028e-06, "loss": 0.5014, "step": 11616 }, { "epoch": 0.75, "grad_norm": 1.1795600652694702, "learning_rate": 1.5529420208590584e-06, "loss": 0.5479, "step": 11617 }, { "epoch": 0.75, "grad_norm": 1.2726770639419556, "learning_rate": 1.5521849008344253e-06, "loss": 0.5147, "step": 11618 }, { "epoch": 0.75, "grad_norm": 1.0584758520126343, "learning_rate": 1.5514279315048946e-06, "loss": 0.5025, "step": 11619 }, { "epoch": 0.75, "grad_norm": 1.3605707883834839, "learning_rate": 1.5506711129035534e-06, "loss": 0.5766, "step": 11620 }, { "epoch": 0.75, "grad_norm": 1.3091362714767456, "learning_rate": 1.5499144450634768e-06, "loss": 0.487, "step": 11621 }, { "epoch": 0.75, "grad_norm": 1.2539809942245483, "learning_rate": 1.5491579280177383e-06, "loss": 0.5335, "step": 11622 }, { "epoch": 0.75, "grad_norm": 1.186920166015625, "learning_rate": 1.5484015617994036e-06, "loss": 0.5313, "step": 11623 }, { "epoch": 0.75, "grad_norm": 1.1583678722381592, "learning_rate": 1.5476453464415314e-06, "loss": 0.4948, "step": 11624 }, { "epoch": 0.75, "grad_norm": 1.283007264137268, "learning_rate": 1.5468892819771736e-06, "loss": 0.5529, "step": 11625 }, { "epoch": 0.75, "grad_norm": 1.187656044960022, "learning_rate": 1.546133368439378e-06, "loss": 0.5235, "step": 11626 }, { "epoch": 0.75, "grad_norm": 1.1268267631530762, "learning_rate": 1.5453776058611803e-06, "loss": 0.5103, "step": 11627 }, { "epoch": 0.75, "grad_norm": 1.181599736213684, "learning_rate": 1.544621994275614e-06, "loss": 0.469, "step": 11628 }, { "epoch": 0.75, "grad_norm": 1.1699079275131226, "learning_rate": 1.5438665337157056e-06, "loss": 0.4913, "step": 11629 }, { "epoch": 0.75, "grad_norm": 1.3158704042434692, "learning_rate": 1.5431112242144742e-06, "loss": 0.5379, "step": 11630 }, { "epoch": 0.75, "grad_norm": 1.1599253416061401, "learning_rate": 1.5423560658049318e-06, "loss": 0.5399, "step": 11631 }, { "epoch": 0.75, "grad_norm": 1.1367768049240112, "learning_rate": 1.5416010585200876e-06, "loss": 0.5237, "step": 11632 }, { "epoch": 0.75, "grad_norm": 1.2501988410949707, "learning_rate": 1.5408462023929354e-06, "loss": 0.5217, "step": 11633 }, { "epoch": 0.75, "grad_norm": 1.2638144493103027, "learning_rate": 1.5400914974564718e-06, "loss": 0.5331, "step": 11634 }, { "epoch": 0.75, "grad_norm": 1.6887085437774658, "learning_rate": 1.5393369437436817e-06, "loss": 0.5122, "step": 11635 }, { "epoch": 0.75, "grad_norm": 1.3022229671478271, "learning_rate": 1.5385825412875455e-06, "loss": 0.4781, "step": 11636 }, { "epoch": 0.75, "grad_norm": 1.2561490535736084, "learning_rate": 1.5378282901210372e-06, "loss": 0.518, "step": 11637 }, { "epoch": 0.75, "grad_norm": 1.2813297510147095, "learning_rate": 1.5370741902771208e-06, "loss": 0.5504, "step": 11638 }, { "epoch": 0.75, "grad_norm": 1.1905807256698608, "learning_rate": 1.5363202417887563e-06, "loss": 0.499, "step": 11639 }, { "epoch": 0.75, "grad_norm": 1.8361185789108276, "learning_rate": 1.535566444688898e-06, "loss": 0.467, "step": 11640 }, { "epoch": 0.75, "grad_norm": 1.1363680362701416, "learning_rate": 1.534812799010494e-06, "loss": 0.5212, "step": 11641 }, { "epoch": 0.75, "grad_norm": 1.1663832664489746, "learning_rate": 1.5340593047864799e-06, "loss": 0.4637, "step": 11642 }, { "epoch": 0.75, "grad_norm": 1.2169456481933594, "learning_rate": 1.533305962049792e-06, "loss": 0.5469, "step": 11643 }, { "epoch": 0.75, "grad_norm": 1.1972917318344116, "learning_rate": 1.532552770833356e-06, "loss": 0.4895, "step": 11644 }, { "epoch": 0.75, "grad_norm": 1.2453290224075317, "learning_rate": 1.5317997311700921e-06, "loss": 0.5305, "step": 11645 }, { "epoch": 0.75, "grad_norm": 1.1448874473571777, "learning_rate": 1.5310468430929153e-06, "loss": 0.4799, "step": 11646 }, { "epoch": 0.75, "grad_norm": 1.2397511005401611, "learning_rate": 1.5302941066347298e-06, "loss": 0.4933, "step": 11647 }, { "epoch": 0.75, "grad_norm": 1.1810240745544434, "learning_rate": 1.529541521828437e-06, "loss": 0.4963, "step": 11648 }, { "epoch": 0.75, "grad_norm": 1.1744015216827393, "learning_rate": 1.5287890887069302e-06, "loss": 0.4876, "step": 11649 }, { "epoch": 0.75, "grad_norm": 1.3179908990859985, "learning_rate": 1.5280368073030983e-06, "loss": 0.5848, "step": 11650 }, { "epoch": 0.75, "grad_norm": 1.2489546537399292, "learning_rate": 1.5272846776498178e-06, "loss": 0.4875, "step": 11651 }, { "epoch": 0.75, "grad_norm": 1.208104133605957, "learning_rate": 1.5265326997799645e-06, "loss": 0.4659, "step": 11652 }, { "epoch": 0.75, "grad_norm": 1.2630144357681274, "learning_rate": 1.5257808737264051e-06, "loss": 0.4812, "step": 11653 }, { "epoch": 0.75, "grad_norm": 1.2002629041671753, "learning_rate": 1.525029199522e-06, "loss": 0.5307, "step": 11654 }, { "epoch": 0.75, "grad_norm": 1.3820655345916748, "learning_rate": 1.5242776771996054e-06, "loss": 0.5774, "step": 11655 }, { "epoch": 0.75, "grad_norm": 1.2347192764282227, "learning_rate": 1.5235263067920635e-06, "loss": 0.5228, "step": 11656 }, { "epoch": 0.75, "grad_norm": 1.28091299533844, "learning_rate": 1.5227750883322179e-06, "loss": 0.5602, "step": 11657 }, { "epoch": 0.75, "grad_norm": 1.2753504514694214, "learning_rate": 1.5220240218529014e-06, "loss": 0.4494, "step": 11658 }, { "epoch": 0.75, "grad_norm": 1.1941168308258057, "learning_rate": 1.521273107386942e-06, "loss": 0.481, "step": 11659 }, { "epoch": 0.75, "grad_norm": 1.1486989259719849, "learning_rate": 1.5205223449671596e-06, "loss": 0.5341, "step": 11660 }, { "epoch": 0.75, "grad_norm": 1.2166892290115356, "learning_rate": 1.5197717346263703e-06, "loss": 0.5555, "step": 11661 }, { "epoch": 0.75, "grad_norm": 1.1451563835144043, "learning_rate": 1.5190212763973771e-06, "loss": 0.4958, "step": 11662 }, { "epoch": 0.75, "grad_norm": 1.473819613456726, "learning_rate": 1.5182709703129838e-06, "loss": 0.53, "step": 11663 }, { "epoch": 0.75, "grad_norm": 1.2572296857833862, "learning_rate": 1.5175208164059829e-06, "loss": 0.5274, "step": 11664 }, { "epoch": 0.75, "grad_norm": 1.2264304161071777, "learning_rate": 1.516770814709162e-06, "loss": 0.5512, "step": 11665 }, { "epoch": 0.75, "grad_norm": 1.1788662672042847, "learning_rate": 1.516020965255302e-06, "loss": 0.5284, "step": 11666 }, { "epoch": 0.75, "grad_norm": 1.2945539951324463, "learning_rate": 1.5152712680771786e-06, "loss": 0.5787, "step": 11667 }, { "epoch": 0.75, "grad_norm": 1.1373027563095093, "learning_rate": 1.5145217232075555e-06, "loss": 0.5487, "step": 11668 }, { "epoch": 0.75, "grad_norm": 1.3340927362442017, "learning_rate": 1.5137723306791957e-06, "loss": 0.5081, "step": 11669 }, { "epoch": 0.75, "grad_norm": 1.1214776039123535, "learning_rate": 1.5130230905248522e-06, "loss": 0.5104, "step": 11670 }, { "epoch": 0.75, "grad_norm": 1.323587417602539, "learning_rate": 1.512274002777273e-06, "loss": 0.5601, "step": 11671 }, { "epoch": 0.75, "grad_norm": 1.440951943397522, "learning_rate": 1.511525067469199e-06, "loss": 0.5177, "step": 11672 }, { "epoch": 0.75, "grad_norm": 1.3173927068710327, "learning_rate": 1.5107762846333657e-06, "loss": 0.5173, "step": 11673 }, { "epoch": 0.75, "grad_norm": 1.2387499809265137, "learning_rate": 1.5100276543024967e-06, "loss": 0.5506, "step": 11674 }, { "epoch": 0.75, "grad_norm": 1.2437455654144287, "learning_rate": 1.5092791765093145e-06, "loss": 0.4733, "step": 11675 }, { "epoch": 0.75, "grad_norm": 1.2674146890640259, "learning_rate": 1.5085308512865333e-06, "loss": 0.4738, "step": 11676 }, { "epoch": 0.75, "grad_norm": 1.2672524452209473, "learning_rate": 1.5077826786668608e-06, "loss": 0.5506, "step": 11677 }, { "epoch": 0.75, "grad_norm": 1.1850734949111938, "learning_rate": 1.5070346586829977e-06, "loss": 0.5089, "step": 11678 }, { "epoch": 0.75, "grad_norm": 1.2497899532318115, "learning_rate": 1.5062867913676383e-06, "loss": 0.497, "step": 11679 }, { "epoch": 0.75, "grad_norm": 1.2098044157028198, "learning_rate": 1.5055390767534683e-06, "loss": 0.5212, "step": 11680 }, { "epoch": 0.75, "grad_norm": 1.2618123292922974, "learning_rate": 1.5047915148731695e-06, "loss": 0.5282, "step": 11681 }, { "epoch": 0.75, "grad_norm": 1.2502001523971558, "learning_rate": 1.5040441057594158e-06, "loss": 0.5445, "step": 11682 }, { "epoch": 0.75, "grad_norm": 1.110045313835144, "learning_rate": 1.5032968494448746e-06, "loss": 0.5313, "step": 11683 }, { "epoch": 0.75, "grad_norm": 1.112015962600708, "learning_rate": 1.502549745962208e-06, "loss": 0.4859, "step": 11684 }, { "epoch": 0.75, "grad_norm": 1.3094871044158936, "learning_rate": 1.5018027953440667e-06, "loss": 0.4935, "step": 11685 }, { "epoch": 0.75, "grad_norm": 1.2903780937194824, "learning_rate": 1.5010559976231004e-06, "loss": 0.5563, "step": 11686 }, { "epoch": 0.75, "grad_norm": 1.3952083587646484, "learning_rate": 1.5003093528319485e-06, "loss": 0.5713, "step": 11687 }, { "epoch": 0.75, "grad_norm": 1.2571064233779907, "learning_rate": 1.4995628610032453e-06, "loss": 0.5509, "step": 11688 }, { "epoch": 0.75, "grad_norm": 1.1677947044372559, "learning_rate": 1.4988165221696183e-06, "loss": 0.5024, "step": 11689 }, { "epoch": 0.75, "grad_norm": 1.2960833311080933, "learning_rate": 1.49807033636369e-06, "loss": 0.5336, "step": 11690 }, { "epoch": 0.75, "grad_norm": 1.1388092041015625, "learning_rate": 1.4973243036180702e-06, "loss": 0.5388, "step": 11691 }, { "epoch": 0.75, "grad_norm": 1.2668853998184204, "learning_rate": 1.496578423965368e-06, "loss": 0.5351, "step": 11692 }, { "epoch": 0.75, "grad_norm": 1.2510815858840942, "learning_rate": 1.4958326974381842e-06, "loss": 0.4929, "step": 11693 }, { "epoch": 0.75, "grad_norm": 1.1440852880477905, "learning_rate": 1.4950871240691124e-06, "loss": 0.4846, "step": 11694 }, { "epoch": 0.75, "grad_norm": 1.198406457901001, "learning_rate": 1.4943417038907392e-06, "loss": 0.4935, "step": 11695 }, { "epoch": 0.75, "grad_norm": 1.225595235824585, "learning_rate": 1.493596436935647e-06, "loss": 0.5026, "step": 11696 }, { "epoch": 0.76, "grad_norm": 1.2834664583206177, "learning_rate": 1.4928513232364067e-06, "loss": 0.5106, "step": 11697 }, { "epoch": 0.76, "grad_norm": 1.1968966722488403, "learning_rate": 1.4921063628255866e-06, "loss": 0.5089, "step": 11698 }, { "epoch": 0.76, "grad_norm": 1.1597356796264648, "learning_rate": 1.4913615557357464e-06, "loss": 0.5142, "step": 11699 }, { "epoch": 0.76, "grad_norm": 1.2732393741607666, "learning_rate": 1.4906169019994404e-06, "loss": 0.5431, "step": 11700 }, { "epoch": 0.76, "grad_norm": 1.313011646270752, "learning_rate": 1.4898724016492155e-06, "loss": 0.5276, "step": 11701 }, { "epoch": 0.76, "grad_norm": 1.254512071609497, "learning_rate": 1.4891280547176129e-06, "loss": 0.5378, "step": 11702 }, { "epoch": 0.76, "grad_norm": 1.0856679677963257, "learning_rate": 1.488383861237163e-06, "loss": 0.4668, "step": 11703 }, { "epoch": 0.76, "grad_norm": 1.1757850646972656, "learning_rate": 1.4876398212403952e-06, "loss": 0.5118, "step": 11704 }, { "epoch": 0.76, "grad_norm": 1.1366887092590332, "learning_rate": 1.4868959347598283e-06, "loss": 0.5596, "step": 11705 }, { "epoch": 0.76, "grad_norm": 1.2004128694534302, "learning_rate": 1.4861522018279766e-06, "loss": 0.5027, "step": 11706 }, { "epoch": 0.76, "grad_norm": 1.1393375396728516, "learning_rate": 1.4854086224773462e-06, "loss": 0.5035, "step": 11707 }, { "epoch": 0.76, "grad_norm": 1.1716355085372925, "learning_rate": 1.4846651967404384e-06, "loss": 0.5158, "step": 11708 }, { "epoch": 0.76, "grad_norm": 1.2720973491668701, "learning_rate": 1.4839219246497437e-06, "loss": 0.5045, "step": 11709 }, { "epoch": 0.76, "grad_norm": 1.3804322481155396, "learning_rate": 1.4831788062377501e-06, "loss": 0.5243, "step": 11710 }, { "epoch": 0.76, "grad_norm": 1.2078521251678467, "learning_rate": 1.4824358415369372e-06, "loss": 0.533, "step": 11711 }, { "epoch": 0.76, "grad_norm": 1.133772611618042, "learning_rate": 1.4816930305797782e-06, "loss": 0.4646, "step": 11712 }, { "epoch": 0.76, "grad_norm": 1.2031489610671997, "learning_rate": 1.48095037339874e-06, "loss": 0.5014, "step": 11713 }, { "epoch": 0.76, "grad_norm": 1.2326868772506714, "learning_rate": 1.4802078700262828e-06, "loss": 0.4819, "step": 11714 }, { "epoch": 0.76, "grad_norm": 1.3529363870620728, "learning_rate": 1.4794655204948572e-06, "loss": 0.5169, "step": 11715 }, { "epoch": 0.76, "grad_norm": 1.3073186874389648, "learning_rate": 1.4787233248369103e-06, "loss": 0.5115, "step": 11716 }, { "epoch": 0.76, "grad_norm": 1.218441367149353, "learning_rate": 1.4779812830848823e-06, "loss": 0.5183, "step": 11717 }, { "epoch": 0.76, "grad_norm": 1.172842025756836, "learning_rate": 1.477239395271205e-06, "loss": 0.5134, "step": 11718 }, { "epoch": 0.76, "grad_norm": 1.152396559715271, "learning_rate": 1.476497661428305e-06, "loss": 0.4877, "step": 11719 }, { "epoch": 0.76, "grad_norm": 1.1982730627059937, "learning_rate": 1.475756081588603e-06, "loss": 0.5131, "step": 11720 }, { "epoch": 0.76, "grad_norm": 1.0973962545394897, "learning_rate": 1.4750146557845085e-06, "loss": 0.503, "step": 11721 }, { "epoch": 0.76, "grad_norm": 1.2531946897506714, "learning_rate": 1.4742733840484291e-06, "loss": 0.5121, "step": 11722 }, { "epoch": 0.76, "grad_norm": 1.2939521074295044, "learning_rate": 1.4735322664127633e-06, "loss": 0.5292, "step": 11723 }, { "epoch": 0.76, "grad_norm": 1.2166780233383179, "learning_rate": 1.4727913029099034e-06, "loss": 0.5565, "step": 11724 }, { "epoch": 0.76, "grad_norm": 1.1879748106002808, "learning_rate": 1.4720504935722374e-06, "loss": 0.533, "step": 11725 }, { "epoch": 0.76, "grad_norm": 1.1847145557403564, "learning_rate": 1.4713098384321407e-06, "loss": 0.5054, "step": 11726 }, { "epoch": 0.76, "grad_norm": 1.1283702850341797, "learning_rate": 1.470569337521986e-06, "loss": 0.4769, "step": 11727 }, { "epoch": 0.76, "grad_norm": 1.2171084880828857, "learning_rate": 1.4698289908741403e-06, "loss": 0.5692, "step": 11728 }, { "epoch": 0.76, "grad_norm": 1.2043086290359497, "learning_rate": 1.4690887985209611e-06, "loss": 0.5253, "step": 11729 }, { "epoch": 0.76, "grad_norm": 1.2024149894714355, "learning_rate": 1.4683487604948022e-06, "loss": 0.5397, "step": 11730 }, { "epoch": 0.76, "grad_norm": 1.2278213500976562, "learning_rate": 1.4676088768280056e-06, "loss": 0.5237, "step": 11731 }, { "epoch": 0.76, "grad_norm": 1.155968427658081, "learning_rate": 1.4668691475529112e-06, "loss": 0.454, "step": 11732 }, { "epoch": 0.76, "grad_norm": 1.251841425895691, "learning_rate": 1.466129572701851e-06, "loss": 0.5351, "step": 11733 }, { "epoch": 0.76, "grad_norm": 1.2510011196136475, "learning_rate": 1.4653901523071494e-06, "loss": 0.502, "step": 11734 }, { "epoch": 0.76, "grad_norm": 1.4006032943725586, "learning_rate": 1.4646508864011267e-06, "loss": 0.5221, "step": 11735 }, { "epoch": 0.76, "grad_norm": 1.1943941116333008, "learning_rate": 1.4639117750160908e-06, "loss": 0.5516, "step": 11736 }, { "epoch": 0.76, "grad_norm": 1.15956449508667, "learning_rate": 1.463172818184348e-06, "loss": 0.5009, "step": 11737 }, { "epoch": 0.76, "grad_norm": 1.18558669090271, "learning_rate": 1.4624340159381961e-06, "loss": 0.5044, "step": 11738 }, { "epoch": 0.76, "grad_norm": 1.1768836975097656, "learning_rate": 1.4616953683099283e-06, "loss": 0.5134, "step": 11739 }, { "epoch": 0.76, "grad_norm": 1.2091593742370605, "learning_rate": 1.4609568753318248e-06, "loss": 0.5452, "step": 11740 }, { "epoch": 0.76, "grad_norm": 1.2121682167053223, "learning_rate": 1.4602185370361654e-06, "loss": 0.5161, "step": 11741 }, { "epoch": 0.76, "grad_norm": 1.417818546295166, "learning_rate": 1.4594803534552215e-06, "loss": 0.5304, "step": 11742 }, { "epoch": 0.76, "grad_norm": 1.1322652101516724, "learning_rate": 1.4587423246212569e-06, "loss": 0.5072, "step": 11743 }, { "epoch": 0.76, "grad_norm": 1.2209511995315552, "learning_rate": 1.4580044505665296e-06, "loss": 0.4719, "step": 11744 }, { "epoch": 0.76, "grad_norm": 1.1966712474822998, "learning_rate": 1.457266731323288e-06, "loss": 0.5337, "step": 11745 }, { "epoch": 0.76, "grad_norm": 1.1860755681991577, "learning_rate": 1.456529166923777e-06, "loss": 0.5486, "step": 11746 }, { "epoch": 0.76, "grad_norm": 1.2553859949111938, "learning_rate": 1.455791757400234e-06, "loss": 0.5277, "step": 11747 }, { "epoch": 0.76, "grad_norm": 1.1831047534942627, "learning_rate": 1.4550545027848895e-06, "loss": 0.4911, "step": 11748 }, { "epoch": 0.76, "grad_norm": 1.1906729936599731, "learning_rate": 1.4543174031099677e-06, "loss": 0.4993, "step": 11749 }, { "epoch": 0.76, "grad_norm": 1.2972798347473145, "learning_rate": 1.4535804584076825e-06, "loss": 0.5245, "step": 11750 }, { "epoch": 0.76, "grad_norm": 1.2395154237747192, "learning_rate": 1.452843668710246e-06, "loss": 0.4787, "step": 11751 }, { "epoch": 0.76, "grad_norm": 1.2045851945877075, "learning_rate": 1.4521070340498605e-06, "loss": 0.4671, "step": 11752 }, { "epoch": 0.76, "grad_norm": 2.426290988922119, "learning_rate": 1.451370554458723e-06, "loss": 0.5542, "step": 11753 }, { "epoch": 0.76, "grad_norm": 1.1911797523498535, "learning_rate": 1.4506342299690234e-06, "loss": 0.4999, "step": 11754 }, { "epoch": 0.76, "grad_norm": 1.2178157567977905, "learning_rate": 1.4498980606129453e-06, "loss": 0.539, "step": 11755 }, { "epoch": 0.76, "grad_norm": 1.2145692110061646, "learning_rate": 1.4491620464226625e-06, "loss": 0.5111, "step": 11756 }, { "epoch": 0.76, "grad_norm": 1.352418065071106, "learning_rate": 1.4484261874303446e-06, "loss": 0.4928, "step": 11757 }, { "epoch": 0.76, "grad_norm": 1.2647112607955933, "learning_rate": 1.4476904836681555e-06, "loss": 0.5006, "step": 11758 }, { "epoch": 0.76, "grad_norm": 1.1870309114456177, "learning_rate": 1.44695493516825e-06, "loss": 0.5173, "step": 11759 }, { "epoch": 0.76, "grad_norm": 1.2463363409042358, "learning_rate": 1.4462195419627773e-06, "loss": 0.572, "step": 11760 }, { "epoch": 0.76, "grad_norm": 1.2436710596084595, "learning_rate": 1.4454843040838812e-06, "loss": 0.4872, "step": 11761 }, { "epoch": 0.76, "grad_norm": 1.2113237380981445, "learning_rate": 1.4447492215636937e-06, "loss": 0.4875, "step": 11762 }, { "epoch": 0.76, "grad_norm": 1.2876354455947876, "learning_rate": 1.444014294434345e-06, "loss": 0.5379, "step": 11763 }, { "epoch": 0.76, "grad_norm": 1.2422469854354858, "learning_rate": 1.4432795227279573e-06, "loss": 0.5474, "step": 11764 }, { "epoch": 0.76, "grad_norm": 1.2971314191818237, "learning_rate": 1.442544906476645e-06, "loss": 0.4975, "step": 11765 }, { "epoch": 0.76, "grad_norm": 1.2075221538543701, "learning_rate": 1.4418104457125165e-06, "loss": 0.5215, "step": 11766 }, { "epoch": 0.76, "grad_norm": 1.150297999382019, "learning_rate": 1.4410761404676743e-06, "loss": 0.5031, "step": 11767 }, { "epoch": 0.76, "grad_norm": 1.1337188482284546, "learning_rate": 1.440341990774211e-06, "loss": 0.5098, "step": 11768 }, { "epoch": 0.76, "grad_norm": 1.2361395359039307, "learning_rate": 1.4396079966642146e-06, "loss": 0.5064, "step": 11769 }, { "epoch": 0.76, "grad_norm": 1.2540652751922607, "learning_rate": 1.4388741581697674e-06, "loss": 0.4967, "step": 11770 }, { "epoch": 0.76, "grad_norm": 1.2227946519851685, "learning_rate": 1.4381404753229421e-06, "loss": 0.4589, "step": 11771 }, { "epoch": 0.76, "grad_norm": 1.347289800643921, "learning_rate": 1.4374069481558096e-06, "loss": 0.5402, "step": 11772 }, { "epoch": 0.76, "grad_norm": 1.4212650060653687, "learning_rate": 1.4366735767004253e-06, "loss": 0.5269, "step": 11773 }, { "epoch": 0.76, "grad_norm": 1.0985058546066284, "learning_rate": 1.4359403609888455e-06, "loss": 0.4907, "step": 11774 }, { "epoch": 0.76, "grad_norm": 1.1196035146713257, "learning_rate": 1.4352073010531176e-06, "loss": 0.5034, "step": 11775 }, { "epoch": 0.76, "grad_norm": 1.2225767374038696, "learning_rate": 1.434474396925281e-06, "loss": 0.5563, "step": 11776 }, { "epoch": 0.76, "grad_norm": 1.115281343460083, "learning_rate": 1.433741648637369e-06, "loss": 0.4821, "step": 11777 }, { "epoch": 0.76, "grad_norm": 1.1347935199737549, "learning_rate": 1.4330090562214105e-06, "loss": 0.5182, "step": 11778 }, { "epoch": 0.76, "grad_norm": 1.2946124076843262, "learning_rate": 1.4322766197094217e-06, "loss": 0.5025, "step": 11779 }, { "epoch": 0.76, "grad_norm": 1.1697243452072144, "learning_rate": 1.4315443391334166e-06, "loss": 0.5336, "step": 11780 }, { "epoch": 0.76, "grad_norm": 1.3066519498825073, "learning_rate": 1.4308122145254022e-06, "loss": 0.5334, "step": 11781 }, { "epoch": 0.76, "grad_norm": 1.218388319015503, "learning_rate": 1.4300802459173768e-06, "loss": 0.5191, "step": 11782 }, { "epoch": 0.76, "grad_norm": 1.2521963119506836, "learning_rate": 1.4293484333413338e-06, "loss": 0.5596, "step": 11783 }, { "epoch": 0.76, "grad_norm": 1.2160924673080444, "learning_rate": 1.428616776829259e-06, "loss": 0.5373, "step": 11784 }, { "epoch": 0.76, "grad_norm": 1.2838144302368164, "learning_rate": 1.42788527641313e-06, "loss": 0.5298, "step": 11785 }, { "epoch": 0.76, "grad_norm": 1.2747900485992432, "learning_rate": 1.427153932124919e-06, "loss": 0.5298, "step": 11786 }, { "epoch": 0.76, "grad_norm": 1.2362174987792969, "learning_rate": 1.4264227439965917e-06, "loss": 0.4804, "step": 11787 }, { "epoch": 0.76, "grad_norm": 1.2305551767349243, "learning_rate": 1.4256917120601067e-06, "loss": 0.5476, "step": 11788 }, { "epoch": 0.76, "grad_norm": 1.2320358753204346, "learning_rate": 1.4249608363474143e-06, "loss": 0.5377, "step": 11789 }, { "epoch": 0.76, "grad_norm": 1.1647437810897827, "learning_rate": 1.424230116890462e-06, "loss": 0.4893, "step": 11790 }, { "epoch": 0.76, "grad_norm": 1.185996413230896, "learning_rate": 1.4234995537211844e-06, "loss": 0.5305, "step": 11791 }, { "epoch": 0.76, "grad_norm": 1.2752830982208252, "learning_rate": 1.4227691468715133e-06, "loss": 0.5784, "step": 11792 }, { "epoch": 0.76, "grad_norm": 1.1177458763122559, "learning_rate": 1.4220388963733734e-06, "loss": 0.4569, "step": 11793 }, { "epoch": 0.76, "grad_norm": 1.2698777914047241, "learning_rate": 1.4213088022586824e-06, "loss": 0.5067, "step": 11794 }, { "epoch": 0.76, "grad_norm": 1.2003659009933472, "learning_rate": 1.4205788645593505e-06, "loss": 0.4736, "step": 11795 }, { "epoch": 0.76, "grad_norm": 1.5498079061508179, "learning_rate": 1.4198490833072826e-06, "loss": 0.4999, "step": 11796 }, { "epoch": 0.76, "grad_norm": 1.1056785583496094, "learning_rate": 1.4191194585343731e-06, "loss": 0.496, "step": 11797 }, { "epoch": 0.76, "grad_norm": 1.1049045324325562, "learning_rate": 1.4183899902725135e-06, "loss": 0.4877, "step": 11798 }, { "epoch": 0.76, "grad_norm": 1.1429972648620605, "learning_rate": 1.4176606785535863e-06, "loss": 0.5188, "step": 11799 }, { "epoch": 0.76, "grad_norm": 1.1921454668045044, "learning_rate": 1.4169315234094683e-06, "loss": 0.5064, "step": 11800 }, { "epoch": 0.76, "grad_norm": 1.26972234249115, "learning_rate": 1.416202524872029e-06, "loss": 0.549, "step": 11801 }, { "epoch": 0.76, "grad_norm": 1.191674828529358, "learning_rate": 1.415473682973133e-06, "loss": 0.5342, "step": 11802 }, { "epoch": 0.76, "grad_norm": 1.1208248138427734, "learning_rate": 1.414744997744632e-06, "loss": 0.497, "step": 11803 }, { "epoch": 0.76, "grad_norm": 1.2699596881866455, "learning_rate": 1.4140164692183772e-06, "loss": 0.4972, "step": 11804 }, { "epoch": 0.76, "grad_norm": 1.2093180418014526, "learning_rate": 1.4132880974262109e-06, "loss": 0.5039, "step": 11805 }, { "epoch": 0.76, "grad_norm": 1.2676734924316406, "learning_rate": 1.4125598823999675e-06, "loss": 0.5642, "step": 11806 }, { "epoch": 0.76, "grad_norm": 1.2103986740112305, "learning_rate": 1.4118318241714767e-06, "loss": 0.5261, "step": 11807 }, { "epoch": 0.76, "grad_norm": 1.1546446084976196, "learning_rate": 1.4111039227725603e-06, "loss": 0.5429, "step": 11808 }, { "epoch": 0.76, "grad_norm": 1.2035939693450928, "learning_rate": 1.4103761782350305e-06, "loss": 0.5103, "step": 11809 }, { "epoch": 0.76, "grad_norm": 1.1741398572921753, "learning_rate": 1.4096485905906965e-06, "loss": 0.5005, "step": 11810 }, { "epoch": 0.76, "grad_norm": 1.1957707405090332, "learning_rate": 1.4089211598713593e-06, "loss": 0.555, "step": 11811 }, { "epoch": 0.76, "grad_norm": 1.4191491603851318, "learning_rate": 1.4081938861088134e-06, "loss": 0.5439, "step": 11812 }, { "epoch": 0.76, "grad_norm": 1.2626147270202637, "learning_rate": 1.407466769334847e-06, "loss": 0.526, "step": 11813 }, { "epoch": 0.76, "grad_norm": 1.3103790283203125, "learning_rate": 1.406739809581238e-06, "loss": 0.5357, "step": 11814 }, { "epoch": 0.76, "grad_norm": 1.117545247077942, "learning_rate": 1.4060130068797605e-06, "loss": 0.4653, "step": 11815 }, { "epoch": 0.76, "grad_norm": 1.2663277387619019, "learning_rate": 1.4052863612621826e-06, "loss": 0.5368, "step": 11816 }, { "epoch": 0.76, "grad_norm": 1.2669429779052734, "learning_rate": 1.4045598727602633e-06, "loss": 0.4991, "step": 11817 }, { "epoch": 0.76, "grad_norm": 1.2819349765777588, "learning_rate": 1.4038335414057553e-06, "loss": 0.4666, "step": 11818 }, { "epoch": 0.76, "grad_norm": 1.1010500192642212, "learning_rate": 1.4031073672304068e-06, "loss": 0.4871, "step": 11819 }, { "epoch": 0.76, "grad_norm": 1.206091046333313, "learning_rate": 1.4023813502659533e-06, "loss": 0.5419, "step": 11820 }, { "epoch": 0.76, "grad_norm": 1.1891453266143799, "learning_rate": 1.4016554905441293e-06, "loss": 0.5323, "step": 11821 }, { "epoch": 0.76, "grad_norm": 1.1978477239608765, "learning_rate": 1.4009297880966599e-06, "loss": 0.4901, "step": 11822 }, { "epoch": 0.76, "grad_norm": 1.1692519187927246, "learning_rate": 1.4002042429552637e-06, "loss": 0.5153, "step": 11823 }, { "epoch": 0.76, "grad_norm": 1.159617304801941, "learning_rate": 1.3994788551516542e-06, "loss": 0.5373, "step": 11824 }, { "epoch": 0.76, "grad_norm": 1.2272398471832275, "learning_rate": 1.3987536247175326e-06, "loss": 0.5466, "step": 11825 }, { "epoch": 0.76, "grad_norm": 1.239229440689087, "learning_rate": 1.3980285516845986e-06, "loss": 0.5687, "step": 11826 }, { "epoch": 0.76, "grad_norm": 1.2342020273208618, "learning_rate": 1.3973036360845438e-06, "loss": 0.5574, "step": 11827 }, { "epoch": 0.76, "grad_norm": 1.1476223468780518, "learning_rate": 1.3965788779490537e-06, "loss": 0.4554, "step": 11828 }, { "epoch": 0.76, "grad_norm": 1.38198983669281, "learning_rate": 1.3958542773098016e-06, "loss": 0.4936, "step": 11829 }, { "epoch": 0.76, "grad_norm": 1.1213805675506592, "learning_rate": 1.3951298341984605e-06, "loss": 0.5319, "step": 11830 }, { "epoch": 0.76, "grad_norm": 1.240538477897644, "learning_rate": 1.394405548646694e-06, "loss": 0.4873, "step": 11831 }, { "epoch": 0.76, "grad_norm": 1.2863494157791138, "learning_rate": 1.3936814206861581e-06, "loss": 0.5894, "step": 11832 }, { "epoch": 0.76, "grad_norm": 1.3314933776855469, "learning_rate": 1.3929574503485044e-06, "loss": 0.5305, "step": 11833 }, { "epoch": 0.76, "grad_norm": 1.1073635816574097, "learning_rate": 1.3922336376653727e-06, "loss": 0.4664, "step": 11834 }, { "epoch": 0.76, "grad_norm": 1.2606993913650513, "learning_rate": 1.3915099826684008e-06, "loss": 0.5435, "step": 11835 }, { "epoch": 0.76, "grad_norm": 1.225094199180603, "learning_rate": 1.3907864853892172e-06, "loss": 0.5387, "step": 11836 }, { "epoch": 0.76, "grad_norm": 1.111209750175476, "learning_rate": 1.3900631458594465e-06, "loss": 0.505, "step": 11837 }, { "epoch": 0.76, "grad_norm": 1.2367000579833984, "learning_rate": 1.3893399641106997e-06, "loss": 0.4686, "step": 11838 }, { "epoch": 0.76, "grad_norm": 1.2817448377609253, "learning_rate": 1.388616940174588e-06, "loss": 0.5247, "step": 11839 }, { "epoch": 0.76, "grad_norm": 1.2277776002883911, "learning_rate": 1.3878940740827123e-06, "loss": 0.5352, "step": 11840 }, { "epoch": 0.76, "grad_norm": 1.2462519407272339, "learning_rate": 1.387171365866667e-06, "loss": 0.5267, "step": 11841 }, { "epoch": 0.76, "grad_norm": 1.1906906366348267, "learning_rate": 1.3864488155580403e-06, "loss": 0.4615, "step": 11842 }, { "epoch": 0.76, "grad_norm": 1.2192083597183228, "learning_rate": 1.3857264231884144e-06, "loss": 0.5607, "step": 11843 }, { "epoch": 0.76, "grad_norm": 1.201562762260437, "learning_rate": 1.3850041887893596e-06, "loss": 0.5039, "step": 11844 }, { "epoch": 0.76, "grad_norm": 1.1706202030181885, "learning_rate": 1.384282112392445e-06, "loss": 0.5905, "step": 11845 }, { "epoch": 0.76, "grad_norm": 1.3240008354187012, "learning_rate": 1.3835601940292309e-06, "loss": 0.534, "step": 11846 }, { "epoch": 0.76, "grad_norm": 1.12644624710083, "learning_rate": 1.3828384337312701e-06, "loss": 0.5629, "step": 11847 }, { "epoch": 0.76, "grad_norm": 1.1687889099121094, "learning_rate": 1.3821168315301086e-06, "loss": 0.5112, "step": 11848 }, { "epoch": 0.76, "grad_norm": 1.1954164505004883, "learning_rate": 1.3813953874572882e-06, "loss": 0.5367, "step": 11849 }, { "epoch": 0.76, "grad_norm": 1.1211328506469727, "learning_rate": 1.3806741015443376e-06, "loss": 0.4869, "step": 11850 }, { "epoch": 0.76, "grad_norm": 1.283594012260437, "learning_rate": 1.3799529738227836e-06, "loss": 0.5135, "step": 11851 }, { "epoch": 0.77, "grad_norm": 1.2102903127670288, "learning_rate": 1.3792320043241459e-06, "loss": 0.5286, "step": 11852 }, { "epoch": 0.77, "grad_norm": 1.27409029006958, "learning_rate": 1.378511193079935e-06, "loss": 0.5147, "step": 11853 }, { "epoch": 0.77, "grad_norm": 1.2650259733200073, "learning_rate": 1.3777905401216584e-06, "loss": 0.5207, "step": 11854 }, { "epoch": 0.77, "grad_norm": 1.303455114364624, "learning_rate": 1.37707004548081e-06, "loss": 0.5185, "step": 11855 }, { "epoch": 0.77, "grad_norm": 1.232143759727478, "learning_rate": 1.3763497091888827e-06, "loss": 0.5583, "step": 11856 }, { "epoch": 0.77, "grad_norm": 1.183131456375122, "learning_rate": 1.3756295312773604e-06, "loss": 0.5319, "step": 11857 }, { "epoch": 0.77, "grad_norm": 1.2782796621322632, "learning_rate": 1.37490951177772e-06, "loss": 0.5427, "step": 11858 }, { "epoch": 0.77, "grad_norm": 1.3743160963058472, "learning_rate": 1.3741896507214324e-06, "loss": 0.4784, "step": 11859 }, { "epoch": 0.77, "grad_norm": 1.2672306299209595, "learning_rate": 1.3734699481399621e-06, "loss": 0.5017, "step": 11860 }, { "epoch": 0.77, "grad_norm": 1.2546582221984863, "learning_rate": 1.3727504040647621e-06, "loss": 0.5682, "step": 11861 }, { "epoch": 0.77, "grad_norm": 1.2527215480804443, "learning_rate": 1.3720310185272833e-06, "loss": 0.5392, "step": 11862 }, { "epoch": 0.77, "grad_norm": 1.1966279745101929, "learning_rate": 1.3713117915589685e-06, "loss": 0.489, "step": 11863 }, { "epoch": 0.77, "grad_norm": 1.180772304534912, "learning_rate": 1.3705927231912535e-06, "loss": 0.5185, "step": 11864 }, { "epoch": 0.77, "grad_norm": 1.2204355001449585, "learning_rate": 1.369873813455566e-06, "loss": 0.5165, "step": 11865 }, { "epoch": 0.77, "grad_norm": 1.1531513929367065, "learning_rate": 1.36915506238333e-06, "loss": 0.4903, "step": 11866 }, { "epoch": 0.77, "grad_norm": 1.251659631729126, "learning_rate": 1.3684364700059572e-06, "loss": 0.5163, "step": 11867 }, { "epoch": 0.77, "grad_norm": 1.1127300262451172, "learning_rate": 1.3677180363548564e-06, "loss": 0.4575, "step": 11868 }, { "epoch": 0.77, "grad_norm": 1.2661741971969604, "learning_rate": 1.3669997614614293e-06, "loss": 0.5778, "step": 11869 }, { "epoch": 0.77, "grad_norm": 1.2471678256988525, "learning_rate": 1.3662816453570692e-06, "loss": 0.5194, "step": 11870 }, { "epoch": 0.77, "grad_norm": 1.251612901687622, "learning_rate": 1.3655636880731632e-06, "loss": 0.5975, "step": 11871 }, { "epoch": 0.77, "grad_norm": 1.2103863954544067, "learning_rate": 1.3648458896410927e-06, "loss": 0.5199, "step": 11872 }, { "epoch": 0.77, "grad_norm": 1.2313066720962524, "learning_rate": 1.364128250092228e-06, "loss": 0.5368, "step": 11873 }, { "epoch": 0.77, "grad_norm": 1.2911065816879272, "learning_rate": 1.3634107694579373e-06, "loss": 0.5228, "step": 11874 }, { "epoch": 0.77, "grad_norm": 1.2082489728927612, "learning_rate": 1.3626934477695791e-06, "loss": 0.4963, "step": 11875 }, { "epoch": 0.77, "grad_norm": 1.2964423894882202, "learning_rate": 1.3619762850585061e-06, "loss": 0.5257, "step": 11876 }, { "epoch": 0.77, "grad_norm": 1.2581390142440796, "learning_rate": 1.3612592813560632e-06, "loss": 0.5187, "step": 11877 }, { "epoch": 0.77, "grad_norm": 1.169561743736267, "learning_rate": 1.3605424366935905e-06, "loss": 0.4852, "step": 11878 }, { "epoch": 0.77, "grad_norm": 1.2476695775985718, "learning_rate": 1.3598257511024165e-06, "loss": 0.5272, "step": 11879 }, { "epoch": 0.77, "grad_norm": 1.1682615280151367, "learning_rate": 1.359109224613867e-06, "loss": 0.5082, "step": 11880 }, { "epoch": 0.77, "grad_norm": 1.2479946613311768, "learning_rate": 1.3583928572592603e-06, "loss": 0.534, "step": 11881 }, { "epoch": 0.77, "grad_norm": 1.270320177078247, "learning_rate": 1.3576766490699057e-06, "loss": 0.5437, "step": 11882 }, { "epoch": 0.77, "grad_norm": 1.1995223760604858, "learning_rate": 1.3569606000771074e-06, "loss": 0.4577, "step": 11883 }, { "epoch": 0.77, "grad_norm": 1.156639814376831, "learning_rate": 1.356244710312164e-06, "loss": 0.5061, "step": 11884 }, { "epoch": 0.77, "grad_norm": 1.1843516826629639, "learning_rate": 1.3555289798063614e-06, "loss": 0.4986, "step": 11885 }, { "epoch": 0.77, "grad_norm": 1.1857964992523193, "learning_rate": 1.3548134085909847e-06, "loss": 0.5334, "step": 11886 }, { "epoch": 0.77, "grad_norm": 1.2076839208602905, "learning_rate": 1.3540979966973089e-06, "loss": 0.4859, "step": 11887 }, { "epoch": 0.77, "grad_norm": 1.1782370805740356, "learning_rate": 1.3533827441566034e-06, "loss": 0.5066, "step": 11888 }, { "epoch": 0.77, "grad_norm": 1.1448302268981934, "learning_rate": 1.3526676510001297e-06, "loss": 0.4732, "step": 11889 }, { "epoch": 0.77, "grad_norm": 1.1534276008605957, "learning_rate": 1.351952717259144e-06, "loss": 0.5048, "step": 11890 }, { "epoch": 0.77, "grad_norm": 1.2988674640655518, "learning_rate": 1.3512379429648915e-06, "loss": 0.5145, "step": 11891 }, { "epoch": 0.77, "grad_norm": 1.17606520652771, "learning_rate": 1.3505233281486146e-06, "loss": 0.5029, "step": 11892 }, { "epoch": 0.77, "grad_norm": 1.087454080581665, "learning_rate": 1.3498088728415471e-06, "loss": 0.5021, "step": 11893 }, { "epoch": 0.77, "grad_norm": 1.1908248662948608, "learning_rate": 1.3490945770749165e-06, "loss": 0.4897, "step": 11894 }, { "epoch": 0.77, "grad_norm": 1.253601312637329, "learning_rate": 1.3483804408799422e-06, "loss": 0.4764, "step": 11895 }, { "epoch": 0.77, "grad_norm": 1.5028527975082397, "learning_rate": 1.347666464287839e-06, "loss": 0.5308, "step": 11896 }, { "epoch": 0.77, "grad_norm": 1.3084197044372559, "learning_rate": 1.3469526473298101e-06, "loss": 0.5292, "step": 11897 }, { "epoch": 0.77, "grad_norm": 1.2388418912887573, "learning_rate": 1.346238990037056e-06, "loss": 0.5202, "step": 11898 }, { "epoch": 0.77, "grad_norm": 1.2115042209625244, "learning_rate": 1.345525492440769e-06, "loss": 0.562, "step": 11899 }, { "epoch": 0.77, "grad_norm": 1.2554631233215332, "learning_rate": 1.3448121545721338e-06, "loss": 0.5474, "step": 11900 }, { "epoch": 0.77, "grad_norm": 1.1526143550872803, "learning_rate": 1.3440989764623301e-06, "loss": 0.4639, "step": 11901 }, { "epoch": 0.77, "grad_norm": 1.1893141269683838, "learning_rate": 1.3433859581425269e-06, "loss": 0.5202, "step": 11902 }, { "epoch": 0.77, "grad_norm": 1.2771503925323486, "learning_rate": 1.342673099643889e-06, "loss": 0.546, "step": 11903 }, { "epoch": 0.77, "grad_norm": 1.7111711502075195, "learning_rate": 1.341960400997574e-06, "loss": 0.4552, "step": 11904 }, { "epoch": 0.77, "grad_norm": 1.1298832893371582, "learning_rate": 1.3412478622347318e-06, "loss": 0.5037, "step": 11905 }, { "epoch": 0.77, "grad_norm": 1.2349534034729004, "learning_rate": 1.3405354833865058e-06, "loss": 0.5028, "step": 11906 }, { "epoch": 0.77, "grad_norm": 1.3111681938171387, "learning_rate": 1.3398232644840336e-06, "loss": 0.5648, "step": 11907 }, { "epoch": 0.77, "grad_norm": 1.1497913599014282, "learning_rate": 1.3391112055584422e-06, "loss": 0.5039, "step": 11908 }, { "epoch": 0.77, "grad_norm": 1.2620545625686646, "learning_rate": 1.3383993066408545e-06, "loss": 0.4899, "step": 11909 }, { "epoch": 0.77, "grad_norm": 1.1164143085479736, "learning_rate": 1.3376875677623858e-06, "loss": 0.5097, "step": 11910 }, { "epoch": 0.77, "grad_norm": 1.1380351781845093, "learning_rate": 1.3369759889541451e-06, "loss": 0.5431, "step": 11911 }, { "epoch": 0.77, "grad_norm": 1.2060481309890747, "learning_rate": 1.3362645702472326e-06, "loss": 0.515, "step": 11912 }, { "epoch": 0.77, "grad_norm": 1.176688551902771, "learning_rate": 1.3355533116727454e-06, "loss": 0.557, "step": 11913 }, { "epoch": 0.77, "grad_norm": 1.27883780002594, "learning_rate": 1.3348422132617667e-06, "loss": 0.5176, "step": 11914 }, { "epoch": 0.77, "grad_norm": 1.2226837873458862, "learning_rate": 1.3341312750453782e-06, "loss": 0.5364, "step": 11915 }, { "epoch": 0.77, "grad_norm": 1.230825424194336, "learning_rate": 1.3334204970546543e-06, "loss": 0.5586, "step": 11916 }, { "epoch": 0.77, "grad_norm": 1.2599589824676514, "learning_rate": 1.3327098793206616e-06, "loss": 0.5282, "step": 11917 }, { "epoch": 0.77, "grad_norm": 1.3040770292282104, "learning_rate": 1.331999421874457e-06, "loss": 0.5003, "step": 11918 }, { "epoch": 0.77, "grad_norm": 1.1798681020736694, "learning_rate": 1.3312891247470943e-06, "loss": 0.4918, "step": 11919 }, { "epoch": 0.77, "grad_norm": 1.1898046731948853, "learning_rate": 1.3305789879696184e-06, "loss": 0.5032, "step": 11920 }, { "epoch": 0.77, "grad_norm": 1.2537950277328491, "learning_rate": 1.3298690115730679e-06, "loss": 0.5316, "step": 11921 }, { "epoch": 0.77, "grad_norm": 1.2901502847671509, "learning_rate": 1.3291591955884747e-06, "loss": 0.558, "step": 11922 }, { "epoch": 0.77, "grad_norm": 1.146270990371704, "learning_rate": 1.328449540046861e-06, "loss": 0.4932, "step": 11923 }, { "epoch": 0.77, "grad_norm": 1.2344095706939697, "learning_rate": 1.3277400449792455e-06, "loss": 0.5138, "step": 11924 }, { "epoch": 0.77, "grad_norm": 1.131432294845581, "learning_rate": 1.3270307104166375e-06, "loss": 0.5052, "step": 11925 }, { "epoch": 0.77, "grad_norm": 1.187168002128601, "learning_rate": 1.3263215363900429e-06, "loss": 0.5309, "step": 11926 }, { "epoch": 0.77, "grad_norm": 1.1218984127044678, "learning_rate": 1.325612522930454e-06, "loss": 0.5064, "step": 11927 }, { "epoch": 0.77, "grad_norm": 1.1174218654632568, "learning_rate": 1.324903670068861e-06, "loss": 0.4703, "step": 11928 }, { "epoch": 0.77, "grad_norm": 1.2627493143081665, "learning_rate": 1.324194977836248e-06, "loss": 0.5086, "step": 11929 }, { "epoch": 0.77, "grad_norm": 1.2291147708892822, "learning_rate": 1.3234864462635876e-06, "loss": 0.5637, "step": 11930 }, { "epoch": 0.77, "grad_norm": 1.1818550825119019, "learning_rate": 1.3227780753818514e-06, "loss": 0.5225, "step": 11931 }, { "epoch": 0.77, "grad_norm": 1.2519696950912476, "learning_rate": 1.3220698652219966e-06, "loss": 0.5249, "step": 11932 }, { "epoch": 0.77, "grad_norm": 1.3122854232788086, "learning_rate": 1.3213618158149783e-06, "loss": 0.5278, "step": 11933 }, { "epoch": 0.77, "grad_norm": 1.2035528421401978, "learning_rate": 1.320653927191744e-06, "loss": 0.5198, "step": 11934 }, { "epoch": 0.77, "grad_norm": 1.206816554069519, "learning_rate": 1.3199461993832342e-06, "loss": 0.4964, "step": 11935 }, { "epoch": 0.77, "grad_norm": 1.2213563919067383, "learning_rate": 1.319238632420381e-06, "loss": 0.5019, "step": 11936 }, { "epoch": 0.77, "grad_norm": 1.2404340505599976, "learning_rate": 1.3185312263341127e-06, "loss": 0.5012, "step": 11937 }, { "epoch": 0.77, "grad_norm": 1.2709908485412598, "learning_rate": 1.3178239811553445e-06, "loss": 0.4828, "step": 11938 }, { "epoch": 0.77, "grad_norm": 1.195817470550537, "learning_rate": 1.3171168969149895e-06, "loss": 0.5401, "step": 11939 }, { "epoch": 0.77, "grad_norm": 1.1980913877487183, "learning_rate": 1.3164099736439535e-06, "loss": 0.5442, "step": 11940 }, { "epoch": 0.77, "grad_norm": 1.157550573348999, "learning_rate": 1.3157032113731333e-06, "loss": 0.4648, "step": 11941 }, { "epoch": 0.77, "grad_norm": 1.1857367753982544, "learning_rate": 1.3149966101334216e-06, "loss": 0.5705, "step": 11942 }, { "epoch": 0.77, "grad_norm": 1.211924433708191, "learning_rate": 1.3142901699556997e-06, "loss": 0.4497, "step": 11943 }, { "epoch": 0.77, "grad_norm": 1.1392470598220825, "learning_rate": 1.3135838908708448e-06, "loss": 0.516, "step": 11944 }, { "epoch": 0.77, "grad_norm": 1.3026858568191528, "learning_rate": 1.312877772909727e-06, "loss": 0.5174, "step": 11945 }, { "epoch": 0.77, "grad_norm": 1.410675048828125, "learning_rate": 1.3121718161032088e-06, "loss": 0.5317, "step": 11946 }, { "epoch": 0.77, "grad_norm": 1.200429916381836, "learning_rate": 1.311466020482146e-06, "loss": 0.4937, "step": 11947 }, { "epoch": 0.77, "grad_norm": 1.1266419887542725, "learning_rate": 1.3107603860773882e-06, "loss": 0.5082, "step": 11948 }, { "epoch": 0.77, "grad_norm": 1.185248851776123, "learning_rate": 1.3100549129197743e-06, "loss": 0.5079, "step": 11949 }, { "epoch": 0.77, "grad_norm": 1.1750925779342651, "learning_rate": 1.3093496010401396e-06, "loss": 0.5291, "step": 11950 }, { "epoch": 0.77, "grad_norm": 1.2108147144317627, "learning_rate": 1.3086444504693119e-06, "loss": 0.4609, "step": 11951 }, { "epoch": 0.77, "grad_norm": 1.3054007291793823, "learning_rate": 1.3079394612381119e-06, "loss": 0.5294, "step": 11952 }, { "epoch": 0.77, "grad_norm": 1.2438006401062012, "learning_rate": 1.3072346333773516e-06, "loss": 0.5163, "step": 11953 }, { "epoch": 0.77, "grad_norm": 1.1595865488052368, "learning_rate": 1.3065299669178404e-06, "loss": 0.5283, "step": 11954 }, { "epoch": 0.77, "grad_norm": 1.2075608968734741, "learning_rate": 1.3058254618903733e-06, "loss": 0.5539, "step": 11955 }, { "epoch": 0.77, "grad_norm": 1.2515089511871338, "learning_rate": 1.305121118325744e-06, "loss": 0.5207, "step": 11956 }, { "epoch": 0.77, "grad_norm": 1.28859543800354, "learning_rate": 1.3044169362547377e-06, "loss": 0.527, "step": 11957 }, { "epoch": 0.77, "grad_norm": 1.2054657936096191, "learning_rate": 1.3037129157081323e-06, "loss": 0.5011, "step": 11958 }, { "epoch": 0.77, "grad_norm": 1.1551004648208618, "learning_rate": 1.3030090567166986e-06, "loss": 0.5245, "step": 11959 }, { "epoch": 0.77, "grad_norm": 1.7566899061203003, "learning_rate": 1.3023053593112027e-06, "loss": 0.5539, "step": 11960 }, { "epoch": 0.77, "grad_norm": 1.1707284450531006, "learning_rate": 1.3016018235223975e-06, "loss": 0.5049, "step": 11961 }, { "epoch": 0.77, "grad_norm": 1.181976318359375, "learning_rate": 1.3008984493810351e-06, "loss": 0.5496, "step": 11962 }, { "epoch": 0.77, "grad_norm": 1.3185659646987915, "learning_rate": 1.3001952369178571e-06, "loss": 0.543, "step": 11963 }, { "epoch": 0.77, "grad_norm": 1.1712214946746826, "learning_rate": 1.2994921861636e-06, "loss": 0.5269, "step": 11964 }, { "epoch": 0.77, "grad_norm": 1.2337521314620972, "learning_rate": 1.2987892971489918e-06, "loss": 0.5471, "step": 11965 }, { "epoch": 0.77, "grad_norm": 1.211575984954834, "learning_rate": 1.2980865699047557e-06, "loss": 0.5462, "step": 11966 }, { "epoch": 0.77, "grad_norm": 1.3424603939056396, "learning_rate": 1.2973840044616032e-06, "loss": 0.5426, "step": 11967 }, { "epoch": 0.77, "grad_norm": 1.2166410684585571, "learning_rate": 1.2966816008502432e-06, "loss": 0.5251, "step": 11968 }, { "epoch": 0.77, "grad_norm": 1.166161298751831, "learning_rate": 1.2959793591013754e-06, "loss": 0.5459, "step": 11969 }, { "epoch": 0.77, "grad_norm": 1.3318276405334473, "learning_rate": 1.295277279245693e-06, "loss": 0.5263, "step": 11970 }, { "epoch": 0.77, "grad_norm": 1.1600780487060547, "learning_rate": 1.2945753613138828e-06, "loss": 0.5075, "step": 11971 }, { "epoch": 0.77, "grad_norm": 1.306265115737915, "learning_rate": 1.2938736053366252e-06, "loss": 0.5405, "step": 11972 }, { "epoch": 0.77, "grad_norm": 1.1673439741134644, "learning_rate": 1.2931720113445884e-06, "loss": 0.4827, "step": 11973 }, { "epoch": 0.77, "grad_norm": 1.4148355722427368, "learning_rate": 1.2924705793684394e-06, "loss": 0.5519, "step": 11974 }, { "epoch": 0.77, "grad_norm": 1.2063159942626953, "learning_rate": 1.2917693094388361e-06, "loss": 0.5097, "step": 11975 }, { "epoch": 0.77, "grad_norm": 1.2129584550857544, "learning_rate": 1.2910682015864284e-06, "loss": 0.5306, "step": 11976 }, { "epoch": 0.77, "grad_norm": 1.2959798574447632, "learning_rate": 1.2903672558418606e-06, "loss": 0.494, "step": 11977 }, { "epoch": 0.77, "grad_norm": 2.270904541015625, "learning_rate": 1.289666472235771e-06, "loss": 0.5466, "step": 11978 }, { "epoch": 0.77, "grad_norm": 1.1881464719772339, "learning_rate": 1.2889658507987852e-06, "loss": 0.5214, "step": 11979 }, { "epoch": 0.77, "grad_norm": 1.146474838256836, "learning_rate": 1.2882653915615273e-06, "loss": 0.5376, "step": 11980 }, { "epoch": 0.77, "grad_norm": 1.198508858680725, "learning_rate": 1.287565094554613e-06, "loss": 0.5189, "step": 11981 }, { "epoch": 0.77, "grad_norm": 1.2096728086471558, "learning_rate": 1.2868649598086502e-06, "loss": 0.4922, "step": 11982 }, { "epoch": 0.77, "grad_norm": 1.198053240776062, "learning_rate": 1.2861649873542415e-06, "loss": 0.4791, "step": 11983 }, { "epoch": 0.77, "grad_norm": 1.196953296661377, "learning_rate": 1.285465177221978e-06, "loss": 0.4899, "step": 11984 }, { "epoch": 0.77, "grad_norm": 1.184007167816162, "learning_rate": 1.2847655294424476e-06, "loss": 0.5344, "step": 11985 }, { "epoch": 0.77, "grad_norm": 1.336309790611267, "learning_rate": 1.284066044046231e-06, "loss": 0.5468, "step": 11986 }, { "epoch": 0.77, "grad_norm": 1.2451893091201782, "learning_rate": 1.2833667210639001e-06, "loss": 0.5304, "step": 11987 }, { "epoch": 0.77, "grad_norm": 1.3546931743621826, "learning_rate": 1.2826675605260209e-06, "loss": 0.5557, "step": 11988 }, { "epoch": 0.77, "grad_norm": 2.293610095977783, "learning_rate": 1.2819685624631528e-06, "loss": 0.52, "step": 11989 }, { "epoch": 0.77, "grad_norm": 1.3820223808288574, "learning_rate": 1.2812697269058454e-06, "loss": 0.533, "step": 11990 }, { "epoch": 0.77, "grad_norm": 1.2007973194122314, "learning_rate": 1.2805710538846433e-06, "loss": 0.5379, "step": 11991 }, { "epoch": 0.77, "grad_norm": 1.23117995262146, "learning_rate": 1.2798725434300846e-06, "loss": 0.5272, "step": 11992 }, { "epoch": 0.77, "grad_norm": 1.0863193273544312, "learning_rate": 1.2791741955726983e-06, "loss": 0.479, "step": 11993 }, { "epoch": 0.77, "grad_norm": 1.235620379447937, "learning_rate": 1.2784760103430089e-06, "loss": 0.5144, "step": 11994 }, { "epoch": 0.77, "grad_norm": 1.2596982717514038, "learning_rate": 1.2777779877715324e-06, "loss": 0.546, "step": 11995 }, { "epoch": 0.77, "grad_norm": 1.2790896892547607, "learning_rate": 1.2770801278887752e-06, "loss": 0.5326, "step": 11996 }, { "epoch": 0.77, "grad_norm": 1.1878294944763184, "learning_rate": 1.2763824307252397e-06, "loss": 0.5276, "step": 11997 }, { "epoch": 0.77, "grad_norm": 1.2119276523590088, "learning_rate": 1.275684896311422e-06, "loss": 0.5516, "step": 11998 }, { "epoch": 0.77, "grad_norm": 1.2668777704238892, "learning_rate": 1.2749875246778082e-06, "loss": 0.4891, "step": 11999 }, { "epoch": 0.77, "grad_norm": 1.2706180810928345, "learning_rate": 1.2742903158548792e-06, "loss": 0.4982, "step": 12000 }, { "epoch": 0.77, "grad_norm": 1.268504023551941, "learning_rate": 1.2735932698731095e-06, "loss": 0.5059, "step": 12001 }, { "epoch": 0.77, "grad_norm": 1.384063720703125, "learning_rate": 1.2728963867629623e-06, "loss": 0.5091, "step": 12002 }, { "epoch": 0.77, "grad_norm": 1.208277702331543, "learning_rate": 1.272199666554898e-06, "loss": 0.5485, "step": 12003 }, { "epoch": 0.77, "grad_norm": 1.0640931129455566, "learning_rate": 1.2715031092793684e-06, "loss": 0.5173, "step": 12004 }, { "epoch": 0.77, "grad_norm": 1.4285497665405273, "learning_rate": 1.270806714966818e-06, "loss": 0.591, "step": 12005 }, { "epoch": 0.77, "grad_norm": 1.4352073669433594, "learning_rate": 1.2701104836476862e-06, "loss": 0.5451, "step": 12006 }, { "epoch": 0.78, "grad_norm": 1.2819294929504395, "learning_rate": 1.2694144153524008e-06, "loss": 0.5529, "step": 12007 }, { "epoch": 0.78, "grad_norm": 1.1574783325195312, "learning_rate": 1.2687185101113857e-06, "loss": 0.4952, "step": 12008 }, { "epoch": 0.78, "grad_norm": 1.34609055519104, "learning_rate": 1.2680227679550577e-06, "loss": 0.4943, "step": 12009 }, { "epoch": 0.78, "grad_norm": 1.2583606243133545, "learning_rate": 1.2673271889138267e-06, "loss": 0.5465, "step": 12010 }, { "epoch": 0.78, "grad_norm": 1.172774314880371, "learning_rate": 1.2666317730180944e-06, "loss": 0.512, "step": 12011 }, { "epoch": 0.78, "grad_norm": 1.4395952224731445, "learning_rate": 1.2659365202982542e-06, "loss": 0.4861, "step": 12012 }, { "epoch": 0.78, "grad_norm": 1.3425015211105347, "learning_rate": 1.2652414307846944e-06, "loss": 0.4699, "step": 12013 }, { "epoch": 0.78, "grad_norm": 1.166609764099121, "learning_rate": 1.2645465045077959e-06, "loss": 0.5317, "step": 12014 }, { "epoch": 0.78, "grad_norm": 1.1901297569274902, "learning_rate": 1.2638517414979334e-06, "loss": 0.5084, "step": 12015 }, { "epoch": 0.78, "grad_norm": 1.2584888935089111, "learning_rate": 1.263157141785471e-06, "loss": 0.4666, "step": 12016 }, { "epoch": 0.78, "grad_norm": 1.2097744941711426, "learning_rate": 1.2624627054007682e-06, "loss": 0.4953, "step": 12017 }, { "epoch": 0.78, "grad_norm": 1.2448930740356445, "learning_rate": 1.2617684323741774e-06, "loss": 0.4921, "step": 12018 }, { "epoch": 0.78, "grad_norm": 1.188934564590454, "learning_rate": 1.261074322736044e-06, "loss": 0.5003, "step": 12019 }, { "epoch": 0.78, "grad_norm": 1.4017057418823242, "learning_rate": 1.2603803765167073e-06, "loss": 0.5347, "step": 12020 }, { "epoch": 0.78, "grad_norm": 1.1455955505371094, "learning_rate": 1.2596865937464937e-06, "loss": 0.543, "step": 12021 }, { "epoch": 0.78, "grad_norm": 1.4313642978668213, "learning_rate": 1.2589929744557294e-06, "loss": 0.4582, "step": 12022 }, { "epoch": 0.78, "grad_norm": 1.1411678791046143, "learning_rate": 1.2582995186747304e-06, "loss": 0.5231, "step": 12023 }, { "epoch": 0.78, "grad_norm": 1.2281705141067505, "learning_rate": 1.2576062264338051e-06, "loss": 0.5188, "step": 12024 }, { "epoch": 0.78, "grad_norm": 1.2294220924377441, "learning_rate": 1.2569130977632582e-06, "loss": 0.5248, "step": 12025 }, { "epoch": 0.78, "grad_norm": 1.3656847476959229, "learning_rate": 1.2562201326933809e-06, "loss": 0.5768, "step": 12026 }, { "epoch": 0.78, "grad_norm": 1.167988657951355, "learning_rate": 1.2555273312544625e-06, "loss": 0.5071, "step": 12027 }, { "epoch": 0.78, "grad_norm": 1.3521534204483032, "learning_rate": 1.2548346934767836e-06, "loss": 0.5375, "step": 12028 }, { "epoch": 0.78, "grad_norm": 1.310768485069275, "learning_rate": 1.2541422193906172e-06, "loss": 0.4953, "step": 12029 }, { "epoch": 0.78, "grad_norm": 1.232383131980896, "learning_rate": 1.253449909026232e-06, "loss": 0.5363, "step": 12030 }, { "epoch": 0.78, "grad_norm": 1.3093509674072266, "learning_rate": 1.2527577624138832e-06, "loss": 0.5096, "step": 12031 }, { "epoch": 0.78, "grad_norm": 1.178834080696106, "learning_rate": 1.2520657795838248e-06, "loss": 0.4958, "step": 12032 }, { "epoch": 0.78, "grad_norm": 1.2267696857452393, "learning_rate": 1.2513739605663012e-06, "loss": 0.5189, "step": 12033 }, { "epoch": 0.78, "grad_norm": 1.2736238241195679, "learning_rate": 1.2506823053915501e-06, "loss": 0.4561, "step": 12034 }, { "epoch": 0.78, "grad_norm": 1.1890842914581299, "learning_rate": 1.2499908140898027e-06, "loss": 0.5166, "step": 12035 }, { "epoch": 0.78, "grad_norm": 1.261533260345459, "learning_rate": 1.2492994866912827e-06, "loss": 0.5015, "step": 12036 }, { "epoch": 0.78, "grad_norm": 1.1921281814575195, "learning_rate": 1.2486083232262035e-06, "loss": 0.5425, "step": 12037 }, { "epoch": 0.78, "grad_norm": 1.227228045463562, "learning_rate": 1.247917323724776e-06, "loss": 0.5294, "step": 12038 }, { "epoch": 0.78, "grad_norm": 1.1904184818267822, "learning_rate": 1.247226488217202e-06, "loss": 0.5065, "step": 12039 }, { "epoch": 0.78, "grad_norm": 1.2736084461212158, "learning_rate": 1.2465358167336755e-06, "loss": 0.569, "step": 12040 }, { "epoch": 0.78, "grad_norm": 1.2516852617263794, "learning_rate": 1.2458453093043848e-06, "loss": 0.5294, "step": 12041 }, { "epoch": 0.78, "grad_norm": 1.1558901071548462, "learning_rate": 1.2451549659595108e-06, "loss": 0.5165, "step": 12042 }, { "epoch": 0.78, "grad_norm": 1.2541170120239258, "learning_rate": 1.2444647867292243e-06, "loss": 0.58, "step": 12043 }, { "epoch": 0.78, "grad_norm": 1.1267324686050415, "learning_rate": 1.2437747716436927e-06, "loss": 0.5038, "step": 12044 }, { "epoch": 0.78, "grad_norm": 1.1870428323745728, "learning_rate": 1.2430849207330748e-06, "loss": 0.5275, "step": 12045 }, { "epoch": 0.78, "grad_norm": 1.2849727869033813, "learning_rate": 1.2423952340275214e-06, "loss": 0.5093, "step": 12046 }, { "epoch": 0.78, "grad_norm": 1.2575920820236206, "learning_rate": 1.2417057115571785e-06, "loss": 0.5159, "step": 12047 }, { "epoch": 0.78, "grad_norm": 1.3046520948410034, "learning_rate": 1.241016353352183e-06, "loss": 0.4988, "step": 12048 }, { "epoch": 0.78, "grad_norm": 1.202710509300232, "learning_rate": 1.2403271594426636e-06, "loss": 0.508, "step": 12049 }, { "epoch": 0.78, "grad_norm": 1.1916582584381104, "learning_rate": 1.2396381298587435e-06, "loss": 0.4682, "step": 12050 }, { "epoch": 0.78, "grad_norm": 1.2662315368652344, "learning_rate": 1.238949264630539e-06, "loss": 0.5364, "step": 12051 }, { "epoch": 0.78, "grad_norm": 1.1810169219970703, "learning_rate": 1.2382605637881585e-06, "loss": 0.5375, "step": 12052 }, { "epoch": 0.78, "grad_norm": 1.2640398740768433, "learning_rate": 1.2375720273617037e-06, "loss": 0.5005, "step": 12053 }, { "epoch": 0.78, "grad_norm": 1.1637660264968872, "learning_rate": 1.236883655381269e-06, "loss": 0.5371, "step": 12054 }, { "epoch": 0.78, "grad_norm": 1.1841715574264526, "learning_rate": 1.23619544787694e-06, "loss": 0.4659, "step": 12055 }, { "epoch": 0.78, "grad_norm": 1.3404104709625244, "learning_rate": 1.2355074048787974e-06, "loss": 0.5153, "step": 12056 }, { "epoch": 0.78, "grad_norm": 1.2914842367172241, "learning_rate": 1.2348195264169133e-06, "loss": 0.537, "step": 12057 }, { "epoch": 0.78, "grad_norm": 1.224036455154419, "learning_rate": 1.234131812521353e-06, "loss": 0.5081, "step": 12058 }, { "epoch": 0.78, "grad_norm": 1.1907581090927124, "learning_rate": 1.2334442632221755e-06, "loss": 0.4791, "step": 12059 }, { "epoch": 0.78, "grad_norm": 1.2432589530944824, "learning_rate": 1.2327568785494331e-06, "loss": 0.5444, "step": 12060 }, { "epoch": 0.78, "grad_norm": 1.4026459455490112, "learning_rate": 1.232069658533166e-06, "loss": 0.54, "step": 12061 }, { "epoch": 0.78, "grad_norm": 1.3139466047286987, "learning_rate": 1.2313826032034131e-06, "loss": 0.555, "step": 12062 }, { "epoch": 0.78, "grad_norm": 1.1055599451065063, "learning_rate": 1.230695712590203e-06, "loss": 0.4888, "step": 12063 }, { "epoch": 0.78, "grad_norm": 1.377345085144043, "learning_rate": 1.2300089867235587e-06, "loss": 0.516, "step": 12064 }, { "epoch": 0.78, "grad_norm": 1.230565071105957, "learning_rate": 1.229322425633495e-06, "loss": 0.5182, "step": 12065 }, { "epoch": 0.78, "grad_norm": 1.1234792470932007, "learning_rate": 1.228636029350021e-06, "loss": 0.5028, "step": 12066 }, { "epoch": 0.78, "grad_norm": 1.1874927282333374, "learning_rate": 1.227949797903134e-06, "loss": 0.521, "step": 12067 }, { "epoch": 0.78, "grad_norm": 1.2289735078811646, "learning_rate": 1.2272637313228302e-06, "loss": 0.5253, "step": 12068 }, { "epoch": 0.78, "grad_norm": 1.1836119890213013, "learning_rate": 1.226577829639094e-06, "loss": 0.4261, "step": 12069 }, { "epoch": 0.78, "grad_norm": 1.2792723178863525, "learning_rate": 1.2258920928819056e-06, "loss": 0.524, "step": 12070 }, { "epoch": 0.78, "grad_norm": 1.3378239870071411, "learning_rate": 1.2252065210812387e-06, "loss": 0.5369, "step": 12071 }, { "epoch": 0.78, "grad_norm": 1.3077640533447266, "learning_rate": 1.2245211142670532e-06, "loss": 0.5081, "step": 12072 }, { "epoch": 0.78, "grad_norm": 1.2243074178695679, "learning_rate": 1.2238358724693095e-06, "loss": 0.5113, "step": 12073 }, { "epoch": 0.78, "grad_norm": 1.194937825202942, "learning_rate": 1.2231507957179567e-06, "loss": 0.4664, "step": 12074 }, { "epoch": 0.78, "grad_norm": 1.322213888168335, "learning_rate": 1.2224658840429387e-06, "loss": 0.5011, "step": 12075 }, { "epoch": 0.78, "grad_norm": 1.1765977144241333, "learning_rate": 1.2217811374741906e-06, "loss": 0.5035, "step": 12076 }, { "epoch": 0.78, "grad_norm": 1.4436769485473633, "learning_rate": 1.221096556041642e-06, "loss": 0.485, "step": 12077 }, { "epoch": 0.78, "grad_norm": 1.3334814310073853, "learning_rate": 1.2204121397752123e-06, "loss": 0.5666, "step": 12078 }, { "epoch": 0.78, "grad_norm": 1.161991000175476, "learning_rate": 1.2197278887048164e-06, "loss": 0.5587, "step": 12079 }, { "epoch": 0.78, "grad_norm": 1.2547223567962646, "learning_rate": 1.2190438028603612e-06, "loss": 0.5245, "step": 12080 }, { "epoch": 0.78, "grad_norm": 1.2141565084457397, "learning_rate": 1.2183598822717468e-06, "loss": 0.5076, "step": 12081 }, { "epoch": 0.78, "grad_norm": 1.1819877624511719, "learning_rate": 1.2176761269688647e-06, "loss": 0.501, "step": 12082 }, { "epoch": 0.78, "grad_norm": 1.2095717191696167, "learning_rate": 1.2169925369816021e-06, "loss": 0.5113, "step": 12083 }, { "epoch": 0.78, "grad_norm": 1.1903191804885864, "learning_rate": 1.216309112339834e-06, "loss": 0.523, "step": 12084 }, { "epoch": 0.78, "grad_norm": 1.1691187620162964, "learning_rate": 1.2156258530734328e-06, "loss": 0.486, "step": 12085 }, { "epoch": 0.78, "grad_norm": 1.0981802940368652, "learning_rate": 1.2149427592122614e-06, "loss": 0.4796, "step": 12086 }, { "epoch": 0.78, "grad_norm": 1.1412653923034668, "learning_rate": 1.2142598307861763e-06, "loss": 0.4859, "step": 12087 }, { "epoch": 0.78, "grad_norm": 1.2051434516906738, "learning_rate": 1.2135770678250269e-06, "loss": 0.4625, "step": 12088 }, { "epoch": 0.78, "grad_norm": 1.2059522867202759, "learning_rate": 1.2128944703586565e-06, "loss": 0.4896, "step": 12089 }, { "epoch": 0.78, "grad_norm": 1.161420226097107, "learning_rate": 1.2122120384168961e-06, "loss": 0.5523, "step": 12090 }, { "epoch": 0.78, "grad_norm": 1.163703203201294, "learning_rate": 1.211529772029575e-06, "loss": 0.5288, "step": 12091 }, { "epoch": 0.78, "grad_norm": 1.253081202507019, "learning_rate": 1.2108476712265133e-06, "loss": 0.4963, "step": 12092 }, { "epoch": 0.78, "grad_norm": 1.3689014911651611, "learning_rate": 1.2101657360375234e-06, "loss": 0.518, "step": 12093 }, { "epoch": 0.78, "grad_norm": 1.1625226736068726, "learning_rate": 1.2094839664924113e-06, "loss": 0.4626, "step": 12094 }, { "epoch": 0.78, "grad_norm": 1.1928037405014038, "learning_rate": 1.208802362620977e-06, "loss": 0.52, "step": 12095 }, { "epoch": 0.78, "grad_norm": 1.1482067108154297, "learning_rate": 1.2081209244530085e-06, "loss": 0.4956, "step": 12096 }, { "epoch": 0.78, "grad_norm": 1.2576267719268799, "learning_rate": 1.2074396520182912e-06, "loss": 0.5028, "step": 12097 }, { "epoch": 0.78, "grad_norm": 1.244215488433838, "learning_rate": 1.2067585453466018e-06, "loss": 0.5131, "step": 12098 }, { "epoch": 0.78, "grad_norm": 1.2286477088928223, "learning_rate": 1.2060776044677096e-06, "loss": 0.5264, "step": 12099 }, { "epoch": 0.78, "grad_norm": 1.2766315937042236, "learning_rate": 1.2053968294113789e-06, "loss": 0.5098, "step": 12100 }, { "epoch": 0.78, "grad_norm": 1.3094160556793213, "learning_rate": 1.2047162202073603e-06, "loss": 0.5109, "step": 12101 }, { "epoch": 0.78, "grad_norm": 1.0822590589523315, "learning_rate": 1.2040357768854044e-06, "loss": 0.4763, "step": 12102 }, { "epoch": 0.78, "grad_norm": 1.2384668588638306, "learning_rate": 1.2033554994752505e-06, "loss": 0.5548, "step": 12103 }, { "epoch": 0.78, "grad_norm": 1.347445011138916, "learning_rate": 1.2026753880066338e-06, "loss": 0.4821, "step": 12104 }, { "epoch": 0.78, "grad_norm": 1.2153795957565308, "learning_rate": 1.2019954425092772e-06, "loss": 0.5293, "step": 12105 }, { "epoch": 0.78, "grad_norm": 1.050525188446045, "learning_rate": 1.2013156630129014e-06, "loss": 0.5156, "step": 12106 }, { "epoch": 0.78, "grad_norm": 1.2718477249145508, "learning_rate": 1.2006360495472163e-06, "loss": 0.551, "step": 12107 }, { "epoch": 0.78, "grad_norm": 1.2074564695358276, "learning_rate": 1.1999566021419278e-06, "loss": 0.4895, "step": 12108 }, { "epoch": 0.78, "grad_norm": 1.272927165031433, "learning_rate": 1.1992773208267327e-06, "loss": 0.4894, "step": 12109 }, { "epoch": 0.78, "grad_norm": 1.1345258951187134, "learning_rate": 1.1985982056313189e-06, "loss": 0.5178, "step": 12110 }, { "epoch": 0.78, "grad_norm": 1.135153889656067, "learning_rate": 1.1979192565853698e-06, "loss": 0.4721, "step": 12111 }, { "epoch": 0.78, "grad_norm": 1.172616720199585, "learning_rate": 1.1972404737185606e-06, "loss": 0.5348, "step": 12112 }, { "epoch": 0.78, "grad_norm": 1.2099339962005615, "learning_rate": 1.1965618570605602e-06, "loss": 0.4849, "step": 12113 }, { "epoch": 0.78, "grad_norm": 1.217380404472351, "learning_rate": 1.1958834066410263e-06, "loss": 0.4845, "step": 12114 }, { "epoch": 0.78, "grad_norm": 1.224467158317566, "learning_rate": 1.1952051224896144e-06, "loss": 0.4723, "step": 12115 }, { "epoch": 0.78, "grad_norm": 1.158443808555603, "learning_rate": 1.1945270046359702e-06, "loss": 0.5146, "step": 12116 }, { "epoch": 0.78, "grad_norm": 1.3807796239852905, "learning_rate": 1.1938490531097323e-06, "loss": 0.5111, "step": 12117 }, { "epoch": 0.78, "grad_norm": 1.239778995513916, "learning_rate": 1.1931712679405332e-06, "loss": 0.5223, "step": 12118 }, { "epoch": 0.78, "grad_norm": 1.147213101387024, "learning_rate": 1.1924936491579953e-06, "loss": 0.5287, "step": 12119 }, { "epoch": 0.78, "grad_norm": 1.1496094465255737, "learning_rate": 1.1918161967917364e-06, "loss": 0.5038, "step": 12120 }, { "epoch": 0.78, "grad_norm": 1.1371846199035645, "learning_rate": 1.1911389108713667e-06, "loss": 0.4462, "step": 12121 }, { "epoch": 0.78, "grad_norm": 1.3137112855911255, "learning_rate": 1.1904617914264877e-06, "loss": 0.5241, "step": 12122 }, { "epoch": 0.78, "grad_norm": 1.2596373558044434, "learning_rate": 1.189784838486695e-06, "loss": 0.5928, "step": 12123 }, { "epoch": 0.78, "grad_norm": 1.3145389556884766, "learning_rate": 1.1891080520815783e-06, "loss": 0.4561, "step": 12124 }, { "epoch": 0.78, "grad_norm": 1.2051072120666504, "learning_rate": 1.1884314322407147e-06, "loss": 0.5239, "step": 12125 }, { "epoch": 0.78, "grad_norm": 1.2085251808166504, "learning_rate": 1.1877549789936794e-06, "loss": 0.5231, "step": 12126 }, { "epoch": 0.78, "grad_norm": 1.1364600658416748, "learning_rate": 1.1870786923700378e-06, "loss": 0.5526, "step": 12127 }, { "epoch": 0.78, "grad_norm": 1.3080042600631714, "learning_rate": 1.18640257239935e-06, "loss": 0.5336, "step": 12128 }, { "epoch": 0.78, "grad_norm": 1.2406296730041504, "learning_rate": 1.1857266191111655e-06, "loss": 0.4986, "step": 12129 }, { "epoch": 0.78, "grad_norm": 1.4327783584594727, "learning_rate": 1.1850508325350318e-06, "loss": 0.5136, "step": 12130 }, { "epoch": 0.78, "grad_norm": 1.3181055784225464, "learning_rate": 1.1843752127004815e-06, "loss": 0.5098, "step": 12131 }, { "epoch": 0.78, "grad_norm": 1.2134766578674316, "learning_rate": 1.1836997596370464e-06, "loss": 0.5267, "step": 12132 }, { "epoch": 0.78, "grad_norm": 1.3504009246826172, "learning_rate": 1.1830244733742491e-06, "loss": 0.5469, "step": 12133 }, { "epoch": 0.78, "grad_norm": 1.276381492614746, "learning_rate": 1.1823493539416036e-06, "loss": 0.5551, "step": 12134 }, { "epoch": 0.78, "grad_norm": 1.118313193321228, "learning_rate": 1.1816744013686182e-06, "loss": 0.46, "step": 12135 }, { "epoch": 0.78, "grad_norm": 1.268591046333313, "learning_rate": 1.1809996156847948e-06, "loss": 0.5182, "step": 12136 }, { "epoch": 0.78, "grad_norm": 1.2868318557739258, "learning_rate": 1.1803249969196234e-06, "loss": 0.5659, "step": 12137 }, { "epoch": 0.78, "grad_norm": 1.234095573425293, "learning_rate": 1.179650545102592e-06, "loss": 0.5171, "step": 12138 }, { "epoch": 0.78, "grad_norm": 1.2926820516586304, "learning_rate": 1.1789762602631781e-06, "loss": 0.5165, "step": 12139 }, { "epoch": 0.78, "grad_norm": 1.0928419828414917, "learning_rate": 1.1783021424308538e-06, "loss": 0.4615, "step": 12140 }, { "epoch": 0.78, "grad_norm": 1.12743079662323, "learning_rate": 1.177628191635083e-06, "loss": 0.4857, "step": 12141 }, { "epoch": 0.78, "grad_norm": 1.0860620737075806, "learning_rate": 1.176954407905323e-06, "loss": 0.5211, "step": 12142 }, { "epoch": 0.78, "grad_norm": 1.2074946165084839, "learning_rate": 1.176280791271021e-06, "loss": 0.491, "step": 12143 }, { "epoch": 0.78, "grad_norm": 1.3450902700424194, "learning_rate": 1.1756073417616203e-06, "loss": 0.5465, "step": 12144 }, { "epoch": 0.78, "grad_norm": 1.0702078342437744, "learning_rate": 1.1749340594065557e-06, "loss": 0.5038, "step": 12145 }, { "epoch": 0.78, "grad_norm": 1.2060216665267944, "learning_rate": 1.1742609442352542e-06, "loss": 0.5048, "step": 12146 }, { "epoch": 0.78, "grad_norm": 1.2006622552871704, "learning_rate": 1.1735879962771368e-06, "loss": 0.5041, "step": 12147 }, { "epoch": 0.78, "grad_norm": 1.296242356300354, "learning_rate": 1.172915215561617e-06, "loss": 0.4819, "step": 12148 }, { "epoch": 0.78, "grad_norm": 1.1206034421920776, "learning_rate": 1.1722426021180977e-06, "loss": 0.5073, "step": 12149 }, { "epoch": 0.78, "grad_norm": 1.1307905912399292, "learning_rate": 1.1715701559759784e-06, "loss": 0.4678, "step": 12150 }, { "epoch": 0.78, "grad_norm": 1.1811180114746094, "learning_rate": 1.1708978771646506e-06, "loss": 0.4875, "step": 12151 }, { "epoch": 0.78, "grad_norm": 1.2164098024368286, "learning_rate": 1.170225765713497e-06, "loss": 0.5003, "step": 12152 }, { "epoch": 0.78, "grad_norm": 1.3048791885375977, "learning_rate": 1.1695538216518953e-06, "loss": 0.4959, "step": 12153 }, { "epoch": 0.78, "grad_norm": 1.208193063735962, "learning_rate": 1.1688820450092125e-06, "loss": 0.5644, "step": 12154 }, { "epoch": 0.78, "grad_norm": 1.0559300184249878, "learning_rate": 1.1682104358148111e-06, "loss": 0.4479, "step": 12155 }, { "epoch": 0.78, "grad_norm": 1.1472227573394775, "learning_rate": 1.1675389940980447e-06, "loss": 0.5403, "step": 12156 }, { "epoch": 0.78, "grad_norm": 4.6144585609436035, "learning_rate": 1.1668677198882615e-06, "loss": 0.4879, "step": 12157 }, { "epoch": 0.78, "grad_norm": 1.2500534057617188, "learning_rate": 1.1661966132148013e-06, "loss": 0.4791, "step": 12158 }, { "epoch": 0.78, "grad_norm": 1.1397473812103271, "learning_rate": 1.165525674106996e-06, "loss": 0.5049, "step": 12159 }, { "epoch": 0.78, "grad_norm": 1.137932300567627, "learning_rate": 1.1648549025941696e-06, "loss": 0.4691, "step": 12160 }, { "epoch": 0.78, "grad_norm": 1.176980972290039, "learning_rate": 1.1641842987056407e-06, "loss": 0.5195, "step": 12161 }, { "epoch": 0.79, "grad_norm": 1.2199733257293701, "learning_rate": 1.1635138624707193e-06, "loss": 0.503, "step": 12162 }, { "epoch": 0.79, "grad_norm": 1.1338109970092773, "learning_rate": 1.1628435939187088e-06, "loss": 0.4994, "step": 12163 }, { "epoch": 0.79, "grad_norm": 1.266487717628479, "learning_rate": 1.1621734930789047e-06, "loss": 0.4958, "step": 12164 }, { "epoch": 0.79, "grad_norm": 1.3070223331451416, "learning_rate": 1.1615035599805975e-06, "loss": 0.4944, "step": 12165 }, { "epoch": 0.79, "grad_norm": 1.1876862049102783, "learning_rate": 1.160833794653064e-06, "loss": 0.5106, "step": 12166 }, { "epoch": 0.79, "grad_norm": 1.3846211433410645, "learning_rate": 1.1601641971255806e-06, "loss": 0.4856, "step": 12167 }, { "epoch": 0.79, "grad_norm": 1.104826807975769, "learning_rate": 1.159494767427413e-06, "loss": 0.4999, "step": 12168 }, { "epoch": 0.79, "grad_norm": 1.2003560066223145, "learning_rate": 1.158825505587821e-06, "loss": 0.5184, "step": 12169 }, { "epoch": 0.79, "grad_norm": 1.1297212839126587, "learning_rate": 1.1581564116360556e-06, "loss": 0.5235, "step": 12170 }, { "epoch": 0.79, "grad_norm": 1.1260886192321777, "learning_rate": 1.1574874856013624e-06, "loss": 0.4538, "step": 12171 }, { "epoch": 0.79, "grad_norm": 1.1498132944107056, "learning_rate": 1.156818727512976e-06, "loss": 0.4691, "step": 12172 }, { "epoch": 0.79, "grad_norm": 1.2692924737930298, "learning_rate": 1.1561501374001277e-06, "loss": 0.5048, "step": 12173 }, { "epoch": 0.79, "grad_norm": 1.2136967182159424, "learning_rate": 1.1554817152920394e-06, "loss": 0.5299, "step": 12174 }, { "epoch": 0.79, "grad_norm": 1.2644857168197632, "learning_rate": 1.1548134612179267e-06, "loss": 0.5155, "step": 12175 }, { "epoch": 0.79, "grad_norm": 1.390479564666748, "learning_rate": 1.154145375206997e-06, "loss": 0.5159, "step": 12176 }, { "epoch": 0.79, "grad_norm": 1.2676072120666504, "learning_rate": 1.1534774572884517e-06, "loss": 0.5307, "step": 12177 }, { "epoch": 0.79, "grad_norm": 1.155487060546875, "learning_rate": 1.1528097074914807e-06, "loss": 0.4797, "step": 12178 }, { "epoch": 0.79, "grad_norm": 1.2089558839797974, "learning_rate": 1.152142125845272e-06, "loss": 0.5183, "step": 12179 }, { "epoch": 0.79, "grad_norm": 1.2224823236465454, "learning_rate": 1.1514747123790033e-06, "loss": 0.5438, "step": 12180 }, { "epoch": 0.79, "grad_norm": 1.1482635736465454, "learning_rate": 1.1508074671218456e-06, "loss": 0.5016, "step": 12181 }, { "epoch": 0.79, "grad_norm": 1.4003775119781494, "learning_rate": 1.1501403901029623e-06, "loss": 0.5359, "step": 12182 }, { "epoch": 0.79, "grad_norm": 1.247510552406311, "learning_rate": 1.1494734813515113e-06, "loss": 0.4936, "step": 12183 }, { "epoch": 0.79, "grad_norm": 1.1384624242782593, "learning_rate": 1.1488067408966386e-06, "loss": 0.4956, "step": 12184 }, { "epoch": 0.79, "grad_norm": 1.1578421592712402, "learning_rate": 1.1481401687674871e-06, "loss": 0.4883, "step": 12185 }, { "epoch": 0.79, "grad_norm": 1.1185848712921143, "learning_rate": 1.1474737649931912e-06, "loss": 0.4628, "step": 12186 }, { "epoch": 0.79, "grad_norm": 1.2671242952346802, "learning_rate": 1.1468075296028768e-06, "loss": 0.532, "step": 12187 }, { "epoch": 0.79, "grad_norm": 1.1302145719528198, "learning_rate": 1.146141462625664e-06, "loss": 0.4324, "step": 12188 }, { "epoch": 0.79, "grad_norm": 1.1557661294937134, "learning_rate": 1.1454755640906662e-06, "loss": 0.5159, "step": 12189 }, { "epoch": 0.79, "grad_norm": 1.2272382974624634, "learning_rate": 1.1448098340269853e-06, "loss": 0.4906, "step": 12190 }, { "epoch": 0.79, "grad_norm": 1.1969118118286133, "learning_rate": 1.1441442724637203e-06, "loss": 0.4946, "step": 12191 }, { "epoch": 0.79, "grad_norm": 1.1513406038284302, "learning_rate": 1.1434788794299606e-06, "loss": 0.4943, "step": 12192 }, { "epoch": 0.79, "grad_norm": 1.2505699396133423, "learning_rate": 1.1428136549547909e-06, "loss": 0.4889, "step": 12193 }, { "epoch": 0.79, "grad_norm": 1.1259174346923828, "learning_rate": 1.1421485990672826e-06, "loss": 0.4593, "step": 12194 }, { "epoch": 0.79, "grad_norm": 1.1588785648345947, "learning_rate": 1.141483711796506e-06, "loss": 0.5117, "step": 12195 }, { "epoch": 0.79, "grad_norm": 1.2510871887207031, "learning_rate": 1.1408189931715213e-06, "loss": 0.5094, "step": 12196 }, { "epoch": 0.79, "grad_norm": 1.2257475852966309, "learning_rate": 1.1401544432213817e-06, "loss": 0.51, "step": 12197 }, { "epoch": 0.79, "grad_norm": 1.6702616214752197, "learning_rate": 1.1394900619751343e-06, "loss": 0.5478, "step": 12198 }, { "epoch": 0.79, "grad_norm": 1.2761331796646118, "learning_rate": 1.1388258494618143e-06, "loss": 0.5317, "step": 12199 }, { "epoch": 0.79, "grad_norm": 1.5070823431015015, "learning_rate": 1.1381618057104548e-06, "loss": 0.5476, "step": 12200 }, { "epoch": 0.79, "grad_norm": 1.1791658401489258, "learning_rate": 1.1374979307500788e-06, "loss": 0.5108, "step": 12201 }, { "epoch": 0.79, "grad_norm": 1.1154078245162964, "learning_rate": 1.1368342246097042e-06, "loss": 0.4926, "step": 12202 }, { "epoch": 0.79, "grad_norm": 1.3045836687088013, "learning_rate": 1.1361706873183375e-06, "loss": 0.542, "step": 12203 }, { "epoch": 0.79, "grad_norm": 1.1804434061050415, "learning_rate": 1.1355073189049808e-06, "loss": 0.5179, "step": 12204 }, { "epoch": 0.79, "grad_norm": 1.103704571723938, "learning_rate": 1.1348441193986294e-06, "loss": 0.4668, "step": 12205 }, { "epoch": 0.79, "grad_norm": 1.2210369110107422, "learning_rate": 1.134181088828269e-06, "loss": 0.5184, "step": 12206 }, { "epoch": 0.79, "grad_norm": 1.1458494663238525, "learning_rate": 1.1335182272228802e-06, "loss": 0.4664, "step": 12207 }, { "epoch": 0.79, "grad_norm": 1.360746145248413, "learning_rate": 1.132855534611433e-06, "loss": 0.5046, "step": 12208 }, { "epoch": 0.79, "grad_norm": 1.2526863813400269, "learning_rate": 1.1321930110228929e-06, "loss": 0.5059, "step": 12209 }, { "epoch": 0.79, "grad_norm": 1.1905359029769897, "learning_rate": 1.1315306564862172e-06, "loss": 0.5313, "step": 12210 }, { "epoch": 0.79, "grad_norm": 1.1064856052398682, "learning_rate": 1.1308684710303559e-06, "loss": 0.4723, "step": 12211 }, { "epoch": 0.79, "grad_norm": 1.1810729503631592, "learning_rate": 1.1302064546842528e-06, "loss": 0.5067, "step": 12212 }, { "epoch": 0.79, "grad_norm": 1.1925618648529053, "learning_rate": 1.1295446074768396e-06, "loss": 0.4566, "step": 12213 }, { "epoch": 0.79, "grad_norm": 1.2522786855697632, "learning_rate": 1.1288829294370456e-06, "loss": 0.4821, "step": 12214 }, { "epoch": 0.79, "grad_norm": 1.4166752099990845, "learning_rate": 1.1282214205937913e-06, "loss": 0.5015, "step": 12215 }, { "epoch": 0.79, "grad_norm": 1.5011506080627441, "learning_rate": 1.1275600809759895e-06, "loss": 0.5081, "step": 12216 }, { "epoch": 0.79, "grad_norm": 1.2410732507705688, "learning_rate": 1.1268989106125455e-06, "loss": 0.5127, "step": 12217 }, { "epoch": 0.79, "grad_norm": 1.3557038307189941, "learning_rate": 1.1262379095323588e-06, "loss": 0.506, "step": 12218 }, { "epoch": 0.79, "grad_norm": 1.1909226179122925, "learning_rate": 1.125577077764317e-06, "loss": 0.5226, "step": 12219 }, { "epoch": 0.79, "grad_norm": 1.2187418937683105, "learning_rate": 1.1249164153373054e-06, "loss": 0.4467, "step": 12220 }, { "epoch": 0.79, "grad_norm": 1.2139359712600708, "learning_rate": 1.124255922280199e-06, "loss": 0.5197, "step": 12221 }, { "epoch": 0.79, "grad_norm": 1.2027580738067627, "learning_rate": 1.123595598621867e-06, "loss": 0.5308, "step": 12222 }, { "epoch": 0.79, "grad_norm": 1.1621452569961548, "learning_rate": 1.1229354443911699e-06, "loss": 0.5171, "step": 12223 }, { "epoch": 0.79, "grad_norm": 1.1509429216384888, "learning_rate": 1.1222754596169632e-06, "loss": 0.4967, "step": 12224 }, { "epoch": 0.79, "grad_norm": 1.2330117225646973, "learning_rate": 1.1216156443280901e-06, "loss": 0.5561, "step": 12225 }, { "epoch": 0.79, "grad_norm": 1.3729796409606934, "learning_rate": 1.1209559985533908e-06, "loss": 0.5457, "step": 12226 }, { "epoch": 0.79, "grad_norm": 1.2799104452133179, "learning_rate": 1.120296522321696e-06, "loss": 0.5241, "step": 12227 }, { "epoch": 0.79, "grad_norm": 1.2535210847854614, "learning_rate": 1.1196372156618314e-06, "loss": 0.5281, "step": 12228 }, { "epoch": 0.79, "grad_norm": 1.2540713548660278, "learning_rate": 1.1189780786026122e-06, "loss": 0.5108, "step": 12229 }, { "epoch": 0.79, "grad_norm": 1.1961830854415894, "learning_rate": 1.1183191111728497e-06, "loss": 0.5224, "step": 12230 }, { "epoch": 0.79, "grad_norm": 1.1685949563980103, "learning_rate": 1.1176603134013425e-06, "loss": 0.5028, "step": 12231 }, { "epoch": 0.79, "grad_norm": 1.1850649118423462, "learning_rate": 1.1170016853168864e-06, "loss": 0.5059, "step": 12232 }, { "epoch": 0.79, "grad_norm": 1.1436775922775269, "learning_rate": 1.1163432269482683e-06, "loss": 0.5589, "step": 12233 }, { "epoch": 0.79, "grad_norm": 1.2009296417236328, "learning_rate": 1.1156849383242675e-06, "loss": 0.5257, "step": 12234 }, { "epoch": 0.79, "grad_norm": 1.1864826679229736, "learning_rate": 1.115026819473657e-06, "loss": 0.511, "step": 12235 }, { "epoch": 0.79, "grad_norm": 1.2803010940551758, "learning_rate": 1.1143688704252015e-06, "loss": 0.478, "step": 12236 }, { "epoch": 0.79, "grad_norm": 1.3435673713684082, "learning_rate": 1.1137110912076566e-06, "loss": 0.5143, "step": 12237 }, { "epoch": 0.79, "grad_norm": 1.2268232107162476, "learning_rate": 1.1130534818497734e-06, "loss": 0.5112, "step": 12238 }, { "epoch": 0.79, "grad_norm": 1.2409557104110718, "learning_rate": 1.1123960423802938e-06, "loss": 0.4441, "step": 12239 }, { "epoch": 0.79, "grad_norm": 1.1635818481445312, "learning_rate": 1.1117387728279528e-06, "loss": 0.5097, "step": 12240 }, { "epoch": 0.79, "grad_norm": 1.1536613702774048, "learning_rate": 1.1110816732214802e-06, "loss": 0.4586, "step": 12241 }, { "epoch": 0.79, "grad_norm": 1.182701826095581, "learning_rate": 1.1104247435895922e-06, "loss": 0.4856, "step": 12242 }, { "epoch": 0.79, "grad_norm": 1.244358777999878, "learning_rate": 1.1097679839610037e-06, "loss": 0.5307, "step": 12243 }, { "epoch": 0.79, "grad_norm": 1.1400915384292603, "learning_rate": 1.1091113943644193e-06, "loss": 0.5296, "step": 12244 }, { "epoch": 0.79, "grad_norm": 1.1382098197937012, "learning_rate": 1.1084549748285378e-06, "loss": 0.5203, "step": 12245 }, { "epoch": 0.79, "grad_norm": 1.2781927585601807, "learning_rate": 1.1077987253820489e-06, "loss": 0.4855, "step": 12246 }, { "epoch": 0.79, "grad_norm": 1.158929467201233, "learning_rate": 1.1071426460536367e-06, "loss": 0.4919, "step": 12247 }, { "epoch": 0.79, "grad_norm": 1.3273036479949951, "learning_rate": 1.1064867368719745e-06, "loss": 0.5523, "step": 12248 }, { "epoch": 0.79, "grad_norm": 1.3753128051757812, "learning_rate": 1.1058309978657316e-06, "loss": 0.5234, "step": 12249 }, { "epoch": 0.79, "grad_norm": 1.1352667808532715, "learning_rate": 1.1051754290635691e-06, "loss": 0.5394, "step": 12250 }, { "epoch": 0.79, "grad_norm": 1.2721835374832153, "learning_rate": 1.1045200304941395e-06, "loss": 0.535, "step": 12251 }, { "epoch": 0.79, "grad_norm": 1.1674118041992188, "learning_rate": 1.1038648021860886e-06, "loss": 0.5258, "step": 12252 }, { "epoch": 0.79, "grad_norm": 1.264912724494934, "learning_rate": 1.1032097441680567e-06, "loss": 0.4902, "step": 12253 }, { "epoch": 0.79, "grad_norm": 1.5108418464660645, "learning_rate": 1.1025548564686718e-06, "loss": 0.4851, "step": 12254 }, { "epoch": 0.79, "grad_norm": 1.3326914310455322, "learning_rate": 1.1019001391165585e-06, "loss": 0.4909, "step": 12255 }, { "epoch": 0.79, "grad_norm": 1.285987377166748, "learning_rate": 1.1012455921403325e-06, "loss": 0.5303, "step": 12256 }, { "epoch": 0.79, "grad_norm": 1.2818197011947632, "learning_rate": 1.1005912155686034e-06, "loss": 0.5473, "step": 12257 }, { "epoch": 0.79, "grad_norm": 1.2578109502792358, "learning_rate": 1.099937009429971e-06, "loss": 0.5253, "step": 12258 }, { "epoch": 0.79, "grad_norm": 1.2129913568496704, "learning_rate": 1.099282973753032e-06, "loss": 0.5188, "step": 12259 }, { "epoch": 0.79, "grad_norm": 1.1627023220062256, "learning_rate": 1.098629108566368e-06, "loss": 0.4828, "step": 12260 }, { "epoch": 0.79, "grad_norm": 1.2029814720153809, "learning_rate": 1.0979754138985598e-06, "loss": 0.47, "step": 12261 }, { "epoch": 0.79, "grad_norm": 1.1957006454467773, "learning_rate": 1.0973218897781795e-06, "loss": 0.5037, "step": 12262 }, { "epoch": 0.79, "grad_norm": 1.2267017364501953, "learning_rate": 1.0966685362337902e-06, "loss": 0.5541, "step": 12263 }, { "epoch": 0.79, "grad_norm": 1.140873908996582, "learning_rate": 1.0960153532939482e-06, "loss": 0.4741, "step": 12264 }, { "epoch": 0.79, "grad_norm": 1.2497977018356323, "learning_rate": 1.0953623409872043e-06, "loss": 0.5403, "step": 12265 }, { "epoch": 0.79, "grad_norm": 1.174588918685913, "learning_rate": 1.094709499342097e-06, "loss": 0.5382, "step": 12266 }, { "epoch": 0.79, "grad_norm": 1.4585977792739868, "learning_rate": 1.0940568283871617e-06, "loss": 0.5185, "step": 12267 }, { "epoch": 0.79, "grad_norm": 1.298782229423523, "learning_rate": 1.093404328150925e-06, "loss": 0.481, "step": 12268 }, { "epoch": 0.79, "grad_norm": 1.2605066299438477, "learning_rate": 1.0927519986619062e-06, "loss": 0.5604, "step": 12269 }, { "epoch": 0.79, "grad_norm": 1.1976615190505981, "learning_rate": 1.092099839948616e-06, "loss": 0.4944, "step": 12270 }, { "epoch": 0.79, "grad_norm": 1.2776222229003906, "learning_rate": 1.091447852039561e-06, "loss": 0.5196, "step": 12271 }, { "epoch": 0.79, "grad_norm": 1.158431887626648, "learning_rate": 1.090796034963235e-06, "loss": 0.5246, "step": 12272 }, { "epoch": 0.79, "grad_norm": 1.1399388313293457, "learning_rate": 1.0901443887481283e-06, "loss": 0.5034, "step": 12273 }, { "epoch": 0.79, "grad_norm": 1.1856721639633179, "learning_rate": 1.0894929134227227e-06, "loss": 0.4539, "step": 12274 }, { "epoch": 0.79, "grad_norm": 1.194048285484314, "learning_rate": 1.088841609015493e-06, "loss": 0.4992, "step": 12275 }, { "epoch": 0.79, "grad_norm": 1.1469786167144775, "learning_rate": 1.0881904755549056e-06, "loss": 0.5357, "step": 12276 }, { "epoch": 0.79, "grad_norm": 1.1110085248947144, "learning_rate": 1.0875395130694205e-06, "loss": 0.5037, "step": 12277 }, { "epoch": 0.79, "grad_norm": 1.2164479494094849, "learning_rate": 1.0868887215874885e-06, "loss": 0.5306, "step": 12278 }, { "epoch": 0.79, "grad_norm": 1.328999638557434, "learning_rate": 1.0862381011375538e-06, "loss": 0.5148, "step": 12279 }, { "epoch": 0.79, "grad_norm": 1.2695626020431519, "learning_rate": 1.0855876517480546e-06, "loss": 0.5372, "step": 12280 }, { "epoch": 0.79, "grad_norm": 1.2449524402618408, "learning_rate": 1.08493737344742e-06, "loss": 0.5355, "step": 12281 }, { "epoch": 0.79, "grad_norm": 1.1401429176330566, "learning_rate": 1.0842872662640724e-06, "loss": 0.531, "step": 12282 }, { "epoch": 0.79, "grad_norm": 1.1825720071792603, "learning_rate": 1.0836373302264252e-06, "loss": 0.532, "step": 12283 }, { "epoch": 0.79, "grad_norm": 1.265700101852417, "learning_rate": 1.0829875653628852e-06, "loss": 0.5725, "step": 12284 }, { "epoch": 0.79, "grad_norm": 1.2237014770507812, "learning_rate": 1.0823379717018534e-06, "loss": 0.4997, "step": 12285 }, { "epoch": 0.79, "grad_norm": 1.2230873107910156, "learning_rate": 1.0816885492717206e-06, "loss": 0.513, "step": 12286 }, { "epoch": 0.79, "grad_norm": 1.2011922597885132, "learning_rate": 1.0810392981008737e-06, "loss": 0.4799, "step": 12287 }, { "epoch": 0.79, "grad_norm": 1.2627943754196167, "learning_rate": 1.0803902182176861e-06, "loss": 0.4906, "step": 12288 }, { "epoch": 0.79, "grad_norm": 1.327556848526001, "learning_rate": 1.07974130965053e-06, "loss": 0.5214, "step": 12289 }, { "epoch": 0.79, "grad_norm": 1.2147586345672607, "learning_rate": 1.0790925724277662e-06, "loss": 0.5116, "step": 12290 }, { "epoch": 0.79, "grad_norm": 1.2089539766311646, "learning_rate": 1.0784440065777507e-06, "loss": 0.4967, "step": 12291 }, { "epoch": 0.79, "grad_norm": 1.1953163146972656, "learning_rate": 1.0777956121288308e-06, "loss": 0.5482, "step": 12292 }, { "epoch": 0.79, "grad_norm": 1.3157217502593994, "learning_rate": 1.0771473891093442e-06, "loss": 0.4798, "step": 12293 }, { "epoch": 0.79, "grad_norm": 1.1327948570251465, "learning_rate": 1.076499337547624e-06, "loss": 0.5099, "step": 12294 }, { "epoch": 0.79, "grad_norm": 1.1563928127288818, "learning_rate": 1.0758514574719946e-06, "loss": 0.5109, "step": 12295 }, { "epoch": 0.79, "grad_norm": 1.2231318950653076, "learning_rate": 1.0752037489107753e-06, "loss": 0.5191, "step": 12296 }, { "epoch": 0.79, "grad_norm": 1.2685017585754395, "learning_rate": 1.0745562118922725e-06, "loss": 0.5596, "step": 12297 }, { "epoch": 0.79, "grad_norm": 1.2879194021224976, "learning_rate": 1.07390884644479e-06, "loss": 0.5784, "step": 12298 }, { "epoch": 0.79, "grad_norm": 1.2734081745147705, "learning_rate": 1.0732616525966227e-06, "loss": 0.4893, "step": 12299 }, { "epoch": 0.79, "grad_norm": 1.2591432332992554, "learning_rate": 1.072614630376057e-06, "loss": 0.5155, "step": 12300 }, { "epoch": 0.79, "grad_norm": 1.1043545007705688, "learning_rate": 1.0719677798113747e-06, "loss": 0.492, "step": 12301 }, { "epoch": 0.79, "grad_norm": 1.1577459573745728, "learning_rate": 1.0713211009308444e-06, "loss": 0.5265, "step": 12302 }, { "epoch": 0.79, "grad_norm": 1.3270511627197266, "learning_rate": 1.0706745937627333e-06, "loss": 0.5047, "step": 12303 }, { "epoch": 0.79, "grad_norm": 1.3467016220092773, "learning_rate": 1.0700282583352977e-06, "loss": 0.5102, "step": 12304 }, { "epoch": 0.79, "grad_norm": 1.1544078588485718, "learning_rate": 1.0693820946767875e-06, "loss": 0.5287, "step": 12305 }, { "epoch": 0.79, "grad_norm": 1.4616777896881104, "learning_rate": 1.0687361028154457e-06, "loss": 0.5282, "step": 12306 }, { "epoch": 0.79, "grad_norm": 1.1594343185424805, "learning_rate": 1.0680902827795053e-06, "loss": 0.4782, "step": 12307 }, { "epoch": 0.79, "grad_norm": 1.325321912765503, "learning_rate": 1.0674446345971939e-06, "loss": 0.4966, "step": 12308 }, { "epoch": 0.79, "grad_norm": 1.1849040985107422, "learning_rate": 1.0667991582967313e-06, "loss": 0.5238, "step": 12309 }, { "epoch": 0.79, "grad_norm": 1.2833616733551025, "learning_rate": 1.0661538539063298e-06, "loss": 0.5454, "step": 12310 }, { "epoch": 0.79, "grad_norm": 1.1729291677474976, "learning_rate": 1.0655087214541937e-06, "loss": 0.5067, "step": 12311 }, { "epoch": 0.79, "grad_norm": 1.2870116233825684, "learning_rate": 1.064863760968522e-06, "loss": 0.5043, "step": 12312 }, { "epoch": 0.79, "grad_norm": 1.142317771911621, "learning_rate": 1.064218972477501e-06, "loss": 0.4571, "step": 12313 }, { "epoch": 0.79, "grad_norm": 1.284738302230835, "learning_rate": 1.0635743560093142e-06, "loss": 0.5258, "step": 12314 }, { "epoch": 0.79, "grad_norm": 1.164282202720642, "learning_rate": 1.062929911592136e-06, "loss": 0.4927, "step": 12315 }, { "epoch": 0.79, "grad_norm": 1.1616661548614502, "learning_rate": 1.0622856392541337e-06, "loss": 0.4351, "step": 12316 }, { "epoch": 0.8, "grad_norm": 1.255486011505127, "learning_rate": 1.061641539023467e-06, "loss": 0.5223, "step": 12317 }, { "epoch": 0.8, "grad_norm": 1.240565538406372, "learning_rate": 1.0609976109282887e-06, "loss": 0.559, "step": 12318 }, { "epoch": 0.8, "grad_norm": 1.2312408685684204, "learning_rate": 1.0603538549967407e-06, "loss": 0.4935, "step": 12319 }, { "epoch": 0.8, "grad_norm": 1.6296913623809814, "learning_rate": 1.0597102712569613e-06, "loss": 0.4849, "step": 12320 }, { "epoch": 0.8, "grad_norm": 1.1909856796264648, "learning_rate": 1.0590668597370795e-06, "loss": 0.4936, "step": 12321 }, { "epoch": 0.8, "grad_norm": 1.1542514562606812, "learning_rate": 1.0584236204652183e-06, "loss": 0.5051, "step": 12322 }, { "epoch": 0.8, "grad_norm": 1.1722100973129272, "learning_rate": 1.0577805534694908e-06, "loss": 0.513, "step": 12323 }, { "epoch": 0.8, "grad_norm": 1.2821435928344727, "learning_rate": 1.0571376587780053e-06, "loss": 0.513, "step": 12324 }, { "epoch": 0.8, "grad_norm": 1.1974833011627197, "learning_rate": 1.0564949364188587e-06, "loss": 0.4958, "step": 12325 }, { "epoch": 0.8, "grad_norm": 1.1300561428070068, "learning_rate": 1.0558523864201442e-06, "loss": 0.5003, "step": 12326 }, { "epoch": 0.8, "grad_norm": 1.1877858638763428, "learning_rate": 1.055210008809946e-06, "loss": 0.5233, "step": 12327 }, { "epoch": 0.8, "grad_norm": 1.2497159242630005, "learning_rate": 1.054567803616341e-06, "loss": 0.5175, "step": 12328 }, { "epoch": 0.8, "grad_norm": 1.267082929611206, "learning_rate": 1.0539257708673982e-06, "loss": 0.5044, "step": 12329 }, { "epoch": 0.8, "grad_norm": 1.3044575452804565, "learning_rate": 1.0532839105911786e-06, "loss": 0.5626, "step": 12330 }, { "epoch": 0.8, "grad_norm": 1.1743618249893188, "learning_rate": 1.0526422228157363e-06, "loss": 0.4727, "step": 12331 }, { "epoch": 0.8, "grad_norm": 1.2212926149368286, "learning_rate": 1.052000707569118e-06, "loss": 0.5168, "step": 12332 }, { "epoch": 0.8, "grad_norm": 1.1599565744400024, "learning_rate": 1.051359364879363e-06, "loss": 0.5372, "step": 12333 }, { "epoch": 0.8, "grad_norm": 1.549540400505066, "learning_rate": 1.050718194774502e-06, "loss": 0.5133, "step": 12334 }, { "epoch": 0.8, "grad_norm": 1.2216099500656128, "learning_rate": 1.0500771972825614e-06, "loss": 0.5524, "step": 12335 }, { "epoch": 0.8, "grad_norm": 1.2146332263946533, "learning_rate": 1.0494363724315543e-06, "loss": 0.5197, "step": 12336 }, { "epoch": 0.8, "grad_norm": 1.2168567180633545, "learning_rate": 1.0487957202494903e-06, "loss": 0.5345, "step": 12337 }, { "epoch": 0.8, "grad_norm": 1.3822928667068481, "learning_rate": 1.0481552407643715e-06, "loss": 0.5723, "step": 12338 }, { "epoch": 0.8, "grad_norm": 1.2336539030075073, "learning_rate": 1.0475149340041906e-06, "loss": 0.4928, "step": 12339 }, { "epoch": 0.8, "grad_norm": 1.2919877767562866, "learning_rate": 1.0468747999969354e-06, "loss": 0.511, "step": 12340 }, { "epoch": 0.8, "grad_norm": 1.2967474460601807, "learning_rate": 1.046234838770584e-06, "loss": 0.5496, "step": 12341 }, { "epoch": 0.8, "grad_norm": 1.2164053916931152, "learning_rate": 1.0455950503531058e-06, "loss": 0.4872, "step": 12342 }, { "epoch": 0.8, "grad_norm": 1.2556546926498413, "learning_rate": 1.0449554347724655e-06, "loss": 0.5127, "step": 12343 }, { "epoch": 0.8, "grad_norm": 1.2311232089996338, "learning_rate": 1.0443159920566192e-06, "loss": 0.536, "step": 12344 }, { "epoch": 0.8, "grad_norm": 1.1842259168624878, "learning_rate": 1.0436767222335148e-06, "loss": 0.4937, "step": 12345 }, { "epoch": 0.8, "grad_norm": 1.2153406143188477, "learning_rate": 1.0430376253310935e-06, "loss": 0.4697, "step": 12346 }, { "epoch": 0.8, "grad_norm": 1.2996625900268555, "learning_rate": 1.0423987013772897e-06, "loss": 0.5217, "step": 12347 }, { "epoch": 0.8, "grad_norm": 1.2288779020309448, "learning_rate": 1.0417599504000269e-06, "loss": 0.5057, "step": 12348 }, { "epoch": 0.8, "grad_norm": 1.2796286344528198, "learning_rate": 1.041121372427224e-06, "loss": 0.4759, "step": 12349 }, { "epoch": 0.8, "grad_norm": 1.428181767463684, "learning_rate": 1.0404829674867921e-06, "loss": 0.5185, "step": 12350 }, { "epoch": 0.8, "grad_norm": 1.2011173963546753, "learning_rate": 1.0398447356066337e-06, "loss": 0.5088, "step": 12351 }, { "epoch": 0.8, "grad_norm": 1.1817059516906738, "learning_rate": 1.0392066768146447e-06, "loss": 0.4999, "step": 12352 }, { "epoch": 0.8, "grad_norm": 1.1891366243362427, "learning_rate": 1.0385687911387144e-06, "loss": 0.5015, "step": 12353 }, { "epoch": 0.8, "grad_norm": 1.1719669103622437, "learning_rate": 1.0379310786067203e-06, "loss": 0.5148, "step": 12354 }, { "epoch": 0.8, "grad_norm": 1.2530641555786133, "learning_rate": 1.037293539246536e-06, "loss": 0.5145, "step": 12355 }, { "epoch": 0.8, "grad_norm": 1.1759157180786133, "learning_rate": 1.0366561730860275e-06, "loss": 0.5134, "step": 12356 }, { "epoch": 0.8, "grad_norm": 1.1693611145019531, "learning_rate": 1.0360189801530524e-06, "loss": 0.4518, "step": 12357 }, { "epoch": 0.8, "grad_norm": 1.2059205770492554, "learning_rate": 1.03538196047546e-06, "loss": 0.5004, "step": 12358 }, { "epoch": 0.8, "grad_norm": 1.2861343622207642, "learning_rate": 1.034745114081095e-06, "loss": 0.5274, "step": 12359 }, { "epoch": 0.8, "grad_norm": 1.2041292190551758, "learning_rate": 1.0341084409977886e-06, "loss": 0.4772, "step": 12360 }, { "epoch": 0.8, "grad_norm": 1.2132635116577148, "learning_rate": 1.0334719412533705e-06, "loss": 0.536, "step": 12361 }, { "epoch": 0.8, "grad_norm": 1.1744682788848877, "learning_rate": 1.0328356148756602e-06, "loss": 0.4945, "step": 12362 }, { "epoch": 0.8, "grad_norm": 1.259945034980774, "learning_rate": 1.0321994618924696e-06, "loss": 0.508, "step": 12363 }, { "epoch": 0.8, "grad_norm": 1.2112977504730225, "learning_rate": 1.0315634823316033e-06, "loss": 0.4872, "step": 12364 }, { "epoch": 0.8, "grad_norm": 1.1991766691207886, "learning_rate": 1.0309276762208604e-06, "loss": 0.5214, "step": 12365 }, { "epoch": 0.8, "grad_norm": 1.1942806243896484, "learning_rate": 1.0302920435880265e-06, "loss": 0.5297, "step": 12366 }, { "epoch": 0.8, "grad_norm": 1.3461949825286865, "learning_rate": 1.0296565844608858e-06, "loss": 0.5394, "step": 12367 }, { "epoch": 0.8, "grad_norm": 1.1809391975402832, "learning_rate": 1.0290212988672116e-06, "loss": 0.5505, "step": 12368 }, { "epoch": 0.8, "grad_norm": 1.2265522480010986, "learning_rate": 1.0283861868347716e-06, "loss": 0.5021, "step": 12369 }, { "epoch": 0.8, "grad_norm": 1.1855777502059937, "learning_rate": 1.027751248391326e-06, "loss": 0.5193, "step": 12370 }, { "epoch": 0.8, "grad_norm": 1.1687158346176147, "learning_rate": 1.0271164835646231e-06, "loss": 0.5259, "step": 12371 }, { "epoch": 0.8, "grad_norm": 1.160905361175537, "learning_rate": 1.0264818923824083e-06, "loss": 0.4764, "step": 12372 }, { "epoch": 0.8, "grad_norm": 1.2489291429519653, "learning_rate": 1.0258474748724184e-06, "loss": 0.5194, "step": 12373 }, { "epoch": 0.8, "grad_norm": 1.125739574432373, "learning_rate": 1.0252132310623825e-06, "loss": 0.4986, "step": 12374 }, { "epoch": 0.8, "grad_norm": 1.335961937904358, "learning_rate": 1.0245791609800204e-06, "loss": 0.5518, "step": 12375 }, { "epoch": 0.8, "grad_norm": 1.1734063625335693, "learning_rate": 1.023945264653048e-06, "loss": 0.5222, "step": 12376 }, { "epoch": 0.8, "grad_norm": 1.257463812828064, "learning_rate": 1.0233115421091687e-06, "loss": 0.5049, "step": 12377 }, { "epoch": 0.8, "grad_norm": 1.2764790058135986, "learning_rate": 1.0226779933760816e-06, "loss": 0.5903, "step": 12378 }, { "epoch": 0.8, "grad_norm": 1.2222334146499634, "learning_rate": 1.022044618481478e-06, "loss": 0.489, "step": 12379 }, { "epoch": 0.8, "grad_norm": 1.1741466522216797, "learning_rate": 1.0214114174530403e-06, "loss": 0.5259, "step": 12380 }, { "epoch": 0.8, "grad_norm": 1.2634764909744263, "learning_rate": 1.0207783903184465e-06, "loss": 0.5034, "step": 12381 }, { "epoch": 0.8, "grad_norm": 1.353446125984192, "learning_rate": 1.0201455371053615e-06, "loss": 0.4961, "step": 12382 }, { "epoch": 0.8, "grad_norm": 1.1832743883132935, "learning_rate": 1.0195128578414464e-06, "loss": 0.5224, "step": 12383 }, { "epoch": 0.8, "grad_norm": 1.2464113235473633, "learning_rate": 1.018880352554355e-06, "loss": 0.4836, "step": 12384 }, { "epoch": 0.8, "grad_norm": 1.2967313528060913, "learning_rate": 1.0182480212717333e-06, "loss": 0.5121, "step": 12385 }, { "epoch": 0.8, "grad_norm": 1.1260371208190918, "learning_rate": 1.0176158640212158e-06, "loss": 0.5712, "step": 12386 }, { "epoch": 0.8, "grad_norm": 1.251438021659851, "learning_rate": 1.0169838808304344e-06, "loss": 0.4694, "step": 12387 }, { "epoch": 0.8, "grad_norm": 1.3531869649887085, "learning_rate": 1.0163520717270114e-06, "loss": 0.5555, "step": 12388 }, { "epoch": 0.8, "grad_norm": 1.3357089757919312, "learning_rate": 1.015720436738561e-06, "loss": 0.5006, "step": 12389 }, { "epoch": 0.8, "grad_norm": 1.55514395236969, "learning_rate": 1.0150889758926924e-06, "loss": 0.5302, "step": 12390 }, { "epoch": 0.8, "grad_norm": 1.332643747329712, "learning_rate": 1.0144576892170021e-06, "loss": 0.4845, "step": 12391 }, { "epoch": 0.8, "grad_norm": 1.1844570636749268, "learning_rate": 1.013826576739083e-06, "loss": 0.4845, "step": 12392 }, { "epoch": 0.8, "grad_norm": 1.2208260297775269, "learning_rate": 1.0131956384865204e-06, "loss": 0.4831, "step": 12393 }, { "epoch": 0.8, "grad_norm": 1.2882534265518188, "learning_rate": 1.012564874486891e-06, "loss": 0.5244, "step": 12394 }, { "epoch": 0.8, "grad_norm": 1.1391160488128662, "learning_rate": 1.0119342847677626e-06, "loss": 0.5137, "step": 12395 }, { "epoch": 0.8, "grad_norm": 1.1686943769454956, "learning_rate": 1.0113038693566967e-06, "loss": 0.4899, "step": 12396 }, { "epoch": 0.8, "grad_norm": 1.30806565284729, "learning_rate": 1.010673628281248e-06, "loss": 0.5411, "step": 12397 }, { "epoch": 0.8, "grad_norm": 1.3220160007476807, "learning_rate": 1.0100435615689624e-06, "loss": 0.5151, "step": 12398 }, { "epoch": 0.8, "grad_norm": 1.289964199066162, "learning_rate": 1.0094136692473783e-06, "loss": 0.5287, "step": 12399 }, { "epoch": 0.8, "grad_norm": 1.186374306678772, "learning_rate": 1.0087839513440285e-06, "loss": 0.4808, "step": 12400 }, { "epoch": 0.8, "grad_norm": 1.2318028211593628, "learning_rate": 1.0081544078864331e-06, "loss": 0.5605, "step": 12401 }, { "epoch": 0.8, "grad_norm": 1.1548560857772827, "learning_rate": 1.007525038902109e-06, "loss": 0.5086, "step": 12402 }, { "epoch": 0.8, "grad_norm": 1.253217101097107, "learning_rate": 1.0068958444185656e-06, "loss": 0.5678, "step": 12403 }, { "epoch": 0.8, "grad_norm": 1.2399333715438843, "learning_rate": 1.0062668244633022e-06, "loss": 0.4953, "step": 12404 }, { "epoch": 0.8, "grad_norm": 1.1552172899246216, "learning_rate": 1.0056379790638116e-06, "loss": 0.4791, "step": 12405 }, { "epoch": 0.8, "grad_norm": 1.1388535499572754, "learning_rate": 1.005009308247581e-06, "loss": 0.4974, "step": 12406 }, { "epoch": 0.8, "grad_norm": 1.2571686506271362, "learning_rate": 1.004380812042085e-06, "loss": 0.4954, "step": 12407 }, { "epoch": 0.8, "grad_norm": 1.2303855419158936, "learning_rate": 1.0037524904747946e-06, "loss": 0.5022, "step": 12408 }, { "epoch": 0.8, "grad_norm": 1.2132965326309204, "learning_rate": 1.003124343573173e-06, "loss": 0.5149, "step": 12409 }, { "epoch": 0.8, "grad_norm": 1.3047829866409302, "learning_rate": 1.0024963713646735e-06, "loss": 0.4926, "step": 12410 }, { "epoch": 0.8, "grad_norm": 1.3283801078796387, "learning_rate": 1.001868573876746e-06, "loss": 0.483, "step": 12411 }, { "epoch": 0.8, "grad_norm": 1.3065268993377686, "learning_rate": 1.001240951136826e-06, "loss": 0.494, "step": 12412 }, { "epoch": 0.8, "grad_norm": 1.1967201232910156, "learning_rate": 1.0006135031723468e-06, "loss": 0.4976, "step": 12413 }, { "epoch": 0.8, "grad_norm": 1.4933998584747314, "learning_rate": 9.999862300107332e-07, "loss": 0.5378, "step": 12414 }, { "epoch": 0.8, "grad_norm": 1.2328996658325195, "learning_rate": 9.993591316794015e-07, "loss": 0.5216, "step": 12415 }, { "epoch": 0.8, "grad_norm": 1.1802932024002075, "learning_rate": 9.9873220820576e-07, "loss": 0.5343, "step": 12416 }, { "epoch": 0.8, "grad_norm": 1.2519503831863403, "learning_rate": 9.981054596172118e-07, "loss": 0.4775, "step": 12417 }, { "epoch": 0.8, "grad_norm": 1.1766830682754517, "learning_rate": 9.974788859411478e-07, "loss": 0.5248, "step": 12418 }, { "epoch": 0.8, "grad_norm": 1.225400686264038, "learning_rate": 9.968524872049545e-07, "loss": 0.524, "step": 12419 }, { "epoch": 0.8, "grad_norm": 1.2522408962249756, "learning_rate": 9.962262634360114e-07, "loss": 0.5176, "step": 12420 }, { "epoch": 0.8, "grad_norm": 1.3180137872695923, "learning_rate": 9.956002146616877e-07, "loss": 0.5428, "step": 12421 }, { "epoch": 0.8, "grad_norm": 1.2859344482421875, "learning_rate": 9.949743409093476e-07, "loss": 0.5285, "step": 12422 }, { "epoch": 0.8, "grad_norm": 1.268125295639038, "learning_rate": 9.943486422063476e-07, "loss": 0.4973, "step": 12423 }, { "epoch": 0.8, "grad_norm": 1.230237603187561, "learning_rate": 9.937231185800318e-07, "loss": 0.5321, "step": 12424 }, { "epoch": 0.8, "grad_norm": 1.284795880317688, "learning_rate": 9.930977700577427e-07, "loss": 0.5472, "step": 12425 }, { "epoch": 0.8, "grad_norm": 1.1663342714309692, "learning_rate": 9.924725966668114e-07, "loss": 0.5639, "step": 12426 }, { "epoch": 0.8, "grad_norm": 1.0955818891525269, "learning_rate": 9.918475984345637e-07, "loss": 0.508, "step": 12427 }, { "epoch": 0.8, "grad_norm": 1.474664330482483, "learning_rate": 9.912227753883164e-07, "loss": 0.5608, "step": 12428 }, { "epoch": 0.8, "grad_norm": 1.2528384923934937, "learning_rate": 9.9059812755538e-07, "loss": 0.5376, "step": 12429 }, { "epoch": 0.8, "grad_norm": 1.2543781995773315, "learning_rate": 9.89973654963054e-07, "loss": 0.4467, "step": 12430 }, { "epoch": 0.8, "grad_norm": 1.3641993999481201, "learning_rate": 9.893493576386332e-07, "loss": 0.5257, "step": 12431 }, { "epoch": 0.8, "grad_norm": 1.1949771642684937, "learning_rate": 9.887252356094045e-07, "loss": 0.5153, "step": 12432 }, { "epoch": 0.8, "grad_norm": 1.2547205686569214, "learning_rate": 9.881012889026465e-07, "loss": 0.5234, "step": 12433 }, { "epoch": 0.8, "grad_norm": 1.2940914630889893, "learning_rate": 9.874775175456302e-07, "loss": 0.5433, "step": 12434 }, { "epoch": 0.8, "grad_norm": 1.1726219654083252, "learning_rate": 9.86853921565621e-07, "loss": 0.503, "step": 12435 }, { "epoch": 0.8, "grad_norm": 1.2248529195785522, "learning_rate": 9.86230500989871e-07, "loss": 0.5043, "step": 12436 }, { "epoch": 0.8, "grad_norm": 1.312852382659912, "learning_rate": 9.856072558456304e-07, "loss": 0.4771, "step": 12437 }, { "epoch": 0.8, "grad_norm": 1.2546230554580688, "learning_rate": 9.84984186160139e-07, "loss": 0.5162, "step": 12438 }, { "epoch": 0.8, "grad_norm": 1.3249657154083252, "learning_rate": 9.843612919606304e-07, "loss": 0.4936, "step": 12439 }, { "epoch": 0.8, "grad_norm": 1.2836447954177856, "learning_rate": 9.837385732743288e-07, "loss": 0.5359, "step": 12440 }, { "epoch": 0.8, "grad_norm": 1.2251579761505127, "learning_rate": 9.831160301284537e-07, "loss": 0.5077, "step": 12441 }, { "epoch": 0.8, "grad_norm": 1.2226746082305908, "learning_rate": 9.824936625502119e-07, "loss": 0.5366, "step": 12442 }, { "epoch": 0.8, "grad_norm": 1.2101752758026123, "learning_rate": 9.81871470566807e-07, "loss": 0.5083, "step": 12443 }, { "epoch": 0.8, "grad_norm": 1.100334882736206, "learning_rate": 9.812494542054329e-07, "loss": 0.5151, "step": 12444 }, { "epoch": 0.8, "grad_norm": 1.1736689805984497, "learning_rate": 9.806276134932763e-07, "loss": 0.4801, "step": 12445 }, { "epoch": 0.8, "grad_norm": 1.211243748664856, "learning_rate": 9.800059484575176e-07, "loss": 0.5309, "step": 12446 }, { "epoch": 0.8, "grad_norm": 1.2389942407608032, "learning_rate": 9.793844591253276e-07, "loss": 0.5228, "step": 12447 }, { "epoch": 0.8, "grad_norm": 1.1606495380401611, "learning_rate": 9.787631455238684e-07, "loss": 0.5058, "step": 12448 }, { "epoch": 0.8, "grad_norm": 1.2074902057647705, "learning_rate": 9.781420076802973e-07, "loss": 0.4854, "step": 12449 }, { "epoch": 0.8, "grad_norm": 1.2540642023086548, "learning_rate": 9.775210456217626e-07, "loss": 0.529, "step": 12450 }, { "epoch": 0.8, "grad_norm": 1.2595694065093994, "learning_rate": 9.769002593754045e-07, "loss": 0.5129, "step": 12451 }, { "epoch": 0.8, "grad_norm": 1.2248003482818604, "learning_rate": 9.762796489683568e-07, "loss": 0.5016, "step": 12452 }, { "epoch": 0.8, "grad_norm": 1.27824068069458, "learning_rate": 9.756592144277454e-07, "loss": 0.5143, "step": 12453 }, { "epoch": 0.8, "grad_norm": 1.1781065464019775, "learning_rate": 9.750389557806854e-07, "loss": 0.4763, "step": 12454 }, { "epoch": 0.8, "grad_norm": 1.1857348680496216, "learning_rate": 9.744188730542886e-07, "loss": 0.5231, "step": 12455 }, { "epoch": 0.8, "grad_norm": 1.1421366930007935, "learning_rate": 9.737989662756564e-07, "loss": 0.4375, "step": 12456 }, { "epoch": 0.8, "grad_norm": 1.2677470445632935, "learning_rate": 9.731792354718833e-07, "loss": 0.5403, "step": 12457 }, { "epoch": 0.8, "grad_norm": 1.0375328063964844, "learning_rate": 9.725596806700582e-07, "loss": 0.4901, "step": 12458 }, { "epoch": 0.8, "grad_norm": 1.1617891788482666, "learning_rate": 9.719403018972566e-07, "loss": 0.4611, "step": 12459 }, { "epoch": 0.8, "grad_norm": 1.2699639797210693, "learning_rate": 9.713210991805528e-07, "loss": 0.5237, "step": 12460 }, { "epoch": 0.8, "grad_norm": 1.3488049507141113, "learning_rate": 9.70702072547009e-07, "loss": 0.5289, "step": 12461 }, { "epoch": 0.8, "grad_norm": 1.1440255641937256, "learning_rate": 9.700832220236821e-07, "loss": 0.4859, "step": 12462 }, { "epoch": 0.8, "grad_norm": 1.2077094316482544, "learning_rate": 9.694645476376202e-07, "loss": 0.5223, "step": 12463 }, { "epoch": 0.8, "grad_norm": 1.1941784620285034, "learning_rate": 9.688460494158652e-07, "loss": 0.5034, "step": 12464 }, { "epoch": 0.8, "grad_norm": 1.1798207759857178, "learning_rate": 9.682277273854478e-07, "loss": 0.5386, "step": 12465 }, { "epoch": 0.8, "grad_norm": 1.2194470167160034, "learning_rate": 9.676095815733943e-07, "loss": 0.5348, "step": 12466 }, { "epoch": 0.8, "grad_norm": 1.185367226600647, "learning_rate": 9.669916120067223e-07, "loss": 0.4968, "step": 12467 }, { "epoch": 0.8, "grad_norm": 1.404099464416504, "learning_rate": 9.663738187124416e-07, "loss": 0.5014, "step": 12468 }, { "epoch": 0.8, "grad_norm": 1.2366561889648438, "learning_rate": 9.657562017175542e-07, "loss": 0.572, "step": 12469 }, { "epoch": 0.8, "grad_norm": 1.1777235269546509, "learning_rate": 9.651387610490559e-07, "loss": 0.5495, "step": 12470 }, { "epoch": 0.8, "grad_norm": 1.328201174736023, "learning_rate": 9.64521496733931e-07, "loss": 0.5651, "step": 12471 }, { "epoch": 0.81, "grad_norm": 1.1512339115142822, "learning_rate": 9.639044087991607e-07, "loss": 0.5018, "step": 12472 }, { "epoch": 0.81, "grad_norm": 1.1361515522003174, "learning_rate": 9.632874972717144e-07, "loss": 0.4738, "step": 12473 }, { "epoch": 0.81, "grad_norm": 1.2017271518707275, "learning_rate": 9.626707621785585e-07, "loss": 0.5398, "step": 12474 }, { "epoch": 0.81, "grad_norm": 1.1495606899261475, "learning_rate": 9.620542035466457e-07, "loss": 0.4992, "step": 12475 }, { "epoch": 0.81, "grad_norm": 1.2237911224365234, "learning_rate": 9.614378214029258e-07, "loss": 0.4639, "step": 12476 }, { "epoch": 0.81, "grad_norm": 1.2554868459701538, "learning_rate": 9.608216157743388e-07, "loss": 0.4987, "step": 12477 }, { "epoch": 0.81, "grad_norm": 1.3286153078079224, "learning_rate": 9.60205586687818e-07, "loss": 0.5077, "step": 12478 }, { "epoch": 0.81, "grad_norm": 1.2022725343704224, "learning_rate": 9.595897341702893e-07, "loss": 0.4809, "step": 12479 }, { "epoch": 0.81, "grad_norm": 1.2567706108093262, "learning_rate": 9.589740582486679e-07, "loss": 0.554, "step": 12480 }, { "epoch": 0.81, "grad_norm": 1.3025299310684204, "learning_rate": 9.58358558949864e-07, "loss": 0.5112, "step": 12481 }, { "epoch": 0.81, "grad_norm": 1.3194220066070557, "learning_rate": 9.577432363007804e-07, "loss": 0.5432, "step": 12482 }, { "epoch": 0.81, "grad_norm": 3.7938930988311768, "learning_rate": 9.571280903283114e-07, "loss": 0.4676, "step": 12483 }, { "epoch": 0.81, "grad_norm": 1.109270453453064, "learning_rate": 9.56513121059342e-07, "loss": 0.4673, "step": 12484 }, { "epoch": 0.81, "grad_norm": 1.1845366954803467, "learning_rate": 9.558983285207517e-07, "loss": 0.4976, "step": 12485 }, { "epoch": 0.81, "grad_norm": 1.1434074640274048, "learning_rate": 9.552837127394115e-07, "loss": 0.547, "step": 12486 }, { "epoch": 0.81, "grad_norm": 1.1704672574996948, "learning_rate": 9.546692737421853e-07, "loss": 0.5401, "step": 12487 }, { "epoch": 0.81, "grad_norm": 1.2391862869262695, "learning_rate": 9.540550115559288e-07, "loss": 0.5632, "step": 12488 }, { "epoch": 0.81, "grad_norm": 1.3518009185791016, "learning_rate": 9.534409262074879e-07, "loss": 0.5423, "step": 12489 }, { "epoch": 0.81, "grad_norm": 1.1603925228118896, "learning_rate": 9.528270177237036e-07, "loss": 0.5455, "step": 12490 }, { "epoch": 0.81, "grad_norm": 1.3017874956130981, "learning_rate": 9.522132861314088e-07, "loss": 0.5224, "step": 12491 }, { "epoch": 0.81, "grad_norm": 1.3306933641433716, "learning_rate": 9.515997314574271e-07, "loss": 0.4792, "step": 12492 }, { "epoch": 0.81, "grad_norm": 1.2434800863265991, "learning_rate": 9.509863537285768e-07, "loss": 0.5387, "step": 12493 }, { "epoch": 0.81, "grad_norm": 1.2040839195251465, "learning_rate": 9.503731529716676e-07, "loss": 0.5364, "step": 12494 }, { "epoch": 0.81, "grad_norm": 1.1791296005249023, "learning_rate": 9.497601292134984e-07, "loss": 0.512, "step": 12495 }, { "epoch": 0.81, "grad_norm": 1.2918564081192017, "learning_rate": 9.491472824808646e-07, "loss": 0.5166, "step": 12496 }, { "epoch": 0.81, "grad_norm": 1.1590237617492676, "learning_rate": 9.485346128005513e-07, "loss": 0.4993, "step": 12497 }, { "epoch": 0.81, "grad_norm": 1.2316193580627441, "learning_rate": 9.479221201993372e-07, "loss": 0.5942, "step": 12498 }, { "epoch": 0.81, "grad_norm": 1.1053425073623657, "learning_rate": 9.473098047039935e-07, "loss": 0.4996, "step": 12499 }, { "epoch": 0.81, "grad_norm": 1.1677672863006592, "learning_rate": 9.466976663412813e-07, "loss": 0.5081, "step": 12500 }, { "epoch": 0.81, "grad_norm": 1.4532032012939453, "learning_rate": 9.460857051379568e-07, "loss": 0.5087, "step": 12501 }, { "epoch": 0.81, "grad_norm": 1.2465498447418213, "learning_rate": 9.454739211207664e-07, "loss": 0.4698, "step": 12502 }, { "epoch": 0.81, "grad_norm": 1.1202421188354492, "learning_rate": 9.448623143164504e-07, "loss": 0.5131, "step": 12503 }, { "epoch": 0.81, "grad_norm": 1.347178339958191, "learning_rate": 9.442508847517401e-07, "loss": 0.5109, "step": 12504 }, { "epoch": 0.81, "grad_norm": 1.2111541032791138, "learning_rate": 9.436396324533614e-07, "loss": 0.5054, "step": 12505 }, { "epoch": 0.81, "grad_norm": 1.1596097946166992, "learning_rate": 9.430285574480269e-07, "loss": 0.4806, "step": 12506 }, { "epoch": 0.81, "grad_norm": 1.1758167743682861, "learning_rate": 9.424176597624474e-07, "loss": 0.5316, "step": 12507 }, { "epoch": 0.81, "grad_norm": 1.3245142698287964, "learning_rate": 9.418069394233231e-07, "loss": 0.5272, "step": 12508 }, { "epoch": 0.81, "grad_norm": 1.2222775220870972, "learning_rate": 9.411963964573473e-07, "loss": 0.5225, "step": 12509 }, { "epoch": 0.81, "grad_norm": 1.195225477218628, "learning_rate": 9.405860308912046e-07, "loss": 0.5169, "step": 12510 }, { "epoch": 0.81, "grad_norm": 1.0844721794128418, "learning_rate": 9.399758427515748e-07, "loss": 0.5059, "step": 12511 }, { "epoch": 0.81, "grad_norm": 1.3192825317382812, "learning_rate": 9.39365832065125e-07, "loss": 0.5123, "step": 12512 }, { "epoch": 0.81, "grad_norm": 1.401293396949768, "learning_rate": 9.387559988585176e-07, "loss": 0.5417, "step": 12513 }, { "epoch": 0.81, "grad_norm": 1.1507420539855957, "learning_rate": 9.381463431584076e-07, "loss": 0.4629, "step": 12514 }, { "epoch": 0.81, "grad_norm": 1.1895231008529663, "learning_rate": 9.375368649914413e-07, "loss": 0.5171, "step": 12515 }, { "epoch": 0.81, "grad_norm": 1.2724968194961548, "learning_rate": 9.369275643842568e-07, "loss": 0.5418, "step": 12516 }, { "epoch": 0.81, "grad_norm": 1.2593927383422852, "learning_rate": 9.363184413634874e-07, "loss": 0.5274, "step": 12517 }, { "epoch": 0.81, "grad_norm": 1.2173514366149902, "learning_rate": 9.357094959557534e-07, "loss": 0.5467, "step": 12518 }, { "epoch": 0.81, "grad_norm": 1.2237260341644287, "learning_rate": 9.351007281876706e-07, "loss": 0.5508, "step": 12519 }, { "epoch": 0.81, "grad_norm": 1.1521735191345215, "learning_rate": 9.344921380858479e-07, "loss": 0.5059, "step": 12520 }, { "epoch": 0.81, "grad_norm": 1.3638596534729004, "learning_rate": 9.338837256768846e-07, "loss": 0.5193, "step": 12521 }, { "epoch": 0.81, "grad_norm": 1.1480138301849365, "learning_rate": 9.332754909873726e-07, "loss": 0.4652, "step": 12522 }, { "epoch": 0.81, "grad_norm": 1.5859638452529907, "learning_rate": 9.326674340438984e-07, "loss": 0.4739, "step": 12523 }, { "epoch": 0.81, "grad_norm": 1.2687329053878784, "learning_rate": 9.320595548730354e-07, "loss": 0.5328, "step": 12524 }, { "epoch": 0.81, "grad_norm": 1.218224048614502, "learning_rate": 9.314518535013533e-07, "loss": 0.4923, "step": 12525 }, { "epoch": 0.81, "grad_norm": 1.3193436861038208, "learning_rate": 9.308443299554143e-07, "loss": 0.4923, "step": 12526 }, { "epoch": 0.81, "grad_norm": 1.350244164466858, "learning_rate": 9.302369842617709e-07, "loss": 0.4892, "step": 12527 }, { "epoch": 0.81, "grad_norm": 1.1800708770751953, "learning_rate": 9.296298164469686e-07, "loss": 0.494, "step": 12528 }, { "epoch": 0.81, "grad_norm": 1.3489296436309814, "learning_rate": 9.290228265375472e-07, "loss": 0.4957, "step": 12529 }, { "epoch": 0.81, "grad_norm": 1.1922755241394043, "learning_rate": 9.284160145600329e-07, "loss": 0.4813, "step": 12530 }, { "epoch": 0.81, "grad_norm": 1.2558363676071167, "learning_rate": 9.278093805409499e-07, "loss": 0.5516, "step": 12531 }, { "epoch": 0.81, "grad_norm": 1.238605260848999, "learning_rate": 9.272029245068126e-07, "loss": 0.4979, "step": 12532 }, { "epoch": 0.81, "grad_norm": 1.124494194984436, "learning_rate": 9.265966464841275e-07, "loss": 0.511, "step": 12533 }, { "epoch": 0.81, "grad_norm": 1.1863863468170166, "learning_rate": 9.259905464993935e-07, "loss": 0.5072, "step": 12534 }, { "epoch": 0.81, "grad_norm": 1.1463477611541748, "learning_rate": 9.253846245791026e-07, "loss": 0.4903, "step": 12535 }, { "epoch": 0.81, "grad_norm": 1.346791386604309, "learning_rate": 9.24778880749736e-07, "loss": 0.4937, "step": 12536 }, { "epoch": 0.81, "grad_norm": 1.1887280941009521, "learning_rate": 9.241733150377707e-07, "loss": 0.584, "step": 12537 }, { "epoch": 0.81, "grad_norm": 1.1254464387893677, "learning_rate": 9.235679274696735e-07, "loss": 0.5053, "step": 12538 }, { "epoch": 0.81, "grad_norm": 1.3352880477905273, "learning_rate": 9.229627180719053e-07, "loss": 0.5177, "step": 12539 }, { "epoch": 0.81, "grad_norm": 1.1488767862319946, "learning_rate": 9.223576868709188e-07, "loss": 0.5214, "step": 12540 }, { "epoch": 0.81, "grad_norm": 1.1660985946655273, "learning_rate": 9.217528338931558e-07, "loss": 0.5272, "step": 12541 }, { "epoch": 0.81, "grad_norm": 1.2196011543273926, "learning_rate": 9.21148159165055e-07, "loss": 0.4565, "step": 12542 }, { "epoch": 0.81, "grad_norm": 1.3057318925857544, "learning_rate": 9.205436627130443e-07, "loss": 0.5259, "step": 12543 }, { "epoch": 0.81, "grad_norm": 1.1093459129333496, "learning_rate": 9.199393445635452e-07, "loss": 0.5102, "step": 12544 }, { "epoch": 0.81, "grad_norm": 1.1454521417617798, "learning_rate": 9.193352047429699e-07, "loss": 0.5386, "step": 12545 }, { "epoch": 0.81, "grad_norm": 1.2682946920394897, "learning_rate": 9.187312432777268e-07, "loss": 0.4783, "step": 12546 }, { "epoch": 0.81, "grad_norm": 1.2044094800949097, "learning_rate": 9.181274601942092e-07, "loss": 0.52, "step": 12547 }, { "epoch": 0.81, "grad_norm": 1.2021644115447998, "learning_rate": 9.175238555188093e-07, "loss": 0.4957, "step": 12548 }, { "epoch": 0.81, "grad_norm": 1.2294971942901611, "learning_rate": 9.169204292779088e-07, "loss": 0.5351, "step": 12549 }, { "epoch": 0.81, "grad_norm": 1.3138670921325684, "learning_rate": 9.163171814978816e-07, "loss": 0.4952, "step": 12550 }, { "epoch": 0.81, "grad_norm": 1.2075189352035522, "learning_rate": 9.157141122050944e-07, "loss": 0.4936, "step": 12551 }, { "epoch": 0.81, "grad_norm": 1.2099237442016602, "learning_rate": 9.151112214259072e-07, "loss": 0.5257, "step": 12552 }, { "epoch": 0.81, "grad_norm": 1.2158056497573853, "learning_rate": 9.145085091866679e-07, "loss": 0.4905, "step": 12553 }, { "epoch": 0.81, "grad_norm": 1.2028011083602905, "learning_rate": 9.139059755137214e-07, "loss": 0.4928, "step": 12554 }, { "epoch": 0.81, "grad_norm": 1.1867918968200684, "learning_rate": 9.133036204334023e-07, "loss": 0.493, "step": 12555 }, { "epoch": 0.81, "grad_norm": 1.2046374082565308, "learning_rate": 9.127014439720378e-07, "loss": 0.4871, "step": 12556 }, { "epoch": 0.81, "grad_norm": 1.2658100128173828, "learning_rate": 9.120994461559479e-07, "loss": 0.5198, "step": 12557 }, { "epoch": 0.81, "grad_norm": 1.1145727634429932, "learning_rate": 9.114976270114456e-07, "loss": 0.4824, "step": 12558 }, { "epoch": 0.81, "grad_norm": 1.175974726676941, "learning_rate": 9.108959865648326e-07, "loss": 0.5426, "step": 12559 }, { "epoch": 0.81, "grad_norm": 1.3549888134002686, "learning_rate": 9.102945248424055e-07, "loss": 0.5589, "step": 12560 }, { "epoch": 0.81, "grad_norm": 1.4247610569000244, "learning_rate": 9.096932418704535e-07, "loss": 0.5319, "step": 12561 }, { "epoch": 0.81, "grad_norm": 1.1940875053405762, "learning_rate": 9.090921376752565e-07, "loss": 0.4628, "step": 12562 }, { "epoch": 0.81, "grad_norm": 1.318617820739746, "learning_rate": 9.084912122830885e-07, "loss": 0.5548, "step": 12563 }, { "epoch": 0.81, "grad_norm": 1.325723648071289, "learning_rate": 9.078904657202126e-07, "loss": 0.5458, "step": 12564 }, { "epoch": 0.81, "grad_norm": 1.1891964673995972, "learning_rate": 9.072898980128864e-07, "loss": 0.4824, "step": 12565 }, { "epoch": 0.81, "grad_norm": 1.2482812404632568, "learning_rate": 9.066895091873596e-07, "loss": 0.5349, "step": 12566 }, { "epoch": 0.81, "grad_norm": 1.1458483934402466, "learning_rate": 9.060892992698733e-07, "loss": 0.4695, "step": 12567 }, { "epoch": 0.81, "grad_norm": 1.1798555850982666, "learning_rate": 9.054892682866628e-07, "loss": 0.4748, "step": 12568 }, { "epoch": 0.81, "grad_norm": 1.177205204963684, "learning_rate": 9.04889416263951e-07, "loss": 0.5165, "step": 12569 }, { "epoch": 0.81, "grad_norm": 1.290266513824463, "learning_rate": 9.042897432279573e-07, "loss": 0.5262, "step": 12570 }, { "epoch": 0.81, "grad_norm": 1.1580917835235596, "learning_rate": 9.036902492048921e-07, "loss": 0.4784, "step": 12571 }, { "epoch": 0.81, "grad_norm": 1.5145988464355469, "learning_rate": 9.030909342209587e-07, "loss": 0.5243, "step": 12572 }, { "epoch": 0.81, "grad_norm": 1.279886245727539, "learning_rate": 9.024917983023485e-07, "loss": 0.5528, "step": 12573 }, { "epoch": 0.81, "grad_norm": 1.2548218965530396, "learning_rate": 9.018928414752509e-07, "loss": 0.5138, "step": 12574 }, { "epoch": 0.81, "grad_norm": 1.2607758045196533, "learning_rate": 9.012940637658435e-07, "loss": 0.5204, "step": 12575 }, { "epoch": 0.81, "grad_norm": 1.1778172254562378, "learning_rate": 9.006954652002975e-07, "loss": 0.4925, "step": 12576 }, { "epoch": 0.81, "grad_norm": 1.2222732305526733, "learning_rate": 9.000970458047781e-07, "loss": 0.5136, "step": 12577 }, { "epoch": 0.81, "grad_norm": 1.1865158081054688, "learning_rate": 8.994988056054377e-07, "loss": 0.5225, "step": 12578 }, { "epoch": 0.81, "grad_norm": 1.280261516571045, "learning_rate": 8.989007446284248e-07, "loss": 0.5436, "step": 12579 }, { "epoch": 0.81, "grad_norm": 1.2839127779006958, "learning_rate": 8.983028628998797e-07, "loss": 0.5222, "step": 12580 }, { "epoch": 0.81, "grad_norm": 1.3057888746261597, "learning_rate": 8.977051604459347e-07, "loss": 0.4906, "step": 12581 }, { "epoch": 0.81, "grad_norm": 1.1602778434753418, "learning_rate": 8.971076372927124e-07, "loss": 0.5301, "step": 12582 }, { "epoch": 0.81, "grad_norm": 1.179165244102478, "learning_rate": 8.965102934663294e-07, "loss": 0.5107, "step": 12583 }, { "epoch": 0.81, "grad_norm": 1.2349730730056763, "learning_rate": 8.959131289928941e-07, "loss": 0.5165, "step": 12584 }, { "epoch": 0.81, "grad_norm": 1.2052587270736694, "learning_rate": 8.953161438985075e-07, "loss": 0.4866, "step": 12585 }, { "epoch": 0.81, "grad_norm": 1.2729499340057373, "learning_rate": 8.947193382092623e-07, "loss": 0.5148, "step": 12586 }, { "epoch": 0.81, "grad_norm": 1.1950945854187012, "learning_rate": 8.941227119512436e-07, "loss": 0.4622, "step": 12587 }, { "epoch": 0.81, "grad_norm": 1.2527456283569336, "learning_rate": 8.935262651505272e-07, "loss": 0.4857, "step": 12588 }, { "epoch": 0.81, "grad_norm": 1.1832491159439087, "learning_rate": 8.929299978331829e-07, "loss": 0.5353, "step": 12589 }, { "epoch": 0.81, "grad_norm": 1.199386477470398, "learning_rate": 8.923339100252714e-07, "loss": 0.5364, "step": 12590 }, { "epoch": 0.81, "grad_norm": 1.184419870376587, "learning_rate": 8.917380017528476e-07, "loss": 0.4872, "step": 12591 }, { "epoch": 0.81, "grad_norm": 1.4693142175674438, "learning_rate": 8.911422730419555e-07, "loss": 0.5273, "step": 12592 }, { "epoch": 0.81, "grad_norm": 1.2310161590576172, "learning_rate": 8.905467239186355e-07, "loss": 0.5023, "step": 12593 }, { "epoch": 0.81, "grad_norm": 1.1857503652572632, "learning_rate": 8.899513544089139e-07, "loss": 0.5036, "step": 12594 }, { "epoch": 0.81, "grad_norm": 1.2630239725112915, "learning_rate": 8.893561645388149e-07, "loss": 0.5813, "step": 12595 }, { "epoch": 0.81, "grad_norm": 1.1469277143478394, "learning_rate": 8.887611543343527e-07, "loss": 0.4978, "step": 12596 }, { "epoch": 0.81, "grad_norm": 1.166143774986267, "learning_rate": 8.881663238215326e-07, "loss": 0.5015, "step": 12597 }, { "epoch": 0.81, "grad_norm": 1.1858820915222168, "learning_rate": 8.87571673026354e-07, "loss": 0.5056, "step": 12598 }, { "epoch": 0.81, "grad_norm": 1.167900562286377, "learning_rate": 8.869772019748091e-07, "loss": 0.5237, "step": 12599 }, { "epoch": 0.81, "grad_norm": 1.1684869527816772, "learning_rate": 8.863829106928773e-07, "loss": 0.4956, "step": 12600 }, { "epoch": 0.81, "grad_norm": 1.2631186246871948, "learning_rate": 8.857887992065351e-07, "loss": 0.5001, "step": 12601 }, { "epoch": 0.81, "grad_norm": 1.0963022708892822, "learning_rate": 8.851948675417499e-07, "loss": 0.4881, "step": 12602 }, { "epoch": 0.81, "grad_norm": 1.1203852891921997, "learning_rate": 8.846011157244805e-07, "loss": 0.4786, "step": 12603 }, { "epoch": 0.81, "grad_norm": 1.2014552354812622, "learning_rate": 8.840075437806784e-07, "loss": 0.5276, "step": 12604 }, { "epoch": 0.81, "grad_norm": 2.8593358993530273, "learning_rate": 8.834141517362888e-07, "loss": 0.5098, "step": 12605 }, { "epoch": 0.81, "grad_norm": 1.1878129243850708, "learning_rate": 8.828209396172443e-07, "loss": 0.5028, "step": 12606 }, { "epoch": 0.81, "grad_norm": 1.2464262247085571, "learning_rate": 8.822279074494738e-07, "loss": 0.4959, "step": 12607 }, { "epoch": 0.81, "grad_norm": 1.171331524848938, "learning_rate": 8.816350552588976e-07, "loss": 0.4984, "step": 12608 }, { "epoch": 0.81, "grad_norm": 1.1993108987808228, "learning_rate": 8.810423830714277e-07, "loss": 0.506, "step": 12609 }, { "epoch": 0.81, "grad_norm": 1.1726367473602295, "learning_rate": 8.804498909129683e-07, "loss": 0.5465, "step": 12610 }, { "epoch": 0.81, "grad_norm": 1.1411198377609253, "learning_rate": 8.798575788094166e-07, "loss": 0.4944, "step": 12611 }, { "epoch": 0.81, "grad_norm": 1.3178868293762207, "learning_rate": 8.79265446786659e-07, "loss": 0.5024, "step": 12612 }, { "epoch": 0.81, "grad_norm": 1.2097347974777222, "learning_rate": 8.786734948705766e-07, "loss": 0.5208, "step": 12613 }, { "epoch": 0.81, "grad_norm": 1.2211029529571533, "learning_rate": 8.780817230870431e-07, "loss": 0.4636, "step": 12614 }, { "epoch": 0.81, "grad_norm": 1.163317084312439, "learning_rate": 8.774901314619227e-07, "loss": 0.465, "step": 12615 }, { "epoch": 0.81, "grad_norm": 1.0769072771072388, "learning_rate": 8.768987200210721e-07, "loss": 0.4283, "step": 12616 }, { "epoch": 0.81, "grad_norm": 1.1843619346618652, "learning_rate": 8.763074887903422e-07, "loss": 0.5229, "step": 12617 }, { "epoch": 0.81, "grad_norm": 1.3064184188842773, "learning_rate": 8.757164377955718e-07, "loss": 0.5082, "step": 12618 }, { "epoch": 0.81, "grad_norm": 1.157529592514038, "learning_rate": 8.751255670625947e-07, "loss": 0.4932, "step": 12619 }, { "epoch": 0.81, "grad_norm": 1.230339527130127, "learning_rate": 8.745348766172368e-07, "loss": 0.5045, "step": 12620 }, { "epoch": 0.81, "grad_norm": 1.2326427698135376, "learning_rate": 8.739443664853154e-07, "loss": 0.5744, "step": 12621 }, { "epoch": 0.81, "grad_norm": 1.2665504217147827, "learning_rate": 8.733540366926408e-07, "loss": 0.4912, "step": 12622 }, { "epoch": 0.81, "grad_norm": 1.1101351976394653, "learning_rate": 8.727638872650151e-07, "loss": 0.5156, "step": 12623 }, { "epoch": 0.81, "grad_norm": 1.2353466749191284, "learning_rate": 8.721739182282307e-07, "loss": 0.4892, "step": 12624 }, { "epoch": 0.81, "grad_norm": 1.285308837890625, "learning_rate": 8.71584129608074e-07, "loss": 0.4913, "step": 12625 }, { "epoch": 0.82, "grad_norm": 1.2527133226394653, "learning_rate": 8.709945214303239e-07, "loss": 0.5191, "step": 12626 }, { "epoch": 0.82, "grad_norm": 1.1960015296936035, "learning_rate": 8.704050937207503e-07, "loss": 0.508, "step": 12627 }, { "epoch": 0.82, "grad_norm": 1.2944872379302979, "learning_rate": 8.698158465051166e-07, "loss": 0.5826, "step": 12628 }, { "epoch": 0.82, "grad_norm": 1.1829335689544678, "learning_rate": 8.692267798091753e-07, "loss": 0.5457, "step": 12629 }, { "epoch": 0.82, "grad_norm": 1.201158881187439, "learning_rate": 8.686378936586736e-07, "loss": 0.566, "step": 12630 }, { "epoch": 0.82, "grad_norm": 1.1896405220031738, "learning_rate": 8.680491880793507e-07, "loss": 0.4963, "step": 12631 }, { "epoch": 0.82, "grad_norm": 1.1559100151062012, "learning_rate": 8.674606630969368e-07, "loss": 0.4828, "step": 12632 }, { "epoch": 0.82, "grad_norm": 1.1741358041763306, "learning_rate": 8.668723187371558e-07, "loss": 0.4939, "step": 12633 }, { "epoch": 0.82, "grad_norm": 1.2584047317504883, "learning_rate": 8.662841550257228e-07, "loss": 0.536, "step": 12634 }, { "epoch": 0.82, "grad_norm": 1.4014607667922974, "learning_rate": 8.656961719883434e-07, "loss": 0.5449, "step": 12635 }, { "epoch": 0.82, "grad_norm": 1.1364667415618896, "learning_rate": 8.651083696507173e-07, "loss": 0.4748, "step": 12636 }, { "epoch": 0.82, "grad_norm": 1.1534514427185059, "learning_rate": 8.645207480385364e-07, "loss": 0.4946, "step": 12637 }, { "epoch": 0.82, "grad_norm": 1.2964813709259033, "learning_rate": 8.639333071774841e-07, "loss": 0.5603, "step": 12638 }, { "epoch": 0.82, "grad_norm": 1.2002179622650146, "learning_rate": 8.633460470932359e-07, "loss": 0.4778, "step": 12639 }, { "epoch": 0.82, "grad_norm": 1.232704758644104, "learning_rate": 8.6275896781146e-07, "loss": 0.5337, "step": 12640 }, { "epoch": 0.82, "grad_norm": 1.1833834648132324, "learning_rate": 8.621720693578145e-07, "loss": 0.5484, "step": 12641 }, { "epoch": 0.82, "grad_norm": 1.2502810955047607, "learning_rate": 8.61585351757952e-07, "loss": 0.4742, "step": 12642 }, { "epoch": 0.82, "grad_norm": 1.103963851928711, "learning_rate": 8.60998815037517e-07, "loss": 0.4919, "step": 12643 }, { "epoch": 0.82, "grad_norm": 1.2668098211288452, "learning_rate": 8.604124592221441e-07, "loss": 0.5221, "step": 12644 }, { "epoch": 0.82, "grad_norm": 1.1999884843826294, "learning_rate": 8.598262843374633e-07, "loss": 0.505, "step": 12645 }, { "epoch": 0.82, "grad_norm": 1.2368417978286743, "learning_rate": 8.592402904090946e-07, "loss": 0.5612, "step": 12646 }, { "epoch": 0.82, "grad_norm": 1.3211719989776611, "learning_rate": 8.586544774626482e-07, "loss": 0.4959, "step": 12647 }, { "epoch": 0.82, "grad_norm": 1.2812362909317017, "learning_rate": 8.580688455237296e-07, "loss": 0.5355, "step": 12648 }, { "epoch": 0.82, "grad_norm": 1.1210721731185913, "learning_rate": 8.574833946179356e-07, "loss": 0.5176, "step": 12649 }, { "epoch": 0.82, "grad_norm": 1.1588906049728394, "learning_rate": 8.568981247708547e-07, "loss": 0.5007, "step": 12650 }, { "epoch": 0.82, "grad_norm": 1.1408710479736328, "learning_rate": 8.563130360080674e-07, "loss": 0.4906, "step": 12651 }, { "epoch": 0.82, "grad_norm": 1.2138845920562744, "learning_rate": 8.557281283551478e-07, "loss": 0.5139, "step": 12652 }, { "epoch": 0.82, "grad_norm": 1.2419769763946533, "learning_rate": 8.551434018376575e-07, "loss": 0.4985, "step": 12653 }, { "epoch": 0.82, "grad_norm": 1.2684718370437622, "learning_rate": 8.545588564811558e-07, "loss": 0.5082, "step": 12654 }, { "epoch": 0.82, "grad_norm": 1.1920204162597656, "learning_rate": 8.539744923111909e-07, "loss": 0.4831, "step": 12655 }, { "epoch": 0.82, "grad_norm": 1.184017539024353, "learning_rate": 8.533903093533042e-07, "loss": 0.455, "step": 12656 }, { "epoch": 0.82, "grad_norm": 1.1250228881835938, "learning_rate": 8.528063076330301e-07, "loss": 0.5195, "step": 12657 }, { "epoch": 0.82, "grad_norm": 1.1402018070220947, "learning_rate": 8.522224871758911e-07, "loss": 0.4496, "step": 12658 }, { "epoch": 0.82, "grad_norm": 1.2096697092056274, "learning_rate": 8.51638848007405e-07, "loss": 0.4933, "step": 12659 }, { "epoch": 0.82, "grad_norm": 1.1756110191345215, "learning_rate": 8.510553901530827e-07, "loss": 0.4741, "step": 12660 }, { "epoch": 0.82, "grad_norm": 1.1484029293060303, "learning_rate": 8.504721136384259e-07, "loss": 0.5129, "step": 12661 }, { "epoch": 0.82, "grad_norm": 1.1431536674499512, "learning_rate": 8.498890184889258e-07, "loss": 0.5273, "step": 12662 }, { "epoch": 0.82, "grad_norm": 1.3462458848953247, "learning_rate": 8.493061047300693e-07, "loss": 0.5231, "step": 12663 }, { "epoch": 0.82, "grad_norm": 1.2334884405136108, "learning_rate": 8.487233723873345e-07, "loss": 0.5287, "step": 12664 }, { "epoch": 0.82, "grad_norm": 1.1722240447998047, "learning_rate": 8.481408214861903e-07, "loss": 0.5168, "step": 12665 }, { "epoch": 0.82, "grad_norm": 1.2980915307998657, "learning_rate": 8.475584520520996e-07, "loss": 0.5465, "step": 12666 }, { "epoch": 0.82, "grad_norm": 1.0930813550949097, "learning_rate": 8.46976264110515e-07, "loss": 0.4862, "step": 12667 }, { "epoch": 0.82, "grad_norm": 1.3378171920776367, "learning_rate": 8.463942576868828e-07, "loss": 0.5067, "step": 12668 }, { "epoch": 0.82, "grad_norm": 1.2982234954833984, "learning_rate": 8.458124328066408e-07, "loss": 0.5185, "step": 12669 }, { "epoch": 0.82, "grad_norm": 1.1661055088043213, "learning_rate": 8.45230789495221e-07, "loss": 0.4904, "step": 12670 }, { "epoch": 0.82, "grad_norm": 1.183334231376648, "learning_rate": 8.446493277780427e-07, "loss": 0.5264, "step": 12671 }, { "epoch": 0.82, "grad_norm": 1.1839345693588257, "learning_rate": 8.440680476805213e-07, "loss": 0.5029, "step": 12672 }, { "epoch": 0.82, "grad_norm": 1.1763970851898193, "learning_rate": 8.434869492280628e-07, "loss": 0.5167, "step": 12673 }, { "epoch": 0.82, "grad_norm": 1.203141689300537, "learning_rate": 8.429060324460658e-07, "loss": 0.5015, "step": 12674 }, { "epoch": 0.82, "grad_norm": 1.279422402381897, "learning_rate": 8.423252973599222e-07, "loss": 0.529, "step": 12675 }, { "epoch": 0.82, "grad_norm": 1.2738947868347168, "learning_rate": 8.417447439950116e-07, "loss": 0.5187, "step": 12676 }, { "epoch": 0.82, "grad_norm": 1.2781916856765747, "learning_rate": 8.411643723767099e-07, "loss": 0.5144, "step": 12677 }, { "epoch": 0.82, "grad_norm": 1.216686725616455, "learning_rate": 8.405841825303834e-07, "loss": 0.5685, "step": 12678 }, { "epoch": 0.82, "grad_norm": 1.140552282333374, "learning_rate": 8.400041744813909e-07, "loss": 0.4869, "step": 12679 }, { "epoch": 0.82, "grad_norm": 1.134055733680725, "learning_rate": 8.394243482550829e-07, "loss": 0.4826, "step": 12680 }, { "epoch": 0.82, "grad_norm": 1.238544225692749, "learning_rate": 8.388447038768038e-07, "loss": 0.5379, "step": 12681 }, { "epoch": 0.82, "grad_norm": 1.200211524963379, "learning_rate": 8.38265241371885e-07, "loss": 0.4726, "step": 12682 }, { "epoch": 0.82, "grad_norm": 1.2012180089950562, "learning_rate": 8.376859607656557e-07, "loss": 0.5017, "step": 12683 }, { "epoch": 0.82, "grad_norm": 1.318311095237732, "learning_rate": 8.371068620834333e-07, "loss": 0.4996, "step": 12684 }, { "epoch": 0.82, "grad_norm": 1.271974802017212, "learning_rate": 8.365279453505304e-07, "loss": 0.5552, "step": 12685 }, { "epoch": 0.82, "grad_norm": 1.162192463874817, "learning_rate": 8.359492105922484e-07, "loss": 0.4891, "step": 12686 }, { "epoch": 0.82, "grad_norm": 1.1438789367675781, "learning_rate": 8.353706578338849e-07, "loss": 0.5396, "step": 12687 }, { "epoch": 0.82, "grad_norm": 1.2138404846191406, "learning_rate": 8.347922871007236e-07, "loss": 0.5184, "step": 12688 }, { "epoch": 0.82, "grad_norm": 1.1643376350402832, "learning_rate": 8.342140984180446e-07, "loss": 0.5137, "step": 12689 }, { "epoch": 0.82, "grad_norm": 1.1873003244400024, "learning_rate": 8.336360918111202e-07, "loss": 0.4657, "step": 12690 }, { "epoch": 0.82, "grad_norm": 1.4235453605651855, "learning_rate": 8.330582673052124e-07, "loss": 0.5901, "step": 12691 }, { "epoch": 0.82, "grad_norm": 1.1495290994644165, "learning_rate": 8.324806249255768e-07, "loss": 0.4611, "step": 12692 }, { "epoch": 0.82, "grad_norm": 1.215514898300171, "learning_rate": 8.319031646974624e-07, "loss": 0.5209, "step": 12693 }, { "epoch": 0.82, "grad_norm": 1.2368851900100708, "learning_rate": 8.313258866461055e-07, "loss": 0.4741, "step": 12694 }, { "epoch": 0.82, "grad_norm": 1.2064186334609985, "learning_rate": 8.307487907967383e-07, "loss": 0.5332, "step": 12695 }, { "epoch": 0.82, "grad_norm": 1.1751118898391724, "learning_rate": 8.301718771745854e-07, "loss": 0.5792, "step": 12696 }, { "epoch": 0.82, "grad_norm": 1.181519865989685, "learning_rate": 8.295951458048607e-07, "loss": 0.5065, "step": 12697 }, { "epoch": 0.82, "grad_norm": 1.260047435760498, "learning_rate": 8.290185967127729e-07, "loss": 0.5135, "step": 12698 }, { "epoch": 0.82, "grad_norm": 1.1864478588104248, "learning_rate": 8.284422299235218e-07, "loss": 0.4584, "step": 12699 }, { "epoch": 0.82, "grad_norm": 1.1540032625198364, "learning_rate": 8.278660454622973e-07, "loss": 0.5193, "step": 12700 }, { "epoch": 0.82, "grad_norm": 1.132124662399292, "learning_rate": 8.272900433542835e-07, "loss": 0.5189, "step": 12701 }, { "epoch": 0.82, "grad_norm": 1.2824422121047974, "learning_rate": 8.267142236246562e-07, "loss": 0.5478, "step": 12702 }, { "epoch": 0.82, "grad_norm": 1.2044782638549805, "learning_rate": 8.261385862985827e-07, "loss": 0.5301, "step": 12703 }, { "epoch": 0.82, "grad_norm": 1.2976813316345215, "learning_rate": 8.255631314012231e-07, "loss": 0.5355, "step": 12704 }, { "epoch": 0.82, "grad_norm": 1.2125226259231567, "learning_rate": 8.249878589577298e-07, "loss": 0.5373, "step": 12705 }, { "epoch": 0.82, "grad_norm": 1.1180915832519531, "learning_rate": 8.244127689932446e-07, "loss": 0.4603, "step": 12706 }, { "epoch": 0.82, "grad_norm": 1.2538127899169922, "learning_rate": 8.23837861532904e-07, "loss": 0.5135, "step": 12707 }, { "epoch": 0.82, "grad_norm": 1.1358168125152588, "learning_rate": 8.232631366018362e-07, "loss": 0.469, "step": 12708 }, { "epoch": 0.82, "grad_norm": 1.1227238178253174, "learning_rate": 8.2268859422516e-07, "loss": 0.46, "step": 12709 }, { "epoch": 0.82, "grad_norm": 1.168811321258545, "learning_rate": 8.221142344279892e-07, "loss": 0.5039, "step": 12710 }, { "epoch": 0.82, "grad_norm": 1.089228868484497, "learning_rate": 8.215400572354249e-07, "loss": 0.5228, "step": 12711 }, { "epoch": 0.82, "grad_norm": 1.154524326324463, "learning_rate": 8.20966062672564e-07, "loss": 0.535, "step": 12712 }, { "epoch": 0.82, "grad_norm": 1.3492729663848877, "learning_rate": 8.203922507644946e-07, "loss": 0.4545, "step": 12713 }, { "epoch": 0.82, "grad_norm": 1.1327977180480957, "learning_rate": 8.198186215362969e-07, "loss": 0.4731, "step": 12714 }, { "epoch": 0.82, "grad_norm": 1.2050749063491821, "learning_rate": 8.192451750130414e-07, "loss": 0.532, "step": 12715 }, { "epoch": 0.82, "grad_norm": 1.2000211477279663, "learning_rate": 8.186719112197944e-07, "loss": 0.5316, "step": 12716 }, { "epoch": 0.82, "grad_norm": 1.295409083366394, "learning_rate": 8.18098830181609e-07, "loss": 0.5426, "step": 12717 }, { "epoch": 0.82, "grad_norm": 1.2065843343734741, "learning_rate": 8.17525931923534e-07, "loss": 0.4662, "step": 12718 }, { "epoch": 0.82, "grad_norm": 1.1257683038711548, "learning_rate": 8.169532164706101e-07, "loss": 0.4685, "step": 12719 }, { "epoch": 0.82, "grad_norm": 1.261668086051941, "learning_rate": 8.163806838478683e-07, "loss": 0.519, "step": 12720 }, { "epoch": 0.82, "grad_norm": 1.1857492923736572, "learning_rate": 8.15808334080333e-07, "loss": 0.4677, "step": 12721 }, { "epoch": 0.82, "grad_norm": 1.2627007961273193, "learning_rate": 8.152361671930209e-07, "loss": 0.5165, "step": 12722 }, { "epoch": 0.82, "grad_norm": 1.4079111814498901, "learning_rate": 8.146641832109386e-07, "loss": 0.4907, "step": 12723 }, { "epoch": 0.82, "grad_norm": 1.1791325807571411, "learning_rate": 8.140923821590863e-07, "loss": 0.5438, "step": 12724 }, { "epoch": 0.82, "grad_norm": 1.2496459484100342, "learning_rate": 8.135207640624559e-07, "loss": 0.5104, "step": 12725 }, { "epoch": 0.82, "grad_norm": 1.1361472606658936, "learning_rate": 8.129493289460321e-07, "loss": 0.4998, "step": 12726 }, { "epoch": 0.82, "grad_norm": 1.1284229755401611, "learning_rate": 8.1237807683479e-07, "loss": 0.4681, "step": 12727 }, { "epoch": 0.82, "grad_norm": 1.272552728652954, "learning_rate": 8.118070077536993e-07, "loss": 0.5157, "step": 12728 }, { "epoch": 0.82, "grad_norm": 1.3270066976547241, "learning_rate": 8.112361217277176e-07, "loss": 0.4846, "step": 12729 }, { "epoch": 0.82, "grad_norm": 1.1628128290176392, "learning_rate": 8.106654187817975e-07, "loss": 0.4728, "step": 12730 }, { "epoch": 0.82, "grad_norm": 1.2396694421768188, "learning_rate": 8.100948989408836e-07, "loss": 0.5126, "step": 12731 }, { "epoch": 0.82, "grad_norm": 1.3655376434326172, "learning_rate": 8.095245622299114e-07, "loss": 0.5398, "step": 12732 }, { "epoch": 0.82, "grad_norm": 1.326877474784851, "learning_rate": 8.089544086738088e-07, "loss": 0.5286, "step": 12733 }, { "epoch": 0.82, "grad_norm": 1.2186721563339233, "learning_rate": 8.083844382974976e-07, "loss": 0.4953, "step": 12734 }, { "epoch": 0.82, "grad_norm": 1.1975743770599365, "learning_rate": 8.078146511258867e-07, "loss": 0.5456, "step": 12735 }, { "epoch": 0.82, "grad_norm": 1.135530948638916, "learning_rate": 8.072450471838817e-07, "loss": 0.4723, "step": 12736 }, { "epoch": 0.82, "grad_norm": 1.2735706567764282, "learning_rate": 8.066756264963776e-07, "loss": 0.5111, "step": 12737 }, { "epoch": 0.82, "grad_norm": 1.2515519857406616, "learning_rate": 8.061063890882637e-07, "loss": 0.5007, "step": 12738 }, { "epoch": 0.82, "grad_norm": 1.3140990734100342, "learning_rate": 8.055373349844187e-07, "loss": 0.5234, "step": 12739 }, { "epoch": 0.82, "grad_norm": 1.2576169967651367, "learning_rate": 8.049684642097161e-07, "loss": 0.4985, "step": 12740 }, { "epoch": 0.82, "grad_norm": 1.1884020566940308, "learning_rate": 8.043997767890171e-07, "loss": 0.4995, "step": 12741 }, { "epoch": 0.82, "grad_norm": 1.1464157104492188, "learning_rate": 8.038312727471798e-07, "loss": 0.4975, "step": 12742 }, { "epoch": 0.82, "grad_norm": 1.2154172658920288, "learning_rate": 8.032629521090507e-07, "loss": 0.4878, "step": 12743 }, { "epoch": 0.82, "grad_norm": 1.2979121208190918, "learning_rate": 8.026948148994706e-07, "loss": 0.5111, "step": 12744 }, { "epoch": 0.82, "grad_norm": 1.1674093008041382, "learning_rate": 8.021268611432709e-07, "loss": 0.5464, "step": 12745 }, { "epoch": 0.82, "grad_norm": 1.2746776342391968, "learning_rate": 8.015590908652765e-07, "loss": 0.5191, "step": 12746 }, { "epoch": 0.82, "grad_norm": 1.1975398063659668, "learning_rate": 8.00991504090301e-07, "loss": 0.494, "step": 12747 }, { "epoch": 0.82, "grad_norm": 1.3185569047927856, "learning_rate": 8.004241008431535e-07, "loss": 0.5712, "step": 12748 }, { "epoch": 0.82, "grad_norm": 1.2353625297546387, "learning_rate": 7.998568811486335e-07, "loss": 0.5372, "step": 12749 }, { "epoch": 0.82, "grad_norm": 1.2906588315963745, "learning_rate": 7.992898450315334e-07, "loss": 0.4545, "step": 12750 }, { "epoch": 0.82, "grad_norm": 1.2410629987716675, "learning_rate": 7.987229925166351e-07, "loss": 0.5047, "step": 12751 }, { "epoch": 0.82, "grad_norm": 1.2302170991897583, "learning_rate": 7.98156323628716e-07, "loss": 0.5108, "step": 12752 }, { "epoch": 0.82, "grad_norm": 1.6343210935592651, "learning_rate": 7.975898383925424e-07, "loss": 0.4941, "step": 12753 }, { "epoch": 0.82, "grad_norm": 1.1804137229919434, "learning_rate": 7.970235368328749e-07, "loss": 0.5039, "step": 12754 }, { "epoch": 0.82, "grad_norm": 1.4066741466522217, "learning_rate": 7.964574189744661e-07, "loss": 0.5352, "step": 12755 }, { "epoch": 0.82, "grad_norm": 1.1667182445526123, "learning_rate": 7.958914848420568e-07, "loss": 0.5128, "step": 12756 }, { "epoch": 0.82, "grad_norm": 1.1170122623443604, "learning_rate": 7.953257344603838e-07, "loss": 0.4975, "step": 12757 }, { "epoch": 0.82, "grad_norm": 1.165144920349121, "learning_rate": 7.947601678541749e-07, "loss": 0.5033, "step": 12758 }, { "epoch": 0.82, "grad_norm": 1.1925431489944458, "learning_rate": 7.941947850481507e-07, "loss": 0.4453, "step": 12759 }, { "epoch": 0.82, "grad_norm": 1.2068779468536377, "learning_rate": 7.936295860670201e-07, "loss": 0.5065, "step": 12760 }, { "epoch": 0.82, "grad_norm": 1.1414223909378052, "learning_rate": 7.930645709354878e-07, "loss": 0.5046, "step": 12761 }, { "epoch": 0.82, "grad_norm": 1.2231457233428955, "learning_rate": 7.924997396782491e-07, "loss": 0.5454, "step": 12762 }, { "epoch": 0.82, "grad_norm": 1.1909552812576294, "learning_rate": 7.919350923199909e-07, "loss": 0.5075, "step": 12763 }, { "epoch": 0.82, "grad_norm": 1.1949266195297241, "learning_rate": 7.913706288853945e-07, "loss": 0.5139, "step": 12764 }, { "epoch": 0.82, "grad_norm": 1.4288272857666016, "learning_rate": 7.90806349399128e-07, "loss": 0.521, "step": 12765 }, { "epoch": 0.82, "grad_norm": 1.2784186601638794, "learning_rate": 7.902422538858562e-07, "loss": 0.5075, "step": 12766 }, { "epoch": 0.82, "grad_norm": 1.1145919561386108, "learning_rate": 7.896783423702342e-07, "loss": 0.4571, "step": 12767 }, { "epoch": 0.82, "grad_norm": 1.3133198022842407, "learning_rate": 7.891146148769091e-07, "loss": 0.5528, "step": 12768 }, { "epoch": 0.82, "grad_norm": 1.2130405902862549, "learning_rate": 7.885510714305211e-07, "loss": 0.4987, "step": 12769 }, { "epoch": 0.82, "grad_norm": 1.1861270666122437, "learning_rate": 7.879877120556989e-07, "loss": 0.5076, "step": 12770 }, { "epoch": 0.82, "grad_norm": 1.1620248556137085, "learning_rate": 7.874245367770667e-07, "loss": 0.5194, "step": 12771 }, { "epoch": 0.82, "grad_norm": 1.2050637006759644, "learning_rate": 7.868615456192391e-07, "loss": 0.5213, "step": 12772 }, { "epoch": 0.82, "grad_norm": 1.2736639976501465, "learning_rate": 7.862987386068238e-07, "loss": 0.5245, "step": 12773 }, { "epoch": 0.82, "grad_norm": 1.1222953796386719, "learning_rate": 7.857361157644188e-07, "loss": 0.4916, "step": 12774 }, { "epoch": 0.82, "grad_norm": 1.1057143211364746, "learning_rate": 7.851736771166162e-07, "loss": 0.5072, "step": 12775 }, { "epoch": 0.82, "grad_norm": 1.2577391862869263, "learning_rate": 7.846114226879969e-07, "loss": 0.513, "step": 12776 }, { "epoch": 0.82, "grad_norm": 1.307693600654602, "learning_rate": 7.840493525031367e-07, "loss": 0.5553, "step": 12777 }, { "epoch": 0.82, "grad_norm": 1.0933581590652466, "learning_rate": 7.834874665866021e-07, "loss": 0.5115, "step": 12778 }, { "epoch": 0.82, "grad_norm": 1.2280478477478027, "learning_rate": 7.829257649629512e-07, "loss": 0.5189, "step": 12779 }, { "epoch": 0.82, "grad_norm": 1.2356221675872803, "learning_rate": 7.823642476567351e-07, "loss": 0.5527, "step": 12780 }, { "epoch": 0.83, "grad_norm": 1.1867485046386719, "learning_rate": 7.818029146924971e-07, "loss": 0.5182, "step": 12781 }, { "epoch": 0.83, "grad_norm": 1.263270378112793, "learning_rate": 7.812417660947691e-07, "loss": 0.4975, "step": 12782 }, { "epoch": 0.83, "grad_norm": 1.1824935674667358, "learning_rate": 7.806808018880795e-07, "loss": 0.5208, "step": 12783 }, { "epoch": 0.83, "grad_norm": 1.2893928289413452, "learning_rate": 7.801200220969457e-07, "loss": 0.5025, "step": 12784 }, { "epoch": 0.83, "grad_norm": 1.2952772378921509, "learning_rate": 7.795594267458789e-07, "loss": 0.4566, "step": 12785 }, { "epoch": 0.83, "grad_norm": 1.224729299545288, "learning_rate": 7.789990158593803e-07, "loss": 0.4618, "step": 12786 }, { "epoch": 0.83, "grad_norm": 1.172870397567749, "learning_rate": 7.784387894619455e-07, "loss": 0.5077, "step": 12787 }, { "epoch": 0.83, "grad_norm": 1.165479063987732, "learning_rate": 7.77878747578058e-07, "loss": 0.4651, "step": 12788 }, { "epoch": 0.83, "grad_norm": 1.3534413576126099, "learning_rate": 7.773188902321976e-07, "loss": 0.5626, "step": 12789 }, { "epoch": 0.83, "grad_norm": 1.1736966371536255, "learning_rate": 7.767592174488342e-07, "loss": 0.5113, "step": 12790 }, { "epoch": 0.83, "grad_norm": 1.2150503396987915, "learning_rate": 7.761997292524287e-07, "loss": 0.4834, "step": 12791 }, { "epoch": 0.83, "grad_norm": 1.1222530603408813, "learning_rate": 7.756404256674354e-07, "loss": 0.4816, "step": 12792 }, { "epoch": 0.83, "grad_norm": 1.1799615621566772, "learning_rate": 7.750813067183016e-07, "loss": 0.4776, "step": 12793 }, { "epoch": 0.83, "grad_norm": 1.2495981454849243, "learning_rate": 7.745223724294626e-07, "loss": 0.5629, "step": 12794 }, { "epoch": 0.83, "grad_norm": 1.2282875776290894, "learning_rate": 7.739636228253484e-07, "loss": 0.5179, "step": 12795 }, { "epoch": 0.83, "grad_norm": 1.2926216125488281, "learning_rate": 7.734050579303809e-07, "loss": 0.5181, "step": 12796 }, { "epoch": 0.83, "grad_norm": 1.2174224853515625, "learning_rate": 7.728466777689736e-07, "loss": 0.521, "step": 12797 }, { "epoch": 0.83, "grad_norm": 1.1805171966552734, "learning_rate": 7.72288482365533e-07, "loss": 0.5024, "step": 12798 }, { "epoch": 0.83, "grad_norm": 1.200355052947998, "learning_rate": 7.71730471744454e-07, "loss": 0.5627, "step": 12799 }, { "epoch": 0.83, "grad_norm": 1.1880345344543457, "learning_rate": 7.711726459301272e-07, "loss": 0.4948, "step": 12800 }, { "epoch": 0.83, "grad_norm": 1.3273227214813232, "learning_rate": 7.70615004946933e-07, "loss": 0.4766, "step": 12801 }, { "epoch": 0.83, "grad_norm": 1.2991520166397095, "learning_rate": 7.700575488192458e-07, "loss": 0.5565, "step": 12802 }, { "epoch": 0.83, "grad_norm": 1.1692075729370117, "learning_rate": 7.695002775714289e-07, "loss": 0.5143, "step": 12803 }, { "epoch": 0.83, "grad_norm": 1.1738994121551514, "learning_rate": 7.689431912278416e-07, "loss": 0.4826, "step": 12804 }, { "epoch": 0.83, "grad_norm": 1.333484172821045, "learning_rate": 7.683862898128302e-07, "loss": 0.5384, "step": 12805 }, { "epoch": 0.83, "grad_norm": 1.2165876626968384, "learning_rate": 7.678295733507357e-07, "loss": 0.4855, "step": 12806 }, { "epoch": 0.83, "grad_norm": 1.234163761138916, "learning_rate": 7.672730418658919e-07, "loss": 0.4982, "step": 12807 }, { "epoch": 0.83, "grad_norm": 1.2227600812911987, "learning_rate": 7.667166953826227e-07, "loss": 0.512, "step": 12808 }, { "epoch": 0.83, "grad_norm": 1.2308909893035889, "learning_rate": 7.661605339252448e-07, "loss": 0.5283, "step": 12809 }, { "epoch": 0.83, "grad_norm": 1.2724268436431885, "learning_rate": 7.656045575180682e-07, "loss": 0.5001, "step": 12810 }, { "epoch": 0.83, "grad_norm": 1.1094070672988892, "learning_rate": 7.650487661853895e-07, "loss": 0.4839, "step": 12811 }, { "epoch": 0.83, "grad_norm": 1.3446215391159058, "learning_rate": 7.644931599515031e-07, "loss": 0.4937, "step": 12812 }, { "epoch": 0.83, "grad_norm": 1.3842917680740356, "learning_rate": 7.639377388406932e-07, "loss": 0.4828, "step": 12813 }, { "epoch": 0.83, "grad_norm": 1.2953296899795532, "learning_rate": 7.633825028772357e-07, "loss": 0.5244, "step": 12814 }, { "epoch": 0.83, "grad_norm": 1.2060390710830688, "learning_rate": 7.628274520853978e-07, "loss": 0.5189, "step": 12815 }, { "epoch": 0.83, "grad_norm": 1.3595163822174072, "learning_rate": 7.622725864894414e-07, "loss": 0.5096, "step": 12816 }, { "epoch": 0.83, "grad_norm": 1.2838743925094604, "learning_rate": 7.617179061136154e-07, "loss": 0.5062, "step": 12817 }, { "epoch": 0.83, "grad_norm": 1.3154709339141846, "learning_rate": 7.611634109821653e-07, "loss": 0.5619, "step": 12818 }, { "epoch": 0.83, "grad_norm": 1.1784934997558594, "learning_rate": 7.606091011193256e-07, "loss": 0.51, "step": 12819 }, { "epoch": 0.83, "grad_norm": 1.2539236545562744, "learning_rate": 7.600549765493248e-07, "loss": 0.5366, "step": 12820 }, { "epoch": 0.83, "grad_norm": 1.2834632396697998, "learning_rate": 7.595010372963812e-07, "loss": 0.5027, "step": 12821 }, { "epoch": 0.83, "grad_norm": 1.146056890487671, "learning_rate": 7.589472833847083e-07, "loss": 0.5477, "step": 12822 }, { "epoch": 0.83, "grad_norm": 1.2941230535507202, "learning_rate": 7.583937148385062e-07, "loss": 0.5247, "step": 12823 }, { "epoch": 0.83, "grad_norm": 1.161833643913269, "learning_rate": 7.578403316819716e-07, "loss": 0.482, "step": 12824 }, { "epoch": 0.83, "grad_norm": 1.2318686246871948, "learning_rate": 7.572871339392907e-07, "loss": 0.5144, "step": 12825 }, { "epoch": 0.83, "grad_norm": 1.2969180345535278, "learning_rate": 7.56734121634643e-07, "loss": 0.49, "step": 12826 }, { "epoch": 0.83, "grad_norm": 1.115670084953308, "learning_rate": 7.561812947921993e-07, "loss": 0.5164, "step": 12827 }, { "epoch": 0.83, "grad_norm": 1.1668293476104736, "learning_rate": 7.55628653436123e-07, "loss": 0.5032, "step": 12828 }, { "epoch": 0.83, "grad_norm": 1.3482478857040405, "learning_rate": 7.550761975905669e-07, "loss": 0.525, "step": 12829 }, { "epoch": 0.83, "grad_norm": 1.2298085689544678, "learning_rate": 7.545239272796773e-07, "loss": 0.5402, "step": 12830 }, { "epoch": 0.83, "grad_norm": 1.155315637588501, "learning_rate": 7.539718425275938e-07, "loss": 0.4678, "step": 12831 }, { "epoch": 0.83, "grad_norm": 1.2272435426712036, "learning_rate": 7.53419943358446e-07, "loss": 0.5439, "step": 12832 }, { "epoch": 0.83, "grad_norm": 1.3313689231872559, "learning_rate": 7.528682297963568e-07, "loss": 0.5273, "step": 12833 }, { "epoch": 0.83, "grad_norm": 1.16974675655365, "learning_rate": 7.523167018654398e-07, "loss": 0.4794, "step": 12834 }, { "epoch": 0.83, "grad_norm": 1.161012053489685, "learning_rate": 7.517653595898e-07, "loss": 0.5114, "step": 12835 }, { "epoch": 0.83, "grad_norm": 1.259061336517334, "learning_rate": 7.512142029935359e-07, "loss": 0.5036, "step": 12836 }, { "epoch": 0.83, "grad_norm": 1.2597700357437134, "learning_rate": 7.506632321007362e-07, "loss": 0.4798, "step": 12837 }, { "epoch": 0.83, "grad_norm": 1.159933090209961, "learning_rate": 7.501124469354837e-07, "loss": 0.5276, "step": 12838 }, { "epoch": 0.83, "grad_norm": 1.2864798307418823, "learning_rate": 7.495618475218524e-07, "loss": 0.4952, "step": 12839 }, { "epoch": 0.83, "grad_norm": 1.442318320274353, "learning_rate": 7.490114338839055e-07, "loss": 0.5176, "step": 12840 }, { "epoch": 0.83, "grad_norm": 1.131020426750183, "learning_rate": 7.484612060457008e-07, "loss": 0.5067, "step": 12841 }, { "epoch": 0.83, "grad_norm": 1.1859670877456665, "learning_rate": 7.47911164031288e-07, "loss": 0.4948, "step": 12842 }, { "epoch": 0.83, "grad_norm": 1.3319194316864014, "learning_rate": 7.473613078647074e-07, "loss": 0.5118, "step": 12843 }, { "epoch": 0.83, "grad_norm": 1.2876200675964355, "learning_rate": 7.468116375699935e-07, "loss": 0.4948, "step": 12844 }, { "epoch": 0.83, "grad_norm": 1.2900655269622803, "learning_rate": 7.462621531711683e-07, "loss": 0.521, "step": 12845 }, { "epoch": 0.83, "grad_norm": 1.2546882629394531, "learning_rate": 7.457128546922493e-07, "loss": 0.5061, "step": 12846 }, { "epoch": 0.83, "grad_norm": 1.1355273723602295, "learning_rate": 7.451637421572455e-07, "loss": 0.4796, "step": 12847 }, { "epoch": 0.83, "grad_norm": 1.1776068210601807, "learning_rate": 7.446148155901578e-07, "loss": 0.5094, "step": 12848 }, { "epoch": 0.83, "grad_norm": 1.2008086442947388, "learning_rate": 7.44066075014977e-07, "loss": 0.5298, "step": 12849 }, { "epoch": 0.83, "grad_norm": 1.1562247276306152, "learning_rate": 7.435175204556872e-07, "loss": 0.4728, "step": 12850 }, { "epoch": 0.83, "grad_norm": 1.2696343660354614, "learning_rate": 7.429691519362642e-07, "loss": 0.5536, "step": 12851 }, { "epoch": 0.83, "grad_norm": 1.4701260328292847, "learning_rate": 7.424209694806766e-07, "loss": 0.4806, "step": 12852 }, { "epoch": 0.83, "grad_norm": 1.3663071393966675, "learning_rate": 7.418729731128854e-07, "loss": 0.5019, "step": 12853 }, { "epoch": 0.83, "grad_norm": 1.3714474439620972, "learning_rate": 7.413251628568385e-07, "loss": 0.517, "step": 12854 }, { "epoch": 0.83, "grad_norm": 1.256650686264038, "learning_rate": 7.407775387364818e-07, "loss": 0.4968, "step": 12855 }, { "epoch": 0.83, "grad_norm": 1.1885300874710083, "learning_rate": 7.402301007757496e-07, "loss": 0.491, "step": 12856 }, { "epoch": 0.83, "grad_norm": 1.3793870210647583, "learning_rate": 7.396828489985708e-07, "loss": 0.477, "step": 12857 }, { "epoch": 0.83, "grad_norm": 1.1566933393478394, "learning_rate": 7.391357834288615e-07, "loss": 0.4956, "step": 12858 }, { "epoch": 0.83, "grad_norm": 1.3073762655258179, "learning_rate": 7.385889040905336e-07, "loss": 0.5434, "step": 12859 }, { "epoch": 0.83, "grad_norm": 1.102076768875122, "learning_rate": 7.380422110074908e-07, "loss": 0.516, "step": 12860 }, { "epoch": 0.83, "grad_norm": 1.2562953233718872, "learning_rate": 7.374957042036268e-07, "loss": 0.5272, "step": 12861 }, { "epoch": 0.83, "grad_norm": 1.251255750656128, "learning_rate": 7.36949383702828e-07, "loss": 0.498, "step": 12862 }, { "epoch": 0.83, "grad_norm": 1.2112321853637695, "learning_rate": 7.364032495289742e-07, "loss": 0.4558, "step": 12863 }, { "epoch": 0.83, "grad_norm": 1.4269500970840454, "learning_rate": 7.358573017059323e-07, "loss": 0.5728, "step": 12864 }, { "epoch": 0.83, "grad_norm": 1.276334285736084, "learning_rate": 7.353115402575666e-07, "loss": 0.4607, "step": 12865 }, { "epoch": 0.83, "grad_norm": 1.2556023597717285, "learning_rate": 7.347659652077304e-07, "loss": 0.4978, "step": 12866 }, { "epoch": 0.83, "grad_norm": 1.1389472484588623, "learning_rate": 7.342205765802695e-07, "loss": 0.5131, "step": 12867 }, { "epoch": 0.83, "grad_norm": 1.1068986654281616, "learning_rate": 7.336753743990215e-07, "loss": 0.4744, "step": 12868 }, { "epoch": 0.83, "grad_norm": 1.2337294816970825, "learning_rate": 7.331303586878163e-07, "loss": 0.4687, "step": 12869 }, { "epoch": 0.83, "grad_norm": 1.328869104385376, "learning_rate": 7.325855294704737e-07, "loss": 0.5207, "step": 12870 }, { "epoch": 0.83, "grad_norm": 1.2503454685211182, "learning_rate": 7.320408867708073e-07, "loss": 0.5359, "step": 12871 }, { "epoch": 0.83, "grad_norm": 1.1415231227874756, "learning_rate": 7.314964306126227e-07, "loss": 0.5031, "step": 12872 }, { "epoch": 0.83, "grad_norm": 1.1840800046920776, "learning_rate": 7.309521610197157e-07, "loss": 0.5063, "step": 12873 }, { "epoch": 0.83, "grad_norm": 1.4648195505142212, "learning_rate": 7.304080780158756e-07, "loss": 0.5255, "step": 12874 }, { "epoch": 0.83, "grad_norm": 1.234169363975525, "learning_rate": 7.298641816248836e-07, "loss": 0.5486, "step": 12875 }, { "epoch": 0.83, "grad_norm": 1.265979528427124, "learning_rate": 7.293204718705105e-07, "loss": 0.5269, "step": 12876 }, { "epoch": 0.83, "grad_norm": 1.2544609308242798, "learning_rate": 7.287769487765206e-07, "loss": 0.4656, "step": 12877 }, { "epoch": 0.83, "grad_norm": 1.1955622434616089, "learning_rate": 7.282336123666705e-07, "loss": 0.4696, "step": 12878 }, { "epoch": 0.83, "grad_norm": 1.2203619480133057, "learning_rate": 7.27690462664708e-07, "loss": 0.547, "step": 12879 }, { "epoch": 0.83, "grad_norm": 1.12389075756073, "learning_rate": 7.271474996943729e-07, "loss": 0.5025, "step": 12880 }, { "epoch": 0.83, "grad_norm": 1.257737636566162, "learning_rate": 7.266047234793972e-07, "loss": 0.5459, "step": 12881 }, { "epoch": 0.83, "grad_norm": 1.2947784662246704, "learning_rate": 7.260621340435025e-07, "loss": 0.5439, "step": 12882 }, { "epoch": 0.83, "grad_norm": 1.290711522102356, "learning_rate": 7.255197314104051e-07, "loss": 0.4755, "step": 12883 }, { "epoch": 0.83, "grad_norm": 1.1927765607833862, "learning_rate": 7.249775156038124e-07, "loss": 0.5034, "step": 12884 }, { "epoch": 0.83, "grad_norm": 1.1549010276794434, "learning_rate": 7.244354866474224e-07, "loss": 0.4982, "step": 12885 }, { "epoch": 0.83, "grad_norm": 1.1882497072219849, "learning_rate": 7.238936445649269e-07, "loss": 0.489, "step": 12886 }, { "epoch": 0.83, "grad_norm": 1.3795055150985718, "learning_rate": 7.233519893800068e-07, "loss": 0.4537, "step": 12887 }, { "epoch": 0.83, "grad_norm": 1.2371723651885986, "learning_rate": 7.228105211163378e-07, "loss": 0.5026, "step": 12888 }, { "epoch": 0.83, "grad_norm": 1.1242468357086182, "learning_rate": 7.22269239797585e-07, "loss": 0.4981, "step": 12889 }, { "epoch": 0.83, "grad_norm": 1.2120898962020874, "learning_rate": 7.217281454474073e-07, "loss": 0.4797, "step": 12890 }, { "epoch": 0.83, "grad_norm": 1.119213581085205, "learning_rate": 7.211872380894541e-07, "loss": 0.4883, "step": 12891 }, { "epoch": 0.83, "grad_norm": 1.25348699092865, "learning_rate": 7.206465177473687e-07, "loss": 0.5066, "step": 12892 }, { "epoch": 0.83, "grad_norm": 1.2183986902236938, "learning_rate": 7.201059844447817e-07, "loss": 0.4827, "step": 12893 }, { "epoch": 0.83, "grad_norm": 1.2183938026428223, "learning_rate": 7.195656382053201e-07, "loss": 0.5324, "step": 12894 }, { "epoch": 0.83, "grad_norm": 1.2448176145553589, "learning_rate": 7.190254790526008e-07, "loss": 0.4786, "step": 12895 }, { "epoch": 0.83, "grad_norm": 1.3261443376541138, "learning_rate": 7.184855070102326e-07, "loss": 0.5278, "step": 12896 }, { "epoch": 0.83, "grad_norm": 1.2132760286331177, "learning_rate": 7.179457221018165e-07, "loss": 0.5341, "step": 12897 }, { "epoch": 0.83, "grad_norm": 1.2508153915405273, "learning_rate": 7.174061243509462e-07, "loss": 0.5404, "step": 12898 }, { "epoch": 0.83, "grad_norm": 1.2518593072891235, "learning_rate": 7.168667137812036e-07, "loss": 0.5112, "step": 12899 }, { "epoch": 0.83, "grad_norm": 1.1188480854034424, "learning_rate": 7.163274904161666e-07, "loss": 0.5146, "step": 12900 }, { "epoch": 0.83, "grad_norm": 1.2252804040908813, "learning_rate": 7.157884542794024e-07, "loss": 0.5044, "step": 12901 }, { "epoch": 0.83, "grad_norm": 1.2167086601257324, "learning_rate": 7.152496053944725e-07, "loss": 0.4868, "step": 12902 }, { "epoch": 0.83, "grad_norm": 1.3262494802474976, "learning_rate": 7.147109437849265e-07, "loss": 0.5186, "step": 12903 }, { "epoch": 0.83, "grad_norm": 1.2783464193344116, "learning_rate": 7.141724694743108e-07, "loss": 0.5471, "step": 12904 }, { "epoch": 0.83, "grad_norm": 1.2021178007125854, "learning_rate": 7.136341824861576e-07, "loss": 0.5128, "step": 12905 }, { "epoch": 0.83, "grad_norm": 1.2213300466537476, "learning_rate": 7.130960828439948e-07, "loss": 0.5095, "step": 12906 }, { "epoch": 0.83, "grad_norm": 1.2063179016113281, "learning_rate": 7.125581705713425e-07, "loss": 0.4841, "step": 12907 }, { "epoch": 0.83, "grad_norm": 1.2582204341888428, "learning_rate": 7.120204456917102e-07, "loss": 0.5222, "step": 12908 }, { "epoch": 0.83, "grad_norm": 2.030024766921997, "learning_rate": 7.114829082286017e-07, "loss": 0.4941, "step": 12909 }, { "epoch": 0.83, "grad_norm": 1.170148253440857, "learning_rate": 7.109455582055114e-07, "loss": 0.5274, "step": 12910 }, { "epoch": 0.83, "grad_norm": 1.2453011274337769, "learning_rate": 7.10408395645924e-07, "loss": 0.5062, "step": 12911 }, { "epoch": 0.83, "grad_norm": 1.200289011001587, "learning_rate": 7.098714205733181e-07, "loss": 0.5179, "step": 12912 }, { "epoch": 0.83, "grad_norm": 1.1653720140457153, "learning_rate": 7.093346330111639e-07, "loss": 0.5088, "step": 12913 }, { "epoch": 0.83, "grad_norm": 1.1329073905944824, "learning_rate": 7.08798032982923e-07, "loss": 0.4893, "step": 12914 }, { "epoch": 0.83, "grad_norm": 1.2462220191955566, "learning_rate": 7.082616205120485e-07, "loss": 0.5543, "step": 12915 }, { "epoch": 0.83, "grad_norm": 1.1798253059387207, "learning_rate": 7.07725395621987e-07, "loss": 0.5636, "step": 12916 }, { "epoch": 0.83, "grad_norm": 1.2512577772140503, "learning_rate": 7.071893583361728e-07, "loss": 0.481, "step": 12917 }, { "epoch": 0.83, "grad_norm": 1.2102994918823242, "learning_rate": 7.066535086780363e-07, "loss": 0.5078, "step": 12918 }, { "epoch": 0.83, "grad_norm": 1.2094663381576538, "learning_rate": 7.061178466709978e-07, "loss": 0.5428, "step": 12919 }, { "epoch": 0.83, "grad_norm": 1.1674338579177856, "learning_rate": 7.055823723384703e-07, "loss": 0.4846, "step": 12920 }, { "epoch": 0.83, "grad_norm": 1.1527212858200073, "learning_rate": 7.050470857038572e-07, "loss": 0.4884, "step": 12921 }, { "epoch": 0.83, "grad_norm": 1.1976172924041748, "learning_rate": 7.045119867905559e-07, "loss": 0.5371, "step": 12922 }, { "epoch": 0.83, "grad_norm": 1.2065637111663818, "learning_rate": 7.039770756219522e-07, "loss": 0.5435, "step": 12923 }, { "epoch": 0.83, "grad_norm": 1.2171542644500732, "learning_rate": 7.034423522214268e-07, "loss": 0.5287, "step": 12924 }, { "epoch": 0.83, "grad_norm": 1.1689432859420776, "learning_rate": 7.029078166123505e-07, "loss": 0.4583, "step": 12925 }, { "epoch": 0.83, "grad_norm": 1.2455202341079712, "learning_rate": 7.023734688180872e-07, "loss": 0.5026, "step": 12926 }, { "epoch": 0.83, "grad_norm": 1.7591561079025269, "learning_rate": 7.018393088619924e-07, "loss": 0.4986, "step": 12927 }, { "epoch": 0.83, "grad_norm": 1.267732858657837, "learning_rate": 7.01305336767411e-07, "loss": 0.5604, "step": 12928 }, { "epoch": 0.83, "grad_norm": 1.2327802181243896, "learning_rate": 7.007715525576831e-07, "loss": 0.5102, "step": 12929 }, { "epoch": 0.83, "grad_norm": 1.4282673597335815, "learning_rate": 7.002379562561379e-07, "loss": 0.5252, "step": 12930 }, { "epoch": 0.83, "grad_norm": 1.1856036186218262, "learning_rate": 6.997045478860981e-07, "loss": 0.4817, "step": 12931 }, { "epoch": 0.83, "grad_norm": 1.225083827972412, "learning_rate": 6.991713274708778e-07, "loss": 0.5017, "step": 12932 }, { "epoch": 0.83, "grad_norm": 1.2279058694839478, "learning_rate": 6.986382950337834e-07, "loss": 0.4895, "step": 12933 }, { "epoch": 0.83, "grad_norm": 1.305397868156433, "learning_rate": 6.981054505981105e-07, "loss": 0.5947, "step": 12934 }, { "epoch": 0.83, "grad_norm": 1.2283399105072021, "learning_rate": 6.975727941871491e-07, "loss": 0.5183, "step": 12935 }, { "epoch": 0.84, "grad_norm": 1.309165120124817, "learning_rate": 6.970403258241809e-07, "loss": 0.5365, "step": 12936 }, { "epoch": 0.84, "grad_norm": 1.2620257139205933, "learning_rate": 6.965080455324791e-07, "loss": 0.4365, "step": 12937 }, { "epoch": 0.84, "grad_norm": 1.0975463390350342, "learning_rate": 6.959759533353061e-07, "loss": 0.5116, "step": 12938 }, { "epoch": 0.84, "grad_norm": 1.1140917539596558, "learning_rate": 6.954440492559201e-07, "loss": 0.4419, "step": 12939 }, { "epoch": 0.84, "grad_norm": 1.393599033355713, "learning_rate": 6.949123333175684e-07, "loss": 0.5089, "step": 12940 }, { "epoch": 0.84, "grad_norm": 1.1886496543884277, "learning_rate": 6.943808055434914e-07, "loss": 0.4531, "step": 12941 }, { "epoch": 0.84, "grad_norm": 1.2091379165649414, "learning_rate": 6.938494659569222e-07, "loss": 0.5077, "step": 12942 }, { "epoch": 0.84, "grad_norm": 1.1270508766174316, "learning_rate": 6.933183145810818e-07, "loss": 0.4889, "step": 12943 }, { "epoch": 0.84, "grad_norm": 1.1026798486709595, "learning_rate": 6.927873514391859e-07, "loss": 0.4717, "step": 12944 }, { "epoch": 0.84, "grad_norm": 1.1601994037628174, "learning_rate": 6.922565765544425e-07, "loss": 0.5426, "step": 12945 }, { "epoch": 0.84, "grad_norm": 1.2779264450073242, "learning_rate": 6.917259899500511e-07, "loss": 0.5119, "step": 12946 }, { "epoch": 0.84, "grad_norm": 1.1543586254119873, "learning_rate": 6.911955916492003e-07, "loss": 0.4824, "step": 12947 }, { "epoch": 0.84, "grad_norm": 1.2042746543884277, "learning_rate": 6.90665381675073e-07, "loss": 0.5233, "step": 12948 }, { "epoch": 0.84, "grad_norm": 1.1126741170883179, "learning_rate": 6.901353600508437e-07, "loss": 0.496, "step": 12949 }, { "epoch": 0.84, "grad_norm": 1.2927876710891724, "learning_rate": 6.896055267996782e-07, "loss": 0.5133, "step": 12950 }, { "epoch": 0.84, "grad_norm": 1.194339394569397, "learning_rate": 6.890758819447352e-07, "loss": 0.4889, "step": 12951 }, { "epoch": 0.84, "grad_norm": 1.2415770292282104, "learning_rate": 6.885464255091617e-07, "loss": 0.5281, "step": 12952 }, { "epoch": 0.84, "grad_norm": 1.1418448686599731, "learning_rate": 6.88017157516101e-07, "loss": 0.499, "step": 12953 }, { "epoch": 0.84, "grad_norm": 1.2021890878677368, "learning_rate": 6.874880779886844e-07, "loss": 0.4532, "step": 12954 }, { "epoch": 0.84, "grad_norm": 1.2242085933685303, "learning_rate": 6.869591869500381e-07, "loss": 0.482, "step": 12955 }, { "epoch": 0.84, "grad_norm": 1.2587108612060547, "learning_rate": 6.864304844232778e-07, "loss": 0.4882, "step": 12956 }, { "epoch": 0.84, "grad_norm": 1.1739436388015747, "learning_rate": 6.859019704315128e-07, "loss": 0.459, "step": 12957 }, { "epoch": 0.84, "grad_norm": 1.208442211151123, "learning_rate": 6.853736449978415e-07, "loss": 0.5305, "step": 12958 }, { "epoch": 0.84, "grad_norm": 1.1770083904266357, "learning_rate": 6.848455081453558e-07, "loss": 0.5313, "step": 12959 }, { "epoch": 0.84, "grad_norm": 1.2223340272903442, "learning_rate": 6.843175598971403e-07, "loss": 0.5309, "step": 12960 }, { "epoch": 0.84, "grad_norm": 1.2383896112442017, "learning_rate": 6.837898002762689e-07, "loss": 0.5028, "step": 12961 }, { "epoch": 0.84, "grad_norm": 1.146367073059082, "learning_rate": 6.832622293058095e-07, "loss": 0.4496, "step": 12962 }, { "epoch": 0.84, "grad_norm": 1.3473272323608398, "learning_rate": 6.827348470088224e-07, "loss": 0.5291, "step": 12963 }, { "epoch": 0.84, "grad_norm": 1.36069917678833, "learning_rate": 6.822076534083549e-07, "loss": 0.5152, "step": 12964 }, { "epoch": 0.84, "grad_norm": 1.3056105375289917, "learning_rate": 6.816806485274508e-07, "loss": 0.5238, "step": 12965 }, { "epoch": 0.84, "grad_norm": 1.1920748949050903, "learning_rate": 6.811538323891443e-07, "loss": 0.5437, "step": 12966 }, { "epoch": 0.84, "grad_norm": 1.2007246017456055, "learning_rate": 6.806272050164608e-07, "loss": 0.5183, "step": 12967 }, { "epoch": 0.84, "grad_norm": 1.2125442028045654, "learning_rate": 6.801007664324194e-07, "loss": 0.5037, "step": 12968 }, { "epoch": 0.84, "grad_norm": 1.18716561794281, "learning_rate": 6.795745166600265e-07, "loss": 0.4927, "step": 12969 }, { "epoch": 0.84, "grad_norm": 1.3417656421661377, "learning_rate": 6.790484557222849e-07, "loss": 0.5534, "step": 12970 }, { "epoch": 0.84, "grad_norm": 1.2828887701034546, "learning_rate": 6.785225836421872e-07, "loss": 0.4768, "step": 12971 }, { "epoch": 0.84, "grad_norm": 1.3528931140899658, "learning_rate": 6.779969004427179e-07, "loss": 0.5474, "step": 12972 }, { "epoch": 0.84, "grad_norm": 1.1874520778656006, "learning_rate": 6.77471406146853e-07, "loss": 0.5497, "step": 12973 }, { "epoch": 0.84, "grad_norm": 1.2761428356170654, "learning_rate": 6.769461007775618e-07, "loss": 0.5366, "step": 12974 }, { "epoch": 0.84, "grad_norm": 1.3015329837799072, "learning_rate": 6.764209843578017e-07, "loss": 0.5695, "step": 12975 }, { "epoch": 0.84, "grad_norm": 1.2282732725143433, "learning_rate": 6.758960569105255e-07, "loss": 0.4676, "step": 12976 }, { "epoch": 0.84, "grad_norm": 1.168832778930664, "learning_rate": 6.753713184586763e-07, "loss": 0.4879, "step": 12977 }, { "epoch": 0.84, "grad_norm": 1.2783735990524292, "learning_rate": 6.748467690251887e-07, "loss": 0.5253, "step": 12978 }, { "epoch": 0.84, "grad_norm": 1.3452624082565308, "learning_rate": 6.743224086329903e-07, "loss": 0.5566, "step": 12979 }, { "epoch": 0.84, "grad_norm": 1.3115065097808838, "learning_rate": 6.73798237305e-07, "loss": 0.4531, "step": 12980 }, { "epoch": 0.84, "grad_norm": 1.2000088691711426, "learning_rate": 6.732742550641258e-07, "loss": 0.5086, "step": 12981 }, { "epoch": 0.84, "grad_norm": 1.1022628545761108, "learning_rate": 6.727504619332709e-07, "loss": 0.4907, "step": 12982 }, { "epoch": 0.84, "grad_norm": 1.7909592390060425, "learning_rate": 6.722268579353286e-07, "loss": 0.4902, "step": 12983 }, { "epoch": 0.84, "grad_norm": 1.2119507789611816, "learning_rate": 6.717034430931846e-07, "loss": 0.5033, "step": 12984 }, { "epoch": 0.84, "grad_norm": 1.2340983152389526, "learning_rate": 6.711802174297161e-07, "loss": 0.496, "step": 12985 }, { "epoch": 0.84, "grad_norm": 1.1966503858566284, "learning_rate": 6.70657180967793e-07, "loss": 0.5185, "step": 12986 }, { "epoch": 0.84, "grad_norm": 1.1734296083450317, "learning_rate": 6.701343337302735e-07, "loss": 0.5359, "step": 12987 }, { "epoch": 0.84, "grad_norm": 1.1446524858474731, "learning_rate": 6.696116757400112e-07, "loss": 0.4728, "step": 12988 }, { "epoch": 0.84, "grad_norm": 1.53584885597229, "learning_rate": 6.690892070198496e-07, "loss": 0.5619, "step": 12989 }, { "epoch": 0.84, "grad_norm": 1.2726783752441406, "learning_rate": 6.685669275926255e-07, "loss": 0.5059, "step": 12990 }, { "epoch": 0.84, "grad_norm": 1.2277506589889526, "learning_rate": 6.680448374811649e-07, "loss": 0.5149, "step": 12991 }, { "epoch": 0.84, "grad_norm": 1.1971303224563599, "learning_rate": 6.675229367082897e-07, "loss": 0.5533, "step": 12992 }, { "epoch": 0.84, "grad_norm": 1.1327377557754517, "learning_rate": 6.670012252968073e-07, "loss": 0.4851, "step": 12993 }, { "epoch": 0.84, "grad_norm": 1.1798558235168457, "learning_rate": 6.664797032695224e-07, "loss": 0.5302, "step": 12994 }, { "epoch": 0.84, "grad_norm": 1.3366475105285645, "learning_rate": 6.65958370649229e-07, "loss": 0.4881, "step": 12995 }, { "epoch": 0.84, "grad_norm": 1.20474374294281, "learning_rate": 6.654372274587134e-07, "loss": 0.5058, "step": 12996 }, { "epoch": 0.84, "grad_norm": 1.3739529848098755, "learning_rate": 6.649162737207532e-07, "loss": 0.5466, "step": 12997 }, { "epoch": 0.84, "grad_norm": 1.1872929334640503, "learning_rate": 6.643955094581189e-07, "loss": 0.5256, "step": 12998 }, { "epoch": 0.84, "grad_norm": 1.3154609203338623, "learning_rate": 6.638749346935697e-07, "loss": 0.5472, "step": 12999 }, { "epoch": 0.84, "grad_norm": 1.2478059530258179, "learning_rate": 6.6335454944986e-07, "loss": 0.4911, "step": 13000 }, { "epoch": 0.84, "grad_norm": 1.3578766584396362, "learning_rate": 6.628343537497339e-07, "loss": 0.5551, "step": 13001 }, { "epoch": 0.84, "grad_norm": 1.291847586631775, "learning_rate": 6.623143476159288e-07, "loss": 0.5245, "step": 13002 }, { "epoch": 0.84, "grad_norm": 1.1680700778961182, "learning_rate": 6.617945310711715e-07, "loss": 0.4877, "step": 13003 }, { "epoch": 0.84, "grad_norm": 1.2085036039352417, "learning_rate": 6.612749041381844e-07, "loss": 0.4687, "step": 13004 }, { "epoch": 0.84, "grad_norm": 1.1445406675338745, "learning_rate": 6.607554668396754e-07, "loss": 0.5191, "step": 13005 }, { "epoch": 0.84, "grad_norm": 1.1697508096694946, "learning_rate": 6.602362191983496e-07, "loss": 0.5406, "step": 13006 }, { "epoch": 0.84, "grad_norm": 1.2895206212997437, "learning_rate": 6.597171612369024e-07, "loss": 0.5548, "step": 13007 }, { "epoch": 0.84, "grad_norm": 1.2418185472488403, "learning_rate": 6.591982929780194e-07, "loss": 0.5344, "step": 13008 }, { "epoch": 0.84, "grad_norm": 1.1656872034072876, "learning_rate": 6.586796144443813e-07, "loss": 0.4652, "step": 13009 }, { "epoch": 0.84, "grad_norm": 1.2231427431106567, "learning_rate": 6.581611256586551e-07, "loss": 0.5011, "step": 13010 }, { "epoch": 0.84, "grad_norm": 1.410025715827942, "learning_rate": 6.576428266435043e-07, "loss": 0.5232, "step": 13011 }, { "epoch": 0.84, "grad_norm": 1.2583494186401367, "learning_rate": 6.571247174215816e-07, "loss": 0.5394, "step": 13012 }, { "epoch": 0.84, "grad_norm": 1.238110065460205, "learning_rate": 6.566067980155328e-07, "loss": 0.4986, "step": 13013 }, { "epoch": 0.84, "grad_norm": 1.3479379415512085, "learning_rate": 6.560890684479948e-07, "loss": 0.5507, "step": 13014 }, { "epoch": 0.84, "grad_norm": 1.2588527202606201, "learning_rate": 6.555715287415975e-07, "loss": 0.4985, "step": 13015 }, { "epoch": 0.84, "grad_norm": 1.1525555849075317, "learning_rate": 6.550541789189585e-07, "loss": 0.5359, "step": 13016 }, { "epoch": 0.84, "grad_norm": 1.3826096057891846, "learning_rate": 6.545370190026917e-07, "loss": 0.528, "step": 13017 }, { "epoch": 0.84, "grad_norm": 1.1482689380645752, "learning_rate": 6.540200490153997e-07, "loss": 0.5506, "step": 13018 }, { "epoch": 0.84, "grad_norm": 1.2037937641143799, "learning_rate": 6.535032689796794e-07, "loss": 0.5265, "step": 13019 }, { "epoch": 0.84, "grad_norm": 1.1656454801559448, "learning_rate": 6.529866789181166e-07, "loss": 0.4856, "step": 13020 }, { "epoch": 0.84, "grad_norm": 1.2116165161132812, "learning_rate": 6.524702788532916e-07, "loss": 0.5169, "step": 13021 }, { "epoch": 0.84, "grad_norm": 1.2965987920761108, "learning_rate": 6.51954068807773e-07, "loss": 0.5597, "step": 13022 }, { "epoch": 0.84, "grad_norm": 1.2893997430801392, "learning_rate": 6.514380488041238e-07, "loss": 0.4863, "step": 13023 }, { "epoch": 0.84, "grad_norm": 1.1674598455429077, "learning_rate": 6.509222188648984e-07, "loss": 0.4814, "step": 13024 }, { "epoch": 0.84, "grad_norm": 1.268580436706543, "learning_rate": 6.504065790126419e-07, "loss": 0.5142, "step": 13025 }, { "epoch": 0.84, "grad_norm": 1.2875089645385742, "learning_rate": 6.498911292698929e-07, "loss": 0.4961, "step": 13026 }, { "epoch": 0.84, "grad_norm": 1.2445323467254639, "learning_rate": 6.493758696591779e-07, "loss": 0.5008, "step": 13027 }, { "epoch": 0.84, "grad_norm": 1.1403309106826782, "learning_rate": 6.488608002030189e-07, "loss": 0.4792, "step": 13028 }, { "epoch": 0.84, "grad_norm": 1.3181679248809814, "learning_rate": 6.483459209239284e-07, "loss": 0.533, "step": 13029 }, { "epoch": 0.84, "grad_norm": 1.2157176733016968, "learning_rate": 6.478312318444102e-07, "loss": 0.5068, "step": 13030 }, { "epoch": 0.84, "grad_norm": 1.1619987487792969, "learning_rate": 6.473167329869612e-07, "loss": 0.506, "step": 13031 }, { "epoch": 0.84, "grad_norm": 1.1752301454544067, "learning_rate": 6.468024243740667e-07, "loss": 0.5192, "step": 13032 }, { "epoch": 0.84, "grad_norm": 1.2141884565353394, "learning_rate": 6.462883060282066e-07, "loss": 0.5026, "step": 13033 }, { "epoch": 0.84, "grad_norm": 1.3517699241638184, "learning_rate": 6.457743779718523e-07, "loss": 0.4618, "step": 13034 }, { "epoch": 0.84, "grad_norm": 1.1724780797958374, "learning_rate": 6.452606402274652e-07, "loss": 0.4968, "step": 13035 }, { "epoch": 0.84, "grad_norm": 1.1221119165420532, "learning_rate": 6.44747092817502e-07, "loss": 0.4775, "step": 13036 }, { "epoch": 0.84, "grad_norm": 1.17868173122406, "learning_rate": 6.442337357644052e-07, "loss": 0.5088, "step": 13037 }, { "epoch": 0.84, "grad_norm": 1.1934641599655151, "learning_rate": 6.437205690906139e-07, "loss": 0.4868, "step": 13038 }, { "epoch": 0.84, "grad_norm": 1.1387852430343628, "learning_rate": 6.432075928185571e-07, "loss": 0.489, "step": 13039 }, { "epoch": 0.84, "grad_norm": 1.3334838151931763, "learning_rate": 6.426948069706568e-07, "loss": 0.5199, "step": 13040 }, { "epoch": 0.84, "grad_norm": 1.1798137426376343, "learning_rate": 6.421822115693233e-07, "loss": 0.4999, "step": 13041 }, { "epoch": 0.84, "grad_norm": 1.2143956422805786, "learning_rate": 6.41669806636962e-07, "loss": 0.5704, "step": 13042 }, { "epoch": 0.84, "grad_norm": 1.2293643951416016, "learning_rate": 6.411575921959689e-07, "loss": 0.5326, "step": 13043 }, { "epoch": 0.84, "grad_norm": 1.2476941347122192, "learning_rate": 6.406455682687318e-07, "loss": 0.4941, "step": 13044 }, { "epoch": 0.84, "grad_norm": 1.1189863681793213, "learning_rate": 6.401337348776304e-07, "loss": 0.5372, "step": 13045 }, { "epoch": 0.84, "grad_norm": 1.2583363056182861, "learning_rate": 6.396220920450341e-07, "loss": 0.4928, "step": 13046 }, { "epoch": 0.84, "grad_norm": 5.710033416748047, "learning_rate": 6.391106397933055e-07, "loss": 0.5049, "step": 13047 }, { "epoch": 0.84, "grad_norm": 1.1814653873443604, "learning_rate": 6.385993781448003e-07, "loss": 0.4857, "step": 13048 }, { "epoch": 0.84, "grad_norm": 1.1350535154342651, "learning_rate": 6.380883071218635e-07, "loss": 0.4657, "step": 13049 }, { "epoch": 0.84, "grad_norm": 1.285496473312378, "learning_rate": 6.375774267468332e-07, "loss": 0.5561, "step": 13050 }, { "epoch": 0.84, "grad_norm": 1.2034382820129395, "learning_rate": 6.370667370420391e-07, "loss": 0.477, "step": 13051 }, { "epoch": 0.84, "grad_norm": 1.202940583229065, "learning_rate": 6.365562380298001e-07, "loss": 0.4485, "step": 13052 }, { "epoch": 0.84, "grad_norm": 1.2240880727767944, "learning_rate": 6.360459297324306e-07, "loss": 0.5393, "step": 13053 }, { "epoch": 0.84, "grad_norm": 1.319139838218689, "learning_rate": 6.355358121722338e-07, "loss": 0.4752, "step": 13054 }, { "epoch": 0.84, "grad_norm": 1.228990077972412, "learning_rate": 6.350258853715069e-07, "loss": 0.5595, "step": 13055 }, { "epoch": 0.84, "grad_norm": 1.240501880645752, "learning_rate": 6.345161493525371e-07, "loss": 0.5339, "step": 13056 }, { "epoch": 0.84, "grad_norm": 1.2589834928512573, "learning_rate": 6.340066041376025e-07, "loss": 0.5151, "step": 13057 }, { "epoch": 0.84, "grad_norm": 1.3078839778900146, "learning_rate": 6.334972497489749e-07, "loss": 0.4915, "step": 13058 }, { "epoch": 0.84, "grad_norm": 1.2159615755081177, "learning_rate": 6.329880862089172e-07, "loss": 0.5454, "step": 13059 }, { "epoch": 0.84, "grad_norm": 1.554817795753479, "learning_rate": 6.324791135396824e-07, "loss": 0.4767, "step": 13060 }, { "epoch": 0.84, "grad_norm": 1.268507480621338, "learning_rate": 6.319703317635178e-07, "loss": 0.5181, "step": 13061 }, { "epoch": 0.84, "grad_norm": 1.2354096174240112, "learning_rate": 6.314617409026613e-07, "loss": 0.5275, "step": 13062 }, { "epoch": 0.84, "grad_norm": 1.2061673402786255, "learning_rate": 6.3095334097934e-07, "loss": 0.5295, "step": 13063 }, { "epoch": 0.84, "grad_norm": 1.3081616163253784, "learning_rate": 6.304451320157756e-07, "loss": 0.4856, "step": 13064 }, { "epoch": 0.84, "grad_norm": 1.1620980501174927, "learning_rate": 6.299371140341814e-07, "loss": 0.4759, "step": 13065 }, { "epoch": 0.84, "grad_norm": 1.168167233467102, "learning_rate": 6.294292870567603e-07, "loss": 0.4908, "step": 13066 }, { "epoch": 0.84, "grad_norm": 1.2804391384124756, "learning_rate": 6.289216511057095e-07, "loss": 0.543, "step": 13067 }, { "epoch": 0.84, "grad_norm": 1.149875521659851, "learning_rate": 6.284142062032167e-07, "loss": 0.539, "step": 13068 }, { "epoch": 0.84, "grad_norm": 1.1854865550994873, "learning_rate": 6.279069523714588e-07, "loss": 0.4989, "step": 13069 }, { "epoch": 0.84, "grad_norm": 1.3803298473358154, "learning_rate": 6.273998896326083e-07, "loss": 0.4945, "step": 13070 }, { "epoch": 0.84, "grad_norm": 1.2894723415374756, "learning_rate": 6.268930180088268e-07, "loss": 0.5702, "step": 13071 }, { "epoch": 0.84, "grad_norm": 1.2294564247131348, "learning_rate": 6.26386337522269e-07, "loss": 0.4856, "step": 13072 }, { "epoch": 0.84, "grad_norm": 1.3610048294067383, "learning_rate": 6.258798481950801e-07, "loss": 0.5302, "step": 13073 }, { "epoch": 0.84, "grad_norm": 1.2822469472885132, "learning_rate": 6.253735500493985e-07, "loss": 0.51, "step": 13074 }, { "epoch": 0.84, "grad_norm": 1.293534755706787, "learning_rate": 6.248674431073515e-07, "loss": 0.5328, "step": 13075 }, { "epoch": 0.84, "grad_norm": 1.1859639883041382, "learning_rate": 6.243615273910608e-07, "loss": 0.5061, "step": 13076 }, { "epoch": 0.84, "grad_norm": 1.2172629833221436, "learning_rate": 6.238558029226382e-07, "loss": 0.5113, "step": 13077 }, { "epoch": 0.84, "grad_norm": 1.3819193840026855, "learning_rate": 6.233502697241878e-07, "loss": 0.5448, "step": 13078 }, { "epoch": 0.84, "grad_norm": 1.1678194999694824, "learning_rate": 6.228449278178056e-07, "loss": 0.4817, "step": 13079 }, { "epoch": 0.84, "grad_norm": 1.1838734149932861, "learning_rate": 6.22339777225579e-07, "loss": 0.5371, "step": 13080 }, { "epoch": 0.84, "grad_norm": 1.2038856744766235, "learning_rate": 6.218348179695855e-07, "loss": 0.5315, "step": 13081 }, { "epoch": 0.84, "grad_norm": 1.144445538520813, "learning_rate": 6.213300500718961e-07, "loss": 0.5064, "step": 13082 }, { "epoch": 0.84, "grad_norm": 1.2067956924438477, "learning_rate": 6.208254735545732e-07, "loss": 0.5274, "step": 13083 }, { "epoch": 0.84, "grad_norm": 1.162116289138794, "learning_rate": 6.203210884396699e-07, "loss": 0.5009, "step": 13084 }, { "epoch": 0.84, "grad_norm": 1.240491509437561, "learning_rate": 6.198168947492328e-07, "loss": 0.5192, "step": 13085 }, { "epoch": 0.84, "grad_norm": 1.2196458578109741, "learning_rate": 6.193128925052988e-07, "loss": 0.5369, "step": 13086 }, { "epoch": 0.84, "grad_norm": 1.2614247798919678, "learning_rate": 6.188090817298953e-07, "loss": 0.4899, "step": 13087 }, { "epoch": 0.84, "grad_norm": 1.1844590902328491, "learning_rate": 6.18305462445043e-07, "loss": 0.5015, "step": 13088 }, { "epoch": 0.84, "grad_norm": 1.223618507385254, "learning_rate": 6.178020346727537e-07, "loss": 0.503, "step": 13089 }, { "epoch": 0.84, "grad_norm": 1.2152405977249146, "learning_rate": 6.172987984350321e-07, "loss": 0.4901, "step": 13090 }, { "epoch": 0.85, "grad_norm": 1.194867730140686, "learning_rate": 6.167957537538716e-07, "loss": 0.5101, "step": 13091 }, { "epoch": 0.85, "grad_norm": 1.2239810228347778, "learning_rate": 6.162929006512613e-07, "loss": 0.4912, "step": 13092 }, { "epoch": 0.85, "grad_norm": 1.269821286201477, "learning_rate": 6.157902391491772e-07, "loss": 0.5771, "step": 13093 }, { "epoch": 0.85, "grad_norm": 1.2553584575653076, "learning_rate": 6.152877692695902e-07, "loss": 0.4768, "step": 13094 }, { "epoch": 0.85, "grad_norm": 1.1962120532989502, "learning_rate": 6.147854910344625e-07, "loss": 0.5298, "step": 13095 }, { "epoch": 0.85, "grad_norm": 1.2333245277404785, "learning_rate": 6.142834044657464e-07, "loss": 0.5123, "step": 13096 }, { "epoch": 0.85, "grad_norm": 1.2145401239395142, "learning_rate": 6.137815095853888e-07, "loss": 0.5395, "step": 13097 }, { "epoch": 0.85, "grad_norm": 1.1184746026992798, "learning_rate": 6.132798064153234e-07, "loss": 0.5056, "step": 13098 }, { "epoch": 0.85, "grad_norm": 1.2162959575653076, "learning_rate": 6.127782949774802e-07, "loss": 0.5185, "step": 13099 }, { "epoch": 0.85, "grad_norm": 1.158758282661438, "learning_rate": 6.122769752937785e-07, "loss": 0.4819, "step": 13100 }, { "epoch": 0.85, "grad_norm": 1.1230618953704834, "learning_rate": 6.117758473861296e-07, "loss": 0.4921, "step": 13101 }, { "epoch": 0.85, "grad_norm": 1.201642632484436, "learning_rate": 6.112749112764365e-07, "loss": 0.5043, "step": 13102 }, { "epoch": 0.85, "grad_norm": 1.2773388624191284, "learning_rate": 6.107741669865952e-07, "loss": 0.5362, "step": 13103 }, { "epoch": 0.85, "grad_norm": 1.2444777488708496, "learning_rate": 6.102736145384897e-07, "loss": 0.5476, "step": 13104 }, { "epoch": 0.85, "grad_norm": 1.182955026626587, "learning_rate": 6.097732539539992e-07, "loss": 0.4947, "step": 13105 }, { "epoch": 0.85, "grad_norm": 1.2132976055145264, "learning_rate": 6.092730852549922e-07, "loss": 0.479, "step": 13106 }, { "epoch": 0.85, "grad_norm": 1.1182559728622437, "learning_rate": 6.087731084633303e-07, "loss": 0.4731, "step": 13107 }, { "epoch": 0.85, "grad_norm": 1.2200595140457153, "learning_rate": 6.08273323600867e-07, "loss": 0.489, "step": 13108 }, { "epoch": 0.85, "grad_norm": 1.2022680044174194, "learning_rate": 6.077737306894465e-07, "loss": 0.5039, "step": 13109 }, { "epoch": 0.85, "grad_norm": 1.15499746799469, "learning_rate": 6.072743297509031e-07, "loss": 0.5262, "step": 13110 }, { "epoch": 0.85, "grad_norm": 1.1085649728775024, "learning_rate": 6.067751208070655e-07, "loss": 0.5204, "step": 13111 }, { "epoch": 0.85, "grad_norm": 1.0942113399505615, "learning_rate": 6.062761038797527e-07, "loss": 0.482, "step": 13112 }, { "epoch": 0.85, "grad_norm": 1.1549692153930664, "learning_rate": 6.057772789907756e-07, "loss": 0.5069, "step": 13113 }, { "epoch": 0.85, "grad_norm": 1.1801003217697144, "learning_rate": 6.052786461619359e-07, "loss": 0.5078, "step": 13114 }, { "epoch": 0.85, "grad_norm": 1.1753078699111938, "learning_rate": 6.047802054150298e-07, "loss": 0.4828, "step": 13115 }, { "epoch": 0.85, "grad_norm": 1.1696770191192627, "learning_rate": 6.042819567718395e-07, "loss": 0.4842, "step": 13116 }, { "epoch": 0.85, "grad_norm": 1.1905238628387451, "learning_rate": 6.037839002541441e-07, "loss": 0.4837, "step": 13117 }, { "epoch": 0.85, "grad_norm": 1.1807931661605835, "learning_rate": 6.032860358837117e-07, "loss": 0.5408, "step": 13118 }, { "epoch": 0.85, "grad_norm": 1.2480710744857788, "learning_rate": 6.027883636823035e-07, "loss": 0.5194, "step": 13119 }, { "epoch": 0.85, "grad_norm": 1.2634693384170532, "learning_rate": 6.022908836716712e-07, "loss": 0.5733, "step": 13120 }, { "epoch": 0.85, "grad_norm": 1.3252137899398804, "learning_rate": 6.017935958735576e-07, "loss": 0.527, "step": 13121 }, { "epoch": 0.85, "grad_norm": 1.2108590602874756, "learning_rate": 6.012965003096982e-07, "loss": 0.4743, "step": 13122 }, { "epoch": 0.85, "grad_norm": 1.3313205242156982, "learning_rate": 6.007995970018204e-07, "loss": 0.5013, "step": 13123 }, { "epoch": 0.85, "grad_norm": 1.2799054384231567, "learning_rate": 6.00302885971642e-07, "loss": 0.5172, "step": 13124 }, { "epoch": 0.85, "grad_norm": 1.293613314628601, "learning_rate": 5.998063672408738e-07, "loss": 0.5281, "step": 13125 }, { "epoch": 0.85, "grad_norm": 1.271565318107605, "learning_rate": 5.993100408312158e-07, "loss": 0.5408, "step": 13126 }, { "epoch": 0.85, "grad_norm": 1.2038074731826782, "learning_rate": 5.988139067643617e-07, "loss": 0.5005, "step": 13127 }, { "epoch": 0.85, "grad_norm": 1.1776279211044312, "learning_rate": 5.983179650619969e-07, "loss": 0.544, "step": 13128 }, { "epoch": 0.85, "grad_norm": 1.199334979057312, "learning_rate": 5.978222157457986e-07, "loss": 0.4988, "step": 13129 }, { "epoch": 0.85, "grad_norm": 1.2610175609588623, "learning_rate": 5.973266588374322e-07, "loss": 0.5023, "step": 13130 }, { "epoch": 0.85, "grad_norm": 1.3397300243377686, "learning_rate": 5.96831294358558e-07, "loss": 0.5019, "step": 13131 }, { "epoch": 0.85, "grad_norm": 1.1278746128082275, "learning_rate": 5.963361223308278e-07, "loss": 0.5199, "step": 13132 }, { "epoch": 0.85, "grad_norm": 1.2428393363952637, "learning_rate": 5.958411427758848e-07, "loss": 0.4991, "step": 13133 }, { "epoch": 0.85, "grad_norm": 1.4797202348709106, "learning_rate": 5.953463557153627e-07, "loss": 0.478, "step": 13134 }, { "epoch": 0.85, "grad_norm": 1.2331013679504395, "learning_rate": 5.94851761170887e-07, "loss": 0.56, "step": 13135 }, { "epoch": 0.85, "grad_norm": 1.2092138528823853, "learning_rate": 5.94357359164075e-07, "loss": 0.4954, "step": 13136 }, { "epoch": 0.85, "grad_norm": 1.2228131294250488, "learning_rate": 5.938631497165359e-07, "loss": 0.451, "step": 13137 }, { "epoch": 0.85, "grad_norm": 1.2346349954605103, "learning_rate": 5.933691328498719e-07, "loss": 0.5305, "step": 13138 }, { "epoch": 0.85, "grad_norm": 1.1136091947555542, "learning_rate": 5.928753085856725e-07, "loss": 0.4896, "step": 13139 }, { "epoch": 0.85, "grad_norm": 1.2590855360031128, "learning_rate": 5.923816769455231e-07, "loss": 0.5435, "step": 13140 }, { "epoch": 0.85, "grad_norm": 1.2353864908218384, "learning_rate": 5.918882379509988e-07, "loss": 0.5542, "step": 13141 }, { "epoch": 0.85, "grad_norm": 1.207851529121399, "learning_rate": 5.913949916236661e-07, "loss": 0.5129, "step": 13142 }, { "epoch": 0.85, "grad_norm": 1.2089099884033203, "learning_rate": 5.909019379850845e-07, "loss": 0.5038, "step": 13143 }, { "epoch": 0.85, "grad_norm": 1.2228525876998901, "learning_rate": 5.904090770568043e-07, "loss": 0.4703, "step": 13144 }, { "epoch": 0.85, "grad_norm": 1.2333917617797852, "learning_rate": 5.899164088603654e-07, "loss": 0.5505, "step": 13145 }, { "epoch": 0.85, "grad_norm": 1.147031545639038, "learning_rate": 5.894239334173024e-07, "loss": 0.5036, "step": 13146 }, { "epoch": 0.85, "grad_norm": 1.2046020030975342, "learning_rate": 5.889316507491399e-07, "loss": 0.5252, "step": 13147 }, { "epoch": 0.85, "grad_norm": 1.2841055393218994, "learning_rate": 5.884395608773941e-07, "loss": 0.4977, "step": 13148 }, { "epoch": 0.85, "grad_norm": 1.2071946859359741, "learning_rate": 5.879476638235726e-07, "loss": 0.4968, "step": 13149 }, { "epoch": 0.85, "grad_norm": 1.1875885725021362, "learning_rate": 5.874559596091772e-07, "loss": 0.4938, "step": 13150 }, { "epoch": 0.85, "grad_norm": 1.2103967666625977, "learning_rate": 5.869644482556958e-07, "loss": 0.5014, "step": 13151 }, { "epoch": 0.85, "grad_norm": 1.232179045677185, "learning_rate": 5.864731297846127e-07, "loss": 0.4786, "step": 13152 }, { "epoch": 0.85, "grad_norm": 1.1276544332504272, "learning_rate": 5.859820042174019e-07, "loss": 0.4805, "step": 13153 }, { "epoch": 0.85, "grad_norm": 1.266417384147644, "learning_rate": 5.854910715755296e-07, "loss": 0.505, "step": 13154 }, { "epoch": 0.85, "grad_norm": 1.3946946859359741, "learning_rate": 5.850003318804531e-07, "loss": 0.5289, "step": 13155 }, { "epoch": 0.85, "grad_norm": 1.2338868379592896, "learning_rate": 5.845097851536224e-07, "loss": 0.5263, "step": 13156 }, { "epoch": 0.85, "grad_norm": 1.171064019203186, "learning_rate": 5.840194314164754e-07, "loss": 0.5021, "step": 13157 }, { "epoch": 0.85, "grad_norm": 1.278806447982788, "learning_rate": 5.835292706904461e-07, "loss": 0.4865, "step": 13158 }, { "epoch": 0.85, "grad_norm": 1.1819127798080444, "learning_rate": 5.830393029969572e-07, "loss": 0.4946, "step": 13159 }, { "epoch": 0.85, "grad_norm": 1.1984953880310059, "learning_rate": 5.825495283574245e-07, "loss": 0.4693, "step": 13160 }, { "epoch": 0.85, "grad_norm": 1.2798833847045898, "learning_rate": 5.820599467932553e-07, "loss": 0.5515, "step": 13161 }, { "epoch": 0.85, "grad_norm": 1.195053219795227, "learning_rate": 5.81570558325848e-07, "loss": 0.5465, "step": 13162 }, { "epoch": 0.85, "grad_norm": 1.2188427448272705, "learning_rate": 5.810813629765911e-07, "loss": 0.4868, "step": 13163 }, { "epoch": 0.85, "grad_norm": 1.215834379196167, "learning_rate": 5.805923607668668e-07, "loss": 0.5014, "step": 13164 }, { "epoch": 0.85, "grad_norm": 1.5096157789230347, "learning_rate": 5.801035517180481e-07, "loss": 0.5231, "step": 13165 }, { "epoch": 0.85, "grad_norm": 1.2467966079711914, "learning_rate": 5.796149358514997e-07, "loss": 0.479, "step": 13166 }, { "epoch": 0.85, "grad_norm": 1.1401773691177368, "learning_rate": 5.791265131885776e-07, "loss": 0.4781, "step": 13167 }, { "epoch": 0.85, "grad_norm": 1.2270468473434448, "learning_rate": 5.786382837506305e-07, "loss": 0.5458, "step": 13168 }, { "epoch": 0.85, "grad_norm": 1.1394251585006714, "learning_rate": 5.781502475589962e-07, "loss": 0.4915, "step": 13169 }, { "epoch": 0.85, "grad_norm": 1.2698243856430054, "learning_rate": 5.776624046350054e-07, "loss": 0.5278, "step": 13170 }, { "epoch": 0.85, "grad_norm": 1.2031710147857666, "learning_rate": 5.771747549999818e-07, "loss": 0.519, "step": 13171 }, { "epoch": 0.85, "grad_norm": 1.170863151550293, "learning_rate": 5.76687298675238e-07, "loss": 0.5335, "step": 13172 }, { "epoch": 0.85, "grad_norm": 1.2091277837753296, "learning_rate": 5.762000356820807e-07, "loss": 0.523, "step": 13173 }, { "epoch": 0.85, "grad_norm": 1.2462610006332397, "learning_rate": 5.757129660418071e-07, "loss": 0.502, "step": 13174 }, { "epoch": 0.85, "grad_norm": 1.1334737539291382, "learning_rate": 5.752260897757033e-07, "loss": 0.5055, "step": 13175 }, { "epoch": 0.85, "grad_norm": 1.2417315244674683, "learning_rate": 5.747394069050516e-07, "loss": 0.5287, "step": 13176 }, { "epoch": 0.85, "grad_norm": 1.1827934980392456, "learning_rate": 5.742529174511235e-07, "loss": 0.4866, "step": 13177 }, { "epoch": 0.85, "grad_norm": 1.2319124937057495, "learning_rate": 5.737666214351811e-07, "loss": 0.4795, "step": 13178 }, { "epoch": 0.85, "grad_norm": 1.2269364595413208, "learning_rate": 5.732805188784801e-07, "loss": 0.4757, "step": 13179 }, { "epoch": 0.85, "grad_norm": 1.1409461498260498, "learning_rate": 5.727946098022674e-07, "loss": 0.4825, "step": 13180 }, { "epoch": 0.85, "grad_norm": 1.3693273067474365, "learning_rate": 5.723088942277793e-07, "loss": 0.5232, "step": 13181 }, { "epoch": 0.85, "grad_norm": 1.161453127861023, "learning_rate": 5.718233721762456e-07, "loss": 0.4893, "step": 13182 }, { "epoch": 0.85, "grad_norm": 1.1171973943710327, "learning_rate": 5.713380436688876e-07, "loss": 0.5482, "step": 13183 }, { "epoch": 0.85, "grad_norm": 1.2221980094909668, "learning_rate": 5.708529087269177e-07, "loss": 0.5566, "step": 13184 }, { "epoch": 0.85, "grad_norm": 1.123923897743225, "learning_rate": 5.703679673715407e-07, "loss": 0.5125, "step": 13185 }, { "epoch": 0.85, "grad_norm": 1.2955267429351807, "learning_rate": 5.698832196239501e-07, "loss": 0.5141, "step": 13186 }, { "epoch": 0.85, "grad_norm": 1.1323362588882446, "learning_rate": 5.693986655053341e-07, "loss": 0.5117, "step": 13187 }, { "epoch": 0.85, "grad_norm": 1.1943246126174927, "learning_rate": 5.689143050368712e-07, "loss": 0.5692, "step": 13188 }, { "epoch": 0.85, "grad_norm": 1.255367636680603, "learning_rate": 5.684301382397317e-07, "loss": 0.5307, "step": 13189 }, { "epoch": 0.85, "grad_norm": 1.3158966302871704, "learning_rate": 5.679461651350776e-07, "loss": 0.5102, "step": 13190 }, { "epoch": 0.85, "grad_norm": 1.3012871742248535, "learning_rate": 5.674623857440625e-07, "loss": 0.5349, "step": 13191 }, { "epoch": 0.85, "grad_norm": 1.126032829284668, "learning_rate": 5.669788000878296e-07, "loss": 0.5051, "step": 13192 }, { "epoch": 0.85, "grad_norm": 1.2167729139328003, "learning_rate": 5.664954081875162e-07, "loss": 0.5231, "step": 13193 }, { "epoch": 0.85, "grad_norm": 1.1532251834869385, "learning_rate": 5.660122100642496e-07, "loss": 0.4963, "step": 13194 }, { "epoch": 0.85, "grad_norm": 1.2545018196105957, "learning_rate": 5.655292057391493e-07, "loss": 0.4883, "step": 13195 }, { "epoch": 0.85, "grad_norm": 1.1244906187057495, "learning_rate": 5.650463952333268e-07, "loss": 0.5084, "step": 13196 }, { "epoch": 0.85, "grad_norm": 1.229574203491211, "learning_rate": 5.645637785678848e-07, "loss": 0.451, "step": 13197 }, { "epoch": 0.85, "grad_norm": 1.2161198854446411, "learning_rate": 5.640813557639152e-07, "loss": 0.5137, "step": 13198 }, { "epoch": 0.85, "grad_norm": 1.1822538375854492, "learning_rate": 5.63599126842505e-07, "loss": 0.5114, "step": 13199 }, { "epoch": 0.85, "grad_norm": 1.1969879865646362, "learning_rate": 5.631170918247308e-07, "loss": 0.5048, "step": 13200 }, { "epoch": 0.85, "grad_norm": 1.221123456954956, "learning_rate": 5.626352507316612e-07, "loss": 0.5347, "step": 13201 }, { "epoch": 0.85, "grad_norm": 1.1522928476333618, "learning_rate": 5.621536035843561e-07, "loss": 0.4965, "step": 13202 }, { "epoch": 0.85, "grad_norm": 1.1217886209487915, "learning_rate": 5.616721504038685e-07, "loss": 0.5078, "step": 13203 }, { "epoch": 0.85, "grad_norm": 1.1271520853042603, "learning_rate": 5.61190891211239e-07, "loss": 0.4761, "step": 13204 }, { "epoch": 0.85, "grad_norm": 1.2964444160461426, "learning_rate": 5.607098260275034e-07, "loss": 0.5451, "step": 13205 }, { "epoch": 0.85, "grad_norm": 1.087244987487793, "learning_rate": 5.602289548736877e-07, "loss": 0.4572, "step": 13206 }, { "epoch": 0.85, "grad_norm": 1.2066962718963623, "learning_rate": 5.597482777708096e-07, "loss": 0.5028, "step": 13207 }, { "epoch": 0.85, "grad_norm": 1.2303522825241089, "learning_rate": 5.592677947398784e-07, "loss": 0.5039, "step": 13208 }, { "epoch": 0.85, "grad_norm": 1.1797778606414795, "learning_rate": 5.58787505801896e-07, "loss": 0.4959, "step": 13209 }, { "epoch": 0.85, "grad_norm": 1.2220969200134277, "learning_rate": 5.583074109778519e-07, "loss": 0.5666, "step": 13210 }, { "epoch": 0.85, "grad_norm": 1.2375946044921875, "learning_rate": 5.578275102887309e-07, "loss": 0.5261, "step": 13211 }, { "epoch": 0.85, "grad_norm": 1.2410322427749634, "learning_rate": 5.573478037555085e-07, "loss": 0.4688, "step": 13212 }, { "epoch": 0.85, "grad_norm": 1.2122753858566284, "learning_rate": 5.568682913991519e-07, "loss": 0.5332, "step": 13213 }, { "epoch": 0.85, "grad_norm": 1.277042269706726, "learning_rate": 5.563889732406197e-07, "loss": 0.5204, "step": 13214 }, { "epoch": 0.85, "grad_norm": 1.1695623397827148, "learning_rate": 5.559098493008591e-07, "loss": 0.5544, "step": 13215 }, { "epoch": 0.85, "grad_norm": 1.226710557937622, "learning_rate": 5.55430919600814e-07, "loss": 0.5393, "step": 13216 }, { "epoch": 0.85, "grad_norm": 1.1991502046585083, "learning_rate": 5.549521841614159e-07, "loss": 0.5042, "step": 13217 }, { "epoch": 0.85, "grad_norm": 1.2159087657928467, "learning_rate": 5.544736430035902e-07, "loss": 0.5165, "step": 13218 }, { "epoch": 0.85, "grad_norm": 1.181221842765808, "learning_rate": 5.539952961482514e-07, "loss": 0.5302, "step": 13219 }, { "epoch": 0.85, "grad_norm": 1.219610333442688, "learning_rate": 5.535171436163072e-07, "loss": 0.5045, "step": 13220 }, { "epoch": 0.85, "grad_norm": 1.3601864576339722, "learning_rate": 5.530391854286566e-07, "loss": 0.5061, "step": 13221 }, { "epoch": 0.85, "grad_norm": 1.1921124458312988, "learning_rate": 5.525614216061898e-07, "loss": 0.5099, "step": 13222 }, { "epoch": 0.85, "grad_norm": 1.2933003902435303, "learning_rate": 5.520838521697896e-07, "loss": 0.5343, "step": 13223 }, { "epoch": 0.85, "grad_norm": 1.2194002866744995, "learning_rate": 5.516064771403273e-07, "loss": 0.514, "step": 13224 }, { "epoch": 0.85, "grad_norm": 1.1843867301940918, "learning_rate": 5.511292965386694e-07, "loss": 0.5066, "step": 13225 }, { "epoch": 0.85, "grad_norm": 1.285556674003601, "learning_rate": 5.506523103856715e-07, "loss": 0.5494, "step": 13226 }, { "epoch": 0.85, "grad_norm": 1.2188347578048706, "learning_rate": 5.501755187021829e-07, "loss": 0.5171, "step": 13227 }, { "epoch": 0.85, "grad_norm": 1.2347944974899292, "learning_rate": 5.496989215090403e-07, "loss": 0.5265, "step": 13228 }, { "epoch": 0.85, "grad_norm": 1.1816986799240112, "learning_rate": 5.492225188270756e-07, "loss": 0.4981, "step": 13229 }, { "epoch": 0.85, "grad_norm": 1.2748578786849976, "learning_rate": 5.487463106771118e-07, "loss": 0.4984, "step": 13230 }, { "epoch": 0.85, "grad_norm": 1.2379264831542969, "learning_rate": 5.482702970799625e-07, "loss": 0.5021, "step": 13231 }, { "epoch": 0.85, "grad_norm": 1.1117749214172363, "learning_rate": 5.477944780564343e-07, "loss": 0.5031, "step": 13232 }, { "epoch": 0.85, "grad_norm": 1.1759922504425049, "learning_rate": 5.473188536273211e-07, "loss": 0.529, "step": 13233 }, { "epoch": 0.85, "grad_norm": 1.1806893348693848, "learning_rate": 5.468434238134124e-07, "loss": 0.4975, "step": 13234 }, { "epoch": 0.85, "grad_norm": 1.2604626417160034, "learning_rate": 5.463681886354888e-07, "loss": 0.468, "step": 13235 }, { "epoch": 0.85, "grad_norm": 1.1550657749176025, "learning_rate": 5.458931481143209e-07, "loss": 0.4798, "step": 13236 }, { "epoch": 0.85, "grad_norm": 1.2920805215835571, "learning_rate": 5.454183022706721e-07, "loss": 0.508, "step": 13237 }, { "epoch": 0.85, "grad_norm": 1.1840040683746338, "learning_rate": 5.44943651125297e-07, "loss": 0.5215, "step": 13238 }, { "epoch": 0.85, "grad_norm": 1.4097168445587158, "learning_rate": 5.4446919469894e-07, "loss": 0.5308, "step": 13239 }, { "epoch": 0.85, "grad_norm": 1.1981408596038818, "learning_rate": 5.43994933012339e-07, "loss": 0.494, "step": 13240 }, { "epoch": 0.85, "grad_norm": 1.2035765647888184, "learning_rate": 5.435208660862229e-07, "loss": 0.5176, "step": 13241 }, { "epoch": 0.85, "grad_norm": 1.290372371673584, "learning_rate": 5.430469939413119e-07, "loss": 0.5331, "step": 13242 }, { "epoch": 0.85, "grad_norm": 1.2393913269042969, "learning_rate": 5.425733165983172e-07, "loss": 0.5025, "step": 13243 }, { "epoch": 0.85, "grad_norm": 1.1864155530929565, "learning_rate": 5.420998340779438e-07, "loss": 0.4732, "step": 13244 }, { "epoch": 0.85, "grad_norm": 1.1517250537872314, "learning_rate": 5.41626546400884e-07, "loss": 0.5072, "step": 13245 }, { "epoch": 0.86, "grad_norm": 1.1410125494003296, "learning_rate": 5.411534535878254e-07, "loss": 0.5291, "step": 13246 }, { "epoch": 0.86, "grad_norm": 1.1774044036865234, "learning_rate": 5.406805556594453e-07, "loss": 0.4755, "step": 13247 }, { "epoch": 0.86, "grad_norm": 1.1289689540863037, "learning_rate": 5.402078526364129e-07, "loss": 0.5002, "step": 13248 }, { "epoch": 0.86, "grad_norm": 1.1320494413375854, "learning_rate": 5.397353445393888e-07, "loss": 0.5172, "step": 13249 }, { "epoch": 0.86, "grad_norm": 1.5877349376678467, "learning_rate": 5.392630313890263e-07, "loss": 0.471, "step": 13250 }, { "epoch": 0.86, "grad_norm": 1.1206544637680054, "learning_rate": 5.387909132059665e-07, "loss": 0.528, "step": 13251 }, { "epoch": 0.86, "grad_norm": 1.2402830123901367, "learning_rate": 5.383189900108465e-07, "loss": 0.5308, "step": 13252 }, { "epoch": 0.86, "grad_norm": 1.4287444353103638, "learning_rate": 5.378472618242914e-07, "loss": 0.4858, "step": 13253 }, { "epoch": 0.86, "grad_norm": 1.2102924585342407, "learning_rate": 5.373757286669202e-07, "loss": 0.5251, "step": 13254 }, { "epoch": 0.86, "grad_norm": 1.098102331161499, "learning_rate": 5.369043905593424e-07, "loss": 0.4523, "step": 13255 }, { "epoch": 0.86, "grad_norm": 1.3059172630310059, "learning_rate": 5.364332475221596e-07, "loss": 0.5846, "step": 13256 }, { "epoch": 0.86, "grad_norm": 1.1836563348770142, "learning_rate": 5.359622995759623e-07, "loss": 0.5069, "step": 13257 }, { "epoch": 0.86, "grad_norm": 1.1543470621109009, "learning_rate": 5.354915467413358e-07, "loss": 0.4921, "step": 13258 }, { "epoch": 0.86, "grad_norm": 1.1396950483322144, "learning_rate": 5.350209890388546e-07, "loss": 0.497, "step": 13259 }, { "epoch": 0.86, "grad_norm": 1.1410454511642456, "learning_rate": 5.345506264890865e-07, "loss": 0.4886, "step": 13260 }, { "epoch": 0.86, "grad_norm": 1.090476155281067, "learning_rate": 5.340804591125898e-07, "loss": 0.4618, "step": 13261 }, { "epoch": 0.86, "grad_norm": 1.271066665649414, "learning_rate": 5.336104869299147e-07, "loss": 0.5075, "step": 13262 }, { "epoch": 0.86, "grad_norm": 1.2400130033493042, "learning_rate": 5.331407099616009e-07, "loss": 0.5126, "step": 13263 }, { "epoch": 0.86, "grad_norm": 1.163205623626709, "learning_rate": 5.326711282281821e-07, "loss": 0.5051, "step": 13264 }, { "epoch": 0.86, "grad_norm": 1.1403154134750366, "learning_rate": 5.322017417501824e-07, "loss": 0.5188, "step": 13265 }, { "epoch": 0.86, "grad_norm": 1.2426056861877441, "learning_rate": 5.317325505481169e-07, "loss": 0.5185, "step": 13266 }, { "epoch": 0.86, "grad_norm": 1.2111315727233887, "learning_rate": 5.312635546424949e-07, "loss": 0.5132, "step": 13267 }, { "epoch": 0.86, "grad_norm": 1.1976040601730347, "learning_rate": 5.307947540538127e-07, "loss": 0.5015, "step": 13268 }, { "epoch": 0.86, "grad_norm": 1.264877438545227, "learning_rate": 5.303261488025608e-07, "loss": 0.5366, "step": 13269 }, { "epoch": 0.86, "grad_norm": 1.1159768104553223, "learning_rate": 5.298577389092207e-07, "loss": 0.4846, "step": 13270 }, { "epoch": 0.86, "grad_norm": 1.1704962253570557, "learning_rate": 5.29389524394266e-07, "loss": 0.5073, "step": 13271 }, { "epoch": 0.86, "grad_norm": 1.212638020515442, "learning_rate": 5.28921505278161e-07, "loss": 0.533, "step": 13272 }, { "epoch": 0.86, "grad_norm": 1.306702971458435, "learning_rate": 5.284536815813623e-07, "loss": 0.5253, "step": 13273 }, { "epoch": 0.86, "grad_norm": 1.2528363466262817, "learning_rate": 5.279860533243153e-07, "loss": 0.5779, "step": 13274 }, { "epoch": 0.86, "grad_norm": 1.0471513271331787, "learning_rate": 5.275186205274601e-07, "loss": 0.4484, "step": 13275 }, { "epoch": 0.86, "grad_norm": 1.278399109840393, "learning_rate": 5.270513832112268e-07, "loss": 0.5162, "step": 13276 }, { "epoch": 0.86, "grad_norm": 1.1417995691299438, "learning_rate": 5.265843413960376e-07, "loss": 0.4781, "step": 13277 }, { "epoch": 0.86, "grad_norm": 1.2463418245315552, "learning_rate": 5.261174951023046e-07, "loss": 0.5302, "step": 13278 }, { "epoch": 0.86, "grad_norm": 1.193395733833313, "learning_rate": 5.256508443504344e-07, "loss": 0.5211, "step": 13279 }, { "epoch": 0.86, "grad_norm": 1.1161882877349854, "learning_rate": 5.25184389160821e-07, "loss": 0.4807, "step": 13280 }, { "epoch": 0.86, "grad_norm": 1.230123519897461, "learning_rate": 5.247181295538522e-07, "loss": 0.5577, "step": 13281 }, { "epoch": 0.86, "grad_norm": 1.2372004985809326, "learning_rate": 5.242520655499084e-07, "loss": 0.5253, "step": 13282 }, { "epoch": 0.86, "grad_norm": 1.2019100189208984, "learning_rate": 5.237861971693586e-07, "loss": 0.47, "step": 13283 }, { "epoch": 0.86, "grad_norm": 1.2645423412322998, "learning_rate": 5.233205244325657e-07, "loss": 0.4934, "step": 13284 }, { "epoch": 0.86, "grad_norm": 2.0337674617767334, "learning_rate": 5.228550473598831e-07, "loss": 0.4788, "step": 13285 }, { "epoch": 0.86, "grad_norm": 1.1728187799453735, "learning_rate": 5.223897659716548e-07, "loss": 0.4649, "step": 13286 }, { "epoch": 0.86, "grad_norm": 1.2084795236587524, "learning_rate": 5.219246802882172e-07, "loss": 0.4801, "step": 13287 }, { "epoch": 0.86, "grad_norm": 1.2239649295806885, "learning_rate": 5.214597903298984e-07, "loss": 0.5305, "step": 13288 }, { "epoch": 0.86, "grad_norm": 1.2946341037750244, "learning_rate": 5.209950961170174e-07, "loss": 0.5395, "step": 13289 }, { "epoch": 0.86, "grad_norm": 1.2491732835769653, "learning_rate": 5.205305976698849e-07, "loss": 0.4819, "step": 13290 }, { "epoch": 0.86, "grad_norm": 1.1495546102523804, "learning_rate": 5.200662950088031e-07, "loss": 0.5056, "step": 13291 }, { "epoch": 0.86, "grad_norm": 1.2084981203079224, "learning_rate": 5.196021881540647e-07, "loss": 0.528, "step": 13292 }, { "epoch": 0.86, "grad_norm": 1.0946624279022217, "learning_rate": 5.191382771259551e-07, "loss": 0.4742, "step": 13293 }, { "epoch": 0.86, "grad_norm": 1.168662428855896, "learning_rate": 5.186745619447503e-07, "loss": 0.5244, "step": 13294 }, { "epoch": 0.86, "grad_norm": 1.3492947816848755, "learning_rate": 5.182110426307185e-07, "loss": 0.4708, "step": 13295 }, { "epoch": 0.86, "grad_norm": 1.2163749933242798, "learning_rate": 5.177477192041192e-07, "loss": 0.5126, "step": 13296 }, { "epoch": 0.86, "grad_norm": 1.3427857160568237, "learning_rate": 5.172845916852037e-07, "loss": 0.4959, "step": 13297 }, { "epoch": 0.86, "grad_norm": 1.2734156847000122, "learning_rate": 5.168216600942116e-07, "loss": 0.4598, "step": 13298 }, { "epoch": 0.86, "grad_norm": 1.3011783361434937, "learning_rate": 5.163589244513784e-07, "loss": 0.4879, "step": 13299 }, { "epoch": 0.86, "grad_norm": 1.202790379524231, "learning_rate": 5.158963847769288e-07, "loss": 0.5144, "step": 13300 }, { "epoch": 0.86, "grad_norm": 1.3618371486663818, "learning_rate": 5.154340410910791e-07, "loss": 0.5073, "step": 13301 }, { "epoch": 0.86, "grad_norm": 1.2944046258926392, "learning_rate": 5.149718934140368e-07, "loss": 0.5217, "step": 13302 }, { "epoch": 0.86, "grad_norm": 1.2796595096588135, "learning_rate": 5.145099417660027e-07, "loss": 0.5009, "step": 13303 }, { "epoch": 0.86, "grad_norm": 1.1989119052886963, "learning_rate": 5.140481861671648e-07, "loss": 0.502, "step": 13304 }, { "epoch": 0.86, "grad_norm": 1.1872950792312622, "learning_rate": 5.135866266377076e-07, "loss": 0.4883, "step": 13305 }, { "epoch": 0.86, "grad_norm": 1.166040062904358, "learning_rate": 5.131252631978034e-07, "loss": 0.5601, "step": 13306 }, { "epoch": 0.86, "grad_norm": 1.2415642738342285, "learning_rate": 5.126640958676188e-07, "loss": 0.4951, "step": 13307 }, { "epoch": 0.86, "grad_norm": 1.2328853607177734, "learning_rate": 5.122031246673076e-07, "loss": 0.5487, "step": 13308 }, { "epoch": 0.86, "grad_norm": 1.138791561126709, "learning_rate": 5.117423496170199e-07, "loss": 0.5226, "step": 13309 }, { "epoch": 0.86, "grad_norm": 1.2601158618927002, "learning_rate": 5.112817707368939e-07, "loss": 0.4976, "step": 13310 }, { "epoch": 0.86, "grad_norm": 1.1197190284729004, "learning_rate": 5.108213880470603e-07, "loss": 0.491, "step": 13311 }, { "epoch": 0.86, "grad_norm": 1.2060294151306152, "learning_rate": 5.103612015676429e-07, "loss": 0.5051, "step": 13312 }, { "epoch": 0.86, "grad_norm": 1.190298080444336, "learning_rate": 5.099012113187535e-07, "loss": 0.5054, "step": 13313 }, { "epoch": 0.86, "grad_norm": 1.253859043121338, "learning_rate": 5.09441417320497e-07, "loss": 0.5337, "step": 13314 }, { "epoch": 0.86, "grad_norm": 1.1437885761260986, "learning_rate": 5.089818195929702e-07, "loss": 0.5507, "step": 13315 }, { "epoch": 0.86, "grad_norm": 1.3554203510284424, "learning_rate": 5.08522418156262e-07, "loss": 0.5142, "step": 13316 }, { "epoch": 0.86, "grad_norm": 1.2011191844940186, "learning_rate": 5.080632130304502e-07, "loss": 0.5452, "step": 13317 }, { "epoch": 0.86, "grad_norm": 1.2468775510787964, "learning_rate": 5.07604204235606e-07, "loss": 0.544, "step": 13318 }, { "epoch": 0.86, "grad_norm": 1.2108166217803955, "learning_rate": 5.071453917917913e-07, "loss": 0.5269, "step": 13319 }, { "epoch": 0.86, "grad_norm": 1.1486389636993408, "learning_rate": 5.066867757190597e-07, "loss": 0.4603, "step": 13320 }, { "epoch": 0.86, "grad_norm": 1.184329867362976, "learning_rate": 5.062283560374576e-07, "loss": 0.5631, "step": 13321 }, { "epoch": 0.86, "grad_norm": 1.306196928024292, "learning_rate": 5.057701327670184e-07, "loss": 0.4894, "step": 13322 }, { "epoch": 0.86, "grad_norm": 1.2552748918533325, "learning_rate": 5.053121059277722e-07, "loss": 0.5293, "step": 13323 }, { "epoch": 0.86, "grad_norm": 1.2634761333465576, "learning_rate": 5.048542755397368e-07, "loss": 0.5213, "step": 13324 }, { "epoch": 0.86, "grad_norm": 1.1632399559020996, "learning_rate": 5.043966416229235e-07, "loss": 0.4886, "step": 13325 }, { "epoch": 0.86, "grad_norm": 1.349124550819397, "learning_rate": 5.039392041973351e-07, "loss": 0.5066, "step": 13326 }, { "epoch": 0.86, "grad_norm": 1.3073465824127197, "learning_rate": 5.034819632829635e-07, "loss": 0.543, "step": 13327 }, { "epoch": 0.86, "grad_norm": 1.185252070426941, "learning_rate": 5.030249188997937e-07, "loss": 0.5058, "step": 13328 }, { "epoch": 0.86, "grad_norm": 1.2941852807998657, "learning_rate": 5.025680710678021e-07, "loss": 0.5312, "step": 13329 }, { "epoch": 0.86, "grad_norm": 1.2305039167404175, "learning_rate": 5.021114198069571e-07, "loss": 0.5202, "step": 13330 }, { "epoch": 0.86, "grad_norm": 1.2752538919448853, "learning_rate": 5.016549651372171e-07, "loss": 0.5351, "step": 13331 }, { "epoch": 0.86, "grad_norm": 1.187264323234558, "learning_rate": 5.011987070785341e-07, "loss": 0.5346, "step": 13332 }, { "epoch": 0.86, "grad_norm": 1.4053514003753662, "learning_rate": 5.007426456508468e-07, "loss": 0.5073, "step": 13333 }, { "epoch": 0.86, "grad_norm": 1.198469638824463, "learning_rate": 5.002867808740908e-07, "loss": 0.4832, "step": 13334 }, { "epoch": 0.86, "grad_norm": 1.2750353813171387, "learning_rate": 4.998311127681898e-07, "loss": 0.5302, "step": 13335 }, { "epoch": 0.86, "grad_norm": 1.2060580253601074, "learning_rate": 4.993756413530604e-07, "loss": 0.5038, "step": 13336 }, { "epoch": 0.86, "grad_norm": 1.350403070449829, "learning_rate": 4.989203666486097e-07, "loss": 0.5451, "step": 13337 }, { "epoch": 0.86, "grad_norm": 1.3169586658477783, "learning_rate": 4.984652886747382e-07, "loss": 0.5925, "step": 13338 }, { "epoch": 0.86, "grad_norm": 1.2581359148025513, "learning_rate": 4.980104074513337e-07, "loss": 0.4812, "step": 13339 }, { "epoch": 0.86, "grad_norm": 1.1482652425765991, "learning_rate": 4.975557229982792e-07, "loss": 0.477, "step": 13340 }, { "epoch": 0.86, "grad_norm": 1.230611801147461, "learning_rate": 4.971012353354476e-07, "loss": 0.485, "step": 13341 }, { "epoch": 0.86, "grad_norm": 1.2577143907546997, "learning_rate": 4.966469444827032e-07, "loss": 0.5895, "step": 13342 }, { "epoch": 0.86, "grad_norm": 1.2826645374298096, "learning_rate": 4.961928504599017e-07, "loss": 0.548, "step": 13343 }, { "epoch": 0.86, "grad_norm": 1.3066002130508423, "learning_rate": 4.957389532868922e-07, "loss": 0.5358, "step": 13344 }, { "epoch": 0.86, "grad_norm": 1.2762292623519897, "learning_rate": 4.952852529835106e-07, "loss": 0.5411, "step": 13345 }, { "epoch": 0.86, "grad_norm": 1.1824679374694824, "learning_rate": 4.948317495695887e-07, "loss": 0.557, "step": 13346 }, { "epoch": 0.86, "grad_norm": 1.2631911039352417, "learning_rate": 4.943784430649473e-07, "loss": 0.5005, "step": 13347 }, { "epoch": 0.86, "grad_norm": 1.1482455730438232, "learning_rate": 4.939253334893995e-07, "loss": 0.4896, "step": 13348 }, { "epoch": 0.86, "grad_norm": 1.324118971824646, "learning_rate": 4.934724208627495e-07, "loss": 0.54, "step": 13349 }, { "epoch": 0.86, "grad_norm": 1.235621690750122, "learning_rate": 4.930197052047941e-07, "loss": 0.5124, "step": 13350 }, { "epoch": 0.86, "grad_norm": 1.2573423385620117, "learning_rate": 4.925671865353182e-07, "loss": 0.4827, "step": 13351 }, { "epoch": 0.86, "grad_norm": 1.1983164548873901, "learning_rate": 4.921148648741009e-07, "loss": 0.4942, "step": 13352 }, { "epoch": 0.86, "grad_norm": 1.2540456056594849, "learning_rate": 4.916627402409124e-07, "loss": 0.5389, "step": 13353 }, { "epoch": 0.86, "grad_norm": 1.3170768022537231, "learning_rate": 4.912108126555142e-07, "loss": 0.4807, "step": 13354 }, { "epoch": 0.86, "grad_norm": 1.3482344150543213, "learning_rate": 4.907590821376595e-07, "loss": 0.5423, "step": 13355 }, { "epoch": 0.86, "grad_norm": 1.240145206451416, "learning_rate": 4.903075487070901e-07, "loss": 0.5182, "step": 13356 }, { "epoch": 0.86, "grad_norm": 1.315583348274231, "learning_rate": 4.898562123835432e-07, "loss": 0.5143, "step": 13357 }, { "epoch": 0.86, "grad_norm": 1.162719964981079, "learning_rate": 4.894050731867445e-07, "loss": 0.4996, "step": 13358 }, { "epoch": 0.86, "grad_norm": 1.1840060949325562, "learning_rate": 4.889541311364121e-07, "loss": 0.504, "step": 13359 }, { "epoch": 0.86, "grad_norm": 1.1917941570281982, "learning_rate": 4.885033862522564e-07, "loss": 0.5376, "step": 13360 }, { "epoch": 0.86, "grad_norm": 1.2126739025115967, "learning_rate": 4.88052838553979e-07, "loss": 0.5251, "step": 13361 }, { "epoch": 0.86, "grad_norm": 1.1742944717407227, "learning_rate": 4.8760248806127e-07, "loss": 0.5206, "step": 13362 }, { "epoch": 0.86, "grad_norm": 1.1575783491134644, "learning_rate": 4.871523347938139e-07, "loss": 0.4916, "step": 13363 }, { "epoch": 0.86, "grad_norm": 1.1842340230941772, "learning_rate": 4.867023787712861e-07, "loss": 0.4828, "step": 13364 }, { "epoch": 0.86, "grad_norm": 1.53632652759552, "learning_rate": 4.862526200133527e-07, "loss": 0.5031, "step": 13365 }, { "epoch": 0.86, "grad_norm": 1.2053314447402954, "learning_rate": 4.858030585396723e-07, "loss": 0.5229, "step": 13366 }, { "epoch": 0.86, "grad_norm": 1.382150411605835, "learning_rate": 4.853536943698939e-07, "loss": 0.509, "step": 13367 }, { "epoch": 0.86, "grad_norm": 1.0836915969848633, "learning_rate": 4.849045275236563e-07, "loss": 0.4709, "step": 13368 }, { "epoch": 0.86, "grad_norm": 1.1813620328903198, "learning_rate": 4.844555580205929e-07, "loss": 0.4835, "step": 13369 }, { "epoch": 0.86, "grad_norm": 1.2138092517852783, "learning_rate": 4.840067858803272e-07, "loss": 0.5137, "step": 13370 }, { "epoch": 0.86, "grad_norm": 1.1345739364624023, "learning_rate": 4.83558211122473e-07, "loss": 0.5209, "step": 13371 }, { "epoch": 0.86, "grad_norm": 1.183821201324463, "learning_rate": 4.831098337666368e-07, "loss": 0.5126, "step": 13372 }, { "epoch": 0.86, "grad_norm": 1.2803621292114258, "learning_rate": 4.826616538324175e-07, "loss": 0.533, "step": 13373 }, { "epoch": 0.86, "grad_norm": 1.301901936531067, "learning_rate": 4.82213671339401e-07, "loss": 0.4741, "step": 13374 }, { "epoch": 0.86, "grad_norm": 1.1967430114746094, "learning_rate": 4.817658863071689e-07, "loss": 0.4663, "step": 13375 }, { "epoch": 0.86, "grad_norm": 1.1579670906066895, "learning_rate": 4.813182987552928e-07, "loss": 0.5182, "step": 13376 }, { "epoch": 0.86, "grad_norm": 1.174672245979309, "learning_rate": 4.808709087033359e-07, "loss": 0.4745, "step": 13377 }, { "epoch": 0.86, "grad_norm": 1.1428717374801636, "learning_rate": 4.804237161708514e-07, "loss": 0.5228, "step": 13378 }, { "epoch": 0.86, "grad_norm": 1.1846776008605957, "learning_rate": 4.79976721177387e-07, "loss": 0.4944, "step": 13379 }, { "epoch": 0.86, "grad_norm": 1.2434146404266357, "learning_rate": 4.795299237424772e-07, "loss": 0.5085, "step": 13380 }, { "epoch": 0.86, "grad_norm": 1.237838864326477, "learning_rate": 4.790833238856518e-07, "loss": 0.5059, "step": 13381 }, { "epoch": 0.86, "grad_norm": 1.1776210069656372, "learning_rate": 4.786369216264297e-07, "loss": 0.4658, "step": 13382 }, { "epoch": 0.86, "grad_norm": 1.1891779899597168, "learning_rate": 4.781907169843225e-07, "loss": 0.5128, "step": 13383 }, { "epoch": 0.86, "grad_norm": 1.158804178237915, "learning_rate": 4.777447099788329e-07, "loss": 0.4837, "step": 13384 }, { "epoch": 0.86, "grad_norm": 1.1690691709518433, "learning_rate": 4.772989006294554e-07, "loss": 0.5013, "step": 13385 }, { "epoch": 0.86, "grad_norm": 1.2232286930084229, "learning_rate": 4.768532889556732e-07, "loss": 0.4909, "step": 13386 }, { "epoch": 0.86, "grad_norm": 1.155930995941162, "learning_rate": 4.764078749769641e-07, "loss": 0.5003, "step": 13387 }, { "epoch": 0.86, "grad_norm": 1.1792484521865845, "learning_rate": 4.759626587127952e-07, "loss": 0.5007, "step": 13388 }, { "epoch": 0.86, "grad_norm": 1.2219256162643433, "learning_rate": 4.755176401826267e-07, "loss": 0.4993, "step": 13389 }, { "epoch": 0.86, "grad_norm": 1.1840739250183105, "learning_rate": 4.75072819405909e-07, "loss": 0.4877, "step": 13390 }, { "epoch": 0.86, "grad_norm": 1.215395450592041, "learning_rate": 4.7462819640208435e-07, "loss": 0.5009, "step": 13391 }, { "epoch": 0.86, "grad_norm": 1.1976549625396729, "learning_rate": 4.74183771190585e-07, "loss": 0.5051, "step": 13392 }, { "epoch": 0.86, "grad_norm": 1.2056918144226074, "learning_rate": 4.7373954379083595e-07, "loss": 0.471, "step": 13393 }, { "epoch": 0.86, "grad_norm": 1.0961716175079346, "learning_rate": 4.7329551422225394e-07, "loss": 0.5151, "step": 13394 }, { "epoch": 0.86, "grad_norm": 1.187278151512146, "learning_rate": 4.728516825042456e-07, "loss": 0.5075, "step": 13395 }, { "epoch": 0.86, "grad_norm": 1.2519586086273193, "learning_rate": 4.7240804865621103e-07, "loss": 0.5233, "step": 13396 }, { "epoch": 0.86, "grad_norm": 1.3009272813796997, "learning_rate": 4.7196461269753857e-07, "loss": 0.546, "step": 13397 }, { "epoch": 0.86, "grad_norm": 1.1618504524230957, "learning_rate": 4.715213746476105e-07, "loss": 0.4926, "step": 13398 }, { "epoch": 0.86, "grad_norm": 1.1633979082107544, "learning_rate": 4.710783345257991e-07, "loss": 0.5271, "step": 13399 }, { "epoch": 0.86, "grad_norm": 1.168500542640686, "learning_rate": 4.7063549235146943e-07, "loss": 0.4777, "step": 13400 }, { "epoch": 0.87, "grad_norm": 1.2193502187728882, "learning_rate": 4.7019284814397714e-07, "loss": 0.5125, "step": 13401 }, { "epoch": 0.87, "grad_norm": 1.2432347536087036, "learning_rate": 4.697504019226673e-07, "loss": 0.5061, "step": 13402 }, { "epoch": 0.87, "grad_norm": 1.7896010875701904, "learning_rate": 4.693081537068794e-07, "loss": 0.5267, "step": 13403 }, { "epoch": 0.87, "grad_norm": 1.2667052745819092, "learning_rate": 4.688661035159425e-07, "loss": 0.4916, "step": 13404 }, { "epoch": 0.87, "grad_norm": 1.3076467514038086, "learning_rate": 4.684242513691789e-07, "loss": 0.5247, "step": 13405 }, { "epoch": 0.87, "grad_norm": 1.2217010259628296, "learning_rate": 4.679825972858987e-07, "loss": 0.5087, "step": 13406 }, { "epoch": 0.87, "grad_norm": 1.196777582168579, "learning_rate": 4.675411412854064e-07, "loss": 0.5388, "step": 13407 }, { "epoch": 0.87, "grad_norm": 1.3241428136825562, "learning_rate": 4.6709988338699717e-07, "loss": 0.4495, "step": 13408 }, { "epoch": 0.87, "grad_norm": 1.1000574827194214, "learning_rate": 4.6665882360995673e-07, "loss": 0.515, "step": 13409 }, { "epoch": 0.87, "grad_norm": 1.675945520401001, "learning_rate": 4.662179619735635e-07, "loss": 0.5267, "step": 13410 }, { "epoch": 0.87, "grad_norm": 1.204331636428833, "learning_rate": 4.6577729849708544e-07, "loss": 0.5038, "step": 13411 }, { "epoch": 0.87, "grad_norm": 1.120640516281128, "learning_rate": 4.6533683319978316e-07, "loss": 0.51, "step": 13412 }, { "epoch": 0.87, "grad_norm": 1.1754189729690552, "learning_rate": 4.6489656610090807e-07, "loss": 0.488, "step": 13413 }, { "epoch": 0.87, "grad_norm": 1.1666502952575684, "learning_rate": 4.6445649721970464e-07, "loss": 0.5324, "step": 13414 }, { "epoch": 0.87, "grad_norm": 1.2607154846191406, "learning_rate": 4.6401662657540424e-07, "loss": 0.529, "step": 13415 }, { "epoch": 0.87, "grad_norm": 1.2463384866714478, "learning_rate": 4.635769541872348e-07, "loss": 0.4882, "step": 13416 }, { "epoch": 0.87, "grad_norm": 1.2494080066680908, "learning_rate": 4.631374800744121e-07, "loss": 0.5257, "step": 13417 }, { "epoch": 0.87, "grad_norm": 1.1956721544265747, "learning_rate": 4.6269820425614507e-07, "loss": 0.5174, "step": 13418 }, { "epoch": 0.87, "grad_norm": 1.1576547622680664, "learning_rate": 4.6225912675163355e-07, "loss": 0.4962, "step": 13419 }, { "epoch": 0.87, "grad_norm": 1.136208415031433, "learning_rate": 4.6182024758006874e-07, "loss": 0.472, "step": 13420 }, { "epoch": 0.87, "grad_norm": 1.114338755607605, "learning_rate": 4.6138156676063086e-07, "loss": 0.4928, "step": 13421 }, { "epoch": 0.87, "grad_norm": 1.2411102056503296, "learning_rate": 4.6094308431249567e-07, "loss": 0.4741, "step": 13422 }, { "epoch": 0.87, "grad_norm": 1.1633590459823608, "learning_rate": 4.605048002548268e-07, "loss": 0.4948, "step": 13423 }, { "epoch": 0.87, "grad_norm": 1.120734453201294, "learning_rate": 4.600667146067811e-07, "loss": 0.52, "step": 13424 }, { "epoch": 0.87, "grad_norm": 1.2378106117248535, "learning_rate": 4.59628827387506e-07, "loss": 0.5251, "step": 13425 }, { "epoch": 0.87, "grad_norm": 1.1966814994812012, "learning_rate": 4.5919113861614185e-07, "loss": 0.4859, "step": 13426 }, { "epoch": 0.87, "grad_norm": 1.1665689945220947, "learning_rate": 4.5875364831181654e-07, "loss": 0.5283, "step": 13427 }, { "epoch": 0.87, "grad_norm": 1.290574073791504, "learning_rate": 4.583163564936527e-07, "loss": 0.5211, "step": 13428 }, { "epoch": 0.87, "grad_norm": 1.2220135927200317, "learning_rate": 4.5787926318076323e-07, "loss": 0.5512, "step": 13429 }, { "epoch": 0.87, "grad_norm": 1.1077377796173096, "learning_rate": 4.5744236839225186e-07, "loss": 0.4708, "step": 13430 }, { "epoch": 0.87, "grad_norm": 1.2820987701416016, "learning_rate": 4.5700567214721545e-07, "loss": 0.5465, "step": 13431 }, { "epoch": 0.87, "grad_norm": 1.1969709396362305, "learning_rate": 4.565691744647405e-07, "loss": 0.5061, "step": 13432 }, { "epoch": 0.87, "grad_norm": 1.1159826517105103, "learning_rate": 4.561328753639038e-07, "loss": 0.4641, "step": 13433 }, { "epoch": 0.87, "grad_norm": 1.2070120573043823, "learning_rate": 4.5569677486377586e-07, "loss": 0.5175, "step": 13434 }, { "epoch": 0.87, "grad_norm": 1.127698302268982, "learning_rate": 4.552608729834174e-07, "loss": 0.484, "step": 13435 }, { "epoch": 0.87, "grad_norm": 1.1858131885528564, "learning_rate": 4.5482516974188043e-07, "loss": 0.4837, "step": 13436 }, { "epoch": 0.87, "grad_norm": 1.3953373432159424, "learning_rate": 4.543896651582086e-07, "loss": 0.5486, "step": 13437 }, { "epoch": 0.87, "grad_norm": 1.244317889213562, "learning_rate": 4.539543592514378e-07, "loss": 0.5403, "step": 13438 }, { "epoch": 0.87, "grad_norm": 1.213149905204773, "learning_rate": 4.5351925204059176e-07, "loss": 0.5238, "step": 13439 }, { "epoch": 0.87, "grad_norm": 1.1953024864196777, "learning_rate": 4.530843435446897e-07, "loss": 0.523, "step": 13440 }, { "epoch": 0.87, "grad_norm": 1.125243067741394, "learning_rate": 4.526496337827391e-07, "loss": 0.4961, "step": 13441 }, { "epoch": 0.87, "grad_norm": 1.2070833444595337, "learning_rate": 4.5221512277374146e-07, "loss": 0.4832, "step": 13442 }, { "epoch": 0.87, "grad_norm": 1.0943143367767334, "learning_rate": 4.5178081053668776e-07, "loss": 0.4854, "step": 13443 }, { "epoch": 0.87, "grad_norm": 1.2649695873260498, "learning_rate": 4.5134669709055943e-07, "loss": 0.488, "step": 13444 }, { "epoch": 0.87, "grad_norm": 1.1822665929794312, "learning_rate": 4.5091278245433136e-07, "loss": 0.513, "step": 13445 }, { "epoch": 0.87, "grad_norm": 1.3908601999282837, "learning_rate": 4.5047906664696884e-07, "loss": 0.5033, "step": 13446 }, { "epoch": 0.87, "grad_norm": 1.2838398218154907, "learning_rate": 4.5004554968742784e-07, "loss": 0.5297, "step": 13447 }, { "epoch": 0.87, "grad_norm": 1.1893032789230347, "learning_rate": 4.4961223159465774e-07, "loss": 0.5039, "step": 13448 }, { "epoch": 0.87, "grad_norm": 1.2488712072372437, "learning_rate": 4.4917911238759715e-07, "loss": 0.5067, "step": 13449 }, { "epoch": 0.87, "grad_norm": 1.1509088277816772, "learning_rate": 4.4874619208517546e-07, "loss": 0.4898, "step": 13450 }, { "epoch": 0.87, "grad_norm": 1.4566785097122192, "learning_rate": 4.4831347070631527e-07, "loss": 0.4884, "step": 13451 }, { "epoch": 0.87, "grad_norm": 1.2132573127746582, "learning_rate": 4.4788094826992977e-07, "loss": 0.4851, "step": 13452 }, { "epoch": 0.87, "grad_norm": 1.2738453149795532, "learning_rate": 4.474486247949239e-07, "loss": 0.4973, "step": 13453 }, { "epoch": 0.87, "grad_norm": 1.2304260730743408, "learning_rate": 4.470165003001925e-07, "loss": 0.5049, "step": 13454 }, { "epoch": 0.87, "grad_norm": 1.2704237699508667, "learning_rate": 4.465845748046238e-07, "loss": 0.5318, "step": 13455 }, { "epoch": 0.87, "grad_norm": 1.3167058229446411, "learning_rate": 4.461528483270944e-07, "loss": 0.5149, "step": 13456 }, { "epoch": 0.87, "grad_norm": 1.3422083854675293, "learning_rate": 4.457213208864752e-07, "loss": 0.548, "step": 13457 }, { "epoch": 0.87, "grad_norm": 1.2222881317138672, "learning_rate": 4.4528999250162684e-07, "loss": 0.5332, "step": 13458 }, { "epoch": 0.87, "grad_norm": 1.3572665452957153, "learning_rate": 4.4485886319140137e-07, "loss": 0.5304, "step": 13459 }, { "epoch": 0.87, "grad_norm": 1.1793889999389648, "learning_rate": 4.4442793297464257e-07, "loss": 0.5119, "step": 13460 }, { "epoch": 0.87, "grad_norm": 1.2595645189285278, "learning_rate": 4.4399720187018656e-07, "loss": 0.5264, "step": 13461 }, { "epoch": 0.87, "grad_norm": 1.2730205059051514, "learning_rate": 4.4356666989685715e-07, "loss": 0.4614, "step": 13462 }, { "epoch": 0.87, "grad_norm": 1.2834277153015137, "learning_rate": 4.431363370734726e-07, "loss": 0.5089, "step": 13463 }, { "epoch": 0.87, "grad_norm": 1.3063732385635376, "learning_rate": 4.427062034188423e-07, "loss": 0.5103, "step": 13464 }, { "epoch": 0.87, "grad_norm": 1.3248517513275146, "learning_rate": 4.422762689517657e-07, "loss": 0.5693, "step": 13465 }, { "epoch": 0.87, "grad_norm": 1.2409156560897827, "learning_rate": 4.418465336910344e-07, "loss": 0.5176, "step": 13466 }, { "epoch": 0.87, "grad_norm": 1.181524634361267, "learning_rate": 4.4141699765543166e-07, "loss": 0.4675, "step": 13467 }, { "epoch": 0.87, "grad_norm": 1.1571044921875, "learning_rate": 4.409876608637298e-07, "loss": 0.4702, "step": 13468 }, { "epoch": 0.87, "grad_norm": 1.1768437623977661, "learning_rate": 4.4055852333469486e-07, "loss": 0.4754, "step": 13469 }, { "epoch": 0.87, "grad_norm": 1.2623082399368286, "learning_rate": 4.401295850870829e-07, "loss": 0.5009, "step": 13470 }, { "epoch": 0.87, "grad_norm": 1.2498823404312134, "learning_rate": 4.397008461396429e-07, "loss": 0.4807, "step": 13471 }, { "epoch": 0.87, "grad_norm": 1.1002092361450195, "learning_rate": 4.3927230651111264e-07, "loss": 0.4599, "step": 13472 }, { "epoch": 0.87, "grad_norm": 1.3776323795318604, "learning_rate": 4.388439662202243e-07, "loss": 0.5231, "step": 13473 }, { "epoch": 0.87, "grad_norm": 1.3039613962173462, "learning_rate": 4.3841582528569736e-07, "loss": 0.539, "step": 13474 }, { "epoch": 0.87, "grad_norm": 1.2849946022033691, "learning_rate": 4.379878837262452e-07, "loss": 0.5039, "step": 13475 }, { "epoch": 0.87, "grad_norm": 1.2173148393630981, "learning_rate": 4.375601415605729e-07, "loss": 0.4703, "step": 13476 }, { "epoch": 0.87, "grad_norm": 1.186356544494629, "learning_rate": 4.3713259880737545e-07, "loss": 0.4846, "step": 13477 }, { "epoch": 0.87, "grad_norm": 1.1659501791000366, "learning_rate": 4.367052554853396e-07, "loss": 0.5111, "step": 13478 }, { "epoch": 0.87, "grad_norm": 1.1927329301834106, "learning_rate": 4.362781116131448e-07, "loss": 0.5324, "step": 13479 }, { "epoch": 0.87, "grad_norm": 1.2187739610671997, "learning_rate": 4.358511672094584e-07, "loss": 0.5573, "step": 13480 }, { "epoch": 0.87, "grad_norm": 1.1135163307189941, "learning_rate": 4.354244222929416e-07, "loss": 0.4967, "step": 13481 }, { "epoch": 0.87, "grad_norm": 1.174655556678772, "learning_rate": 4.3499787688224605e-07, "loss": 0.5133, "step": 13482 }, { "epoch": 0.87, "grad_norm": 1.8939013481140137, "learning_rate": 4.3457153099601577e-07, "loss": 0.4869, "step": 13483 }, { "epoch": 0.87, "grad_norm": 1.3881151676177979, "learning_rate": 4.341453846528859e-07, "loss": 0.5501, "step": 13484 }, { "epoch": 0.87, "grad_norm": 1.1702102422714233, "learning_rate": 4.337194378714804e-07, "loss": 0.4691, "step": 13485 }, { "epoch": 0.87, "grad_norm": 1.1953421831130981, "learning_rate": 4.3329369067041713e-07, "loss": 0.5146, "step": 13486 }, { "epoch": 0.87, "grad_norm": 1.2467608451843262, "learning_rate": 4.328681430683046e-07, "loss": 0.4817, "step": 13487 }, { "epoch": 0.87, "grad_norm": 1.2724534273147583, "learning_rate": 4.3244279508374175e-07, "loss": 0.5079, "step": 13488 }, { "epoch": 0.87, "grad_norm": 1.2824448347091675, "learning_rate": 4.3201764673531985e-07, "loss": 0.4866, "step": 13489 }, { "epoch": 0.87, "grad_norm": 1.1716923713684082, "learning_rate": 4.3159269804162183e-07, "loss": 0.4802, "step": 13490 }, { "epoch": 0.87, "grad_norm": 1.2534513473510742, "learning_rate": 4.3116794902122007e-07, "loss": 0.5372, "step": 13491 }, { "epoch": 0.87, "grad_norm": 1.2741665840148926, "learning_rate": 4.307433996926791e-07, "loss": 0.4846, "step": 13492 }, { "epoch": 0.87, "grad_norm": 1.212680697441101, "learning_rate": 4.303190500745552e-07, "loss": 0.5007, "step": 13493 }, { "epoch": 0.87, "grad_norm": 1.234639048576355, "learning_rate": 4.298949001853969e-07, "loss": 0.4825, "step": 13494 }, { "epoch": 0.87, "grad_norm": 1.2390820980072021, "learning_rate": 4.2947095004373993e-07, "loss": 0.5256, "step": 13495 }, { "epoch": 0.87, "grad_norm": 1.0775694847106934, "learning_rate": 4.2904719966811613e-07, "loss": 0.4678, "step": 13496 }, { "epoch": 0.87, "grad_norm": 1.1363780498504639, "learning_rate": 4.286236490770462e-07, "loss": 0.4623, "step": 13497 }, { "epoch": 0.87, "grad_norm": 1.2478667497634888, "learning_rate": 4.2820029828904206e-07, "loss": 0.5326, "step": 13498 }, { "epoch": 0.87, "grad_norm": 1.3051965236663818, "learning_rate": 4.2777714732260834e-07, "loss": 0.5622, "step": 13499 }, { "epoch": 0.87, "grad_norm": 1.14494788646698, "learning_rate": 4.2735419619623795e-07, "loss": 0.4926, "step": 13500 }, { "epoch": 0.87, "grad_norm": 1.172008991241455, "learning_rate": 4.2693144492841844e-07, "loss": 0.5145, "step": 13501 }, { "epoch": 0.87, "grad_norm": 1.5702052116394043, "learning_rate": 4.265088935376266e-07, "loss": 0.5029, "step": 13502 }, { "epoch": 0.87, "grad_norm": 1.2203775644302368, "learning_rate": 4.2608654204233214e-07, "loss": 0.5401, "step": 13503 }, { "epoch": 0.87, "grad_norm": 1.200825810432434, "learning_rate": 4.256643904609931e-07, "loss": 0.5585, "step": 13504 }, { "epoch": 0.87, "grad_norm": 1.31753671169281, "learning_rate": 4.2524243881206183e-07, "loss": 0.5023, "step": 13505 }, { "epoch": 0.87, "grad_norm": 1.2338833808898926, "learning_rate": 4.2482068711398037e-07, "loss": 0.5584, "step": 13506 }, { "epoch": 0.87, "grad_norm": 1.3134748935699463, "learning_rate": 4.243991353851823e-07, "loss": 0.4952, "step": 13507 }, { "epoch": 0.87, "grad_norm": 1.0803159475326538, "learning_rate": 4.2397778364409393e-07, "loss": 0.4533, "step": 13508 }, { "epoch": 0.87, "grad_norm": 1.5518611669540405, "learning_rate": 4.2355663190913e-07, "loss": 0.4983, "step": 13509 }, { "epoch": 0.87, "grad_norm": 1.1504487991333008, "learning_rate": 4.2313568019869743e-07, "loss": 0.4771, "step": 13510 }, { "epoch": 0.87, "grad_norm": 1.0480397939682007, "learning_rate": 4.2271492853119653e-07, "loss": 0.5025, "step": 13511 }, { "epoch": 0.87, "grad_norm": 1.267320990562439, "learning_rate": 4.2229437692501593e-07, "loss": 0.5335, "step": 13512 }, { "epoch": 0.87, "grad_norm": 1.173937439918518, "learning_rate": 4.218740253985376e-07, "loss": 0.5157, "step": 13513 }, { "epoch": 0.87, "grad_norm": 1.2459784746170044, "learning_rate": 4.2145387397013513e-07, "loss": 0.4933, "step": 13514 }, { "epoch": 0.87, "grad_norm": 1.2101749181747437, "learning_rate": 4.2103392265817e-07, "loss": 0.5502, "step": 13515 }, { "epoch": 0.87, "grad_norm": 1.323195457458496, "learning_rate": 4.2061417148099803e-07, "loss": 0.4978, "step": 13516 }, { "epoch": 0.87, "grad_norm": 1.26161527633667, "learning_rate": 4.201946204569657e-07, "loss": 0.517, "step": 13517 }, { "epoch": 0.87, "grad_norm": 1.1698299646377563, "learning_rate": 4.1977526960441053e-07, "loss": 0.5227, "step": 13518 }, { "epoch": 0.87, "grad_norm": 1.4121088981628418, "learning_rate": 4.1935611894166116e-07, "loss": 0.5359, "step": 13519 }, { "epoch": 0.87, "grad_norm": 1.1703808307647705, "learning_rate": 4.1893716848703856e-07, "loss": 0.4806, "step": 13520 }, { "epoch": 0.87, "grad_norm": 1.2394311428070068, "learning_rate": 4.185184182588525e-07, "loss": 0.4771, "step": 13521 }, { "epoch": 0.87, "grad_norm": 1.2781566381454468, "learning_rate": 4.1809986827540563e-07, "loss": 0.4623, "step": 13522 }, { "epoch": 0.87, "grad_norm": 1.3354897499084473, "learning_rate": 4.1768151855499205e-07, "loss": 0.5264, "step": 13523 }, { "epoch": 0.87, "grad_norm": 1.2373700141906738, "learning_rate": 4.172633691158967e-07, "loss": 0.4918, "step": 13524 }, { "epoch": 0.87, "grad_norm": 1.3734116554260254, "learning_rate": 4.168454199763966e-07, "loss": 0.5487, "step": 13525 }, { "epoch": 0.87, "grad_norm": 1.2749892473220825, "learning_rate": 4.1642767115475827e-07, "loss": 0.5786, "step": 13526 }, { "epoch": 0.87, "grad_norm": 1.1866533756256104, "learning_rate": 4.160101226692398e-07, "loss": 0.5076, "step": 13527 }, { "epoch": 0.87, "grad_norm": 1.1473567485809326, "learning_rate": 4.1559277453809275e-07, "loss": 0.4172, "step": 13528 }, { "epoch": 0.87, "grad_norm": 1.3111649751663208, "learning_rate": 4.1517562677955704e-07, "loss": 0.5096, "step": 13529 }, { "epoch": 0.87, "grad_norm": 1.1987076997756958, "learning_rate": 4.1475867941186576e-07, "loss": 0.5328, "step": 13530 }, { "epoch": 0.87, "grad_norm": 1.2053583860397339, "learning_rate": 4.1434193245324385e-07, "loss": 0.5461, "step": 13531 }, { "epoch": 0.87, "grad_norm": 1.2080886363983154, "learning_rate": 4.139253859219039e-07, "loss": 0.5042, "step": 13532 }, { "epoch": 0.87, "grad_norm": 1.219772219657898, "learning_rate": 4.135090398360525e-07, "loss": 0.5098, "step": 13533 }, { "epoch": 0.87, "grad_norm": 1.2269076108932495, "learning_rate": 4.1309289421388844e-07, "loss": 0.4952, "step": 13534 }, { "epoch": 0.87, "grad_norm": 1.2557265758514404, "learning_rate": 4.1267694907359934e-07, "loss": 0.5058, "step": 13535 }, { "epoch": 0.87, "grad_norm": 1.3287516832351685, "learning_rate": 4.122612044333651e-07, "loss": 0.5369, "step": 13536 }, { "epoch": 0.87, "grad_norm": 1.2636730670928955, "learning_rate": 4.11845660311358e-07, "loss": 0.5319, "step": 13537 }, { "epoch": 0.87, "grad_norm": 1.2809579372406006, "learning_rate": 4.1143031672573884e-07, "loss": 0.5305, "step": 13538 }, { "epoch": 0.87, "grad_norm": 1.2636524438858032, "learning_rate": 4.110151736946616e-07, "loss": 0.5088, "step": 13539 }, { "epoch": 0.87, "grad_norm": 1.1976673603057861, "learning_rate": 4.1060023123627166e-07, "loss": 0.4782, "step": 13540 }, { "epoch": 0.87, "grad_norm": 1.204396367073059, "learning_rate": 4.101854893687046e-07, "loss": 0.5089, "step": 13541 }, { "epoch": 0.87, "grad_norm": 1.2343977689743042, "learning_rate": 4.097709481100881e-07, "loss": 0.4724, "step": 13542 }, { "epoch": 0.87, "grad_norm": 1.3437803983688354, "learning_rate": 4.0935660747854157e-07, "loss": 0.5305, "step": 13543 }, { "epoch": 0.87, "grad_norm": 1.2383010387420654, "learning_rate": 4.0894246749217216e-07, "loss": 0.5546, "step": 13544 }, { "epoch": 0.87, "grad_norm": 1.2152364253997803, "learning_rate": 4.0852852816908316e-07, "loss": 0.5358, "step": 13545 }, { "epoch": 0.87, "grad_norm": 1.228472113609314, "learning_rate": 4.081147895273657e-07, "loss": 0.516, "step": 13546 }, { "epoch": 0.87, "grad_norm": 1.3205195665359497, "learning_rate": 4.0770125158510364e-07, "loss": 0.5592, "step": 13547 }, { "epoch": 0.87, "grad_norm": 1.2690030336380005, "learning_rate": 4.0728791436037143e-07, "loss": 0.5852, "step": 13548 }, { "epoch": 0.87, "grad_norm": 1.1895411014556885, "learning_rate": 4.068747778712362e-07, "loss": 0.5206, "step": 13549 }, { "epoch": 0.87, "grad_norm": 1.178839921951294, "learning_rate": 4.0646184213575303e-07, "loss": 0.5642, "step": 13550 }, { "epoch": 0.87, "grad_norm": 1.2353254556655884, "learning_rate": 4.0604910717197077e-07, "loss": 0.4917, "step": 13551 }, { "epoch": 0.87, "grad_norm": 1.3076592683792114, "learning_rate": 4.0563657299793004e-07, "loss": 0.5757, "step": 13552 }, { "epoch": 0.87, "grad_norm": 1.1958764791488647, "learning_rate": 4.0522423963166137e-07, "loss": 0.4994, "step": 13553 }, { "epoch": 0.87, "grad_norm": 1.254309058189392, "learning_rate": 4.048121070911859e-07, "loss": 0.5422, "step": 13554 }, { "epoch": 0.87, "grad_norm": 1.1836525201797485, "learning_rate": 4.044001753945187e-07, "loss": 0.4928, "step": 13555 }, { "epoch": 0.88, "grad_norm": 1.2427570819854736, "learning_rate": 4.03988444559662e-07, "loss": 0.4851, "step": 13556 }, { "epoch": 0.88, "grad_norm": 1.2642982006072998, "learning_rate": 4.035769146046126e-07, "loss": 0.499, "step": 13557 }, { "epoch": 0.88, "grad_norm": 1.2529702186584473, "learning_rate": 4.031655855473571e-07, "loss": 0.5335, "step": 13558 }, { "epoch": 0.88, "grad_norm": 1.2435075044631958, "learning_rate": 4.0275445740587447e-07, "loss": 0.4957, "step": 13559 }, { "epoch": 0.88, "grad_norm": 1.391158938407898, "learning_rate": 4.0234353019813264e-07, "loss": 0.5512, "step": 13560 }, { "epoch": 0.88, "grad_norm": 1.2471383810043335, "learning_rate": 4.0193280394209445e-07, "loss": 0.5258, "step": 13561 }, { "epoch": 0.88, "grad_norm": 1.2792174816131592, "learning_rate": 4.0152227865570934e-07, "loss": 0.5307, "step": 13562 }, { "epoch": 0.88, "grad_norm": 1.3565120697021484, "learning_rate": 4.0111195435692085e-07, "loss": 0.5313, "step": 13563 }, { "epoch": 0.88, "grad_norm": 1.1573083400726318, "learning_rate": 4.00701831063664e-07, "loss": 0.5354, "step": 13564 }, { "epoch": 0.88, "grad_norm": 1.2158482074737549, "learning_rate": 4.0029190879386334e-07, "loss": 0.5163, "step": 13565 }, { "epoch": 0.88, "grad_norm": 1.2118175029754639, "learning_rate": 3.9988218756543684e-07, "loss": 0.5105, "step": 13566 }, { "epoch": 0.88, "grad_norm": 1.209319829940796, "learning_rate": 3.9947266739629067e-07, "loss": 0.5391, "step": 13567 }, { "epoch": 0.88, "grad_norm": 1.1722744703292847, "learning_rate": 3.990633483043249e-07, "loss": 0.4734, "step": 13568 }, { "epoch": 0.88, "grad_norm": 1.0862269401550293, "learning_rate": 3.986542303074298e-07, "loss": 0.4646, "step": 13569 }, { "epoch": 0.88, "grad_norm": 1.1117801666259766, "learning_rate": 3.9824531342348593e-07, "loss": 0.4806, "step": 13570 }, { "epoch": 0.88, "grad_norm": 1.2180895805358887, "learning_rate": 3.978365976703674e-07, "loss": 0.4993, "step": 13571 }, { "epoch": 0.88, "grad_norm": 1.249877691268921, "learning_rate": 3.974280830659377e-07, "loss": 0.5275, "step": 13572 }, { "epoch": 0.88, "grad_norm": 1.1479369401931763, "learning_rate": 3.970197696280514e-07, "loss": 0.5139, "step": 13573 }, { "epoch": 0.88, "grad_norm": 1.1497440338134766, "learning_rate": 3.966116573745549e-07, "loss": 0.4827, "step": 13574 }, { "epoch": 0.88, "grad_norm": 1.1286473274230957, "learning_rate": 3.9620374632328595e-07, "loss": 0.4945, "step": 13575 }, { "epoch": 0.88, "grad_norm": 1.229408860206604, "learning_rate": 3.957960364920732e-07, "loss": 0.561, "step": 13576 }, { "epoch": 0.88, "grad_norm": 1.206606149673462, "learning_rate": 3.9538852789873684e-07, "loss": 0.5547, "step": 13577 }, { "epoch": 0.88, "grad_norm": 1.3726638555526733, "learning_rate": 3.949812205610887e-07, "loss": 0.4742, "step": 13578 }, { "epoch": 0.88, "grad_norm": 1.121301531791687, "learning_rate": 3.9457411449692894e-07, "loss": 0.4678, "step": 13579 }, { "epoch": 0.88, "grad_norm": 1.3277252912521362, "learning_rate": 3.9416720972405343e-07, "loss": 0.5048, "step": 13580 }, { "epoch": 0.88, "grad_norm": 1.1325187683105469, "learning_rate": 3.937605062602451e-07, "loss": 0.5262, "step": 13581 }, { "epoch": 0.88, "grad_norm": 1.1932268142700195, "learning_rate": 3.933540041232814e-07, "loss": 0.5094, "step": 13582 }, { "epoch": 0.88, "grad_norm": 1.1987543106079102, "learning_rate": 3.929477033309292e-07, "loss": 0.5069, "step": 13583 }, { "epoch": 0.88, "grad_norm": 1.3127434253692627, "learning_rate": 3.925416039009461e-07, "loss": 0.4848, "step": 13584 }, { "epoch": 0.88, "grad_norm": 1.2693254947662354, "learning_rate": 3.921357058510822e-07, "loss": 0.501, "step": 13585 }, { "epoch": 0.88, "grad_norm": 1.2593843936920166, "learning_rate": 3.917300091990778e-07, "loss": 0.4986, "step": 13586 }, { "epoch": 0.88, "grad_norm": 1.1685874462127686, "learning_rate": 3.9132451396266533e-07, "loss": 0.475, "step": 13587 }, { "epoch": 0.88, "grad_norm": 1.3614976406097412, "learning_rate": 3.909192201595685e-07, "loss": 0.5387, "step": 13588 }, { "epoch": 0.88, "grad_norm": 1.2282038927078247, "learning_rate": 3.905141278075003e-07, "loss": 0.512, "step": 13589 }, { "epoch": 0.88, "grad_norm": 1.2992630004882812, "learning_rate": 3.9010923692416713e-07, "loss": 0.5511, "step": 13590 }, { "epoch": 0.88, "grad_norm": 1.345628023147583, "learning_rate": 3.897045475272654e-07, "loss": 0.512, "step": 13591 }, { "epoch": 0.88, "grad_norm": 1.1888458728790283, "learning_rate": 3.893000596344837e-07, "loss": 0.4858, "step": 13592 }, { "epoch": 0.88, "grad_norm": 1.215114712715149, "learning_rate": 3.8889577326350016e-07, "loss": 0.5421, "step": 13593 }, { "epoch": 0.88, "grad_norm": 1.2968896627426147, "learning_rate": 3.884916884319856e-07, "loss": 0.5006, "step": 13594 }, { "epoch": 0.88, "grad_norm": 1.3814384937286377, "learning_rate": 3.880878051576009e-07, "loss": 0.5442, "step": 13595 }, { "epoch": 0.88, "grad_norm": 1.3679121732711792, "learning_rate": 3.876841234579998e-07, "loss": 0.5513, "step": 13596 }, { "epoch": 0.88, "grad_norm": 1.205496907234192, "learning_rate": 3.872806433508269e-07, "loss": 0.5005, "step": 13597 }, { "epoch": 0.88, "grad_norm": 1.2175555229187012, "learning_rate": 3.86877364853715e-07, "loss": 0.5191, "step": 13598 }, { "epoch": 0.88, "grad_norm": 1.2203015089035034, "learning_rate": 3.8647428798429086e-07, "loss": 0.5619, "step": 13599 }, { "epoch": 0.88, "grad_norm": 1.2359066009521484, "learning_rate": 3.860714127601734e-07, "loss": 0.485, "step": 13600 }, { "epoch": 0.88, "grad_norm": 1.2435715198516846, "learning_rate": 3.8566873919896996e-07, "loss": 0.5025, "step": 13601 }, { "epoch": 0.88, "grad_norm": 1.2642226219177246, "learning_rate": 3.8526626731828165e-07, "loss": 0.5405, "step": 13602 }, { "epoch": 0.88, "grad_norm": 2.2044517993927, "learning_rate": 3.8486399713569766e-07, "loss": 0.5126, "step": 13603 }, { "epoch": 0.88, "grad_norm": 1.2073825597763062, "learning_rate": 3.844619286688017e-07, "loss": 0.5417, "step": 13604 }, { "epoch": 0.88, "grad_norm": 1.2241405248641968, "learning_rate": 3.8406006193516585e-07, "loss": 0.5204, "step": 13605 }, { "epoch": 0.88, "grad_norm": 1.1882141828536987, "learning_rate": 3.836583969523561e-07, "loss": 0.4871, "step": 13606 }, { "epoch": 0.88, "grad_norm": 1.2023667097091675, "learning_rate": 3.8325693373792716e-07, "loss": 0.5165, "step": 13607 }, { "epoch": 0.88, "grad_norm": 1.179134488105774, "learning_rate": 3.8285567230942733e-07, "loss": 0.5334, "step": 13608 }, { "epoch": 0.88, "grad_norm": 1.1601097583770752, "learning_rate": 3.8245461268439255e-07, "loss": 0.493, "step": 13609 }, { "epoch": 0.88, "grad_norm": 1.247252345085144, "learning_rate": 3.8205375488035323e-07, "loss": 0.5033, "step": 13610 }, { "epoch": 0.88, "grad_norm": 1.227197527885437, "learning_rate": 3.8165309891482983e-07, "loss": 0.5163, "step": 13611 }, { "epoch": 0.88, "grad_norm": 1.2200440168380737, "learning_rate": 3.8125264480533443e-07, "loss": 0.4389, "step": 13612 }, { "epoch": 0.88, "grad_norm": 1.2616446018218994, "learning_rate": 3.808523925693697e-07, "loss": 0.4995, "step": 13613 }, { "epoch": 0.88, "grad_norm": 1.199444055557251, "learning_rate": 3.8045234222442886e-07, "loss": 0.5644, "step": 13614 }, { "epoch": 0.88, "grad_norm": 1.297081708908081, "learning_rate": 3.8005249378799735e-07, "loss": 0.5557, "step": 13615 }, { "epoch": 0.88, "grad_norm": 1.1959502696990967, "learning_rate": 3.7965284727755125e-07, "loss": 0.5144, "step": 13616 }, { "epoch": 0.88, "grad_norm": 1.152807593345642, "learning_rate": 3.7925340271055933e-07, "loss": 0.5208, "step": 13617 }, { "epoch": 0.88, "grad_norm": 1.2061409950256348, "learning_rate": 3.788541601044793e-07, "loss": 0.5466, "step": 13618 }, { "epoch": 0.88, "grad_norm": 1.2081681489944458, "learning_rate": 3.784551194767616e-07, "loss": 0.5072, "step": 13619 }, { "epoch": 0.88, "grad_norm": 1.257020115852356, "learning_rate": 3.780562808448468e-07, "loss": 0.5025, "step": 13620 }, { "epoch": 0.88, "grad_norm": 2.1515867710113525, "learning_rate": 3.7765764422616646e-07, "loss": 0.4968, "step": 13621 }, { "epoch": 0.88, "grad_norm": 1.2439334392547607, "learning_rate": 3.77259209638145e-07, "loss": 0.5319, "step": 13622 }, { "epoch": 0.88, "grad_norm": 1.1589105129241943, "learning_rate": 3.768609770981968e-07, "loss": 0.4713, "step": 13623 }, { "epoch": 0.88, "grad_norm": 1.1684787273406982, "learning_rate": 3.764629466237274e-07, "loss": 0.55, "step": 13624 }, { "epoch": 0.88, "grad_norm": 1.2378060817718506, "learning_rate": 3.7606511823213454e-07, "loss": 0.5294, "step": 13625 }, { "epoch": 0.88, "grad_norm": 1.1979107856750488, "learning_rate": 3.7566749194080434e-07, "loss": 0.5434, "step": 13626 }, { "epoch": 0.88, "grad_norm": 1.2084205150604248, "learning_rate": 3.7527006776711727e-07, "loss": 0.4713, "step": 13627 }, { "epoch": 0.88, "grad_norm": 1.4065333604812622, "learning_rate": 3.748728457284434e-07, "loss": 0.5, "step": 13628 }, { "epoch": 0.88, "grad_norm": 1.1291065216064453, "learning_rate": 3.7447582584214493e-07, "loss": 0.4973, "step": 13629 }, { "epoch": 0.88, "grad_norm": 1.4289579391479492, "learning_rate": 3.740790081255735e-07, "loss": 0.4973, "step": 13630 }, { "epoch": 0.88, "grad_norm": 2.972468614578247, "learning_rate": 3.736823925960753e-07, "loss": 0.5201, "step": 13631 }, { "epoch": 0.88, "grad_norm": 1.1714760065078735, "learning_rate": 3.73285979270982e-07, "loss": 0.5204, "step": 13632 }, { "epoch": 0.88, "grad_norm": 1.1867308616638184, "learning_rate": 3.728897681676219e-07, "loss": 0.5328, "step": 13633 }, { "epoch": 0.88, "grad_norm": 1.376704216003418, "learning_rate": 3.7249375930331175e-07, "loss": 0.4928, "step": 13634 }, { "epoch": 0.88, "grad_norm": 1.2582942247390747, "learning_rate": 3.7209795269536e-07, "loss": 0.5045, "step": 13635 }, { "epoch": 0.88, "grad_norm": 1.1801530122756958, "learning_rate": 3.7170234836106714e-07, "loss": 0.4651, "step": 13636 }, { "epoch": 0.88, "grad_norm": 1.1704766750335693, "learning_rate": 3.713069463177238e-07, "loss": 0.486, "step": 13637 }, { "epoch": 0.88, "grad_norm": 1.2041739225387573, "learning_rate": 3.709117465826112e-07, "loss": 0.4995, "step": 13638 }, { "epoch": 0.88, "grad_norm": 1.1711117029190063, "learning_rate": 3.7051674917300275e-07, "loss": 0.4795, "step": 13639 }, { "epoch": 0.88, "grad_norm": 1.2194979190826416, "learning_rate": 3.701219541061629e-07, "loss": 0.519, "step": 13640 }, { "epoch": 0.88, "grad_norm": 1.2512280941009521, "learning_rate": 3.697273613993474e-07, "loss": 0.5347, "step": 13641 }, { "epoch": 0.88, "grad_norm": 1.1814380884170532, "learning_rate": 3.6933297106980294e-07, "loss": 0.5212, "step": 13642 }, { "epoch": 0.88, "grad_norm": 1.2678688764572144, "learning_rate": 3.689387831347674e-07, "loss": 0.5201, "step": 13643 }, { "epoch": 0.88, "grad_norm": 1.224586844444275, "learning_rate": 3.6854479761146866e-07, "loss": 0.4974, "step": 13644 }, { "epoch": 0.88, "grad_norm": 1.2253433465957642, "learning_rate": 3.6815101451712743e-07, "loss": 0.4983, "step": 13645 }, { "epoch": 0.88, "grad_norm": 1.3148725032806396, "learning_rate": 3.677574338689549e-07, "loss": 0.4635, "step": 13646 }, { "epoch": 0.88, "grad_norm": 1.2190345525741577, "learning_rate": 3.67364055684154e-07, "loss": 0.5028, "step": 13647 }, { "epoch": 0.88, "grad_norm": 1.2093087434768677, "learning_rate": 3.6697087997991767e-07, "loss": 0.4756, "step": 13648 }, { "epoch": 0.88, "grad_norm": 1.1105467081069946, "learning_rate": 3.665779067734315e-07, "loss": 0.5003, "step": 13649 }, { "epoch": 0.88, "grad_norm": 1.1165776252746582, "learning_rate": 3.6618513608186966e-07, "loss": 0.4593, "step": 13650 }, { "epoch": 0.88, "grad_norm": 1.2015759944915771, "learning_rate": 3.657925679224006e-07, "loss": 0.503, "step": 13651 }, { "epoch": 0.88, "grad_norm": 1.2439987659454346, "learning_rate": 3.6540020231218164e-07, "loss": 0.5093, "step": 13652 }, { "epoch": 0.88, "grad_norm": 1.178633689880371, "learning_rate": 3.650080392683625e-07, "loss": 0.5323, "step": 13653 }, { "epoch": 0.88, "grad_norm": 1.2556321620941162, "learning_rate": 3.6461607880808437e-07, "loss": 0.5225, "step": 13654 }, { "epoch": 0.88, "grad_norm": 1.3010895252227783, "learning_rate": 3.642243209484775e-07, "loss": 0.5713, "step": 13655 }, { "epoch": 0.88, "grad_norm": 1.1404484510421753, "learning_rate": 3.6383276570666484e-07, "loss": 0.4701, "step": 13656 }, { "epoch": 0.88, "grad_norm": 1.184033989906311, "learning_rate": 3.634414130997599e-07, "loss": 0.4759, "step": 13657 }, { "epoch": 0.88, "grad_norm": 1.2931891679763794, "learning_rate": 3.63050263144869e-07, "loss": 0.5064, "step": 13658 }, { "epoch": 0.88, "grad_norm": 1.2546863555908203, "learning_rate": 3.6265931585908744e-07, "loss": 0.5283, "step": 13659 }, { "epoch": 0.88, "grad_norm": 1.1995153427124023, "learning_rate": 3.6226857125950366e-07, "loss": 0.4812, "step": 13660 }, { "epoch": 0.88, "grad_norm": 1.1921849250793457, "learning_rate": 3.618780293631941e-07, "loss": 0.5028, "step": 13661 }, { "epoch": 0.88, "grad_norm": 1.2140692472457886, "learning_rate": 3.614876901872294e-07, "loss": 0.5194, "step": 13662 }, { "epoch": 0.88, "grad_norm": 1.2159889936447144, "learning_rate": 3.610975537486705e-07, "loss": 0.4775, "step": 13663 }, { "epoch": 0.88, "grad_norm": 1.2354254722595215, "learning_rate": 3.6070762006456874e-07, "loss": 0.5263, "step": 13664 }, { "epoch": 0.88, "grad_norm": 1.3612295389175415, "learning_rate": 3.603178891519671e-07, "loss": 0.5126, "step": 13665 }, { "epoch": 0.88, "grad_norm": 1.2418888807296753, "learning_rate": 3.5992836102790143e-07, "loss": 0.4894, "step": 13666 }, { "epoch": 0.88, "grad_norm": 1.7549490928649902, "learning_rate": 3.5953903570939417e-07, "loss": 0.5193, "step": 13667 }, { "epoch": 0.88, "grad_norm": 1.1811195611953735, "learning_rate": 3.591499132134635e-07, "loss": 0.5088, "step": 13668 }, { "epoch": 0.88, "grad_norm": 1.2605066299438477, "learning_rate": 3.5876099355711625e-07, "loss": 0.5046, "step": 13669 }, { "epoch": 0.88, "grad_norm": 1.2663429975509644, "learning_rate": 3.5837227675735164e-07, "loss": 0.5148, "step": 13670 }, { "epoch": 0.88, "grad_norm": 1.4251221418380737, "learning_rate": 3.579837628311594e-07, "loss": 0.5159, "step": 13671 }, { "epoch": 0.88, "grad_norm": 1.3827861547470093, "learning_rate": 3.57595451795521e-07, "loss": 0.4931, "step": 13672 }, { "epoch": 0.88, "grad_norm": 1.1716783046722412, "learning_rate": 3.5720734366740675e-07, "loss": 0.5202, "step": 13673 }, { "epoch": 0.88, "grad_norm": 1.3486248254776, "learning_rate": 3.568194384637813e-07, "loss": 0.4805, "step": 13674 }, { "epoch": 0.88, "grad_norm": 1.2583377361297607, "learning_rate": 3.564317362015984e-07, "loss": 0.5212, "step": 13675 }, { "epoch": 0.88, "grad_norm": 1.1772648096084595, "learning_rate": 3.5604423689780396e-07, "loss": 0.5303, "step": 13676 }, { "epoch": 0.88, "grad_norm": 1.1905412673950195, "learning_rate": 3.5565694056933496e-07, "loss": 0.4767, "step": 13677 }, { "epoch": 0.88, "grad_norm": 1.2452456951141357, "learning_rate": 3.5526984723311787e-07, "loss": 0.5414, "step": 13678 }, { "epoch": 0.88, "grad_norm": 1.22904372215271, "learning_rate": 3.5488295690607187e-07, "loss": 0.4656, "step": 13679 }, { "epoch": 0.88, "grad_norm": 1.1666520833969116, "learning_rate": 3.544962696051074e-07, "loss": 0.4939, "step": 13680 }, { "epoch": 0.88, "grad_norm": 1.3377317190170288, "learning_rate": 3.541097853471265e-07, "loss": 0.5228, "step": 13681 }, { "epoch": 0.88, "grad_norm": 1.3071963787078857, "learning_rate": 3.537235041490195e-07, "loss": 0.5412, "step": 13682 }, { "epoch": 0.88, "grad_norm": 1.2279036045074463, "learning_rate": 3.533374260276706e-07, "loss": 0.5062, "step": 13683 }, { "epoch": 0.88, "grad_norm": 1.189866304397583, "learning_rate": 3.529515509999537e-07, "loss": 0.4756, "step": 13684 }, { "epoch": 0.88, "grad_norm": 1.2181460857391357, "learning_rate": 3.5256587908273576e-07, "loss": 0.467, "step": 13685 }, { "epoch": 0.88, "grad_norm": 1.237985372543335, "learning_rate": 3.5218041029287273e-07, "loss": 0.5328, "step": 13686 }, { "epoch": 0.88, "grad_norm": 1.2013925313949585, "learning_rate": 3.517951446472123e-07, "loss": 0.5181, "step": 13687 }, { "epoch": 0.88, "grad_norm": 1.2540075778961182, "learning_rate": 3.514100821625932e-07, "loss": 0.5118, "step": 13688 }, { "epoch": 0.88, "grad_norm": 1.1869800090789795, "learning_rate": 3.510252228558458e-07, "loss": 0.5288, "step": 13689 }, { "epoch": 0.88, "grad_norm": 1.1317819356918335, "learning_rate": 3.5064056674379176e-07, "loss": 0.5093, "step": 13690 }, { "epoch": 0.88, "grad_norm": 1.1739306449890137, "learning_rate": 3.5025611384324364e-07, "loss": 0.5192, "step": 13691 }, { "epoch": 0.88, "grad_norm": 1.1740972995758057, "learning_rate": 3.498718641710036e-07, "loss": 0.51, "step": 13692 }, { "epoch": 0.88, "grad_norm": 1.2164722681045532, "learning_rate": 3.494878177438665e-07, "loss": 0.5427, "step": 13693 }, { "epoch": 0.88, "grad_norm": 1.1127862930297852, "learning_rate": 3.49103974578619e-07, "loss": 0.4908, "step": 13694 }, { "epoch": 0.88, "grad_norm": 1.2261887788772583, "learning_rate": 3.487203346920376e-07, "loss": 0.5428, "step": 13695 }, { "epoch": 0.88, "grad_norm": 1.342741847038269, "learning_rate": 3.4833689810088944e-07, "loss": 0.5296, "step": 13696 }, { "epoch": 0.88, "grad_norm": 1.140516757965088, "learning_rate": 3.479536648219339e-07, "loss": 0.5032, "step": 13697 }, { "epoch": 0.88, "grad_norm": 1.2105761766433716, "learning_rate": 3.47570634871921e-07, "loss": 0.5524, "step": 13698 }, { "epoch": 0.88, "grad_norm": 1.2969274520874023, "learning_rate": 3.4718780826759223e-07, "loss": 0.5469, "step": 13699 }, { "epoch": 0.88, "grad_norm": 1.2888541221618652, "learning_rate": 3.4680518502568037e-07, "loss": 0.4904, "step": 13700 }, { "epoch": 0.88, "grad_norm": 1.2913627624511719, "learning_rate": 3.4642276516290876e-07, "loss": 0.5165, "step": 13701 }, { "epoch": 0.88, "grad_norm": 1.4400807619094849, "learning_rate": 3.460405486959911e-07, "loss": 0.5353, "step": 13702 }, { "epoch": 0.88, "grad_norm": 1.1924299001693726, "learning_rate": 3.456585356416342e-07, "loss": 0.5313, "step": 13703 }, { "epoch": 0.88, "grad_norm": 1.2375189065933228, "learning_rate": 3.452767260165335e-07, "loss": 0.5276, "step": 13704 }, { "epoch": 0.88, "grad_norm": 1.2058720588684082, "learning_rate": 3.4489511983737847e-07, "loss": 0.4689, "step": 13705 }, { "epoch": 0.88, "grad_norm": 1.32777738571167, "learning_rate": 3.445137171208468e-07, "loss": 0.5522, "step": 13706 }, { "epoch": 0.88, "grad_norm": 1.2911640405654907, "learning_rate": 3.4413251788361024e-07, "loss": 0.4822, "step": 13707 }, { "epoch": 0.88, "grad_norm": 1.1882998943328857, "learning_rate": 3.4375152214232875e-07, "loss": 0.5125, "step": 13708 }, { "epoch": 0.88, "grad_norm": 1.1721004247665405, "learning_rate": 3.433707299136546e-07, "loss": 0.5145, "step": 13709 }, { "epoch": 0.88, "grad_norm": 1.1614490747451782, "learning_rate": 3.429901412142311e-07, "loss": 0.4716, "step": 13710 }, { "epoch": 0.89, "grad_norm": 1.1511048078536987, "learning_rate": 3.426097560606939e-07, "loss": 0.5285, "step": 13711 }, { "epoch": 0.89, "grad_norm": 1.2101550102233887, "learning_rate": 3.4222957446966797e-07, "loss": 0.5751, "step": 13712 }, { "epoch": 0.89, "grad_norm": 1.1795655488967896, "learning_rate": 3.418495964577706e-07, "loss": 0.5012, "step": 13713 }, { "epoch": 0.89, "grad_norm": 1.137560486793518, "learning_rate": 3.414698220416085e-07, "loss": 0.4741, "step": 13714 }, { "epoch": 0.89, "grad_norm": 1.2625607252120972, "learning_rate": 3.4109025123778174e-07, "loss": 0.5158, "step": 13715 }, { "epoch": 0.89, "grad_norm": 1.2063055038452148, "learning_rate": 3.4071088406287924e-07, "loss": 0.4481, "step": 13716 }, { "epoch": 0.89, "grad_norm": 1.1708334684371948, "learning_rate": 3.403317205334833e-07, "loss": 0.5047, "step": 13717 }, { "epoch": 0.89, "grad_norm": 1.114542007446289, "learning_rate": 3.3995276066616566e-07, "loss": 0.4974, "step": 13718 }, { "epoch": 0.89, "grad_norm": 1.16073739528656, "learning_rate": 3.3957400447749035e-07, "loss": 0.4639, "step": 13719 }, { "epoch": 0.89, "grad_norm": 1.1262649297714233, "learning_rate": 3.391954519840107e-07, "loss": 0.5144, "step": 13720 }, { "epoch": 0.89, "grad_norm": 1.0634417533874512, "learning_rate": 3.38817103202273e-07, "loss": 0.485, "step": 13721 }, { "epoch": 0.89, "grad_norm": 1.139607548713684, "learning_rate": 3.3843895814881346e-07, "loss": 0.4926, "step": 13722 }, { "epoch": 0.89, "grad_norm": 1.253886342048645, "learning_rate": 3.380610168401599e-07, "loss": 0.5388, "step": 13723 }, { "epoch": 0.89, "grad_norm": 1.2091957330703735, "learning_rate": 3.3768327929283197e-07, "loss": 0.5031, "step": 13724 }, { "epoch": 0.89, "grad_norm": 1.2409143447875977, "learning_rate": 3.3730574552333917e-07, "loss": 0.5383, "step": 13725 }, { "epoch": 0.89, "grad_norm": 1.230699896812439, "learning_rate": 3.369284155481817e-07, "loss": 0.5233, "step": 13726 }, { "epoch": 0.89, "grad_norm": 1.269461750984192, "learning_rate": 3.365512893838524e-07, "loss": 0.5352, "step": 13727 }, { "epoch": 0.89, "grad_norm": 1.1237612962722778, "learning_rate": 3.3617436704683424e-07, "loss": 0.4425, "step": 13728 }, { "epoch": 0.89, "grad_norm": 1.1490150690078735, "learning_rate": 3.357976485536013e-07, "loss": 0.4747, "step": 13729 }, { "epoch": 0.89, "grad_norm": 1.1569700241088867, "learning_rate": 3.3542113392061984e-07, "loss": 0.5683, "step": 13730 }, { "epoch": 0.89, "grad_norm": 1.2669697999954224, "learning_rate": 3.350448231643466e-07, "loss": 0.5148, "step": 13731 }, { "epoch": 0.89, "grad_norm": 1.4346777200698853, "learning_rate": 3.3466871630122743e-07, "loss": 0.541, "step": 13732 }, { "epoch": 0.89, "grad_norm": 1.1837284564971924, "learning_rate": 3.3429281334770194e-07, "loss": 0.5289, "step": 13733 }, { "epoch": 0.89, "grad_norm": 1.1197924613952637, "learning_rate": 3.3391711432020024e-07, "loss": 0.508, "step": 13734 }, { "epoch": 0.89, "grad_norm": 1.2816588878631592, "learning_rate": 3.335416192351426e-07, "loss": 0.5093, "step": 13735 }, { "epoch": 0.89, "grad_norm": 1.1933804750442505, "learning_rate": 3.331663281089409e-07, "loss": 0.5063, "step": 13736 }, { "epoch": 0.89, "grad_norm": 1.2126054763793945, "learning_rate": 3.3279124095799977e-07, "loss": 0.5336, "step": 13737 }, { "epoch": 0.89, "grad_norm": 1.195065975189209, "learning_rate": 3.324163577987105e-07, "loss": 0.4992, "step": 13738 }, { "epoch": 0.89, "grad_norm": 1.2318921089172363, "learning_rate": 3.3204167864746007e-07, "loss": 0.5043, "step": 13739 }, { "epoch": 0.89, "grad_norm": 1.1625370979309082, "learning_rate": 3.316672035206242e-07, "loss": 0.472, "step": 13740 }, { "epoch": 0.89, "grad_norm": 1.0969417095184326, "learning_rate": 3.3129293243457093e-07, "loss": 0.455, "step": 13741 }, { "epoch": 0.89, "grad_norm": 1.2784010171890259, "learning_rate": 3.3091886540565833e-07, "loss": 0.5047, "step": 13742 }, { "epoch": 0.89, "grad_norm": 1.176912546157837, "learning_rate": 3.3054500245023547e-07, "loss": 0.4547, "step": 13743 }, { "epoch": 0.89, "grad_norm": 1.203999638557434, "learning_rate": 3.3017134358464263e-07, "loss": 0.5168, "step": 13744 }, { "epoch": 0.89, "grad_norm": 1.170996069908142, "learning_rate": 3.2979788882521234e-07, "loss": 0.5456, "step": 13745 }, { "epoch": 0.89, "grad_norm": 1.150549292564392, "learning_rate": 3.294246381882671e-07, "loss": 0.4946, "step": 13746 }, { "epoch": 0.89, "grad_norm": 1.403658151626587, "learning_rate": 3.2905159169012046e-07, "loss": 0.5284, "step": 13747 }, { "epoch": 0.89, "grad_norm": 1.216711401939392, "learning_rate": 3.2867874934707833e-07, "loss": 0.4963, "step": 13748 }, { "epoch": 0.89, "grad_norm": 1.211484432220459, "learning_rate": 3.2830611117543543e-07, "loss": 0.4917, "step": 13749 }, { "epoch": 0.89, "grad_norm": 1.2015297412872314, "learning_rate": 3.2793367719147926e-07, "loss": 0.5301, "step": 13750 }, { "epoch": 0.89, "grad_norm": 1.1738059520721436, "learning_rate": 3.2756144741148745e-07, "loss": 0.4716, "step": 13751 }, { "epoch": 0.89, "grad_norm": 1.3596268892288208, "learning_rate": 3.271894218517302e-07, "loss": 0.5449, "step": 13752 }, { "epoch": 0.89, "grad_norm": 1.3784594535827637, "learning_rate": 3.2681760052846734e-07, "loss": 0.5165, "step": 13753 }, { "epoch": 0.89, "grad_norm": 1.2003041505813599, "learning_rate": 3.2644598345795085e-07, "loss": 0.4971, "step": 13754 }, { "epoch": 0.89, "grad_norm": 1.2148687839508057, "learning_rate": 3.260745706564217e-07, "loss": 0.4809, "step": 13755 }, { "epoch": 0.89, "grad_norm": 1.2576483488082886, "learning_rate": 3.25703362140114e-07, "loss": 0.5126, "step": 13756 }, { "epoch": 0.89, "grad_norm": 1.426712155342102, "learning_rate": 3.253323579252526e-07, "loss": 0.5022, "step": 13757 }, { "epoch": 0.89, "grad_norm": 1.2643086910247803, "learning_rate": 3.2496155802805294e-07, "loss": 0.5138, "step": 13758 }, { "epoch": 0.89, "grad_norm": 1.1073368787765503, "learning_rate": 3.24590962464722e-07, "loss": 0.5184, "step": 13759 }, { "epoch": 0.89, "grad_norm": 1.1363449096679688, "learning_rate": 3.242205712514579e-07, "loss": 0.497, "step": 13760 }, { "epoch": 0.89, "grad_norm": 1.2877849340438843, "learning_rate": 3.238503844044488e-07, "loss": 0.527, "step": 13761 }, { "epoch": 0.89, "grad_norm": 1.2156113386154175, "learning_rate": 3.2348040193987407e-07, "loss": 0.5141, "step": 13762 }, { "epoch": 0.89, "grad_norm": 1.192339301109314, "learning_rate": 3.2311062387390567e-07, "loss": 0.504, "step": 13763 }, { "epoch": 0.89, "grad_norm": 1.1399977207183838, "learning_rate": 3.2274105022270575e-07, "loss": 0.4776, "step": 13764 }, { "epoch": 0.89, "grad_norm": 1.2185070514678955, "learning_rate": 3.2237168100242633e-07, "loss": 0.507, "step": 13765 }, { "epoch": 0.89, "grad_norm": 1.2176847457885742, "learning_rate": 3.2200251622921343e-07, "loss": 0.5028, "step": 13766 }, { "epoch": 0.89, "grad_norm": 1.2823861837387085, "learning_rate": 3.216335559192002e-07, "loss": 0.5339, "step": 13767 }, { "epoch": 0.89, "grad_norm": 1.2458478212356567, "learning_rate": 3.2126480008851436e-07, "loss": 0.5214, "step": 13768 }, { "epoch": 0.89, "grad_norm": 1.1434731483459473, "learning_rate": 3.208962487532724e-07, "loss": 0.4967, "step": 13769 }, { "epoch": 0.89, "grad_norm": 1.1749886274337769, "learning_rate": 3.2052790192958317e-07, "loss": 0.5237, "step": 13770 }, { "epoch": 0.89, "grad_norm": 1.3243236541748047, "learning_rate": 3.201597596335471e-07, "loss": 0.5298, "step": 13771 }, { "epoch": 0.89, "grad_norm": 1.2056001424789429, "learning_rate": 3.197918218812529e-07, "loss": 0.4663, "step": 13772 }, { "epoch": 0.89, "grad_norm": 1.1996184587478638, "learning_rate": 3.1942408868878283e-07, "loss": 0.5193, "step": 13773 }, { "epoch": 0.89, "grad_norm": 1.261392593383789, "learning_rate": 3.190565600722101e-07, "loss": 0.5089, "step": 13774 }, { "epoch": 0.89, "grad_norm": 1.1128674745559692, "learning_rate": 3.1868923604759905e-07, "loss": 0.5032, "step": 13775 }, { "epoch": 0.89, "grad_norm": 1.160698413848877, "learning_rate": 3.1832211663100244e-07, "loss": 0.4767, "step": 13776 }, { "epoch": 0.89, "grad_norm": 1.0819514989852905, "learning_rate": 3.179552018384674e-07, "loss": 0.4779, "step": 13777 }, { "epoch": 0.89, "grad_norm": 1.2097824811935425, "learning_rate": 3.1758849168603057e-07, "loss": 0.518, "step": 13778 }, { "epoch": 0.89, "grad_norm": 1.2416380643844604, "learning_rate": 3.172219861897202e-07, "loss": 0.4899, "step": 13779 }, { "epoch": 0.89, "grad_norm": 1.136863350868225, "learning_rate": 3.1685568536555577e-07, "loss": 0.4889, "step": 13780 }, { "epoch": 0.89, "grad_norm": 1.2579340934753418, "learning_rate": 3.1648958922954555e-07, "loss": 0.518, "step": 13781 }, { "epoch": 0.89, "grad_norm": 1.2156424522399902, "learning_rate": 3.1612369779769224e-07, "loss": 0.5024, "step": 13782 }, { "epoch": 0.89, "grad_norm": 1.177467942237854, "learning_rate": 3.1575801108598703e-07, "loss": 0.5011, "step": 13783 }, { "epoch": 0.89, "grad_norm": 1.1552895307540894, "learning_rate": 3.1539252911041486e-07, "loss": 0.4957, "step": 13784 }, { "epoch": 0.89, "grad_norm": 1.2622429132461548, "learning_rate": 3.1502725188694796e-07, "loss": 0.5025, "step": 13785 }, { "epoch": 0.89, "grad_norm": 1.1669667959213257, "learning_rate": 3.1466217943155244e-07, "loss": 0.4719, "step": 13786 }, { "epoch": 0.89, "grad_norm": 1.238321304321289, "learning_rate": 3.142973117601844e-07, "loss": 0.549, "step": 13787 }, { "epoch": 0.89, "grad_norm": 1.2293730974197388, "learning_rate": 3.139326488887917e-07, "loss": 0.5189, "step": 13788 }, { "epoch": 0.89, "grad_norm": 1.2373685836791992, "learning_rate": 3.135681908333138e-07, "loss": 0.5216, "step": 13789 }, { "epoch": 0.89, "grad_norm": 1.1613796949386597, "learning_rate": 3.132039376096785e-07, "loss": 0.5178, "step": 13790 }, { "epoch": 0.89, "grad_norm": 1.158150315284729, "learning_rate": 3.1283988923380635e-07, "loss": 0.4846, "step": 13791 }, { "epoch": 0.89, "grad_norm": 1.2116241455078125, "learning_rate": 3.124760457216103e-07, "loss": 0.4823, "step": 13792 }, { "epoch": 0.89, "grad_norm": 1.2448556423187256, "learning_rate": 3.1211240708899193e-07, "loss": 0.4864, "step": 13793 }, { "epoch": 0.89, "grad_norm": 1.271895170211792, "learning_rate": 3.1174897335184526e-07, "loss": 0.5401, "step": 13794 }, { "epoch": 0.89, "grad_norm": 1.3571624755859375, "learning_rate": 3.1138574452605596e-07, "loss": 0.4985, "step": 13795 }, { "epoch": 0.89, "grad_norm": 1.2377599477767944, "learning_rate": 3.110227206274985e-07, "loss": 0.4594, "step": 13796 }, { "epoch": 0.89, "grad_norm": 1.2246060371398926, "learning_rate": 3.106599016720396e-07, "loss": 0.4616, "step": 13797 }, { "epoch": 0.89, "grad_norm": 1.3551466464996338, "learning_rate": 3.1029728767553834e-07, "loss": 0.4956, "step": 13798 }, { "epoch": 0.89, "grad_norm": 1.2460260391235352, "learning_rate": 3.0993487865384256e-07, "loss": 0.5578, "step": 13799 }, { "epoch": 0.89, "grad_norm": 1.2481002807617188, "learning_rate": 3.0957267462279283e-07, "loss": 0.5378, "step": 13800 }, { "epoch": 0.89, "grad_norm": 1.2772297859191895, "learning_rate": 3.0921067559822106e-07, "loss": 0.54, "step": 13801 }, { "epoch": 0.89, "grad_norm": 1.2662038803100586, "learning_rate": 3.0884888159594727e-07, "loss": 0.5251, "step": 13802 }, { "epoch": 0.89, "grad_norm": 1.2403737306594849, "learning_rate": 3.0848729263178557e-07, "loss": 0.5135, "step": 13803 }, { "epoch": 0.89, "grad_norm": 1.2174408435821533, "learning_rate": 3.081259087215399e-07, "loss": 0.4872, "step": 13804 }, { "epoch": 0.89, "grad_norm": 1.3831697702407837, "learning_rate": 3.07764729881006e-07, "loss": 0.4993, "step": 13805 }, { "epoch": 0.89, "grad_norm": 1.3119908571243286, "learning_rate": 3.074037561259691e-07, "loss": 0.4946, "step": 13806 }, { "epoch": 0.89, "grad_norm": 1.1868579387664795, "learning_rate": 3.0704298747220807e-07, "loss": 0.5136, "step": 13807 }, { "epoch": 0.89, "grad_norm": 1.295927882194519, "learning_rate": 3.066824239354893e-07, "loss": 0.5331, "step": 13808 }, { "epoch": 0.89, "grad_norm": 1.2529710531234741, "learning_rate": 3.06322065531573e-07, "loss": 0.5259, "step": 13809 }, { "epoch": 0.89, "grad_norm": 1.3573492765426636, "learning_rate": 3.059619122762092e-07, "loss": 0.5297, "step": 13810 }, { "epoch": 0.89, "grad_norm": 1.1839330196380615, "learning_rate": 3.0560196418513934e-07, "loss": 0.5021, "step": 13811 }, { "epoch": 0.89, "grad_norm": 1.105250597000122, "learning_rate": 3.0524222127409574e-07, "loss": 0.4776, "step": 13812 }, { "epoch": 0.89, "grad_norm": 1.1273843050003052, "learning_rate": 3.0488268355880315e-07, "loss": 0.4616, "step": 13813 }, { "epoch": 0.89, "grad_norm": 1.2438139915466309, "learning_rate": 3.045233510549739e-07, "loss": 0.5008, "step": 13814 }, { "epoch": 0.89, "grad_norm": 1.6725555658340454, "learning_rate": 3.0416422377831434e-07, "loss": 0.472, "step": 13815 }, { "epoch": 0.89, "grad_norm": 1.2082873582839966, "learning_rate": 3.038053017445214e-07, "loss": 0.519, "step": 13816 }, { "epoch": 0.89, "grad_norm": 1.218530297279358, "learning_rate": 3.034465849692825e-07, "loss": 0.5112, "step": 13817 }, { "epoch": 0.89, "grad_norm": 1.164695143699646, "learning_rate": 3.030880734682762e-07, "loss": 0.5197, "step": 13818 }, { "epoch": 0.89, "grad_norm": 1.282126545906067, "learning_rate": 3.027297672571722e-07, "loss": 0.5117, "step": 13819 }, { "epoch": 0.89, "grad_norm": 1.1850730180740356, "learning_rate": 3.0237166635163073e-07, "loss": 0.5249, "step": 13820 }, { "epoch": 0.89, "grad_norm": 1.2218186855316162, "learning_rate": 3.020137707673032e-07, "loss": 0.5302, "step": 13821 }, { "epoch": 0.89, "grad_norm": 1.2098066806793213, "learning_rate": 3.016560805198332e-07, "loss": 0.4802, "step": 13822 }, { "epoch": 0.89, "grad_norm": 1.1967594623565674, "learning_rate": 3.0129859562485373e-07, "loss": 0.4906, "step": 13823 }, { "epoch": 0.89, "grad_norm": 1.158373475074768, "learning_rate": 3.009413160979907e-07, "loss": 0.5264, "step": 13824 }, { "epoch": 0.89, "grad_norm": 1.38820481300354, "learning_rate": 3.0058424195485827e-07, "loss": 0.5585, "step": 13825 }, { "epoch": 0.89, "grad_norm": 1.2102758884429932, "learning_rate": 3.0022737321106386e-07, "loss": 0.5099, "step": 13826 }, { "epoch": 0.89, "grad_norm": 1.1252095699310303, "learning_rate": 2.998707098822057e-07, "loss": 0.4783, "step": 13827 }, { "epoch": 0.89, "grad_norm": 1.2339415550231934, "learning_rate": 2.995142519838723e-07, "loss": 0.5179, "step": 13828 }, { "epoch": 0.89, "grad_norm": 1.2199746370315552, "learning_rate": 2.991579995316435e-07, "loss": 0.5037, "step": 13829 }, { "epoch": 0.89, "grad_norm": 1.0791304111480713, "learning_rate": 2.9880195254109067e-07, "loss": 0.4696, "step": 13830 }, { "epoch": 0.89, "grad_norm": 1.2671650648117065, "learning_rate": 2.9844611102777474e-07, "loss": 0.509, "step": 13831 }, { "epoch": 0.89, "grad_norm": 1.264466643333435, "learning_rate": 2.980904750072489e-07, "loss": 0.5068, "step": 13832 }, { "epoch": 0.89, "grad_norm": 1.2213536500930786, "learning_rate": 2.977350444950572e-07, "loss": 0.5261, "step": 13833 }, { "epoch": 0.89, "grad_norm": 1.2960669994354248, "learning_rate": 2.973798195067351e-07, "loss": 0.5399, "step": 13834 }, { "epoch": 0.89, "grad_norm": 1.3877235651016235, "learning_rate": 2.97024800057808e-07, "loss": 0.4675, "step": 13835 }, { "epoch": 0.89, "grad_norm": 1.2467689514160156, "learning_rate": 2.9666998616379396e-07, "loss": 0.5062, "step": 13836 }, { "epoch": 0.89, "grad_norm": 1.230226755142212, "learning_rate": 2.9631537784019893e-07, "loss": 0.4713, "step": 13837 }, { "epoch": 0.89, "grad_norm": 1.3181464672088623, "learning_rate": 2.959609751025233e-07, "loss": 0.5667, "step": 13838 }, { "epoch": 0.89, "grad_norm": 1.187592625617981, "learning_rate": 2.956067779662569e-07, "loss": 0.5023, "step": 13839 }, { "epoch": 0.89, "grad_norm": 1.1474088430404663, "learning_rate": 2.952527864468807e-07, "loss": 0.4691, "step": 13840 }, { "epoch": 0.89, "grad_norm": 1.1788514852523804, "learning_rate": 2.9489900055986675e-07, "loss": 0.5029, "step": 13841 }, { "epoch": 0.89, "grad_norm": 1.2102339267730713, "learning_rate": 2.945454203206793e-07, "loss": 0.4739, "step": 13842 }, { "epoch": 0.89, "grad_norm": 1.1884653568267822, "learning_rate": 2.9419204574477e-07, "loss": 0.5155, "step": 13843 }, { "epoch": 0.89, "grad_norm": 1.2378381490707397, "learning_rate": 2.9383887684758525e-07, "loss": 0.506, "step": 13844 }, { "epoch": 0.89, "grad_norm": 1.2847247123718262, "learning_rate": 2.934859136445617e-07, "loss": 0.5517, "step": 13845 }, { "epoch": 0.89, "grad_norm": 1.3423298597335815, "learning_rate": 2.9313315615112523e-07, "loss": 0.5346, "step": 13846 }, { "epoch": 0.89, "grad_norm": 1.2173869609832764, "learning_rate": 2.927806043826953e-07, "loss": 0.5069, "step": 13847 }, { "epoch": 0.89, "grad_norm": 1.1773173809051514, "learning_rate": 2.924282583546806e-07, "loss": 0.5139, "step": 13848 }, { "epoch": 0.89, "grad_norm": 1.124083399772644, "learning_rate": 2.920761180824805e-07, "loss": 0.4832, "step": 13849 }, { "epoch": 0.89, "grad_norm": 1.1779227256774902, "learning_rate": 2.917241835814866e-07, "loss": 0.4784, "step": 13850 }, { "epoch": 0.89, "grad_norm": 1.2072184085845947, "learning_rate": 2.91372454867081e-07, "loss": 0.5418, "step": 13851 }, { "epoch": 0.89, "grad_norm": 1.13828706741333, "learning_rate": 2.91020931954637e-07, "loss": 0.5428, "step": 13852 }, { "epoch": 0.89, "grad_norm": 1.2055935859680176, "learning_rate": 2.906696148595189e-07, "loss": 0.4874, "step": 13853 }, { "epoch": 0.89, "grad_norm": 1.3123292922973633, "learning_rate": 2.9031850359708236e-07, "loss": 0.5492, "step": 13854 }, { "epoch": 0.89, "grad_norm": 1.2331148386001587, "learning_rate": 2.8996759818267215e-07, "loss": 0.4995, "step": 13855 }, { "epoch": 0.89, "grad_norm": 1.1227455139160156, "learning_rate": 2.896168986316261e-07, "loss": 0.4321, "step": 13856 }, { "epoch": 0.89, "grad_norm": 1.1959997415542603, "learning_rate": 2.892664049592725e-07, "loss": 0.5223, "step": 13857 }, { "epoch": 0.89, "grad_norm": 1.1204408407211304, "learning_rate": 2.889161171809307e-07, "loss": 0.5026, "step": 13858 }, { "epoch": 0.89, "grad_norm": 1.2027896642684937, "learning_rate": 2.8856603531191074e-07, "loss": 0.4775, "step": 13859 }, { "epoch": 0.89, "grad_norm": 1.1514317989349365, "learning_rate": 2.882161593675142e-07, "loss": 0.4986, "step": 13860 }, { "epoch": 0.89, "grad_norm": 1.1077147722244263, "learning_rate": 2.878664893630317e-07, "loss": 0.4458, "step": 13861 }, { "epoch": 0.89, "grad_norm": 1.1921530961990356, "learning_rate": 2.875170253137477e-07, "loss": 0.5041, "step": 13862 }, { "epoch": 0.89, "grad_norm": 1.2151503562927246, "learning_rate": 2.8716776723493655e-07, "loss": 0.4869, "step": 13863 }, { "epoch": 0.89, "grad_norm": 1.1895476579666138, "learning_rate": 2.868187151418633e-07, "loss": 0.4473, "step": 13864 }, { "epoch": 0.89, "grad_norm": 1.4186285734176636, "learning_rate": 2.8646986904978357e-07, "loss": 0.4407, "step": 13865 }, { "epoch": 0.9, "grad_norm": 1.1347935199737549, "learning_rate": 2.8612122897394456e-07, "loss": 0.489, "step": 13866 }, { "epoch": 0.9, "grad_norm": 1.1068073511123657, "learning_rate": 2.8577279492958464e-07, "loss": 0.4783, "step": 13867 }, { "epoch": 0.9, "grad_norm": 1.2323291301727295, "learning_rate": 2.8542456693193277e-07, "loss": 0.5486, "step": 13868 }, { "epoch": 0.9, "grad_norm": 1.1554385423660278, "learning_rate": 2.850765449962101e-07, "loss": 0.487, "step": 13869 }, { "epoch": 0.9, "grad_norm": 1.2012524604797363, "learning_rate": 2.8472872913762663e-07, "loss": 0.5009, "step": 13870 }, { "epoch": 0.9, "grad_norm": 1.1287219524383545, "learning_rate": 2.8438111937138466e-07, "loss": 0.5058, "step": 13871 }, { "epoch": 0.9, "grad_norm": 1.2273463010787964, "learning_rate": 2.840337157126771e-07, "loss": 0.4804, "step": 13872 }, { "epoch": 0.9, "grad_norm": 1.214854121208191, "learning_rate": 2.836865181766896e-07, "loss": 0.5096, "step": 13873 }, { "epoch": 0.9, "grad_norm": 1.2064570188522339, "learning_rate": 2.8333952677859546e-07, "loss": 0.5509, "step": 13874 }, { "epoch": 0.9, "grad_norm": 1.1453412771224976, "learning_rate": 2.8299274153356147e-07, "loss": 0.4677, "step": 13875 }, { "epoch": 0.9, "grad_norm": 1.1634511947631836, "learning_rate": 2.8264616245674447e-07, "loss": 0.4851, "step": 13876 }, { "epoch": 0.9, "grad_norm": 1.1602108478546143, "learning_rate": 2.8229978956329285e-07, "loss": 0.5193, "step": 13877 }, { "epoch": 0.9, "grad_norm": 1.256977915763855, "learning_rate": 2.819536228683467e-07, "loss": 0.5295, "step": 13878 }, { "epoch": 0.9, "grad_norm": 1.472745656967163, "learning_rate": 2.816076623870334e-07, "loss": 0.5419, "step": 13879 }, { "epoch": 0.9, "grad_norm": 1.2100154161453247, "learning_rate": 2.812619081344764e-07, "loss": 0.5193, "step": 13880 }, { "epoch": 0.9, "grad_norm": 1.2454732656478882, "learning_rate": 2.809163601257864e-07, "loss": 0.5269, "step": 13881 }, { "epoch": 0.9, "grad_norm": 1.2468653917312622, "learning_rate": 2.8057101837606735e-07, "loss": 0.5384, "step": 13882 }, { "epoch": 0.9, "grad_norm": 1.231548547744751, "learning_rate": 2.8022588290041286e-07, "loss": 0.4732, "step": 13883 }, { "epoch": 0.9, "grad_norm": 1.1860264539718628, "learning_rate": 2.79880953713908e-07, "loss": 0.5049, "step": 13884 }, { "epoch": 0.9, "grad_norm": 1.1876747608184814, "learning_rate": 2.7953623083162797e-07, "loss": 0.5068, "step": 13885 }, { "epoch": 0.9, "grad_norm": 1.1811383962631226, "learning_rate": 2.791917142686401e-07, "loss": 0.4599, "step": 13886 }, { "epoch": 0.9, "grad_norm": 1.0988391637802124, "learning_rate": 2.78847404040003e-07, "loss": 0.496, "step": 13887 }, { "epoch": 0.9, "grad_norm": 1.210861086845398, "learning_rate": 2.785033001607651e-07, "loss": 0.4741, "step": 13888 }, { "epoch": 0.9, "grad_norm": 1.175624966621399, "learning_rate": 2.781594026459672e-07, "loss": 0.4795, "step": 13889 }, { "epoch": 0.9, "grad_norm": 1.2085832357406616, "learning_rate": 2.7781571151063835e-07, "loss": 0.5227, "step": 13890 }, { "epoch": 0.9, "grad_norm": 1.1386868953704834, "learning_rate": 2.77472226769801e-07, "loss": 0.4877, "step": 13891 }, { "epoch": 0.9, "grad_norm": 1.2921545505523682, "learning_rate": 2.771289484384687e-07, "loss": 0.4769, "step": 13892 }, { "epoch": 0.9, "grad_norm": 1.1955586671829224, "learning_rate": 2.76785876531645e-07, "loss": 0.5004, "step": 13893 }, { "epoch": 0.9, "grad_norm": 1.3258954286575317, "learning_rate": 2.764430110643246e-07, "loss": 0.5439, "step": 13894 }, { "epoch": 0.9, "grad_norm": 1.216126799583435, "learning_rate": 2.761003520514938e-07, "loss": 0.5652, "step": 13895 }, { "epoch": 0.9, "grad_norm": 1.2102328538894653, "learning_rate": 2.7575789950812837e-07, "loss": 0.497, "step": 13896 }, { "epoch": 0.9, "grad_norm": 1.3231854438781738, "learning_rate": 2.7541565344919583e-07, "loss": 0.5734, "step": 13897 }, { "epoch": 0.9, "grad_norm": 1.258685827255249, "learning_rate": 2.7507361388965593e-07, "loss": 0.4887, "step": 13898 }, { "epoch": 0.9, "grad_norm": 1.2267568111419678, "learning_rate": 2.747317808444577e-07, "loss": 0.5283, "step": 13899 }, { "epoch": 0.9, "grad_norm": 1.2053310871124268, "learning_rate": 2.743901543285421e-07, "loss": 0.5146, "step": 13900 }, { "epoch": 0.9, "grad_norm": 1.1780253648757935, "learning_rate": 2.7404873435684154e-07, "loss": 0.4059, "step": 13901 }, { "epoch": 0.9, "grad_norm": 1.409958839416504, "learning_rate": 2.7370752094427687e-07, "loss": 0.5416, "step": 13902 }, { "epoch": 0.9, "grad_norm": 1.225042700767517, "learning_rate": 2.733665141057623e-07, "loss": 0.5018, "step": 13903 }, { "epoch": 0.9, "grad_norm": 1.1990505456924438, "learning_rate": 2.7302571385620257e-07, "loss": 0.556, "step": 13904 }, { "epoch": 0.9, "grad_norm": 1.3353309631347656, "learning_rate": 2.72685120210493e-07, "loss": 0.5168, "step": 13905 }, { "epoch": 0.9, "grad_norm": 1.315780758857727, "learning_rate": 2.723447331835205e-07, "loss": 0.5136, "step": 13906 }, { "epoch": 0.9, "grad_norm": 1.1353319883346558, "learning_rate": 2.720045527901627e-07, "loss": 0.5197, "step": 13907 }, { "epoch": 0.9, "grad_norm": 1.1544283628463745, "learning_rate": 2.7166457904528654e-07, "loss": 0.5285, "step": 13908 }, { "epoch": 0.9, "grad_norm": 1.240104079246521, "learning_rate": 2.7132481196375293e-07, "loss": 0.5244, "step": 13909 }, { "epoch": 0.9, "grad_norm": 1.2898023128509521, "learning_rate": 2.7098525156041113e-07, "loss": 0.5539, "step": 13910 }, { "epoch": 0.9, "grad_norm": 1.3295884132385254, "learning_rate": 2.7064589785010307e-07, "loss": 0.5593, "step": 13911 }, { "epoch": 0.9, "grad_norm": 1.093477487564087, "learning_rate": 2.70306750847662e-07, "loss": 0.4803, "step": 13912 }, { "epoch": 0.9, "grad_norm": 1.1964690685272217, "learning_rate": 2.699678105679093e-07, "loss": 0.5103, "step": 13913 }, { "epoch": 0.9, "grad_norm": 1.0639314651489258, "learning_rate": 2.6962907702565933e-07, "loss": 0.4965, "step": 13914 }, { "epoch": 0.9, "grad_norm": 1.2303650379180908, "learning_rate": 2.692905502357185e-07, "loss": 0.4924, "step": 13915 }, { "epoch": 0.9, "grad_norm": 1.2472639083862305, "learning_rate": 2.6895223021288217e-07, "loss": 0.494, "step": 13916 }, { "epoch": 0.9, "grad_norm": 1.1904557943344116, "learning_rate": 2.686141169719375e-07, "loss": 0.4955, "step": 13917 }, { "epoch": 0.9, "grad_norm": 1.1956870555877686, "learning_rate": 2.682762105276637e-07, "loss": 0.5428, "step": 13918 }, { "epoch": 0.9, "grad_norm": 1.0805467367172241, "learning_rate": 2.6793851089482736e-07, "loss": 0.4651, "step": 13919 }, { "epoch": 0.9, "grad_norm": 1.1454739570617676, "learning_rate": 2.676010180881905e-07, "loss": 0.4951, "step": 13920 }, { "epoch": 0.9, "grad_norm": 1.249783992767334, "learning_rate": 2.67263732122503e-07, "loss": 0.521, "step": 13921 }, { "epoch": 0.9, "grad_norm": 1.2522330284118652, "learning_rate": 2.66926653012507e-07, "loss": 0.4903, "step": 13922 }, { "epoch": 0.9, "grad_norm": 1.0891460180282593, "learning_rate": 2.665897807729362e-07, "loss": 0.4709, "step": 13923 }, { "epoch": 0.9, "grad_norm": 1.2253683805465698, "learning_rate": 2.6625311541851386e-07, "loss": 0.5085, "step": 13924 }, { "epoch": 0.9, "grad_norm": 1.3384631872177124, "learning_rate": 2.6591665696395376e-07, "loss": 0.4841, "step": 13925 }, { "epoch": 0.9, "grad_norm": 1.22145414352417, "learning_rate": 2.6558040542396303e-07, "loss": 0.5407, "step": 13926 }, { "epoch": 0.9, "grad_norm": 1.185020089149475, "learning_rate": 2.6524436081323765e-07, "loss": 0.4732, "step": 13927 }, { "epoch": 0.9, "grad_norm": 1.2011654376983643, "learning_rate": 2.6490852314646486e-07, "loss": 0.5062, "step": 13928 }, { "epoch": 0.9, "grad_norm": 1.3015949726104736, "learning_rate": 2.645728924383245e-07, "loss": 0.4899, "step": 13929 }, { "epoch": 0.9, "grad_norm": 1.4706953763961792, "learning_rate": 2.6423746870348597e-07, "loss": 0.5383, "step": 13930 }, { "epoch": 0.9, "grad_norm": 1.2009633779525757, "learning_rate": 2.6390225195660866e-07, "loss": 0.5035, "step": 13931 }, { "epoch": 0.9, "grad_norm": 1.188626766204834, "learning_rate": 2.635672422123442e-07, "loss": 0.5265, "step": 13932 }, { "epoch": 0.9, "grad_norm": 1.1831939220428467, "learning_rate": 2.632324394853358e-07, "loss": 0.4614, "step": 13933 }, { "epoch": 0.9, "grad_norm": 1.2435375452041626, "learning_rate": 2.628978437902163e-07, "loss": 0.5325, "step": 13934 }, { "epoch": 0.9, "grad_norm": 1.2703619003295898, "learning_rate": 2.6256345514161007e-07, "loss": 0.522, "step": 13935 }, { "epoch": 0.9, "grad_norm": 1.2799005508422852, "learning_rate": 2.622292735541332e-07, "loss": 0.4698, "step": 13936 }, { "epoch": 0.9, "grad_norm": 1.2608319520950317, "learning_rate": 2.6189529904239064e-07, "loss": 0.5138, "step": 13937 }, { "epoch": 0.9, "grad_norm": 1.1918268203735352, "learning_rate": 2.615615316209802e-07, "loss": 0.5519, "step": 13938 }, { "epoch": 0.9, "grad_norm": 1.2985777854919434, "learning_rate": 2.612279713044896e-07, "loss": 0.5401, "step": 13939 }, { "epoch": 0.9, "grad_norm": 1.5863919258117676, "learning_rate": 2.608946181074984e-07, "loss": 0.5444, "step": 13940 }, { "epoch": 0.9, "grad_norm": 1.271252155303955, "learning_rate": 2.6056147204457595e-07, "loss": 0.4821, "step": 13941 }, { "epoch": 0.9, "grad_norm": 1.118165135383606, "learning_rate": 2.6022853313028507e-07, "loss": 0.4546, "step": 13942 }, { "epoch": 0.9, "grad_norm": 1.2167671918869019, "learning_rate": 2.598958013791747e-07, "loss": 0.5002, "step": 13943 }, { "epoch": 0.9, "grad_norm": 1.229725956916809, "learning_rate": 2.5956327680578986e-07, "loss": 0.5673, "step": 13944 }, { "epoch": 0.9, "grad_norm": 1.333533763885498, "learning_rate": 2.5923095942466337e-07, "loss": 0.4799, "step": 13945 }, { "epoch": 0.9, "grad_norm": 1.1722856760025024, "learning_rate": 2.588988492503203e-07, "loss": 0.5519, "step": 13946 }, { "epoch": 0.9, "grad_norm": 1.124638319015503, "learning_rate": 2.5856694629727675e-07, "loss": 0.513, "step": 13947 }, { "epoch": 0.9, "grad_norm": 1.912218689918518, "learning_rate": 2.5823525058003894e-07, "loss": 0.4786, "step": 13948 }, { "epoch": 0.9, "grad_norm": 1.6979597806930542, "learning_rate": 2.579037621131042e-07, "loss": 0.4816, "step": 13949 }, { "epoch": 0.9, "grad_norm": 1.1656559705734253, "learning_rate": 2.5757248091096143e-07, "loss": 0.4815, "step": 13950 }, { "epoch": 0.9, "grad_norm": 1.181917667388916, "learning_rate": 2.572414069880891e-07, "loss": 0.4622, "step": 13951 }, { "epoch": 0.9, "grad_norm": 1.162967562675476, "learning_rate": 2.569105403589589e-07, "loss": 0.4851, "step": 13952 }, { "epoch": 0.9, "grad_norm": 1.173996090888977, "learning_rate": 2.5657988103803266e-07, "loss": 0.4382, "step": 13953 }, { "epoch": 0.9, "grad_norm": 1.2647987604141235, "learning_rate": 2.5624942903976044e-07, "loss": 0.5185, "step": 13954 }, { "epoch": 0.9, "grad_norm": 1.2522722482681274, "learning_rate": 2.5591918437858686e-07, "loss": 0.4943, "step": 13955 }, { "epoch": 0.9, "grad_norm": 1.3157998323440552, "learning_rate": 2.555891470689459e-07, "loss": 0.5365, "step": 13956 }, { "epoch": 0.9, "grad_norm": 1.3032587766647339, "learning_rate": 2.5525931712526207e-07, "loss": 0.5242, "step": 13957 }, { "epoch": 0.9, "grad_norm": 1.2686787843704224, "learning_rate": 2.549296945619534e-07, "loss": 0.5276, "step": 13958 }, { "epoch": 0.9, "grad_norm": 1.1912498474121094, "learning_rate": 2.546002793934238e-07, "loss": 0.498, "step": 13959 }, { "epoch": 0.9, "grad_norm": 1.1177537441253662, "learning_rate": 2.5427107163407296e-07, "loss": 0.5211, "step": 13960 }, { "epoch": 0.9, "grad_norm": 1.2761757373809814, "learning_rate": 2.5394207129828986e-07, "loss": 0.5269, "step": 13961 }, { "epoch": 0.9, "grad_norm": 1.1445558071136475, "learning_rate": 2.5361327840045413e-07, "loss": 0.4804, "step": 13962 }, { "epoch": 0.9, "grad_norm": 1.2355399131774902, "learning_rate": 2.532846929549354e-07, "loss": 0.5293, "step": 13963 }, { "epoch": 0.9, "grad_norm": 1.2359551191329956, "learning_rate": 2.529563149760966e-07, "loss": 0.5542, "step": 13964 }, { "epoch": 0.9, "grad_norm": 1.2787749767303467, "learning_rate": 2.526281444782891e-07, "loss": 0.4753, "step": 13965 }, { "epoch": 0.9, "grad_norm": 1.2369095087051392, "learning_rate": 2.523001814758574e-07, "loss": 0.4789, "step": 13966 }, { "epoch": 0.9, "grad_norm": 1.225708246231079, "learning_rate": 2.5197242598313633e-07, "loss": 0.448, "step": 13967 }, { "epoch": 0.9, "grad_norm": 1.7490153312683105, "learning_rate": 2.516448780144493e-07, "loss": 0.4835, "step": 13968 }, { "epoch": 0.9, "grad_norm": 1.2252358198165894, "learning_rate": 2.5131753758411425e-07, "loss": 0.4903, "step": 13969 }, { "epoch": 0.9, "grad_norm": 1.3005917072296143, "learning_rate": 2.509904047064371e-07, "loss": 0.5046, "step": 13970 }, { "epoch": 0.9, "grad_norm": 1.1727690696716309, "learning_rate": 2.50663479395718e-07, "loss": 0.5244, "step": 13971 }, { "epoch": 0.9, "grad_norm": 1.2022802829742432, "learning_rate": 2.503367616662439e-07, "loss": 0.5035, "step": 13972 }, { "epoch": 0.9, "grad_norm": 1.1608893871307373, "learning_rate": 2.5001025153229606e-07, "loss": 0.4632, "step": 13973 }, { "epoch": 0.9, "grad_norm": 1.1373801231384277, "learning_rate": 2.4968394900814483e-07, "loss": 0.4967, "step": 13974 }, { "epoch": 0.9, "grad_norm": 1.1749387979507446, "learning_rate": 2.4935785410805213e-07, "loss": 0.5241, "step": 13975 }, { "epoch": 0.9, "grad_norm": 1.2313892841339111, "learning_rate": 2.490319668462704e-07, "loss": 0.5335, "step": 13976 }, { "epoch": 0.9, "grad_norm": 1.2264206409454346, "learning_rate": 2.487062872370449e-07, "loss": 0.5205, "step": 13977 }, { "epoch": 0.9, "grad_norm": 1.1478267908096313, "learning_rate": 2.483808152946082e-07, "loss": 0.4856, "step": 13978 }, { "epoch": 0.9, "grad_norm": 1.0874452590942383, "learning_rate": 2.4805555103318667e-07, "loss": 0.4891, "step": 13979 }, { "epoch": 0.9, "grad_norm": 1.2693344354629517, "learning_rate": 2.4773049446699727e-07, "loss": 0.5132, "step": 13980 }, { "epoch": 0.9, "grad_norm": 1.1973989009857178, "learning_rate": 2.4740564561024636e-07, "loss": 0.4842, "step": 13981 }, { "epoch": 0.9, "grad_norm": 1.2979779243469238, "learning_rate": 2.4708100447713324e-07, "loss": 0.5252, "step": 13982 }, { "epoch": 0.9, "grad_norm": 1.2560189962387085, "learning_rate": 2.46756571081847e-07, "loss": 0.5185, "step": 13983 }, { "epoch": 0.9, "grad_norm": 1.316024899482727, "learning_rate": 2.46432345438567e-07, "loss": 0.5092, "step": 13984 }, { "epoch": 0.9, "grad_norm": 1.2755961418151855, "learning_rate": 2.461083275614651e-07, "loss": 0.4828, "step": 13985 }, { "epoch": 0.9, "grad_norm": 1.203190565109253, "learning_rate": 2.4578451746470276e-07, "loss": 0.563, "step": 13986 }, { "epoch": 0.9, "grad_norm": 1.2714056968688965, "learning_rate": 2.454609151624332e-07, "loss": 0.4927, "step": 13987 }, { "epoch": 0.9, "grad_norm": 1.1742159128189087, "learning_rate": 2.4513752066880005e-07, "loss": 0.4902, "step": 13988 }, { "epoch": 0.9, "grad_norm": 1.2415683269500732, "learning_rate": 2.448143339979386e-07, "loss": 0.5317, "step": 13989 }, { "epoch": 0.9, "grad_norm": 1.2037866115570068, "learning_rate": 2.444913551639738e-07, "loss": 0.5394, "step": 13990 }, { "epoch": 0.9, "grad_norm": 1.2574177980422974, "learning_rate": 2.4416858418102263e-07, "loss": 0.5124, "step": 13991 }, { "epoch": 0.9, "grad_norm": 1.3323841094970703, "learning_rate": 2.438460210631921e-07, "loss": 0.4635, "step": 13992 }, { "epoch": 0.9, "grad_norm": 1.1728202104568481, "learning_rate": 2.4352366582458097e-07, "loss": 0.5103, "step": 13993 }, { "epoch": 0.9, "grad_norm": 1.1848843097686768, "learning_rate": 2.432015184792791e-07, "loss": 0.5223, "step": 13994 }, { "epoch": 0.9, "grad_norm": 1.3547029495239258, "learning_rate": 2.428795790413657e-07, "loss": 0.5279, "step": 13995 }, { "epoch": 0.9, "grad_norm": 1.156394124031067, "learning_rate": 2.425578475249124e-07, "loss": 0.4778, "step": 13996 }, { "epoch": 0.9, "grad_norm": 1.196358561515808, "learning_rate": 2.4223632394398125e-07, "loss": 0.4974, "step": 13997 }, { "epoch": 0.9, "grad_norm": 1.178411841392517, "learning_rate": 2.419150083126248e-07, "loss": 0.5422, "step": 13998 }, { "epoch": 0.9, "grad_norm": 1.1434458494186401, "learning_rate": 2.415939006448875e-07, "loss": 0.5256, "step": 13999 }, { "epoch": 0.9, "grad_norm": 1.226493000984192, "learning_rate": 2.412730009548048e-07, "loss": 0.4956, "step": 14000 }, { "epoch": 0.9, "grad_norm": 1.1507667303085327, "learning_rate": 2.409523092564009e-07, "loss": 0.4815, "step": 14001 }, { "epoch": 0.9, "grad_norm": 1.1501411199569702, "learning_rate": 2.40631825563693e-07, "loss": 0.498, "step": 14002 }, { "epoch": 0.9, "grad_norm": 1.1925370693206787, "learning_rate": 2.403115498906883e-07, "loss": 0.5316, "step": 14003 }, { "epoch": 0.9, "grad_norm": 1.1551659107208252, "learning_rate": 2.3999148225138603e-07, "loss": 0.4975, "step": 14004 }, { "epoch": 0.9, "grad_norm": 1.2237930297851562, "learning_rate": 2.3967162265977507e-07, "loss": 0.4927, "step": 14005 }, { "epoch": 0.9, "grad_norm": 1.2505131959915161, "learning_rate": 2.393519711298364e-07, "loss": 0.5526, "step": 14006 }, { "epoch": 0.9, "grad_norm": 1.138465404510498, "learning_rate": 2.3903252767553953e-07, "loss": 0.518, "step": 14007 }, { "epoch": 0.9, "grad_norm": 1.1927775144577026, "learning_rate": 2.3871329231084704e-07, "loss": 0.5189, "step": 14008 }, { "epoch": 0.9, "grad_norm": 1.2260578870773315, "learning_rate": 2.3839426504971286e-07, "loss": 0.5075, "step": 14009 }, { "epoch": 0.9, "grad_norm": 1.1789252758026123, "learning_rate": 2.3807544590608022e-07, "loss": 0.5054, "step": 14010 }, { "epoch": 0.9, "grad_norm": 1.2696360349655151, "learning_rate": 2.3775683489388357e-07, "loss": 0.5227, "step": 14011 }, { "epoch": 0.9, "grad_norm": 1.109462857246399, "learning_rate": 2.374384320270501e-07, "loss": 0.4923, "step": 14012 }, { "epoch": 0.9, "grad_norm": 1.2637419700622559, "learning_rate": 2.371202373194942e-07, "loss": 0.4908, "step": 14013 }, { "epoch": 0.9, "grad_norm": 1.2050756216049194, "learning_rate": 2.368022507851242e-07, "loss": 0.4789, "step": 14014 }, { "epoch": 0.9, "grad_norm": 1.2846176624298096, "learning_rate": 2.3648447243783901e-07, "loss": 0.5261, "step": 14015 }, { "epoch": 0.9, "grad_norm": 1.2715462446212769, "learning_rate": 2.3616690229152694e-07, "loss": 0.5667, "step": 14016 }, { "epoch": 0.9, "grad_norm": 1.1646794080734253, "learning_rate": 2.3584954036006914e-07, "loss": 0.495, "step": 14017 }, { "epoch": 0.9, "grad_norm": 1.1316674947738647, "learning_rate": 2.3553238665733668e-07, "loss": 0.514, "step": 14018 }, { "epoch": 0.9, "grad_norm": 1.1272649765014648, "learning_rate": 2.3521544119719074e-07, "loss": 0.5163, "step": 14019 }, { "epoch": 0.9, "grad_norm": 1.2560062408447266, "learning_rate": 2.3489870399348412e-07, "loss": 0.5397, "step": 14020 }, { "epoch": 0.91, "grad_norm": 1.3079118728637695, "learning_rate": 2.345821750600613e-07, "loss": 0.5262, "step": 14021 }, { "epoch": 0.91, "grad_norm": 1.1652222871780396, "learning_rate": 2.3426585441075678e-07, "loss": 0.4527, "step": 14022 }, { "epoch": 0.91, "grad_norm": 1.372022032737732, "learning_rate": 2.3394974205939614e-07, "loss": 0.5086, "step": 14023 }, { "epoch": 0.91, "grad_norm": 1.1379939317703247, "learning_rate": 2.3363383801979612e-07, "loss": 0.4556, "step": 14024 }, { "epoch": 0.91, "grad_norm": 1.3794859647750854, "learning_rate": 2.333181423057629e-07, "loss": 0.4473, "step": 14025 }, { "epoch": 0.91, "grad_norm": 1.2331002950668335, "learning_rate": 2.330026549310954e-07, "loss": 0.5127, "step": 14026 }, { "epoch": 0.91, "grad_norm": 1.1517242193222046, "learning_rate": 2.3268737590958269e-07, "loss": 0.4865, "step": 14027 }, { "epoch": 0.91, "grad_norm": 1.1922165155410767, "learning_rate": 2.323723052550053e-07, "loss": 0.5002, "step": 14028 }, { "epoch": 0.91, "grad_norm": 1.1622178554534912, "learning_rate": 2.3205744298113397e-07, "loss": 0.5083, "step": 14029 }, { "epoch": 0.91, "grad_norm": 1.152868390083313, "learning_rate": 2.3174278910173097e-07, "loss": 0.512, "step": 14030 }, { "epoch": 0.91, "grad_norm": 1.258574366569519, "learning_rate": 2.3142834363054755e-07, "loss": 0.4359, "step": 14031 }, { "epoch": 0.91, "grad_norm": 1.11565363407135, "learning_rate": 2.311141065813277e-07, "loss": 0.5316, "step": 14032 }, { "epoch": 0.91, "grad_norm": 1.1574629545211792, "learning_rate": 2.3080007796780713e-07, "loss": 0.4973, "step": 14033 }, { "epoch": 0.91, "grad_norm": 1.1412453651428223, "learning_rate": 2.304862578037098e-07, "loss": 0.4952, "step": 14034 }, { "epoch": 0.91, "grad_norm": 1.207065463066101, "learning_rate": 2.3017264610275314e-07, "loss": 0.5176, "step": 14035 }, { "epoch": 0.91, "grad_norm": 1.2744613885879517, "learning_rate": 2.2985924287864448e-07, "loss": 0.5256, "step": 14036 }, { "epoch": 0.91, "grad_norm": 1.1709011793136597, "learning_rate": 2.295460481450801e-07, "loss": 0.4724, "step": 14037 }, { "epoch": 0.91, "grad_norm": 1.2020132541656494, "learning_rate": 2.2923306191575012e-07, "loss": 0.4808, "step": 14038 }, { "epoch": 0.91, "grad_norm": 1.117118239402771, "learning_rate": 2.2892028420433477e-07, "loss": 0.5253, "step": 14039 }, { "epoch": 0.91, "grad_norm": 1.255819320678711, "learning_rate": 2.2860771502450363e-07, "loss": 0.5379, "step": 14040 }, { "epoch": 0.91, "grad_norm": 1.20323646068573, "learning_rate": 2.282953543899197e-07, "loss": 0.5177, "step": 14041 }, { "epoch": 0.91, "grad_norm": 1.2228835821151733, "learning_rate": 2.2798320231423422e-07, "loss": 0.5604, "step": 14042 }, { "epoch": 0.91, "grad_norm": 1.1642497777938843, "learning_rate": 2.2767125881109076e-07, "loss": 0.479, "step": 14043 }, { "epoch": 0.91, "grad_norm": 1.1123353242874146, "learning_rate": 2.2735952389412396e-07, "loss": 0.5095, "step": 14044 }, { "epoch": 0.91, "grad_norm": 1.1986130475997925, "learning_rate": 2.2704799757695906e-07, "loss": 0.4779, "step": 14045 }, { "epoch": 0.91, "grad_norm": 2.1682240962982178, "learning_rate": 2.2673667987321124e-07, "loss": 0.4926, "step": 14046 }, { "epoch": 0.91, "grad_norm": 1.3057140111923218, "learning_rate": 2.2642557079648908e-07, "loss": 0.5659, "step": 14047 }, { "epoch": 0.91, "grad_norm": 1.3359798192977905, "learning_rate": 2.2611467036038836e-07, "loss": 0.5671, "step": 14048 }, { "epoch": 0.91, "grad_norm": 1.3047953844070435, "learning_rate": 2.2580397857849878e-07, "loss": 0.5272, "step": 14049 }, { "epoch": 0.91, "grad_norm": 1.1857582330703735, "learning_rate": 2.2549349546439946e-07, "loss": 0.544, "step": 14050 }, { "epoch": 0.91, "grad_norm": 1.277658462524414, "learning_rate": 2.251832210316618e-07, "loss": 0.5186, "step": 14051 }, { "epoch": 0.91, "grad_norm": 1.2600688934326172, "learning_rate": 2.24873155293846e-07, "loss": 0.493, "step": 14052 }, { "epoch": 0.91, "grad_norm": 1.2061749696731567, "learning_rate": 2.2456329826450408e-07, "loss": 0.499, "step": 14053 }, { "epoch": 0.91, "grad_norm": 1.2752312421798706, "learning_rate": 2.2425364995718012e-07, "loss": 0.5323, "step": 14054 }, { "epoch": 0.91, "grad_norm": 1.244465947151184, "learning_rate": 2.2394421038540725e-07, "loss": 0.5737, "step": 14055 }, { "epoch": 0.91, "grad_norm": 1.3109773397445679, "learning_rate": 2.2363497956271073e-07, "loss": 0.576, "step": 14056 }, { "epoch": 0.91, "grad_norm": 1.2511324882507324, "learning_rate": 2.2332595750260587e-07, "loss": 0.5289, "step": 14057 }, { "epoch": 0.91, "grad_norm": 1.1391409635543823, "learning_rate": 2.2301714421859965e-07, "loss": 0.4802, "step": 14058 }, { "epoch": 0.91, "grad_norm": 1.321776270866394, "learning_rate": 2.2270853972418904e-07, "loss": 0.4925, "step": 14059 }, { "epoch": 0.91, "grad_norm": 1.1952730417251587, "learning_rate": 2.2240014403286326e-07, "loss": 0.4879, "step": 14060 }, { "epoch": 0.91, "grad_norm": 1.2286107540130615, "learning_rate": 2.220919571581004e-07, "loss": 0.5351, "step": 14061 }, { "epoch": 0.91, "grad_norm": 1.2556926012039185, "learning_rate": 2.217839791133708e-07, "loss": 0.5243, "step": 14062 }, { "epoch": 0.91, "grad_norm": 1.2115319967269897, "learning_rate": 2.2147620991213538e-07, "loss": 0.5222, "step": 14063 }, { "epoch": 0.91, "grad_norm": 1.2293082475662231, "learning_rate": 2.211686495678461e-07, "loss": 0.5117, "step": 14064 }, { "epoch": 0.91, "grad_norm": 1.1991385221481323, "learning_rate": 2.2086129809394618e-07, "loss": 0.5182, "step": 14065 }, { "epoch": 0.91, "grad_norm": 1.1848877668380737, "learning_rate": 2.2055415550386817e-07, "loss": 0.4859, "step": 14066 }, { "epoch": 0.91, "grad_norm": 1.3641928434371948, "learning_rate": 2.202472218110363e-07, "loss": 0.4589, "step": 14067 }, { "epoch": 0.91, "grad_norm": 1.0970999002456665, "learning_rate": 2.199404970288671e-07, "loss": 0.4954, "step": 14068 }, { "epoch": 0.91, "grad_norm": 1.2412543296813965, "learning_rate": 2.1963398117076595e-07, "loss": 0.5166, "step": 14069 }, { "epoch": 0.91, "grad_norm": 1.2456203699111938, "learning_rate": 2.1932767425012936e-07, "loss": 0.508, "step": 14070 }, { "epoch": 0.91, "grad_norm": 1.151660680770874, "learning_rate": 2.1902157628034714e-07, "loss": 0.4805, "step": 14071 }, { "epoch": 0.91, "grad_norm": 1.157992959022522, "learning_rate": 2.1871568727479587e-07, "loss": 0.4604, "step": 14072 }, { "epoch": 0.91, "grad_norm": 1.1702015399932861, "learning_rate": 2.1841000724684592e-07, "loss": 0.5302, "step": 14073 }, { "epoch": 0.91, "grad_norm": 1.236574649810791, "learning_rate": 2.1810453620985828e-07, "loss": 0.4972, "step": 14074 }, { "epoch": 0.91, "grad_norm": 1.1142809391021729, "learning_rate": 2.1779927417718338e-07, "loss": 0.4815, "step": 14075 }, { "epoch": 0.91, "grad_norm": 1.158593773841858, "learning_rate": 2.1749422116216444e-07, "loss": 0.5097, "step": 14076 }, { "epoch": 0.91, "grad_norm": 1.3378146886825562, "learning_rate": 2.171893771781347e-07, "loss": 0.5654, "step": 14077 }, { "epoch": 0.91, "grad_norm": 1.1528974771499634, "learning_rate": 2.1688474223841626e-07, "loss": 0.4622, "step": 14078 }, { "epoch": 0.91, "grad_norm": 1.128050446510315, "learning_rate": 2.165803163563257e-07, "loss": 0.4814, "step": 14079 }, { "epoch": 0.91, "grad_norm": 1.2131624221801758, "learning_rate": 2.1627609954516794e-07, "loss": 0.5266, "step": 14080 }, { "epoch": 0.91, "grad_norm": 1.1173394918441772, "learning_rate": 2.1597209181823953e-07, "loss": 0.5312, "step": 14081 }, { "epoch": 0.91, "grad_norm": 1.2122677564620972, "learning_rate": 2.1566829318882876e-07, "loss": 0.4521, "step": 14082 }, { "epoch": 0.91, "grad_norm": 1.3640178442001343, "learning_rate": 2.1536470367021279e-07, "loss": 0.5288, "step": 14083 }, { "epoch": 0.91, "grad_norm": 1.320780634880066, "learning_rate": 2.1506132327566097e-07, "loss": 0.5009, "step": 14084 }, { "epoch": 0.91, "grad_norm": 1.1724765300750732, "learning_rate": 2.147581520184333e-07, "loss": 0.5046, "step": 14085 }, { "epoch": 0.91, "grad_norm": 1.149777889251709, "learning_rate": 2.1445518991178083e-07, "loss": 0.5145, "step": 14086 }, { "epoch": 0.91, "grad_norm": 1.2897534370422363, "learning_rate": 2.1415243696894462e-07, "loss": 0.534, "step": 14087 }, { "epoch": 0.91, "grad_norm": 1.1782773733139038, "learning_rate": 2.138498932031591e-07, "loss": 0.498, "step": 14088 }, { "epoch": 0.91, "grad_norm": 1.2584590911865234, "learning_rate": 2.135475586276453e-07, "loss": 0.5537, "step": 14089 }, { "epoch": 0.91, "grad_norm": 1.2758045196533203, "learning_rate": 2.132454332556183e-07, "loss": 0.4999, "step": 14090 }, { "epoch": 0.91, "grad_norm": 1.2486693859100342, "learning_rate": 2.1294351710028415e-07, "loss": 0.5116, "step": 14091 }, { "epoch": 0.91, "grad_norm": 1.282927393913269, "learning_rate": 2.1264181017483732e-07, "loss": 0.5339, "step": 14092 }, { "epoch": 0.91, "grad_norm": 1.1436675786972046, "learning_rate": 2.1234031249246612e-07, "loss": 0.5045, "step": 14093 }, { "epoch": 0.91, "grad_norm": 1.2076717615127563, "learning_rate": 2.120390240663478e-07, "loss": 0.5101, "step": 14094 }, { "epoch": 0.91, "grad_norm": 1.0915759801864624, "learning_rate": 2.1173794490964961e-07, "loss": 0.4704, "step": 14095 }, { "epoch": 0.91, "grad_norm": 1.4407336711883545, "learning_rate": 2.1143707503553268e-07, "loss": 0.523, "step": 14096 }, { "epoch": 0.91, "grad_norm": 1.1561838388442993, "learning_rate": 2.1113641445714594e-07, "loss": 0.5179, "step": 14097 }, { "epoch": 0.91, "grad_norm": 1.2983969449996948, "learning_rate": 2.1083596318763166e-07, "loss": 0.5495, "step": 14098 }, { "epoch": 0.91, "grad_norm": 1.2209123373031616, "learning_rate": 2.10535721240121e-07, "loss": 0.4978, "step": 14099 }, { "epoch": 0.91, "grad_norm": 1.0923314094543457, "learning_rate": 2.1023568862773735e-07, "loss": 0.479, "step": 14100 }, { "epoch": 0.91, "grad_norm": 1.168715000152588, "learning_rate": 2.0993586536359357e-07, "loss": 0.5, "step": 14101 }, { "epoch": 0.91, "grad_norm": 1.2846355438232422, "learning_rate": 2.0963625146079468e-07, "loss": 0.4618, "step": 14102 }, { "epoch": 0.91, "grad_norm": 1.2943905591964722, "learning_rate": 2.0933684693243583e-07, "loss": 0.4986, "step": 14103 }, { "epoch": 0.91, "grad_norm": 1.1999136209487915, "learning_rate": 2.090376517916032e-07, "loss": 0.4701, "step": 14104 }, { "epoch": 0.91, "grad_norm": 1.1947593688964844, "learning_rate": 2.0873866605137404e-07, "loss": 0.5071, "step": 14105 }, { "epoch": 0.91, "grad_norm": 1.2837092876434326, "learning_rate": 2.084398897248169e-07, "loss": 0.5278, "step": 14106 }, { "epoch": 0.91, "grad_norm": 1.2193660736083984, "learning_rate": 2.08141322824989e-07, "loss": 0.4865, "step": 14107 }, { "epoch": 0.91, "grad_norm": 1.2187905311584473, "learning_rate": 2.078429653649411e-07, "loss": 0.4829, "step": 14108 }, { "epoch": 0.91, "grad_norm": 1.2246495485305786, "learning_rate": 2.075448173577127e-07, "loss": 0.5136, "step": 14109 }, { "epoch": 0.91, "grad_norm": 1.2258145809173584, "learning_rate": 2.0724687881633566e-07, "loss": 0.5179, "step": 14110 }, { "epoch": 0.91, "grad_norm": 1.1248420476913452, "learning_rate": 2.069491497538323e-07, "loss": 0.5055, "step": 14111 }, { "epoch": 0.91, "grad_norm": 1.2493510246276855, "learning_rate": 2.066516301832161e-07, "loss": 0.5038, "step": 14112 }, { "epoch": 0.91, "grad_norm": 1.2010082006454468, "learning_rate": 2.0635432011748947e-07, "loss": 0.5003, "step": 14113 }, { "epoch": 0.91, "grad_norm": 1.4484220743179321, "learning_rate": 2.0605721956964808e-07, "loss": 0.4974, "step": 14114 }, { "epoch": 0.91, "grad_norm": 1.152525782585144, "learning_rate": 2.057603285526766e-07, "loss": 0.499, "step": 14115 }, { "epoch": 0.91, "grad_norm": 1.2156505584716797, "learning_rate": 2.054636470795518e-07, "loss": 0.482, "step": 14116 }, { "epoch": 0.91, "grad_norm": 1.249792218208313, "learning_rate": 2.0516717516324114e-07, "loss": 0.4877, "step": 14117 }, { "epoch": 0.91, "grad_norm": 1.245082139968872, "learning_rate": 2.0487091281670314e-07, "loss": 0.5081, "step": 14118 }, { "epoch": 0.91, "grad_norm": 1.4055473804473877, "learning_rate": 2.0457486005288518e-07, "loss": 0.522, "step": 14119 }, { "epoch": 0.91, "grad_norm": 1.203184723854065, "learning_rate": 2.042790168847275e-07, "loss": 0.4957, "step": 14120 }, { "epoch": 0.91, "grad_norm": 1.1147397756576538, "learning_rate": 2.0398338332516088e-07, "loss": 0.4673, "step": 14121 }, { "epoch": 0.91, "grad_norm": 1.226731538772583, "learning_rate": 2.0368795938710716e-07, "loss": 0.5089, "step": 14122 }, { "epoch": 0.91, "grad_norm": 1.1133254766464233, "learning_rate": 2.0339274508347827e-07, "loss": 0.5047, "step": 14123 }, { "epoch": 0.91, "grad_norm": 1.267196536064148, "learning_rate": 2.0309774042717668e-07, "loss": 0.501, "step": 14124 }, { "epoch": 0.91, "grad_norm": 1.3872524499893188, "learning_rate": 2.0280294543109647e-07, "loss": 0.4905, "step": 14125 }, { "epoch": 0.91, "grad_norm": 1.345834493637085, "learning_rate": 2.0250836010812292e-07, "loss": 0.4927, "step": 14126 }, { "epoch": 0.91, "grad_norm": 1.208847999572754, "learning_rate": 2.0221398447113127e-07, "loss": 0.5258, "step": 14127 }, { "epoch": 0.91, "grad_norm": 1.157729983329773, "learning_rate": 2.0191981853298735e-07, "loss": 0.4769, "step": 14128 }, { "epoch": 0.91, "grad_norm": 1.311092734336853, "learning_rate": 2.0162586230655034e-07, "loss": 0.4728, "step": 14129 }, { "epoch": 0.91, "grad_norm": 1.4571864604949951, "learning_rate": 2.0133211580466548e-07, "loss": 0.5537, "step": 14130 }, { "epoch": 0.91, "grad_norm": 1.1363961696624756, "learning_rate": 2.0103857904017365e-07, "loss": 0.4868, "step": 14131 }, { "epoch": 0.91, "grad_norm": 1.1840267181396484, "learning_rate": 2.00745252025904e-07, "loss": 0.4933, "step": 14132 }, { "epoch": 0.91, "grad_norm": 1.2516281604766846, "learning_rate": 2.0045213477467684e-07, "loss": 0.5362, "step": 14133 }, { "epoch": 0.91, "grad_norm": 1.1743720769882202, "learning_rate": 2.0015922729930414e-07, "loss": 0.5375, "step": 14134 }, { "epoch": 0.91, "grad_norm": 1.2285585403442383, "learning_rate": 1.9986652961258845e-07, "loss": 0.5366, "step": 14135 }, { "epoch": 0.91, "grad_norm": 1.3550108671188354, "learning_rate": 1.9957404172732176e-07, "loss": 0.5628, "step": 14136 }, { "epoch": 0.91, "grad_norm": 1.227819800376892, "learning_rate": 1.9928176365628826e-07, "loss": 0.5197, "step": 14137 }, { "epoch": 0.91, "grad_norm": 1.452844500541687, "learning_rate": 1.9898969541226276e-07, "loss": 0.5306, "step": 14138 }, { "epoch": 0.91, "grad_norm": 1.169498324394226, "learning_rate": 1.9869783700801115e-07, "loss": 0.5107, "step": 14139 }, { "epoch": 0.91, "grad_norm": 1.2351363897323608, "learning_rate": 1.984061884562899e-07, "loss": 0.5154, "step": 14140 }, { "epoch": 0.91, "grad_norm": 1.186769723892212, "learning_rate": 1.9811474976984547e-07, "loss": 0.5049, "step": 14141 }, { "epoch": 0.91, "grad_norm": 1.2726589441299438, "learning_rate": 1.97823520961416e-07, "loss": 0.4929, "step": 14142 }, { "epoch": 0.91, "grad_norm": 1.212510108947754, "learning_rate": 1.975325020437313e-07, "loss": 0.5087, "step": 14143 }, { "epoch": 0.91, "grad_norm": 1.1616663932800293, "learning_rate": 1.9724169302950958e-07, "loss": 0.4919, "step": 14144 }, { "epoch": 0.91, "grad_norm": 1.160124659538269, "learning_rate": 1.9695109393146339e-07, "loss": 0.498, "step": 14145 }, { "epoch": 0.91, "grad_norm": 1.2917308807373047, "learning_rate": 1.9666070476229204e-07, "loss": 0.5708, "step": 14146 }, { "epoch": 0.91, "grad_norm": 1.172204852104187, "learning_rate": 1.9637052553468817e-07, "loss": 0.4645, "step": 14147 }, { "epoch": 0.91, "grad_norm": 1.2393276691436768, "learning_rate": 1.960805562613355e-07, "loss": 0.4722, "step": 14148 }, { "epoch": 0.91, "grad_norm": 1.2955416440963745, "learning_rate": 1.9579079695490832e-07, "loss": 0.4732, "step": 14149 }, { "epoch": 0.91, "grad_norm": 1.2434977293014526, "learning_rate": 1.955012476280693e-07, "loss": 0.5681, "step": 14150 }, { "epoch": 0.91, "grad_norm": 1.5475099086761475, "learning_rate": 1.9521190829347502e-07, "loss": 0.553, "step": 14151 }, { "epoch": 0.91, "grad_norm": 1.2502063512802124, "learning_rate": 1.9492277896377197e-07, "loss": 0.5442, "step": 14152 }, { "epoch": 0.91, "grad_norm": 1.2187107801437378, "learning_rate": 1.9463385965159676e-07, "loss": 0.5219, "step": 14153 }, { "epoch": 0.91, "grad_norm": 1.2546675205230713, "learning_rate": 1.9434515036957813e-07, "loss": 0.5151, "step": 14154 }, { "epoch": 0.91, "grad_norm": 1.150847315788269, "learning_rate": 1.9405665113033379e-07, "loss": 0.4818, "step": 14155 }, { "epoch": 0.91, "grad_norm": 1.105293869972229, "learning_rate": 1.9376836194647364e-07, "loss": 0.4888, "step": 14156 }, { "epoch": 0.91, "grad_norm": 1.3331329822540283, "learning_rate": 1.9348028283059815e-07, "loss": 0.536, "step": 14157 }, { "epoch": 0.91, "grad_norm": 1.2056972980499268, "learning_rate": 1.9319241379529841e-07, "loss": 0.5231, "step": 14158 }, { "epoch": 0.91, "grad_norm": 1.2116615772247314, "learning_rate": 1.9290475485315708e-07, "loss": 0.529, "step": 14159 }, { "epoch": 0.91, "grad_norm": 1.113997220993042, "learning_rate": 1.9261730601674577e-07, "loss": 0.4756, "step": 14160 }, { "epoch": 0.91, "grad_norm": 1.0928014516830444, "learning_rate": 1.9233006729862947e-07, "loss": 0.5055, "step": 14161 }, { "epoch": 0.91, "grad_norm": 1.0581830739974976, "learning_rate": 1.9204303871136143e-07, "loss": 0.4935, "step": 14162 }, { "epoch": 0.91, "grad_norm": 1.2532424926757812, "learning_rate": 1.917562202674872e-07, "loss": 0.5399, "step": 14163 }, { "epoch": 0.91, "grad_norm": 1.3591172695159912, "learning_rate": 1.9146961197954394e-07, "loss": 0.5399, "step": 14164 }, { "epoch": 0.91, "grad_norm": 1.2249372005462646, "learning_rate": 1.9118321386005778e-07, "loss": 0.4744, "step": 14165 }, { "epoch": 0.91, "grad_norm": 1.3191330432891846, "learning_rate": 1.908970259215459e-07, "loss": 0.5105, "step": 14166 }, { "epoch": 0.91, "grad_norm": 1.2249641418457031, "learning_rate": 1.9061104817651722e-07, "loss": 0.5006, "step": 14167 }, { "epoch": 0.91, "grad_norm": 1.139650583267212, "learning_rate": 1.903252806374717e-07, "loss": 0.5222, "step": 14168 }, { "epoch": 0.91, "grad_norm": 1.0968230962753296, "learning_rate": 1.900397233168988e-07, "loss": 0.4903, "step": 14169 }, { "epoch": 0.91, "grad_norm": 1.1782848834991455, "learning_rate": 1.897543762272802e-07, "loss": 0.5123, "step": 14170 }, { "epoch": 0.91, "grad_norm": 1.1512420177459717, "learning_rate": 1.8946923938108707e-07, "loss": 0.4944, "step": 14171 }, { "epoch": 0.91, "grad_norm": 1.26972496509552, "learning_rate": 1.8918431279078165e-07, "loss": 0.5153, "step": 14172 }, { "epoch": 0.91, "grad_norm": 1.3453651666641235, "learning_rate": 1.8889959646881783e-07, "loss": 0.511, "step": 14173 }, { "epoch": 0.91, "grad_norm": 1.211989402770996, "learning_rate": 1.8861509042764016e-07, "loss": 0.506, "step": 14174 }, { "epoch": 0.91, "grad_norm": 1.1766389608383179, "learning_rate": 1.8833079467968307e-07, "loss": 0.5513, "step": 14175 }, { "epoch": 0.92, "grad_norm": 1.3996163606643677, "learning_rate": 1.8804670923737333e-07, "loss": 0.5583, "step": 14176 }, { "epoch": 0.92, "grad_norm": 1.2540515661239624, "learning_rate": 1.8776283411312656e-07, "loss": 0.5208, "step": 14177 }, { "epoch": 0.92, "grad_norm": 1.0851460695266724, "learning_rate": 1.8747916931935062e-07, "loss": 0.486, "step": 14178 }, { "epoch": 0.92, "grad_norm": 1.2689459323883057, "learning_rate": 1.8719571486844335e-07, "loss": 0.505, "step": 14179 }, { "epoch": 0.92, "grad_norm": 1.2018020153045654, "learning_rate": 1.8691247077279428e-07, "loss": 0.5085, "step": 14180 }, { "epoch": 0.92, "grad_norm": 1.3859593868255615, "learning_rate": 1.8662943704478353e-07, "loss": 0.498, "step": 14181 }, { "epoch": 0.92, "grad_norm": 1.1718268394470215, "learning_rate": 1.8634661369678176e-07, "loss": 0.517, "step": 14182 }, { "epoch": 0.92, "grad_norm": 1.1695191860198975, "learning_rate": 1.860640007411496e-07, "loss": 0.5391, "step": 14183 }, { "epoch": 0.92, "grad_norm": 1.343738079071045, "learning_rate": 1.8578159819023943e-07, "loss": 0.5348, "step": 14184 }, { "epoch": 0.92, "grad_norm": 1.1592974662780762, "learning_rate": 1.8549940605639527e-07, "loss": 0.4986, "step": 14185 }, { "epoch": 0.92, "grad_norm": 1.1780019998550415, "learning_rate": 1.8521742435195057e-07, "loss": 0.4814, "step": 14186 }, { "epoch": 0.92, "grad_norm": 1.1602896451950073, "learning_rate": 1.8493565308922933e-07, "loss": 0.4879, "step": 14187 }, { "epoch": 0.92, "grad_norm": 1.1839754581451416, "learning_rate": 1.8465409228054898e-07, "loss": 0.496, "step": 14188 }, { "epoch": 0.92, "grad_norm": 1.3446124792099, "learning_rate": 1.8437274193821352e-07, "loss": 0.5052, "step": 14189 }, { "epoch": 0.92, "grad_norm": 1.2341982126235962, "learning_rate": 1.8409160207452092e-07, "loss": 0.5169, "step": 14190 }, { "epoch": 0.92, "grad_norm": 1.184997797012329, "learning_rate": 1.8381067270175913e-07, "loss": 0.4683, "step": 14191 }, { "epoch": 0.92, "grad_norm": 1.1932276487350464, "learning_rate": 1.8352995383220716e-07, "loss": 0.4652, "step": 14192 }, { "epoch": 0.92, "grad_norm": 1.3709096908569336, "learning_rate": 1.8324944547813417e-07, "loss": 0.5534, "step": 14193 }, { "epoch": 0.92, "grad_norm": 1.317522406578064, "learning_rate": 1.8296914765180141e-07, "loss": 0.5001, "step": 14194 }, { "epoch": 0.92, "grad_norm": 1.1326472759246826, "learning_rate": 1.8268906036545796e-07, "loss": 0.4698, "step": 14195 }, { "epoch": 0.92, "grad_norm": 1.1971663236618042, "learning_rate": 1.8240918363134742e-07, "loss": 0.4891, "step": 14196 }, { "epoch": 0.92, "grad_norm": 1.2281297445297241, "learning_rate": 1.821295174617016e-07, "loss": 0.5419, "step": 14197 }, { "epoch": 0.92, "grad_norm": 1.2793326377868652, "learning_rate": 1.818500618687441e-07, "loss": 0.5012, "step": 14198 }, { "epoch": 0.92, "grad_norm": 1.2428512573242188, "learning_rate": 1.8157081686468959e-07, "loss": 0.52, "step": 14199 }, { "epoch": 0.92, "grad_norm": 1.1326252222061157, "learning_rate": 1.8129178246174385e-07, "loss": 0.4865, "step": 14200 }, { "epoch": 0.92, "grad_norm": 1.2084157466888428, "learning_rate": 1.81012958672101e-07, "loss": 0.4916, "step": 14201 }, { "epoch": 0.92, "grad_norm": 1.2897595167160034, "learning_rate": 1.8073434550794855e-07, "loss": 0.5031, "step": 14202 }, { "epoch": 0.92, "grad_norm": 1.2820613384246826, "learning_rate": 1.8045594298146397e-07, "loss": 0.4964, "step": 14203 }, { "epoch": 0.92, "grad_norm": 3.0608928203582764, "learning_rate": 1.8017775110481528e-07, "loss": 0.495, "step": 14204 }, { "epoch": 0.92, "grad_norm": 1.201222538948059, "learning_rate": 1.7989976989016222e-07, "loss": 0.5246, "step": 14205 }, { "epoch": 0.92, "grad_norm": 1.313037395477295, "learning_rate": 1.7962199934965453e-07, "loss": 0.5726, "step": 14206 }, { "epoch": 0.92, "grad_norm": 1.260953426361084, "learning_rate": 1.793444394954319e-07, "loss": 0.527, "step": 14207 }, { "epoch": 0.92, "grad_norm": 1.2966563701629639, "learning_rate": 1.790670903396269e-07, "loss": 0.5101, "step": 14208 }, { "epoch": 0.92, "grad_norm": 1.0497543811798096, "learning_rate": 1.7878995189436087e-07, "loss": 0.4968, "step": 14209 }, { "epoch": 0.92, "grad_norm": 1.1755882501602173, "learning_rate": 1.7851302417174698e-07, "loss": 0.5379, "step": 14210 }, { "epoch": 0.92, "grad_norm": 1.2343307733535767, "learning_rate": 1.7823630718388996e-07, "loss": 0.4831, "step": 14211 }, { "epoch": 0.92, "grad_norm": 1.0775303840637207, "learning_rate": 1.779598009428829e-07, "loss": 0.4807, "step": 14212 }, { "epoch": 0.92, "grad_norm": 1.190106749534607, "learning_rate": 1.776835054608117e-07, "loss": 0.4997, "step": 14213 }, { "epoch": 0.92, "grad_norm": 1.2546428442001343, "learning_rate": 1.7740742074975337e-07, "loss": 0.506, "step": 14214 }, { "epoch": 0.92, "grad_norm": 1.183526873588562, "learning_rate": 1.771315468217738e-07, "loss": 0.4578, "step": 14215 }, { "epoch": 0.92, "grad_norm": 1.2946687936782837, "learning_rate": 1.7685588368893113e-07, "loss": 0.5231, "step": 14216 }, { "epoch": 0.92, "grad_norm": 1.2603955268859863, "learning_rate": 1.7658043136327463e-07, "loss": 0.5308, "step": 14217 }, { "epoch": 0.92, "grad_norm": 1.3690190315246582, "learning_rate": 1.7630518985684242e-07, "loss": 0.5259, "step": 14218 }, { "epoch": 0.92, "grad_norm": 1.2345750331878662, "learning_rate": 1.760301591816649e-07, "loss": 0.4945, "step": 14219 }, { "epoch": 0.92, "grad_norm": 1.158706784248352, "learning_rate": 1.7575533934976297e-07, "loss": 0.5146, "step": 14220 }, { "epoch": 0.92, "grad_norm": 1.2859890460968018, "learning_rate": 1.7548073037314873e-07, "loss": 0.517, "step": 14221 }, { "epoch": 0.92, "grad_norm": 1.1597576141357422, "learning_rate": 1.7520633226382421e-07, "loss": 0.508, "step": 14222 }, { "epoch": 0.92, "grad_norm": 1.0956861972808838, "learning_rate": 1.7493214503378319e-07, "loss": 0.4383, "step": 14223 }, { "epoch": 0.92, "grad_norm": 1.3159949779510498, "learning_rate": 1.7465816869500885e-07, "loss": 0.5124, "step": 14224 }, { "epoch": 0.92, "grad_norm": 1.2340877056121826, "learning_rate": 1.743844032594766e-07, "loss": 0.4545, "step": 14225 }, { "epoch": 0.92, "grad_norm": 1.2797375917434692, "learning_rate": 1.741108487391513e-07, "loss": 0.5069, "step": 14226 }, { "epoch": 0.92, "grad_norm": 1.2401622533798218, "learning_rate": 1.7383750514599063e-07, "loss": 0.5242, "step": 14227 }, { "epoch": 0.92, "grad_norm": 1.186868667602539, "learning_rate": 1.735643724919406e-07, "loss": 0.5095, "step": 14228 }, { "epoch": 0.92, "grad_norm": 1.2980648279190063, "learning_rate": 1.7329145078893994e-07, "loss": 0.4964, "step": 14229 }, { "epoch": 0.92, "grad_norm": 1.182409405708313, "learning_rate": 1.7301874004891584e-07, "loss": 0.4722, "step": 14230 }, { "epoch": 0.92, "grad_norm": 1.3928158283233643, "learning_rate": 1.7274624028378928e-07, "loss": 0.5504, "step": 14231 }, { "epoch": 0.92, "grad_norm": 1.3428847789764404, "learning_rate": 1.724739515054702e-07, "loss": 0.5298, "step": 14232 }, { "epoch": 0.92, "grad_norm": 1.225989818572998, "learning_rate": 1.722018737258596e-07, "loss": 0.5214, "step": 14233 }, { "epoch": 0.92, "grad_norm": 1.1396112442016602, "learning_rate": 1.7193000695684968e-07, "loss": 0.482, "step": 14234 }, { "epoch": 0.92, "grad_norm": 1.287857174873352, "learning_rate": 1.7165835121032203e-07, "loss": 0.4982, "step": 14235 }, { "epoch": 0.92, "grad_norm": 1.2425792217254639, "learning_rate": 1.7138690649815048e-07, "loss": 0.5309, "step": 14236 }, { "epoch": 0.92, "grad_norm": 1.3993710279464722, "learning_rate": 1.7111567283219887e-07, "loss": 0.4742, "step": 14237 }, { "epoch": 0.92, "grad_norm": 1.2439650297164917, "learning_rate": 1.7084465022432383e-07, "loss": 0.5008, "step": 14238 }, { "epoch": 0.92, "grad_norm": 1.2595211267471313, "learning_rate": 1.705738386863687e-07, "loss": 0.4863, "step": 14239 }, { "epoch": 0.92, "grad_norm": 1.1799864768981934, "learning_rate": 1.7030323823017115e-07, "loss": 0.5094, "step": 14240 }, { "epoch": 0.92, "grad_norm": 1.2347934246063232, "learning_rate": 1.7003284886755844e-07, "loss": 0.4815, "step": 14241 }, { "epoch": 0.92, "grad_norm": 1.211828589439392, "learning_rate": 1.6976267061034778e-07, "loss": 0.5347, "step": 14242 }, { "epoch": 0.92, "grad_norm": 1.1672377586364746, "learning_rate": 1.6949270347034975e-07, "loss": 0.5258, "step": 14243 }, { "epoch": 0.92, "grad_norm": 1.2234139442443848, "learning_rate": 1.6922294745936209e-07, "loss": 0.4933, "step": 14244 }, { "epoch": 0.92, "grad_norm": 1.3175160884857178, "learning_rate": 1.6895340258917592e-07, "loss": 0.4805, "step": 14245 }, { "epoch": 0.92, "grad_norm": 1.3137528896331787, "learning_rate": 1.6868406887157185e-07, "loss": 0.496, "step": 14246 }, { "epoch": 0.92, "grad_norm": 1.1143410205841064, "learning_rate": 1.6841494631832322e-07, "loss": 0.4808, "step": 14247 }, { "epoch": 0.92, "grad_norm": 1.2617394924163818, "learning_rate": 1.6814603494119063e-07, "loss": 0.502, "step": 14248 }, { "epoch": 0.92, "grad_norm": 1.1934635639190674, "learning_rate": 1.6787733475192913e-07, "loss": 0.5332, "step": 14249 }, { "epoch": 0.92, "grad_norm": 1.2448742389678955, "learning_rate": 1.6760884576228153e-07, "loss": 0.5304, "step": 14250 }, { "epoch": 0.92, "grad_norm": 1.3280786275863647, "learning_rate": 1.6734056798398402e-07, "loss": 0.5345, "step": 14251 }, { "epoch": 0.92, "grad_norm": 1.2478704452514648, "learning_rate": 1.670725014287622e-07, "loss": 0.462, "step": 14252 }, { "epoch": 0.92, "grad_norm": 1.1276806592941284, "learning_rate": 1.6680464610833168e-07, "loss": 0.5694, "step": 14253 }, { "epoch": 0.92, "grad_norm": 1.1573052406311035, "learning_rate": 1.6653700203440038e-07, "loss": 0.4814, "step": 14254 }, { "epoch": 0.92, "grad_norm": 1.234739899635315, "learning_rate": 1.6626956921866665e-07, "loss": 0.4622, "step": 14255 }, { "epoch": 0.92, "grad_norm": 1.1989126205444336, "learning_rate": 1.660023476728184e-07, "loss": 0.5031, "step": 14256 }, { "epoch": 0.92, "grad_norm": 1.0853924751281738, "learning_rate": 1.6573533740853521e-07, "loss": 0.4946, "step": 14257 }, { "epoch": 0.92, "grad_norm": 1.1633594036102295, "learning_rate": 1.6546853843748934e-07, "loss": 0.5301, "step": 14258 }, { "epoch": 0.92, "grad_norm": 1.240571141242981, "learning_rate": 1.6520195077133928e-07, "loss": 0.5395, "step": 14259 }, { "epoch": 0.92, "grad_norm": 1.1811853647232056, "learning_rate": 1.6493557442173792e-07, "loss": 0.5211, "step": 14260 }, { "epoch": 0.92, "grad_norm": 1.2079240083694458, "learning_rate": 1.6466940940032816e-07, "loss": 0.4717, "step": 14261 }, { "epoch": 0.92, "grad_norm": 1.125805139541626, "learning_rate": 1.6440345571874295e-07, "loss": 0.524, "step": 14262 }, { "epoch": 0.92, "grad_norm": 1.1578150987625122, "learning_rate": 1.641377133886074e-07, "loss": 0.4756, "step": 14263 }, { "epoch": 0.92, "grad_norm": 1.182908535003662, "learning_rate": 1.6387218242153558e-07, "loss": 0.5166, "step": 14264 }, { "epoch": 0.92, "grad_norm": 1.2797541618347168, "learning_rate": 1.6360686282913262e-07, "loss": 0.551, "step": 14265 }, { "epoch": 0.92, "grad_norm": 1.2128973007202148, "learning_rate": 1.6334175462299595e-07, "loss": 0.5214, "step": 14266 }, { "epoch": 0.92, "grad_norm": 1.2322112321853638, "learning_rate": 1.6307685781471238e-07, "loss": 0.5481, "step": 14267 }, { "epoch": 0.92, "grad_norm": 1.2119437456130981, "learning_rate": 1.6281217241586044e-07, "loss": 0.4853, "step": 14268 }, { "epoch": 0.92, "grad_norm": 1.278343915939331, "learning_rate": 1.6254769843800756e-07, "loss": 0.5344, "step": 14269 }, { "epoch": 0.92, "grad_norm": 1.194452166557312, "learning_rate": 1.62283435892715e-07, "loss": 0.5618, "step": 14270 }, { "epoch": 0.92, "grad_norm": 1.2039660215377808, "learning_rate": 1.6201938479153133e-07, "loss": 0.5198, "step": 14271 }, { "epoch": 0.92, "grad_norm": 1.328014850616455, "learning_rate": 1.617555451459979e-07, "loss": 0.5324, "step": 14272 }, { "epoch": 0.92, "grad_norm": 1.2292943000793457, "learning_rate": 1.614919169676471e-07, "loss": 0.5252, "step": 14273 }, { "epoch": 0.92, "grad_norm": 1.2329920530319214, "learning_rate": 1.6122850026800086e-07, "loss": 0.5099, "step": 14274 }, { "epoch": 0.92, "grad_norm": 1.2096346616744995, "learning_rate": 1.6096529505857272e-07, "loss": 0.4747, "step": 14275 }, { "epoch": 0.92, "grad_norm": 1.2355666160583496, "learning_rate": 1.607023013508674e-07, "loss": 0.5164, "step": 14276 }, { "epoch": 0.92, "grad_norm": 1.253244400024414, "learning_rate": 1.6043951915637846e-07, "loss": 0.4847, "step": 14277 }, { "epoch": 0.92, "grad_norm": 1.266559362411499, "learning_rate": 1.601769484865917e-07, "loss": 0.5289, "step": 14278 }, { "epoch": 0.92, "grad_norm": 1.1701403856277466, "learning_rate": 1.5991458935298354e-07, "loss": 0.5383, "step": 14279 }, { "epoch": 0.92, "grad_norm": 1.2652918100357056, "learning_rate": 1.5965244176702144e-07, "loss": 0.4758, "step": 14280 }, { "epoch": 0.92, "grad_norm": 1.2614126205444336, "learning_rate": 1.5939050574016236e-07, "loss": 0.5099, "step": 14281 }, { "epoch": 0.92, "grad_norm": 1.2951312065124512, "learning_rate": 1.59128781283856e-07, "loss": 0.5371, "step": 14282 }, { "epoch": 0.92, "grad_norm": 1.1721450090408325, "learning_rate": 1.5886726840954046e-07, "loss": 0.4875, "step": 14283 }, { "epoch": 0.92, "grad_norm": 1.4060920476913452, "learning_rate": 1.5860596712864662e-07, "loss": 0.4594, "step": 14284 }, { "epoch": 0.92, "grad_norm": 1.1817039251327515, "learning_rate": 1.5834487745259475e-07, "loss": 0.5343, "step": 14285 }, { "epoch": 0.92, "grad_norm": 1.2836686372756958, "learning_rate": 1.5808399939279684e-07, "loss": 0.5442, "step": 14286 }, { "epoch": 0.92, "grad_norm": 1.254556655883789, "learning_rate": 1.5782333296065488e-07, "loss": 0.4944, "step": 14287 }, { "epoch": 0.92, "grad_norm": 1.2912670373916626, "learning_rate": 1.5756287816756255e-07, "loss": 0.4745, "step": 14288 }, { "epoch": 0.92, "grad_norm": 1.202474594116211, "learning_rate": 1.5730263502490239e-07, "loss": 0.5808, "step": 14289 }, { "epoch": 0.92, "grad_norm": 1.4289445877075195, "learning_rate": 1.5704260354405032e-07, "loss": 0.5807, "step": 14290 }, { "epoch": 0.92, "grad_norm": 1.2278698682785034, "learning_rate": 1.5678278373637057e-07, "loss": 0.5364, "step": 14291 }, { "epoch": 0.92, "grad_norm": 1.5768777132034302, "learning_rate": 1.5652317561321962e-07, "loss": 0.5098, "step": 14292 }, { "epoch": 0.92, "grad_norm": 1.2571673393249512, "learning_rate": 1.5626377918594448e-07, "loss": 0.5664, "step": 14293 }, { "epoch": 0.92, "grad_norm": 1.211941123008728, "learning_rate": 1.5600459446588335e-07, "loss": 0.5504, "step": 14294 }, { "epoch": 0.92, "grad_norm": 1.3203659057617188, "learning_rate": 1.5574562146436323e-07, "loss": 0.5122, "step": 14295 }, { "epoch": 0.92, "grad_norm": 1.0799022912979126, "learning_rate": 1.5548686019270343e-07, "loss": 0.5421, "step": 14296 }, { "epoch": 0.92, "grad_norm": 1.3400691747665405, "learning_rate": 1.5522831066221378e-07, "loss": 0.5066, "step": 14297 }, { "epoch": 0.92, "grad_norm": 1.2171380519866943, "learning_rate": 1.5496997288419524e-07, "loss": 0.4828, "step": 14298 }, { "epoch": 0.92, "grad_norm": 1.198333501815796, "learning_rate": 1.5471184686993935e-07, "loss": 0.5034, "step": 14299 }, { "epoch": 0.92, "grad_norm": 1.3479385375976562, "learning_rate": 1.544539326307265e-07, "loss": 0.5218, "step": 14300 }, { "epoch": 0.92, "grad_norm": 1.8581054210662842, "learning_rate": 1.5419623017783158e-07, "loss": 0.5212, "step": 14301 }, { "epoch": 0.92, "grad_norm": 1.2511049509048462, "learning_rate": 1.5393873952251614e-07, "loss": 0.5022, "step": 14302 }, { "epoch": 0.92, "grad_norm": 1.1792362928390503, "learning_rate": 1.5368146067603617e-07, "loss": 0.4782, "step": 14303 }, { "epoch": 0.92, "grad_norm": 1.1896544694900513, "learning_rate": 1.5342439364963547e-07, "loss": 0.4889, "step": 14304 }, { "epoch": 0.92, "grad_norm": 1.189982533454895, "learning_rate": 1.5316753845455113e-07, "loss": 0.5044, "step": 14305 }, { "epoch": 0.92, "grad_norm": 1.1595350503921509, "learning_rate": 1.5291089510200752e-07, "loss": 0.518, "step": 14306 }, { "epoch": 0.92, "grad_norm": 1.4172289371490479, "learning_rate": 1.5265446360322346e-07, "loss": 0.4908, "step": 14307 }, { "epoch": 0.92, "grad_norm": 1.2243175506591797, "learning_rate": 1.5239824396940662e-07, "loss": 0.4836, "step": 14308 }, { "epoch": 0.92, "grad_norm": 1.2655786275863647, "learning_rate": 1.5214223621175527e-07, "loss": 0.5763, "step": 14309 }, { "epoch": 0.92, "grad_norm": 1.101082682609558, "learning_rate": 1.5188644034145883e-07, "loss": 0.4884, "step": 14310 }, { "epoch": 0.92, "grad_norm": 1.2192102670669556, "learning_rate": 1.5163085636969888e-07, "loss": 0.51, "step": 14311 }, { "epoch": 0.92, "grad_norm": 1.156097412109375, "learning_rate": 1.5137548430764427e-07, "loss": 0.4697, "step": 14312 }, { "epoch": 0.92, "grad_norm": 1.1469969749450684, "learning_rate": 1.5112032416645828e-07, "loss": 0.5125, "step": 14313 }, { "epoch": 0.92, "grad_norm": 1.252614140510559, "learning_rate": 1.50865375957292e-07, "loss": 0.4819, "step": 14314 }, { "epoch": 0.92, "grad_norm": 1.1288585662841797, "learning_rate": 1.506106396912893e-07, "loss": 0.4536, "step": 14315 }, { "epoch": 0.92, "grad_norm": 1.332745909690857, "learning_rate": 1.5035611537958404e-07, "loss": 0.5331, "step": 14316 }, { "epoch": 0.92, "grad_norm": 1.296438217163086, "learning_rate": 1.5010180303330124e-07, "loss": 0.5327, "step": 14317 }, { "epoch": 0.92, "grad_norm": 1.2297000885009766, "learning_rate": 1.498477026635553e-07, "loss": 0.5309, "step": 14318 }, { "epoch": 0.92, "grad_norm": 1.4102458953857422, "learning_rate": 1.4959381428145292e-07, "loss": 0.568, "step": 14319 }, { "epoch": 0.92, "grad_norm": 1.317008137702942, "learning_rate": 1.4934013789809022e-07, "loss": 0.5279, "step": 14320 }, { "epoch": 0.92, "grad_norm": 1.1635485887527466, "learning_rate": 1.4908667352455554e-07, "loss": 0.4574, "step": 14321 }, { "epoch": 0.92, "grad_norm": 1.1844401359558105, "learning_rate": 1.4883342117192668e-07, "loss": 0.5566, "step": 14322 }, { "epoch": 0.92, "grad_norm": 1.1782125234603882, "learning_rate": 1.485803808512737e-07, "loss": 0.4895, "step": 14323 }, { "epoch": 0.92, "grad_norm": 1.2117550373077393, "learning_rate": 1.4832755257365438e-07, "loss": 0.5097, "step": 14324 }, { "epoch": 0.92, "grad_norm": 1.2284480333328247, "learning_rate": 1.480749363501205e-07, "loss": 0.5351, "step": 14325 }, { "epoch": 0.92, "grad_norm": 1.1936229467391968, "learning_rate": 1.4782253219171316e-07, "loss": 0.5192, "step": 14326 }, { "epoch": 0.92, "grad_norm": 1.2025660276412964, "learning_rate": 1.4757034010946414e-07, "loss": 0.5068, "step": 14327 }, { "epoch": 0.92, "grad_norm": 1.2035719156265259, "learning_rate": 1.4731836011439625e-07, "loss": 0.4752, "step": 14328 }, { "epoch": 0.92, "grad_norm": 1.8524531126022339, "learning_rate": 1.470665922175224e-07, "loss": 0.5084, "step": 14329 }, { "epoch": 0.92, "grad_norm": 1.1971795558929443, "learning_rate": 1.4681503642984707e-07, "loss": 0.5526, "step": 14330 }, { "epoch": 0.93, "grad_norm": 1.160488486289978, "learning_rate": 1.4656369276236537e-07, "loss": 0.5566, "step": 14331 }, { "epoch": 0.93, "grad_norm": 1.1693392992019653, "learning_rate": 1.4631256122606297e-07, "loss": 0.5064, "step": 14332 }, { "epoch": 0.93, "grad_norm": 1.2632160186767578, "learning_rate": 1.4606164183191552e-07, "loss": 0.5448, "step": 14333 }, { "epoch": 0.93, "grad_norm": 1.2014472484588623, "learning_rate": 1.458109345908898e-07, "loss": 0.4953, "step": 14334 }, { "epoch": 0.93, "grad_norm": 1.3454573154449463, "learning_rate": 1.4556043951394483e-07, "loss": 0.5675, "step": 14335 }, { "epoch": 0.93, "grad_norm": 1.271530270576477, "learning_rate": 1.4531015661202853e-07, "loss": 0.5168, "step": 14336 }, { "epoch": 0.93, "grad_norm": 1.3001668453216553, "learning_rate": 1.4506008589607935e-07, "loss": 0.4978, "step": 14337 }, { "epoch": 0.93, "grad_norm": 1.316979169845581, "learning_rate": 1.4481022737702854e-07, "loss": 0.4954, "step": 14338 }, { "epoch": 0.93, "grad_norm": 1.2242900133132935, "learning_rate": 1.4456058106579573e-07, "loss": 0.5026, "step": 14339 }, { "epoch": 0.93, "grad_norm": 1.2394027709960938, "learning_rate": 1.4431114697329274e-07, "loss": 0.4706, "step": 14340 }, { "epoch": 0.93, "grad_norm": 1.157460331916809, "learning_rate": 1.4406192511042194e-07, "loss": 0.5153, "step": 14341 }, { "epoch": 0.93, "grad_norm": 1.1767683029174805, "learning_rate": 1.4381291548807574e-07, "loss": 0.5139, "step": 14342 }, { "epoch": 0.93, "grad_norm": 1.3359217643737793, "learning_rate": 1.4356411811713767e-07, "loss": 0.4753, "step": 14343 }, { "epoch": 0.93, "grad_norm": 1.2507688999176025, "learning_rate": 1.4331553300848233e-07, "loss": 0.5121, "step": 14344 }, { "epoch": 0.93, "grad_norm": 1.113500714302063, "learning_rate": 1.4306716017297495e-07, "loss": 0.4805, "step": 14345 }, { "epoch": 0.93, "grad_norm": 1.210762858390808, "learning_rate": 1.4281899962147128e-07, "loss": 0.4822, "step": 14346 }, { "epoch": 0.93, "grad_norm": 1.162025809288025, "learning_rate": 1.4257105136481652e-07, "loss": 0.4739, "step": 14347 }, { "epoch": 0.93, "grad_norm": 1.2273073196411133, "learning_rate": 1.4232331541384926e-07, "loss": 0.525, "step": 14348 }, { "epoch": 0.93, "grad_norm": 1.161167860031128, "learning_rate": 1.4207579177939635e-07, "loss": 0.4942, "step": 14349 }, { "epoch": 0.93, "grad_norm": 1.1643693447113037, "learning_rate": 1.4182848047227747e-07, "loss": 0.503, "step": 14350 }, { "epoch": 0.93, "grad_norm": 1.3714306354522705, "learning_rate": 1.4158138150330124e-07, "loss": 0.5457, "step": 14351 }, { "epoch": 0.93, "grad_norm": 1.1618478298187256, "learning_rate": 1.4133449488326844e-07, "loss": 0.5095, "step": 14352 }, { "epoch": 0.93, "grad_norm": 1.2113547325134277, "learning_rate": 1.4108782062296934e-07, "loss": 0.5153, "step": 14353 }, { "epoch": 0.93, "grad_norm": 1.0922260284423828, "learning_rate": 1.4084135873318472e-07, "loss": 0.4621, "step": 14354 }, { "epoch": 0.93, "grad_norm": 1.1614304780960083, "learning_rate": 1.4059510922468766e-07, "loss": 0.5108, "step": 14355 }, { "epoch": 0.93, "grad_norm": 1.1722173690795898, "learning_rate": 1.4034907210824123e-07, "loss": 0.5019, "step": 14356 }, { "epoch": 0.93, "grad_norm": 1.2175536155700684, "learning_rate": 1.4010324739459847e-07, "loss": 0.5368, "step": 14357 }, { "epoch": 0.93, "grad_norm": 1.2167056798934937, "learning_rate": 1.3985763509450522e-07, "loss": 0.498, "step": 14358 }, { "epoch": 0.93, "grad_norm": 1.262526273727417, "learning_rate": 1.3961223521869406e-07, "loss": 0.5623, "step": 14359 }, { "epoch": 0.93, "grad_norm": 1.1572693586349487, "learning_rate": 1.3936704777789245e-07, "loss": 0.5281, "step": 14360 }, { "epoch": 0.93, "grad_norm": 1.9574687480926514, "learning_rate": 1.3912207278281685e-07, "loss": 0.5106, "step": 14361 }, { "epoch": 0.93, "grad_norm": 1.1509575843811035, "learning_rate": 1.3887731024417373e-07, "loss": 0.4622, "step": 14362 }, { "epoch": 0.93, "grad_norm": 1.2228442430496216, "learning_rate": 1.3863276017266224e-07, "loss": 0.4889, "step": 14363 }, { "epoch": 0.93, "grad_norm": 1.1069045066833496, "learning_rate": 1.3838842257897e-07, "loss": 0.5208, "step": 14364 }, { "epoch": 0.93, "grad_norm": 1.1611652374267578, "learning_rate": 1.3814429747377678e-07, "loss": 0.4902, "step": 14365 }, { "epoch": 0.93, "grad_norm": 1.2387158870697021, "learning_rate": 1.3790038486775238e-07, "loss": 0.4746, "step": 14366 }, { "epoch": 0.93, "grad_norm": 1.3714262247085571, "learning_rate": 1.376566847715577e-07, "loss": 0.5119, "step": 14367 }, { "epoch": 0.93, "grad_norm": 1.2807443141937256, "learning_rate": 1.3741319719584422e-07, "loss": 0.4868, "step": 14368 }, { "epoch": 0.93, "grad_norm": 1.2461529970169067, "learning_rate": 1.3716992215125458e-07, "loss": 0.4762, "step": 14369 }, { "epoch": 0.93, "grad_norm": 1.2072324752807617, "learning_rate": 1.3692685964842133e-07, "loss": 0.4744, "step": 14370 }, { "epoch": 0.93, "grad_norm": 1.3510197401046753, "learning_rate": 1.3668400969796824e-07, "loss": 0.4928, "step": 14371 }, { "epoch": 0.93, "grad_norm": 1.1665643453598022, "learning_rate": 1.3644137231050901e-07, "loss": 0.533, "step": 14372 }, { "epoch": 0.93, "grad_norm": 1.2500172853469849, "learning_rate": 1.3619894749664962e-07, "loss": 0.508, "step": 14373 }, { "epoch": 0.93, "grad_norm": 1.2517180442810059, "learning_rate": 1.359567352669855e-07, "loss": 0.5304, "step": 14374 }, { "epoch": 0.93, "grad_norm": 1.1636601686477661, "learning_rate": 1.3571473563210314e-07, "loss": 0.4893, "step": 14375 }, { "epoch": 0.93, "grad_norm": 1.3199304342269897, "learning_rate": 1.3547294860257964e-07, "loss": 0.5089, "step": 14376 }, { "epoch": 0.93, "grad_norm": 1.1940747499465942, "learning_rate": 1.352313741889827e-07, "loss": 0.5189, "step": 14377 }, { "epoch": 0.93, "grad_norm": 1.1471428871154785, "learning_rate": 1.3499001240187103e-07, "loss": 0.507, "step": 14378 }, { "epoch": 0.93, "grad_norm": 1.2615468502044678, "learning_rate": 1.347488632517946e-07, "loss": 0.5541, "step": 14379 }, { "epoch": 0.93, "grad_norm": 1.203983187675476, "learning_rate": 1.3450792674929215e-07, "loss": 0.5127, "step": 14380 }, { "epoch": 0.93, "grad_norm": 1.1405283212661743, "learning_rate": 1.3426720290489582e-07, "loss": 0.5243, "step": 14381 }, { "epoch": 0.93, "grad_norm": 1.1893446445465088, "learning_rate": 1.3402669172912607e-07, "loss": 0.4769, "step": 14382 }, { "epoch": 0.93, "grad_norm": 1.2107605934143066, "learning_rate": 1.3378639323249509e-07, "loss": 0.4938, "step": 14383 }, { "epoch": 0.93, "grad_norm": 1.1544370651245117, "learning_rate": 1.3354630742550557e-07, "loss": 0.534, "step": 14384 }, { "epoch": 0.93, "grad_norm": 1.2164801359176636, "learning_rate": 1.333064343186513e-07, "loss": 0.5263, "step": 14385 }, { "epoch": 0.93, "grad_norm": 1.3335000276565552, "learning_rate": 1.3306677392241674e-07, "loss": 0.5656, "step": 14386 }, { "epoch": 0.93, "grad_norm": 1.235152244567871, "learning_rate": 1.3282732624727734e-07, "loss": 0.4881, "step": 14387 }, { "epoch": 0.93, "grad_norm": 1.268175721168518, "learning_rate": 1.3258809130369753e-07, "loss": 0.5016, "step": 14388 }, { "epoch": 0.93, "grad_norm": 1.2065798044204712, "learning_rate": 1.3234906910213396e-07, "loss": 0.4779, "step": 14389 }, { "epoch": 0.93, "grad_norm": 1.1581228971481323, "learning_rate": 1.321102596530338e-07, "loss": 0.4636, "step": 14390 }, { "epoch": 0.93, "grad_norm": 1.194753885269165, "learning_rate": 1.3187166296683485e-07, "loss": 0.4704, "step": 14391 }, { "epoch": 0.93, "grad_norm": 1.3499579429626465, "learning_rate": 1.3163327905396596e-07, "loss": 0.4943, "step": 14392 }, { "epoch": 0.93, "grad_norm": 1.233691930770874, "learning_rate": 1.31395107924846e-07, "loss": 0.4551, "step": 14393 }, { "epoch": 0.93, "grad_norm": 1.2315624952316284, "learning_rate": 1.3115714958988446e-07, "loss": 0.4923, "step": 14394 }, { "epoch": 0.93, "grad_norm": 1.159706950187683, "learning_rate": 1.309194040594819e-07, "loss": 0.4849, "step": 14395 }, { "epoch": 0.93, "grad_norm": 1.141086220741272, "learning_rate": 1.3068187134403e-07, "loss": 0.4866, "step": 14396 }, { "epoch": 0.93, "grad_norm": 1.2700896263122559, "learning_rate": 1.3044455145391044e-07, "loss": 0.4834, "step": 14397 }, { "epoch": 0.93, "grad_norm": 1.162278652191162, "learning_rate": 1.3020744439949607e-07, "loss": 0.4727, "step": 14398 }, { "epoch": 0.93, "grad_norm": 1.2713581323623657, "learning_rate": 1.2997055019115024e-07, "loss": 0.5443, "step": 14399 }, { "epoch": 0.93, "grad_norm": 1.2872869968414307, "learning_rate": 1.2973386883922633e-07, "loss": 0.5292, "step": 14400 }, { "epoch": 0.93, "grad_norm": 1.308449625968933, "learning_rate": 1.2949740035407e-07, "loss": 0.5302, "step": 14401 }, { "epoch": 0.93, "grad_norm": 1.2019827365875244, "learning_rate": 1.292611447460157e-07, "loss": 0.4958, "step": 14402 }, { "epoch": 0.93, "grad_norm": 1.1928703784942627, "learning_rate": 1.2902510202539021e-07, "loss": 0.5037, "step": 14403 }, { "epoch": 0.93, "grad_norm": 1.3692227602005005, "learning_rate": 1.2878927220251026e-07, "loss": 0.4656, "step": 14404 }, { "epoch": 0.93, "grad_norm": 1.2587947845458984, "learning_rate": 1.2855365528768425e-07, "loss": 0.4994, "step": 14405 }, { "epoch": 0.93, "grad_norm": 1.2518627643585205, "learning_rate": 1.2831825129120845e-07, "loss": 0.5683, "step": 14406 }, { "epoch": 0.93, "grad_norm": 1.1331074237823486, "learning_rate": 1.2808306022337291e-07, "loss": 0.5069, "step": 14407 }, { "epoch": 0.93, "grad_norm": 1.2562190294265747, "learning_rate": 1.278480820944572e-07, "loss": 0.5059, "step": 14408 }, { "epoch": 0.93, "grad_norm": 1.2327444553375244, "learning_rate": 1.2761331691473145e-07, "loss": 0.507, "step": 14409 }, { "epoch": 0.93, "grad_norm": 1.2126916646957397, "learning_rate": 1.273787646944563e-07, "loss": 0.4517, "step": 14410 }, { "epoch": 0.93, "grad_norm": 1.2486138343811035, "learning_rate": 1.2714442544388473e-07, "loss": 0.5012, "step": 14411 }, { "epoch": 0.93, "grad_norm": 1.247189998626709, "learning_rate": 1.2691029917325793e-07, "loss": 0.516, "step": 14412 }, { "epoch": 0.93, "grad_norm": 1.2430360317230225, "learning_rate": 1.2667638589280884e-07, "loss": 0.5101, "step": 14413 }, { "epoch": 0.93, "grad_norm": 1.2468500137329102, "learning_rate": 1.264426856127615e-07, "loss": 0.4781, "step": 14414 }, { "epoch": 0.93, "grad_norm": 1.0966945886611938, "learning_rate": 1.2620919834333056e-07, "loss": 0.4723, "step": 14415 }, { "epoch": 0.93, "grad_norm": 1.2445930242538452, "learning_rate": 1.2597592409472171e-07, "loss": 0.4772, "step": 14416 }, { "epoch": 0.93, "grad_norm": 1.3568004369735718, "learning_rate": 1.2574286287712956e-07, "loss": 0.5189, "step": 14417 }, { "epoch": 0.93, "grad_norm": 1.1868584156036377, "learning_rate": 1.2551001470074098e-07, "loss": 0.5031, "step": 14418 }, { "epoch": 0.93, "grad_norm": 1.2078651189804077, "learning_rate": 1.252773795757334e-07, "loss": 0.4228, "step": 14419 }, { "epoch": 0.93, "grad_norm": 1.2101216316223145, "learning_rate": 1.250449575122742e-07, "loss": 0.4621, "step": 14420 }, { "epoch": 0.93, "grad_norm": 1.312597393989563, "learning_rate": 1.2481274852052306e-07, "loss": 0.501, "step": 14421 }, { "epoch": 0.93, "grad_norm": 1.1726617813110352, "learning_rate": 1.245807526106285e-07, "loss": 0.4637, "step": 14422 }, { "epoch": 0.93, "grad_norm": 1.2985988855361938, "learning_rate": 1.2434896979273026e-07, "loss": 0.5589, "step": 14423 }, { "epoch": 0.93, "grad_norm": 1.1059823036193848, "learning_rate": 1.2411740007695905e-07, "loss": 0.5105, "step": 14424 }, { "epoch": 0.93, "grad_norm": 1.3050923347473145, "learning_rate": 1.2388604347343625e-07, "loss": 0.4853, "step": 14425 }, { "epoch": 0.93, "grad_norm": 1.1959565877914429, "learning_rate": 1.236548999922743e-07, "loss": 0.5201, "step": 14426 }, { "epoch": 0.93, "grad_norm": 1.2862374782562256, "learning_rate": 1.2342396964357516e-07, "loss": 0.5274, "step": 14427 }, { "epoch": 0.93, "grad_norm": 1.2690473794937134, "learning_rate": 1.2319325243743296e-07, "loss": 0.5379, "step": 14428 }, { "epoch": 0.93, "grad_norm": 1.1888744831085205, "learning_rate": 1.229627483839313e-07, "loss": 0.4968, "step": 14429 }, { "epoch": 0.93, "grad_norm": 1.394945502281189, "learning_rate": 1.227324574931449e-07, "loss": 0.5485, "step": 14430 }, { "epoch": 0.93, "grad_norm": 1.1488666534423828, "learning_rate": 1.2250237977513956e-07, "loss": 0.4915, "step": 14431 }, { "epoch": 0.93, "grad_norm": 1.1881771087646484, "learning_rate": 1.2227251523997064e-07, "loss": 0.5202, "step": 14432 }, { "epoch": 0.93, "grad_norm": 1.178977370262146, "learning_rate": 1.2204286389768505e-07, "loss": 0.5, "step": 14433 }, { "epoch": 0.93, "grad_norm": 1.2163842916488647, "learning_rate": 1.218134257583209e-07, "loss": 0.4743, "step": 14434 }, { "epoch": 0.93, "grad_norm": 1.4462053775787354, "learning_rate": 1.2158420083190681e-07, "loss": 0.5433, "step": 14435 }, { "epoch": 0.93, "grad_norm": 1.2437183856964111, "learning_rate": 1.2135518912846034e-07, "loss": 0.5359, "step": 14436 }, { "epoch": 0.93, "grad_norm": 1.2988871335983276, "learning_rate": 1.211263906579907e-07, "loss": 0.5151, "step": 14437 }, { "epoch": 0.93, "grad_norm": 1.4840660095214844, "learning_rate": 1.208978054304999e-07, "loss": 0.4702, "step": 14438 }, { "epoch": 0.93, "grad_norm": 1.2769997119903564, "learning_rate": 1.2066943345597715e-07, "loss": 0.5606, "step": 14439 }, { "epoch": 0.93, "grad_norm": 1.2766666412353516, "learning_rate": 1.2044127474440503e-07, "loss": 0.5031, "step": 14440 }, { "epoch": 0.93, "grad_norm": 1.123057246208191, "learning_rate": 1.20213329305755e-07, "loss": 0.4832, "step": 14441 }, { "epoch": 0.93, "grad_norm": 1.3756275177001953, "learning_rate": 1.1998559714999026e-07, "loss": 0.5225, "step": 14442 }, { "epoch": 0.93, "grad_norm": 1.2141444683074951, "learning_rate": 1.19758078287065e-07, "loss": 0.4886, "step": 14443 }, { "epoch": 0.93, "grad_norm": 1.2667783498764038, "learning_rate": 1.1953077272692239e-07, "loss": 0.5498, "step": 14444 }, { "epoch": 0.93, "grad_norm": 1.2467482089996338, "learning_rate": 1.1930368047949836e-07, "loss": 0.541, "step": 14445 }, { "epoch": 0.93, "grad_norm": 1.3164033889770508, "learning_rate": 1.190768015547178e-07, "loss": 0.5759, "step": 14446 }, { "epoch": 0.93, "grad_norm": 1.22409188747406, "learning_rate": 1.1885013596249772e-07, "loss": 0.488, "step": 14447 }, { "epoch": 0.93, "grad_norm": 1.2621840238571167, "learning_rate": 1.1862368371274414e-07, "loss": 0.5951, "step": 14448 }, { "epoch": 0.93, "grad_norm": 1.2624435424804688, "learning_rate": 1.183974448153552e-07, "loss": 0.5337, "step": 14449 }, { "epoch": 0.93, "grad_norm": 1.1965454816818237, "learning_rate": 1.1817141928021969e-07, "loss": 0.4912, "step": 14450 }, { "epoch": 0.93, "grad_norm": 1.2787607908248901, "learning_rate": 1.1794560711721581e-07, "loss": 0.5392, "step": 14451 }, { "epoch": 0.93, "grad_norm": 1.1051037311553955, "learning_rate": 1.1772000833621399e-07, "loss": 0.4865, "step": 14452 }, { "epoch": 0.93, "grad_norm": 1.193488597869873, "learning_rate": 1.1749462294707359e-07, "loss": 0.4858, "step": 14453 }, { "epoch": 0.93, "grad_norm": 1.325847864151001, "learning_rate": 1.172694509596467e-07, "loss": 0.5354, "step": 14454 }, { "epoch": 0.93, "grad_norm": 1.234618902206421, "learning_rate": 1.170444923837738e-07, "loss": 0.5023, "step": 14455 }, { "epoch": 0.93, "grad_norm": 1.226367712020874, "learning_rate": 1.1681974722928869e-07, "loss": 0.4833, "step": 14456 }, { "epoch": 0.93, "grad_norm": 1.2596626281738281, "learning_rate": 1.1659521550601294e-07, "loss": 0.574, "step": 14457 }, { "epoch": 0.93, "grad_norm": 1.372339129447937, "learning_rate": 1.1637089722376204e-07, "loss": 0.5084, "step": 14458 }, { "epoch": 0.93, "grad_norm": 1.07884681224823, "learning_rate": 1.1614679239233817e-07, "loss": 0.4542, "step": 14459 }, { "epoch": 0.93, "grad_norm": 1.4232240915298462, "learning_rate": 1.1592290102153791e-07, "loss": 0.511, "step": 14460 }, { "epoch": 0.93, "grad_norm": 1.318767786026001, "learning_rate": 1.1569922312114679e-07, "loss": 0.474, "step": 14461 }, { "epoch": 0.93, "grad_norm": 1.2136675119400024, "learning_rate": 1.1547575870094085e-07, "loss": 0.4814, "step": 14462 }, { "epoch": 0.93, "grad_norm": 1.2463573217391968, "learning_rate": 1.1525250777068675e-07, "loss": 0.5182, "step": 14463 }, { "epoch": 0.93, "grad_norm": 1.1843457221984863, "learning_rate": 1.150294703401439e-07, "loss": 0.5163, "step": 14464 }, { "epoch": 0.93, "grad_norm": 1.2354952096939087, "learning_rate": 1.1480664641905837e-07, "loss": 0.5106, "step": 14465 }, { "epoch": 0.93, "grad_norm": 1.1279321908950806, "learning_rate": 1.1458403601717072e-07, "loss": 0.515, "step": 14466 }, { "epoch": 0.93, "grad_norm": 1.124036431312561, "learning_rate": 1.1436163914421039e-07, "loss": 0.5181, "step": 14467 }, { "epoch": 0.93, "grad_norm": 1.249261498451233, "learning_rate": 1.1413945580989738e-07, "loss": 0.5061, "step": 14468 }, { "epoch": 0.93, "grad_norm": 1.1542633771896362, "learning_rate": 1.1391748602394393e-07, "loss": 0.5026, "step": 14469 }, { "epoch": 0.93, "grad_norm": 1.2175559997558594, "learning_rate": 1.1369572979605059e-07, "loss": 0.5096, "step": 14470 }, { "epoch": 0.93, "grad_norm": 1.236768364906311, "learning_rate": 1.1347418713590963e-07, "loss": 0.5159, "step": 14471 }, { "epoch": 0.93, "grad_norm": 1.183408498764038, "learning_rate": 1.132528580532044e-07, "loss": 0.5028, "step": 14472 }, { "epoch": 0.93, "grad_norm": 1.1297228336334229, "learning_rate": 1.1303174255760885e-07, "loss": 0.5116, "step": 14473 }, { "epoch": 0.93, "grad_norm": 1.258527398109436, "learning_rate": 1.1281084065878745e-07, "loss": 0.5022, "step": 14474 }, { "epoch": 0.93, "grad_norm": 1.2804937362670898, "learning_rate": 1.1259015236639581e-07, "loss": 0.5573, "step": 14475 }, { "epoch": 0.93, "grad_norm": 1.0799809694290161, "learning_rate": 1.1236967769007844e-07, "loss": 0.4978, "step": 14476 }, { "epoch": 0.93, "grad_norm": 1.1210476160049438, "learning_rate": 1.1214941663947154e-07, "loss": 0.4719, "step": 14477 }, { "epoch": 0.93, "grad_norm": 1.1438153982162476, "learning_rate": 1.119293692242035e-07, "loss": 0.5231, "step": 14478 }, { "epoch": 0.93, "grad_norm": 1.2073829174041748, "learning_rate": 1.1170953545389107e-07, "loss": 0.5322, "step": 14479 }, { "epoch": 0.93, "grad_norm": 1.1229292154312134, "learning_rate": 1.1148991533814268e-07, "loss": 0.4667, "step": 14480 }, { "epoch": 0.93, "grad_norm": 1.2665119171142578, "learning_rate": 1.1127050888655844e-07, "loss": 0.5319, "step": 14481 }, { "epoch": 0.93, "grad_norm": 1.3314180374145508, "learning_rate": 1.1105131610872676e-07, "loss": 0.5549, "step": 14482 }, { "epoch": 0.93, "grad_norm": 1.1698095798492432, "learning_rate": 1.108323370142278e-07, "loss": 0.5037, "step": 14483 }, { "epoch": 0.93, "grad_norm": 1.2259305715560913, "learning_rate": 1.1061357161263387e-07, "loss": 0.4915, "step": 14484 }, { "epoch": 0.93, "grad_norm": 1.5364933013916016, "learning_rate": 1.103950199135051e-07, "loss": 0.4997, "step": 14485 }, { "epoch": 0.94, "grad_norm": 1.2468410730361938, "learning_rate": 1.1017668192639553e-07, "loss": 0.5048, "step": 14486 }, { "epoch": 0.94, "grad_norm": 1.2247483730316162, "learning_rate": 1.0995855766084696e-07, "loss": 0.4637, "step": 14487 }, { "epoch": 0.94, "grad_norm": 1.1671123504638672, "learning_rate": 1.0974064712639343e-07, "loss": 0.4935, "step": 14488 }, { "epoch": 0.94, "grad_norm": 1.2371727228164673, "learning_rate": 1.09522950332559e-07, "loss": 0.5156, "step": 14489 }, { "epoch": 0.94, "grad_norm": 1.4991346597671509, "learning_rate": 1.0930546728885883e-07, "loss": 0.4935, "step": 14490 }, { "epoch": 0.94, "grad_norm": 1.307208776473999, "learning_rate": 1.0908819800479863e-07, "loss": 0.5149, "step": 14491 }, { "epoch": 0.94, "grad_norm": 1.143257975578308, "learning_rate": 1.0887114248987474e-07, "loss": 0.5255, "step": 14492 }, { "epoch": 0.94, "grad_norm": 1.2350592613220215, "learning_rate": 1.0865430075357397e-07, "loss": 0.5151, "step": 14493 }, { "epoch": 0.94, "grad_norm": 1.2474005222320557, "learning_rate": 1.0843767280537376e-07, "loss": 0.5146, "step": 14494 }, { "epoch": 0.94, "grad_norm": 1.2367844581604004, "learning_rate": 1.0822125865474209e-07, "loss": 0.529, "step": 14495 }, { "epoch": 0.94, "grad_norm": 1.186059594154358, "learning_rate": 1.080050583111386e-07, "loss": 0.5536, "step": 14496 }, { "epoch": 0.94, "grad_norm": 1.1557179689407349, "learning_rate": 1.0778907178401243e-07, "loss": 0.5214, "step": 14497 }, { "epoch": 0.94, "grad_norm": 1.1382492780685425, "learning_rate": 1.0757329908280379e-07, "loss": 0.5126, "step": 14498 }, { "epoch": 0.94, "grad_norm": 1.3031269311904907, "learning_rate": 1.0735774021694401e-07, "loss": 0.5091, "step": 14499 }, { "epoch": 0.94, "grad_norm": 1.276646614074707, "learning_rate": 1.071423951958539e-07, "loss": 0.4987, "step": 14500 }, { "epoch": 0.94, "grad_norm": 1.2968077659606934, "learning_rate": 1.0692726402894593e-07, "loss": 0.5166, "step": 14501 }, { "epoch": 0.94, "grad_norm": 1.1116526126861572, "learning_rate": 1.0671234672562259e-07, "loss": 0.5052, "step": 14502 }, { "epoch": 0.94, "grad_norm": 1.1779932975769043, "learning_rate": 1.0649764329527801e-07, "loss": 0.5568, "step": 14503 }, { "epoch": 0.94, "grad_norm": 1.2067097425460815, "learning_rate": 1.0628315374729636e-07, "loss": 0.4638, "step": 14504 }, { "epoch": 0.94, "grad_norm": 1.144454836845398, "learning_rate": 1.0606887809105238e-07, "loss": 0.5027, "step": 14505 }, { "epoch": 0.94, "grad_norm": 1.3271753787994385, "learning_rate": 1.0585481633591021e-07, "loss": 0.5503, "step": 14506 }, { "epoch": 0.94, "grad_norm": 1.2782526016235352, "learning_rate": 1.056409684912274e-07, "loss": 0.5646, "step": 14507 }, { "epoch": 0.94, "grad_norm": 1.2515690326690674, "learning_rate": 1.0542733456635034e-07, "loss": 0.5371, "step": 14508 }, { "epoch": 0.94, "grad_norm": 1.1665838956832886, "learning_rate": 1.0521391457061658e-07, "loss": 0.5025, "step": 14509 }, { "epoch": 0.94, "grad_norm": 1.1647053956985474, "learning_rate": 1.0500070851335365e-07, "loss": 0.5068, "step": 14510 }, { "epoch": 0.94, "grad_norm": 1.2308624982833862, "learning_rate": 1.0478771640388074e-07, "loss": 0.4924, "step": 14511 }, { "epoch": 0.94, "grad_norm": 1.2446775436401367, "learning_rate": 1.0457493825150655e-07, "loss": 0.5091, "step": 14512 }, { "epoch": 0.94, "grad_norm": 1.329343318939209, "learning_rate": 1.043623740655314e-07, "loss": 0.5466, "step": 14513 }, { "epoch": 0.94, "grad_norm": 1.2343342304229736, "learning_rate": 1.041500238552462e-07, "loss": 0.502, "step": 14514 }, { "epoch": 0.94, "grad_norm": 1.1030105352401733, "learning_rate": 1.0393788762993184e-07, "loss": 0.456, "step": 14515 }, { "epoch": 0.94, "grad_norm": 1.2808141708374023, "learning_rate": 1.0372596539886037e-07, "loss": 0.5107, "step": 14516 }, { "epoch": 0.94, "grad_norm": 1.1897330284118652, "learning_rate": 1.0351425717129438e-07, "loss": 0.5122, "step": 14517 }, { "epoch": 0.94, "grad_norm": 1.2135090827941895, "learning_rate": 1.0330276295648756e-07, "loss": 0.5046, "step": 14518 }, { "epoch": 0.94, "grad_norm": 1.2734018564224243, "learning_rate": 1.0309148276368309e-07, "loss": 0.5102, "step": 14519 }, { "epoch": 0.94, "grad_norm": 1.6829979419708252, "learning_rate": 1.0288041660211578e-07, "loss": 0.4995, "step": 14520 }, { "epoch": 0.94, "grad_norm": 1.205752968788147, "learning_rate": 1.0266956448101051e-07, "loss": 0.4624, "step": 14521 }, { "epoch": 0.94, "grad_norm": 1.2456682920455933, "learning_rate": 1.0245892640958322e-07, "loss": 0.5561, "step": 14522 }, { "epoch": 0.94, "grad_norm": 1.3692024946212769, "learning_rate": 1.0224850239704043e-07, "loss": 0.5342, "step": 14523 }, { "epoch": 0.94, "grad_norm": 1.1714997291564941, "learning_rate": 1.0203829245257979e-07, "loss": 0.5046, "step": 14524 }, { "epoch": 0.94, "grad_norm": 1.4535666704177856, "learning_rate": 1.0182829658538784e-07, "loss": 0.5361, "step": 14525 }, { "epoch": 0.94, "grad_norm": 1.2109564542770386, "learning_rate": 1.0161851480464391e-07, "loss": 0.4985, "step": 14526 }, { "epoch": 0.94, "grad_norm": 1.208031415939331, "learning_rate": 1.0140894711951621e-07, "loss": 0.4744, "step": 14527 }, { "epoch": 0.94, "grad_norm": 1.3747389316558838, "learning_rate": 1.0119959353916576e-07, "loss": 0.526, "step": 14528 }, { "epoch": 0.94, "grad_norm": 1.21064293384552, "learning_rate": 1.0099045407274132e-07, "loss": 0.5074, "step": 14529 }, { "epoch": 0.94, "grad_norm": 1.273622989654541, "learning_rate": 1.0078152872938451e-07, "loss": 0.4921, "step": 14530 }, { "epoch": 0.94, "grad_norm": 1.1796882152557373, "learning_rate": 1.0057281751822744e-07, "loss": 0.5156, "step": 14531 }, { "epoch": 0.94, "grad_norm": 1.1485611200332642, "learning_rate": 1.0036432044839118e-07, "loss": 0.5254, "step": 14532 }, { "epoch": 0.94, "grad_norm": 1.1708924770355225, "learning_rate": 1.0015603752898951e-07, "loss": 0.4807, "step": 14533 }, { "epoch": 0.94, "grad_norm": 1.228474736213684, "learning_rate": 9.994796876912626e-08, "loss": 0.5341, "step": 14534 }, { "epoch": 0.94, "grad_norm": 1.2103780508041382, "learning_rate": 9.97401141778942e-08, "loss": 0.5322, "step": 14535 }, { "epoch": 0.94, "grad_norm": 1.251885175704956, "learning_rate": 9.953247376437936e-08, "loss": 0.5104, "step": 14536 }, { "epoch": 0.94, "grad_norm": 1.277968168258667, "learning_rate": 9.932504753765614e-08, "loss": 0.5244, "step": 14537 }, { "epoch": 0.94, "grad_norm": 1.114141583442688, "learning_rate": 9.911783550679177e-08, "loss": 0.4996, "step": 14538 }, { "epoch": 0.94, "grad_norm": 1.2925876379013062, "learning_rate": 9.891083768084231e-08, "loss": 0.5155, "step": 14539 }, { "epoch": 0.94, "grad_norm": 1.243514895439148, "learning_rate": 9.870405406885497e-08, "loss": 0.5302, "step": 14540 }, { "epoch": 0.94, "grad_norm": 1.1607820987701416, "learning_rate": 9.849748467986808e-08, "loss": 0.461, "step": 14541 }, { "epoch": 0.94, "grad_norm": 1.2903733253479004, "learning_rate": 9.829112952290997e-08, "loss": 0.4894, "step": 14542 }, { "epoch": 0.94, "grad_norm": 1.163374423980713, "learning_rate": 9.80849886070001e-08, "loss": 0.4696, "step": 14543 }, { "epoch": 0.94, "grad_norm": 1.1646332740783691, "learning_rate": 9.787906194114794e-08, "loss": 0.5346, "step": 14544 }, { "epoch": 0.94, "grad_norm": 1.1347225904464722, "learning_rate": 9.767334953435459e-08, "loss": 0.4889, "step": 14545 }, { "epoch": 0.94, "grad_norm": 1.2081149816513062, "learning_rate": 9.746785139561121e-08, "loss": 0.5218, "step": 14546 }, { "epoch": 0.94, "grad_norm": 1.1789273023605347, "learning_rate": 9.726256753389895e-08, "loss": 0.5189, "step": 14547 }, { "epoch": 0.94, "grad_norm": 1.28943932056427, "learning_rate": 9.705749795819064e-08, "loss": 0.4752, "step": 14548 }, { "epoch": 0.94, "grad_norm": 1.2136415243148804, "learning_rate": 9.685264267744964e-08, "loss": 0.5143, "step": 14549 }, { "epoch": 0.94, "grad_norm": 1.135939121246338, "learning_rate": 9.664800170062937e-08, "loss": 0.4847, "step": 14550 }, { "epoch": 0.94, "grad_norm": 1.2808843851089478, "learning_rate": 9.644357503667378e-08, "loss": 0.5263, "step": 14551 }, { "epoch": 0.94, "grad_norm": 1.1820193529129028, "learning_rate": 9.62393626945185e-08, "loss": 0.5097, "step": 14552 }, { "epoch": 0.94, "grad_norm": 1.2862142324447632, "learning_rate": 9.60353646830886e-08, "loss": 0.5102, "step": 14553 }, { "epoch": 0.94, "grad_norm": 1.2504416704177856, "learning_rate": 9.58315810113003e-08, "loss": 0.5297, "step": 14554 }, { "epoch": 0.94, "grad_norm": 1.257144570350647, "learning_rate": 9.56280116880609e-08, "loss": 0.5296, "step": 14555 }, { "epoch": 0.94, "grad_norm": 1.2348411083221436, "learning_rate": 9.542465672226775e-08, "loss": 0.5377, "step": 14556 }, { "epoch": 0.94, "grad_norm": 1.2038482427597046, "learning_rate": 9.522151612280928e-08, "loss": 0.5023, "step": 14557 }, { "epoch": 0.94, "grad_norm": 1.215950608253479, "learning_rate": 9.501858989856339e-08, "loss": 0.5359, "step": 14558 }, { "epoch": 0.94, "grad_norm": 1.2173506021499634, "learning_rate": 9.481587805839964e-08, "loss": 0.5123, "step": 14559 }, { "epoch": 0.94, "grad_norm": 1.1683056354522705, "learning_rate": 9.461338061117876e-08, "loss": 0.5323, "step": 14560 }, { "epoch": 0.94, "grad_norm": 1.246636986732483, "learning_rate": 9.441109756575029e-08, "loss": 0.4602, "step": 14561 }, { "epoch": 0.94, "grad_norm": 1.348379373550415, "learning_rate": 9.420902893095663e-08, "loss": 0.5144, "step": 14562 }, { "epoch": 0.94, "grad_norm": 1.3557554483413696, "learning_rate": 9.400717471562959e-08, "loss": 0.5239, "step": 14563 }, { "epoch": 0.94, "grad_norm": 1.2208657264709473, "learning_rate": 9.380553492859101e-08, "loss": 0.4954, "step": 14564 }, { "epoch": 0.94, "grad_norm": 1.1860371828079224, "learning_rate": 9.36041095786544e-08, "loss": 0.5245, "step": 14565 }, { "epoch": 0.94, "grad_norm": 1.2569524049758911, "learning_rate": 9.340289867462326e-08, "loss": 0.5355, "step": 14566 }, { "epoch": 0.94, "grad_norm": 1.1720954179763794, "learning_rate": 9.320190222529224e-08, "loss": 0.5453, "step": 14567 }, { "epoch": 0.94, "grad_norm": 1.2559881210327148, "learning_rate": 9.300112023944597e-08, "loss": 0.5559, "step": 14568 }, { "epoch": 0.94, "grad_norm": 1.1514003276824951, "learning_rate": 9.28005527258613e-08, "loss": 0.4628, "step": 14569 }, { "epoch": 0.94, "grad_norm": 1.3070447444915771, "learning_rate": 9.260019969330347e-08, "loss": 0.4886, "step": 14570 }, { "epoch": 0.94, "grad_norm": 1.235981822013855, "learning_rate": 9.240006115052991e-08, "loss": 0.5449, "step": 14571 }, { "epoch": 0.94, "grad_norm": 1.116652250289917, "learning_rate": 9.22001371062875e-08, "loss": 0.4861, "step": 14572 }, { "epoch": 0.94, "grad_norm": 1.1918532848358154, "learning_rate": 9.200042756931482e-08, "loss": 0.4739, "step": 14573 }, { "epoch": 0.94, "grad_norm": 1.2537050247192383, "learning_rate": 9.180093254834044e-08, "loss": 0.5292, "step": 14574 }, { "epoch": 0.94, "grad_norm": 1.2205692529678345, "learning_rate": 9.16016520520846e-08, "loss": 0.5114, "step": 14575 }, { "epoch": 0.94, "grad_norm": 1.1669832468032837, "learning_rate": 9.140258608925645e-08, "loss": 0.4516, "step": 14576 }, { "epoch": 0.94, "grad_norm": 1.1969685554504395, "learning_rate": 9.120373466855681e-08, "loss": 0.5247, "step": 14577 }, { "epoch": 0.94, "grad_norm": 1.1596622467041016, "learning_rate": 9.100509779867706e-08, "loss": 0.4551, "step": 14578 }, { "epoch": 0.94, "grad_norm": 1.8603435754776, "learning_rate": 9.080667548829913e-08, "loss": 0.527, "step": 14579 }, { "epoch": 0.94, "grad_norm": 1.2190561294555664, "learning_rate": 9.0608467746095e-08, "loss": 0.5046, "step": 14580 }, { "epoch": 0.94, "grad_norm": 1.1746138334274292, "learning_rate": 9.041047458072937e-08, "loss": 0.4604, "step": 14581 }, { "epoch": 0.94, "grad_norm": 1.2613738775253296, "learning_rate": 9.021269600085424e-08, "loss": 0.5299, "step": 14582 }, { "epoch": 0.94, "grad_norm": 1.2312151193618774, "learning_rate": 9.001513201511547e-08, "loss": 0.4986, "step": 14583 }, { "epoch": 0.94, "grad_norm": 1.188595175743103, "learning_rate": 8.981778263214669e-08, "loss": 0.5231, "step": 14584 }, { "epoch": 0.94, "grad_norm": 1.221480369567871, "learning_rate": 8.962064786057434e-08, "loss": 0.4593, "step": 14585 }, { "epoch": 0.94, "grad_norm": 1.2189525365829468, "learning_rate": 8.942372770901431e-08, "loss": 0.5208, "step": 14586 }, { "epoch": 0.94, "grad_norm": 1.1815811395645142, "learning_rate": 8.922702218607416e-08, "loss": 0.526, "step": 14587 }, { "epoch": 0.94, "grad_norm": 1.1214550733566284, "learning_rate": 8.903053130035089e-08, "loss": 0.509, "step": 14588 }, { "epoch": 0.94, "grad_norm": 1.1817994117736816, "learning_rate": 8.883425506043208e-08, "loss": 0.5696, "step": 14589 }, { "epoch": 0.94, "grad_norm": 1.2042834758758545, "learning_rate": 8.863819347489755e-08, "loss": 0.5601, "step": 14590 }, { "epoch": 0.94, "grad_norm": 1.2050038576126099, "learning_rate": 8.844234655231599e-08, "loss": 0.5336, "step": 14591 }, { "epoch": 0.94, "grad_norm": 1.2877510786056519, "learning_rate": 8.82467143012472e-08, "loss": 0.5356, "step": 14592 }, { "epoch": 0.94, "grad_norm": 1.1309692859649658, "learning_rate": 8.805129673024271e-08, "loss": 0.488, "step": 14593 }, { "epoch": 0.94, "grad_norm": 1.2729148864746094, "learning_rate": 8.785609384784232e-08, "loss": 0.5394, "step": 14594 }, { "epoch": 0.94, "grad_norm": 1.1396859884262085, "learning_rate": 8.766110566257869e-08, "loss": 0.501, "step": 14595 }, { "epoch": 0.94, "grad_norm": 1.1249947547912598, "learning_rate": 8.74663321829744e-08, "loss": 0.4949, "step": 14596 }, { "epoch": 0.94, "grad_norm": 1.1913659572601318, "learning_rate": 8.727177341754156e-08, "loss": 0.4854, "step": 14597 }, { "epoch": 0.94, "grad_norm": 1.0977106094360352, "learning_rate": 8.707742937478558e-08, "loss": 0.4902, "step": 14598 }, { "epoch": 0.94, "grad_norm": 1.1760870218276978, "learning_rate": 8.688330006319911e-08, "loss": 0.4899, "step": 14599 }, { "epoch": 0.94, "grad_norm": 1.2640212774276733, "learning_rate": 8.668938549126759e-08, "loss": 0.4966, "step": 14600 }, { "epoch": 0.94, "grad_norm": 1.2334538698196411, "learning_rate": 8.649568566746591e-08, "loss": 0.529, "step": 14601 }, { "epoch": 0.94, "grad_norm": 1.1756409406661987, "learning_rate": 8.630220060026173e-08, "loss": 0.5039, "step": 14602 }, { "epoch": 0.94, "grad_norm": 1.2273578643798828, "learning_rate": 8.610893029811051e-08, "loss": 0.4436, "step": 14603 }, { "epoch": 0.94, "grad_norm": 1.171152114868164, "learning_rate": 8.591587476945996e-08, "loss": 0.5122, "step": 14604 }, { "epoch": 0.94, "grad_norm": 1.2734328508377075, "learning_rate": 8.572303402274773e-08, "loss": 0.5178, "step": 14605 }, { "epoch": 0.94, "grad_norm": 1.1882303953170776, "learning_rate": 8.553040806640322e-08, "loss": 0.4511, "step": 14606 }, { "epoch": 0.94, "grad_norm": 1.3084146976470947, "learning_rate": 8.53379969088447e-08, "loss": 0.5033, "step": 14607 }, { "epoch": 0.94, "grad_norm": 1.201319694519043, "learning_rate": 8.514580055848265e-08, "loss": 0.5166, "step": 14608 }, { "epoch": 0.94, "grad_norm": 1.2428622245788574, "learning_rate": 8.495381902371703e-08, "loss": 0.5043, "step": 14609 }, { "epoch": 0.94, "grad_norm": 1.1674495935440063, "learning_rate": 8.476205231293888e-08, "loss": 0.5302, "step": 14610 }, { "epoch": 0.94, "grad_norm": 1.1457055807113647, "learning_rate": 8.457050043452986e-08, "loss": 0.5261, "step": 14611 }, { "epoch": 0.94, "grad_norm": 1.287211298942566, "learning_rate": 8.437916339686215e-08, "loss": 0.5243, "step": 14612 }, { "epoch": 0.94, "grad_norm": 1.2557414770126343, "learning_rate": 8.418804120829959e-08, "loss": 0.4918, "step": 14613 }, { "epoch": 0.94, "grad_norm": 1.4221904277801514, "learning_rate": 8.399713387719389e-08, "loss": 0.5352, "step": 14614 }, { "epoch": 0.94, "grad_norm": 1.1387771368026733, "learning_rate": 8.380644141188998e-08, "loss": 0.4803, "step": 14615 }, { "epoch": 0.94, "grad_norm": 1.1925989389419556, "learning_rate": 8.36159638207229e-08, "loss": 0.5219, "step": 14616 }, { "epoch": 0.94, "grad_norm": 1.1806011199951172, "learning_rate": 8.34257011120182e-08, "loss": 0.5212, "step": 14617 }, { "epoch": 0.94, "grad_norm": 1.3057085275650024, "learning_rate": 8.323565329409033e-08, "loss": 0.5127, "step": 14618 }, { "epoch": 0.94, "grad_norm": 1.1652356386184692, "learning_rate": 8.304582037524655e-08, "loss": 0.5413, "step": 14619 }, { "epoch": 0.94, "grad_norm": 1.2611656188964844, "learning_rate": 8.285620236378467e-08, "loss": 0.5529, "step": 14620 }, { "epoch": 0.94, "grad_norm": 1.2719365358352661, "learning_rate": 8.266679926799137e-08, "loss": 0.4735, "step": 14621 }, { "epoch": 0.94, "grad_norm": 2.264500856399536, "learning_rate": 8.247761109614561e-08, "loss": 0.4784, "step": 14622 }, { "epoch": 0.94, "grad_norm": 1.3645893335342407, "learning_rate": 8.228863785651576e-08, "loss": 0.4903, "step": 14623 }, { "epoch": 0.94, "grad_norm": 1.276075839996338, "learning_rate": 8.209987955736188e-08, "loss": 0.5403, "step": 14624 }, { "epoch": 0.94, "grad_norm": 1.141595721244812, "learning_rate": 8.191133620693404e-08, "loss": 0.4947, "step": 14625 }, { "epoch": 0.94, "grad_norm": 1.1623696088790894, "learning_rate": 8.172300781347286e-08, "loss": 0.5263, "step": 14626 }, { "epoch": 0.94, "grad_norm": 1.1857926845550537, "learning_rate": 8.153489438520956e-08, "loss": 0.5411, "step": 14627 }, { "epoch": 0.94, "grad_norm": 1.184495210647583, "learning_rate": 8.134699593036643e-08, "loss": 0.4848, "step": 14628 }, { "epoch": 0.94, "grad_norm": 1.136198878288269, "learning_rate": 8.115931245715525e-08, "loss": 0.4917, "step": 14629 }, { "epoch": 0.94, "grad_norm": 1.0886790752410889, "learning_rate": 8.097184397378e-08, "loss": 0.4672, "step": 14630 }, { "epoch": 0.94, "grad_norm": 1.350600004196167, "learning_rate": 8.078459048843468e-08, "loss": 0.482, "step": 14631 }, { "epoch": 0.94, "grad_norm": 1.2466857433319092, "learning_rate": 8.059755200930275e-08, "loss": 0.5606, "step": 14632 }, { "epoch": 0.94, "grad_norm": 1.2050597667694092, "learning_rate": 8.041072854455933e-08, "loss": 0.5425, "step": 14633 }, { "epoch": 0.94, "grad_norm": 1.2900359630584717, "learning_rate": 8.022412010237068e-08, "loss": 0.519, "step": 14634 }, { "epoch": 0.94, "grad_norm": 1.171157956123352, "learning_rate": 8.003772669089249e-08, "loss": 0.5082, "step": 14635 }, { "epoch": 0.94, "grad_norm": 1.2218542098999023, "learning_rate": 7.985154831827158e-08, "loss": 0.5232, "step": 14636 }, { "epoch": 0.94, "grad_norm": 1.1094233989715576, "learning_rate": 7.966558499264532e-08, "loss": 0.5091, "step": 14637 }, { "epoch": 0.94, "grad_norm": 1.165621280670166, "learning_rate": 7.947983672214165e-08, "loss": 0.4819, "step": 14638 }, { "epoch": 0.94, "grad_norm": 1.299433708190918, "learning_rate": 7.92943035148791e-08, "loss": 0.5332, "step": 14639 }, { "epoch": 0.95, "grad_norm": 1.1563827991485596, "learning_rate": 7.910898537896727e-08, "loss": 0.5399, "step": 14640 }, { "epoch": 0.95, "grad_norm": 1.2679449319839478, "learning_rate": 7.892388232250525e-08, "loss": 0.5194, "step": 14641 }, { "epoch": 0.95, "grad_norm": 1.3067792654037476, "learning_rate": 7.873899435358379e-08, "loss": 0.5376, "step": 14642 }, { "epoch": 0.95, "grad_norm": 1.1617141962051392, "learning_rate": 7.855432148028419e-08, "loss": 0.5195, "step": 14643 }, { "epoch": 0.95, "grad_norm": 1.3128935098648071, "learning_rate": 7.836986371067723e-08, "loss": 0.5055, "step": 14644 }, { "epoch": 0.95, "grad_norm": 1.0748625993728638, "learning_rate": 7.818562105282645e-08, "loss": 0.5094, "step": 14645 }, { "epoch": 0.95, "grad_norm": 1.450008511543274, "learning_rate": 7.800159351478321e-08, "loss": 0.5114, "step": 14646 }, { "epoch": 0.95, "grad_norm": 1.3204824924468994, "learning_rate": 7.781778110459103e-08, "loss": 0.5117, "step": 14647 }, { "epoch": 0.95, "grad_norm": 1.153673529624939, "learning_rate": 7.763418383028465e-08, "loss": 0.5096, "step": 14648 }, { "epoch": 0.95, "grad_norm": 1.2145767211914062, "learning_rate": 7.745080169988817e-08, "loss": 0.5109, "step": 14649 }, { "epoch": 0.95, "grad_norm": 1.0979883670806885, "learning_rate": 7.726763472141629e-08, "loss": 0.4839, "step": 14650 }, { "epoch": 0.95, "grad_norm": 1.2435718774795532, "learning_rate": 7.708468290287597e-08, "loss": 0.4967, "step": 14651 }, { "epoch": 0.95, "grad_norm": 1.3332446813583374, "learning_rate": 7.6901946252263e-08, "loss": 0.5151, "step": 14652 }, { "epoch": 0.95, "grad_norm": 1.1885638236999512, "learning_rate": 7.671942477756378e-08, "loss": 0.5309, "step": 14653 }, { "epoch": 0.95, "grad_norm": 1.302436351776123, "learning_rate": 7.653711848675582e-08, "loss": 0.5332, "step": 14654 }, { "epoch": 0.95, "grad_norm": 1.0837152004241943, "learning_rate": 7.635502738780831e-08, "loss": 0.514, "step": 14655 }, { "epoch": 0.95, "grad_norm": 1.314453363418579, "learning_rate": 7.617315148867932e-08, "loss": 0.5314, "step": 14656 }, { "epoch": 0.95, "grad_norm": 1.1973354816436768, "learning_rate": 7.59914907973186e-08, "loss": 0.5369, "step": 14657 }, { "epoch": 0.95, "grad_norm": 1.4549705982208252, "learning_rate": 7.58100453216648e-08, "loss": 0.5404, "step": 14658 }, { "epoch": 0.95, "grad_norm": 1.245936632156372, "learning_rate": 7.562881506964992e-08, "loss": 0.4748, "step": 14659 }, { "epoch": 0.95, "grad_norm": 1.2843639850616455, "learning_rate": 7.544780004919427e-08, "loss": 0.4938, "step": 14660 }, { "epoch": 0.95, "grad_norm": 1.1763455867767334, "learning_rate": 7.526700026820988e-08, "loss": 0.4303, "step": 14661 }, { "epoch": 0.95, "grad_norm": 1.2150378227233887, "learning_rate": 7.508641573459874e-08, "loss": 0.5064, "step": 14662 }, { "epoch": 0.95, "grad_norm": 1.1729207038879395, "learning_rate": 7.490604645625454e-08, "loss": 0.5179, "step": 14663 }, { "epoch": 0.95, "grad_norm": 1.1658058166503906, "learning_rate": 7.472589244105987e-08, "loss": 0.5045, "step": 14664 }, { "epoch": 0.95, "grad_norm": 1.2474725246429443, "learning_rate": 7.4545953696889e-08, "loss": 0.5159, "step": 14665 }, { "epoch": 0.95, "grad_norm": 1.155169129371643, "learning_rate": 7.436623023160616e-08, "loss": 0.4653, "step": 14666 }, { "epoch": 0.95, "grad_norm": 1.1585147380828857, "learning_rate": 7.418672205306731e-08, "loss": 0.5421, "step": 14667 }, { "epoch": 0.95, "grad_norm": 1.2238227128982544, "learning_rate": 7.40074291691184e-08, "loss": 0.4636, "step": 14668 }, { "epoch": 0.95, "grad_norm": 1.3179714679718018, "learning_rate": 7.382835158759593e-08, "loss": 0.5144, "step": 14669 }, { "epoch": 0.95, "grad_norm": 1.1777381896972656, "learning_rate": 7.364948931632587e-08, "loss": 0.4892, "step": 14670 }, { "epoch": 0.95, "grad_norm": 1.1354382038116455, "learning_rate": 7.34708423631264e-08, "loss": 0.4877, "step": 14671 }, { "epoch": 0.95, "grad_norm": 1.4374319314956665, "learning_rate": 7.329241073580573e-08, "loss": 0.5668, "step": 14672 }, { "epoch": 0.95, "grad_norm": 1.2599979639053345, "learning_rate": 7.311419444216317e-08, "loss": 0.4841, "step": 14673 }, { "epoch": 0.95, "grad_norm": 1.3562390804290771, "learning_rate": 7.293619348998749e-08, "loss": 0.5215, "step": 14674 }, { "epoch": 0.95, "grad_norm": 1.2603901624679565, "learning_rate": 7.275840788705912e-08, "loss": 0.551, "step": 14675 }, { "epoch": 0.95, "grad_norm": 1.1459014415740967, "learning_rate": 7.258083764114743e-08, "loss": 0.4651, "step": 14676 }, { "epoch": 0.95, "grad_norm": 1.2342827320098877, "learning_rate": 7.240348276001507e-08, "loss": 0.4974, "step": 14677 }, { "epoch": 0.95, "grad_norm": 1.2319673299789429, "learning_rate": 7.222634325141309e-08, "loss": 0.5295, "step": 14678 }, { "epoch": 0.95, "grad_norm": 1.2359652519226074, "learning_rate": 7.204941912308306e-08, "loss": 0.504, "step": 14679 }, { "epoch": 0.95, "grad_norm": 1.3237829208374023, "learning_rate": 7.187271038275989e-08, "loss": 0.5407, "step": 14680 }, { "epoch": 0.95, "grad_norm": 1.1546260118484497, "learning_rate": 7.16962170381652e-08, "loss": 0.5128, "step": 14681 }, { "epoch": 0.95, "grad_norm": 1.2803982496261597, "learning_rate": 7.151993909701338e-08, "loss": 0.5228, "step": 14682 }, { "epoch": 0.95, "grad_norm": 1.160733938217163, "learning_rate": 7.134387656700937e-08, "loss": 0.4974, "step": 14683 }, { "epoch": 0.95, "grad_norm": 1.2967315912246704, "learning_rate": 7.116802945584867e-08, "loss": 0.5403, "step": 14684 }, { "epoch": 0.95, "grad_norm": 1.2466377019882202, "learning_rate": 7.09923977712168e-08, "loss": 0.5231, "step": 14685 }, { "epoch": 0.95, "grad_norm": 1.2613003253936768, "learning_rate": 7.081698152079097e-08, "loss": 0.5447, "step": 14686 }, { "epoch": 0.95, "grad_norm": 1.174528956413269, "learning_rate": 7.064178071223615e-08, "loss": 0.5239, "step": 14687 }, { "epoch": 0.95, "grad_norm": 1.2206830978393555, "learning_rate": 7.046679535321178e-08, "loss": 0.5407, "step": 14688 }, { "epoch": 0.95, "grad_norm": 1.295965313911438, "learning_rate": 7.02920254513656e-08, "loss": 0.4935, "step": 14689 }, { "epoch": 0.95, "grad_norm": 1.1301943063735962, "learning_rate": 7.011747101433597e-08, "loss": 0.4927, "step": 14690 }, { "epoch": 0.95, "grad_norm": 1.2456790208816528, "learning_rate": 6.994313204975234e-08, "loss": 0.5424, "step": 14691 }, { "epoch": 0.95, "grad_norm": 1.4227876663208008, "learning_rate": 6.976900856523472e-08, "loss": 0.5091, "step": 14692 }, { "epoch": 0.95, "grad_norm": 1.3586386442184448, "learning_rate": 6.959510056839369e-08, "loss": 0.4631, "step": 14693 }, { "epoch": 0.95, "grad_norm": 1.1859018802642822, "learning_rate": 6.942140806682985e-08, "loss": 0.5111, "step": 14694 }, { "epoch": 0.95, "grad_norm": 1.2694783210754395, "learning_rate": 6.924793106813544e-08, "loss": 0.507, "step": 14695 }, { "epoch": 0.95, "grad_norm": 1.2512954473495483, "learning_rate": 6.907466957989272e-08, "loss": 0.5221, "step": 14696 }, { "epoch": 0.95, "grad_norm": 1.2014048099517822, "learning_rate": 6.890162360967401e-08, "loss": 0.4863, "step": 14697 }, { "epoch": 0.95, "grad_norm": 1.2618194818496704, "learning_rate": 6.872879316504321e-08, "loss": 0.5165, "step": 14698 }, { "epoch": 0.95, "grad_norm": 1.3468000888824463, "learning_rate": 6.85561782535532e-08, "loss": 0.5179, "step": 14699 }, { "epoch": 0.95, "grad_norm": 1.179635763168335, "learning_rate": 6.838377888275016e-08, "loss": 0.4849, "step": 14700 }, { "epoch": 0.95, "grad_norm": 1.2532018423080444, "learning_rate": 6.821159506016806e-08, "loss": 0.5155, "step": 14701 }, { "epoch": 0.95, "grad_norm": 1.1249957084655762, "learning_rate": 6.80396267933331e-08, "loss": 0.458, "step": 14702 }, { "epoch": 0.95, "grad_norm": 1.1506671905517578, "learning_rate": 6.786787408976147e-08, "loss": 0.5312, "step": 14703 }, { "epoch": 0.95, "grad_norm": 1.2148103713989258, "learning_rate": 6.769633695695943e-08, "loss": 0.4499, "step": 14704 }, { "epoch": 0.95, "grad_norm": 1.2562263011932373, "learning_rate": 6.75250154024254e-08, "loss": 0.519, "step": 14705 }, { "epoch": 0.95, "grad_norm": 1.2516356706619263, "learning_rate": 6.735390943364728e-08, "loss": 0.5328, "step": 14706 }, { "epoch": 0.95, "grad_norm": 1.0534900426864624, "learning_rate": 6.718301905810298e-08, "loss": 0.489, "step": 14707 }, { "epoch": 0.95, "grad_norm": 1.1766213178634644, "learning_rate": 6.701234428326209e-08, "loss": 0.5032, "step": 14708 }, { "epoch": 0.95, "grad_norm": 1.2063775062561035, "learning_rate": 6.684188511658473e-08, "loss": 0.5484, "step": 14709 }, { "epoch": 0.95, "grad_norm": 1.1637994050979614, "learning_rate": 6.667164156552053e-08, "loss": 0.5385, "step": 14710 }, { "epoch": 0.95, "grad_norm": 1.1570987701416016, "learning_rate": 6.650161363751073e-08, "loss": 0.5042, "step": 14711 }, { "epoch": 0.95, "grad_norm": 1.218078851699829, "learning_rate": 6.633180133998717e-08, "loss": 0.5278, "step": 14712 }, { "epoch": 0.95, "grad_norm": 1.150496244430542, "learning_rate": 6.616220468037116e-08, "loss": 0.5117, "step": 14713 }, { "epoch": 0.95, "grad_norm": 1.5045876502990723, "learning_rate": 6.599282366607563e-08, "loss": 0.5176, "step": 14714 }, { "epoch": 0.95, "grad_norm": 1.2949788570404053, "learning_rate": 6.582365830450411e-08, "loss": 0.527, "step": 14715 }, { "epoch": 0.95, "grad_norm": 1.2111791372299194, "learning_rate": 6.565470860305012e-08, "loss": 0.498, "step": 14716 }, { "epoch": 0.95, "grad_norm": 1.2582461833953857, "learning_rate": 6.548597456909778e-08, "loss": 0.4743, "step": 14717 }, { "epoch": 0.95, "grad_norm": 1.1416903734207153, "learning_rate": 6.531745621002228e-08, "loss": 0.4561, "step": 14718 }, { "epoch": 0.95, "grad_norm": 1.2525984048843384, "learning_rate": 6.514915353318941e-08, "loss": 0.5511, "step": 14719 }, { "epoch": 0.95, "grad_norm": 1.1782487630844116, "learning_rate": 6.498106654595493e-08, "loss": 0.5046, "step": 14720 }, { "epoch": 0.95, "grad_norm": 1.2428263425827026, "learning_rate": 6.48131952556652e-08, "loss": 0.4934, "step": 14721 }, { "epoch": 0.95, "grad_norm": 1.1437034606933594, "learning_rate": 6.464553966965826e-08, "loss": 0.527, "step": 14722 }, { "epoch": 0.95, "grad_norm": 1.2266243696212769, "learning_rate": 6.447809979526099e-08, "loss": 0.4948, "step": 14723 }, { "epoch": 0.95, "grad_norm": 1.197531819343567, "learning_rate": 6.431087563979254e-08, "loss": 0.5025, "step": 14724 }, { "epoch": 0.95, "grad_norm": 1.2492690086364746, "learning_rate": 6.414386721056098e-08, "loss": 0.4718, "step": 14725 }, { "epoch": 0.95, "grad_norm": 1.2804348468780518, "learning_rate": 6.397707451486656e-08, "loss": 0.5494, "step": 14726 }, { "epoch": 0.95, "grad_norm": 1.1762638092041016, "learning_rate": 6.381049755999901e-08, "loss": 0.4791, "step": 14727 }, { "epoch": 0.95, "grad_norm": 1.1331619024276733, "learning_rate": 6.364413635323918e-08, "loss": 0.4829, "step": 14728 }, { "epoch": 0.95, "grad_norm": 1.275106430053711, "learning_rate": 6.347799090185846e-08, "loss": 0.4951, "step": 14729 }, { "epoch": 0.95, "grad_norm": 1.2627782821655273, "learning_rate": 6.331206121311773e-08, "loss": 0.523, "step": 14730 }, { "epoch": 0.95, "grad_norm": 1.2352921962738037, "learning_rate": 6.314634729427005e-08, "loss": 0.5209, "step": 14731 }, { "epoch": 0.95, "grad_norm": 1.285125494003296, "learning_rate": 6.298084915255853e-08, "loss": 0.5343, "step": 14732 }, { "epoch": 0.95, "grad_norm": 1.2171630859375, "learning_rate": 6.281556679521683e-08, "loss": 0.5324, "step": 14733 }, { "epoch": 0.95, "grad_norm": 1.2326221466064453, "learning_rate": 6.265050022946806e-08, "loss": 0.5314, "step": 14734 }, { "epoch": 0.95, "grad_norm": 1.1038403511047363, "learning_rate": 6.248564946252755e-08, "loss": 0.4919, "step": 14735 }, { "epoch": 0.95, "grad_norm": 1.2422956228256226, "learning_rate": 6.23210145016001e-08, "loss": 0.5225, "step": 14736 }, { "epoch": 0.95, "grad_norm": 1.1779334545135498, "learning_rate": 6.215659535388219e-08, "loss": 0.5048, "step": 14737 }, { "epoch": 0.95, "grad_norm": 1.2576987743377686, "learning_rate": 6.199239202655915e-08, "loss": 0.4938, "step": 14738 }, { "epoch": 0.95, "grad_norm": 1.2307015657424927, "learning_rate": 6.182840452680916e-08, "loss": 0.5045, "step": 14739 }, { "epoch": 0.95, "grad_norm": 1.2730375528335571, "learning_rate": 6.166463286179814e-08, "loss": 0.5106, "step": 14740 }, { "epoch": 0.95, "grad_norm": 1.1114399433135986, "learning_rate": 6.150107703868535e-08, "loss": 0.4954, "step": 14741 }, { "epoch": 0.95, "grad_norm": 1.1279797554016113, "learning_rate": 6.133773706461898e-08, "loss": 0.4745, "step": 14742 }, { "epoch": 0.95, "grad_norm": 1.2515217065811157, "learning_rate": 6.117461294673776e-08, "loss": 0.5271, "step": 14743 }, { "epoch": 0.95, "grad_norm": 1.2396742105484009, "learning_rate": 6.101170469217266e-08, "loss": 0.5172, "step": 14744 }, { "epoch": 0.95, "grad_norm": 1.220438003540039, "learning_rate": 6.084901230804297e-08, "loss": 0.5169, "step": 14745 }, { "epoch": 0.95, "grad_norm": 1.2761863470077515, "learning_rate": 6.068653580145966e-08, "loss": 0.5091, "step": 14746 }, { "epoch": 0.95, "grad_norm": 1.3456811904907227, "learning_rate": 6.052427517952375e-08, "loss": 0.5176, "step": 14747 }, { "epoch": 0.95, "grad_norm": 1.2002726793289185, "learning_rate": 6.036223044932843e-08, "loss": 0.5196, "step": 14748 }, { "epoch": 0.95, "grad_norm": 1.355077862739563, "learning_rate": 6.020040161795526e-08, "loss": 0.5304, "step": 14749 }, { "epoch": 0.95, "grad_norm": 1.2464768886566162, "learning_rate": 6.003878869247748e-08, "loss": 0.5586, "step": 14750 }, { "epoch": 0.95, "grad_norm": 1.2715318202972412, "learning_rate": 5.987739167995999e-08, "loss": 0.5346, "step": 14751 }, { "epoch": 0.95, "grad_norm": 1.3343863487243652, "learning_rate": 5.971621058745492e-08, "loss": 0.5501, "step": 14752 }, { "epoch": 0.95, "grad_norm": 1.2283028364181519, "learning_rate": 5.9555245422008856e-08, "loss": 0.4827, "step": 14753 }, { "epoch": 0.95, "grad_norm": 1.1610400676727295, "learning_rate": 5.939449619065618e-08, "loss": 0.52, "step": 14754 }, { "epoch": 0.95, "grad_norm": 1.258475661277771, "learning_rate": 5.923396290042294e-08, "loss": 0.5336, "step": 14755 }, { "epoch": 0.95, "grad_norm": 1.3315417766571045, "learning_rate": 5.9073645558326286e-08, "loss": 0.5429, "step": 14756 }, { "epoch": 0.95, "grad_norm": 1.2100497484207153, "learning_rate": 5.891354417137285e-08, "loss": 0.531, "step": 14757 }, { "epoch": 0.95, "grad_norm": 1.1808656454086304, "learning_rate": 5.8753658746559806e-08, "loss": 0.4854, "step": 14758 }, { "epoch": 0.95, "grad_norm": 1.1267393827438354, "learning_rate": 5.8593989290876006e-08, "loss": 0.494, "step": 14759 }, { "epoch": 0.95, "grad_norm": 1.2005265951156616, "learning_rate": 5.8434535811299765e-08, "loss": 0.523, "step": 14760 }, { "epoch": 0.95, "grad_norm": 1.258799433708191, "learning_rate": 5.8275298314800496e-08, "loss": 0.548, "step": 14761 }, { "epoch": 0.95, "grad_norm": 1.0864276885986328, "learning_rate": 5.8116276808338204e-08, "loss": 0.51, "step": 14762 }, { "epoch": 0.95, "grad_norm": 1.301220417022705, "learning_rate": 5.795747129886342e-08, "loss": 0.5239, "step": 14763 }, { "epoch": 0.95, "grad_norm": 1.1758164167404175, "learning_rate": 5.779888179331672e-08, "loss": 0.5027, "step": 14764 }, { "epoch": 0.95, "grad_norm": 1.18965482711792, "learning_rate": 5.764050829863033e-08, "loss": 0.4905, "step": 14765 }, { "epoch": 0.95, "grad_norm": 1.461358666419983, "learning_rate": 5.7482350821725374e-08, "loss": 0.4706, "step": 14766 }, { "epoch": 0.95, "grad_norm": 1.224561333656311, "learning_rate": 5.732440936951467e-08, "loss": 0.5294, "step": 14767 }, { "epoch": 0.95, "grad_norm": 1.1849818229675293, "learning_rate": 5.716668394890268e-08, "loss": 0.4953, "step": 14768 }, { "epoch": 0.95, "grad_norm": 1.338465929031372, "learning_rate": 5.7009174566781675e-08, "loss": 0.5195, "step": 14769 }, { "epoch": 0.95, "grad_norm": 1.3005709648132324, "learning_rate": 5.685188123003671e-08, "loss": 0.5272, "step": 14770 }, { "epoch": 0.95, "grad_norm": 1.2458126544952393, "learning_rate": 5.6694803945542276e-08, "loss": 0.4825, "step": 14771 }, { "epoch": 0.95, "grad_norm": 1.2416621446609497, "learning_rate": 5.6537942720163996e-08, "loss": 0.4893, "step": 14772 }, { "epoch": 0.95, "grad_norm": 1.2407026290893555, "learning_rate": 5.638129756075805e-08, "loss": 0.533, "step": 14773 }, { "epoch": 0.95, "grad_norm": 1.2740103006362915, "learning_rate": 5.6224868474171744e-08, "loss": 0.487, "step": 14774 }, { "epoch": 0.95, "grad_norm": 1.2435994148254395, "learning_rate": 5.6068655467240165e-08, "loss": 0.5178, "step": 14775 }, { "epoch": 0.95, "grad_norm": 1.1767548322677612, "learning_rate": 5.5912658546792844e-08, "loss": 0.5106, "step": 14776 }, { "epoch": 0.95, "grad_norm": 1.3045063018798828, "learning_rate": 5.5756877719647105e-08, "loss": 0.5362, "step": 14777 }, { "epoch": 0.95, "grad_norm": 1.1104367971420288, "learning_rate": 5.56013129926114e-08, "loss": 0.4859, "step": 14778 }, { "epoch": 0.95, "grad_norm": 1.2657629251480103, "learning_rate": 5.544596437248639e-08, "loss": 0.5214, "step": 14779 }, { "epoch": 0.95, "grad_norm": 1.3229584693908691, "learning_rate": 5.5290831866061103e-08, "loss": 0.5117, "step": 14780 }, { "epoch": 0.95, "grad_norm": 1.2029163837432861, "learning_rate": 5.513591548011565e-08, "loss": 0.4959, "step": 14781 }, { "epoch": 0.95, "grad_norm": 1.3267486095428467, "learning_rate": 5.49812152214213e-08, "loss": 0.5496, "step": 14782 }, { "epoch": 0.95, "grad_norm": 1.1217509508132935, "learning_rate": 5.482673109673986e-08, "loss": 0.4971, "step": 14783 }, { "epoch": 0.95, "grad_norm": 1.1926143169403076, "learning_rate": 5.467246311282315e-08, "loss": 0.4935, "step": 14784 }, { "epoch": 0.95, "grad_norm": 1.3729385137557983, "learning_rate": 5.451841127641411e-08, "loss": 0.5377, "step": 14785 }, { "epoch": 0.95, "grad_norm": 1.366873860359192, "learning_rate": 5.436457559424624e-08, "loss": 0.5195, "step": 14786 }, { "epoch": 0.95, "grad_norm": 1.1344356536865234, "learning_rate": 5.42109560730425e-08, "loss": 0.5044, "step": 14787 }, { "epoch": 0.95, "grad_norm": 1.361507534980774, "learning_rate": 5.405755271951751e-08, "loss": 0.507, "step": 14788 }, { "epoch": 0.95, "grad_norm": 1.2754791975021362, "learning_rate": 5.390436554037592e-08, "loss": 0.5351, "step": 14789 }, { "epoch": 0.95, "grad_norm": 1.3171477317810059, "learning_rate": 5.375139454231404e-08, "loss": 0.5366, "step": 14790 }, { "epoch": 0.95, "grad_norm": 1.1361483335494995, "learning_rate": 5.3598639732017066e-08, "loss": 0.4629, "step": 14791 }, { "epoch": 0.95, "grad_norm": 1.3132736682891846, "learning_rate": 5.344610111616133e-08, "loss": 0.5126, "step": 14792 }, { "epoch": 0.95, "grad_norm": 1.194483995437622, "learning_rate": 5.3293778701414275e-08, "loss": 0.5002, "step": 14793 }, { "epoch": 0.95, "grad_norm": 1.1943937540054321, "learning_rate": 5.314167249443336e-08, "loss": 0.4516, "step": 14794 }, { "epoch": 0.96, "grad_norm": 1.1505805253982544, "learning_rate": 5.2989782501867706e-08, "loss": 0.532, "step": 14795 }, { "epoch": 0.96, "grad_norm": 1.2173136472702026, "learning_rate": 5.283810873035422e-08, "loss": 0.5051, "step": 14796 }, { "epoch": 0.96, "grad_norm": 1.2060093879699707, "learning_rate": 5.268665118652372e-08, "loss": 0.4438, "step": 14797 }, { "epoch": 0.96, "grad_norm": 1.1792858839035034, "learning_rate": 5.253540987699535e-08, "loss": 0.4844, "step": 14798 }, { "epoch": 0.96, "grad_norm": 1.095787525177002, "learning_rate": 5.238438480837937e-08, "loss": 0.4847, "step": 14799 }, { "epoch": 0.96, "grad_norm": 1.1488698720932007, "learning_rate": 5.223357598727719e-08, "loss": 0.5164, "step": 14800 }, { "epoch": 0.96, "grad_norm": 1.2428841590881348, "learning_rate": 5.208298342027962e-08, "loss": 0.5175, "step": 14801 }, { "epoch": 0.96, "grad_norm": 1.1701570749282837, "learning_rate": 5.193260711396975e-08, "loss": 0.5136, "step": 14802 }, { "epoch": 0.96, "grad_norm": 1.2130107879638672, "learning_rate": 5.178244707491842e-08, "loss": 0.5252, "step": 14803 }, { "epoch": 0.96, "grad_norm": 1.2187808752059937, "learning_rate": 5.163250330969094e-08, "loss": 0.4974, "step": 14804 }, { "epoch": 0.96, "grad_norm": 1.2150574922561646, "learning_rate": 5.1482775824838735e-08, "loss": 0.5244, "step": 14805 }, { "epoch": 0.96, "grad_norm": 1.0678807497024536, "learning_rate": 5.1333264626907685e-08, "loss": 0.5047, "step": 14806 }, { "epoch": 0.96, "grad_norm": 1.1408250331878662, "learning_rate": 5.1183969722431446e-08, "loss": 0.4962, "step": 14807 }, { "epoch": 0.96, "grad_norm": 1.1846078634262085, "learning_rate": 5.103489111793591e-08, "loss": 0.5277, "step": 14808 }, { "epoch": 0.96, "grad_norm": 1.1591718196868896, "learning_rate": 5.088602881993754e-08, "loss": 0.4815, "step": 14809 }, { "epoch": 0.96, "grad_norm": 1.3430850505828857, "learning_rate": 5.073738283494112e-08, "loss": 0.496, "step": 14810 }, { "epoch": 0.96, "grad_norm": 1.265054702758789, "learning_rate": 5.058895316944479e-08, "loss": 0.5, "step": 14811 }, { "epoch": 0.96, "grad_norm": 1.2545969486236572, "learning_rate": 5.044073982993558e-08, "loss": 0.5355, "step": 14812 }, { "epoch": 0.96, "grad_norm": 1.1583465337753296, "learning_rate": 5.0292742822891095e-08, "loss": 0.4475, "step": 14813 }, { "epoch": 0.96, "grad_norm": 1.188053011894226, "learning_rate": 5.0144962154781154e-08, "loss": 0.5197, "step": 14814 }, { "epoch": 0.96, "grad_norm": 1.3439174890518188, "learning_rate": 4.999739783206392e-08, "loss": 0.5197, "step": 14815 }, { "epoch": 0.96, "grad_norm": 1.2074534893035889, "learning_rate": 4.985004986118924e-08, "loss": 0.4966, "step": 14816 }, { "epoch": 0.96, "grad_norm": 1.1345711946487427, "learning_rate": 4.970291824859696e-08, "loss": 0.5124, "step": 14817 }, { "epoch": 0.96, "grad_norm": 1.4097803831100464, "learning_rate": 4.955600300071861e-08, "loss": 0.5323, "step": 14818 }, { "epoch": 0.96, "grad_norm": 1.4028384685516357, "learning_rate": 4.94093041239746e-08, "loss": 0.507, "step": 14819 }, { "epoch": 0.96, "grad_norm": 1.1560782194137573, "learning_rate": 4.926282162477758e-08, "loss": 0.4931, "step": 14820 }, { "epoch": 0.96, "grad_norm": 1.2227559089660645, "learning_rate": 4.911655550952965e-08, "loss": 0.5182, "step": 14821 }, { "epoch": 0.96, "grad_norm": 1.2033123970031738, "learning_rate": 4.897050578462348e-08, "loss": 0.5562, "step": 14822 }, { "epoch": 0.96, "grad_norm": 1.094481348991394, "learning_rate": 4.882467245644229e-08, "loss": 0.4873, "step": 14823 }, { "epoch": 0.96, "grad_norm": 1.2181944847106934, "learning_rate": 4.867905553136098e-08, "loss": 0.5038, "step": 14824 }, { "epoch": 0.96, "grad_norm": 1.2130577564239502, "learning_rate": 4.853365501574337e-08, "loss": 0.4583, "step": 14825 }, { "epoch": 0.96, "grad_norm": 1.2351597547531128, "learning_rate": 4.8388470915944895e-08, "loss": 0.5285, "step": 14826 }, { "epoch": 0.96, "grad_norm": 1.2818423509597778, "learning_rate": 4.824350323831106e-08, "loss": 0.4885, "step": 14827 }, { "epoch": 0.96, "grad_norm": 1.1903176307678223, "learning_rate": 4.80987519891779e-08, "loss": 0.5235, "step": 14828 }, { "epoch": 0.96, "grad_norm": 1.140073537826538, "learning_rate": 4.795421717487203e-08, "loss": 0.4799, "step": 14829 }, { "epoch": 0.96, "grad_norm": 1.2798508405685425, "learning_rate": 4.780989880171116e-08, "loss": 0.5492, "step": 14830 }, { "epoch": 0.96, "grad_norm": 1.2585119009017944, "learning_rate": 4.766579687600248e-08, "loss": 0.5147, "step": 14831 }, { "epoch": 0.96, "grad_norm": 1.4274816513061523, "learning_rate": 4.752191140404483e-08, "loss": 0.5189, "step": 14832 }, { "epoch": 0.96, "grad_norm": 1.251968502998352, "learning_rate": 4.7378242392127074e-08, "loss": 0.4844, "step": 14833 }, { "epoch": 0.96, "grad_norm": 1.2151464223861694, "learning_rate": 4.723478984652807e-08, "loss": 0.5424, "step": 14834 }, { "epoch": 0.96, "grad_norm": 1.2906540632247925, "learning_rate": 4.709155377351837e-08, "loss": 0.4996, "step": 14835 }, { "epoch": 0.96, "grad_norm": 1.231080174446106, "learning_rate": 4.6948534179357965e-08, "loss": 0.5357, "step": 14836 }, { "epoch": 0.96, "grad_norm": 1.1422936916351318, "learning_rate": 4.680573107029796e-08, "loss": 0.4569, "step": 14837 }, { "epoch": 0.96, "grad_norm": 1.25371515750885, "learning_rate": 4.6663144452580026e-08, "loss": 0.4782, "step": 14838 }, { "epoch": 0.96, "grad_norm": 1.4043599367141724, "learning_rate": 4.652077433243696e-08, "loss": 0.5065, "step": 14839 }, { "epoch": 0.96, "grad_norm": 1.22329843044281, "learning_rate": 4.63786207160899e-08, "loss": 0.47, "step": 14840 }, { "epoch": 0.96, "grad_norm": 1.1631027460098267, "learning_rate": 4.623668360975331e-08, "loss": 0.4782, "step": 14841 }, { "epoch": 0.96, "grad_norm": 1.3958556652069092, "learning_rate": 4.609496301963001e-08, "loss": 0.5782, "step": 14842 }, { "epoch": 0.96, "grad_norm": 1.1691282987594604, "learning_rate": 4.59534589519145e-08, "loss": 0.5089, "step": 14843 }, { "epoch": 0.96, "grad_norm": 1.243282675743103, "learning_rate": 4.581217141279237e-08, "loss": 0.511, "step": 14844 }, { "epoch": 0.96, "grad_norm": 1.3103951215744019, "learning_rate": 4.567110040843814e-08, "loss": 0.5201, "step": 14845 }, { "epoch": 0.96, "grad_norm": 1.3086916208267212, "learning_rate": 4.553024594501743e-08, "loss": 0.5039, "step": 14846 }, { "epoch": 0.96, "grad_norm": 1.1671621799468994, "learning_rate": 4.538960802868697e-08, "loss": 0.4814, "step": 14847 }, { "epoch": 0.96, "grad_norm": 1.376732349395752, "learning_rate": 4.5249186665593524e-08, "loss": 0.4656, "step": 14848 }, { "epoch": 0.96, "grad_norm": 1.2909517288208008, "learning_rate": 4.510898186187496e-08, "loss": 0.496, "step": 14849 }, { "epoch": 0.96, "grad_norm": 1.243862509727478, "learning_rate": 4.496899362365914e-08, "loss": 0.4732, "step": 14850 }, { "epoch": 0.96, "grad_norm": 1.2052693367004395, "learning_rate": 4.482922195706452e-08, "loss": 0.5462, "step": 14851 }, { "epoch": 0.96, "grad_norm": 1.1777608394622803, "learning_rate": 4.468966686819953e-08, "loss": 0.4962, "step": 14852 }, { "epoch": 0.96, "grad_norm": 1.1668998003005981, "learning_rate": 4.4550328363164864e-08, "loss": 0.5045, "step": 14853 }, { "epoch": 0.96, "grad_norm": 1.1353124380111694, "learning_rate": 4.441120644805008e-08, "loss": 0.4748, "step": 14854 }, { "epoch": 0.96, "grad_norm": 1.2224076986312866, "learning_rate": 4.427230112893532e-08, "loss": 0.5413, "step": 14855 }, { "epoch": 0.96, "grad_norm": 1.2508509159088135, "learning_rate": 4.413361241189351e-08, "loss": 0.5455, "step": 14856 }, { "epoch": 0.96, "grad_norm": 1.1812790632247925, "learning_rate": 4.399514030298424e-08, "loss": 0.4502, "step": 14857 }, { "epoch": 0.96, "grad_norm": 1.095528244972229, "learning_rate": 4.3856884808261e-08, "loss": 0.552, "step": 14858 }, { "epoch": 0.96, "grad_norm": 1.282785415649414, "learning_rate": 4.3718845933766186e-08, "loss": 0.5445, "step": 14859 }, { "epoch": 0.96, "grad_norm": 1.1048636436462402, "learning_rate": 4.3581023685532744e-08, "loss": 0.4502, "step": 14860 }, { "epoch": 0.96, "grad_norm": 1.2080132961273193, "learning_rate": 4.344341806958585e-08, "loss": 0.5123, "step": 14861 }, { "epoch": 0.96, "grad_norm": 1.1342843770980835, "learning_rate": 4.330602909193904e-08, "loss": 0.5305, "step": 14862 }, { "epoch": 0.96, "grad_norm": 1.2769500017166138, "learning_rate": 4.3168856758596945e-08, "loss": 0.5367, "step": 14863 }, { "epoch": 0.96, "grad_norm": 1.2530746459960938, "learning_rate": 4.303190107555533e-08, "loss": 0.5017, "step": 14864 }, { "epoch": 0.96, "grad_norm": 1.1923389434814453, "learning_rate": 4.289516204879996e-08, "loss": 0.5258, "step": 14865 }, { "epoch": 0.96, "grad_norm": 1.1530306339263916, "learning_rate": 4.2758639684307714e-08, "loss": 0.5384, "step": 14866 }, { "epoch": 0.96, "grad_norm": 1.0998003482818604, "learning_rate": 4.2622333988045496e-08, "loss": 0.4412, "step": 14867 }, { "epoch": 0.96, "grad_norm": 1.309877872467041, "learning_rate": 4.2486244965971314e-08, "loss": 0.5172, "step": 14868 }, { "epoch": 0.96, "grad_norm": 1.369704008102417, "learning_rate": 4.235037262403263e-08, "loss": 0.4768, "step": 14869 }, { "epoch": 0.96, "grad_norm": 1.3300182819366455, "learning_rate": 4.2214716968168036e-08, "loss": 0.5291, "step": 14870 }, { "epoch": 0.96, "grad_norm": 1.3021838665008545, "learning_rate": 4.2079278004306665e-08, "loss": 0.5249, "step": 14871 }, { "epoch": 0.96, "grad_norm": 1.249010682106018, "learning_rate": 4.1944055738368794e-08, "loss": 0.4775, "step": 14872 }, { "epoch": 0.96, "grad_norm": 1.2110172510147095, "learning_rate": 4.180905017626413e-08, "loss": 0.494, "step": 14873 }, { "epoch": 0.96, "grad_norm": 1.2217323780059814, "learning_rate": 4.167426132389407e-08, "loss": 0.5023, "step": 14874 }, { "epoch": 0.96, "grad_norm": 1.2420401573181152, "learning_rate": 4.15396891871489e-08, "loss": 0.5235, "step": 14875 }, { "epoch": 0.96, "grad_norm": 1.1460076570510864, "learning_rate": 4.1405333771910584e-08, "loss": 0.5035, "step": 14876 }, { "epoch": 0.96, "grad_norm": 1.3536806106567383, "learning_rate": 4.1271195084052196e-08, "loss": 0.4613, "step": 14877 }, { "epoch": 0.96, "grad_norm": 1.1954238414764404, "learning_rate": 4.1137273129436273e-08, "loss": 0.5064, "step": 14878 }, { "epoch": 0.96, "grad_norm": 1.2310093641281128, "learning_rate": 4.100356791391591e-08, "loss": 0.4762, "step": 14879 }, { "epoch": 0.96, "grad_norm": 1.214621901512146, "learning_rate": 4.087007944333532e-08, "loss": 0.5431, "step": 14880 }, { "epoch": 0.96, "grad_norm": 1.2291830778121948, "learning_rate": 4.073680772352928e-08, "loss": 0.4667, "step": 14881 }, { "epoch": 0.96, "grad_norm": 1.236000895500183, "learning_rate": 4.060375276032147e-08, "loss": 0.5209, "step": 14882 }, { "epoch": 0.96, "grad_norm": 1.2192012071609497, "learning_rate": 4.047091455952834e-08, "loss": 0.4851, "step": 14883 }, { "epoch": 0.96, "grad_norm": 1.2709749937057495, "learning_rate": 4.033829312695636e-08, "loss": 0.5309, "step": 14884 }, { "epoch": 0.96, "grad_norm": 1.1339994668960571, "learning_rate": 4.020588846840146e-08, "loss": 0.4983, "step": 14885 }, { "epoch": 0.96, "grad_norm": 1.2101621627807617, "learning_rate": 4.00737005896501e-08, "loss": 0.5083, "step": 14886 }, { "epoch": 0.96, "grad_norm": 1.1766270399093628, "learning_rate": 3.994172949648101e-08, "loss": 0.4687, "step": 14887 }, { "epoch": 0.96, "grad_norm": 1.2394256591796875, "learning_rate": 3.980997519466123e-08, "loss": 0.5502, "step": 14888 }, { "epoch": 0.96, "grad_norm": 1.2258341312408447, "learning_rate": 3.967843768995117e-08, "loss": 0.523, "step": 14889 }, { "epoch": 0.96, "grad_norm": 1.1392790079116821, "learning_rate": 3.954711698809788e-08, "loss": 0.4694, "step": 14890 }, { "epoch": 0.96, "grad_norm": 1.1476901769638062, "learning_rate": 3.941601309484178e-08, "loss": 0.4974, "step": 14891 }, { "epoch": 0.96, "grad_norm": 1.232161045074463, "learning_rate": 3.9285126015913835e-08, "loss": 0.5226, "step": 14892 }, { "epoch": 0.96, "grad_norm": 1.3108965158462524, "learning_rate": 3.9154455757033914e-08, "loss": 0.5264, "step": 14893 }, { "epoch": 0.96, "grad_norm": 1.2220289707183838, "learning_rate": 3.9024002323913565e-08, "loss": 0.5214, "step": 14894 }, { "epoch": 0.96, "grad_norm": 1.2073358297348022, "learning_rate": 3.8893765722254895e-08, "loss": 0.4754, "step": 14895 }, { "epoch": 0.96, "grad_norm": 1.2180479764938354, "learning_rate": 3.8763745957749455e-08, "loss": 0.5104, "step": 14896 }, { "epoch": 0.96, "grad_norm": 1.1732953786849976, "learning_rate": 3.863394303608048e-08, "loss": 0.5301, "step": 14897 }, { "epoch": 0.96, "grad_norm": 1.0952961444854736, "learning_rate": 3.8504356962921765e-08, "loss": 0.5176, "step": 14898 }, { "epoch": 0.96, "grad_norm": 1.2375532388687134, "learning_rate": 3.8374987743936554e-08, "loss": 0.4857, "step": 14899 }, { "epoch": 0.96, "grad_norm": 1.1687469482421875, "learning_rate": 3.824583538477977e-08, "loss": 0.4622, "step": 14900 }, { "epoch": 0.96, "grad_norm": 1.3023171424865723, "learning_rate": 3.8116899891095794e-08, "loss": 0.5675, "step": 14901 }, { "epoch": 0.96, "grad_norm": 1.4465726613998413, "learning_rate": 3.798818126852011e-08, "loss": 0.5394, "step": 14902 }, { "epoch": 0.96, "grad_norm": 1.2156116962432861, "learning_rate": 3.7859679522679325e-08, "loss": 0.4935, "step": 14903 }, { "epoch": 0.96, "grad_norm": 1.2575980424880981, "learning_rate": 3.773139465918896e-08, "loss": 0.5111, "step": 14904 }, { "epoch": 0.96, "grad_norm": 1.2005743980407715, "learning_rate": 3.760332668365674e-08, "loss": 0.4887, "step": 14905 }, { "epoch": 0.96, "grad_norm": 1.1681498289108276, "learning_rate": 3.7475475601679854e-08, "loss": 0.5099, "step": 14906 }, { "epoch": 0.96, "grad_norm": 1.0998388528823853, "learning_rate": 3.734784141884662e-08, "loss": 0.4485, "step": 14907 }, { "epoch": 0.96, "grad_norm": 1.2747037410736084, "learning_rate": 3.7220424140735346e-08, "loss": 0.5506, "step": 14908 }, { "epoch": 0.96, "grad_norm": 1.1898250579833984, "learning_rate": 3.709322377291547e-08, "loss": 0.5159, "step": 14909 }, { "epoch": 0.96, "grad_norm": 1.1982107162475586, "learning_rate": 3.6966240320945865e-08, "loss": 0.5187, "step": 14910 }, { "epoch": 0.96, "grad_norm": 1.281261682510376, "learning_rate": 3.683947379037711e-08, "loss": 0.5181, "step": 14911 }, { "epoch": 0.96, "grad_norm": 1.3353091478347778, "learning_rate": 3.671292418674976e-08, "loss": 0.4921, "step": 14912 }, { "epoch": 0.96, "grad_norm": 1.0870723724365234, "learning_rate": 3.6586591515595514e-08, "loss": 0.5194, "step": 14913 }, { "epoch": 0.96, "grad_norm": 1.1770691871643066, "learning_rate": 3.6460475782434945e-08, "loss": 0.4934, "step": 14914 }, { "epoch": 0.96, "grad_norm": 1.193394422531128, "learning_rate": 3.633457699278142e-08, "loss": 0.4945, "step": 14915 }, { "epoch": 0.96, "grad_norm": 1.1315147876739502, "learning_rate": 3.6208895152137215e-08, "loss": 0.515, "step": 14916 }, { "epoch": 0.96, "grad_norm": 1.2301620244979858, "learning_rate": 3.6083430265995144e-08, "loss": 0.5381, "step": 14917 }, { "epoch": 0.96, "grad_norm": 1.2434742450714111, "learning_rate": 3.595818233983916e-08, "loss": 0.5161, "step": 14918 }, { "epoch": 0.96, "grad_norm": 1.2353733777999878, "learning_rate": 3.5833151379143785e-08, "loss": 0.5239, "step": 14919 }, { "epoch": 0.96, "grad_norm": 1.2108973264694214, "learning_rate": 3.570833738937352e-08, "loss": 0.4768, "step": 14920 }, { "epoch": 0.96, "grad_norm": 1.179641842842102, "learning_rate": 3.5583740375984e-08, "loss": 0.5517, "step": 14921 }, { "epoch": 0.96, "grad_norm": 1.2090400457382202, "learning_rate": 3.545936034442088e-08, "loss": 0.5337, "step": 14922 }, { "epoch": 0.96, "grad_norm": 1.2045809030532837, "learning_rate": 3.5335197300119806e-08, "loss": 0.5257, "step": 14923 }, { "epoch": 0.96, "grad_norm": 1.2014737129211426, "learning_rate": 3.521125124850866e-08, "loss": 0.4876, "step": 14924 }, { "epoch": 0.96, "grad_norm": 1.2841217517852783, "learning_rate": 3.5087522195004775e-08, "loss": 0.4852, "step": 14925 }, { "epoch": 0.96, "grad_norm": 1.2857744693756104, "learning_rate": 3.496401014501494e-08, "loss": 0.5206, "step": 14926 }, { "epoch": 0.96, "grad_norm": 1.1841622591018677, "learning_rate": 3.484071510393927e-08, "loss": 0.4791, "step": 14927 }, { "epoch": 0.96, "grad_norm": 1.2386482954025269, "learning_rate": 3.4717637077164576e-08, "loss": 0.4904, "step": 14928 }, { "epoch": 0.96, "grad_norm": 1.1970915794372559, "learning_rate": 3.459477607007211e-08, "loss": 0.5035, "step": 14929 }, { "epoch": 0.96, "grad_norm": 1.2373558282852173, "learning_rate": 3.447213208803035e-08, "loss": 0.5947, "step": 14930 }, { "epoch": 0.96, "grad_norm": 1.244702696800232, "learning_rate": 3.4349705136401125e-08, "loss": 0.5015, "step": 14931 }, { "epoch": 0.96, "grad_norm": 1.2485685348510742, "learning_rate": 3.422749522053459e-08, "loss": 0.5289, "step": 14932 }, { "epoch": 0.96, "grad_norm": 1.242504596710205, "learning_rate": 3.410550234577203e-08, "loss": 0.5005, "step": 14933 }, { "epoch": 0.96, "grad_norm": 1.1882332563400269, "learning_rate": 3.398372651744641e-08, "loss": 0.537, "step": 14934 }, { "epoch": 0.96, "grad_norm": 1.181043028831482, "learning_rate": 3.3862167740879026e-08, "loss": 0.4882, "step": 14935 }, { "epoch": 0.96, "grad_norm": 1.2629013061523438, "learning_rate": 3.37408260213834e-08, "loss": 0.5193, "step": 14936 }, { "epoch": 0.96, "grad_norm": 1.2531718015670776, "learning_rate": 3.361970136426363e-08, "loss": 0.4922, "step": 14937 }, { "epoch": 0.96, "grad_norm": 1.040369987487793, "learning_rate": 3.349879377481324e-08, "loss": 0.4801, "step": 14938 }, { "epoch": 0.96, "grad_norm": 1.278085470199585, "learning_rate": 3.337810325831692e-08, "loss": 0.5072, "step": 14939 }, { "epoch": 0.96, "grad_norm": 1.2885067462921143, "learning_rate": 3.325762982004932e-08, "loss": 0.4922, "step": 14940 }, { "epoch": 0.96, "grad_norm": 1.2354201078414917, "learning_rate": 3.3137373465276234e-08, "loss": 0.4948, "step": 14941 }, { "epoch": 0.96, "grad_norm": 1.2408725023269653, "learning_rate": 3.3017334199254017e-08, "loss": 0.5342, "step": 14942 }, { "epoch": 0.96, "grad_norm": 1.4202176332473755, "learning_rate": 3.289751202722902e-08, "loss": 0.5121, "step": 14943 }, { "epoch": 0.96, "grad_norm": 1.074175238609314, "learning_rate": 3.277790695443927e-08, "loss": 0.4514, "step": 14944 }, { "epoch": 0.96, "grad_norm": 1.2473633289337158, "learning_rate": 3.265851898611061e-08, "loss": 0.4786, "step": 14945 }, { "epoch": 0.96, "grad_norm": 1.1553088426589966, "learning_rate": 3.253934812746273e-08, "loss": 0.4951, "step": 14946 }, { "epoch": 0.96, "grad_norm": 1.2861945629119873, "learning_rate": 3.24203943837037e-08, "loss": 0.4766, "step": 14947 }, { "epoch": 0.96, "grad_norm": 1.1572999954223633, "learning_rate": 3.2301657760032135e-08, "loss": 0.4947, "step": 14948 }, { "epoch": 0.96, "grad_norm": 1.279482364654541, "learning_rate": 3.218313826163888e-08, "loss": 0.4907, "step": 14949 }, { "epoch": 0.97, "grad_norm": 1.159881591796875, "learning_rate": 3.206483589370368e-08, "loss": 0.4672, "step": 14950 }, { "epoch": 0.97, "grad_norm": 1.2519161701202393, "learning_rate": 3.1946750661396294e-08, "loss": 0.5114, "step": 14951 }, { "epoch": 0.97, "grad_norm": 1.2493581771850586, "learning_rate": 3.182888256987926e-08, "loss": 0.4651, "step": 14952 }, { "epoch": 0.97, "grad_norm": 1.0937799215316772, "learning_rate": 3.171123162430345e-08, "loss": 0.5255, "step": 14953 }, { "epoch": 0.97, "grad_norm": 1.2022640705108643, "learning_rate": 3.159379782981142e-08, "loss": 0.4742, "step": 14954 }, { "epoch": 0.97, "grad_norm": 1.133829116821289, "learning_rate": 3.1476581191535735e-08, "loss": 0.4977, "step": 14955 }, { "epoch": 0.97, "grad_norm": 1.2105066776275635, "learning_rate": 3.1359581714600074e-08, "loss": 0.5178, "step": 14956 }, { "epoch": 0.97, "grad_norm": 1.2232602834701538, "learning_rate": 3.124279940411756e-08, "loss": 0.4624, "step": 14957 }, { "epoch": 0.97, "grad_norm": 1.212788701057434, "learning_rate": 3.112623426519301e-08, "loss": 0.5137, "step": 14958 }, { "epoch": 0.97, "grad_norm": 1.2829360961914062, "learning_rate": 3.1009886302920675e-08, "loss": 0.5196, "step": 14959 }, { "epoch": 0.97, "grad_norm": 1.1895256042480469, "learning_rate": 3.0893755522385934e-08, "loss": 0.583, "step": 14960 }, { "epoch": 0.97, "grad_norm": 1.2169760465621948, "learning_rate": 3.0777841928664733e-08, "loss": 0.5526, "step": 14961 }, { "epoch": 0.97, "grad_norm": 1.1580026149749756, "learning_rate": 3.066214552682356e-08, "loss": 0.4886, "step": 14962 }, { "epoch": 0.97, "grad_norm": 1.2309608459472656, "learning_rate": 3.054666632191838e-08, "loss": 0.5204, "step": 14963 }, { "epoch": 0.97, "grad_norm": 1.2163251638412476, "learning_rate": 3.043140431899738e-08, "loss": 0.5112, "step": 14964 }, { "epoch": 0.97, "grad_norm": 1.107817530632019, "learning_rate": 3.031635952309819e-08, "loss": 0.4434, "step": 14965 }, { "epoch": 0.97, "grad_norm": 1.3359451293945312, "learning_rate": 3.020153193924902e-08, "loss": 0.5505, "step": 14966 }, { "epoch": 0.97, "grad_norm": 1.1621296405792236, "learning_rate": 3.0086921572468066e-08, "loss": 0.4964, "step": 14967 }, { "epoch": 0.97, "grad_norm": 1.228749394416809, "learning_rate": 2.997252842776577e-08, "loss": 0.565, "step": 14968 }, { "epoch": 0.97, "grad_norm": 1.128088355064392, "learning_rate": 2.985835251014146e-08, "loss": 0.4924, "step": 14969 }, { "epoch": 0.97, "grad_norm": 1.2948213815689087, "learning_rate": 2.974439382458505e-08, "loss": 0.5591, "step": 14970 }, { "epoch": 0.97, "grad_norm": 1.2034651041030884, "learning_rate": 2.9630652376078096e-08, "loss": 0.5019, "step": 14971 }, { "epoch": 0.97, "grad_norm": 1.2426633834838867, "learning_rate": 2.951712816959107e-08, "loss": 0.5188, "step": 14972 }, { "epoch": 0.97, "grad_norm": 1.3052061796188354, "learning_rate": 2.9403821210087225e-08, "loss": 0.4908, "step": 14973 }, { "epoch": 0.97, "grad_norm": 1.1007988452911377, "learning_rate": 2.92907315025176e-08, "loss": 0.5091, "step": 14974 }, { "epoch": 0.97, "grad_norm": 1.2005128860473633, "learning_rate": 2.9177859051825462e-08, "loss": 0.4909, "step": 14975 }, { "epoch": 0.97, "grad_norm": 1.327522873878479, "learning_rate": 2.906520386294409e-08, "loss": 0.5194, "step": 14976 }, { "epoch": 0.97, "grad_norm": 1.1871113777160645, "learning_rate": 2.8952765940797323e-08, "loss": 0.5008, "step": 14977 }, { "epoch": 0.97, "grad_norm": 1.3013700246810913, "learning_rate": 2.8840545290300115e-08, "loss": 0.556, "step": 14978 }, { "epoch": 0.97, "grad_norm": 1.2752524614334106, "learning_rate": 2.8728541916356878e-08, "loss": 0.5264, "step": 14979 }, { "epoch": 0.97, "grad_norm": 1.3912022113800049, "learning_rate": 2.8616755823862585e-08, "loss": 0.5227, "step": 14980 }, { "epoch": 0.97, "grad_norm": 1.2305337190628052, "learning_rate": 2.8505187017703885e-08, "loss": 0.48, "step": 14981 }, { "epoch": 0.97, "grad_norm": 1.3124428987503052, "learning_rate": 2.8393835502756872e-08, "loss": 0.508, "step": 14982 }, { "epoch": 0.97, "grad_norm": 1.2335468530654907, "learning_rate": 2.8282701283888214e-08, "loss": 0.5057, "step": 14983 }, { "epoch": 0.97, "grad_norm": 1.2904889583587646, "learning_rate": 2.817178436595569e-08, "loss": 0.5105, "step": 14984 }, { "epoch": 0.97, "grad_norm": 1.223517656326294, "learning_rate": 2.8061084753807088e-08, "loss": 0.5232, "step": 14985 }, { "epoch": 0.97, "grad_norm": 1.3100440502166748, "learning_rate": 2.7950602452280206e-08, "loss": 0.5377, "step": 14986 }, { "epoch": 0.97, "grad_norm": 1.3184783458709717, "learning_rate": 2.784033746620507e-08, "loss": 0.5033, "step": 14987 }, { "epoch": 0.97, "grad_norm": 1.1220216751098633, "learning_rate": 2.7730289800400045e-08, "loss": 0.4493, "step": 14988 }, { "epoch": 0.97, "grad_norm": 1.1769146919250488, "learning_rate": 2.7620459459675174e-08, "loss": 0.4911, "step": 14989 }, { "epoch": 0.97, "grad_norm": 1.2350929975509644, "learning_rate": 2.751084644883162e-08, "loss": 0.514, "step": 14990 }, { "epoch": 0.97, "grad_norm": 1.2850003242492676, "learning_rate": 2.7401450772659434e-08, "loss": 0.5029, "step": 14991 }, { "epoch": 0.97, "grad_norm": 1.3476855754852295, "learning_rate": 2.7292272435940903e-08, "loss": 0.5396, "step": 14992 }, { "epoch": 0.97, "grad_norm": 1.2161073684692383, "learning_rate": 2.718331144344666e-08, "loss": 0.5028, "step": 14993 }, { "epoch": 0.97, "grad_norm": 1.1911077499389648, "learning_rate": 2.707456779994011e-08, "loss": 0.4932, "step": 14994 }, { "epoch": 0.97, "grad_norm": 1.2920109033584595, "learning_rate": 2.6966041510174123e-08, "loss": 0.4874, "step": 14995 }, { "epoch": 0.97, "grad_norm": 1.1893094778060913, "learning_rate": 2.685773257889157e-08, "loss": 0.49, "step": 14996 }, { "epoch": 0.97, "grad_norm": 1.3280161619186401, "learning_rate": 2.6749641010827e-08, "loss": 0.5444, "step": 14997 }, { "epoch": 0.97, "grad_norm": 1.2045787572860718, "learning_rate": 2.664176681070385e-08, "loss": 0.4941, "step": 14998 }, { "epoch": 0.97, "grad_norm": 1.2007373571395874, "learning_rate": 2.65341099832378e-08, "loss": 0.5264, "step": 14999 }, { "epoch": 0.97, "grad_norm": 1.212031364440918, "learning_rate": 2.6426670533134524e-08, "loss": 0.5445, "step": 15000 }, { "epoch": 0.97, "grad_norm": 1.1336009502410889, "learning_rate": 2.6319448465089158e-08, "loss": 0.4724, "step": 15001 }, { "epoch": 0.97, "grad_norm": 1.2414652109146118, "learning_rate": 2.621244378378851e-08, "loss": 0.5299, "step": 15002 }, { "epoch": 0.97, "grad_norm": 1.3633884191513062, "learning_rate": 2.6105656493909393e-08, "loss": 0.531, "step": 15003 }, { "epoch": 0.97, "grad_norm": 1.1841282844543457, "learning_rate": 2.5999086600119185e-08, "loss": 0.4888, "step": 15004 }, { "epoch": 0.97, "grad_norm": 1.22537100315094, "learning_rate": 2.5892734107075824e-08, "loss": 0.492, "step": 15005 }, { "epoch": 0.97, "grad_norm": 1.1785248517990112, "learning_rate": 2.5786599019427815e-08, "loss": 0.5004, "step": 15006 }, { "epoch": 0.97, "grad_norm": 1.1617722511291504, "learning_rate": 2.5680681341813675e-08, "loss": 0.4456, "step": 15007 }, { "epoch": 0.97, "grad_norm": 1.1672215461730957, "learning_rate": 2.5574981078863025e-08, "loss": 0.5148, "step": 15008 }, { "epoch": 0.97, "grad_norm": 1.349532127380371, "learning_rate": 2.5469498235196067e-08, "loss": 0.5204, "step": 15009 }, { "epoch": 0.97, "grad_norm": 1.1813197135925293, "learning_rate": 2.5364232815422997e-08, "loss": 0.5099, "step": 15010 }, { "epoch": 0.97, "grad_norm": 1.1631486415863037, "learning_rate": 2.5259184824144023e-08, "loss": 0.4877, "step": 15011 }, { "epoch": 0.97, "grad_norm": 1.198701024055481, "learning_rate": 2.5154354265951585e-08, "loss": 0.5015, "step": 15012 }, { "epoch": 0.97, "grad_norm": 1.112415075302124, "learning_rate": 2.504974114542702e-08, "loss": 0.5137, "step": 15013 }, { "epoch": 0.97, "grad_norm": 1.2982091903686523, "learning_rate": 2.4945345467142224e-08, "loss": 0.5735, "step": 15014 }, { "epoch": 0.97, "grad_norm": 1.1443642377853394, "learning_rate": 2.4841167235661322e-08, "loss": 0.4713, "step": 15015 }, { "epoch": 0.97, "grad_norm": 1.1830203533172607, "learning_rate": 2.4737206455536233e-08, "loss": 0.4762, "step": 15016 }, { "epoch": 0.97, "grad_norm": 1.181743860244751, "learning_rate": 2.463346313131165e-08, "loss": 0.541, "step": 15017 }, { "epoch": 0.97, "grad_norm": 1.182541012763977, "learning_rate": 2.452993726752173e-08, "loss": 0.4971, "step": 15018 }, { "epoch": 0.97, "grad_norm": 1.3317997455596924, "learning_rate": 2.4426628868691737e-08, "loss": 0.5564, "step": 15019 }, { "epoch": 0.97, "grad_norm": 1.2119776010513306, "learning_rate": 2.4323537939336395e-08, "loss": 0.4964, "step": 15020 }, { "epoch": 0.97, "grad_norm": 1.203614592552185, "learning_rate": 2.42206644839621e-08, "loss": 0.5166, "step": 15021 }, { "epoch": 0.97, "grad_norm": 1.1809993982315063, "learning_rate": 2.4118008507064138e-08, "loss": 0.4739, "step": 15022 }, { "epoch": 0.97, "grad_norm": 1.1134904623031616, "learning_rate": 2.4015570013130596e-08, "loss": 0.4991, "step": 15023 }, { "epoch": 0.97, "grad_norm": 1.3681432008743286, "learning_rate": 2.391334900663844e-08, "loss": 0.5301, "step": 15024 }, { "epoch": 0.97, "grad_norm": 1.1811274290084839, "learning_rate": 2.3811345492054662e-08, "loss": 0.498, "step": 15025 }, { "epoch": 0.97, "grad_norm": 1.3167129755020142, "learning_rate": 2.3709559473839016e-08, "loss": 0.5015, "step": 15026 }, { "epoch": 0.97, "grad_norm": 1.2613836526870728, "learning_rate": 2.3607990956439063e-08, "loss": 0.5121, "step": 15027 }, { "epoch": 0.97, "grad_norm": 1.1894680261611938, "learning_rate": 2.3506639944294585e-08, "loss": 0.523, "step": 15028 }, { "epoch": 0.97, "grad_norm": 1.210914134979248, "learning_rate": 2.3405506441835923e-08, "loss": 0.5132, "step": 15029 }, { "epoch": 0.97, "grad_norm": 1.207903504371643, "learning_rate": 2.3304590453481767e-08, "loss": 0.5295, "step": 15030 }, { "epoch": 0.97, "grad_norm": 1.207208275794983, "learning_rate": 2.32038919836447e-08, "loss": 0.5382, "step": 15031 }, { "epoch": 0.97, "grad_norm": 1.268135666847229, "learning_rate": 2.3103411036725086e-08, "loss": 0.5435, "step": 15032 }, { "epoch": 0.97, "grad_norm": 1.2043859958648682, "learning_rate": 2.3003147617114974e-08, "loss": 0.4896, "step": 15033 }, { "epoch": 0.97, "grad_norm": 1.3785834312438965, "learning_rate": 2.2903101729195854e-08, "loss": 0.5242, "step": 15034 }, { "epoch": 0.97, "grad_norm": 1.3182038068771362, "learning_rate": 2.2803273377341452e-08, "loss": 0.4892, "step": 15035 }, { "epoch": 0.97, "grad_norm": 1.2281537055969238, "learning_rate": 2.2703662565914386e-08, "loss": 0.5683, "step": 15036 }, { "epoch": 0.97, "grad_norm": 1.1721255779266357, "learning_rate": 2.2604269299268954e-08, "loss": 0.5111, "step": 15037 }, { "epoch": 0.97, "grad_norm": 1.3316094875335693, "learning_rate": 2.2505093581748904e-08, "loss": 0.4939, "step": 15038 }, { "epoch": 0.97, "grad_norm": 1.2284553050994873, "learning_rate": 2.24061354176891e-08, "loss": 0.4971, "step": 15039 }, { "epoch": 0.97, "grad_norm": 1.200661301612854, "learning_rate": 2.2307394811414973e-08, "loss": 0.5007, "step": 15040 }, { "epoch": 0.97, "grad_norm": 1.1621140241622925, "learning_rate": 2.2208871767241956e-08, "loss": 0.4954, "step": 15041 }, { "epoch": 0.97, "grad_norm": 1.225520133972168, "learning_rate": 2.2110566289476053e-08, "loss": 0.54, "step": 15042 }, { "epoch": 0.97, "grad_norm": 1.2180521488189697, "learning_rate": 2.201247838241438e-08, "loss": 0.538, "step": 15043 }, { "epoch": 0.97, "grad_norm": 1.2686054706573486, "learning_rate": 2.191460805034351e-08, "loss": 0.5091, "step": 15044 }, { "epoch": 0.97, "grad_norm": 1.1592345237731934, "learning_rate": 2.1816955297541685e-08, "loss": 0.4896, "step": 15045 }, { "epoch": 0.97, "grad_norm": 1.1746317148208618, "learning_rate": 2.1719520128277162e-08, "loss": 0.5183, "step": 15046 }, { "epoch": 0.97, "grad_norm": 1.1099299192428589, "learning_rate": 2.1622302546808194e-08, "loss": 0.5054, "step": 15047 }, { "epoch": 0.97, "grad_norm": 1.308905005455017, "learning_rate": 2.1525302557383608e-08, "loss": 0.5073, "step": 15048 }, { "epoch": 0.97, "grad_norm": 1.2075918912887573, "learning_rate": 2.1428520164243906e-08, "loss": 0.4766, "step": 15049 }, { "epoch": 0.97, "grad_norm": 1.2220872640609741, "learning_rate": 2.1331955371618475e-08, "loss": 0.5088, "step": 15050 }, { "epoch": 0.97, "grad_norm": 1.1821104288101196, "learning_rate": 2.123560818372783e-08, "loss": 0.4992, "step": 15051 }, { "epoch": 0.97, "grad_norm": 1.113665223121643, "learning_rate": 2.1139478604783604e-08, "loss": 0.4582, "step": 15052 }, { "epoch": 0.97, "grad_norm": 1.26312255859375, "learning_rate": 2.1043566638987435e-08, "loss": 0.503, "step": 15053 }, { "epoch": 0.97, "grad_norm": 1.2125931978225708, "learning_rate": 2.094787229053097e-08, "loss": 0.482, "step": 15054 }, { "epoch": 0.97, "grad_norm": 1.316112995147705, "learning_rate": 2.0852395563596417e-08, "loss": 0.528, "step": 15055 }, { "epoch": 0.97, "grad_norm": 1.196850061416626, "learning_rate": 2.0757136462357665e-08, "loss": 0.5202, "step": 15056 }, { "epoch": 0.97, "grad_norm": 1.2725898027420044, "learning_rate": 2.0662094990978044e-08, "loss": 0.483, "step": 15057 }, { "epoch": 0.97, "grad_norm": 1.2652506828308105, "learning_rate": 2.0567271153610902e-08, "loss": 0.5185, "step": 15058 }, { "epoch": 0.97, "grad_norm": 1.2018908262252808, "learning_rate": 2.047266495440181e-08, "loss": 0.5014, "step": 15059 }, { "epoch": 0.97, "grad_norm": 1.3272937536239624, "learning_rate": 2.0378276397484687e-08, "loss": 0.5295, "step": 15060 }, { "epoch": 0.97, "grad_norm": 1.1917182207107544, "learning_rate": 2.0284105486985673e-08, "loss": 0.5263, "step": 15061 }, { "epoch": 0.97, "grad_norm": 1.3287476301193237, "learning_rate": 2.019015222702092e-08, "loss": 0.5608, "step": 15062 }, { "epoch": 0.97, "grad_norm": 1.2712135314941406, "learning_rate": 2.0096416621696035e-08, "loss": 0.4835, "step": 15063 }, { "epoch": 0.97, "grad_norm": 1.2050749063491821, "learning_rate": 2.0002898675108295e-08, "loss": 0.5258, "step": 15064 }, { "epoch": 0.97, "grad_norm": 1.2417234182357788, "learning_rate": 1.9909598391345542e-08, "loss": 0.5262, "step": 15065 }, { "epoch": 0.97, "grad_norm": 1.1841280460357666, "learning_rate": 1.9816515774485623e-08, "loss": 0.5156, "step": 15066 }, { "epoch": 0.97, "grad_norm": 1.1771690845489502, "learning_rate": 1.972365082859695e-08, "loss": 0.5201, "step": 15067 }, { "epoch": 0.97, "grad_norm": 1.1909054517745972, "learning_rate": 1.963100355773795e-08, "loss": 0.5159, "step": 15068 }, { "epoch": 0.97, "grad_norm": 1.191444754600525, "learning_rate": 1.9538573965958153e-08, "loss": 0.4861, "step": 15069 }, { "epoch": 0.97, "grad_norm": 1.2952899932861328, "learning_rate": 1.9446362057297663e-08, "loss": 0.48, "step": 15070 }, { "epoch": 0.97, "grad_norm": 1.1716160774230957, "learning_rate": 1.935436783578659e-08, "loss": 0.4676, "step": 15071 }, { "epoch": 0.97, "grad_norm": 1.264045238494873, "learning_rate": 1.926259130544561e-08, "loss": 0.5014, "step": 15072 }, { "epoch": 0.97, "grad_norm": 1.333768367767334, "learning_rate": 1.917103247028651e-08, "loss": 0.4937, "step": 15073 }, { "epoch": 0.97, "grad_norm": 1.2321351766586304, "learning_rate": 1.907969133431109e-08, "loss": 0.4903, "step": 15074 }, { "epoch": 0.97, "grad_norm": 1.1978577375411987, "learning_rate": 1.8988567901511158e-08, "loss": 0.5047, "step": 15075 }, { "epoch": 0.97, "grad_norm": 1.2430274486541748, "learning_rate": 1.8897662175870192e-08, "loss": 0.5402, "step": 15076 }, { "epoch": 0.97, "grad_norm": 1.2493561506271362, "learning_rate": 1.880697416136057e-08, "loss": 0.5171, "step": 15077 }, { "epoch": 0.97, "grad_norm": 1.1816811561584473, "learning_rate": 1.8716503861946344e-08, "loss": 0.4978, "step": 15078 }, { "epoch": 0.97, "grad_norm": 1.1145468950271606, "learning_rate": 1.8626251281582132e-08, "loss": 0.5153, "step": 15079 }, { "epoch": 0.97, "grad_norm": 1.2361822128295898, "learning_rate": 1.853621642421255e-08, "loss": 0.5123, "step": 15080 }, { "epoch": 0.97, "grad_norm": 1.2511142492294312, "learning_rate": 1.8446399293772232e-08, "loss": 0.4931, "step": 15081 }, { "epoch": 0.97, "grad_norm": 1.1954177618026733, "learning_rate": 1.8356799894186926e-08, "loss": 0.5121, "step": 15082 }, { "epoch": 0.97, "grad_norm": 1.1174265146255493, "learning_rate": 1.8267418229373503e-08, "loss": 0.4774, "step": 15083 }, { "epoch": 0.97, "grad_norm": 1.1605255603790283, "learning_rate": 1.8178254303238275e-08, "loss": 0.5282, "step": 15084 }, { "epoch": 0.97, "grad_norm": 1.2883198261260986, "learning_rate": 1.808930811967813e-08, "loss": 0.4918, "step": 15085 }, { "epoch": 0.97, "grad_norm": 1.2949674129486084, "learning_rate": 1.8000579682581065e-08, "loss": 0.5669, "step": 15086 }, { "epoch": 0.97, "grad_norm": 1.1943200826644897, "learning_rate": 1.791206899582454e-08, "loss": 0.4459, "step": 15087 }, { "epoch": 0.97, "grad_norm": 1.331079125404358, "learning_rate": 1.7823776063277675e-08, "loss": 0.5526, "step": 15088 }, { "epoch": 0.97, "grad_norm": 1.1787246465682983, "learning_rate": 1.773570088879961e-08, "loss": 0.5069, "step": 15089 }, { "epoch": 0.97, "grad_norm": 1.1901679039001465, "learning_rate": 1.7647843476238934e-08, "loss": 0.5108, "step": 15090 }, { "epoch": 0.97, "grad_norm": 1.1867047548294067, "learning_rate": 1.7560203829437573e-08, "loss": 0.509, "step": 15091 }, { "epoch": 0.97, "grad_norm": 1.1635853052139282, "learning_rate": 1.7472781952223573e-08, "loss": 0.4653, "step": 15092 }, { "epoch": 0.97, "grad_norm": 1.203133225440979, "learning_rate": 1.738557784841999e-08, "loss": 0.5328, "step": 15093 }, { "epoch": 0.97, "grad_norm": 1.1792505979537964, "learning_rate": 1.7298591521837104e-08, "loss": 0.5106, "step": 15094 }, { "epoch": 0.97, "grad_norm": 1.1866549253463745, "learning_rate": 1.7211822976277438e-08, "loss": 0.5194, "step": 15095 }, { "epoch": 0.97, "grad_norm": 1.977675437927246, "learning_rate": 1.712527221553295e-08, "loss": 0.4843, "step": 15096 }, { "epoch": 0.97, "grad_norm": 1.2649269104003906, "learning_rate": 1.7038939243387286e-08, "loss": 0.5364, "step": 15097 }, { "epoch": 0.97, "grad_norm": 1.2438567876815796, "learning_rate": 1.695282406361298e-08, "loss": 0.5212, "step": 15098 }, { "epoch": 0.97, "grad_norm": 1.1211148500442505, "learning_rate": 1.6866926679974804e-08, "loss": 0.4721, "step": 15099 }, { "epoch": 0.97, "grad_norm": 1.1159299612045288, "learning_rate": 1.678124709622586e-08, "loss": 0.4851, "step": 15100 }, { "epoch": 0.97, "grad_norm": 1.2014880180358887, "learning_rate": 1.6695785316112044e-08, "loss": 0.5115, "step": 15101 }, { "epoch": 0.97, "grad_norm": 1.1978601217269897, "learning_rate": 1.6610541343368702e-08, "loss": 0.4903, "step": 15102 }, { "epoch": 0.97, "grad_norm": 1.2090955972671509, "learning_rate": 1.6525515181721184e-08, "loss": 0.4843, "step": 15103 }, { "epoch": 0.97, "grad_norm": 1.1860359907150269, "learning_rate": 1.644070683488541e-08, "loss": 0.4862, "step": 15104 }, { "epoch": 0.98, "grad_norm": 1.1747260093688965, "learning_rate": 1.635611630656897e-08, "loss": 0.4773, "step": 15105 }, { "epoch": 0.98, "grad_norm": 1.1955615282058716, "learning_rate": 1.6271743600468905e-08, "loss": 0.5547, "step": 15106 }, { "epoch": 0.98, "grad_norm": 1.2317397594451904, "learning_rate": 1.6187588720272263e-08, "loss": 0.4827, "step": 15107 }, { "epoch": 0.98, "grad_norm": 1.2656959295272827, "learning_rate": 1.610365166965777e-08, "loss": 0.4815, "step": 15108 }, { "epoch": 0.98, "grad_norm": 1.1748467683792114, "learning_rate": 1.6019932452294162e-08, "loss": 0.486, "step": 15109 }, { "epoch": 0.98, "grad_norm": 1.1377450227737427, "learning_rate": 1.593643107184073e-08, "loss": 0.4924, "step": 15110 }, { "epoch": 0.98, "grad_norm": 1.1504696607589722, "learning_rate": 1.5853147531946224e-08, "loss": 0.4795, "step": 15111 }, { "epoch": 0.98, "grad_norm": 1.1709520816802979, "learning_rate": 1.577008183625217e-08, "loss": 0.5029, "step": 15112 }, { "epoch": 0.98, "grad_norm": 1.2311288118362427, "learning_rate": 1.568723398838734e-08, "loss": 0.5199, "step": 15113 }, { "epoch": 0.98, "grad_norm": 1.2710851430892944, "learning_rate": 1.560460399197494e-08, "loss": 0.5345, "step": 15114 }, { "epoch": 0.98, "grad_norm": 1.2141742706298828, "learning_rate": 1.552219185062487e-08, "loss": 0.54, "step": 15115 }, { "epoch": 0.98, "grad_norm": 1.2572795152664185, "learning_rate": 1.5439997567939234e-08, "loss": 0.4852, "step": 15116 }, { "epoch": 0.98, "grad_norm": 1.1283237934112549, "learning_rate": 1.5358021147511282e-08, "loss": 0.4794, "step": 15117 }, { "epoch": 0.98, "grad_norm": 1.228497862815857, "learning_rate": 1.5276262592923696e-08, "loss": 0.4822, "step": 15118 }, { "epoch": 0.98, "grad_norm": 1.3175054788589478, "learning_rate": 1.5194721907750286e-08, "loss": 0.5379, "step": 15119 }, { "epoch": 0.98, "grad_norm": 1.2307490110397339, "learning_rate": 1.5113399095554315e-08, "loss": 0.5466, "step": 15120 }, { "epoch": 0.98, "grad_norm": 1.2496709823608398, "learning_rate": 1.5032294159890713e-08, "loss": 0.5508, "step": 15121 }, { "epoch": 0.98, "grad_norm": 1.1762181520462036, "learning_rate": 1.4951407104303872e-08, "loss": 0.4854, "step": 15122 }, { "epoch": 0.98, "grad_norm": 1.3019993305206299, "learning_rate": 1.4870737932329293e-08, "loss": 0.5407, "step": 15123 }, { "epoch": 0.98, "grad_norm": 1.2388166189193726, "learning_rate": 1.4790286647493045e-08, "loss": 0.5043, "step": 15124 }, { "epoch": 0.98, "grad_norm": 1.3601969480514526, "learning_rate": 1.4710053253311208e-08, "loss": 0.518, "step": 15125 }, { "epoch": 0.98, "grad_norm": 1.2164887189865112, "learning_rate": 1.4630037753291526e-08, "loss": 0.4537, "step": 15126 }, { "epoch": 0.98, "grad_norm": 1.2302652597427368, "learning_rate": 1.455024015092954e-08, "loss": 0.5261, "step": 15127 }, { "epoch": 0.98, "grad_norm": 1.2214604616165161, "learning_rate": 1.4470660449714124e-08, "loss": 0.5033, "step": 15128 }, { "epoch": 0.98, "grad_norm": 1.1941663026809692, "learning_rate": 1.4391298653123609e-08, "loss": 0.5077, "step": 15129 }, { "epoch": 0.98, "grad_norm": 1.246354341506958, "learning_rate": 1.431215476462633e-08, "loss": 0.5259, "step": 15130 }, { "epoch": 0.98, "grad_norm": 1.1532894372940063, "learning_rate": 1.4233228787681186e-08, "loss": 0.5003, "step": 15131 }, { "epoch": 0.98, "grad_norm": 1.2408510446548462, "learning_rate": 1.4154520725738752e-08, "loss": 0.5233, "step": 15132 }, { "epoch": 0.98, "grad_norm": 1.1420339345932007, "learning_rate": 1.4076030582237942e-08, "loss": 0.4684, "step": 15133 }, { "epoch": 0.98, "grad_norm": 1.2571885585784912, "learning_rate": 1.3997758360610458e-08, "loss": 0.484, "step": 15134 }, { "epoch": 0.98, "grad_norm": 1.1411994695663452, "learning_rate": 1.3919704064276895e-08, "loss": 0.4752, "step": 15135 }, { "epoch": 0.98, "grad_norm": 1.284521222114563, "learning_rate": 1.3841867696648415e-08, "loss": 0.5084, "step": 15136 }, { "epoch": 0.98, "grad_norm": 1.2327626943588257, "learning_rate": 1.3764249261127294e-08, "loss": 0.5209, "step": 15137 }, { "epoch": 0.98, "grad_norm": 1.2997322082519531, "learning_rate": 1.368684876110693e-08, "loss": 0.5435, "step": 15138 }, { "epoch": 0.98, "grad_norm": 1.1580579280853271, "learning_rate": 1.3609666199969062e-08, "loss": 0.463, "step": 15139 }, { "epoch": 0.98, "grad_norm": 1.2603569030761719, "learning_rate": 1.3532701581087659e-08, "loss": 0.5208, "step": 15140 }, { "epoch": 0.98, "grad_norm": 1.2309626340866089, "learning_rate": 1.3455954907826696e-08, "loss": 0.4848, "step": 15141 }, { "epoch": 0.98, "grad_norm": 1.1680783033370972, "learning_rate": 1.3379426183540157e-08, "loss": 0.4941, "step": 15142 }, { "epoch": 0.98, "grad_norm": 1.2589976787567139, "learning_rate": 1.3303115411573697e-08, "loss": 0.4836, "step": 15143 }, { "epoch": 0.98, "grad_norm": 1.1619412899017334, "learning_rate": 1.322702259526243e-08, "loss": 0.5189, "step": 15144 }, { "epoch": 0.98, "grad_norm": 1.2079637050628662, "learning_rate": 1.315114773793147e-08, "loss": 0.5174, "step": 15145 }, { "epoch": 0.98, "grad_norm": 1.269074559211731, "learning_rate": 1.3075490842897609e-08, "loss": 0.5113, "step": 15146 }, { "epoch": 0.98, "grad_norm": 1.20255708694458, "learning_rate": 1.3000051913467649e-08, "loss": 0.5666, "step": 15147 }, { "epoch": 0.98, "grad_norm": 1.0996806621551514, "learning_rate": 1.292483095293895e-08, "loss": 0.4488, "step": 15148 }, { "epoch": 0.98, "grad_norm": 1.238501787185669, "learning_rate": 1.2849827964598882e-08, "loss": 0.5333, "step": 15149 }, { "epoch": 0.98, "grad_norm": 1.2443121671676636, "learning_rate": 1.2775042951725935e-08, "loss": 0.5072, "step": 15150 }, { "epoch": 0.98, "grad_norm": 1.2252401113510132, "learning_rate": 1.2700475917588607e-08, "loss": 0.5528, "step": 15151 }, { "epoch": 0.98, "grad_norm": 1.4470405578613281, "learning_rate": 1.2626126865445953e-08, "loss": 0.542, "step": 15152 }, { "epoch": 0.98, "grad_norm": 1.2025883197784424, "learning_rate": 1.25519957985476e-08, "loss": 0.4917, "step": 15153 }, { "epoch": 0.98, "grad_norm": 1.1632540225982666, "learning_rate": 1.2478082720134288e-08, "loss": 0.4549, "step": 15154 }, { "epoch": 0.98, "grad_norm": 1.211719036102295, "learning_rate": 1.2404387633435655e-08, "loss": 0.5054, "step": 15155 }, { "epoch": 0.98, "grad_norm": 1.204418659210205, "learning_rate": 1.2330910541673013e-08, "loss": 0.4629, "step": 15156 }, { "epoch": 0.98, "grad_norm": 1.2641618251800537, "learning_rate": 1.2257651448058238e-08, "loss": 0.4998, "step": 15157 }, { "epoch": 0.98, "grad_norm": 1.1548562049865723, "learning_rate": 1.2184610355792658e-08, "loss": 0.502, "step": 15158 }, { "epoch": 0.98, "grad_norm": 1.24359929561615, "learning_rate": 1.211178726806872e-08, "loss": 0.5109, "step": 15159 }, { "epoch": 0.98, "grad_norm": 1.3523993492126465, "learning_rate": 1.2039182188069987e-08, "loss": 0.5185, "step": 15160 }, { "epoch": 0.98, "grad_norm": 1.222121000289917, "learning_rate": 1.1966795118969476e-08, "loss": 0.5691, "step": 15161 }, { "epoch": 0.98, "grad_norm": 1.1769953966140747, "learning_rate": 1.1894626063931325e-08, "loss": 0.4876, "step": 15162 }, { "epoch": 0.98, "grad_norm": 1.1375824213027954, "learning_rate": 1.182267502610912e-08, "loss": 0.4957, "step": 15163 }, { "epoch": 0.98, "grad_norm": 1.1358855962753296, "learning_rate": 1.175094200864868e-08, "loss": 0.4805, "step": 15164 }, { "epoch": 0.98, "grad_norm": 1.2676805257797241, "learning_rate": 1.1679427014684164e-08, "loss": 0.5066, "step": 15165 }, { "epoch": 0.98, "grad_norm": 1.2160899639129639, "learning_rate": 1.1608130047341959e-08, "loss": 0.5287, "step": 15166 }, { "epoch": 0.98, "grad_norm": 1.3424739837646484, "learning_rate": 1.1537051109738463e-08, "loss": 0.5006, "step": 15167 }, { "epoch": 0.98, "grad_norm": 1.1532478332519531, "learning_rate": 1.146619020497952e-08, "loss": 0.4929, "step": 15168 }, { "epoch": 0.98, "grad_norm": 1.2766387462615967, "learning_rate": 1.1395547336163216e-08, "loss": 0.5132, "step": 15169 }, { "epoch": 0.98, "grad_norm": 1.1640260219573975, "learning_rate": 1.1325122506377073e-08, "loss": 0.4793, "step": 15170 }, { "epoch": 0.98, "grad_norm": 1.22823166847229, "learning_rate": 1.1254915718698633e-08, "loss": 0.5099, "step": 15171 }, { "epoch": 0.98, "grad_norm": 1.2176905870437622, "learning_rate": 1.1184926976195997e-08, "loss": 0.4564, "step": 15172 }, { "epoch": 0.98, "grad_norm": 1.249462604522705, "learning_rate": 1.1115156281929496e-08, "loss": 0.5472, "step": 15173 }, { "epoch": 0.98, "grad_norm": 1.2687495946884155, "learning_rate": 1.1045603638948354e-08, "loss": 0.5406, "step": 15174 }, { "epoch": 0.98, "grad_norm": 1.1165435314178467, "learning_rate": 1.097626905029181e-08, "loss": 0.5195, "step": 15175 }, { "epoch": 0.98, "grad_norm": 1.3172905445098877, "learning_rate": 1.0907152518990772e-08, "loss": 0.5092, "step": 15176 }, { "epoch": 0.98, "grad_norm": 1.2329641580581665, "learning_rate": 1.08382540480656e-08, "loss": 0.5293, "step": 15177 }, { "epoch": 0.98, "grad_norm": 1.1988219022750854, "learning_rate": 1.0769573640528885e-08, "loss": 0.5015, "step": 15178 }, { "epoch": 0.98, "grad_norm": 1.2127268314361572, "learning_rate": 1.0701111299381562e-08, "loss": 0.5147, "step": 15179 }, { "epoch": 0.98, "grad_norm": 1.1889854669570923, "learning_rate": 1.0632867027615678e-08, "loss": 0.4617, "step": 15180 }, { "epoch": 0.98, "grad_norm": 1.235856294631958, "learning_rate": 1.0564840828214407e-08, "loss": 0.5095, "step": 15181 }, { "epoch": 0.98, "grad_norm": 1.2777594327926636, "learning_rate": 1.0497032704151477e-08, "loss": 0.53, "step": 15182 }, { "epoch": 0.98, "grad_norm": 1.2012380361557007, "learning_rate": 1.0429442658390076e-08, "loss": 0.5258, "step": 15183 }, { "epoch": 0.98, "grad_norm": 1.23689603805542, "learning_rate": 1.0362070693883952e-08, "loss": 0.53, "step": 15184 }, { "epoch": 0.98, "grad_norm": 1.230828046798706, "learning_rate": 1.0294916813579082e-08, "loss": 0.5413, "step": 15185 }, { "epoch": 0.98, "grad_norm": 1.364583134651184, "learning_rate": 1.0227981020409227e-08, "loss": 0.5455, "step": 15186 }, { "epoch": 0.98, "grad_norm": 1.2746998071670532, "learning_rate": 1.016126331730094e-08, "loss": 0.534, "step": 15187 }, { "epoch": 0.98, "grad_norm": 1.2721654176712036, "learning_rate": 1.0094763707169664e-08, "loss": 0.4691, "step": 15188 }, { "epoch": 0.98, "grad_norm": 1.3407080173492432, "learning_rate": 1.0028482192921961e-08, "loss": 0.4883, "step": 15189 }, { "epoch": 0.98, "grad_norm": 1.2244937419891357, "learning_rate": 9.962418777454964e-09, "loss": 0.5292, "step": 15190 }, { "epoch": 0.98, "grad_norm": 1.1823294162750244, "learning_rate": 9.89657346365691e-09, "loss": 0.5212, "step": 15191 }, { "epoch": 0.98, "grad_norm": 1.1794943809509277, "learning_rate": 9.830946254404395e-09, "loss": 0.5118, "step": 15192 }, { "epoch": 0.98, "grad_norm": 1.3102774620056152, "learning_rate": 9.765537152566229e-09, "loss": 0.4717, "step": 15193 }, { "epoch": 0.98, "grad_norm": 1.307742953300476, "learning_rate": 9.700346161001794e-09, "loss": 0.5327, "step": 15194 }, { "epoch": 0.98, "grad_norm": 1.1431130170822144, "learning_rate": 9.635373282559924e-09, "loss": 0.5283, "step": 15195 }, { "epoch": 0.98, "grad_norm": 1.1328575611114502, "learning_rate": 9.570618520080566e-09, "loss": 0.4834, "step": 15196 }, { "epoch": 0.98, "grad_norm": 1.2108781337738037, "learning_rate": 9.506081876394235e-09, "loss": 0.5398, "step": 15197 }, { "epoch": 0.98, "grad_norm": 1.3016185760498047, "learning_rate": 9.441763354321454e-09, "loss": 0.515, "step": 15198 }, { "epoch": 0.98, "grad_norm": 1.4888663291931152, "learning_rate": 9.37766295667275e-09, "loss": 0.4738, "step": 15199 }, { "epoch": 0.98, "grad_norm": 1.1879218816757202, "learning_rate": 9.313780686250885e-09, "loss": 0.4983, "step": 15200 }, { "epoch": 0.98, "grad_norm": 1.4194084405899048, "learning_rate": 9.250116545847509e-09, "loss": 0.5417, "step": 15201 }, { "epoch": 0.98, "grad_norm": 1.3342933654785156, "learning_rate": 9.186670538245401e-09, "loss": 0.5452, "step": 15202 }, { "epoch": 0.98, "grad_norm": 1.2430685758590698, "learning_rate": 9.123442666217342e-09, "loss": 0.5056, "step": 15203 }, { "epoch": 0.98, "grad_norm": 1.1734111309051514, "learning_rate": 9.060432932526674e-09, "loss": 0.4638, "step": 15204 }, { "epoch": 0.98, "grad_norm": 1.2339197397232056, "learning_rate": 8.997641339927865e-09, "loss": 0.4883, "step": 15205 }, { "epoch": 0.98, "grad_norm": 1.1707216501235962, "learning_rate": 8.935067891165384e-09, "loss": 0.4949, "step": 15206 }, { "epoch": 0.98, "grad_norm": 1.3094018697738647, "learning_rate": 8.87271258897371e-09, "loss": 0.5182, "step": 15207 }, { "epoch": 0.98, "grad_norm": 1.2420215606689453, "learning_rate": 8.810575436078993e-09, "loss": 0.5171, "step": 15208 }, { "epoch": 0.98, "grad_norm": 1.3634178638458252, "learning_rate": 8.748656435195735e-09, "loss": 0.4991, "step": 15209 }, { "epoch": 0.98, "grad_norm": 1.1989401578903198, "learning_rate": 8.686955589031765e-09, "loss": 0.5518, "step": 15210 }, { "epoch": 0.98, "grad_norm": 1.1612637042999268, "learning_rate": 8.625472900283261e-09, "loss": 0.5238, "step": 15211 }, { "epoch": 0.98, "grad_norm": 1.39320707321167, "learning_rate": 8.564208371636961e-09, "loss": 0.5209, "step": 15212 }, { "epoch": 0.98, "grad_norm": 1.1711674928665161, "learning_rate": 8.503162005771281e-09, "loss": 0.5126, "step": 15213 }, { "epoch": 0.98, "grad_norm": 1.19697904586792, "learning_rate": 8.442333805354086e-09, "loss": 0.516, "step": 15214 }, { "epoch": 0.98, "grad_norm": 1.2340052127838135, "learning_rate": 8.381723773044359e-09, "loss": 0.4254, "step": 15215 }, { "epoch": 0.98, "grad_norm": 1.2829231023788452, "learning_rate": 8.321331911490538e-09, "loss": 0.5068, "step": 15216 }, { "epoch": 0.98, "grad_norm": 1.242016315460205, "learning_rate": 8.261158223332733e-09, "loss": 0.4958, "step": 15217 }, { "epoch": 0.98, "grad_norm": 1.112712025642395, "learning_rate": 8.201202711200507e-09, "loss": 0.5062, "step": 15218 }, { "epoch": 0.98, "grad_norm": 1.1414942741394043, "learning_rate": 8.141465377715097e-09, "loss": 0.512, "step": 15219 }, { "epoch": 0.98, "grad_norm": 1.2335917949676514, "learning_rate": 8.081946225487192e-09, "loss": 0.513, "step": 15220 }, { "epoch": 0.98, "grad_norm": 1.2499468326568604, "learning_rate": 8.022645257118045e-09, "loss": 0.4937, "step": 15221 }, { "epoch": 0.98, "grad_norm": 1.3094178438186646, "learning_rate": 7.963562475199471e-09, "loss": 0.5345, "step": 15222 }, { "epoch": 0.98, "grad_norm": 1.279592514038086, "learning_rate": 7.90469788231385e-09, "loss": 0.5097, "step": 15223 }, { "epoch": 0.98, "grad_norm": 1.5947827100753784, "learning_rate": 7.846051481034123e-09, "loss": 0.481, "step": 15224 }, { "epoch": 0.98, "grad_norm": 1.3062134981155396, "learning_rate": 7.787623273923795e-09, "loss": 0.514, "step": 15225 }, { "epoch": 0.98, "grad_norm": 1.2274552583694458, "learning_rate": 7.729413263536378e-09, "loss": 0.5462, "step": 15226 }, { "epoch": 0.98, "grad_norm": 1.2520208358764648, "learning_rate": 7.67142145241595e-09, "loss": 0.5013, "step": 15227 }, { "epoch": 0.98, "grad_norm": 1.2876187562942505, "learning_rate": 7.613647843097149e-09, "loss": 0.5223, "step": 15228 }, { "epoch": 0.98, "grad_norm": 1.1200889348983765, "learning_rate": 7.556092438105734e-09, "loss": 0.4966, "step": 15229 }, { "epoch": 0.98, "grad_norm": 1.2635778188705444, "learning_rate": 7.498755239956357e-09, "loss": 0.512, "step": 15230 }, { "epoch": 0.98, "grad_norm": 1.2012345790863037, "learning_rate": 7.4416362511559034e-09, "loss": 0.5062, "step": 15231 }, { "epoch": 0.98, "grad_norm": 1.1173630952835083, "learning_rate": 7.384735474200155e-09, "loss": 0.5093, "step": 15232 }, { "epoch": 0.98, "grad_norm": 1.1234543323516846, "learning_rate": 7.328052911577121e-09, "loss": 0.4968, "step": 15233 }, { "epoch": 0.98, "grad_norm": 1.2576289176940918, "learning_rate": 7.271588565763155e-09, "loss": 0.4905, "step": 15234 }, { "epoch": 0.98, "grad_norm": 1.2264474630355835, "learning_rate": 7.215342439226281e-09, "loss": 0.527, "step": 15235 }, { "epoch": 0.98, "grad_norm": 1.1329439878463745, "learning_rate": 7.159314534425643e-09, "loss": 0.506, "step": 15236 }, { "epoch": 0.98, "grad_norm": 1.4977422952651978, "learning_rate": 7.10350485380984e-09, "loss": 0.5525, "step": 15237 }, { "epoch": 0.98, "grad_norm": 1.231839656829834, "learning_rate": 7.0479133998180295e-09, "loss": 0.5108, "step": 15238 }, { "epoch": 0.98, "grad_norm": 1.2787224054336548, "learning_rate": 6.992540174879936e-09, "loss": 0.5563, "step": 15239 }, { "epoch": 0.98, "grad_norm": 1.2305911779403687, "learning_rate": 6.9373851814152905e-09, "loss": 0.499, "step": 15240 }, { "epoch": 0.98, "grad_norm": 1.216318130493164, "learning_rate": 6.882448421835497e-09, "loss": 0.5175, "step": 15241 }, { "epoch": 0.98, "grad_norm": 1.2073726654052734, "learning_rate": 6.827729898541968e-09, "loss": 0.4864, "step": 15242 }, { "epoch": 0.98, "grad_norm": 1.160848617553711, "learning_rate": 6.773229613925569e-09, "loss": 0.5133, "step": 15243 }, { "epoch": 0.98, "grad_norm": 1.2014790773391724, "learning_rate": 6.718947570368284e-09, "loss": 0.4771, "step": 15244 }, { "epoch": 0.98, "grad_norm": 1.1582118272781372, "learning_rate": 6.664883770243214e-09, "loss": 0.5385, "step": 15245 }, { "epoch": 0.98, "grad_norm": 1.1722368001937866, "learning_rate": 6.611038215912913e-09, "loss": 0.5217, "step": 15246 }, { "epoch": 0.98, "grad_norm": 1.3502849340438843, "learning_rate": 6.557410909731054e-09, "loss": 0.5486, "step": 15247 }, { "epoch": 0.98, "grad_norm": 1.2141270637512207, "learning_rate": 6.5040018540413196e-09, "loss": 0.4724, "step": 15248 }, { "epoch": 0.98, "grad_norm": 1.29718017578125, "learning_rate": 6.450811051178507e-09, "loss": 0.5133, "step": 15249 }, { "epoch": 0.98, "grad_norm": 1.1870132684707642, "learning_rate": 6.397838503467424e-09, "loss": 0.5319, "step": 15250 }, { "epoch": 0.98, "grad_norm": 1.1860851049423218, "learning_rate": 6.345084213222885e-09, "loss": 0.4496, "step": 15251 }, { "epoch": 0.98, "grad_norm": 1.2472753524780273, "learning_rate": 6.292548182750824e-09, "loss": 0.5482, "step": 15252 }, { "epoch": 0.98, "grad_norm": 1.036404013633728, "learning_rate": 6.240230414347736e-09, "loss": 0.4867, "step": 15253 }, { "epoch": 0.98, "grad_norm": 1.2181158065795898, "learning_rate": 6.1881309103001275e-09, "loss": 0.4882, "step": 15254 }, { "epoch": 0.98, "grad_norm": 1.2129511833190918, "learning_rate": 6.136249672885064e-09, "loss": 0.4817, "step": 15255 }, { "epoch": 0.98, "grad_norm": 1.2129557132720947, "learning_rate": 6.0845867043701765e-09, "loss": 0.5406, "step": 15256 }, { "epoch": 0.98, "grad_norm": 1.2787339687347412, "learning_rate": 6.033142007013659e-09, "loss": 0.5309, "step": 15257 }, { "epoch": 0.98, "grad_norm": 1.1406415700912476, "learning_rate": 5.981915583063713e-09, "loss": 0.4767, "step": 15258 }, { "epoch": 0.98, "grad_norm": 1.2620348930358887, "learning_rate": 5.9309074347596585e-09, "loss": 0.4623, "step": 15259 }, { "epoch": 0.99, "grad_norm": 1.2123148441314697, "learning_rate": 5.880117564330823e-09, "loss": 0.5416, "step": 15260 }, { "epoch": 0.99, "grad_norm": 1.2474193572998047, "learning_rate": 5.829545973996542e-09, "loss": 0.4784, "step": 15261 }, { "epoch": 0.99, "grad_norm": 1.2125579118728638, "learning_rate": 5.77919266596838e-09, "loss": 0.4949, "step": 15262 }, { "epoch": 0.99, "grad_norm": 1.2377632856369019, "learning_rate": 5.729057642446245e-09, "loss": 0.5114, "step": 15263 }, { "epoch": 0.99, "grad_norm": 1.152169942855835, "learning_rate": 5.6791409056211615e-09, "loss": 0.5146, "step": 15264 }, { "epoch": 0.99, "grad_norm": 1.2874929904937744, "learning_rate": 5.6294424576758265e-09, "loss": 0.5624, "step": 15265 }, { "epoch": 0.99, "grad_norm": 1.438843011856079, "learning_rate": 5.5799623007818384e-09, "loss": 0.4844, "step": 15266 }, { "epoch": 0.99, "grad_norm": 1.229248046875, "learning_rate": 5.5307004371013555e-09, "loss": 0.5054, "step": 15267 }, { "epoch": 0.99, "grad_norm": 1.125233769416809, "learning_rate": 5.481656868788765e-09, "loss": 0.5103, "step": 15268 }, { "epoch": 0.99, "grad_norm": 1.1833173036575317, "learning_rate": 5.4328315979867986e-09, "loss": 0.5086, "step": 15269 }, { "epoch": 0.99, "grad_norm": 1.2154266834259033, "learning_rate": 5.384224626829304e-09, "loss": 0.5194, "step": 15270 }, { "epoch": 0.99, "grad_norm": 1.2024403810501099, "learning_rate": 5.3358359574412486e-09, "loss": 0.536, "step": 15271 }, { "epoch": 0.99, "grad_norm": 1.1589468717575073, "learning_rate": 5.287665591937052e-09, "loss": 0.5166, "step": 15272 }, { "epoch": 0.99, "grad_norm": 1.2396599054336548, "learning_rate": 5.239713532422808e-09, "loss": 0.5175, "step": 15273 }, { "epoch": 0.99, "grad_norm": 1.203269600868225, "learning_rate": 5.191979780994061e-09, "loss": 0.4709, "step": 15274 }, { "epoch": 0.99, "grad_norm": 1.3001134395599365, "learning_rate": 5.144464339736921e-09, "loss": 0.5436, "step": 15275 }, { "epoch": 0.99, "grad_norm": 1.1927706003189087, "learning_rate": 5.097167210728615e-09, "loss": 0.4999, "step": 15276 }, { "epoch": 0.99, "grad_norm": 1.1241145133972168, "learning_rate": 5.0500883960358235e-09, "loss": 0.4897, "step": 15277 }, { "epoch": 0.99, "grad_norm": 1.2519960403442383, "learning_rate": 5.0032278977169005e-09, "loss": 0.5232, "step": 15278 }, { "epoch": 0.99, "grad_norm": 1.211857795715332, "learning_rate": 4.956585717819095e-09, "loss": 0.4873, "step": 15279 }, { "epoch": 0.99, "grad_norm": 1.3060909509658813, "learning_rate": 4.91016185838189e-09, "loss": 0.506, "step": 15280 }, { "epoch": 0.99, "grad_norm": 1.3230016231536865, "learning_rate": 4.863956321434216e-09, "loss": 0.5539, "step": 15281 }, { "epoch": 0.99, "grad_norm": 1.2282809019088745, "learning_rate": 4.817969108995013e-09, "loss": 0.4894, "step": 15282 }, { "epoch": 0.99, "grad_norm": 1.2925258874893188, "learning_rate": 4.772200223074896e-09, "loss": 0.4979, "step": 15283 }, { "epoch": 0.99, "grad_norm": 1.143862009048462, "learning_rate": 4.726649665673933e-09, "loss": 0.4584, "step": 15284 }, { "epoch": 0.99, "grad_norm": 1.233378291130066, "learning_rate": 4.681317438782751e-09, "loss": 0.5422, "step": 15285 }, { "epoch": 0.99, "grad_norm": 1.3128926753997803, "learning_rate": 4.636203544383655e-09, "loss": 0.5325, "step": 15286 }, { "epoch": 0.99, "grad_norm": 1.2195348739624023, "learning_rate": 4.591307984447846e-09, "loss": 0.5126, "step": 15287 }, { "epoch": 0.99, "grad_norm": 1.2151808738708496, "learning_rate": 4.546630760937088e-09, "loss": 0.4938, "step": 15288 }, { "epoch": 0.99, "grad_norm": 1.159547209739685, "learning_rate": 4.502171875805372e-09, "loss": 0.5041, "step": 15289 }, { "epoch": 0.99, "grad_norm": 1.234533429145813, "learning_rate": 4.4579313309944804e-09, "loss": 0.5001, "step": 15290 }, { "epoch": 0.99, "grad_norm": 1.1957834959030151, "learning_rate": 4.4139091284395306e-09, "loss": 0.5383, "step": 15291 }, { "epoch": 0.99, "grad_norm": 1.2277905941009521, "learning_rate": 4.3701052700628746e-09, "loss": 0.4936, "step": 15292 }, { "epoch": 0.99, "grad_norm": 1.1476101875305176, "learning_rate": 4.326519757780756e-09, "loss": 0.5422, "step": 15293 }, { "epoch": 0.99, "grad_norm": 1.1544407606124878, "learning_rate": 4.283152593497208e-09, "loss": 0.4925, "step": 15294 }, { "epoch": 0.99, "grad_norm": 3.956988573074341, "learning_rate": 4.240003779107937e-09, "loss": 0.452, "step": 15295 }, { "epoch": 0.99, "grad_norm": 1.4233949184417725, "learning_rate": 4.197073316499211e-09, "loss": 0.4856, "step": 15296 }, { "epoch": 0.99, "grad_norm": 1.258918285369873, "learning_rate": 4.154361207546753e-09, "loss": 0.5403, "step": 15297 }, { "epoch": 0.99, "grad_norm": 1.185730218887329, "learning_rate": 4.111867454117402e-09, "loss": 0.4681, "step": 15298 }, { "epoch": 0.99, "grad_norm": 1.1229058504104614, "learning_rate": 4.069592058069116e-09, "loss": 0.559, "step": 15299 }, { "epoch": 0.99, "grad_norm": 1.1483440399169922, "learning_rate": 4.027535021249307e-09, "loss": 0.5074, "step": 15300 }, { "epoch": 0.99, "grad_norm": 1.1657227277755737, "learning_rate": 3.985696345495949e-09, "loss": 0.4749, "step": 15301 }, { "epoch": 0.99, "grad_norm": 1.1926249265670776, "learning_rate": 3.944076032638133e-09, "loss": 0.4778, "step": 15302 }, { "epoch": 0.99, "grad_norm": 1.1059390306472778, "learning_rate": 3.902674084494962e-09, "loss": 0.4795, "step": 15303 }, { "epoch": 0.99, "grad_norm": 1.3019427061080933, "learning_rate": 3.861490502874987e-09, "loss": 0.506, "step": 15304 }, { "epoch": 0.99, "grad_norm": 1.2582347393035889, "learning_rate": 3.820525289580102e-09, "loss": 0.5287, "step": 15305 }, { "epoch": 0.99, "grad_norm": 1.299688696861267, "learning_rate": 3.7797784463988744e-09, "loss": 0.5361, "step": 15306 }, { "epoch": 0.99, "grad_norm": 1.2367244958877563, "learning_rate": 3.739249975113768e-09, "loss": 0.4811, "step": 15307 }, { "epoch": 0.99, "grad_norm": 1.3505983352661133, "learning_rate": 3.6989398774950336e-09, "loss": 0.5323, "step": 15308 }, { "epoch": 0.99, "grad_norm": 1.1470364332199097, "learning_rate": 3.6588481553051504e-09, "loss": 0.4735, "step": 15309 }, { "epoch": 0.99, "grad_norm": 1.241531252861023, "learning_rate": 3.6189748102966047e-09, "loss": 0.5069, "step": 15310 }, { "epoch": 0.99, "grad_norm": 1.1504385471343994, "learning_rate": 3.5793198442113375e-09, "loss": 0.5256, "step": 15311 }, { "epoch": 0.99, "grad_norm": 1.1971172094345093, "learning_rate": 3.5398832587829613e-09, "loss": 0.4835, "step": 15312 }, { "epoch": 0.99, "grad_norm": 1.1921395063400269, "learning_rate": 3.5006650557356523e-09, "loss": 0.5235, "step": 15313 }, { "epoch": 0.99, "grad_norm": 1.1844969987869263, "learning_rate": 3.4616652367830404e-09, "loss": 0.5037, "step": 15314 }, { "epoch": 0.99, "grad_norm": 1.2637358903884888, "learning_rate": 3.422883803629873e-09, "loss": 0.4719, "step": 15315 }, { "epoch": 0.99, "grad_norm": 1.2706235647201538, "learning_rate": 3.3843207579714597e-09, "loss": 0.5118, "step": 15316 }, { "epoch": 0.99, "grad_norm": 1.2296946048736572, "learning_rate": 3.34597610149201e-09, "loss": 0.5483, "step": 15317 }, { "epoch": 0.99, "grad_norm": 1.220992922782898, "learning_rate": 3.3078498358690704e-09, "loss": 0.501, "step": 15318 }, { "epoch": 0.99, "grad_norm": 1.233705997467041, "learning_rate": 3.26994196276853e-09, "loss": 0.5441, "step": 15319 }, { "epoch": 0.99, "grad_norm": 1.503658652305603, "learning_rate": 3.232252483846843e-09, "loss": 0.5185, "step": 15320 }, { "epoch": 0.99, "grad_norm": 1.2971689701080322, "learning_rate": 3.194781400751579e-09, "loss": 0.4953, "step": 15321 }, { "epoch": 0.99, "grad_norm": 1.3130290508270264, "learning_rate": 3.1575287151203173e-09, "loss": 0.5345, "step": 15322 }, { "epoch": 0.99, "grad_norm": 1.22923743724823, "learning_rate": 3.1204944285812e-09, "loss": 0.4846, "step": 15323 }, { "epoch": 0.99, "grad_norm": 1.1101410388946533, "learning_rate": 3.0836785427534878e-09, "loss": 0.473, "step": 15324 }, { "epoch": 0.99, "grad_norm": 1.252625823020935, "learning_rate": 3.0470810592464483e-09, "loss": 0.524, "step": 15325 }, { "epoch": 0.99, "grad_norm": 1.2833621501922607, "learning_rate": 3.0107019796588034e-09, "loss": 0.5238, "step": 15326 }, { "epoch": 0.99, "grad_norm": 1.235093116760254, "learning_rate": 2.974541305580947e-09, "loss": 0.5171, "step": 15327 }, { "epoch": 0.99, "grad_norm": 1.819556713104248, "learning_rate": 2.9385990385932818e-09, "loss": 0.5312, "step": 15328 }, { "epoch": 0.99, "grad_norm": 1.2820329666137695, "learning_rate": 2.902875180266773e-09, "loss": 0.4795, "step": 15329 }, { "epoch": 0.99, "grad_norm": 1.2352955341339111, "learning_rate": 2.86736973216295e-09, "loss": 0.4986, "step": 15330 }, { "epoch": 0.99, "grad_norm": 1.2750568389892578, "learning_rate": 2.8320826958339045e-09, "loss": 0.5204, "step": 15331 }, { "epoch": 0.99, "grad_norm": 1.0776865482330322, "learning_rate": 2.7970140728211803e-09, "loss": 0.5005, "step": 15332 }, { "epoch": 0.99, "grad_norm": 1.1747055053710938, "learning_rate": 2.7621638646585515e-09, "loss": 0.4871, "step": 15333 }, { "epoch": 0.99, "grad_norm": 1.231046438217163, "learning_rate": 2.7275320728686883e-09, "loss": 0.4549, "step": 15334 }, { "epoch": 0.99, "grad_norm": 2.1429128646850586, "learning_rate": 2.693118698964825e-09, "loss": 0.5137, "step": 15335 }, { "epoch": 0.99, "grad_norm": 1.1681357622146606, "learning_rate": 2.658923744451869e-09, "loss": 0.5236, "step": 15336 }, { "epoch": 0.99, "grad_norm": 1.2364908456802368, "learning_rate": 2.6249472108236253e-09, "loss": 0.5113, "step": 15337 }, { "epoch": 0.99, "grad_norm": 1.1894536018371582, "learning_rate": 2.591189099566127e-09, "loss": 0.4506, "step": 15338 }, { "epoch": 0.99, "grad_norm": 1.0996816158294678, "learning_rate": 2.557649412153751e-09, "loss": 0.4996, "step": 15339 }, { "epoch": 0.99, "grad_norm": 1.1736520528793335, "learning_rate": 2.5243281500531012e-09, "loss": 0.5629, "step": 15340 }, { "epoch": 0.99, "grad_norm": 1.2756273746490479, "learning_rate": 2.491225314720791e-09, "loss": 0.4785, "step": 15341 }, { "epoch": 0.99, "grad_norm": 1.3341282606124878, "learning_rate": 2.4583409076028853e-09, "loss": 0.5441, "step": 15342 }, { "epoch": 0.99, "grad_norm": 1.3096948862075806, "learning_rate": 2.4256749301371228e-09, "loss": 0.5845, "step": 15343 }, { "epoch": 0.99, "grad_norm": 1.195312261581421, "learning_rate": 2.3932273837512508e-09, "loss": 0.5674, "step": 15344 }, { "epoch": 0.99, "grad_norm": 1.1795527935028076, "learning_rate": 2.3609982698635793e-09, "loss": 0.4962, "step": 15345 }, { "epoch": 0.99, "grad_norm": 1.1928138732910156, "learning_rate": 2.3289875898818704e-09, "loss": 0.514, "step": 15346 }, { "epoch": 0.99, "grad_norm": 1.260117530822754, "learning_rate": 2.297195345206671e-09, "loss": 0.5481, "step": 15347 }, { "epoch": 0.99, "grad_norm": 1.231653094291687, "learning_rate": 2.2656215372268697e-09, "loss": 0.5521, "step": 15348 }, { "epoch": 0.99, "grad_norm": 1.3271162509918213, "learning_rate": 2.234266167321919e-09, "loss": 0.474, "step": 15349 }, { "epoch": 0.99, "grad_norm": 1.2824457883834839, "learning_rate": 2.2031292368629444e-09, "loss": 0.5371, "step": 15350 }, { "epoch": 0.99, "grad_norm": 1.2161388397216797, "learning_rate": 2.172210747211079e-09, "loss": 0.527, "step": 15351 }, { "epoch": 0.99, "grad_norm": 1.2858660221099854, "learning_rate": 2.141510699716909e-09, "loss": 0.5328, "step": 15352 }, { "epoch": 0.99, "grad_norm": 1.148419737815857, "learning_rate": 2.111029095722694e-09, "loss": 0.494, "step": 15353 }, { "epoch": 0.99, "grad_norm": 1.1798005104064941, "learning_rate": 2.0807659365607023e-09, "loss": 0.4983, "step": 15354 }, { "epoch": 0.99, "grad_norm": 1.1038644313812256, "learning_rate": 2.0507212235537646e-09, "loss": 0.4843, "step": 15355 }, { "epoch": 0.99, "grad_norm": 1.581398367881775, "learning_rate": 2.0208949580147188e-09, "loss": 0.4812, "step": 15356 }, { "epoch": 0.99, "grad_norm": 1.9913851022720337, "learning_rate": 1.991287141247522e-09, "loss": 0.4941, "step": 15357 }, { "epoch": 0.99, "grad_norm": 1.3378479480743408, "learning_rate": 1.9618977745461398e-09, "loss": 0.5404, "step": 15358 }, { "epoch": 0.99, "grad_norm": 1.2644834518432617, "learning_rate": 1.9327268591950998e-09, "loss": 0.5485, "step": 15359 }, { "epoch": 0.99, "grad_norm": 1.212224006652832, "learning_rate": 1.903774396469493e-09, "loss": 0.5203, "step": 15360 }, { "epoch": 0.99, "grad_norm": 1.1985429525375366, "learning_rate": 1.875040387634419e-09, "loss": 0.5127, "step": 15361 }, { "epoch": 0.99, "grad_norm": 1.222673773765564, "learning_rate": 1.846524833946095e-09, "loss": 0.505, "step": 15362 }, { "epoch": 0.99, "grad_norm": 1.2481573820114136, "learning_rate": 1.8182277366507462e-09, "loss": 0.4874, "step": 15363 }, { "epoch": 0.99, "grad_norm": 1.2274290323257446, "learning_rate": 1.790149096985716e-09, "loss": 0.5184, "step": 15364 }, { "epoch": 0.99, "grad_norm": 1.1757549047470093, "learning_rate": 1.762288916176691e-09, "loss": 0.5028, "step": 15365 }, { "epoch": 0.99, "grad_norm": 1.2684472799301147, "learning_rate": 1.734647195443251e-09, "loss": 0.4863, "step": 15366 }, { "epoch": 0.99, "grad_norm": 1.2959330081939697, "learning_rate": 1.7072239359922082e-09, "loss": 0.529, "step": 15367 }, { "epoch": 0.99, "grad_norm": 1.1756243705749512, "learning_rate": 1.6800191390226039e-09, "loss": 0.4911, "step": 15368 }, { "epoch": 0.99, "grad_norm": 1.3040677309036255, "learning_rate": 1.653032805724042e-09, "loss": 0.4797, "step": 15369 }, { "epoch": 0.99, "grad_norm": 1.2174094915390015, "learning_rate": 1.6262649372750239e-09, "loss": 0.5177, "step": 15370 }, { "epoch": 0.99, "grad_norm": 1.253974437713623, "learning_rate": 1.59971553484628e-09, "loss": 0.5317, "step": 15371 }, { "epoch": 0.99, "grad_norm": 1.281294584274292, "learning_rate": 1.5733845995974383e-09, "loss": 0.486, "step": 15372 }, { "epoch": 0.99, "grad_norm": 1.1382508277893066, "learning_rate": 1.5472721326803553e-09, "loss": 0.5205, "step": 15373 }, { "epoch": 0.99, "grad_norm": 1.2279671430587769, "learning_rate": 1.5213781352357848e-09, "loss": 0.5213, "step": 15374 }, { "epoch": 0.99, "grad_norm": 1.204464316368103, "learning_rate": 1.4957026083950444e-09, "loss": 0.5371, "step": 15375 }, { "epoch": 0.99, "grad_norm": 1.1537240743637085, "learning_rate": 1.4702455532811244e-09, "loss": 0.4722, "step": 15376 }, { "epoch": 0.99, "grad_norm": 1.1525191068649292, "learning_rate": 1.4450069710064684e-09, "loss": 0.5238, "step": 15377 }, { "epoch": 0.99, "grad_norm": 1.2224680185317993, "learning_rate": 1.4199868626746383e-09, "loss": 0.5071, "step": 15378 }, { "epoch": 0.99, "grad_norm": 1.1957316398620605, "learning_rate": 1.3951852293780932e-09, "loss": 0.4603, "step": 15379 }, { "epoch": 0.99, "grad_norm": 1.2800929546356201, "learning_rate": 1.370602072200966e-09, "loss": 0.4668, "step": 15380 }, { "epoch": 0.99, "grad_norm": 1.20456862449646, "learning_rate": 1.346237392219063e-09, "loss": 0.4819, "step": 15381 }, { "epoch": 0.99, "grad_norm": 1.2538223266601562, "learning_rate": 1.3220911904959778e-09, "loss": 0.559, "step": 15382 }, { "epoch": 0.99, "grad_norm": 1.23941969871521, "learning_rate": 1.298163468087532e-09, "loss": 0.483, "step": 15383 }, { "epoch": 0.99, "grad_norm": 1.2170413732528687, "learning_rate": 1.2744542260395565e-09, "loss": 0.4978, "step": 15384 }, { "epoch": 0.99, "grad_norm": 1.4347636699676514, "learning_rate": 1.250963465388444e-09, "loss": 0.5463, "step": 15385 }, { "epoch": 0.99, "grad_norm": 1.114661693572998, "learning_rate": 1.2276911871605957e-09, "loss": 0.4751, "step": 15386 }, { "epoch": 0.99, "grad_norm": 1.1932309865951538, "learning_rate": 1.2046373923735311e-09, "loss": 0.5078, "step": 15387 }, { "epoch": 0.99, "grad_norm": 1.230939269065857, "learning_rate": 1.1818020820347776e-09, "loss": 0.4599, "step": 15388 }, { "epoch": 0.99, "grad_norm": 1.0830516815185547, "learning_rate": 1.1591852571418705e-09, "loss": 0.4734, "step": 15389 }, { "epoch": 0.99, "grad_norm": 1.3271119594573975, "learning_rate": 1.1367869186840186e-09, "loss": 0.5131, "step": 15390 }, { "epoch": 0.99, "grad_norm": 1.1841946840286255, "learning_rate": 1.1146070676404387e-09, "loss": 0.5287, "step": 15391 }, { "epoch": 0.99, "grad_norm": 1.187314510345459, "learning_rate": 1.092645704979245e-09, "loss": 0.4634, "step": 15392 }, { "epoch": 0.99, "grad_norm": 1.3243544101715088, "learning_rate": 1.0709028316618907e-09, "loss": 0.5461, "step": 15393 }, { "epoch": 0.99, "grad_norm": 1.2038156986236572, "learning_rate": 1.0493784486376169e-09, "loss": 0.5102, "step": 15394 }, { "epoch": 0.99, "grad_norm": 1.2467164993286133, "learning_rate": 1.0280725568473371e-09, "loss": 0.5603, "step": 15395 }, { "epoch": 0.99, "grad_norm": 1.1906508207321167, "learning_rate": 1.0069851572230838e-09, "loss": 0.4862, "step": 15396 }, { "epoch": 0.99, "grad_norm": 1.3511619567871094, "learning_rate": 9.861162506857869e-10, "loss": 0.5329, "step": 15397 }, { "epoch": 0.99, "grad_norm": 1.4045454263687134, "learning_rate": 9.654658381474945e-10, "loss": 0.5002, "step": 15398 }, { "epoch": 0.99, "grad_norm": 1.1781249046325684, "learning_rate": 9.450339205108182e-10, "loss": 0.5141, "step": 15399 }, { "epoch": 0.99, "grad_norm": 1.2342782020568848, "learning_rate": 9.248204986694875e-10, "loss": 0.5193, "step": 15400 }, { "epoch": 0.99, "grad_norm": 1.2329760789871216, "learning_rate": 9.048255735061295e-10, "loss": 0.5057, "step": 15401 }, { "epoch": 0.99, "grad_norm": 1.1810585260391235, "learning_rate": 8.85049145895045e-10, "loss": 0.499, "step": 15402 }, { "epoch": 0.99, "grad_norm": 1.3481844663619995, "learning_rate": 8.654912167005425e-10, "loss": 0.5217, "step": 15403 }, { "epoch": 0.99, "grad_norm": 1.1593843698501587, "learning_rate": 8.461517867774938e-10, "loss": 0.5101, "step": 15404 }, { "epoch": 0.99, "grad_norm": 1.1932283639907837, "learning_rate": 8.270308569713337e-10, "loss": 0.5152, "step": 15405 }, { "epoch": 0.99, "grad_norm": 1.1690860986709595, "learning_rate": 8.081284281175051e-10, "loss": 0.5027, "step": 15406 }, { "epoch": 0.99, "grad_norm": 1.2774851322174072, "learning_rate": 7.894445010420138e-10, "loss": 0.4838, "step": 15407 }, { "epoch": 0.99, "grad_norm": 1.1392230987548828, "learning_rate": 7.70979076561984e-10, "loss": 0.5018, "step": 15408 }, { "epoch": 0.99, "grad_norm": 1.1162898540496826, "learning_rate": 7.52732155484548e-10, "loss": 0.4713, "step": 15409 }, { "epoch": 0.99, "grad_norm": 1.1099278926849365, "learning_rate": 7.347037386068456e-10, "loss": 0.5292, "step": 15410 }, { "epoch": 0.99, "grad_norm": 1.213243007659912, "learning_rate": 7.168938267165804e-10, "loss": 0.5388, "step": 15411 }, { "epoch": 0.99, "grad_norm": 1.1134216785430908, "learning_rate": 6.993024205931287e-10, "loss": 0.5031, "step": 15412 }, { "epoch": 0.99, "grad_norm": 1.144004464149475, "learning_rate": 6.819295210042099e-10, "loss": 0.4985, "step": 15413 }, { "epoch": 0.99, "grad_norm": 1.1875251531600952, "learning_rate": 6.647751287103265e-10, "loss": 0.5081, "step": 15414 }, { "epoch": 1.0, "grad_norm": 1.137729287147522, "learning_rate": 6.478392444603243e-10, "loss": 0.5303, "step": 15415 }, { "epoch": 1.0, "grad_norm": 1.3581444025039673, "learning_rate": 6.311218689947219e-10, "loss": 0.5295, "step": 15416 }, { "epoch": 1.0, "grad_norm": 1.166957139968872, "learning_rate": 6.146230030440459e-10, "loss": 0.5169, "step": 15417 }, { "epoch": 1.0, "grad_norm": 1.278786540031433, "learning_rate": 5.983426473299414e-10, "loss": 0.558, "step": 15418 }, { "epoch": 1.0, "grad_norm": 1.1943365335464478, "learning_rate": 5.822808025640614e-10, "loss": 0.5017, "step": 15419 }, { "epoch": 1.0, "grad_norm": 1.2802482843399048, "learning_rate": 5.664374694475117e-10, "loss": 0.5044, "step": 15420 }, { "epoch": 1.0, "grad_norm": 1.135405421257019, "learning_rate": 5.508126486730714e-10, "loss": 0.4945, "step": 15421 }, { "epoch": 1.0, "grad_norm": 1.1814254522323608, "learning_rate": 5.354063409240828e-10, "loss": 0.4942, "step": 15422 }, { "epoch": 1.0, "grad_norm": 1.2718796730041504, "learning_rate": 5.202185468738963e-10, "loss": 0.5485, "step": 15423 }, { "epoch": 1.0, "grad_norm": 1.1192373037338257, "learning_rate": 5.0524926718587e-10, "loss": 0.5213, "step": 15424 }, { "epoch": 1.0, "grad_norm": 1.2355250120162964, "learning_rate": 4.904985025144804e-10, "loss": 0.5352, "step": 15425 }, { "epoch": 1.0, "grad_norm": 1.2770555019378662, "learning_rate": 4.759662535047672e-10, "loss": 0.4546, "step": 15426 }, { "epoch": 1.0, "grad_norm": 1.1450320482254028, "learning_rate": 4.616525207917777e-10, "loss": 0.4935, "step": 15427 }, { "epoch": 1.0, "grad_norm": 1.2131094932556152, "learning_rate": 4.475573050005677e-10, "loss": 0.4967, "step": 15428 }, { "epoch": 1.0, "grad_norm": 1.1574739217758179, "learning_rate": 4.3368060674786603e-10, "loss": 0.4628, "step": 15429 }, { "epoch": 1.0, "grad_norm": 1.22710382938385, "learning_rate": 4.2002242663929936e-10, "loss": 0.4962, "step": 15430 }, { "epoch": 1.0, "grad_norm": 1.1627213954925537, "learning_rate": 4.065827652732779e-10, "loss": 0.4868, "step": 15431 }, { "epoch": 1.0, "grad_norm": 1.2347207069396973, "learning_rate": 3.9336162323599937e-10, "loss": 0.5334, "step": 15432 }, { "epoch": 1.0, "grad_norm": 1.3421735763549805, "learning_rate": 3.8035900110589e-10, "loss": 0.531, "step": 15433 }, { "epoch": 1.0, "grad_norm": 1.1566088199615479, "learning_rate": 3.675748994508288e-10, "loss": 0.5112, "step": 15434 }, { "epoch": 1.0, "grad_norm": 1.2216614484786987, "learning_rate": 3.550093188303683e-10, "loss": 0.5772, "step": 15435 }, { "epoch": 1.0, "grad_norm": 1.285794973373413, "learning_rate": 3.426622597929585e-10, "loss": 0.5084, "step": 15436 }, { "epoch": 1.0, "grad_norm": 1.1462799310684204, "learning_rate": 3.30533722878168e-10, "loss": 0.4964, "step": 15437 }, { "epoch": 1.0, "grad_norm": 1.2611467838287354, "learning_rate": 3.1862370861668323e-10, "loss": 0.4883, "step": 15438 }, { "epoch": 1.0, "grad_norm": 1.2836124897003174, "learning_rate": 3.0693221752864376e-10, "loss": 0.5181, "step": 15439 }, { "epoch": 1.0, "grad_norm": 1.1904670000076294, "learning_rate": 2.954592501253073e-10, "loss": 0.5164, "step": 15440 }, { "epoch": 1.0, "grad_norm": 1.2139699459075928, "learning_rate": 2.842048069084946e-10, "loss": 0.4685, "step": 15441 }, { "epoch": 1.0, "grad_norm": 1.4368301630020142, "learning_rate": 2.731688883689243e-10, "loss": 0.5221, "step": 15442 }, { "epoch": 1.0, "grad_norm": 1.186977744102478, "learning_rate": 2.623514949900985e-10, "loss": 0.501, "step": 15443 }, { "epoch": 1.0, "grad_norm": 1.1945723295211792, "learning_rate": 2.5175262724441707e-10, "loss": 0.4948, "step": 15444 }, { "epoch": 1.0, "grad_norm": 1.269412875175476, "learning_rate": 2.4137228559484306e-10, "loss": 0.5469, "step": 15445 }, { "epoch": 1.0, "grad_norm": 1.2450108528137207, "learning_rate": 2.3121047049545763e-10, "loss": 0.5644, "step": 15446 }, { "epoch": 1.0, "grad_norm": 1.1706949472427368, "learning_rate": 2.2126718239035006e-10, "loss": 0.5087, "step": 15447 }, { "epoch": 1.0, "grad_norm": 1.2567055225372314, "learning_rate": 2.1154242171417261e-10, "loss": 0.5144, "step": 15448 }, { "epoch": 1.0, "grad_norm": 1.2993402481079102, "learning_rate": 2.020361888915856e-10, "loss": 0.5496, "step": 15449 }, { "epoch": 1.0, "grad_norm": 1.2382004261016846, "learning_rate": 1.9274848433836757e-10, "loss": 0.4591, "step": 15450 }, { "epoch": 1.0, "grad_norm": 1.132319688796997, "learning_rate": 1.8367930846030502e-10, "loss": 0.4862, "step": 15451 }, { "epoch": 1.0, "grad_norm": 1.2748461961746216, "learning_rate": 1.7482866165430268e-10, "loss": 0.5225, "step": 15452 }, { "epoch": 1.0, "grad_norm": 1.2684078216552734, "learning_rate": 1.6619654430671816e-10, "loss": 0.5474, "step": 15453 }, { "epoch": 1.0, "grad_norm": 1.19266676902771, "learning_rate": 1.577829567950273e-10, "loss": 0.4803, "step": 15454 }, { "epoch": 1.0, "grad_norm": 1.266031265258789, "learning_rate": 1.4958789948671393e-10, "loss": 0.5581, "step": 15455 }, { "epoch": 1.0, "grad_norm": 1.193976640701294, "learning_rate": 1.4161137273982494e-10, "loss": 0.5344, "step": 15456 }, { "epoch": 1.0, "grad_norm": 1.2255632877349854, "learning_rate": 1.3385337690352552e-10, "loss": 0.4992, "step": 15457 }, { "epoch": 1.0, "grad_norm": 1.1812338829040527, "learning_rate": 1.2631391231698875e-10, "loss": 0.5022, "step": 15458 }, { "epoch": 1.0, "grad_norm": 1.3080058097839355, "learning_rate": 1.189929793093958e-10, "loss": 0.5329, "step": 15459 }, { "epoch": 1.0, "grad_norm": 1.276973009109497, "learning_rate": 1.1189057820049087e-10, "loss": 0.5337, "step": 15460 }, { "epoch": 1.0, "grad_norm": 1.1744798421859741, "learning_rate": 1.0500670930058132e-10, "loss": 0.4996, "step": 15461 }, { "epoch": 1.0, "grad_norm": 1.300916314125061, "learning_rate": 9.834137291164781e-11, "loss": 0.5818, "step": 15462 }, { "epoch": 1.0, "grad_norm": 1.21553373336792, "learning_rate": 9.189456932401364e-11, "loss": 0.5275, "step": 15463 }, { "epoch": 1.0, "grad_norm": 1.376712441444397, "learning_rate": 8.566629881967547e-11, "loss": 0.5277, "step": 15464 }, { "epoch": 1.0, "grad_norm": 1.5000717639923096, "learning_rate": 7.965656167119306e-11, "loss": 0.52, "step": 15465 }, { "epoch": 1.0, "grad_norm": 1.1889352798461914, "learning_rate": 7.386535814057904e-11, "loss": 0.4532, "step": 15466 }, { "epoch": 1.0, "grad_norm": 1.2522287368774414, "learning_rate": 6.829268848151938e-11, "loss": 0.4977, "step": 15467 }, { "epoch": 1.0, "grad_norm": 1.20290207862854, "learning_rate": 6.29385529371529e-11, "loss": 0.4874, "step": 15468 }, { "epoch": 1.0, "grad_norm": 1.4367649555206299, "learning_rate": 5.780295174173667e-11, "loss": 0.5264, "step": 15469 }, { "epoch": 1.0, "grad_norm": 1.2477915287017822, "learning_rate": 5.288588512009085e-11, "loss": 0.5042, "step": 15470 }, { "epoch": 1.0, "grad_norm": 1.219416856765747, "learning_rate": 4.8187353286488494e-11, "loss": 0.5525, "step": 15471 }, { "epoch": 1.0, "grad_norm": 1.2134571075439453, "learning_rate": 4.370735644687596e-11, "loss": 0.5284, "step": 15472 }, { "epoch": 1.0, "grad_norm": 1.3550338745117188, "learning_rate": 3.944589479665251e-11, "loss": 0.5576, "step": 15473 }, { "epoch": 1.0, "grad_norm": 1.2262320518493652, "learning_rate": 3.540296852178049e-11, "loss": 0.528, "step": 15474 }, { "epoch": 1.0, "grad_norm": 1.5099848508834839, "learning_rate": 3.1578577799895596e-11, "loss": 0.5451, "step": 15475 }, { "epoch": 1.0, "grad_norm": 1.1887458562850952, "learning_rate": 2.797272279753127e-11, "loss": 0.5238, "step": 15476 }, { "epoch": 1.0, "grad_norm": 1.2041072845458984, "learning_rate": 2.4585403672339192e-11, "loss": 0.5391, "step": 15477 }, { "epoch": 1.0, "grad_norm": 1.3035813570022583, "learning_rate": 2.1416620571979017e-11, "loss": 0.5042, "step": 15478 }, { "epoch": 1.0, "grad_norm": 1.1929165124893188, "learning_rate": 1.8466373635783742e-11, "loss": 0.5002, "step": 15479 }, { "epoch": 1.0, "grad_norm": 1.1861213445663452, "learning_rate": 1.573466299253923e-11, "loss": 0.4832, "step": 15480 }, { "epoch": 1.0, "grad_norm": 1.1834149360656738, "learning_rate": 1.3221488761039348e-11, "loss": 0.4806, "step": 15481 }, { "epoch": 1.0, "grad_norm": 1.2728193998336792, "learning_rate": 1.0926851051196175e-11, "loss": 0.5183, "step": 15482 }, { "epoch": 1.0, "grad_norm": 1.1480746269226074, "learning_rate": 8.850749964040007e-12, "loss": 0.488, "step": 15483 }, { "epoch": 1.0, "grad_norm": 1.1378017663955688, "learning_rate": 6.993185590054019e-12, "loss": 0.5215, "step": 15484 }, { "epoch": 1.0, "grad_norm": 1.2032815217971802, "learning_rate": 5.354158009729382e-12, "loss": 0.4805, "step": 15485 }, { "epoch": 1.0, "grad_norm": 1.1589854955673218, "learning_rate": 3.933667295230592e-12, "loss": 0.4862, "step": 15486 }, { "epoch": 1.0, "grad_norm": 1.197444200515747, "learning_rate": 2.7317135092852497e-12, "loss": 0.5066, "step": 15487 }, { "epoch": 1.0, "grad_norm": 1.1749597787857056, "learning_rate": 1.7482967029636145e-12, "loss": 0.5141, "step": 15488 }, { "epoch": 1.0, "grad_norm": 1.2526309490203857, "learning_rate": 9.834169206746068e-13, "loss": 0.5376, "step": 15489 }, { "epoch": 1.0, "grad_norm": 1.2439075708389282, "learning_rate": 4.37074194614695e-13, "loss": 0.5596, "step": 15490 }, { "epoch": 1.0, "grad_norm": 1.2610630989074707, "learning_rate": 1.0926854976389678e-13, "loss": 0.5217, "step": 15491 }, { "epoch": 1.0, "grad_norm": 1.1793808937072754, "learning_rate": 0.0, "loss": 0.4981, "step": 15492 }, { "epoch": 1.0, "step": 15492, "total_flos": 4.134195635252573e+19, "train_loss": 0.5451035319435298, "train_runtime": 214532.9483, "train_samples_per_second": 18.487, "train_steps_per_second": 0.072 } ], "logging_steps": 1.0, "max_steps": 15492, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 8000, "total_flos": 4.134195635252573e+19, "train_batch_size": 1, "trial_name": null, "trial_params": null }